2024-10-16 05:14:53 -06:00
|
|
|
#include "arc/std/parser.h"
|
2024-10-24 19:56:26 -06:00
|
|
|
#include "arc/std/bool.h"
|
2024-10-16 23:46:16 -06:00
|
|
|
#include "arc/std/errno.h"
|
2024-10-16 05:14:53 -06:00
|
|
|
#include "arc/std/lexer.h"
|
2024-10-24 19:56:26 -06:00
|
|
|
//#include "arc/std/vector.h"
|
2024-10-16 05:14:53 -06:00
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
struct ARC_Parser {
|
|
|
|
|
ARC_Array language;
|
|
|
|
|
|
|
|
|
|
ARC_Lexer *lexer;
|
|
|
|
|
};
|
|
|
|
|
|
2024-10-16 18:00:52 -06:00
|
|
|
/**
 * @brief creates an ARC_Parser that owns a deep copy of the given language and a new lexer
 *
 * @note each tag's tokensOrTags table is copied into ONE allocation (the row pointers followed
 *       by every row's values), so a single free of a tag's tokensOrTags releases the whole table
 * @note every row of tokensOrTags is read as "count, value 1 ... value count", which is the same
 *       layout ARC_Parser_ParseTag walks
 * @note if an allocation fails, everything allocated so far is released, *parser is set to NULL
 *       and arc_errno is set to ARC_ERRNO_NULL
 *
 * @param[out] parser           where the created parser is stored
 * @param[in]  language         array of ARC_ParserLanguageTag to copy, NULL (or an empty array) for no language
 * @param[in]  initLexerRulesFn callback that is run on the new lexer to register its rules
 */
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
    ARC_Parser *created = malloc(sizeof *created);
    *parser = created;
    if(created == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Create(parser, language, initLexerRulesFn), could not allocate the parser");
        return;
    }

    //start with an empty language so the parser is consistent even if no language is given
    created->language.size = 0;
    created->language.data = NULL;
    created->lexer         = NULL;

    //if the language exists, deep copy it so the parser never shares memory with the caller
    if(language != NULL && language->size != 0){
        ARC_ParserLanguageTag *sourceTags = language->data;
        ARC_ParserLanguageTag *copiedTags = malloc(sizeof *copiedTags * language->size);
        if(copiedTags == NULL){
            goto allocationFailed;
        }

        created->language.data = copiedTags;

        for(uint32_t index = 0; index < language->size; index++){
            ARC_ParserLanguageTag *sourceTag = sourceTags + index;
            ARC_ParserLanguageTag *copiedTag = copiedTags + index;

            //copy the plain fields, then drop the borrowed table pointer until the copy exists
            *copiedTag = *sourceTag;
            copiedTag->tokensOrTags = NULL;

            //the tag is now owned by the parser, so the failure path is allowed to free its table
            created->language.size = index + 1;

            //work out how much room the row pointers and all of the row values need
            size_t valueBytes = sizeof **copiedTag->tokensOrTags;
            size_t rowsBytes  = sizeof *copiedTag->tokensOrTags * sourceTag->tokensOrTagsSize;
            size_t tableBytes = rowsBytes;
            for(uint32_t orIndex = 0; orIndex < sourceTag->tokensOrTagsSize; orIndex++){
                tableBytes += valueBytes * ((size_t)sourceTag->tokensOrTags[orIndex][0] + 1);
            }

            //a tag without any or sections has nothing to copy
            if(tableBytes == 0){
                continue;
            }

            unsigned char *table = malloc(tableBytes);
            if(table == NULL){
                goto allocationFailed;
            }

            //the row pointers live at the start of the allocation, the row values directly after
            //NOTE(review): assumes the value type needs no stricter alignment than a pointer (true for uint32_t) - confirm against the header
            copiedTag->tokensOrTags = (void *)table;

            unsigned char *rowValues = table + rowsBytes;
            for(uint32_t orIndex = 0; orIndex < sourceTag->tokensOrTagsSize; orIndex++){
                size_t rowBytes = valueBytes * ((size_t)sourceTag->tokensOrTags[orIndex][0] + 1);

                memcpy(rowValues, sourceTag->tokensOrTags[orIndex], rowBytes);
                copiedTag->tokensOrTags[orIndex] = (void *)rowValues;

                rowValues += rowBytes;
            }
        }
    }

    //create the lexer
    ARC_Lexer_Create(&(created->lexer));

    //register instructions to the lexer
    initLexerRulesFn(created->lexer);
    return;

allocationFailed:
    //release every table copied so far, then the tags and the parser itself
    for(uint32_t index = 0; index < created->language.size; index++){
        free(((ARC_ParserLanguageTag *)created->language.data)[index].tokensOrTags);
    }

    free(created->language.data);
    free(created);
    *parser = NULL;

    arc_errno = ARC_ERRNO_NULL;
    ARC_DEBUG_LOG_ERROR("ARC_Parser_Create(parser, language, initLexerRulesFn), could not allocate the copy of the language");
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief placeholder for creating a parser from a language described in a string
 *
 * @note not implemented yet, the body does nothing and *parser is left untouched
 *
 * @param parser           currently unused
 * @param languageString   currently unused
 * @param initLexerRulesFn currently unused
 */
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
    //TODO: implement, until then mark the parameters as intentionally unused
    (void)parser;
    (void)languageString;
    (void)initLexerRulesFn;
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief destroys a parser and the memory it holds
 *
 * @note each tag's tokensOrTags pointer is released with a single free, followed by the tag
 *       array, the lexer, and the parser itself
 * @note passing NULL is a no-op, the same way free(NULL) is
 *
 * @param[in] parser the parser to destroy, can be NULL
 */
void ARC_Parser_Destroy(ARC_Parser *parser){
    //nothing to clean up without a parser
    if(parser == NULL){
        return;
    }

    //clear all the token or tag tables held by the language from memory
    ARC_ParserLanguageTag *tags = parser->language.data;
    for(uint32_t index = 0; index < parser->language.size; index++){
        free(tags[index].tokensOrTags);
    }

    //clear the language itself from memory
    free(parser->language.data);

    ARC_Lexer_Destroy(parser->lexer);

    free(parser);
}
|
2024-10-16 18:00:52 -06:00
|
|
|
|
2024-10-16 23:46:16 -06:00
|
|
|
//private recusive function to parse a tag
|
2024-10-31 19:58:11 -06:00
|
|
|
ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t tagId){
|
2024-10-16 23:46:16 -06:00
|
|
|
//get the current tag
|
|
|
|
|
ARC_ParserLanguageTag *tag = NULL;
|
|
|
|
|
for(uint32_t index = 0; index < parser->language.size; index++){
|
|
|
|
|
ARC_ParserLanguageTag *foundTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;
|
|
|
|
|
if(foundTag->tagId == tagId){
|
|
|
|
|
tag = foundTag;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//if the tag was not found can't do much, so throw an error
|
|
|
|
|
if(tag == NULL){
|
|
|
|
|
arc_errno = ARC_ERRNO_NULL;
|
|
|
|
|
ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, subdata, tagId), could not find tag with id: %u", tagId);
|
2024-10-31 19:58:11 -06:00
|
|
|
return ARC_False;
|
2024-10-16 23:46:16 -06:00
|
|
|
}
|
2024-10-21 13:36:45 -06:00
|
|
|
|
|
|
|
|
//loop through each or section of the tags and tokens
|
|
|
|
|
for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){
|
|
|
|
|
//loop through each token or tag to check if the lexed data matches
|
2024-10-24 19:56:26 -06:00
|
|
|
uint32_t lexerCheckIndex = *lexerIndex;
|
|
|
|
|
ARC_Bool foundRule = ARC_True;
|
2024-10-21 13:36:45 -06:00
|
|
|
for(uint32_t tokenOrTagIndex = 1; tokenOrTagIndex < tag->tokensOrTags[orIndex][0] + 1; tokenOrTagIndex++){
|
2024-10-31 19:58:11 -06:00
|
|
|
//check if it is lambda (can return safely)
|
2024-11-01 04:39:45 -06:00
|
|
|
if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == ARC_PARSER_TAG_LAMBDA){
|
|
|
|
|
break;
|
2024-10-31 19:58:11 -06:00
|
|
|
}
|
|
|
|
|
|
2024-10-24 19:56:26 -06:00
|
|
|
//if the value isn't a token it is a tag, so recurs if it isn't a token
|
|
|
|
|
ARC_Bool isToken = ARC_Lexer_IsTokenId(parser->lexer, tag->tokensOrTags[orIndex][tokenOrTagIndex]);
|
|
|
|
|
if(isToken == ARC_False){
|
2024-10-31 19:58:11 -06:00
|
|
|
//check if the tag works if not break to continue checking next or
|
2024-11-01 04:39:45 -06:00
|
|
|
uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
|
|
|
|
|
foundRule = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
|
|
|
|
|
if(foundRule == ARC_False){
|
2024-10-31 19:58:11 -06:00
|
|
|
break;
|
|
|
|
|
}
|
2024-11-01 04:39:45 -06:00
|
|
|
|
|
|
|
|
//this will probably never be called as lambda is usually the last instruction, but just in case we can continue instead of break
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//check if there is another token that can be used
|
|
|
|
|
if(lexerCheckIndex >= ARC_Lexer_GetTokensSize(parser->lexer)){
|
|
|
|
|
//out of tokens to the current or does not work, so break
|
|
|
|
|
foundRule = ARC_False;
|
|
|
|
|
break;
|
2024-10-24 19:56:26 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//get the next token in the lexer and increment the lexers index
|
|
|
|
|
ARC_LexerToken token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex);
|
|
|
|
|
lexerCheckIndex++;
|
|
|
|
|
|
|
|
|
|
//if the token rule does not match the current token in the current or statement the token rule could not be found for the current or index so break
|
|
|
|
|
if(token.rule != tag->tokensOrTags[orIndex][tokenOrTagIndex]){
|
|
|
|
|
foundRule = ARC_False;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//if the rule is found we don't need to check anymore so we can return out
|
|
|
|
|
if(foundRule == ARC_True){
|
|
|
|
|
*lexerIndex = lexerCheckIndex;
|
2024-10-28 21:00:48 -06:00
|
|
|
//TODO: set tag into datastructure
|
2024-10-31 19:58:11 -06:00
|
|
|
return ARC_True;
|
2024-10-21 13:36:45 -06:00
|
|
|
}
|
|
|
|
|
}
|
2024-10-24 19:56:26 -06:00
|
|
|
|
2024-11-01 04:39:45 -06:00
|
|
|
//no rule was found, so return false
|
2024-10-31 19:58:11 -06:00
|
|
|
return ARC_False;
|
2024-10-16 23:46:16 -06:00
|
|
|
}
|
2024-10-16 18:00:52 -06:00
|
|
|
|
2024-10-24 19:56:26 -06:00
|
|
|
/**
 * @brief lexes the given data and checks that it matches the parser's language
 *
 * @note matching starts at the first tag stored in the parser's language
 * @note the lexer is cleared before returning whenever lexing was attempted
 * @note arc_errno is set to ARC_ERRNO_DATA when the start tag does not match, when tokens are
 *       left over after matching, or when arc_errno was set while matching
 *
 * @param[in] parser the parser to use, nothing is parsed if it has no language
 * @param[in] data   the string handed to the parser's lexer
 */
void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
    //without a language there is nothing to match against
    if(parser->language.size == 0){
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), no parser language defined");
        return;
    }

    //turn the data into tokens, bail out if the lexer reported an error
    ARC_Lexer_LexString(parser->lexer, data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not lex the given data");
        ARC_Lexer_Clear(parser->lexer);
        return;
    }

    //matching begins at the first token and the first tag of the language
    uint32_t tokenCursor = 0;
    ARC_ParserLanguageTag *rootTag = parser->language.data;

    //TODO: handle error checks for if parsing fails
    //recursively match from the root tag, then check that every token was used
    ARC_Bool matched     = ARC_Parser_ParseTag(parser, &tokenCursor, rootTag->tagId);
    ARC_Bool consumedAll = tokenCursor == ARC_Lexer_GetTokensSize(parser->lexer);

    //the tokens are no longer needed once matching is done
    ARC_Lexer_Clear(parser->lexer);

    //everything matched and nothing went wrong, so there is nothing left to report
    if(matched != ARC_False && consumedAll != ARC_False && !arc_errno){
        return;
    }

    arc_errno = ARC_ERRNO_DATA;
    ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", tokenCursor);
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief placeholder for parsing the contents of a file
 *
 * @note not implemented yet, the body does nothing
 *
 * @param parser currently unused
 * @param path   currently unused
 */
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){
    //TODO: implement, until then mark the parameters as intentionally unused
    (void)parser;
    (void)path;
}
|