#include "arc/std/parser.h"
#include "arc/std/bool.h"
#include "arc/std/errno.h"
#include "arc/std/lexer.h"
//#include "arc/std/vector.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

//parser state: a private deep copy of the language rules and the lexer used to tokenize the input
struct ARC_Parser {
    ARC_Array language;
    ARC_Lexer *lexer;
};

//private function that frees every copied tag stored in a language array and resets the array to empty
static void ARC_Parser_FreeLanguage(ARC_Array *language){
    ARC_ParserLanguageTag *tags = (ARC_ParserLanguageTag *)language->data;

    for(uint32_t index = 0; index < language->size; index++){
        ARC_ParserLanguageTag *currentTag = tags + index;

        //a tag without an or table was never filled in (empty tag or partial copy), so there is nothing to free
        if(currentTag->tokensOrTags == NULL){
            continue;
        }

        //free the orIndex values
        for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
            free(currentTag->tokensOrTags[orIndex]);
        }

        free(currentTag->tokensOrTags);
    }

    free(language->data);
    language->data = NULL;
    language->size = 0;
}

//private function that deep copies language into copy
//
//copy is left empty (size 0, data NULL) when language is NULL or has no tags
//returns ARC_False, with copy emptied again, if any allocation fails
static ARC_Bool ARC_Parser_CopyLanguage(ARC_Array *copy, ARC_Array *language){
    //set the language size to 0 and data to NULL in case the language is NULL
    copy->size = 0;
    copy->data = NULL;

    if(language == NULL || language->size == 0){
        return ARC_True;
    }

    //calloc zeroes every tag, so a partially filled copy can always be handed to ARC_Parser_FreeLanguage
    ARC_ParserLanguageTag *sourceTags = (ARC_ParserLanguageTag *)language->data;
    ARC_ParserLanguageTag *copiedTags = (ARC_ParserLanguageTag *)calloc(language->size, sizeof(ARC_ParserLanguageTag));
    if(copiedTags == NULL){
        return ARC_False;
    }

    copy->data = copiedTags;
    copy->size = language->size;

    for(uint32_t index = 0; index < language->size; index++){
        ARC_ParserLanguageTag *languageTag = sourceTags + index;
        ARC_ParserLanguageTag *currentTag = copiedTags + index;

        //copy the language tag into the current tag
        currentTag->tagId = languageTag->tagId;

        //a tag with no or sections needs no table, the zeroed size and NULL table already describe it
        if(languageTag->tokensOrTagsSize == 0){
            continue;
        }

        //create place to store tokens or tags, zeroed so unfilled or sections are safe to free
        currentTag->tokensOrTags = (uint32_t **)calloc(languageTag->tokensOrTagsSize, sizeof(uint32_t *));
        if(currentTag->tokensOrTags == NULL){
            ARC_Parser_FreeLanguage(copy);
            return ARC_False;
        }

        currentTag->tokensOrTagsSize = languageTag->tokensOrTagsSize;

        //copy each or section into the tokensOrTags
        for(uint32_t orIndex = 0; orIndex < languageTag->tokensOrTagsSize; orIndex++){
            //the first value of an or section holds how many values follow it, so copy that many plus the count itself
            size_t valuesSize = (size_t)languageTag->tokensOrTags[orIndex][0] + 1;

            currentTag->tokensOrTags[orIndex] = (uint32_t *)malloc(sizeof(uint32_t) * valuesSize);
            if(currentTag->tokensOrTags[orIndex] == NULL){
                ARC_Parser_FreeLanguage(copy);
                return ARC_False;
            }

            for(size_t valueIndex = 0; valueIndex < valuesSize; valueIndex++){
                currentTag->tokensOrTags[orIndex][valueIndex] = languageTag->tokensOrTags[orIndex][valueIndex];
            }
        }
    }

    return ARC_True;
}

//creates a parser that owns a deep copy of language and a new lexer
//
//parser           receives the new parser, or NULL if memory could not be allocated (arc_errno is set to ARC_ERRNO_NULL)
//language         array of ARC_ParserLanguageTag to copy, may be NULL to create a parser without a language
//initLexerRulesFn callback that registers the lexer rules, skipped if NULL
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
    *parser = (ARC_Parser *)malloc(sizeof(ARC_Parser));
    if(*parser == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Create(parser, language, initLexerRulesFn), could not allocate the parser");
        return;
    }

    //if the language exists, copy the language
    if(ARC_Parser_CopyLanguage(&((*parser)->language), language) == ARC_False){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Create(parser, language, initLexerRulesFn), could not allocate the language copy");
        free(*parser);
        *parser = NULL;
        return;
    }

    //create the lexer
    ARC_Lexer_Create(&((*parser)->lexer));

    //register instructions to the lexer
    if(initLexerRulesFn != NULL){
        initLexerRulesFn((*parser)->lexer);
    }
}

//TODO: not implemented yet, currently does nothing and leaves *parser untouched
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
}

//destroys a parser along with its copied language and its lexer, a NULL parser is ignored
void ARC_Parser_Destroy(ARC_Parser *parser){
    if(parser == NULL){
        return;
    }

    //clear the copied language (and all of its token or tags) from memory
    ARC_Parser_FreeLanguage(&(parser->language));

    ARC_Lexer_Destroy(parser->lexer);

    free(parser);
}

//private recursive function to parse a tag
//
//tries each or section of the tag with the id tagId in order, starting at the lexed token at *lexerIndex, the first or
//section whose tokens and tags all match wins
//
//returns ARC_True and moves *lexerIndex past the consumed tokens on a match
//returns ARC_False and leaves *lexerIndex untouched otherwise, if tagId is not in the language arc_errno is also set
//
//note: a rule that reaches itself again before consuming a token (left recursion) will recurse without end
ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t tagId){
    //get the current tag
    ARC_ParserLanguageTag *tag = NULL;
    for(uint32_t index = 0; index < parser->language.size; index++){
        ARC_ParserLanguageTag *foundTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;
        if(foundTag->tagId == tagId){
            tag = foundTag;
            break;
        }
    }

    //if the tag was not found can't do much, so throw an error
    if(tag == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, lexerIndex, tagId), could not find tag with id: %u", tagId);
        return ARC_False;
    }

    //loop through each or section of the tags and tokens
    for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){
        uint32_t *tokensOrTags = tag->tokensOrTags[orIndex];

        //match against a scratch index so an or section that fails never moves the callers index
        uint32_t lexerCheckIndex = *lexerIndex;
        ARC_Bool foundRule = ARC_True;

        //loop through each token or tag to check if the lexed data matches (index 0 holds the count, so start at 1)
        for(uint32_t tokenOrTagIndex = 1; tokenOrTagIndex < tokensOrTags[0] + 1; tokenOrTagIndex++){
            uint32_t tokenOrTagId = tokensOrTags[tokenOrTagIndex];

            //lambda matches without consuming anything, so the or section is done (can return safely)
            if(tokenOrTagId == ARC_PARSER_TAG_LAMBDA){
                break;
            }

            //if the value isn't a token it is a tag, so recurse if it isn't a token
            if(ARC_Lexer_IsTokenId(parser->lexer, tokenOrTagId) == ARC_False){
                //recurse from where this or section currently is, the scratch index only moves if the tag matched
                foundRule = ARC_Parser_ParseTag(parser, &lexerCheckIndex, tokenOrTagId);
                if(foundRule == ARC_False){
                    break;
                }

                continue;
            }

            //check if there is another token that can be used
            if(lexerCheckIndex >= ARC_Lexer_GetTokensSize(parser->lexer)){
                //out of tokens so the current or does not work, so break
                foundRule = ARC_False;
                break;
            }

            //get the next token in the lexer and increment the lexers index
            ARC_LexerToken token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex);
            lexerCheckIndex++;

            //if the token rule does not match the expected token the current or section could not be matched, so break
            if(token.rule != tokenOrTagId){
                foundRule = ARC_False;
                break;
            }
        }

        //if the rule is found we don't need to check anymore so we can return out
        if(foundRule == ARC_True){
            *lexerIndex = lexerCheckIndex;
            //TODO: set tag into datastructure
            return ARC_True;
        }
    }

    //no rule was found, so return false
    return ARC_False;
}

//lexes data and checks that all of the lexed tokens match the language, starting from the first tag of the language
//
//on failure arc_errno is set and an error is logged, the lexer is cleared before returning once lexing was attempted
void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
    //make sure the parser has a language
    if(parser->language.size == 0){
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), no parser language defined");
        return;
    }

    //lex the data
    ARC_Lexer_LexString(parser->lexer, data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not lex the given data");
        ARC_Lexer_Clear(parser->lexer);
        return;
    }

    //set the lexer index to start and get the first tag
    uint32_t lexerIndex = 0;
    ARC_ParserLanguageTag *startTag = (ARC_ParserLanguageTag *)parser->language.data;

    //TODO: handle error checks for if parsing fails
    //recursively parse from the initial start tag
    ARC_Bool parsed = ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);

    //the token count has to be read before the lexer is cleared
    ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);

    ARC_Lexer_Clear(parser->lexer);

    //a parse only counts if the start tag matched, every token was consumed, and no error was raised on the way
    if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
        arc_errno = ARC_ERRNO_DATA;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);
        return;
    }
}

//TODO: not implemented yet, currently does nothing
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){
}