#include "arc/std/parser.h"

#include "arc/std/bool.h"
#include "arc/std/errno.h"
#include "arc/std/lexer.h"
#include "arc/std/vector.h"
//NOTE(review): the system header names were missing from the source this file was restored from, the list below is
//              reconstructed from the names used in this file (NULL, uint32_t, malloc/free) - confirm against history
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

//internal state of a parser
struct ARC_Parser {
    //private copy of the language, an array of ARC_ParserLanguageTag (size is 0 and data is NULL when no language was given)
    ARC_Array language;

    //lexer created in ARC_Parser_Create and destroyed in ARC_Parser_Destroy
    ARC_Lexer *lexer;

    //user data handed to the create, destroy and add data callbacks
    void *data;

    //heap allocated copies of the callbacks passed to ARC_Parser_Create, NULL when the callback was not provided
    ARC_ParserData_CreateFn  *createDataFn;
    ARC_ParserData_DestroyFn *destroyDataFn;
};

//creates a parser, copying the given language into memory owned by the parser
//
//parser           - out parameter, set to the new parser (set to NULL and arc_errno is set if the parser could not be allocated)
//language         - array of ARC_ParserLanguageTag to copy, can be NULL (the parser then has an empty language)
//initLexerRulesFn - called once with the new lexer so the caller can register the lexer rules
//createDataFn     - optional, copied and called immediately with the address of the parser's data pointer
//destroyDataFn    - optional, copied and called with the parser's data by ARC_Parser_Destroy and ARC_Parser_ClearData
//
//each or section in tokensOrTags stores its entry count at index 0 followed by that many token or tag ids, so count + 1
//values are copied per or section
//
//NOTE: only the allocation of the parser itself is checked, the inner allocations are still assumed to succeed
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){
    *parser = (ARC_Parser *)malloc(sizeof(ARC_Parser));
    if(*parser == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Create(parser, language, initLexerRulesFn, createDataFn, destroyDataFn), could not allocate the parser");
        return;
    }

    //set the language size to 0 and data to NULL in case the language is NULL
    (*parser)->language.size = 0;
    (*parser)->language.data = NULL;

    //if the language exists, copy the language
    if(language != NULL){
        (*parser)->language.size = language->size;
        (*parser)->language.data = malloc(sizeof(ARC_ParserLanguageTag) * language->size);

        for(uint32_t index = 0; index < language->size; index++){
            ARC_ParserLanguageTag *languageTag = ((ARC_ParserLanguageTag *)language->data) + index;
            ARC_ParserLanguageTag *currentTag  = ((ARC_ParserLanguageTag *)(*parser)->language.data) + index;

            //copy the language tag into the current tag
            currentTag->tagId            = languageTag->tagId;
            currentTag->tokensOrTagsSize = languageTag->tokensOrTagsSize;

            //create place to store tokens or tags
            currentTag->tokensOrTags = (uint32_t **)malloc(sizeof(uint32_t *) * languageTag->tokensOrTagsSize);

            //copy each or section into the tokensOrTags
            for(uint32_t orIndex = 0; orIndex < languageTag->tokensOrTagsSize; orIndex++){
                //index 0 holds the number of entries that follow, so the size entry is copied as well
                uint32_t orSectionLength = languageTag->tokensOrTags[orIndex][0] + 1;

                currentTag->tokensOrTags[orIndex] = (uint32_t *)malloc(sizeof(uint32_t) * orSectionLength);
                for(uint32_t tokenOrTagIndex = 0; tokenOrTagIndex < orSectionLength; tokenOrTagIndex++){
                    currentTag->tokensOrTags[orIndex][tokenOrTagIndex] = languageTag->tokensOrTags[orIndex][tokenOrTagIndex];
                }
            }

            //add the add function
            currentTag->addDataFn = NULL;
            if(languageTag->addDataFn != NULL){
                currentTag->addDataFn = (ARC_ParserLanguageTag_AddDataFn *)malloc(sizeof(ARC_ParserLanguageTag_AddDataFn));
                *(currentTag->addDataFn) = *(languageTag->addDataFn);
            }

            //copy the user data handed to the add function, ARC_Parser_ParseTag reads it from the copied tag so it can't be
            //left uninitialized (this is a shallow copy, the parser never frees it)
            currentTag->addUserData = languageTag->addUserData;
        }
    }

    //create the lexer
    ARC_Lexer_Create(&((*parser)->lexer));

    //register instructions to the lexer
    initLexerRulesFn(((*parser)->lexer));

    //create the data and copy the creation function if the creation function exists
    (*parser)->data         = NULL;
    (*parser)->createDataFn = NULL;
    if(createDataFn != NULL){
        (*parser)->createDataFn = (ARC_ParserData_CreateFn *)malloc(sizeof(ARC_ParserData_CreateFn));
        *((*parser)->createDataFn) = *createDataFn;
        (*createDataFn)(&((*parser)->data));
    }

    //copy the destroy function if it exists, this has to check the destroy function itself (not the creation function)
    //otherwise a NULL destroy function would be dereferenced
    (*parser)->destroyDataFn = NULL;
    if(destroyDataFn != NULL){
        (*parser)->destroyDataFn = (ARC_ParserData_DestroyFn *)malloc(sizeof(ARC_ParserData_DestroyFn));
        *((*parser)->destroyDataFn) = *destroyDataFn;
    }
}

//TODO: not implemented yet, currently does nothing and does not set the parser
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
}

//destroys a parser created by ARC_Parser_Create, this frees the copied language, calls the destroy data callback (if one
//was given) on the parser's data, and destroys the lexer
//
//parser - the parser to destroy, NULL is ignored
void ARC_Parser_Destroy(ARC_Parser *parser){
    if(parser == NULL){
        return;
    }

    //clear all the copied token or tags from memory
    for(uint32_t index = 0; index < parser->language.size; index++){
        ARC_ParserLanguageTag *currentTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;

        //free the orIndex values
        for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
            free(currentTag->tokensOrTags[orIndex]);
        }

        //free the copied add function (free ignores NULL, so no check is needed)
        free(currentTag->addDataFn);

        //free the tokens or tags
        free(currentTag->tokensOrTags);
    }

    //free the creation function callback
    free(parser->createDataFn);

    //free the data and the deletion function callback
    if(parser->destroyDataFn != NULL){
        (*(parser->destroyDataFn))(parser->data);
        free(parser->destroyDataFn);
    }

    //clear the copied language from memory
    free(parser->language.data);

    ARC_Lexer_Destroy(parser->lexer);

    free(parser);
}

//private recursive function to parse a tag
//
//parser     - the parser holding the language and the already lexed tokens
//lexerIndex - in/out, index of the next lexer token to match, only advanced when an or section of the tag fully matches
//tagId      - id of the tag to match, looked up in the parser's language
//
//returns ARC_True if one or section of the tag matched (the first matching or section wins), ARC_False otherwise
//
//on a match the tag's add data callback (if any) is called once per token matched directly by this tag, tokens matched by
//recursed tags are reported by those tags. A recursed tag reports its tokens as soon as it matches, even if the or section
//that recursed into it fails afterwards
ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t tagId){
    //get the current tag
    ARC_ParserLanguageTag *tag = NULL;
    for(uint32_t index = 0; index < parser->language.size; index++){
        ARC_ParserLanguageTag *foundTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;
        if(foundTag->tagId == tagId){
            tag = foundTag;
            break;
        }
    }

    //if the tag was not found can't do much, so throw an error
    if(tag == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, subdata, tagId), could not find tag with id: %u", tagId);
        return ARC_False;
    }

    //create a vector of found tokens to use if a rule is validated, a comparison function is not needed as it will be
    //iterated through, the destroy function is not needed as well because they will be pointers to lexer tokens (the lexer
    //owns the tokens)
    ARC_Vector *foundTokens;
    ARC_Vector_Create(&foundTokens, NULL, NULL);

    //loop through each or section of the tags and tokens
    for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){
        //reset the tokens for each or index
        ARC_Vector_Clear(foundTokens);

        //loop through each token or tag to check if the lexed data matches, index 0 is the entry count so start at 1
        uint32_t lexerCheckIndex = *lexerIndex;
        ARC_Bool foundRule = ARC_True;
        for(uint32_t tokenOrTagIndex = 1; tokenOrTagIndex < tag->tokensOrTags[orIndex][0] + 1; tokenOrTagIndex++){
            //check if it is lambda (can return safely)
            if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == ARC_PARSER_TAG_LAMBDA){
                break;
            }

            //if the value isn't a token it is a tag, so recurse if it isn't a token
            ARC_Bool isToken = ARC_Lexer_IsTokenId(parser->lexer, tag->tokensOrTags[orIndex][tokenOrTagIndex]);
            if(isToken == ARC_False){
                //create a temporary lexer index in case the rule does not exist
                uint32_t tempLexerCheckIndex = lexerCheckIndex;

                //check if the tag works if not break to continue checking next or
                uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
                foundRule = ARC_Parser_ParseTag(parser, &tempLexerCheckIndex, nextTagId);
                if(foundRule == ARC_False){
                    break;
                }

                //increase the lexer check index as a recursed rule was found, and continue checking
                lexerCheckIndex = tempLexerCheckIndex;
                continue;
            }

            //check if there is another token that can be used
            if(lexerCheckIndex >= ARC_Lexer_GetTokensSize(parser->lexer)){
                //out of tokens so the current or does not work, so break
                foundRule = ARC_False;
                break;
            }

            //get the next token in the lexer and increment the lexers index
            ARC_LexerToken *token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex);
            lexerCheckIndex++;

            //if the token rule does not match the current token in the current or statement the token rule could not be
            //found for the current or index so break
            if(token->rule != tag->tokensOrTags[orIndex][tokenOrTagIndex]){
                foundRule = ARC_False;
                break;
            }

            //the rule was a match so add it to the found tokens
            ARC_Vector_Add(foundTokens, (void *)token);
        }

        //if the rule is found we don't need to check anymore so we can return out
        if(foundRule == ARC_True){
            *lexerIndex = lexerCheckIndex;

            //if there is an addDataFunction for the tag, add the tokens
            if(tag->addDataFn != NULL){
                //iterate through the tokens with the add callback
                for(uint32_t index = 0; index < ARC_Vector_GetSize(foundTokens); index++){
                    ARC_LexerToken *token = (ARC_LexerToken *)ARC_Vector_Get(foundTokens, index);
                    (*(tag->addDataFn))(&(parser->data), tagId, token, tag->addUserData);
                }
            }

            //free the found tokens vector
            ARC_Vector_Destroy(foundTokens);

            //cleanup
            return ARC_True;
        }
    }

    //cleanup
    ARC_Vector_Destroy(foundTokens);

    //no rule was found, so return false
    return ARC_False;
}

//lexes the given data and parses the resulting tokens, starting from the first tag in the parser's language
//
//parser - the parser to use, it needs to have a language (otherwise an error is logged and nothing is parsed)
//data   - the string to parse, passed as is to ARC_Lexer_LexString
//
//arc_errno is set to ARC_ERRNO_DATA when the start tag does not match, when not every lexed token was used, or when
//arc_errno was set while parsing. The lexer is cleared before returning in every case where lexing was attempted
void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
    //make sure the parser has a language
    if(parser->language.size == 0){
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), no parser language defined");
        return;
    }

    //lex the subdata
    ARC_Lexer_LexString(parser->lexer, data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not lex the given data");
        ARC_Lexer_Clear(parser->lexer);
        return;
    }

    //set the lexer index to start and get the first tag
    uint32_t lexerIndex = 0;
    ARC_ParserLanguageTag *startTag = parser->language.data;

    //TODO: handle error checks for if parsing fails
    //recursively parse from the initial start tag
    ARC_Bool parsed = ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);

    //the token count has to be read before the lexer is cleared
    ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);
    ARC_Lexer_Clear(parser->lexer);

    if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
        arc_errno = ARC_ERRNO_DATA;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);
        return;
    }
}

//TODO: not implemented yet, currently does nothing
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){
}

//resets the parser's data by destroying it and creating it again with the callbacks given to ARC_Parser_Create
//
//a callback that was not provided is skipped, the data pointer is set to NULL after the destroy callback so it never dangles
//when there is no creation callback
void ARC_Parser_ClearData(ARC_Parser *parser){
    if(parser->destroyDataFn != NULL){
        (*(parser->destroyDataFn))(parser->data);
    }

    parser->data = NULL;

    //the creation callback takes the address of the data pointer (same as in ARC_Parser_Create) so it can store the new data
    if(parser->createDataFn != NULL){
        (*(parser->createDataFn))(&(parser->data));
    }
}

//returns the parser's data pointer, the parser keeps ownership of the data
void *ARC_Parser_GetData(ARC_Parser *parser){
    return parser->data;
}