#include "arc/std/parser/parserlang.h"
#include "arc/std/parser.h"
#include "arc/std/bool.h"
#include "arc/std/errno.h"
#include "arc/std/lexer.h"
#include "arc/std/vector.h"
//NOTE(review): the names of the three system headers below were missing from the reviewed text,
//              they were restored from what this file uses (uint32_t, malloc/free, logging) - confirm
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

//the parser state, the language is a deep copy owned by the parser
struct ARC_Parser {
    //array of ARC_ParserTag copied from the language given at creation
    ARC_Array language;

    //lexer created by the parser and filled by the initLexerRulesFn callback
    ARC_Lexer *lexer;

    //result of the last successful parse (written by createDataFn), NULL until then
    void *data;
    //caller supplied pointer handed to the create and destroy callbacks
    void *userData;

    //heap copies of the callbacks, NULL when the callback was not provided
    ARC_ParserData_CreateFn  *createDataFn;
    ARC_ParserData_DestroyFn *destroyDataFn;
};

//private helper that frees every or section of every tag in a copied language, then the tag array itself
//note: this is safe on a partially built language as long as unset tokensOrTags pointers are NULL
static void ARC_Parser_FreeLanguage(ARC_Array *language){
    ARC_ParserTag *tags = (ARC_ParserTag *)language->data;

    for(uint32_t index = 0; index < language->size; index++){
        ARC_ParserTag *currentTag = tags + index;
        if(currentTag->tokensOrTags == NULL){
            continue;
        }

        //free the orIndex values
        for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
            free(currentTag->tokensOrTags[orIndex]);
        }

        //free the tokens or tags
        free(currentTag->tokensOrTags);
    }

    free(language->data);
    language->size = 0;
    language->data = NULL;
}

//private helper that deep copies language into copy (a NULL language gives an empty copy)
//returns ARC_False if an allocation failed, in which case copy is left empty and nothing is leaked
static ARC_Bool ARC_Parser_CopyLanguage(ARC_Array *copy, ARC_Array *language){
    //set the language size to 0 and data to NULL in case the language is NULL
    copy->size = 0;
    copy->data = NULL;

    if(language == NULL){
        return ARC_True;
    }

    //calloc is used so that every tag starts with a NULL tokensOrTags, which keeps cleanup safe if a later allocation fails
    ARC_ParserTag *sourceTags = (ARC_ParserTag *)language->data;
    ARC_ParserTag *copiedTags = (ARC_ParserTag *)calloc(language->size, sizeof(ARC_ParserTag));
    if(copiedTags == NULL && language->size != 0){
        return ARC_False;
    }

    copy->size = language->size;
    copy->data = copiedTags;

    for(uint32_t index = 0; index < language->size; index++){
        ARC_ParserTag *languageTag = sourceTags + index;
        ARC_ParserTag *currentTag  = copiedTags + index;

        //copy the language tag into the current tag
        currentTag->tagId = languageTag->tagId;

        //a tag without or sections has nothing more to copy (tokensOrTags stays NULL from calloc)
        if(languageTag->tokensOrTagsSize == 0){
            continue;
        }

        //create place to store tokens or tags, each entry starts as NULL
        currentTag->tokensOrTags = (uint32_t **)calloc(languageTag->tokensOrTagsSize, sizeof(uint32_t *));
        if(currentTag->tokensOrTags == NULL){
            ARC_Parser_FreeLanguage(copy);
            return ARC_False;
        }
        currentTag->tokensOrTagsSize = languageTag->tokensOrTagsSize;

        //copy each or section into the tokensOrTags
        for(uint32_t orIndex = 0; orIndex < languageTag->tokensOrTagsSize; orIndex++){
            //the first value of an or section is the number of ids that follow it, so the section holds that many values plus one
            uint32_t *sourceSection = languageTag->tokensOrTags[orIndex];
            size_t    sectionLength = (size_t)sourceSection[0] + 1;

            uint32_t *copiedSection = (uint32_t *)malloc(sizeof(uint32_t) * sectionLength);
            if(copiedSection == NULL){
                ARC_Parser_FreeLanguage(copy);
                return ARC_False;
            }

            for(size_t tokenOrTagIndex = 0; tokenOrTagIndex < sectionLength; tokenOrTagIndex++){
                copiedSection[tokenOrTagIndex] = sourceSection[tokenOrTagIndex];
            }

            currentTag->tokensOrTags[orIndex] = copiedSection;
        }
    }

    return ARC_True;
}

//creates a parser from an array of ARC_ParserTag
//
//parser           - receives the new parser, set to NULL if an allocation fails (arc_errno is set to ARC_ERRNO_NULL)
//language         - array of ARC_ParserTag that is deep copied, can be NULL for a parser without a language
//initLexerRulesFn - called once with the parser's new lexer to register the lexer rules
//createDataFn     - optional, pointer to the callback run after a successful parse, the pointed to value is copied
//destroyDataFn    - optional, pointer to the callback used to clean up parsed data, the pointed to value is copied
//userData         - stored as is and handed to the callbacks
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn, void *userData){
    *parser = (ARC_Parser *)malloc(sizeof(ARC_Parser));
    if(*parser == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Create(parser, language, initLexerRulesFn, createDataFn, destroyDataFn, userData), failed to allocate the parser");
        return;
    }

    //if the language exists, copy the language
    if(ARC_Parser_CopyLanguage(&((*parser)->language), language) == ARC_False){
        free(*parser);
        *parser = NULL;

        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Create(parser, language, initLexerRulesFn, createDataFn, destroyDataFn, userData), failed to allocate the copy of the language");
        return;
    }

    //create the lexer
    ARC_Lexer_Create(&((*parser)->lexer));

    //register instructions to the lexer
    initLexerRulesFn(((*parser)->lexer));

    //set the data to null (the parse function is what creates it)
    (*parser)->data = NULL;

    //set the userData for the create callback function
    (*parser)->userData = userData;

    //init both callbacks with null first so the parser can be destroyed safely if a copy below fails
    (*parser)->createDataFn  = NULL;
    (*parser)->destroyDataFn = NULL;

    //copy the create callback if it exists
    if(createDataFn != NULL){
        (*parser)->createDataFn = (ARC_ParserData_CreateFn *)malloc(sizeof(ARC_ParserData_CreateFn));
        if((*parser)->createDataFn == NULL){
            goto callbackAllocationFailed;
        }

        *((*parser)->createDataFn) = *createDataFn;
    }

    //copy the destroy callback if it exists
    //note: this checks destroyDataFn, checking createDataFn here would dereference a NULL destroyDataFn or drop a valid one
    if(destroyDataFn != NULL){
        (*parser)->destroyDataFn = (ARC_ParserData_DestroyFn *)malloc(sizeof(ARC_ParserData_DestroyFn));
        if((*parser)->destroyDataFn == NULL){
            goto callbackAllocationFailed;
        }

        *((*parser)->destroyDataFn) = *destroyDataFn;
    }

    return;

callbackAllocationFailed:
    //the destroy callback is still NULL at this point, so destroying the parser will not touch the userData
    ARC_Parser_Destroy(*parser);
    *parser = NULL;

    arc_errno = ARC_ERRNO_NULL;
    ARC_DEBUG_LOG_ERROR("ARC_Parser_Create(parser, language, initLexerRulesFn, createDataFn, destroyDataFn, userData), failed to allocate a copy of a callback");
}

//creates a parser from a vector of ARC_ParserTag, the remaining parameters are passed to ARC_Parser_Create unchanged
//the tags are gathered into a temporary array which ARC_Parser_Create deep copies, so the vector is not kept by the parser
void ARC_Parser_CreateFromVector(ARC_Parser **parser, ARC_Vector *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn, void *userData){
    //creates the variables to copy the vector into
    //note: the array is on the heap because a variable length array of size zero is undefined and a big one can overflow the stack
    const uint32_t languageSize = ARC_Vector_GetSize(language);
    ARC_ParserTag *languageArray = NULL;

    if(languageSize != 0){
        languageArray = (ARC_ParserTag *)malloc(sizeof(ARC_ParserTag) * languageSize);
        if(languageArray == NULL){
            arc_errno = ARC_ERRNO_NULL;
            ARC_DEBUG_LOG_ERROR("ARC_Parser_CreateFromVector(parser, language, initLexerRulesFn, createDataFn, destroyDataFn, userData), failed to allocate the language array");
            return;
        }
    }

    //copy the language from a vector into an array
    for(uint32_t index = 0; index < languageSize; index++){
        languageArray[index] = *(ARC_ParserTag *)ARC_Vector_Get(language, index);
    }

    //set the vector data as an ARC_Array
    ARC_Array languageAsArray = { languageSize, languageArray };

    //create the parser
    ARC_Parser_Create(parser, &languageAsArray, initLexerRulesFn, createDataFn, destroyDataFn, userData);

    //the parser made its own copy of the tags, so the temporary array is no longer needed
    free(languageArray);
}

//creates a parser from a language described in a string
//the string is parsed by a temporary parser (from ARC_ParserLang_CreateAsParser) and the resulting vector of tags is
//passed to ARC_Parser_CreateFromVector, on failure arc_errno is set, an error is logged, and parser is not written
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_Parser_GetStringIdFn getStringIdFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn, void *userData){
    //create the language from the string
    ARC_Parser *parserlangParser;
    ARC_ParserLang_CreateAsParser(&parserlangParser, getStringIdFn);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_CreateFromString(parser, languageString, initLexerRulesFn, getStringIdFn, createDataFn, destroyDataFn, userData), failed to create language from: %s", languageString->data);
        return;
    }

    //copy the string because parse will destroy the string that is passed in
    ARC_String *parserLangString;
    ARC_String_Copy(&parserLangString, languageString);
    ARC_Parser_Parse(parserlangParser, &parserLangString);

    //make sure the language was parsed before using it, a failed parse leaves no data to read
    ARC_Vector *language = (ARC_Vector *)ARC_Parser_GetData(parserlangParser);
    if(arc_errno || language == NULL){
        if(!arc_errno){
            arc_errno = ARC_ERRNO_NULL;
        }

        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_CreateFromString(parser, languageString, initLexerRulesFn, getStringIdFn, createDataFn, destroyDataFn, userData), failed to parse language from: %s", languageString->data);
        ARC_Parser_Destroy(parserlangParser);
        return;
    }

    //create the parser from the parsed language
    ARC_Parser_CreateFromVector(parser, language, initLexerRulesFn, createDataFn, destroyDataFn, userData);

    //cleanup
    ARC_Parser_Destroy(parserlangParser);
}

//destroys a parser and everything it owns: the callback copies, the copied language, and the lexer
//if a destroy callback exists it is always called with the current data (which can be NULL) and the userData
void ARC_Parser_Destroy(ARC_Parser *parser){
    //nothing to destroy
    if(parser == NULL){
        return;
    }

    //free the creation function callback
    free(parser->createDataFn);

    //do the same thing as clear but this time pass in the userData as well to clean that up
    if(parser->destroyDataFn != NULL){
        (*(parser->destroyDataFn))(parser->data, parser->userData);
        free(parser->destroyDataFn);
    }

    //clear all the copied tokens or tags and the copied language from memory
    ARC_Parser_FreeLanguage(&(parser->language));

    ARC_Lexer_Destroy(parser->lexer);
    free(parser);
}

//private creation function for ARC_ParserTagToken type
//note: token and tagTokens will be set to null, it is safe to create tagTokens outside of this as the destructor will clean it up
void ARC_ParserTagToken_Create(ARC_ParserTagToken **tagToken, uint32_t id){
    *tagToken = (ARC_ParserTagToken *)malloc(sizeof(ARC_ParserTagToken));

    (*tagToken)->id        = id;
    (*tagToken)->token     = NULL;
    (*tagToken)->tagTokens = NULL;
}

//private destroy function for ARC_ParserTagToken type
//note: only the tagTokens vector and the tag token itself are freed, the token pointer is not freed here
void ARC_ParserTagToken_Destroy(ARC_ParserTagToken *tagToken){
    //destroy the tag token vector
    if(tagToken->tagTokens != NULL){
        ARC_Vector_Destroy(tagToken->tagTokens);
    }

    free(tagToken);
}

//private function to cleanup a parser tag token from a vector
void ARC_ParserTagToken_VectorDestroyDataFn(void *data){
    //we can just use the destroy function with casted data
    ARC_ParserTagToken_Destroy((ARC_ParserTagToken *)data);
}

//private recursive function to parse a tag
//
//parser     - the parser holding the language and the lexed tokens
//tagToken   - holds the id of the tag to match, on success its tagTokens vector holds one child per matched token or tag
//lexerIndex - in: index of the first lexer token to match, out: index after the matched tokens (only written on success)
//
//returns ARC_True if one of the tag's or sections matched, otherwise ARC_False with tagToken->tagTokens destroyed and
//set to NULL (if the tag id is not in the language, arc_errno is set and tagTokens is left untouched)
ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, ARC_ParserTagToken *tagToken, uint32_t *lexerIndex){
    //get the current tag
    ARC_ParserTag *tag = NULL;
    for(uint32_t index = 0; index < parser->language.size; index++){
        ARC_ParserTag *foundTag = ((ARC_ParserTag *)parser->language.data) + index;
        if(foundTag->tagId == tagToken->id){
            tag = foundTag;
            break;
        }
    }

    //if the tag was not found can't do much, so throw an error
    if(tag == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, subdata, tagId), could not find tag with id: %u", tagToken->id);
        return ARC_False;
    }

    //create a vector of tag token to use if a rule is validated, a comparison function is not needed as it will be iterated through
    ARC_Vector_DestroyDataFn destroyTokenTagFn = ARC_ParserTagToken_VectorDestroyDataFn;
    ARC_Vector_Create(&(tagToken->tagTokens), NULL, &destroyTokenTagFn);

    //loop through each or section of the tags and tokens
    for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){
        //reset the tag tokens for each or index
        ARC_Vector_Clear(tagToken->tagTokens);

        //loop through each token or tag to check if the lexed data matches
        //note: index 0 of an or section is the number of ids in the section, so the ids start at index 1
        uint32_t lexerCheckIndex = *lexerIndex;
        ARC_Bool foundRule = ARC_True;
        for(uint32_t tokenOrTagIndex = 1; tokenOrTagIndex < tag->tokensOrTags[orIndex][0] + 1; tokenOrTagIndex++){
            //get next tag id to check
            uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];

            //check if it is lambda (can return safely)
            if(nextTagId == ARC_PARSER_TAG_LAMBDA){
                break;
            }

            //if the value isn't a token it is a tag, so recurse if it isn't a token
            ARC_Bool isToken = ARC_Lexer_IsTokenId(parser->lexer, nextTagId);
            if(isToken == ARC_False){
                //create a temporary lexer index in case the rule does not exist
                uint32_t tempLexerCheckIndex = lexerCheckIndex;

                //create tag token for if the rule works
                ARC_ParserTagToken *nextTagToken;
                ARC_ParserTagToken_Create(&nextTagToken, nextTagId);

                //check if the tag works if not break to continue checking next or index
                foundRule = ARC_Parser_ParseTag(parser, nextTagToken, &tempLexerCheckIndex);
                if(foundRule == ARC_False){
                    //clean up the tag token
                    ARC_ParserTagToken_Destroy(nextTagToken);
                    break;
                }

                //add the tag token because rule was found
                ARC_Vector_Add(tagToken->tagTokens, nextTagToken);

                //increase the lexer check index as a recursed rule was found, and continue checking
                lexerCheckIndex = tempLexerCheckIndex;
                continue;
            }

            //check if there is another token that can be used
            if(lexerCheckIndex >= ARC_Lexer_GetTokensSize(parser->lexer)){
                //out of tokens so the current or does not work, so break
                foundRule = ARC_False;
                break;
            }

            //get the next token in the lexer and increment the lexers index
            ARC_LexerToken *token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex);
            lexerCheckIndex++;

            //if the token rule does not match the current token in the current or statement the token rule could not be found for the current or index so break
            if(token->rule != tag->tokensOrTags[orIndex][tokenOrTagIndex]){
                foundRule = ARC_False;
                break;
            }

            //the rule was a match so create a tag token to store the token in
            ARC_ParserTagToken *nextTagToken;
            ARC_ParserTagToken_Create(&nextTagToken, nextTagId);
            nextTagToken->token = token;

            //add the token to the tag tokens
            ARC_Vector_Add(tagToken->tagTokens, nextTagToken);
        }

        //if the rule is found we don't need to check anymore so we can return out
        if(foundRule == ARC_True){
            *lexerIndex = lexerCheckIndex;
            return ARC_True;
        }
    }

    //cleanup
    ARC_Vector_Destroy(tagToken->tagTokens);
    tagToken->tagTokens = NULL;

    //no rule was found, so return false
    return ARC_False;
}

//lexes the given string and parses the tokens, starting from the first tag of the language
//on success the create callback (if it exists) is called to fill the parser's data from the parsed tag tokens
//on failure (lexing fails, no rule matches, or tokens are left over) arc_errno is set and an error is logged
void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
    //make sure the parser has a language
    if(parser->language.size == 0){
        //set the errno like every other error path does, so the failure can be detected by the caller
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), no parser language defined");
        return;
    }

    //lex the subdata
    ARC_Lexer_LexString(parser->lexer, data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not lex the given data");
        ARC_Lexer_Clear(parser->lexer);
        return;
    }

    //set the lexer index to start and get the first tag
    uint32_t lexerIndex = 0;
    ARC_ParserTag *startTag = parser->language.data;

    //setup a tag token that will be passed to the creation callback on success
    ARC_ParserTagToken *tagToken;
    ARC_ParserTagToken_Create(&tagToken, startTag->tagId);

    //TODO: handle error checks for if parsing fails
    //recursively parse from the initial start tag
    ARC_Bool parsed = ARC_Parser_ParseTag(parser, tagToken, &lexerIndex);

    //the parse only counts if every lexed token was used
    ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);

    //error if anything went wrong
    if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
        ARC_Lexer_Clear(parser->lexer);
        ARC_ParserTagToken_Destroy(tagToken);

        arc_errno = ARC_ERRNO_DATA;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);
        return;
    }

    //create the data if the creation callback exists
    //NOTE(review): data from an earlier parse is not cleared before this call, callers presumably need to use ARC_Parser_ClearData between parses - confirm
    if(parser->createDataFn != NULL){
        (*(parser->createDataFn))(&(parser->data), tagToken, parser->userData);
    }

    //cleanup
    ARC_Lexer_Clear(parser->lexer);
    ARC_ParserTagToken_Destroy(tagToken);
}

//TODO: not implemented yet, this currently does nothing
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){
    //silence unused parameter warnings until this is implemented
    (void)parser;
    (void)path;
}

//clears the parsed data, the destroy callback is passed NULL for the userData so only the data is cleaned up
void ARC_Parser_ClearData(ARC_Parser *parser){
    //check if that data exists and the destructor exists to make sure they can be run
    if(parser->data != NULL && parser->destroyDataFn != NULL){
        (*(parser->destroyDataFn))(parser->data, NULL);
    }

    //TODO: might want to error here
    parser->data = NULL;
}

//returns the data created by the last successful parse, or NULL if there is none
void *ARC_Parser_GetData(ARC_Parser *parser){
    return parser->data;
}