#include "arc/std/parser/parserlang.h"
#include "arc/std/errno.h"
#include "arc/std/lexer.h"
#include "arc/std/parser.h"
#include "arc/std/string.h"
#include "arc/std/vector.h"
#include <stdint.h>
#include <stdlib.h>

//private function to initialize the lexer rules for the language
//
//registers one token rule per terminal the parser language uses: the null char, digits, lower and
//upper case letters, whitespace (space and tab), the single char tokens, and the arrow string
void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
    //null
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0));

    //number
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_NUMBER, '0', '9'));

    //alpha char
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR, 'a', 'z'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z'));

    //whitespace
    //NOTE(review): the string is destroyed right after registration, so the rule presumably keeps its own copy - confirm
    ARC_String *whitespaceString;
    ARC_String_CreateWithStrlen(&whitespaceString, " \t");
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_LEXER_TOKEN_WHITESPACE, whitespaceString));
    ARC_String_Destroy(whitespaceString);

    //single char tokens
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NEWLINE_ID     , ARC_PARSERLANG_TOKEN_NEWLINE_CHAR     ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_LESS_THAN_ID   , ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR   ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_GREATER_THAN_ID, ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_OR_ID          , ARC_PARSERLANG_TOKEN_OR_CHAR          ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_UNDERSCORE_ID  , ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR  ));

    //arrow
    ARC_String *arrowString;
    ARC_String_CreateWithStrlen(&arrowString, ARC_PARSERLANG_TOKEN_ARROW_CSTRING);
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchStringRule(ARC_PARSERLANG_TOKEN_ARROW_ID, arrowString));
    ARC_String_Destroy(arrowString);
}

//vector destroy callback for an element that is itself an ARC_Vector
void ARC_ParserLang_VectorDestroyVector(void *data){
    ARC_Vector *vector = (ARC_Vector *)data;
    ARC_Vector_Destroy(vector);
}

//vector destroy callback for an element that is a heap allocated uint32_t
void ARC_ParserLang_VectorDestroyUInt32(void *data){
    uint32_t *uint = (uint32_t *)data;
    free(uint);
}

//vector destroy callback for an element that is a heap allocated ARC_ParserTag
//
//frees every row of tokensOrTags, then the row array, then the tag itself
void ARC_ParserLang_VectorDestroyParserTagFn(void *data){
    ARC_ParserTag *currentTag = (ARC_ParserTag *)data;

    //free the orIndex values
    for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
        free(currentTag->tokensOrTags[orIndex]);
    }

    //free the tokens or tags
    free(currentTag->tokensOrTags);

    //free the tag itself
    free(currentTag);
}

//appends the text of every token found under tagToken (depth first, in child order) onto data
//
//data     the string to append to
//tagToken the parsed node to flatten, either a token or a tag holding child nodes
void ARC_ParserLangParsedData_RecurseStringAdd(ARC_String **data, ARC_ParserTagToken *tagToken){
    if(tagToken->token != NULL){
        //less than rule does not have a string so add it manually and return
        if(tagToken->token->rule == ARC_PARSERLANG_TOKEN_LESS_THAN_ID){
            char tokenChar = ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR;
            ARC_String_AppendCString(data, &tokenChar, 1);
            return;
        }

        //greater than rule does not have a string so add it manually and return
        if(tagToken->token->rule == ARC_PARSERLANG_TOKEN_GREATER_THAN_ID){
            char tokenChar = ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR;
            ARC_String_AppendCString(data, &tokenChar, 1);
            return;
        }

        ARC_String_Append(data, tagToken->token->data);
        return;
    }

    //TODO: probs don't need this
    if(tagToken->tagTokens == NULL){
        return;
    }

    for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){
        ARC_ParserLangParsedData_RecurseStringAdd(data, (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index));
    }
}

//creates tagString as an empty string and fills it with the text of every child of tagToken
//
//the caller owns tagString and needs to destroy it with ARC_String_Destroy
void ARC_ParserLangParsedData_CreateTagString(ARC_String **tagString, ARC_ParserTagToken *tagToken){
    ARC_String_Create(tagString, NULL, 0);

    for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){
        ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index);
        ARC_ParserLangParsedData_RecurseStringAdd(tagString, childTagToken);
    }
}

//private helper: resolves the tag/constant under tagToken to an id and adds that id to orTokensOrTags
//
//the id is heap allocated and owned by orTokensOrTags once added. on allocation failure arc_errno
//is set and nothing is added
static void ARC_ParserLangParsedData_AddTagOrConstantId(ARC_Vector *orTokensOrTags, ARC_ParserTagToken *tagToken, ARC_ParserLang_GetIdFn *getIdFn){
    uint32_t *id = (uint32_t *)malloc(sizeof(uint32_t));
    if(id == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_ParserLangParsedData_AddTagOrConstantId(orTokensOrTags, tagToken, getIdFn), could not allocate the id");
        return;
    }

    //start from an empty string (same as ARC_ParserLangParsedData_CreateTagString) instead of appending onto NULL
    ARC_String *tagOrConstantString;
    ARC_String_Create(&tagOrConstantString, NULL, 0);
    ARC_ParserLangParsedData_RecurseStringAdd(&tagOrConstantString, tagToken);

    //get the id of the tag/constant
    *id = (*getIdFn)(tagOrConstantString);

    //add the id to the matching or vector
    ARC_Vector_Add(orTokensOrTags, (void *)id);

    //cleanup
    ARC_String_Destroy(tagOrConstantString);
}

//private helper: returns the last or vector stored in tokensOrTags, or NULL (with arc_errno set) if there is none
static ARC_Vector *ARC_ParserLangParsedData_GetLastOrVector(ARC_Vector *tokensOrTags){
    uint32_t tokensOrTagsSize = ARC_Vector_GetSize(tokensOrTags);
    if(tokensOrTagsSize == 0){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_ParserLangParsedData_GetLastOrVector(tokensOrTags), tokensOrTags did not hold an or vector to add to");
        return NULL;
    }

    return (ARC_Vector *)ARC_Vector_Get(tokensOrTags, tokensOrTagsSize - 1);
}

/*
    <argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>

    adds the id of every tag/constant within the argument to orTokensOrTags, in order
*/
void ARC_ParserLangParsedData_GetArgumentTag(ARC_Vector *orTokensOrTags, ARC_ParserTagToken *tagToken, ARC_ParserLang_GetIdFn *getIdFn){
    for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){
        ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index);

        switch(childTagToken->id){
            case ARC_PARSERLANG_ARGUMENT:
                //recurse to check all the arguments
                ARC_ParserLangParsedData_GetArgumentTag(orTokensOrTags, childTagToken, getIdFn);
                break;

            case ARC_PARSERLANG_TAG_OR_CONSTANT:
                ARC_ParserLangParsedData_AddTagOrConstantId(orTokensOrTags, childTagToken, getIdFn);
                break;

            default:
                //this should only be whitespace
                break;
        }
    }
}

/*
    <arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>

    fills tokensOrTags (a vector of vectors of uint32_t ids), every OR token starts a new inner vector
    and every tag/constant id is added to the last inner vector. tokensOrTags needs to already hold
    its first inner vector when this is called
*/
void ARC_ParserLangParsedData_GetArgumentsTag(ARC_Vector *tokensOrTags, ARC_ParserTagToken *tagToken, ARC_ParserLang_GetIdFn *getIdFn){
    for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){
        ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index);

        //switch variables, declared here as a declaration cannot directly follow a case label
        ARC_Vector *orTokensOrTags = NULL;
        ARC_Vector_DestroyDataFn destroyUint32Fn = ARC_ParserLang_VectorDestroyUInt32;

        switch(childTagToken->id){
            case ARC_PARSERLANG_TOKEN_OR_ID:
                //add a new vector to tokensOrTags for the or statement
                ARC_Vector_Create(&orTokensOrTags, NULL, &destroyUint32Fn);
                ARC_Vector_Add(tokensOrTags, (void *)orTokensOrTags);
                break;

            case ARC_PARSERLANG_ARGUMENTS:
                //recurse to check all the arguments
                ARC_ParserLangParsedData_GetArgumentsTag(tokensOrTags, childTagToken, getIdFn);
                break;

            case ARC_PARSERLANG_ARGUMENT:
                //an argument holds the ids for the current or statement, so they go into the last or vector
                orTokensOrTags = ARC_ParserLangParsedData_GetLastOrVector(tokensOrTags);
                if(orTokensOrTags != NULL){
                    ARC_ParserLangParsedData_GetArgumentTag(orTokensOrTags, childTagToken, getIdFn);
                }
                break;

            case ARC_PARSERLANG_TAG_OR_CONSTANT:
                //get the last vector within tokens or tags to add the tag/constant to
                orTokensOrTags = ARC_ParserLangParsedData_GetLastOrVector(tokensOrTags);
                if(orTokensOrTags != NULL){
                    ARC_ParserLangParsedData_AddTagOrConstantId(orTokensOrTags, childTagToken, getIdFn);
                }
                break;

            default:
                //this should only be whitespace
                break;
        }
    }
}

/*
    <body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>

    creates a heap allocated ARC_ParserTag from a parsed body. every row of tokensOrTags is laid out
    as { count, id, id, ... }, matching the rule arrays in ARC_Parser_CreateAsParserLang

    tag is set to the new tag on success (free it with ARC_ParserLang_VectorDestroyParserTagFn),
    or to NULL with arc_errno set if an allocation failed
*/
void ARC_ParserLangParsedData_CreateBodyTag(ARC_ParserTag **tag, ARC_ParserTagToken *tagToken, ARC_ParserLang_GetIdFn *getIdFn){
    *tag = NULL;

    //create the tag to store the body in
    ARC_ParserTag *bodyTag = (ARC_ParserTag *)malloc(sizeof(ARC_ParserTag));
    if(bodyTag == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_ParserLangParsedData_CreateBodyTag(tag, tagToken, getIdFn), could not allocate the tag");
        return;
    }

    //keep the tag in a state the destroy callback can always handle
    bodyTag->tokensOrTags     = NULL;
    bodyTag->tokensOrTagsSize = 0;

    /* ~ Tag Id ~ */

    //the first tag will always be the tagId, and as the rule is validated there is no need to check for NULL
    ARC_ParserTagToken *tagIdToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, 0);

    //get the tagId as a string
    ARC_String *tagIdString;
    ARC_ParserLangParsedData_CreateTagString(&tagIdString, tagIdToken);

    //get the tag id as a uint32_t
    bodyTag->tagId = (*getIdFn)(tagIdString);

    //cleanup the tagIdString
    ARC_String_Destroy(tagIdString);

    /* ~ Tokens Or Tags Array ~ */

    //create a vector to store another vector of data
    ARC_Vector *tokensOrTags;
    ARC_Vector_DestroyDataFn destroyVectorFn = ARC_ParserLang_VectorDestroyVector;
    ARC_Vector_Create(&tokensOrTags, NULL, &destroyVectorFn);

    //create vector within the tokens or tags vector to store the or rule in
    ARC_Vector *orTokensOrTags;
    ARC_Vector_DestroyDataFn destroyUint32Fn = ARC_ParserLang_VectorDestroyUInt32;
    ARC_Vector_Create(&orTokensOrTags, NULL, &destroyUint32Fn);

    //add the first or vector to the tokensOrTags
    ARC_Vector_Add(tokensOrTags, (void *)orTokensOrTags);

    //skipping whitespace and arrow tokens, the arguments index starts at 4
    ARC_ParserTagToken *argumentsToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, 4);
    ARC_ParserLangParsedData_GetArgumentsTag(tokensOrTags, argumentsToken, getIdFn);

    //move the data from the vectors to arrays, calloc is used so unfilled rows are NULL and safe to free
    uint32_t orCount = ARC_Vector_GetSize(tokensOrTags);
    bodyTag->tokensOrTags = (uint32_t **)calloc(orCount, sizeof(uint32_t *));
    if(bodyTag->tokensOrTags == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_ParserLangParsedData_CreateBodyTag(tag, tagToken, getIdFn), could not allocate the tokens or tags array");
        ARC_Vector_Destroy(tokensOrTags);
        ARC_ParserLang_VectorDestroyParserTagFn((void *)bodyTag);
        return;
    }
    bodyTag->tokensOrTagsSize = orCount;

    for(uint32_t orIndex = 0; orIndex < orCount; orIndex++){
        ARC_Vector *orVector = (ARC_Vector *)ARC_Vector_Get(tokensOrTags, orIndex);
        uint32_t    idCount  = ARC_Vector_GetSize(orVector);

        //one extra slot as the first value of a row is the number of ids that follow
        uint32_t *orRow = (uint32_t *)malloc(sizeof(uint32_t) * ((size_t)idCount + 1));
        if(orRow == NULL){
            arc_errno = ARC_ERRNO_NULL;
            ARC_DEBUG_LOG_ERROR("ARC_ParserLangParsedData_CreateBodyTag(tag, tagToken, getIdFn), could not allocate a tokens or tags row");
            ARC_Vector_Destroy(tokensOrTags);
            ARC_ParserLang_VectorDestroyParserTagFn((void *)bodyTag);
            return;
        }

        orRow[0] = idCount;
        for(uint32_t idIndex = 0; idIndex < idCount; idIndex++){
            orRow[idIndex + 1] = *(uint32_t *)ARC_Vector_Get(orVector, idIndex);
        }

        bodyTag->tokensOrTags[orIndex] = orRow;
    }

    //cleanup, the ids have been copied so the vectors are no longer needed
    ARC_Vector_Destroy(tokensOrTags);

    *tag = bodyTag;
}

/*
    <line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA

    walks the parsed lines and adds one ARC_ParserTag to tags for every body that is found
*/
void ARC_ParserLangParsedData_RunLineTag(ARC_Vector *tags, ARC_ParserTagToken *tagToken, ARC_ParserLang_GetIdFn *getIdFn){
    //loop through the tags either going to the next line or the next body
    for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){
        ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index);

        //declared here as a declaration cannot directly follow a case label
        ARC_ParserTag *tag = NULL;

        switch(childTagToken->id){
            //recurse to run the next line
            case ARC_PARSERLANG_LINE:
                ARC_ParserLangParsedData_RunLineTag(tags, childTagToken, getIdFn);
                break;

            //get a tag and hand its ownership to the tags vector
            case ARC_PARSERLANG_BODY:
                ARC_ParserLangParsedData_CreateBodyTag(&tag, childTagToken, getIdFn);
                if(tag != NULL){
                    ARC_Vector_Add(tags, (void *)tag);
                }
                break;

            default:
                break;
        }
    }
}

//private function to create the saved data for the language
//
//data       set to a new ARC_Vector of ARC_ParserTag pointers, or to NULL if parsedData was NULL
//parsedData the root of the parsed language
//userData   a pointer to the ARC_ParserLang_GetIdFn used to turn tag/constant names into ids
void ARC_ParserLang_CreateDataFn(void **data, ARC_ParserTagToken *parsedData, void *userData){
    ARC_ParserLang_GetIdFn *getIdFn = (ARC_ParserLang_GetIdFn *)userData;

    //don't leave the output uninitialized if an error happens
    *data = NULL;

    //make sure there is parsed data to use
    if(parsedData == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_ParserLang_CreateDataFn(void **data, ARC_ParserTagToken *parsedData, void *userData), parsedData was NULL when passed in");
        return;
    }

    //function callback to cleanup added tags
    ARC_Vector_DestroyDataFn destroyParserTagFn = ARC_ParserLang_VectorDestroyParserTagFn;

    //I don't see a reason to have a comparison function right now. this might change in the future
    ARC_Vector *tags;
    ARC_Vector_Create(&tags, NULL, &destroyParserTagFn);
    *data = (void *)tags;

    //check if there are any tags (these could be empty if a file is blank)
    if(parsedData->tagTokens == NULL){
        return;
    }

    //load the language into the vector recursively (the vector itself, not the address it is stored at)
    ARC_ParserLangParsedData_RunLineTag(tags, parsedData, getIdFn);
}

//private function to destroy the saved data for the language
//
//frees the heap copy of the get id function passed as userData and destroys the tags vector
void ARC_ParserLang_DestroyDataFn(void *data, void *userData){
    ARC_ParserLang_GetIdFn *getIdFn = (ARC_ParserLang_GetIdFn *)userData;
    free(getIdFn);

    //data is NULL when ARC_ParserLang_CreateDataFn failed
    if(data != NULL){
        ARC_Vector_Destroy((ARC_Vector *)data);
    }
}

//creates a parser that parses the parser language itself
//
//parser  set to the created parser, or to NULL with arc_errno set if an allocation failed
//getIdFn callback used to turn a tag/constant name into its uint32_t id, a heap copy of it is
//        passed to the parser as user data and freed by ARC_ParserLang_DestroyDataFn
void ARC_Parser_CreateAsParserLang(ARC_Parser **parser, ARC_ParserLang_GetIdFn getIdFn){
    //every rule row is { count, id, id, ... }

    //<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
    uint32_t *line[] = {
        (uint32_t[]){ 3, ARC_PARSERLANG_BODY, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE },
        (uint32_t[]){ 1, ARC_PARSERLANG_BODY },
        (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE },
        (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
    };

    //<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
    uint32_t *body[] = {
        (uint32_t[]){ 5, ARC_PARSERLANG_TAG, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_ARROW_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS }
    };

    //<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
    //NOTE(review): the last alternative only accepts a single tag/constant, it may have been meant to be <argument> - confirm
    uint32_t *arguments[] = {
        (uint32_t[]){ 5, ARC_PARSERLANG_ARGUMENT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_OR_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS },
        (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT }
    };

    //<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
    uint32_t *argument[] = {
        (uint32_t[]){ 3, ARC_PARSERLANG_TAG_OR_CONSTANT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENT },
        (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT }
    };

    //<tagOrConstant> -> <tag> | <constant>
    uint32_t *tagOrConstant[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_TAG },
        (uint32_t[]){ 1, ARC_PARSERLANG_CONSTANT }
    };

    //<constant> -> ALPHA_UPPER_CHAR <constantBody>
    uint32_t *constant[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, ARC_PARSERLANG_CONSTANT_BODY }
    };

    //<constantBody> -> <constantChar> <constantBody> | LAMBDA
    uint32_t *constantBody[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_CONSTANT_CHAR, ARC_PARSERLANG_CONSTANT_BODY },
        (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
    };

    //<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
    //two alternatives of one token each, this has to match the count of 2 given in parserLangTags
    uint32_t *constantChar[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID }
    };

    //<tag> -> LESS_THAN <variable> GREATER_THAN
    uint32_t *tag[] = {
        (uint32_t[]){ 3, ARC_PARSERLANG_TOKEN_LESS_THAN_ID, ARC_PARSERLANG_VARIABLE, ARC_PARSERLANG_TOKEN_GREATER_THAN_ID }
    };

    //<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
    uint32_t *variable[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_ALPHA_CHAR, ARC_PARSERLANG_VARIABLE_BODY },
        (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID, ARC_PARSERLANG_VARIABLE_BODY }
    };

    //<variableBody> -> <variableChar> <variableBody> | LAMBDA
    uint32_t *variableBody[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_VARIABLE_CHAR, ARC_PARSERLANG_VARIABLE_BODY },
        (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
    };

    //<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
    uint32_t *variableChar[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_ALPHA_CHAR },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_NUMBER },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID }
    };

    //<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
    uint32_t *alphaChar[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }
    };

    //{ tag id, rule rows, number of rule rows }
    ARC_ParserTag parserLangTags[13] = {
        { ARC_PARSERLANG_LINE           , line         , 4 },
        { ARC_PARSERLANG_BODY           , body         , 1 },
        { ARC_PARSERLANG_ARGUMENTS      , arguments    , 2 },
        { ARC_PARSERLANG_ARGUMENT       , argument     , 2 },
        { ARC_PARSERLANG_TAG_OR_CONSTANT, tagOrConstant, 2 },
        { ARC_PARSERLANG_CONSTANT       , constant     , 1 },
        { ARC_PARSERLANG_CONSTANT_BODY  , constantBody , 2 },
        { ARC_PARSERLANG_CONSTANT_CHAR  , constantChar , 2 },
        { ARC_PARSERLANG_TAG            , tag          , 1 },
        { ARC_PARSERLANG_VARIABLE       , variable     , 2 },
        { ARC_PARSERLANG_VARIABLE_BODY  , variableBody , 2 },
        { ARC_PARSERLANG_VARIABLE_CHAR  , variableChar , 3 },
        { ARC_PARSERLANG_ALPHA_CHAR     , alphaChar    , 2 }
    };

    ARC_Array parserLanguageArray = {
        13,            //size
        parserLangTags //data
    };

    ARC_ParserData_CreateFn  createDataFn  = ARC_ParserLang_CreateDataFn;
    ARC_ParserData_DestroyFn destroyDataFn = ARC_ParserLang_DestroyDataFn;

    //this will be cleaned up by the destroyDataFn
    ARC_ParserLang_GetIdFn *newGetIdFn = (ARC_ParserLang_GetIdFn *)malloc(sizeof(ARC_ParserLang_GetIdFn));
    if(newGetIdFn == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Parser_CreateAsParserLang(parser, getIdFn), could not allocate the copy of getIdFn");
        *parser = NULL;
        return;
    }
    *newGetIdFn = getIdFn;

    //TODO: add the create, destroy, and add callbacks
    //NOTE(review): the rule arrays above live on the stack, this assumes ARC_Parser_Create copies them - confirm
    ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, &createDataFn, &destroyDataFn, newGetIdFn);
}