From d69844dab168f1e6c9381fba6950b6be698d489b Mon Sep 17 00:00:00 2001 From: herbglitch Date: Sat, 23 Nov 2024 19:27:30 -0700 Subject: [PATCH] still working on parser, plan to rework to parsing first, then calling struct creation callback after with vector of tokens and tags --- include/arc/std/lexer.h | 9 ++ include/arc/std/parser.h | 19 +++-- include/arc/std/parser/parserlang.h | 8 +- src/std/lexer.c | 4 + src/std/parser.c | 38 +++++++-- src/std/parser/parserlang.c | 125 +++++++++++++++++++++++++++- temp_parser.txt | 2 + tests/std/lexer.c | 30 +++++++ tests/std/parser.c | 43 +++++++--- 9 files changed, 251 insertions(+), 27 deletions(-) diff --git a/include/arc/std/lexer.h b/include/arc/std/lexer.h index 560c48b..5c4b154 100644 --- a/include/arc/std/lexer.h +++ b/include/arc/std/lexer.h @@ -130,6 +130,15 @@ ARC_LexerToken *ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index); */ uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer); +/** + * @brief returns a boolean based on if a lexer's rules are continuous + * + * @param[in] lexer the lexer to check if its rules are continuous + * + * @return ARC_True if the set rules are continuous +*/ +ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer); + /** * @brief returns a boolean based on if a given id is a stored token rule id * diff --git a/include/arc/std/parser.h b/include/arc/std/parser.h index acb3c97..6708619 100644 --- a/include/arc/std/parser.h +++ b/include/arc/std/parser.h @@ -6,8 +6,8 @@ extern "C" { #endif #include "arc/std/array.h" -//#include "arc/std/bool.h" #include "arc/std/lexer.h" +#include "arc/std/vector.h" #include /** @@ -28,20 +28,20 @@ typedef void (* ARC_ParserData_DestroyFn)(void *data); /** * @brief TODO: write this */ -typedef void (* ARC_ParserLanguageTag_AddDataFn)(void **data, uint32_t tagId, ARC_LexerToken *token, void *userData); +typedef void (* ARC_ParserTag_AddDataFn)(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData); /** * @brief a langue tag type for
the parser //TODO: explain this better */ -typedef struct ARC_ParserLanguageTag { +typedef struct ARC_ParserTag { uint32_t tagId; uint32_t **tokensOrTags; uint32_t tokensOrTagsSize; - ARC_ParserLanguageTag_AddDataFn *addDataFn; + ARC_ParserTag_AddDataFn *addDataFn; void *addUserData; -} ARC_ParserLanguageTag; +} ARC_ParserTag; /** * @brief a callback function to initialize the lexer the parser uses with rules @@ -72,6 +72,15 @@ typedef void (* ARC_Parser_InitLexerRulesFn)(ARC_Lexer *lexer); */ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn); +/** + * @brief creates an ARC_Parser type from an arc vector + * + * @param[out] parser ARC_Parser to create + * @param[in] language a vector of ARC_ParserTags defining a language + * @param[in] initLexerRulesFn a callback used to initialize the token rules the lexer within the parser will use +*/ +void ARC_Parser_CreateFromVector(ARC_Parser **parser, ARC_Vector *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn); + /** * @brief creates an ARC_Parser type from a string * diff --git a/include/arc/std/parser/parserlang.h b/include/arc/std/parser/parserlang.h index 6b1fc6d..b063d06 100644 --- a/include/arc/std/parser/parserlang.h +++ b/include/arc/std/parser/parserlang.h @@ -13,7 +13,7 @@ extern "C" { -> WHITESPACE OR WHITESPACE | -> WHITESPACE | - -> | + -> | -> ALPHA_UPPER_CHAR -> | LAMBDA @@ -26,10 +26,16 @@ extern "C" { -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR */ +/* + * @brief callback type that takes a constant as an ARC_String and returns a uint32_t id (TODO: document how ids are assigned) +*/ +typedef uint32_t (* ARC_ParserLang_GetConstId)(ARC_String *constant); + /* * @brief creates a parser for the Parser Lang * * @note the rules will be inited for the parser lang + * @note the parsed data will be saved as a vector of ARC_ParserTag * * @param[out] parser the parser to create */ diff --git
a/src/std/lexer.c b/src/std/lexer.c index d68118a..c4f2033 100644 --- a/src/std/lexer.c +++ b/src/std/lexer.c @@ -287,6 +287,10 @@ uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){ return ARC_Vector_GetSize(lexer->tokens); } +ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer){ + return lexer->tokenRulesAreContinuous; +} + ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id){ //if the rules are continuous we can just check if it is less than the max rules value if(lexer->tokenRulesAreContinuous == ARC_True){ diff --git a/src/std/parser.c b/src/std/parser.c index 80f37d3..221d1b0 100644 --- a/src/std/parser.c +++ b/src/std/parser.c @@ -27,11 +27,11 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init //if the language exists, copy the language if(language != NULL){ (*parser)->language.size = language->size; - (*parser)->language.data = malloc(sizeof(ARC_ParserLanguageTag) * language->size); + (*parser)->language.data = malloc(sizeof(ARC_ParserTag) * language->size); for(uint32_t index = 0; index < language->size; index++){ - ARC_ParserLanguageTag *languageTag = ((ARC_ParserLanguageTag *)language->data) + index; - ARC_ParserLanguageTag *currentTag = ((ARC_ParserLanguageTag *)(*parser)->language.data) + index; + ARC_ParserTag *languageTag = ((ARC_ParserTag *)language->data) + index; + ARC_ParserTag *currentTag = ((ARC_ParserTag *)(*parser)->language.data) + index; //copy the language tag into the current tag currentTag->tagId = languageTag->tagId; @@ -52,7 +52,7 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init //add the add function currentTag->addDataFn = NULL; if(languageTag->addDataFn != NULL){ - currentTag->addDataFn = (ARC_ParserLanguageTag_AddDataFn *)malloc(sizeof(ARC_ParserLanguageTag_AddDataFn)); + currentTag->addDataFn = (ARC_ParserTag_AddDataFn *)malloc(sizeof(ARC_ParserTag_AddDataFn)); *(currentTag->addDataFn) = *(languageTag->addDataFn); } } @@ -80,13 +80,33 @@ void 
ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init } } +void ARC_Parser_CreateFromVector(ARC_Parser **parser, ARC_Vector *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){ + //creates the variables to copy the vector into + const uint32_t languageSize = ARC_Vector_GetSize(language); + ARC_ParserTag languageArray[languageSize]; + + //copy the language from a vector into an array + for(uint32_t index = 0; index < languageSize; index++){ + languageArray[index] = *(ARC_ParserTag *)ARC_Vector_Get(language, index); + } + + //set the vector data as an ARC_Array + ARC_Array languageAsArray = { + languageSize, + languageArray + }; + + //create the parser + ARC_Parser_Create(parser, &languageAsArray, initLexerRulesFn, createDataFn, destroyDataFn); +} + void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){ } void ARC_Parser_Destroy(ARC_Parser *parser){ //clear all the copied token or tags from memory for(uint32_t index = 0; index < parser->language.size; index++){ - ARC_ParserLanguageTag *currentTag = ((ARC_ParserLanguageTag *)parser->language.data) + index; + ARC_ParserTag *currentTag = ((ARC_ParserTag *)parser->language.data) + index; //free the orIndex vlues for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){ @@ -123,9 +143,9 @@ void ARC_Parser_Destroy(ARC_Parser *parser){ //private recusive function to parse a tag ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t tagId){ //get the current tag - ARC_ParserLanguageTag *tag = NULL; + ARC_ParserTag *tag = NULL; for(uint32_t index = 0; index < parser->language.size; index++){ - ARC_ParserLanguageTag *foundTag = ((ARC_ParserLanguageTag *)parser->language.data) + index; + ARC_ParserTag *foundTag = ((ARC_ParserTag *)parser->language.data) + index; if(foundTag->tagId == tagId){ tag = 
foundTag; break; @@ -205,7 +225,7 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t //iterate through the tokens with the add callback for(uint32_t index = 0; index < ARC_Vector_GetSize(foundTokens); index++){ ARC_LexerToken *token = (ARC_LexerToken *)ARC_Vector_Get(foundTokens, index); - (*(tag->addDataFn))(&(parser->data), tagId, token, tag->addUserData); + (*(tag->addDataFn))(&(parser->data), tagId, index, token, tag->addUserData); } } @@ -241,7 +261,7 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){ //set the lexer index to start and get the first tag uint32_t lexerIndex = 0; - ARC_ParserLanguageTag *startTag = parser->language.data; + ARC_ParserTag *startTag = parser->language.data; //TODO: handle error checks for if parsing fails //recursivly parse from the inital start tag diff --git a/src/std/parser/parserlang.c b/src/std/parser/parserlang.c index 781dd03..b786c82 100644 --- a/src/std/parser/parserlang.c +++ b/src/std/parser/parserlang.c @@ -2,8 +2,51 @@ #include "arc/std/lexer.h" #include "arc/std/parser.h" #include "arc/std/string.h" +#include "arc/std/vector.h" #include +#include +/* + -> NEWLINE | | NEWLINE | LAMBDA + -> WHITESPACE ARROW WHITESPACE + + -> WHITESPACE OR WHITESPACE | + -> WHITESPACE | + -> | + + -> ALPHA_UPPER_CHAR + -> | LAMBDA + -> ALPHA_UPPER_CHAR | UNDERSCORE + + -> LESS_THAN GREATER_THAN + -> | UNDERSCORE + -> | LAMBDA + -> | NUMBER | UNDERSCORE + -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR +*/ + +/* + * @brief +*/ +typedef struct ARC_ParserLangLineData { + ARC_Vector *body; +} ARC_ParserLangLineData; + +typedef struct ARC_ParserLangBodyData { + ARC_String *tagName; + ARC_Vector *arguments; +} ARC_ParserLangBodyData; + +typedef struct ARC_ParserLangArgumentData { + ARC_Vector *tagsOrConstants; +} ARC_ParserLangArgumentData; + +typedef struct ARC_ParserLangVectorStringData { + ARC_String *string; + ARC_Vector *vector; +} ARC_ParserLangVectorStringData; + +//private function to initalize the 
lexer rules for the language void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){ //null ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0)); @@ -35,6 +78,81 @@ void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){ ARC_String_Destroy(arrowString); } +void ARC_ParserLang_VectorDestroyParserTagFn(void *data){ + ARC_ParserTag *currentTag = (ARC_ParserTag *)data; + + //free the orIndex vlues + for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){ + free(currentTag->tokensOrTags[orIndex]); + } + + if(currentTag->addDataFn != NULL){ + free(currentTag->addDataFn); + } + + //free the tokens or tags + free(currentTag->tokensOrTags); + + //free the tag itself + free(currentTag); +} + +//private function to create the saved data for the language +void ARC_ParserLang_CreateDataFn(void **data){ + //function callback to cleanup added tags + ARC_Vector_DestroyDataFn destroyParserTagFn = ARC_ParserLang_VectorDestroyParserTagFn; + + //I don't see a reason to have a comparison function right now. 
this might change in the future + ARC_Vector_Create((ARC_Vector **)data, NULL, &destroyParserTagFn); +} + +//private function to destroy the saved data for the language +void ARC_ParserLang_DestroyDataFn(void *data){ + ARC_Vector_Destroy(data); +} + +//private function to add char to constant name, userData is an ARC_String ** that is created on first use +void ARC_ParserLang_AddCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){ + if(userData == NULL){ + return; + } + + //recast the addData to make it easier to use + ARC_String **variable = (ARC_String **)userData; + if(variable == NULL){ + return; + } + + //create the const string if it is null + if(*variable == NULL){ + //this will be freed in the main parser lang add + ARC_String_Create(variable, NULL, 0); + } + + ARC_String_Append(variable, token->data); +} + +//private function to get details from a constant +void ARC_ParserLang_AddFirstCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){ + if(userData == NULL){ + return; + } + + //recast the addData to make it easier to use + ARC_ParserLangVectorStringData *vectorStringData = (ARC_ParserLangVectorStringData *)userData; + + //add the first character to the temp const + ARC_String *tokenData = NULL; + ARC_String_Copy(&tokenData, token->data); + ARC_String_Append(&tokenData, vectorStringData->string); + + //cleanup the string as it will be added to the vector + ARC_String_Destroy(vectorStringData->string); + vectorStringData->string = NULL; + + ARC_Vector_Add(vectorStringData->vector, tokenData); +} + void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){ // -> NEWLINE | | NEWLINE | LAMBDA uint32_t *line[] = { (uint32_t[]){ 3, ARC_PARSERLANG_BODY, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_BODY }, (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA } }; @@ -75,7 +193,7 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){ //
-> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }}; - ARC_ParserLanguageTag parserLangTags[13] = { + ARC_ParserTag parserLangTags[13] = { { ARC_PARSERLANG_LINE , line , 4, NULL, NULL }, { ARC_PARSERLANG_BODY , body , 1, NULL, NULL }, { ARC_PARSERLANG_ARGUMENTS , arguments , 2, NULL, NULL }, @@ -96,6 +214,9 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){ parserLangTags //data }; + ARC_ParserData_CreateFn createDataFn = ARC_ParserLang_CreateDataFn; + ARC_ParserData_DestroyFn destroyDataFn = ARC_ParserLang_DestroyDataFn; + //TODO: add the create, destroy, and add callbacks - ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, NULL, NULL); + ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, &createDataFn, &destroyDataFn); } diff --git a/temp_parser.txt b/temp_parser.txt index c444168..1e326d4 100644 --- a/temp_parser.txt +++ b/temp_parser.txt @@ -66,3 +66,5 @@ defineIntLine │ └───────────────── └─────────────────── + + diff --git a/tests/std/lexer.c b/tests/std/lexer.c index 03f4e2f..8468abb 100644 --- a/tests/std/lexer.c +++ b/tests/std/lexer.c @@ -94,3 +94,33 @@ ARC_TEST(Lexer_Check_Id_Unordered_Not_Continious){ ARC_Lexer_Destroy(lexer); } + +ARC_TEST(Lexer_Check_Continious){ + ARC_Lexer *lexer; + ARC_Lexer_Create(&lexer); + + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(2, ':')); + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(0, 0 )); + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(3, ':')); + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(1, ':')); + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':')); + + ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_True); + + 
ARC_Lexer_Destroy(lexer); +} + +ARC_TEST(Lexer_Check_Not_Continious){ + ARC_Lexer *lexer; + ARC_Lexer_Create(&lexer); + + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(2, ':')); + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(8, 0 )); + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(3, ':')); + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(1, ':')); + ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':')); + + ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_False); + + ARC_Lexer_Destroy(lexer); +} diff --git a/tests/std/parser.c b/tests/std/parser.c index 63ea4d7..35a1631 100644 --- a/tests/std/parser.c +++ b/tests/std/parser.c @@ -2,6 +2,7 @@ #include "arc/std/errno.h" #include "arc/std/parser.h" #include "arc/std/lexer.h" +#include "arc/std/vector.h" #include "arc/std/parser/parserlang.h" #include @@ -12,16 +13,16 @@ #define VARIABLE_NAME 24 #define VARIABLE 25 -void TEST_ParserLanguageTag_CreateStringFn(void **data){ +void TEST_ParserData_CreateStringFn(void **data){ ARC_String_Create((ARC_String **)data, NULL, 0); } -void TEST_ParserLanguageTag_DestroyStringFn(void *data){ +void TEST_ParserData_DestroyStringFn(void *data){ ARC_String_Destroy((ARC_String *)data); } //for this very basic example, the tagId does not matter -void TEST_ParserLanguageTag_AddFirstCharFn(void **data, uint32_t tagId, ARC_LexerToken *token, void *userData){ +void TEST_ParserTag_AddFirstCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){ if(*data == NULL){ return; } @@ -34,7 +35,7 @@ void TEST_ParserLanguageTag_AddFirstCharFn(void **data, uint32_t tagId, ARC_Lexe } //for this very basic example, the tagId does not matter -void TEST_ParserLanguageTag_AddCharFn(void **data, uint32_t tagId, ARC_LexerToken *token, void *userData){ +void TEST_ParserTag_AddCharFn(void 
**data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){ if(*data == NULL){ return; } @@ -47,13 +48,13 @@ uint32_t *variableNameTags[] = { (uint32_t[]){ 2, CHAR_OR_NUM, VARIABLE_NAME uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } }; //TODO: note how language function callbacks work, and how they use the parentData if createDataFn is NULL -ARC_ParserData_CreateFn createStringFn = TEST_ParserLanguageTag_CreateStringFn; -ARC_ParserData_DestroyFn destroyStringFn = TEST_ParserLanguageTag_DestroyStringFn; +ARC_ParserData_CreateFn createStringFn = TEST_ParserData_CreateStringFn; +ARC_ParserData_DestroyFn destroyStringFn = TEST_ParserData_DestroyStringFn; -ARC_ParserLanguageTag_AddDataFn addCharFn = TEST_ParserLanguageTag_AddCharFn; -ARC_ParserLanguageTag_AddDataFn addFirstCharFn = TEST_ParserLanguageTag_AddFirstCharFn; +ARC_ParserTag_AddDataFn addCharFn = TEST_ParserTag_AddCharFn; +ARC_ParserTag_AddDataFn addFirstCharFn = TEST_ParserTag_AddFirstCharFn; -ARC_ParserLanguageTag testTags[3] = { +ARC_ParserTag testTags[3] = { { VARIABLE, //tagId variableTokensOrTags, //tokensOrTags @@ -213,7 +214,6 @@ ARC_TEST(Parser_Basic_GetParsedValue){ ARC_Parser_Destroy(parser); } - ARC_TEST(Parser_ParserLang_BasicTest){ ARC_Parser *parser; ARC_Parser_CreateAsParserLang(&parser); @@ -229,3 +229,26 @@ ARC_TEST(Parser_ParserLang_BasicTest){ ARC_CHECK(arc_errno == 0); } +ARC_TEST(Parser_ParserLang_BasicVector){ + ARC_Vector *testLanguage; + ARC_Vector_Create(&testLanguage, NULL, NULL); + + ARC_Vector_Add(testLanguage, testTags + 0); + ARC_Vector_Add(testLanguage, testTags + 1); + ARC_Vector_Add(testLanguage, testTags + 2); + + ARC_Parser *parser; + ARC_Parser_CreateFromVector(&parser, testLanguage, TEST_Parser_InitLexerRulesFn, NULL, NULL); + + ARC_String *tempString; + ARC_String_CreateWithStrlen(&tempString, "variablename"); + + //this destroys string, so no need for cleanup + ARC_Parser_Parse(parser, &tempString); + + //cleanup + 
ARC_Parser_Destroy(parser); + ARC_Vector_Destroy(testLanguage); + + ARC_CHECK(arc_errno == 0); +}