From 5a5eaabc144e9f873cc84dc7754d8f5bbdc93889 Mon Sep 17 00:00:00 2001 From: herbglitch Date: Tue, 3 Dec 2024 18:21:28 -0700 Subject: [PATCH] parserlang now working --- CMakeLists.txt | 2 +- include/arc/std/parser/parserlang.h | 2 +- src/std/parser.c | 18 +- src/std/parser/parserlang.c | 76 +++++--- tests/std/parser.c | 282 +++++++++++++++------------- 5 files changed, 214 insertions(+), 166 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ffa3940..2768a14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,7 +129,7 @@ if(ARCHEUS_STD_TESTS) tests/test.c #tests/std/vector.c - tests/std/lexer.c + #tests/std/lexer.c tests/std/parser.c ${ARCHEUS_STD_SOURCES} diff --git a/include/arc/std/parser/parserlang.h b/include/arc/std/parser/parserlang.h index 61cee9c..bc28d00 100644 --- a/include/arc/std/parser/parserlang.h +++ b/include/arc/std/parser/parserlang.h @@ -11,7 +11,7 @@ extern "C" { -> NEWLINE | | NEWLINE | LAMBDA -> WHITESPACE ARROW WHITESPACE - -> WHITESPACE OR WHITESPACE | + -> WHITESPACE OR WHITESPACE | -> WHITESPACE | -> | diff --git a/src/std/parser.c b/src/std/parser.c index 54b65b9..c04e9d2 100644 --- a/src/std/parser.c +++ b/src/std/parser.c @@ -300,21 +300,23 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){ ARC_Bool parsed = ARC_Parser_ParseTag(parser, tagToken, &lexerIndex); ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer); + //error if anything went wrong + if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){ + ARC_Lexer_Clear(parser->lexer); + + arc_errno = ARC_ERRNO_DATA; + ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex); + return; + } + //create the data if the creation callback exists if(parser->createDataFn != NULL){ (*(parser->createDataFn))(&(parser->data), tagToken, parser->userData); } //cleanup - ARC_ParserTagToken_Destroy(tagToken); ARC_Lexer_Clear(parser->lexer); - - //error if anything went wrong - if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){ - arc_errno = ARC_ERRNO_DATA; - ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex); - return; - } + ARC_ParserTagToken_Destroy(tagToken); } void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){ diff --git a/src/std/parser/parserlang.c b/src/std/parser/parserlang.c index 6ca8dab..9ca24a2 100644 --- a/src/std/parser/parserlang.c +++ b/src/std/parser/parserlang.c @@ -80,6 +80,12 @@ void ARC_ParserLangParsedData_RecurseStringAdd(ARC_String **data, ARC_ParserTagT return; } + if(tagToken->token->rule == ARC_PARSERLANG_TOKEN_UNDERSCORE_ID){ + char tokenChar = ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR; + ARC_String_AppendCString(data, &tokenChar, 1); + return; + } + ARC_String_Append(data, tagToken->token->data); return; } @@ -122,6 +128,9 @@ void ARC_ParserLangParsedData_GetArgumentTag(ARC_Vector *orTokensOrTags, ARC_Par continue; case ARC_PARSERLANG_TAG_OR_CONSTANT: + //initialize the string to use + ARC_String_Create(&tagOrConstantString, NULL, 0); + //get the id of the tag/constant ARC_ParserLangParsedData_RecurseStringAdd(&tagOrConstantString, childTagToken); id = (uint32_t *)malloc(sizeof(uint32_t)); @@ -142,24 +151,25 @@ void ARC_ParserLangParsedData_GetArgumentTag(ARC_Vector *orTokensOrTags, ARC_Par } /* - -> WHITESPACE OR WHITESPACE | + -> WHITESPACE OR WHITESPACE | */ void ARC_ParserLangParsedData_GetArgumentsTag(ARC_Vector *tokensOrTags, ARC_ParserTagToken *tagToken, ARC_ParserLang_GetIdFn *getIdFn){ for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){ ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index); //switch variables - ARC_String *tagOrConstantString; - ARC_String_Create(&tagOrConstantString, NULL, 0); - - uint32_t *id = NULL; - uint32_t tokensOrTagsIndex = 0; - ARC_Vector *orTokensOrTags = NULL; + uint32_t tokensOrTagsIndex = 0; + ARC_Vector *orTokensOrTags = NULL; ARC_Vector_DestroyDataFn destroyUint32Fn = ARC_ParserLang_VectorDestroyUInt32; switch(childTagToken->id){ case ARC_PARSERLANG_ARGUMENT: + //get the last vector within tokens or tags to add the tag/constant to + tokensOrTagsIndex = ARC_Vector_GetSize(tokensOrTags); + orTokensOrTags = (ARC_Vector *)ARC_Vector_Get(tokensOrTags, tokensOrTagsIndex - 1); + + ARC_ParserLangParsedData_GetArgumentTag(orTokensOrTags, childTagToken, getIdFn); continue; case ARC_PARSERLANG_TOKEN_OR_ID: @@ -175,23 +185,6 @@ void ARC_ParserLangParsedData_GetArgumentsTag(ARC_Vector *tokensOrTags, ARC_Pars ARC_ParserLangParsedData_GetArgumentsTag(tokensOrTags, childTagToken, getIdFn); continue; - case ARC_PARSERLANG_TAG_OR_CONSTANT: - //get the id of the tag/constant - ARC_ParserLangParsedData_RecurseStringAdd(&tagOrConstantString, childTagToken); - id = (uint32_t *)malloc(sizeof(uint32_t)); - *id = (*getIdFn)(tagOrConstantString); - - //get the last vector within tokens or tags to add the tag/constant to - tokensOrTagsIndex = ARC_Vector_GetSize(tokensOrTags); - orTokensOrTags = (ARC_Vector *)ARC_Vector_Get(tokensOrTags, tokensOrTagsIndex - 1); - - //add the id to the last or vector - ARC_Vector_Add(orTokensOrTags, (void *)id); - - //cleanup - ARC_String_Destroy(tagOrConstantString); - continue; - default: //this should only be whitespace continue; @@ -235,7 +228,28 @@ void ARC_ParserLangParsedData_CreateBodyTag(ARC_ParserTag **tag, ARC_ParserTagTo ARC_ParserTagToken *argumentsToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, 4); ARC_ParserLangParsedData_GetArgumentsTag(tokensOrTags, argumentsToken, getIdFn); - //TODO: move the data from the vector to an array + //initialize the tokens or tags array to the needed size + bodyTag->tokensOrTagsSize = ARC_Vector_GetSize(tokensOrTags); + bodyTag->tokensOrTags = (uint32_t **)malloc(sizeof(uint32_t *) * bodyTag->tokensOrTagsSize); + + //copy each or section into the tokensOrTags + for(uint32_t orIndex = 0; orIndex < bodyTag->tokensOrTagsSize; orIndex++){ + //get the current or vector and its tags + orTokensOrTags = (ARC_Vector *)ARC_Vector_Get(tokensOrTags, orIndex); + uint32_t orTokensOrTagsSize = ARC_Vector_GetSize(orTokensOrTags); + + //create the or array with one extra space and store the size in that space + bodyTag->tokensOrTags[orIndex] = (uint32_t *)malloc(sizeof(uint32_t) * (orTokensOrTagsSize + 1)); + bodyTag->tokensOrTags[orIndex][0] = orTokensOrTagsSize; + + //copy the or data to the tokensOrTags + for(uint32_t tokenOrTagIndex = 0; tokenOrTagIndex < orTokensOrTagsSize; tokenOrTagIndex++){ + bodyTag->tokensOrTags[orIndex][tokenOrTagIndex + 1] = *(uint32_t *)ARC_Vector_Get(orTokensOrTags, tokenOrTagIndex); + } + } + + //set the tag to the bodyTag + *tag = bodyTag; //cleanup ARC_Vector_Destroy(tokensOrTags); @@ -258,6 +272,7 @@ void ARC_ParserLangParsedData_RunLineTag(ARC_Vector *tags, ARC_ParserTagToken *t //get a tag case ARC_PARSERLANG_BODY: ARC_ParserLangParsedData_CreateBodyTag(&tag, childTagToken, getIdFn); + ARC_Vector_Add(tags, (void *)tag); continue; default: @@ -289,7 +304,7 @@ void ARC_ParserLang_CreateDataFn(void **data, ARC_ParserTagToken *parsedData, vo } //load the language into a vector recursivly - ARC_ParserLangParsedData_RunLineTag((ARC_Vector *)data, parsedData, getIdFn); + ARC_ParserLangParsedData_RunLineTag(*((ARC_Vector **)data), parsedData, getIdFn); } //private function to destroy the saved data for the language @@ -297,7 +312,10 @@ void ARC_ParserLang_DestroyDataFn(void *data, void *userData){ ARC_ParserLang_GetIdFn *getIdFn = (ARC_ParserLang_GetIdFn *)userData; free(getIdFn); - ARC_Vector_Destroy((ARC_Vector *)data); + //check if there is data to free + if((ARC_Vector *)data != NULL){ + ARC_Vector_Destroy((ARC_Vector *)data); + } } void ARC_Parser_CreateAsParserLang(ARC_Parser **parser, ARC_ParserLang_GetIdFn getIdFn){ @@ -307,8 +325,8 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser, ARC_ParserLang_GetIdFn g // -> WHITESPACE ARROW WHITESPACE uint32_t *body[] = { (uint32_t[]){ 5, ARC_PARSERLANG_TAG, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_ARROW_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS } }; - // -> WHITESPACE OR WHITESPACE | - uint32_t *arguments[] = { (uint32_t[]){ 5, ARC_PARSERLANG_ARGUMENT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_OR_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS }, (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT } }; + // -> WHITESPACE OR WHITESPACE | + uint32_t *arguments[] = { (uint32_t[]){ 5, ARC_PARSERLANG_ARGUMENT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_OR_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS }, (uint32_t[]){ 1, ARC_PARSERLANG_ARGUMENT } }; // -> WHITESPACE | uint32_t *argument[] = { (uint32_t[]){ 3, ARC_PARSERLANG_TAG_OR_CONSTANT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENT }, (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT } }; diff --git a/tests/std/parser.c b/tests/std/parser.c index 43a1cfe..78a976f 100644 --- a/tests/std/parser.c +++ b/tests/std/parser.c @@ -13,6 +13,11 @@ #define VARIABLE_NAME 24 #define VARIABLE 25 +const uint32_t TEST_ALPHA_LAMBDA = 0; +const uint32_t TEST_ALPHA_LOWER_CHAR = 1; +const uint32_t TEST_ALPHA_UPPER_CHAR = 2; +const uint32_t TEST_ALPHA_CHAR = 3; + void TEST_ParserData_RecurseStringAdd(ARC_String **data, ARC_ParserTagToken *tagToken){ if(tagToken->token != NULL){ ARC_String_Append(data, tagToken->token->data); @@ -75,133 +80,145 @@ void TEST_Parser_InitLexerRulesFn(ARC_Lexer *lexer){ ARC_Lexer_InitBasicTokenRules(lexer); } -ARC_TEST(Parser_Init){ - ARC_Parser *parser; - ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, NULL, NULL, NULL); - - ARC_Parser_Destroy(parser); - - ARC_CHECK(arc_errno == 0); -} - -ARC_TEST(Parser_Basic_Parse){ - ARC_Parser *parser; - ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, NULL, NULL, NULL); - - ARC_String *tempString; - - - /* ~ first test ~ */ - ARC_String_CreateWithStrlen(&tempString, "myvar1"); - - //this destroys string, so no need for cleanup - ARC_Parser_Parse(parser, &tempString); - - ARC_CHECK(arc_errno == 0); - - - /* ~ second test ~ */ - ARC_String_CreateWithStrlen(&tempString, "z1xwvq"); - //this destroys string, so no need for cleanup - ARC_Parser_Parse(parser, &tempString); - - - ARC_CHECK(arc_errno == 0); - - - /* ~ third test ~ */ - ARC_String_CreateWithStrlen(&tempString, "z1234"); - - //this destroys string, so no need for cleanup - ARC_Parser_Parse(parser, &tempString); - - ARC_CHECK(arc_errno == 0); - - - /* ~ fourth test ~ */ - ARC_String_CreateWithStrlen(&tempString, "aaaaa"); - - //this destroys string, so no need for cleanup - ARC_Parser_Parse(parser, &tempString); - - ARC_CHECK(arc_errno == 0); - - - /* ~ cleanup ~ */ - ARC_Parser_Destroy(parser); -} - -ARC_TEST(Parser_Basic_ParseError){ - ARC_Parser *parser; - ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, NULL, NULL, NULL); - - ARC_String *tempString; - - - /* ~ first test ~ */ - ARC_String_CreateWithStrlen(&tempString, "!myVar1"); - - //this destroys string, so no need for cleanup - ARC_Parser_Parse(parser, &tempString); - - ARC_CHECK(arc_errno == ARC_ERRNO_DATA); - - - /* ~ second test ~ */ - //check again with moved character - arc_errno = 0; - ARC_String_CreateWithStrlen(&tempString, "my!Var1"); - - //this destroys string, so no need for cleanup - ARC_Parser_Parse(parser, &tempString); - - ARC_CHECK(arc_errno == ARC_ERRNO_DATA); - - - /* ~ third test ~ */ - //check again with moved character - arc_errno = 0; - ARC_String_CreateWithStrlen(&tempString, "myVar1!"); - - //this destroys string, so no need for cleanup - ARC_Parser_Parse(parser, &tempString); - - ARC_CHECK(arc_errno == ARC_ERRNO_DATA); - - - /* ~ cleanup ~ */ - ARC_Parser_Destroy(parser); - - //reset for next test - arc_errno = 0; -} - -ARC_TEST(Parser_Basic_GetParsedValue){ - ARC_Parser *parser; - ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, &createStringFn, &destroyStringFn, NULL); - - ARC_String *tempString; - - - /* ~ first test ~ */ - ARC_String_CreateWithStrlen(&tempString, "myvar1"); - - //this destroys string, so no need for cleanup - ARC_Parser_Parse(parser, &tempString); - - ARC_CHECK(arc_errno == 0); - - ARC_String *checkValue = (ARC_String *)ARC_Parser_GetData(parser); - ARC_CHECK(ARC_String_EqualsCStringWithStrlen(checkValue, "myvar1")); - - - /* ~ cleanup ~ */ - ARC_Parser_Destroy(parser); -} +//ARC_TEST(Parser_Init){ +// ARC_Parser *parser; +// ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, NULL, NULL, NULL); +// +// ARC_Parser_Destroy(parser); +// +// ARC_CHECK(arc_errno == 0); +//} +// +//ARC_TEST(Parser_Basic_Parse){ +// ARC_Parser *parser; +// ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, NULL, NULL, NULL); +// +// ARC_String *tempString; +// +// +// /* ~ first test ~ */ +// ARC_String_CreateWithStrlen(&tempString, "myvar1"); +// +// //this destroys string, so no need for cleanup +// ARC_Parser_Parse(parser, &tempString); +// +// ARC_CHECK(arc_errno == 0); +// +// +// /* ~ second test ~ */ +// ARC_String_CreateWithStrlen(&tempString, "z1xwvq"); +// //this destroys string, so no need for cleanup +// ARC_Parser_Parse(parser, &tempString); +// +// +// ARC_CHECK(arc_errno == 0); +// +// +// /* ~ third test ~ */ +// ARC_String_CreateWithStrlen(&tempString, "z1234"); +// +// //this destroys string, so no need for cleanup +// ARC_Parser_Parse(parser, &tempString); +// +// ARC_CHECK(arc_errno == 0); +// +// +// /* ~ fourth test ~ */ +// ARC_String_CreateWithStrlen(&tempString, "aaaaa"); +// +// //this destroys string, so no need for cleanup +// ARC_Parser_Parse(parser, &tempString); +// +// ARC_CHECK(arc_errno == 0); +// +// +// /* ~ cleanup ~ */ +// ARC_Parser_Destroy(parser); +//} +// +//ARC_TEST(Parser_Basic_ParseError){ +// ARC_Parser *parser; +// ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, NULL, NULL, NULL); +// +// ARC_String *tempString; +// +// +// /* ~ first test ~ */ +// ARC_String_CreateWithStrlen(&tempString, "!myVar1"); +// +// //this destroys string, so no need for cleanup +// ARC_Parser_Parse(parser, &tempString); +// +// ARC_CHECK(arc_errno == ARC_ERRNO_DATA); +// +// +// /* ~ second test ~ */ +// //check again with moved character +// arc_errno = 0; +// ARC_String_CreateWithStrlen(&tempString, "my!Var1"); +// +// //this destroys string, so no need for cleanup +// ARC_Parser_Parse(parser, &tempString); +// +// ARC_CHECK(arc_errno == ARC_ERRNO_DATA); +// +// +// /* ~ third test ~ */ +// //check again with moved character +// arc_errno = 0; +// ARC_String_CreateWithStrlen(&tempString, "myVar1!"); +// +// //this destroys string, so no need for cleanup +// ARC_Parser_Parse(parser, &tempString); +// +// ARC_CHECK(arc_errno == ARC_ERRNO_DATA); +// +// +// /* ~ cleanup ~ */ +// ARC_Parser_Destroy(parser); +// +// //reset for next test +// arc_errno = 0; +//} +// +//ARC_TEST(Parser_Basic_GetParsedValue){ +// ARC_Parser *parser; +// ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, &createStringFn, &destroyStringFn, NULL); +// +// ARC_String *tempString; +// +// +// /* ~ first test ~ */ +// ARC_String_CreateWithStrlen(&tempString, "myvar1"); +// +// //this destroys string, so no need for cleanup +// ARC_Parser_Parse(parser, &tempString); +// +// ARC_CHECK(arc_errno == 0); +// +// ARC_String *checkValue = (ARC_String *)ARC_Parser_GetData(parser); +// ARC_CHECK(ARC_String_EqualsCStringWithStrlen(checkValue, "myvar1")); +// +// +// /* ~ cleanup ~ */ +// ARC_Parser_Destroy(parser); +//} /* ~ parserlang tests ~ */ -uint32_t TEST_ParserLang_GetIdFn(ARC_String *constant){ - printf("tag: %s\n", constant->data); +uint32_t TEST_ParserLang_GetIdFn(ARC_String *string){ + if(ARC_String_EqualsCStringWithStrlen(string, "")){ + return TEST_ALPHA_CHAR; + } + + if(ARC_String_EqualsCStringWithStrlen(string, "ALPHA_LOWER_CHAR")){ + return TEST_ALPHA_LOWER_CHAR; + } + + if(ARC_String_EqualsCStringWithStrlen(string, "ALPHA_UPPER_CHAR")){ + return TEST_ALPHA_UPPER_CHAR; + } + + printf("tag: %s\n", string->data); return 0; } @@ -210,14 +227,25 @@ ARC_TEST(Parser_ParserLang_BasicTest){ ARC_Parser_CreateAsParserLang(&parser, TEST_ParserLang_GetIdFn); ARC_String *tempString; - ARC_String_CreateWithStrlen(&tempString, " -> CHAR \n"); + ARC_String_CreateWithStrlen(&tempString, " -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR\n"); //this destroys string, so no need for cleanup ARC_Parser_Parse(parser, &tempString); + ARC_CHECK(arc_errno == 0); + + void *data = ARC_Parser_GetData(parser); + ARC_ParserTag *tag = ARC_Vector_Get((ARC_Vector *)data, 0); + + ARC_CHECK(tag->tagId == TEST_ALPHA_CHAR); + + ARC_CHECK(tag->tokensOrTagsSize == 2); + ARC_CHECK(tag->tokensOrTags[0][0] == 1); + ARC_CHECK(tag->tokensOrTags[0][1] == TEST_ALPHA_LOWER_CHAR); + ARC_CHECK(tag->tokensOrTags[1][0] == 1); + ARC_CHECK(tag->tokensOrTags[1][1] == TEST_ALPHA_UPPER_CHAR); ARC_Parser_Destroy(parser); - ARC_CHECK(arc_errno == 0); } //ARC_TEST(Parser_ParserLang_BasicVector){