diff --git a/include/arc/std/lexer.h b/include/arc/std/lexer.h
index db46c3a..15ec6b4 100644
--- a/include/arc/std/lexer.h
+++ b/include/arc/std/lexer.h
@@ -99,6 +99,15 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data);
 */
 void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path);
 
+/**
+ * @brief prints the id and the automata function address of every token rule in the lexer
+ *
+ * @note this is mostly used for debugging, one line is written to stdout per rule
+ *
+ * @param[in] lexer the lexer to print rules from
+*/
+void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer);
+
 /**
  * @brief gets a token at a given index from a lexer
  *
diff --git a/include/arc/std/parser.h b/include/arc/std/parser.h
index a78dd58..488bf1e 100644
--- a/include/arc/std/parser.h
+++ b/include/arc/std/parser.h
@@ -90,6 +90,11 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data);
 */
 void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
 
+/**
+ * @brief basic tag for letting the parser know it is ok to end
+*/
+#define ARC_PARSER_TAG_LAMBDA 0
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/std/lexer.c b/src/std/lexer.c
index d45dd13..77203f7 100644
--- a/src/std/lexer.c
+++ b/src/std/lexer.c
@@ -257,6 +257,14 @@ void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path){
     }
 }
 
+void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer){
+    for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
+        ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
+        //%u needs an unsigned int and %p needs a void pointer, so convert both arguments explicitly before printing
+        printf("Rule: %02u\tFunction: %p\n", (unsigned int)tokenRule->id, (void *)(uintptr_t)tokenRule->automataFn);
+    }
+}
+
 ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
     //get the token and log if there is an error
     ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);
diff --git a/src/std/parser.c b/src/std/parser.c
index 603eadf..6f38228 100644
--- a/src/std/parser.c
+++ b/src/std/parser.c
@@ -72,19 +72,29 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
         ARC_Bool foundRule = ARC_True;
         for(uint32_t tokenOrTagIndex = 1; tokenOrTagIndex < tag->tokensOrTags[orIndex][0] + 1; tokenOrTagIndex++){
             //check if it is lambda (can return safely)
-            if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == 0){
-                return ARC_True;
+            if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == ARC_PARSER_TAG_LAMBDA){
+                break;
             }
 
             //if the value isn't a token it is a tag, so recurs if it isn't a token
             ARC_Bool isToken = ARC_Lexer_IsTokenId(parser->lexer, tag->tokensOrTags[orIndex][tokenOrTagIndex]);
-            uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
             if(isToken == ARC_False){
                 //check if the tag works if not break to continue checking next or
-                ARC_Bool tagFound = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
-                if(tagFound == ARC_False){
+                uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
+                foundRule = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
+                if(foundRule == ARC_False){
                     break;
                 }
+
+                //the tag matched, so skip the token checks below and move on to the next token or tag in this or
+                continue;
+            }
+
+            //check if there is another token that can be used
+            if(lexerCheckIndex >= ARC_Lexer_GetTokensSize(parser->lexer)){
+                //out of tokens, so the current or does not work, so break
+                foundRule = ARC_False;
+                break;
             }
 
             //get the next token in the lexer and increment the lexers index
@@ -106,9 +116,7 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
         }
     }
 
-    //no rule was found, so set an error and log
-    arc_errno = ARC_ERRNO_DATA;
-    ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, lexerIndex, tagId), tag id: %u could not find a matching rule at token index %u", tagId, *lexerIndex);
+    //no rule was found, so return false
     return ARC_False;
 }
 
@@ -131,11 +139,14 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
     uint32_t lexerIndex = 0;
     ARC_ParserLanguageTag *startTag = parser->language.data;
 
+    //TODO: handle error checks for if parsing fails
     //recursivly parse from the inital start tag
-    ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
+    ARC_Bool parsed = ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
+    ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);
     ARC_Lexer_Clear(parser->lexer);
-    if(arc_errno){
-        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not parse the given data");
+    if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
+        arc_errno = ARC_ERRNO_DATA;
+        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);
         return;
     }
 }
diff --git a/tests/std/parser.c b/tests/std/parser.c
index 1136e1e..c1edbfe 100644
--- a/tests/std/parser.c
+++ b/tests/std/parser.c
@@ -3,7 +3,7 @@
 #include "arc/std/parser.h"
 
 //TODO: fix lambda
-#define LAMBDA 0
+#define LAMBDA ARC_PARSER_TAG_LAMBDA
 #define CHAR ARC_LEXER_TOKEN_ALPHALOWERCHAR
 #define NUM ARC_LEXER_TOKEN_NUMBER
 #define CHAR_OR_NUM 23
@@ -82,7 +82,8 @@ ARC_TEST(Parser_Basic_Parse){
     ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
 
     ARC_String *tempString;
-    ARC_String_CreateWithStrlen(&tempString, "myVar1");
+    //ARC_String_CreateWithStrlen(&tempString, "myvar1");
+    ARC_String_CreateWithStrlen(&tempString, "m");
 
     //this destroys string, so no need for cleanup
     ARC_Parser_Parse(parser, &tempString);
@@ -119,6 +120,9 @@ ARC_TEST(Parser_Basic_ParseError){
         testTags //data
     };
 
+    //TODO: remove this
+    arc_errno = 0;
+
     ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
 
     ARC_String *tempString;