parser mostly fixed, still needs a lot more testing though, and need to store parsed values in a datatype
This commit is contained in:
parent 963b99c6bd
commit 006b4c63f7
5 changed files with 49 additions and 13 deletions
@@ -99,6 +99,15 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data);
  */
 void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path);
 
+/**
+ * @brief prints the id and the automata function address of each registered token rule
+ *
+ * @note this is mostly used for debugging
+ *
+ * @param[in] lexer the lexer to print rules from
+ */
+void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer);
+
 /**
  * @brief gets a token at a given index from a lexer
  *
@@ -90,6 +90,11 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data);
  */
 void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
 
+/**
+ * @brief basic tag for letting the parser know it is ok to end
+ */
+#define ARC_PARSER_TAG_LAMBDA 0
+
 #ifdef __cplusplus
 }
 #endif
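
A small caller-side sketch for the file-based entry point declared above; the ARC_Parser_Create arguments are borrowed from the test file further down, the path is hypothetical, and whether ARC_Parser_ParseFile takes ownership of the path string is not shown in this diff.

    ARC_Parser *parser;
    ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn); //setup as in the test below

    ARC_String *path;
    ARC_String_CreateWithStrlen(&path, "test.arc"); //hypothetical source file path
    ARC_Parser_ParseFile(parser, path);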
@@ -257,6 +257,13 @@ void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path){
     }
 }
 
+void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer){
+    for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
+        ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
+        printf("Rule: %02i\tFunction: %p\n", tokenRule->id, tokenRule->automataFn);
+    }
+}
+
 ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
     //get the token and log if there is an error
     ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);
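
A minimal sketch of how the new debug helper might be driven, assuming a lexer whose token rules were registered elsewhere (rule registration is not part of this diff); the helper name is hypothetical, and ARC_Lexer_LexString is the declaration from the lexer header hunk above.

    //hypothetical debug helper: lex a source string, then dump every registered token rule
    void TEST_DebugLexAndDump(ARC_Lexer *lexer, ARC_String **source){
        ARC_Lexer_LexString(lexer, source);  //tokenize the input
        ARC_Lexer_PrintTokenRules(lexer);    //prints one "Rule: <id>  Function: <automata fn address>" line per rule
    }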
@@ -72,19 +72,29 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
         ARC_Bool foundRule = ARC_True;
         for(uint32_t tokenOrTagIndex = 1; tokenOrTagIndex < tag->tokensOrTags[orIndex][0] + 1; tokenOrTagIndex++){
             //check if it is lambda (can safely stop checking this rule)
-            if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == 0){
-                return ARC_True;
+            if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == ARC_PARSER_TAG_LAMBDA){
+                break;
             }
 
             //if the value isn't a token it is a tag, so recurse if it isn't a token
             ARC_Bool isToken = ARC_Lexer_IsTokenId(parser->lexer, tag->tokensOrTags[orIndex][tokenOrTagIndex]);
-            uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
             if(isToken == ARC_False){
                 //check if the tag works; if not, break to continue checking the next or
-                ARC_Bool tagFound = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
-                if(tagFound == ARC_False){
+                uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
+                foundRule = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
+                if(foundRule == ARC_False){
                     break;
                 }
+
+                //this will probably never be called as lambda is usually the last instruction, but just in case we can continue instead of break
+                continue;
+            }
+
+            //check if there is another token that can be used
+            if(lexerCheckIndex >= ARC_Lexer_GetTokensSize(parser->lexer)){
+                //out of tokens, so the current or does not work; break
+                foundRule = ARC_False;
+                break;
+            }
 
             //get the next token in the lexer and increment the lexer's index
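
The loop above treats tag->tokensOrTags[orIndex][0] as the entry count and the elements after it as token ids or tag ids, with ARC_PARSER_TAG_LAMBDA (0) marking an empty alternative. An illustrative encoding of one tag's alternatives, not taken from the repo (the uint32_t element type is an assumption; CHAR and CHAR_OR_NUM are the defines from the test file below):

    //hypothetical alternatives for one tag: "CHAR CHAR_OR_NUM" or the empty (lambda) production
    uint32_t exampleOr0[] = {2, CHAR, CHAR_OR_NUM};     //[0] = entry count, then the ids
    uint32_t exampleOr1[] = {1, ARC_PARSER_TAG_LAMBDA}; //lambda lets the tag match nothing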
@@ -106,9 +116,7 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
         }
     }
 
-    //no rule was found, so set an error and log
-    arc_errno = ARC_ERRNO_DATA;
-    ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, lexerIndex, tagId), tag id: %u could not find a matching rule at token index %u", tagId, *lexerIndex);
+    //no rule was found, so return false
     return ARC_False;
 }
 
@@ -131,11 +139,14 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
     uint32_t lexerIndex = 0;
     ARC_ParserLanguageTag *startTag = parser->language.data;
+
     //TODO: handle error checks for if parsing fails
     //recursively parse from the initial start tag
-    ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
+    ARC_Bool parsed = ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
+    ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);
     ARC_Lexer_Clear(parser->lexer);
-    if(arc_errno){
-        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not parse the given data");
+    if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
+        arc_errno = ARC_ERRNO_DATA;
+        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);
         return;
     }
 }
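
With this change the caller learns about failure through arc_errno: ARC_Parser_Parse returns void, sets ARC_ERRNO_DATA when ParseTag fails or tokens are left over, and logs the failing lexer index. A minimal caller-side sketch mirroring the test further down (the parser is assumed to be created as in that test):

    ARC_String *source;
    ARC_String_CreateWithStrlen(&source, "m");

    //ARC_Parser_Parse destroys the string, so no separate cleanup is needed
    ARC_Parser_Parse(parser, &source);

    if(arc_errno){
        //either no rule matched or not every token was consumed
    }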
@@ -3,7 +3,7 @@
 #include "arc/std/parser.h"
 
 //TODO: fix lambda
-#define LAMBDA 0
+#define LAMBDA ARC_PARSER_TAG_LAMBDA
 #define CHAR ARC_LEXER_TOKEN_ALPHALOWERCHAR
 #define NUM ARC_LEXER_TOKEN_NUMBER
 #define CHAR_OR_NUM 23
@@ -82,7 +82,8 @@ ARC_TEST(Parser_Basic_Parse){
     ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
 
     ARC_String *tempString;
-    ARC_String_CreateWithStrlen(&tempString, "myVar1");
+    //ARC_String_CreateWithStrlen(&tempString, "myvar1");
+    ARC_String_CreateWithStrlen(&tempString, "m");
 
     //this destroys string, so no need for cleanup
     ARC_Parser_Parse(parser, &tempString);
@@ -119,6 +120,9 @@ ARC_TEST(Parser_Basic_ParseError){
         testTags //data
     };
 
+    //TODO: remove this
+    arc_errno = 0;
+
     ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
 
     ARC_String *tempString;