Parser mostly fixed; it still needs a lot more testing, and the parsed values still need to be stored in a datatype

2024-11-01 04:39:45 -06:00 · 2024-11-01 04:39:45 -06:00 · 006b4c63f7
commit 006b4c63f7
parent 963b99c6bd
5 changed files with 49 additions and 13 deletions
--- a/src/std/lexer.c
+++ b/src/std/lexer.c
@ -257,6 +257,13 @@ void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path){
    }
 }

+//prints every token rule stored in the lexer to stdout, one line per rule with the rule's id and the address of its automata function
+//NOTE(review): lexer is dereferenced without a NULL check, so callers are presumably expected to pass a valid lexer
+void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer){
+    for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
+        ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
+
+        //%p requires a void * argument, so cast explicitly to keep the printf call well defined
+        //NOTE(review): %02i expects an int, confirm the declared type of tokenRule->id matches that specifier
+        printf("Rule: %02i\tFunction: %p\n", tokenRule->id, (void *)tokenRule->automataFn);
+    }
+}
+
 ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
    //get the token and log if there is an error
    ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);
--- a/src/std/parser.c
+++ b/src/std/parser.c
@ -72,19 +72,29 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
        ARC_Bool foundRule = ARC_True;
        for(uint32_t tokenOrTagIndex = 1; tokenOrTagIndex < tag->tokensOrTags[orIndex][0] + 1; tokenOrTagIndex++){
            //check if it is lambda (can return safely)
-            if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == 0){
-                return ARC_True;
+            if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == ARC_PARSER_TAG_LAMBDA){
+                break;
            }

            //if the value isn't a token it is a tag, so recurse if it isn't a token
            ARC_Bool isToken = ARC_Lexer_IsTokenId(parser->lexer, tag->tokensOrTags[orIndex][tokenOrTagIndex]);
-            uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
            if(isToken == ARC_False){
                //check if the tag works if not break to continue checking next or
-                ARC_Bool tagFound = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
-                if(tagFound == ARC_False){
+                uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
+                foundRule = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
+                if(foundRule == ARC_False){
                    break;
                }
+
+                //this will probably never be called as lambda is usually the last instruction, but just in case we can continue instead of break
+                continue;
+            }
+
+            //check if there is another token that can be used
+            if(lexerCheckIndex >= ARC_Lexer_GetTokensSize(parser->lexer)){
+                //out of tokens, so the current or does not work, so break
+                foundRule = ARC_False;
+                break;
            }

            //get the next token in the lexer and increment the lexers index
@ -106,9 +116,7 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
        }
    }

-    //no rule was found, so set an error and log
-    arc_errno = ARC_ERRNO_DATA;
-    ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, lexerIndex, tagId), tag id: %u could not find a matching rule at token index %u", tagId, *lexerIndex);
+    //no rule was found, so return false
    return ARC_False;
 }

@ -131,11 +139,14 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
    uint32_t lexerIndex = 0;
    ARC_ParserLanguageTag *startTag = parser->language.data;

+    //TODO: handle error checks for if parsing fails
    //recursively parse from the initial start tag
-    ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
+    ARC_Bool parsed = ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
+    ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);
    ARC_Lexer_Clear(parser->lexer);
-    if(arc_errno){
-        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not parse the given data");
+    if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
+        arc_errno = ARC_ERRNO_DATA;
+        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);
        return;
    }
 }