Parser is working, but it needs more tests and an actual language to confirm that.

2024-11-20 10:27:17 -07:00 · 2024-11-20 10:27:17 -07:00 · 606f8e4bad
commit 606f8e4bad
parent 63dfb98aad
10 changed files with 365 additions and 148 deletions
--- a/src/std/parser.c
+++ b/src/std/parser.c
@ -2,7 +2,7 @@
 #include "arc/std/bool.h"
 #include "arc/std/errno.h"
 #include "arc/std/lexer.h"
-//#include "arc/std/vector.h"
+#include "arc/std/vector.h"
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@ -11,9 +11,13 @@ struct ARC_Parser {
    ARC_Array language;

    ARC_Lexer *lexer;
+
+    void *data;
+    ARC_ParserData_CreateFn  *createDataFn;
+    ARC_ParserData_DestroyFn *destroyDataFn;
 };

-void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
+void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){
    *parser = (ARC_Parser *)malloc(sizeof(ARC_Parser));

    //set the language size to 0 and data to NULL in case the language is NULL
@ -44,6 +48,13 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
                    currentTag->tokensOrTags[orIndex][tokenOrTagIndex] = languageTag->tokensOrTags[orIndex][tokenOrTagIndex];
                }
            }
+
+            //add the add function
+            currentTag->addDataFn = NULL;
+            if(languageTag->addDataFn != NULL){
+                currentTag->addDataFn = (ARC_ParserLanguageTag_AddDataFn *)malloc(sizeof(ARC_ParserLanguageTag_AddDataFn));
+                *(currentTag->addDataFn) = *(languageTag->addDataFn);
+            }
        }
    }

@ -52,6 +63,21 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init

    //register instructions to the lexer
    initLexerRulesFn(((*parser)->lexer));
+
+    //create the data and copy the creation function if the creation function exists
+    (*parser)->data = NULL;
+    (*parser)->createDataFn = NULL;
+    if(createDataFn != NULL){
+        (*parser)->createDataFn = (ARC_ParserData_CreateFn *)malloc(sizeof(ARC_ParserData_CreateFn));
+        *((*parser)->createDataFn) = *createDataFn;
+        (*createDataFn)(&((*parser)->data));
+    }
+
+    //copy the destruction function if the destruction function exists, it is checked on its own because it is dereferenced below and can be NULL even when a creation function was given
+    (*parser)->destroyDataFn = NULL;
+    if(destroyDataFn != NULL){
+        (*parser)->destroyDataFn = (ARC_ParserData_DestroyFn *)malloc(sizeof(ARC_ParserData_DestroyFn));
+        *((*parser)->destroyDataFn) = *destroyDataFn;
+    }
 }

 void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
@ -67,9 +93,25 @@ void ARC_Parser_Destroy(ARC_Parser *parser){
            free(currentTag->tokensOrTags[orIndex]);
        }

+        if(currentTag->addDataFn != NULL){
+            free(currentTag->addDataFn);
+        }
+
+        //free the tokens or tags
        free(currentTag->tokensOrTags);
    }

+    //free the creation function callback
+    if(parser->createDataFn != NULL){
+        free(parser->createDataFn);
+    }
+
+    //free the data and the deletion function callback
+    if(parser->destroyDataFn != NULL){
+        (*(parser->destroyDataFn))(parser->data);
+        free(parser->destroyDataFn);
+    }
+
    //clear the copied language from memory
    free(parser->language.data);

@ -97,8 +139,15 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
        return ARC_False;
    }

+    //create a vector of found tokens to use if a rule is validated, a comparison function is not needed as it will be iterated through, the destroy function is not needed as well because they will be pointers to lexer tokens (the lexer owns the tokens)
+    ARC_Vector *foundTokens;
+    ARC_Vector_Create(&foundTokens, NULL, NULL);
+
    //loop through each or section of the tags and tokens
    for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){
+        //reset the tokens for each or index
+        ARC_Vector_Clear(foundTokens);
+
        //loop through each token or tag to check if the lexed data matches
        uint32_t lexerCheckIndex = *lexerIndex;
        ARC_Bool foundRule = ARC_True;
@ -121,10 +170,8 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
                    break;
                }

-                //increase the lexer check index as a recursed rule was found
+                //increase the lexer check index as a recursed rule was found, and continue checking
                lexerCheckIndex = tempLexerCheckIndex;
-
-                //this will probably never be called as lambda is usually the last instruction, but just in case we can continue instead of break
                continue;
            }

@ -136,24 +183,43 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
            }

            //get the next token in the lexer and increment the lexers index
-            ARC_LexerToken token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex);
+            ARC_LexerToken *token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex);
            lexerCheckIndex++;

            //if the token rule does not match the current token in the current or statement the token rule could not be found for the current or index so break
-            if(token.rule != tag->tokensOrTags[orIndex][tokenOrTagIndex]){
+            if(token->rule != tag->tokensOrTags[orIndex][tokenOrTagIndex]){
                foundRule = ARC_False;
                break;
            }
+
+            //the rule was a match so add it to the found tokens
+            ARC_Vector_Add(foundTokens, (void *)token);
        }

        //if the rule is found we don't need to check anymore so we can return out
        if(foundRule == ARC_True){
            *lexerIndex = lexerCheckIndex;
-            //TODO: set tag into datastructure
+
+            //if there is an addDataFunction for the tag, add the tokens
+            if(tag->addDataFn != NULL){
+                //iterate through the tokens with the add callback
+                for(uint32_t index = 0; index < ARC_Vector_GetSize(foundTokens); index++){
+                    ARC_LexerToken *token = (ARC_LexerToken *)ARC_Vector_Get(foundTokens, index);
+                    (*(tag->addDataFn))(&(parser->data), tagId, token);
+                }
+            }
+
+            //free the found tokens vector
+            ARC_Vector_Destroy(foundTokens);
+
+            //cleanup
            return ARC_True;
        }
    }

+    //cleanup
+    ARC_Vector_Destroy(foundTokens);
+
    //no rule was found, so return false
    return ARC_False;
 }
@ -192,3 +258,12 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
 //NOTE: not implemented yet, the body is empty so calling this currently does nothing with the parser or the path (presumably meant to parse the file found at path, TODO: confirm)
 void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){

 }
+
+//resets the data stored on the parser by running the destroy callback on it and then running the create callback again
+//parser: the parser whose data will be reset
+//either callback can be NULL (they are stored as NULL when not given to ARC_Parser_Create), a NULL callback is skipped
+void ARC_Parser_ClearData(ARC_Parser *parser){
+    //destroy the current data and drop the stale pointer so it cannot be used after being destroyed
+    if(parser->destroyDataFn != NULL){
+        (*(parser->destroyDataFn))(parser->data);
+        parser->data = NULL;
+    }
+
+    //recreate the data, the address of the data pointer is passed (the same way ARC_Parser_Create calls it) so the callback can store the new data on the parser
+    if(parser->createDataFn != NULL){
+        (*(parser->createDataFn))(&(parser->data));
+    }
+}
+
+//gets the data pointer currently stored on the parser, the pointer is returned as is (nothing is copied)
+//parser: the parser to read the data from
+void *ARC_Parser_GetData(ARC_Parser *parser){
+    void *parserData = parser->data;
+    return parserData;
+}