archeus/src/std/lexer.c

#include "arc/std/lexer.h"
#include "arc/std/bool.h"
#include "arc/std/errno.h"
#include "arc/std/string.h"
#include "arc/std/vector.h"
#include "arc/std/io.h"
#include <stdlib.h>

struct ARC_Lexer {
    ARC_Vector *tokenRules;
    ARC_Vector *tokens;
};

//private function for checking if two lexer token rules are the same in a vector (based on id)
ARC_Bool ARC_LexerTokenRule_VectorCompareDataFn(void *dataA, void *dataB){
    ARC_LexerTokenRule *tokenRuleA = (ARC_LexerTokenRule *)dataA;
    ARC_LexerTokenRule *tokenRuleB = (ARC_LexerTokenRule *)dataB;

    if(tokenRuleA->id == tokenRuleB->id){
        return ARC_True;
    }

    return ARC_False;
}

//private function for destroying a lexer token rule from a vector
void ARC_LexerTokenRule_VectorDestroyDataFn(void *data){
    ARC_LexerTokenRule *tokenRule = (ARC_LexerTokenRule *)data;
    tokenRule->destroyAutomataDataFn(tokenRule->automataData);
    free(tokenRule);
}

//private function for destroying a lexer token from a vector
void ARC_LexerToken_VectorDestroyDataFn(void *data){
    ARC_LexerToken *token = (ARC_LexerToken *)data;
    free(token);
}

void ARC_Lexer_Create(ARC_Lexer **lexer){
    //create the lexer
    *lexer = (ARC_Lexer *)malloc(sizeof(ARC_Lexer));

    //setup token rules vector with compare and destroy functions
    ARC_Vector_CompareDataFn tokenRulesVectorCompareDataFn = ARC_LexerTokenRule_VectorCompareDataFn;
    ARC_Vector_DestroyDataFn tokenRulesVectorDestroyDataFn = ARC_LexerTokenRule_VectorDestroyDataFn;
    ARC_Vector_Create(&(*lexer)->tokenRules, &tokenRulesVectorCompareDataFn, &tokenRulesVectorDestroyDataFn);

    //setup tokens vector with a destroy function, we don't need a compareDataFn because a token's index is used as its id
    ARC_Vector_DestroyDataFn tokenVectorDestroyDataFn = ARC_LexerToken_VectorDestroyDataFn;
    ARC_Vector_Create(&(*lexer)->tokens, NULL, &tokenVectorDestroyDataFn);
}

void ARC_Lexer_Destroy(ARC_Lexer *lexer){
    //free the tokens (the vector has a destroyDataFn, so the tokens themselves are freed too)
    ARC_Vector_Destroy(lexer->tokens);

    //free the token rules (the vector has a destroyDataFn, so the token rules themselves are freed too)
    ARC_Vector_Destroy(lexer->tokenRules);

    //free the lexer
    free(lexer);
}

void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule){
    //copy the token rule to a new heap allocation
    ARC_LexerTokenRule *storedTokenRule = (ARC_LexerTokenRule *)malloc(sizeof(ARC_LexerTokenRule));
    *storedTokenRule = tokenRule;

    //add to the vector and check for error (I'd be surprised if the error ever happened because that would most likely mean overflow)
    ARC_Vector_Add(lexer->tokenRules, storedTokenRule);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_RegisterTokenRule(lexer, tokenRule), errored when running ARC_Vector_Add(lexer->tokenRules, storedTokenRule);. check logs for more info");
        free(storedTokenRule);
    }
}

void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
    //check if there are any token rules to use
    if(ARC_Vector_GetSize(lexer->tokenRules) == 0){
        arc_errno = ARC_ERRNO_DATA;
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexString(lexer, data), no tokens registered to lexer to use");
        return;
    }

    //this will run until every token is stripped or there is an error
    while(*data != NULL){
        ARC_Bool tokenFound = ARC_False;

        for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
            //check if the token rule is found
            ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);

            //tokenData should only exist if tokenFound is ARC_True as stated in the header
            ARC_String *tokenData;
            tokenFound = tokenRule->automataFn(data, &tokenData, tokenRule->automataData);

            //check if a token was found, if it wasn't continue. I'm doing this to try to cut down on the amount of indentation
            if(tokenFound != ARC_True){
                continue;
            }

            //create the token to add
            ARC_LexerToken *token = (ARC_LexerToken *)malloc(sizeof(ARC_LexerToken));
            token->rule = tokenRule->id;
            token->data = tokenData;

            //add to the vector and check for error (I'd be surprised if the error ever happened because that would most likely mean overflow)
            ARC_Vector_Add(lexer->tokens, (void *)token);
            if(arc_errno){
                ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexString(lexer, data), errored when running ARC_Vector_Add(lexer->tokens, token);. check logs for more info");
                free(token);

                //clean up errored string
                ARC_String_Destroy(*data);
                *data = NULL;
                return;
            }

            //the token was added, so break to start checking tokens again
            break;
        }

        //if no token was found, throw an error
        if(tokenFound == ARC_False){
            arc_errno = ARC_ERRNO_DATA;
            ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Lexer_LexString(lexer, data), no tokens found with current string: \"%s\"", (*data)->data);

            //clean up errored string
            ARC_String_Destroy(*data);
            *data = NULL;

            //TODO: might want to do smthn with already tokened data
            return;
        }
    }
}

void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path){
    //read file and clean up if it errors, data starts as NULL so the error path below can safely check it
    ARC_String *data = NULL;
    ARC_IO_FileToStr(path, &data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexFile(lexer, path), errored when running ARC_IO_FileToStr(path, &data);. check logs for more info");
        if(data != NULL){
            ARC_String_Destroy(data);
        }
        return;
    }

    //lex the string and log if there is an error, ARC_Lexer_LexString will clean up the string
    ARC_Lexer_LexString(lexer, &data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexFile(lexer, path), errored when running ARC_Lexer_LexString(lexer, data);. check logs for more info");
    }
}

ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
    //get the token and log if there is an error
    ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_GetToken(lexer, index), errored when running ARC_Vector_Get(lexer->tokens, index);. check logs for more info");

        //return a token with max rule value, and NULL for the string to signify an error
        return (ARC_LexerToken){
            ~(uint32_t)0,
            NULL
        };
    }

    //the token was found, so return a copy of it
    return *token;
}

uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){
    return ARC_Vector_GetSize(lexer->tokens);
}

ARC_Bool ARC_Lexer_AutomataMatchCharFn(ARC_String **string, ARC_String **tokenData, void *automataData){
    //if there is a match the token will be the same as automataData, so we don't need to store it again
    *tokenData = NULL;

    //check to see if there is a match with automataData as a char
    if((*string)->data[0] == *(char *)automataData){
        //to keep from erroring, instead of stripping from a 1 character string we can just delete it
        if((*string)->length == 1){
            ARC_String_Destroy(*string);
            *string = NULL;
            return ARC_True;
        }

        //strip the character from the front of the string and return that a match was found
        ARC_String_ReplaceWithSubstring(string, 1, (*string)->length - 1);
        return ARC_True;
    }

    //no match was found
    return ARC_False;
}

ARC_Bool ARC_Lexer_AutomataMatchStringFn(ARC_String **string, ARC_String **tokenData, void *automataData){
    //if there is a match the token will be the same as automataData, so we don't need to store it again
    *tokenData = NULL;

    //check to see if there is a match with automataData as a string
    ARC_String *automataDataString = (ARC_String *)automataData;
    if(ARC_String_Equals(*string, automataDataString)){
        //to keep from erroring, instead of stripping from a same length string we can just delete it and return
        if((*string)->length == automataDataString->length){
            ARC_String_Destroy(*string);
            *string = NULL;
            return ARC_True;
        }

        //strip the token string from the front of the string and return that a match was found
        ARC_String_ReplaceWithSubstring(string, automataDataString->length, (*string)->length - automataDataString->length);
        return ARC_True;
    }

    //no match was found
    return ARC_False;
}

//private function to free automataData stored as a char
void ARC_LexerTokenRule_DestroyCharAutomataDataFn(void *automataData){
    free((char *)automataData);
}

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id, char character){
    //create the token rule
    ARC_LexerTokenRule tokenRule;

    //set the id
    tokenRule.id = id;

    //create and store the automataData (which is just a char)
    char *automataData = (char *)malloc(sizeof(char));
    *automataData = character;
    tokenRule.automataData = (void *)automataData;

    //we can use the ARC_Lexer_AutomataMatchCharFn for this
    tokenRule.automataFn = ARC_Lexer_AutomataMatchCharFn;

    //add the private destroy function
    tokenRule.destroyAutomataDataFn = ARC_LexerTokenRule_DestroyCharAutomataDataFn;

    //return the created tokenRule
    return tokenRule;
}

//private function to free automataData stored as an ARC_String
void ARC_LexerTokenRule_DestroyStringAutomataDataFn(void *automataData){
    ARC_String_Destroy((ARC_String *)automataData);
}

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule(uint32_t id, ARC_String *string){
    //create the token rule
    ARC_LexerTokenRule tokenRule;

    //set the id
    tokenRule.id = id;

    //copy and store the automataData (which is just an ARC_String)
    ARC_String *automataData;
    ARC_String_Copy(&automataData, string);
    tokenRule.automataData = (void *)automataData;

    //we can use the ARC_Lexer_AutomataMatchStringFn for this
    tokenRule.automataFn = ARC_Lexer_AutomataMatchStringFn;

    //add the private destroy function
    tokenRule.destroyAutomataDataFn = ARC_LexerTokenRule_DestroyStringAutomataDataFn;

    //return the created tokenRule
    return tokenRule;
}

void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer){
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COLON_ID            , ARC_LEXER_TOKEN_COLON_CHAR            ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_SEMICOLON_ID        , ARC_LEXER_TOKEN_SEMICOLON_CHAR        ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COMMA_ID            , ARC_LEXER_TOKEN_COMMA_CHAR            ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_PERIOD_ID           , ARC_LEXER_TOKEN_PERIOD_CHAR           ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_FORWARD_SLASH_ID    , ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR    ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_BACK_SLASH_ID       , ARC_LEXER_TOKEN_BACK_SLASH_CHAR       ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID , ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID, ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID , ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID, ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_BANG_ID             , ARC_LEXER_TOKEN_BANG_CHAR             ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_AT_ID               , ARC_LEXER_TOKEN_AT_CHAR               ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_HASH_ID             , ARC_LEXER_TOKEN_HASH_CHAR             ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_PERCENT_ID          , ARC_LEXER_TOKEN_PERCENT_CHAR          ));