added ability to check if token id is a lexer rule and wrote parser, still need to test

This commit is contained in:
herbglitch 2024-10-24 19:56:26 -06:00
parent 7a3495f7ae
commit d8d1a1a107
7 changed files with 211 additions and 13 deletions

View file

@ -10,6 +10,10 @@
struct ARC_Lexer {
ARC_Vector *tokenRules;
ARC_Vector *tokens;
//these are used for checking if an uint32_t is a value, if token rules are continuous we can just check the max token value
ARC_Bool tokenRulesAreContinuous;
uint32_t tokenRulesMaxVal;
};
//private function for checking if two lexer token rules are the same in a vector (based on id)
@ -50,6 +54,10 @@ void ARC_Lexer_Create(ARC_Lexer **lexer){
//setup tokens vector with delete funtion, we don't want a deleteDataFn because their index will be used as the id
ARC_Vector_DestroyDataFn tokenVectorDestroyDataFn = ARC_LexerToken_VectorDestroyDataFn;
ARC_Vector_Create(&(*lexer)->tokens, NULL, &tokenVectorDestroyDataFn);
//set token rules to continuous and initialize the token rules max value
(*lexer)->tokenRulesAreContinuous = ARC_True;
(*lexer)->tokenRulesMaxVal = 0;
}
void ARC_Lexer_Destroy(ARC_Lexer *lexer){
@ -74,6 +82,73 @@ void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule)
ARC_DEBUG_LOG_ERROR("ARC_Lexer_RegisterTokenRule(lexer, tokenRule), errored when running ARC_Vector_Add(lexer->tokenRules, storedTokenRule);. check logs for more info");
free(storedTokenRule);
}
//check if the value still is continuous
if(lexer->tokenRulesAreContinuous == ARC_True){
//if it is already continuous we just check if it is one value above the tokens already in the vector
for(uint32_t tokenRuleIndex = ARC_Vector_GetSize(lexer->tokenRules) - 1; tokenRuleIndex > 0; tokenRuleIndex--){
//get the current token rule
ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, tokenRuleIndex - 1);
//check if the token rule is continuous (then next max value by one)
if(tokenRule.id - currentTokenRule->id == 1){
//the token rule is already continuous so we can update the max value and return
lexer->tokenRulesMaxVal = tokenRule.id;
return;
}
}
//the token is no longer continous
lexer->tokenRulesAreContinuous = ARC_False;
return;
}
//check to see if this value makes the token rule continuous again
//TODO: might want to optomize this
uint32_t minValue = ~(uint32_t)0;
for(uint32_t tokenRuleIndex = 0; tokenRuleIndex < ARC_Vector_GetSize(lexer->tokenRules); tokenRuleIndex++){
//get the current token rule
ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, tokenRuleIndex);
//check each token to find the minimum one
if(currentTokenRule->id < minValue){
minValue = currentTokenRule->id;
}
}
//loop through untill either all the values are checked and in order or the token rule is not continuous
//TODO: might want to optomize this
for(uint32_t foundSize = 0; foundSize != ARC_Vector_GetSize(lexer->tokenRules); foundSize++){
//check all current rules
ARC_Bool currentAreContinuous = ARC_False;
for(uint32_t tokenRuleIndex = 0; tokenRuleIndex < ARC_Vector_GetSize(lexer->tokenRules); tokenRuleIndex++){
//get the current token rule
ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, tokenRuleIndex);
//check if the value is smaller than or equal to the minimum value and if it is we can skip it
if(currentTokenRule->id <= minValue){
continue;
}
//check if the value is continous
if(currentTokenRule->id - minValue == 1){
//set the token rule max val to the next most continuous value
lexer->tokenRulesMaxVal = currentTokenRule->id;
//set the next smallest value to check to the the next most continuous value
minValue = currentTokenRule->id;
currentAreContinuous = ARC_True;
break;
}
}
//the current values are not continuous so we can return as token rules are continuous is already set to false
if(currentAreContinuous == ARC_False){
return;
}
//a continuous value was found so loop to next value
}
}
void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
@ -198,6 +273,23 @@ uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){
return ARC_Vector_GetSize(lexer->tokens);
}
ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id){
//if the rules are continuous we can just check if it is less than the max rules value
if(lexer->tokenRulesAreContinuous == ARC_True){
return id <= lexer->tokenRulesMaxVal;
}
//the rules are not continuous so we need to check each individually
for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, index);
if(currentTokenRule->id == id){
return ARC_True;
}
}
return ARC_False;
}
uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *string, void *automataData){
//if there is a match the token will be the same as automataData, so we don't need to store it again
*tokenData = NULL;
@ -321,6 +413,7 @@ ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint3
}
void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer){
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_NULL , 0 ));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COLON_ID , ARC_LEXER_TOKEN_COLON_CHAR ));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_SEMICOLON_ID , ARC_LEXER_TOKEN_SEMICOLON_CHAR ));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COMMA_ID , ARC_LEXER_TOKEN_COMMA_CHAR ));