fixed lexer, continuous should now work fairly efficiently

This commit is contained in:
herbglitch 2025-01-07 16:29:15 -07:00
parent 136344c009
commit 5b2b8ef21c
3 changed files with 74 additions and 77 deletions

View file

@ -14,6 +14,7 @@ struct ARC_Lexer {
//these are used for checking if a uint32_t is a value, if token rules are continuous we can just check the max token value
ARC_Bool tokenRulesAreContinuous;
uint32_t tokenRulesMaxVal;
uint32_t tokenRulesMinVal;
};
//private function for checking if two lexer token rules are the same in a vector (based on id)
@ -63,6 +64,7 @@ void ARC_Lexer_Create(ARC_Lexer **lexer){
//set token rules to continuous and initialize the token rules max value
(*lexer)->tokenRulesAreContinuous = ARC_True;
(*lexer)->tokenRulesMaxVal = 0;
(*lexer)->tokenRulesMinVal = 0;
}
void ARC_Lexer_Destroy(ARC_Lexer *lexer){
@ -90,19 +92,42 @@ void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule)
free(storedTokenRule);
}
//check if the value still is continuous
if(lexer->tokenRulesAreContinuous == ARC_True){
//if it is already continuous we just check if it is one value above the tokens already in the vector
for(uint32_t tokenRuleIndex = ARC_Vector_GetSize(lexer->tokenRules) - 1; tokenRuleIndex > 0; tokenRuleIndex--){
//get the current token rule
ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, tokenRuleIndex - 1);
//check if the token rule is continuous (then next max value by one)
if(tokenRule.id - currentTokenRule->id == 1){
//the token rule is already continuous so we can update the max value and return
//check if first or index to init the min
if(ARC_Vector_GetSize(lexer->tokenRules) == 1){
lexer->tokenRulesMinVal = tokenRule.id;
lexer->tokenRulesMaxVal = tokenRule.id;
lexer->tokenRulesAreContinuous = ARC_True;
return;
}
//check if the value still is continuous
if(lexer->tokenRulesAreContinuous == ARC_True){
//check if there is a new min if the minimum is bigger than zero (min is a uint so the zero check is to prevent underflow)
if(lexer->tokenRulesMinVal != 0 && tokenRule.id < lexer->tokenRulesMinVal){
//if the value is smaller than the min val minus one it is no longer continuous
if(lexer->tokenRulesMinVal - 1 != tokenRule.id){
lexer->tokenRulesAreContinuous = ARC_False;
}
lexer->tokenRulesMinVal = tokenRule.id;
return;
}
//check if the value is within the continuous range
if(tokenRule.id <= lexer->tokenRulesMaxVal && tokenRule.id >= lexer->tokenRulesMinVal){
return;
}
//check if there is a new max making sure not to overflow
if(lexer->tokenRulesMaxVal != ~(uint32_t)0 && tokenRule.id > lexer->tokenRulesMaxVal){
//if the value is bigger than the max val plus one it is no longer continuous
if(lexer->tokenRulesMaxVal + 1 != tokenRule.id){
lexer->tokenRulesAreContinuous = ARC_False;
}
lexer->tokenRulesMaxVal = tokenRule.id;
return;
}
//the token is no longer continuous
@ -110,52 +135,48 @@ void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule)
return;
}
//if the token is not within min max bounds, then there is no chance it will be continuous
if(lexer->tokenRulesMinVal != 0 && tokenRule.id < lexer->tokenRulesMinVal){
lexer->tokenRulesMinVal = tokenRule.id;
return;
}
//if the token is not within min max bounds, then there is no chance it will be continuous
if(lexer->tokenRulesMaxVal != ~(uint32_t)0 && tokenRule.id > lexer->tokenRulesMaxVal){
lexer->tokenRulesMaxVal = tokenRule.id;
return;
}
//check to see if this value makes the token rule continuous again
//TODO: might want to optimize this
uint32_t minValue = ~(uint32_t)0;
for(uint32_t tokenRuleIndex = 0; tokenRuleIndex < ARC_Vector_GetSize(lexer->tokenRules); tokenRuleIndex++){
uint32_t currentVal = lexer->tokenRulesMinVal;
//minimum exists, so add one to the current value
currentVal++;
//check to see if every index between min and max exists
for(; currentVal < lexer->tokenRulesMaxVal; currentVal++){
ARC_Bool currentContinous = ARC_False;
//TODO: probably want to optimize this
for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
//get the current token rule
ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, tokenRuleIndex);
ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, index);
//check each token to find the minimum one
if(currentTokenRule->id < minValue){
minValue = currentTokenRule->id;
}
}
//loop through until either all the values are checked and in order or the token rule is not continuous
//TODO: might want to optimize this
for(uint32_t foundSize = 0; foundSize != ARC_Vector_GetSize(lexer->tokenRules); foundSize++){
//check all current rules
ARC_Bool currentAreContinuous = ARC_False;
for(uint32_t tokenRuleIndex = 0; tokenRuleIndex < ARC_Vector_GetSize(lexer->tokenRules); tokenRuleIndex++){
//get the current token rule
ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, tokenRuleIndex);
//check if the value is smaller than or equal to the minimum value and if it is we can skip it
if(currentTokenRule->id <= minValue){
continue;
}
//check if the value is continuous
if(currentTokenRule->id - minValue == 1){
//set the token rule max val to the next most continuous value
lexer->tokenRulesMaxVal = currentTokenRule->id;
//set the next smallest value to check to the next most continuous value
minValue = currentTokenRule->id;
currentAreContinuous = ARC_True;
//check if a token matches the current needed value
if(currentTokenRule->id == currentVal){
currentContinous = ARC_True;
break;
}
}
//the current values are not continuous so we can return as token rules are continuous is already set to false
if(currentAreContinuous == ARC_False){
//if it is still not continuous return
if(currentContinous == ARC_False){
return;
}
//a continuous value was found so loop to next value
}
//the tokens were all continuous
lexer->tokenRulesAreContinuous = ARC_True;
}
void ARC_Lexer_Clear(ARC_Lexer *lexer){
@ -294,7 +315,7 @@ ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer){
ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id){
//if the rules are continuous we can just check if it is less than the max rules value
if(lexer->tokenRulesAreContinuous == ARC_True){
return id <= lexer->tokenRulesMaxVal;
return (ARC_Bool)(id >= lexer->tokenRulesMinVal && id <= lexer->tokenRulesMaxVal);
}
//the rules are not continuous so we need to check each individually

View file

@ -48,6 +48,8 @@ ARC_TEST(Lexer_Check_Id_Basic){
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(3, ':'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':'));
ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_True);
ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 0) == ARC_True );
ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 7) == ARC_False);
ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 2) == ARC_True );
@ -67,6 +69,8 @@ ARC_TEST(Lexer_Check_Id_Unordered_But_Continious){
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(1, ':'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':'));
ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_True);
ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 0) == ARC_True );
ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 7) == ARC_False);
ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 2) == ARC_True );
@ -86,6 +90,8 @@ ARC_TEST(Lexer_Check_Id_Unordered_Not_Continious){
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(1, ':'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':'));
ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_False);
ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 8) == ARC_True );
ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 7) == ARC_False);
ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 2) == ARC_True );
@ -94,33 +100,3 @@ ARC_TEST(Lexer_Check_Id_Unordered_Not_Continious){
ARC_Lexer_Destroy(lexer);
}
ARC_TEST(Lexer_Check_Continious){
ARC_Lexer *lexer;
ARC_Lexer_Create(&lexer);
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(2, ':'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(0, 0 ));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(3, ':'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(1, ':'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':'));
ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_True);
ARC_Lexer_Destroy(lexer);
}
ARC_TEST(Lexer_Check_Not_Continious){
ARC_Lexer *lexer;
ARC_Lexer_Create(&lexer);
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(2, ':'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(8, 0 ));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(3, ':'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(1, ':'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':'));
ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_False);
ARC_Lexer_Destroy(lexer);
}

View file

@ -90,10 +90,10 @@ ARC_TEST(Parser_Basic_Parse){
/* ~ second test ~ */
ARC_String_CreateWithStrlen(&tempString, "z1xwvq");
//this destroys string, so no need for cleanup
ARC_Parser_Parse(parser, &tempString);
ARC_CHECK(arc_errno == 0);