fixed a lot of the lexer and parser (the segfault, and now can do char ranges)

This commit is contained in:
herbglitch 2024-10-30 07:36:43 -06:00
parent 050f7a8452
commit b10f9b9123
4 changed files with 115 additions and 47 deletions

View file

@ -304,6 +304,22 @@ uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *strin
return 0;
}
uint32_t ARC_Lexer_AutomataMatchCharOrBetweenFn(ARC_String **tokenData, ARC_String *string, void *automataData){
//if there is a match the token will be the same as automataData, so we don't need to store it again
*tokenData = NULL;
//check to see if there is a match with automataData as a range of chars
char *automataDataChars = (char *)automataData;
if(string->data[0] >= automataDataChars[0] && string->data[0] <= ((char *)automataData)[1]){
//return the token as token data and the token was found of length 1
ARC_String_Create(tokenData, string->data, 1);
return 1;
}
//no match was found
return 0;
}
uint32_t ARC_Lexer_AutomataMatchStringFn(ARC_String **tokenData, ARC_String *string, void *automataData){
//if there is a match the token will be the same as automataData, so we don't need to store it again
*tokenData = NULL;
@ -363,6 +379,28 @@ ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id,
return tokenRule;
}
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(uint32_t id, char start, char end){
//create the token rule
ARC_LexerTokenRule tokenRule;
//set the id
tokenRule.id = id;
//create and store the automataData (which is just two chars (the minumum and manximum))
char *automataData = (char *)malloc(sizeof(char) * 2);
automataData[0] = start;
automataData[1] = end;
tokenRule.automataData = (void *)automataData;
//we can use the ARC_Lexer_AutomataMatchCharInStringFn for this
tokenRule.automataFn = ARC_Lexer_AutomataMatchCharOrBetweenFn;
//add the private destroy function (we can use the char as it destroys a char pointer of any size)
tokenRule.destroyAutomataDataFn = ARC_LexerTokenRule_DestroyCharAutomataDataFn;
//return the created tokenRule
return tokenRule;
}
//private function to free automataData stored as an ARC_String
void ARC_LexerTokenRule_DestroyStringAutomataDataFn(void *automataData){
ARC_String_Destroy((ARC_String *)automataData);
@ -413,7 +451,27 @@ ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint3
}
void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer){
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_NULL , 0 ));
//null
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_NULL, 0));
//number
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_NUMBER, '0', '9'));
//alpha char
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHALOWERCHAR, 'a', 'z'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHAUPPERCHAR, 'A', 'Z'));
//whitespace
//TODO: fix this
ARC_String *whitespaceString;
ARC_String_CreateWithStrlen(&whitespaceString, " \t");
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_LEXER_TOKEN_WHITESPACE, whitespaceString));
ARC_String_Destroy(whitespaceString);
//TEMP FIX:
//ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_WHITESPACE, ' '));
//single char tokens
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_NEWLINE_ID , ARC_LEXER_TOKEN_NEWLINE_CHAR ));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COLON_ID , ARC_LEXER_TOKEN_COLON_CHAR ));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_SEMICOLON_ID , ARC_LEXER_TOKEN_SEMICOLON_CHAR ));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COMMA_ID , ARC_LEXER_TOKEN_COMMA_CHAR ));