fixed a lot of the lexer and parser (the segfault, and now can do char ranges)
This commit is contained in:
parent
050f7a8452
commit
b10f9b9123
4 changed files with 115 additions and 47 deletions
|
|
@ -207,55 +207,57 @@ ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint3
|
|||
/**
|
||||
* @brief basic tokens
|
||||
*/
|
||||
#define ARC_LEXER_TOKEN_NULL 0
|
||||
#define ARC_LEXER_TOKEN_EOF 1
|
||||
#define ARC_LEXER_TOKEN_NUMBER 2
|
||||
#define ARC_LEXER_TOKEN_ALPHACHAR 3
|
||||
#define ARC_LEXER_TOKEN_WHITESPACE 4
|
||||
#define ARC_LEXER_TOKEN_NULL 0
|
||||
#define ARC_LEXER_TOKEN_NUMBER 1
|
||||
#define ARC_LEXER_TOKEN_ALPHALOWERCHAR 2
|
||||
#define ARC_LEXER_TOKEN_ALPHAUPPERCHAR 3
|
||||
#define ARC_LEXER_TOKEN_WHITESPACE 4
|
||||
|
||||
/**
|
||||
* @brief basic token type ids, chars, and tags
|
||||
*/
|
||||
#define ARC_LEXER_TOKEN_COLON_ID 1
|
||||
#define ARC_LEXER_TOKEN_NEWLINE_ID 5
|
||||
#define ARC_LEXER_TOKEN_NEWLINE_CHAR '\n'
|
||||
#define ARC_LEXER_TOKEN_COLON_ID 6
|
||||
#define ARC_LEXER_TOKEN_COLON_CHAR ':'
|
||||
#define ARC_LEXER_TOKEN_COLON_TAG "COLON"
|
||||
#define ARC_LEXER_TOKEN_SEMICOLON_ID 2
|
||||
#define ARC_LEXER_TOKEN_SEMICOLON_ID 7
|
||||
#define ARC_LEXER_TOKEN_SEMICOLON_CHAR ';'
|
||||
#define ARC_LEXER_TOKEN_SEMICOLON_TAG "SEMICOLON"
|
||||
#define ARC_LEXER_TOKEN_COMMA_ID 3
|
||||
#define ARC_LEXER_TOKEN_COMMA_ID 8
|
||||
#define ARC_LEXER_TOKEN_COMMA_CHAR ','
|
||||
#define ARC_LEXER_TOKEN_COMMA_TAG "COMMA"
|
||||
#define ARC_LEXER_TOKEN_PERIOD_ID 4
|
||||
#define ARC_LEXER_TOKEN_PERIOD_ID 9
|
||||
#define ARC_LEXER_TOKEN_PERIOD_CHAR '.'
|
||||
#define ARC_LEXER_TOKEN_PERIOD_TAG "PERIOD"
|
||||
#define ARC_LEXER_TOKEN_FORWARD_SLASH_ID 5
|
||||
#define ARC_LEXER_TOKEN_FORWARD_SLASH_ID 10
|
||||
#define ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR '/'
|
||||
#define ARC_LEXER_TOKEN_FORWARD_SLASH_TAG "FORWARD_SLASH"
|
||||
#define ARC_LEXER_TOKEN_BACK_SLASH_ID 6
|
||||
#define ARC_LEXER_TOKEN_BACK_SLASH_ID 11
|
||||
#define ARC_LEXER_TOKEN_BACK_SLASH_CHAR '\\'
|
||||
#define ARC_LEXER_TOKEN_BACK_SLASH_TAG "BACK_SLASH"
|
||||
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID 7
|
||||
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID 12
|
||||
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR '('
|
||||
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_TAG "LEFT_PARENTHESIS"
|
||||
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID 8
|
||||
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID 13
|
||||
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR ')'
|
||||
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_TAG "RIGHT_PARENTHESIS"
|
||||
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID 9
|
||||
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID 14
|
||||
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR '{'
|
||||
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_TAG "LEFT_CURLY_BRACE"
|
||||
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID 10
|
||||
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID 15
|
||||
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR '}'
|
||||
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_TAG "RIGHT_CURLY_BRACE"
|
||||
#define ARC_LEXER_TOKEN_BANG_ID 11
|
||||
#define ARC_LEXER_TOKEN_BANG_ID 16
|
||||
#define ARC_LEXER_TOKEN_BANG_CHAR '!'
|
||||
#define ARC_LEXER_TOKEN_BANG_TAG "BANG"
|
||||
#define ARC_LEXER_TOKEN_AT_ID 12
|
||||
#define ARC_LEXER_TOKEN_AT_ID 17
|
||||
#define ARC_LEXER_TOKEN_AT_CHAR '@'
|
||||
#define ARC_LEXER_TOKEN_AT_TAG "AT"
|
||||
#define ARC_LEXER_TOKEN_HASH_ID 13
|
||||
#define ARC_LEXER_TOKEN_HASH_ID 18
|
||||
#define ARC_LEXER_TOKEN_HASH_CHAR '#'
|
||||
#define ARC_LEXER_TOKEN_HASH_TAG "HASH"
|
||||
#define ARC_LEXER_TOKEN_PERCENT_ID 14
|
||||
#define ARC_LEXER_TOKEN_PERCENT_ID 19
|
||||
#define ARC_LEXER_TOKEN_PERCENT_CHAR '%'
|
||||
#define ARC_LEXER_TOKEN_PERCENT_TAG "PERCENT"
|
||||
|
||||
|
|
|
|||
|
|
@ -304,6 +304,22 @@ uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *strin
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
 * @brief automata function that matches the first char of a string against an inclusive char range
 *
 * @param tokenData    set to NULL on entry, on a match it is passed to ARC_String_Create along with the matched char
 * @param string       the string to check, only string->data[0] is inspected
 * @param automataData a pointer to two chars, [0] is the lowest accepted char and [1] is the highest accepted char
 *
 * @return 1 (the length of the matched token) when the first char is within the range, otherwise 0
*/
uint32_t ARC_Lexer_AutomataMatchCharOrBetweenFn(ARC_String **tokenData, ARC_String *string, void *automataData){
    //clear the token data first so it stays NULL when nothing matches
    *tokenData = NULL;

    //automataData is a pair of chars describing the inclusive range
    const char *range = (const char *)automataData;

    //NOTE(review): string->data[0] is read without a length check, confirm callers never pass an empty string
    char firstChar = string->data[0];

    //the char is outside of the range, so no match was found
    if(firstChar < range[0] || firstChar > range[1]){
        return 0;
    }

    //a match was found, store the single matched char as the token data and report a token of length 1
    ARC_String_Create(tokenData, string->data, 1);
    return 1;
}
|
||||
|
||||
uint32_t ARC_Lexer_AutomataMatchStringFn(ARC_String **tokenData, ARC_String *string, void *automataData){
|
||||
//if there is a match the token will be the same as automataData, so we don't need to store it again
|
||||
*tokenData = NULL;
|
||||
|
|
@ -363,6 +379,28 @@ ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id,
|
|||
return tokenRule;
|
||||
}
|
||||
|
||||
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(uint32_t id, char start, char end){
|
||||
//create the token rule
|
||||
ARC_LexerTokenRule tokenRule;
|
||||
|
||||
//set the id
|
||||
tokenRule.id = id;
|
||||
|
||||
//create and store the automataData (which is just two chars (the minumum and manximum))
|
||||
char *automataData = (char *)malloc(sizeof(char) * 2);
|
||||
automataData[0] = start;
|
||||
automataData[1] = end;
|
||||
tokenRule.automataData = (void *)automataData;
|
||||
|
||||
//we can use the ARC_Lexer_AutomataMatchCharInStringFn for this
|
||||
tokenRule.automataFn = ARC_Lexer_AutomataMatchCharOrBetweenFn;
|
||||
|
||||
//add the private destroy function (we can use the char as it destroys a char pointer of any size)
|
||||
tokenRule.destroyAutomataDataFn = ARC_LexerTokenRule_DestroyCharAutomataDataFn;
|
||||
|
||||
//return the created tokenRule
|
||||
return tokenRule;
|
||||
}
|
||||
//private function to free automataData stored as an ARC_String
|
||||
void ARC_LexerTokenRule_DestroyStringAutomataDataFn(void *automataData){
|
||||
ARC_String_Destroy((ARC_String *)automataData);
|
||||
|
|
@ -413,7 +451,27 @@ ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint3
|
|||
}
|
||||
|
||||
void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer){
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_NULL , 0 ));
|
||||
//null
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_NULL, 0));
|
||||
|
||||
//number
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_NUMBER, '0', '9'));
|
||||
|
||||
//alpha char
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHALOWERCHAR, 'a', 'z'));
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHAUPPERCHAR, 'A', 'Z'));
|
||||
|
||||
//whitespace
|
||||
//TODO: fix this
|
||||
ARC_String *whitespaceString;
|
||||
ARC_String_CreateWithStrlen(&whitespaceString, " \t");
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_LEXER_TOKEN_WHITESPACE, whitespaceString));
|
||||
ARC_String_Destroy(whitespaceString);
|
||||
//TEMP FIX:
|
||||
//ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_WHITESPACE, ' '));
|
||||
|
||||
//single char tokens
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_NEWLINE_ID , ARC_LEXER_TOKEN_NEWLINE_CHAR ));
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COLON_ID , ARC_LEXER_TOKEN_COLON_CHAR ));
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_SEMICOLON_ID , ARC_LEXER_TOKEN_SEMICOLON_CHAR ));
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COMMA_ID , ARC_LEXER_TOKEN_COMMA_CHAR ));
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
|
|||
(*parser)->language.size = language->size;
|
||||
(*parser)->language.data = malloc(sizeof(ARC_ParserLanguageTag) * language->size);
|
||||
|
||||
memcpy((*parser)->language.data, language->data, language->size);
|
||||
memcpy((*parser)->language.data, language->data, sizeof(ARC_ParserLanguageTag) * language->size);
|
||||
}
|
||||
|
||||
//create the lexer
|
||||
|
|
@ -103,6 +103,12 @@ void ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t tagI
|
|||
}
|
||||
|
||||
void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
|
||||
//make sure the parser has a language
|
||||
if(parser->language.size == 0){
|
||||
ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), no parser language defined");
|
||||
return;
|
||||
}
|
||||
|
||||
//lex the subdata
|
||||
ARC_Lexer_LexString(parser->lexer, data);
|
||||
if(arc_errno){
|
||||
|
|
@ -110,11 +116,12 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
|
|||
return;
|
||||
}
|
||||
|
||||
//set the lexer index to start and get the first tag
|
||||
uint32_t lexerIndex = 0;
|
||||
ARC_ParserLanguageTag startTag = ((ARC_ParserLanguageTag *)parser->language.data)[0];
|
||||
ARC_ParserLanguageTag *startTag = parser->language.data;
|
||||
|
||||
//recursively parse from the initial start tag
|
||||
ARC_Parser_ParseTag(parser, &lexerIndex, startTag.tagId);
|
||||
ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
|
||||
if(arc_errno){
|
||||
ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not parse the given data");
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -2,12 +2,13 @@
|
|||
#include "arc/std/errno.h"
|
||||
#include "arc/std/parser.h"
|
||||
|
||||
#define LAMBDA 0
|
||||
#define CHAR 1
|
||||
#define NUM 2
|
||||
#define CHAR_OR_NUM 3
|
||||
#define VARIABLE_NAME 4
|
||||
#define VARIABLE 5
|
||||
//TODO: fix lambda
|
||||
#define LAMBDA 20
|
||||
#define CHAR ARC_LEXER_TOKEN_ALPHALOWERCHAR
|
||||
#define NUM ARC_LEXER_TOKEN_NUMBER
|
||||
#define CHAR_OR_NUM 23
|
||||
#define VARIABLE_NAME 24
|
||||
#define VARIABLE 25
|
||||
|
||||
void TEST_Parser_InitLexerRulesFn(ARC_Lexer *lexer){
|
||||
ARC_Lexer_InitBasicTokenRules(lexer);
|
||||
|
|
@ -21,9 +22,9 @@ ARC_TEST(Parser_Init){
|
|||
uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
|
||||
ARC_ParserLanguageTag testTags[3] = {
|
||||
{
|
||||
CHAR_OR_NUM, //tagId
|
||||
charOrNumTokens, //tokensOrTags
|
||||
2 //tokenOrTagsSize
|
||||
VARIABLE, //tagId
|
||||
variableTokensOrTags, //tokensOrTags
|
||||
1 //tokenOrTagsSize
|
||||
},
|
||||
{
|
||||
VARIABLE_NAME, //tagId
|
||||
|
|
@ -31,9 +32,9 @@ ARC_TEST(Parser_Init){
|
|||
2 //tokenOrTagsSize
|
||||
},
|
||||
{
|
||||
VARIABLE, //tagId
|
||||
variableTokensOrTags, //tokensOrTags
|
||||
1 //tokenOrTagsSize
|
||||
CHAR_OR_NUM, //tagId
|
||||
charOrNumTokens, //tokensOrTags
|
||||
2 //tokenOrTagsSize
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -57,9 +58,9 @@ ARC_TEST(Parser_Basic_Parse){
|
|||
uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
|
||||
ARC_ParserLanguageTag testTags[3] = {
|
||||
{
|
||||
CHAR_OR_NUM, //tagId
|
||||
charOrNumTokens, //tokensOrTags
|
||||
2 //tokenOrTagsSize
|
||||
VARIABLE, //tagId
|
||||
variableTokensOrTags, //tokensOrTags
|
||||
1 //tokenOrTagsSize
|
||||
},
|
||||
{
|
||||
VARIABLE_NAME, //tagId
|
||||
|
|
@ -67,9 +68,9 @@ ARC_TEST(Parser_Basic_Parse){
|
|||
2 //tokenOrTagsSize
|
||||
},
|
||||
{
|
||||
VARIABLE, //tagId
|
||||
variableTokensOrTags, //tokensOrTags
|
||||
1 //tokenOrTagsSize
|
||||
CHAR_OR_NUM, //tagId
|
||||
charOrNumTokens, //tokensOrTags
|
||||
2 //tokenOrTagsSize
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -97,9 +98,9 @@ ARC_TEST(Parser_Basic_ParseError){
|
|||
uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
|
||||
ARC_ParserLanguageTag testTags[3] = {
|
||||
{
|
||||
CHAR_OR_NUM, //tagId
|
||||
charOrNumTokens, //tokensOrTags
|
||||
2 //tokenOrTagsSize
|
||||
VARIABLE, //tagId
|
||||
variableTokensOrTags, //tokensOrTags
|
||||
1 //tokenOrTagsSize
|
||||
},
|
||||
{
|
||||
VARIABLE_NAME, //tagId
|
||||
|
|
@ -107,9 +108,9 @@ ARC_TEST(Parser_Basic_ParseError){
|
|||
2 //tokenOrTagsSize
|
||||
},
|
||||
{
|
||||
VARIABLE, //tagId
|
||||
variableTokensOrTags, //tokensOrTags
|
||||
1 //tokenOrTagsSize
|
||||
CHAR_OR_NUM, //tagId
|
||||
charOrNumTokens, //tokensOrTags
|
||||
2 //tokenOrTagsSize
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue