merge
This commit is contained in:
commit
2d70208978
5 changed files with 154 additions and 94 deletions
|
|
@ -176,11 +176,6 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
|
|||
//check if the token rule is found
|
||||
ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
|
||||
|
||||
//set the last token length if the last token had a length
|
||||
if(tokenLength > 0){
|
||||
lastTokenLength = tokenLength;
|
||||
}
|
||||
|
||||
//tokenData should only exist if tokenLength is ARC_True as stated in the header
|
||||
ARC_String *tokenData;
|
||||
tokenLength = tokenRule->automataFn(&tokenData, *data, tokenRule->automataData);
|
||||
|
|
@ -201,6 +196,9 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
|
|||
token = (ARC_LexerToken *)malloc(sizeof(ARC_LexerToken));
|
||||
token->rule = tokenRule->id;
|
||||
token->data = tokenData;
|
||||
|
||||
//update the last found tokenLength to the max length
|
||||
lastTokenLength = tokenLength;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -415,6 +413,7 @@ ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(uint32_t
|
|||
//return the created tokenRule
|
||||
return tokenRule;
|
||||
}
|
||||
|
||||
//private function to free automataData stored as an ARC_String
|
||||
void ARC_LexerTokenRule_DestroyStringAutomataDataFn(void *automataData){
|
||||
ARC_String_Destroy((ARC_String *)automataData);
|
||||
|
|
|
|||
99
src/std/parser/parserlang.c
Normal file
99
src/std/parser/parserlang.c
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
#include "arc/std/parser/parserlang.h"
|
||||
#include "arc/std/lexer.h"
|
||||
#include "arc/std/parser.h"
|
||||
#include "arc/std/string.h"
|
||||
|
||||
/**
 * @brief registers the token rules used to lex the parser language onto a lexer
 *
 * @note rules are registered in this order: null char, number ('0'-'9'), lower alpha ('a'-'z'),
 *       upper alpha ('A'-'Z'), whitespace (' ' or '\t'), the single char tokens
 *       (newline, '<', '>', or, underscore as defined by the ARC_PARSERLANG_TOKEN_*_CHAR macros),
 *       and finally the arrow string
 *
 * @note every rule id comes from the ARC_PARSERLANG_TOKEN_* set so the ids line up with the ones
 *       the grammar in ARC_Parser_CreateAsParserLang references
 *
 * @param lexer the lexer to register the token rules to
 */
void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
    //null
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0));

    //number
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_NUMBER, '0', '9'));

    //alpha char
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR, 'a', 'z'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z'));

    //whitespace, registered under the parser language id (not the generic lexer id) because the grammar matches on ARC_PARSERLANG_TOKEN_WHITESPACE
    //NOTE(review): the temporary string is destroyed right after registration, this presumes the created rule keeps its own copy - confirm
    ARC_String *whitespaceString;
    ARC_String_CreateWithStrlen(&whitespaceString, " \t");
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_PARSERLANG_TOKEN_WHITESPACE, whitespaceString));
    ARC_String_Destroy(whitespaceString);

    //single char tokens
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NEWLINE_ID     , ARC_PARSERLANG_TOKEN_NEWLINE_CHAR     ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_LESS_THAN_ID   , ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR   ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_GREATER_THAN_ID, ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_OR_ID          , ARC_PARSERLANG_TOKEN_OR_CHAR          ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_UNDERSCORE_ID  , ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR  ));

    //arrow
    //NOTE(review): same ownership assumption as the whitespace string above - confirm
    ARC_String *arrowString;
    ARC_String_CreateWithStrlen(&arrowString, ARC_PARSERLANG_TOKEN_ARROW_CSTRING);
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchStringRule(ARC_PARSERLANG_TOKEN_ARROW_ID, arrowString));
    ARC_String_Destroy(arrowString);
}
|
||||
|
||||
/**
 * @brief creates an ARC_Parser whose grammar describes the parser language itself
 *
 * @note each production below is an array of alternatives, and each alternative is a uint32_t array
 *       whose first element is the number of ids that follow it (tag ids or token ids)
 *
 * @note the alternative counts and the tag count are derived with sizeof so they cannot drift from
 *       the arrays they describe
 *
 * @note NOTE(review): every table in this function has automatic storage, so this presumes
 *       ARC_Parser_Create copies whatever it needs before returning - confirm against parser.c
 *
 * @param parser the parser to create, handed straight to ARC_Parser_Create
 */
void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
    //<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
    uint32_t *line[] = {
        (uint32_t[]){ 3, ARC_PARSERLANG_BODY, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE },
        (uint32_t[]){ 1, ARC_PARSERLANG_BODY },
        (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE },
        (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
    };

    //<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
    uint32_t *body[] = {
        (uint32_t[]){ 5, ARC_PARSERLANG_TAG, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_ARROW_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS }
    };

    //<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
    uint32_t *arguments[] = {
        (uint32_t[]){ 5, ARC_PARSERLANG_ARGUMENT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_OR_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS },
        (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT }
    };

    //<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
    uint32_t *argument[] = {
        (uint32_t[]){ 3, ARC_PARSERLANG_TAG_OR_CONSTANT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENT },
        (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT }
    };

    //<tagOrConstant> -> <tag> | <constant>
    uint32_t *tagOrConstant[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_TAG },
        (uint32_t[]){ 1, ARC_PARSERLANG_CONSTANT }
    };

    //<constant> -> ALPHA_UPPER_CHAR <constantBody>
    uint32_t *constant[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, ARC_PARSERLANG_CONSTANT_BODY }
    };

    //<constantBody> -> <constantChar> <constantBody> | LAMBDA
    uint32_t *constantBody[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_CONSTANT_CHAR, ARC_PARSERLANG_CONSTANT_BODY },
        (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
    };

    //<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
    //these are two separate single-token alternatives, not one two-token sequence
    uint32_t *constantChar[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID }
    };

    //<tag> -> LESS_THAN <variable> GREATER_THAN
    uint32_t *tag[] = {
        (uint32_t[]){ 3, ARC_PARSERLANG_TOKEN_LESS_THAN_ID, ARC_PARSERLANG_VARIABLE, ARC_PARSERLANG_TOKEN_GREATER_THAN_ID }
    };

    //<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
    uint32_t *variable[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_ALPHA_CHAR, ARC_PARSERLANG_VARIABLE_BODY },
        (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID, ARC_PARSERLANG_VARIABLE_BODY }
    };

    //<variableBody> -> <variableChar> <variableBody> | LAMBDA
    uint32_t *variableBody[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_VARIABLE_CHAR, ARC_PARSERLANG_VARIABLE_BODY },
        (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
    };

    //<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
    uint32_t *variableChar[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_ALPHA_CHAR },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_NUMBER },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID }
    };

    //<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
    uint32_t *alphaChar[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }
    };

    //one entry per production: { tag id, alternatives, number of alternatives }
    ARC_ParserLanguageTag parserLangTags[] = {
        { ARC_PARSERLANG_LINE           , line         , sizeof(line)          / sizeof(line[0])          },
        { ARC_PARSERLANG_BODY           , body         , sizeof(body)          / sizeof(body[0])          },
        { ARC_PARSERLANG_ARGUMENTS      , arguments    , sizeof(arguments)     / sizeof(arguments[0])     },
        { ARC_PARSERLANG_ARGUMENT       , argument     , sizeof(argument)      / sizeof(argument[0])      },
        { ARC_PARSERLANG_TAG_OR_CONSTANT, tagOrConstant, sizeof(tagOrConstant) / sizeof(tagOrConstant[0]) },
        { ARC_PARSERLANG_CONSTANT       , constant     , sizeof(constant)      / sizeof(constant[0])      },
        { ARC_PARSERLANG_CONSTANT_BODY  , constantBody , sizeof(constantBody)  / sizeof(constantBody[0])  },
        { ARC_PARSERLANG_CONSTANT_CHAR  , constantChar , sizeof(constantChar)  / sizeof(constantChar[0])  },
        { ARC_PARSERLANG_TAG            , tag          , sizeof(tag)           / sizeof(tag[0])           },
        { ARC_PARSERLANG_VARIABLE       , variable     , sizeof(variable)      / sizeof(variable[0])      },
        { ARC_PARSERLANG_VARIABLE_BODY  , variableBody , sizeof(variableBody)  / sizeof(variableBody[0])  },
        { ARC_PARSERLANG_VARIABLE_CHAR  , variableChar , sizeof(variableChar)  / sizeof(variableChar[0])  },
        { ARC_PARSERLANG_ALPHA_CHAR     , alphaChar    , sizeof(alphaChar)     / sizeof(alphaChar[0])     }
    };

    ARC_Array parserLanguageArray = {
        sizeof(parserLangTags) / sizeof(parserLangTags[0]), //size
        parserLangTags                                      //data
    };

    ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn);
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue