diff --git a/include/arc/std/parser/parserlang.h b/include/arc/std/parser/parserlang.h
new file mode 100644
index 0000000..5f75287
--- /dev/null
+++ b/include/arc/std/parser/parserlang.h
@@ -0,0 +1,173 @@
+#ifndef ARC_STD_PARSER_PARSERLANG_H_
+#define ARC_STD_PARSER_PARSERLANG_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// NOTE(review): the targets of the three #include directives below are missing from
+// this copy of the patch and must be restored before building. The header uses
+// uint32_t, ARC_Lexer, ARC_String and ARC_PARSER_TAG_LAMBDA, so presumably they are
+// <stdint.h> plus the ARC lexer, string and parser headers - TODO confirm.
+#include
+#include
+#include
+
+//ids of the tokens produced by the rules in ARC_Language_InitLexerRulesFn
+#define ARC_PARSERLANG_TOKEN_NULL             0
+#define ARC_PARSERLANG_TOKEN_NUMBER           1
+#define ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR 2
+#define ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR 3
+#define ARC_PARSERLANG_TOKEN_WHITESPACE       4
+
+//single char tokens, every id is paired with the char it matches
+#define ARC_PARSERLANG_TOKEN_NEWLINE_ID        5
+#define ARC_PARSERLANG_TOKEN_NEWLINE_CHAR      '\n'
+#define ARC_PARSERLANG_TOKEN_LESS_THAN_ID      6
+#define ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR    '<'
+#define ARC_PARSERLANG_TOKEN_GREATER_THAN_ID   7
+#define ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR '>'
+#define ARC_PARSERLANG_TOKEN_OR_ID             8
+#define ARC_PARSERLANG_TOKEN_OR_CHAR           '|'
+#define ARC_PARSERLANG_TOKEN_UNDERSCORE_ID     9
+#define ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR   '_'
+
+//string token, the id is paired with the string it matches
+#define ARC_PARSERLANG_TOKEN_ARROW_ID      10
+#define ARC_PARSERLANG_TOKEN_ARROW_CSTRING "->"
+
+//ids of the grammar tags, numbered after the token ids
+#define ARC_PARSERLANG_LAMBDA          ARC_PARSER_TAG_LAMBDA
+#define ARC_PARSERLANG_LINE            11
+#define ARC_PARSERLANG_BODY            12
+#define ARC_PARSERLANG_ARGUMENTS       13
+#define ARC_PARSERLANG_ARGUMENT        14
+#define ARC_PARSERLANG_TAG_OR_CONSTANT 15
+#define ARC_PARSERLANG_CONSTANT        16
+#define ARC_PARSERLANG_CONSTANT_BODY   17
+#define ARC_PARSERLANG_CONSTANT_CHAR   18
+#define ARC_PARSERLANG_TAG             19
+#define ARC_PARSERLANG_VARIABLE        20
+#define ARC_PARSERLANG_VARIABLE_BODY   21
+#define ARC_PARSERLANG_VARIABLE_CHAR   22
+#define ARC_PARSERLANG_ALPHA_CHAR      23
+
+/**
+ * @brief registers the token rules of the parser language on the given lexer
+ *
+ * @note rules are registered in this order: null char, number, lowercase letter,
+ *       uppercase letter, whitespace (space or tab), the single char tokens, arrow
+ *
+ * @note NOTE(review): this function and the rule arrays below are defined (not just
+ *       declared) in a header without static, including the header from more than
+ *       one translation unit will produce duplicate symbols at link time
+ *
+ * @param[in] lexer the lexer the token rules are registered to
+ */
+void ARC_Language_InitLexerRulesFn(ARC_Lexer *lexer){
+    //null
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0));
+
+    //number
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_NUMBER, '0', '9'));
+
+    //alpha char
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR, 'a', 'z'));
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z'));
+
+    //whitespace, uses the parserlang id like every other rule registered here
+    ARC_String *whitespaceString;
+    ARC_String_CreateWithStrlen(&whitespaceString, " \t");
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_PARSERLANG_TOKEN_WHITESPACE, whitespaceString));
+    ARC_String_Destroy(whitespaceString);
+
+    //single char tokens
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NEWLINE_ID     , ARC_PARSERLANG_TOKEN_NEWLINE_CHAR     ));
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_LESS_THAN_ID   , ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR   ));
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_GREATER_THAN_ID, ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR));
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_OR_ID          , ARC_PARSERLANG_TOKEN_OR_CHAR          ));
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_UNDERSCORE_ID  , ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR  ));
+
+    //arrow
+    ARC_String *arrowString;
+    ARC_String_CreateWithStrlen(&arrowString, ARC_PARSERLANG_TOKEN_ARROW_CSTRING);
+    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchStringRule(ARC_PARSERLANG_TOKEN_ARROW_ID, arrowString));
+    ARC_String_Destroy(arrowString);
+}
+
+/*
+    NOTE(review): the <tag> names of this grammar are missing from this copy of the
+    patch, the left hand sides are restored from the order of the tag defines and the
+    right hand sides of the first ten rules are a best guess - TODO confirm
+
+    <line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
+    <body> -> <tag> WHITESPACE ARROW <arguments>
+
+    <arguments>     -> <argument> WHITESPACE OR WHITESPACE <arguments> | <argument>
+    <argument>      -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
+    <tagOrConstant> -> <tag> | <constant>
+
+    <constant>     -> ALPHA_UPPER_CHAR <constantBody>
+    <constantBody> -> <constantChar> <constantBody> | LAMBDA
+    <constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
+
+    <tag>          -> LESS_THAN <variable> GREATER_THAN
+    <variable>     -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
+    <variableBody> -> <variableChar> <variableBody> | LAMBDA
+    <variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
+    <alphaChar>    -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
+*/
+
+//every rule below is a list of alternatives, the first entry of an alternative is
+//the number of token or tag ids that follow it
+
+// <variableBody> -> <variableChar> <variableBody> | LAMBDA
+uint32_t *variableBody[] = {
+    (uint32_t[]){ 2, ARC_PARSERLANG_VARIABLE_CHAR, ARC_PARSERLANG_VARIABLE_BODY },
+    (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
+};
+
+// <variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
+uint32_t *variableChar[] = {
+    (uint32_t[]){ 1, ARC_PARSERLANG_ALPHA_CHAR },
+    (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_NUMBER },
+    (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID }
+};
+
+// <alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
+uint32_t *alphaChar[] = {
+    (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR },
+    (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }
+};
+
+/*
+ARC_ParserLanguageTag testTags[3] = {
+    {
+        VARIABLE,             //tagId
+        variableTokensOrTags, //tokensOrTags
+        1                     //tokenOrTagsSize
+    },
+    {
+        VARIABLE_NAME,    //tagId
+        variableNameTags, //tokensOrTags
+        2                 //tokenOrTagsSize
+    },
+    {
+        CHAR_OR_NUM,     //tagId
+        charOrNumTokens, //tokensOrTags
+        2                //tokenOrTagsSize
+    }
+};
+
+ARC_Array languageArray = {
+    3,       //size
+    testTags //data
+};
+*/
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //ARC_STD_PARSER_PARSERLANG_H_