archeus/include/arc/std/parser/parserlang.h
2024-11-04 19:58:23 -07:00

136 lines
5.9 KiB
C

#ifndef ARC_STD_PARSER_PARSERLANG_H_
#define ARC_STD_PARSER_PARSERLANG_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <arc/std/lexer.h>
#include <arc/std/parser.h>
#include <arc/std/string.h>
//ids of the lexer tokens used by the parser language, these are the ids passed as the first
//argument when the token rules are created in ARC_Language_InitLexerRulesFn
#define ARC_PARSERLANG_TOKEN_NULL 0
#define ARC_PARSERLANG_TOKEN_NUMBER 1
#define ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR 2
#define ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR 3
#define ARC_PARSERLANG_TOKEN_WHITESPACE 4
//single char tokens, each one has an id (_ID) and the character it matches (_CHAR)
#define ARC_PARSERLANG_TOKEN_NEWLINE_ID 5
#define ARC_PARSERLANG_TOKEN_NEWLINE_CHAR '\n'
#define ARC_PARSERLANG_TOKEN_LESS_THAN_ID 6
#define ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR '<'
#define ARC_PARSERLANG_TOKEN_GREATER_THAN_ID 7
#define ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR '>'
#define ARC_PARSERLANG_TOKEN_OR_ID 8
#define ARC_PARSERLANG_TOKEN_OR_CHAR '|'
#define ARC_PARSERLANG_TOKEN_UNDERSCORE_ID 9
#define ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR '_'
//string token, has an id (_ID) and the c string it matches (_CSTRING)
#define ARC_PARSERLANG_TOKEN_ARROW_ID 10
#define ARC_PARSERLANG_TOKEN_ARROW_CSTRING "->"
//ids of the grammar tags, the numbering continues after the last token id so tags and tokens do not
//share a value, lambda is an alias of the parser's ARC_PARSER_TAG_LAMBDA
#define ARC_PARSERLANG_LAMBDA ARC_PARSER_TAG_LAMBDA
#define ARC_PARSERLANG_LINE 11
#define ARC_PARSERLANG_BODY 12
#define ARC_PARSERLANG_ARGUMENTS 13
#define ARC_PARSERLANG_ARGUMENT 14
#define ARC_PARSERLANG_TAG_OR_CONSTANT 15
#define ARC_PARSERLANG_CONSTANT 16
#define ARC_PARSERLANG_CONSTANT_BODY 17
#define ARC_PARSERLANG_CONSTANT_CHAR 18
#define ARC_PARSERLANG_TAG 19
#define ARC_PARSERLANG_VARIABLE 20
#define ARC_PARSERLANG_VARIABLE_BODY 21
#define ARC_PARSERLANG_VARIABLE_CHAR 22
#define ARC_PARSERLANG_ALPHA_CHAR 23
/**
 * @brief registers the token rules of the parser language on the given lexer
 *
 * @note the rules are registered in this order: null, number, lower case alpha, upper case alpha,
 *       whitespace (space or tab), the single char tokens (newline, '<', '>', '|', '_'), then the
 *       "->" arrow string
 *
 * @note every rule is registered with an ARC_PARSERLANG_TOKEN_* id so the ids match the ones the
 *       grammar tables in this header refer to
 *
 * @note NOTE(review): this is a non-static function definition inside a header, including the header
 *       from more than one translation unit will likely produce duplicate symbol errors at link
 *       time, consider moving the definition to a .c file
 *
 * @param[in] lexer the lexer to register the token rules to
*/
void ARC_Language_InitLexerRulesFn(ARC_Lexer *lexer){
    //null
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0));

    //number
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_NUMBER, '0', '9'));

    //alpha char
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR, 'a', 'z'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z'));

    //whitespace, uses the parser language id (not the lexer's own whitespace id) to stay consistent
    //with every other rule registered here
    //the temporary string is destroyed right after the rule is registered, which presumes the rule
    //keeps its own copy of the string - TODO confirm against arc/std/lexer.h
    ARC_String *whitespaceString;
    ARC_String_CreateWithStrlen(&whitespaceString, " \t");
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_PARSERLANG_TOKEN_WHITESPACE, whitespaceString));
    ARC_String_Destroy(whitespaceString);

    //single char tokens
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NEWLINE_ID     , ARC_PARSERLANG_TOKEN_NEWLINE_CHAR     ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_LESS_THAN_ID   , ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR   ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_GREATER_THAN_ID, ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_OR_ID          , ARC_PARSERLANG_TOKEN_OR_CHAR          ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_UNDERSCORE_ID  , ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR  ));

    //arrow, same temporary string handling as the whitespace rule
    ARC_String *arrowString;
    ARC_String_CreateWithStrlen(&arrowString, ARC_PARSERLANG_TOKEN_ARROW_CSTRING);
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchStringRule(ARC_PARSERLANG_TOKEN_ARROW_ID, arrowString));
    ARC_String_Destroy(arrowString);
}
/*
<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
<body> -> <tag> WHITESPACE ARROW <arguments>
<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
<tagOrConstant> -> <tag> | <constant>
<constant> -> ALPHA_UPPER_CHAR <constantBody>
<constantBody> -> <constantChar> <constantBody> | LAMBDA
<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
<tag> -> LESS_THAN <variable> GREATER_THAN
<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
<variableBody> -> <variableChar> <variableBody> | LAMBDA
<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
*/
//<variableBody> -> <variableChar> <variableBody> | LAMBDA
//one entry per alternative of the rule, the leading number of each entry matches how many ids follow it
uint32_t *variableBody[] = {
    (uint32_t[]){ 2, ARC_PARSERLANG_VARIABLE_CHAR, ARC_PARSERLANG_VARIABLE_BODY }, //<variableChar> <variableBody>
    (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }                                       //LAMBDA
};
//<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
uint32_t *variableChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_ALPHA_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_NUMBER }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_NUMBER }};
//<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
//one entry per alternative of the rule, the leading number of each entry matches how many ids follow it
uint32_t *alphaChar[] = {
    (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, //ALPHA_LOWER_CHAR
    (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }  //ALPHA_UPPER_CHAR
};
/*
ARC_ParserLanguageTag testTags[3] = {
{
VARIABLE, //tagId
variableTokensOrTags, //tokensOrTags
1 //tokenOrTagsSize
},
{
VARIABLE_NAME, //tagId
variableNameTags, //tokensOrTags
2 //tokenOrTagsSize
},
{
CHAR_OR_NUM, //tagId
charOrNumTokens, //tokensOrTags
2 //tokenOrTagsSize
}
};
ARC_Array languageArray = {
3, //size
testTags //data
};
*/
#ifdef __cplusplus
}
#endif
#endif //ARC_STD_PARSER_PARSERLANG_H_