2024-11-06 06:45:04 -07:00
|
|
|
#include "arc/std/parser/parserlang.h"
|
|
|
|
|
#include "arc/std/lexer.h"
|
|
|
|
|
#include "arc/std/parser.h"
|
|
|
|
|
#include "arc/std/string.h"
|
2024-11-23 19:27:30 -07:00
|
|
|
#include "arc/std/vector.h"
|
2024-11-20 10:27:17 -07:00
|
|
|
#include <stddef.h>
|
2024-11-23 19:27:30 -07:00
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
|
|
|
|
|
<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
|
|
|
|
|
|
|
|
|
|
<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
|
|
|
|
|
<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
|
|
|
|
|
<tagOrConstant> -> <tag> | <constant>
|
|
|
|
|
|
|
|
|
|
<constant> -> ALPHA_UPPER_CHAR <constantBody>
|
|
|
|
|
<constantBody> -> <constantChar> <constantBody> | LAMBDA
|
|
|
|
|
<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
|
|
|
|
|
|
|
|
|
|
<tag> -> LESS_THAN <variable> GREATER_THAN
|
|
|
|
|
<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
|
|
|
|
|
<variableBody> -> <variableChar> <variableBody> | LAMBDA
|
|
|
|
|
<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
|
|
|
|
|
<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* @brief
|
|
|
|
|
*/
|
|
|
|
|
typedef struct ARC_ParserLangLineData {
|
|
|
|
|
ARC_Vector *body;
|
|
|
|
|
} ARC_ParserLangLineData;
|
|
|
|
|
|
|
|
|
|
typedef struct ARC_ParserLangBodyData {
|
|
|
|
|
ARC_String *tagName;
|
|
|
|
|
ARC_Vector *arguments;
|
|
|
|
|
} ARC_ParserLangBodyData;
|
|
|
|
|
|
|
|
|
|
typedef struct ARC_ParserLangArgumentData {
|
|
|
|
|
ARC_Vector *tagsOrConstants;
|
|
|
|
|
} ARC_ParserLangArgumentData;
|
|
|
|
|
|
|
|
|
|
typedef struct ARC_ParserLangVectorStringData {
|
|
|
|
|
ARC_String *string;
|
|
|
|
|
ARC_Vector *vector;
|
|
|
|
|
} ARC_ParserLangVectorStringData;
|
|
|
|
|
|
|
|
|
|
//private function to initalize the lexer rules for the language
|
2024-11-06 06:45:04 -07:00
|
|
|
void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
|
|
|
|
|
//null
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0));
|
|
|
|
|
|
|
|
|
|
//number
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_NUMBER, '0', '9'));
|
|
|
|
|
|
|
|
|
|
//alpha char
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR, 'a', 'z'));
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z'));
|
|
|
|
|
|
|
|
|
|
//whitespace
|
|
|
|
|
ARC_String *whitespaceString;
|
|
|
|
|
ARC_String_CreateWithStrlen(&whitespaceString, " \t");
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_LEXER_TOKEN_WHITESPACE, whitespaceString));
|
|
|
|
|
ARC_String_Destroy(whitespaceString);
|
|
|
|
|
|
|
|
|
|
//single char tokens
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NEWLINE_ID , ARC_PARSERLANG_TOKEN_NEWLINE_CHAR ));
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_LESS_THAN_ID , ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR ));
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_GREATER_THAN_ID, ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR));
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_OR_ID , ARC_PARSERLANG_TOKEN_OR_CHAR ));
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_UNDERSCORE_ID , ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR ));
|
|
|
|
|
|
|
|
|
|
//arrow
|
|
|
|
|
ARC_String *arrowString;
|
|
|
|
|
ARC_String_CreateWithStrlen(&arrowString, ARC_PARSERLANG_TOKEN_ARROW_CSTRING);
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchStringRule(ARC_PARSERLANG_TOKEN_ARROW_ID, arrowString));
|
|
|
|
|
ARC_String_Destroy(arrowString);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-23 19:27:30 -07:00
|
|
|
void ARC_ParserLang_VectorDestroyParserTagFn(void *data){
|
|
|
|
|
ARC_ParserTag *currentTag = (ARC_ParserTag *)data;
|
|
|
|
|
|
|
|
|
|
//free the orIndex vlues
|
|
|
|
|
for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
|
|
|
|
|
free(currentTag->tokensOrTags[orIndex]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(currentTag->addDataFn != NULL){
|
|
|
|
|
free(currentTag->addDataFn);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//free the tokens or tags
|
|
|
|
|
free(currentTag->tokensOrTags);
|
|
|
|
|
|
|
|
|
|
//free the tag itself
|
|
|
|
|
free(currentTag);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//private function to create the saved data for the language
|
|
|
|
|
void ARC_ParserLang_CreateDataFn(void **data){
|
|
|
|
|
//function callback to cleanup added tags
|
|
|
|
|
ARC_Vector_DestroyDataFn destroyParserTagFn = ARC_ParserLang_VectorDestroyParserTagFn;
|
|
|
|
|
|
|
|
|
|
//I don't see a reason to have a comparison function right now. this might change in the future
|
|
|
|
|
ARC_Vector_Create((ARC_Vector **)data, NULL, &destroyParserTagFn);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//private function to destroy the saved data for the language
|
|
|
|
|
void ARC_ParserLang_DestroyDataFn(void *data){
|
|
|
|
|
ARC_Vector_Destroy(data);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//private function to add char to constant name
|
|
|
|
|
void ARC_ParserLang_AddCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
|
|
|
|
|
if(userData == NULL){
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//recast the addData to make it easier to use
|
|
|
|
|
ARC_String **variable = (ARC_String **)userData;
|
|
|
|
|
if(*variable == NULL){
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//create the const string if it is null
|
|
|
|
|
if(variable == NULL){
|
|
|
|
|
//this will be freed in the main parser lang add
|
|
|
|
|
ARC_String_Create(variable, NULL, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ARC_String_Append(variable, token->data);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//private function to get details from a constant
|
|
|
|
|
void ARC_ParserLang_AddFirstCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
|
|
|
|
|
if(userData == NULL){
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//recast the addData to make it easier to use
|
|
|
|
|
ARC_ParserLangVectorStringData *vectorStringData = (ARC_ParserLangVectorStringData *)userData;
|
|
|
|
|
|
|
|
|
|
//add the first character to the temp const
|
|
|
|
|
ARC_String *tokenData = NULL;
|
|
|
|
|
ARC_String_Copy(&tokenData, token->data);
|
|
|
|
|
ARC_String_Append(&tokenData, vectorStringData->string);
|
|
|
|
|
|
|
|
|
|
//cleanup the string as it will be added to the vector
|
|
|
|
|
ARC_String_Destroy(vectorStringData->string);
|
|
|
|
|
vectorStringData->string = NULL;
|
|
|
|
|
|
|
|
|
|
ARC_Vector_Add(vectorStringData->vector, tokenData);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-06 06:45:04 -07:00
|
|
|
//creates a parser whose grammar is the parser language itself
//
//each grammar rule below is an array of alternatives, every alternative is a uint32_t array whose first
//value is the number of ids that follow it
//the rules are collected into a table of ARC_ParserTag (id, alternatives, number of alternatives, and two
//callbacks that are not set yet) which is handed to ARC_Parser_Create together with the lexer rule
//initializer and the create / destroy callbacks for the saved data
//
//NOTE(review): every table here has automatic storage, this assumes ARC_Parser_Create copies what it needs
//              before returning - confirm against the parser implementation
void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
    //<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
    uint32_t *line[] = { (uint32_t[]){ 3, ARC_PARSERLANG_BODY, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_BODY }, (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA } };

    //<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
    uint32_t *body[] = { (uint32_t[]){ 5, ARC_PARSERLANG_TAG, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_ARROW_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS } };

    //<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
    //NOTE(review): the last alternative only accepts a single tag or constant, so a rule without an OR and
    //              with more than one symbol looks unparsable, it may need to be <argument> - confirm
    uint32_t *arguments[] = { (uint32_t[]){ 5, ARC_PARSERLANG_ARGUMENT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_OR_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS }, (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT } };

    //<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
    uint32_t *argument[] = { (uint32_t[]){ 3, ARC_PARSERLANG_TAG_OR_CONSTANT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENT }, (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT } };

    //<tagOrConstant> -> <tag> | <constant>
    uint32_t *tagOrConstant[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TAG }, (uint32_t[]){ 1, ARC_PARSERLANG_CONSTANT } };

    //<constant> -> ALPHA_UPPER_CHAR <constantBody>
    uint32_t *constant[] = { (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, ARC_PARSERLANG_CONSTANT_BODY } };

    //<constantBody> -> <constantChar> <constantBody> | LAMBDA
    uint32_t *constantBody[] = { (uint32_t[]){ 2, ARC_PARSERLANG_CONSTANT_CHAR, ARC_PARSERLANG_CONSTANT_BODY }, (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA } };

    //<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
    //two alternatives of one token each, a single alternative holding both tokens would require them in
    //sequence and would leave the second alternative declared in the tag table pointing past the array
    uint32_t *constantChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID } };

    //<tag> -> LESS_THAN <variable> GREATER_THAN
    uint32_t *tag[] = { (uint32_t[]){ 3, ARC_PARSERLANG_TOKEN_LESS_THAN_ID, ARC_PARSERLANG_VARIABLE, ARC_PARSERLANG_TOKEN_GREATER_THAN_ID } };

    //<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
    uint32_t *variable[] = { (uint32_t[]){ 2, ARC_PARSERLANG_ALPHA_CHAR, ARC_PARSERLANG_VARIABLE_BODY }, (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID, ARC_PARSERLANG_VARIABLE_BODY } };

    //<variableBody> -> <variableChar> <variableBody> | LAMBDA
    uint32_t *variableBody[] = { (uint32_t[]){ 2, ARC_PARSERLANG_VARIABLE_CHAR, ARC_PARSERLANG_VARIABLE_BODY }, (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA } };

    //<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
    uint32_t *variableChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_ALPHA_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_NUMBER }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID } };

    //<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
    uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR } };

    //the third value of each entry has to match the number of alternatives in the rule above
    ARC_ParserTag parserLangTags[] = {
        { ARC_PARSERLANG_LINE           , line         , 4, NULL, NULL },
        { ARC_PARSERLANG_BODY           , body         , 1, NULL, NULL },
        { ARC_PARSERLANG_ARGUMENTS      , arguments    , 2, NULL, NULL },
        { ARC_PARSERLANG_ARGUMENT       , argument     , 2, NULL, NULL },
        { ARC_PARSERLANG_TAG_OR_CONSTANT, tagOrConstant, 2, NULL, NULL },
        { ARC_PARSERLANG_CONSTANT       , constant     , 1, NULL, NULL },
        { ARC_PARSERLANG_CONSTANT_BODY  , constantBody , 2, NULL, NULL },
        { ARC_PARSERLANG_CONSTANT_CHAR  , constantChar , 2, NULL, NULL },
        { ARC_PARSERLANG_TAG            , tag          , 1, NULL, NULL },
        { ARC_PARSERLANG_VARIABLE       , variable     , 2, NULL, NULL },
        { ARC_PARSERLANG_VARIABLE_BODY  , variableBody , 2, NULL, NULL },
        { ARC_PARSERLANG_VARIABLE_CHAR  , variableChar , 3, NULL, NULL },
        { ARC_PARSERLANG_ALPHA_CHAR     , alphaChar    , 2, NULL, NULL }
    };

    //the size is taken from the table so it cannot get out of step when a tag is added or removed
    ARC_Array parserLanguageArray = {
        sizeof(parserLangTags) / sizeof(parserLangTags[0]), //size
        parserLangTags                                      //data
    };

    ARC_ParserData_CreateFn  createDataFn  = ARC_ParserLang_CreateDataFn;
    ARC_ParserData_DestroyFn destroyDataFn = ARC_ParserLang_DestroyDataFn;

    //TODO: add the create, destroy, and add callbacks
    ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, &createDataFn, &destroyDataFn);
}
|