still working on parser, plan to rework to parsing first, then calling struct creation callback after with vector of tokens and tags
This commit is contained in:
parent
fcc07493d3
commit
d69844dab1
9 changed files with 251 additions and 27 deletions
|
|
@ -287,6 +287,10 @@ uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){
|
|||
return ARC_Vector_GetSize(lexer->tokens);
|
||||
}
|
||||
|
||||
/**
 * @brief reports the lexer's tokenRulesAreContinuous flag
 *
 * @param lexer the lexer to query, it is dereferenced without a NULL check
 *
 * @return the value stored in lexer->tokenRulesAreContinuous
*/
ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer){
    //read the flag stored on the lexer and hand it back unchanged
    const ARC_Bool rulesAreContinuous = lexer->tokenRulesAreContinuous;
    return rulesAreContinuous;
}
|
||||
|
||||
ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id){
|
||||
//if the rules are continuous we can just check if it is less than the max rules value
|
||||
if(lexer->tokenRulesAreContinuous == ARC_True){
|
||||
|
|
|
|||
|
|
@ -27,11 +27,11 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
|
|||
//if the language exists, copy the language
|
||||
if(language != NULL){
|
||||
(*parser)->language.size = language->size;
|
||||
(*parser)->language.data = malloc(sizeof(ARC_ParserLanguageTag) * language->size);
|
||||
(*parser)->language.data = malloc(sizeof(ARC_ParserTag) * language->size);
|
||||
|
||||
for(uint32_t index = 0; index < language->size; index++){
|
||||
ARC_ParserLanguageTag *languageTag = ((ARC_ParserLanguageTag *)language->data) + index;
|
||||
ARC_ParserLanguageTag *currentTag = ((ARC_ParserLanguageTag *)(*parser)->language.data) + index;
|
||||
ARC_ParserTag *languageTag = ((ARC_ParserTag *)language->data) + index;
|
||||
ARC_ParserTag *currentTag = ((ARC_ParserTag *)(*parser)->language.data) + index;
|
||||
|
||||
//copy the language tag into the current tag
|
||||
currentTag->tagId = languageTag->tagId;
|
||||
|
|
@ -52,7 +52,7 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
|
|||
//add the add function
|
||||
currentTag->addDataFn = NULL;
|
||||
if(languageTag->addDataFn != NULL){
|
||||
currentTag->addDataFn = (ARC_ParserLanguageTag_AddDataFn *)malloc(sizeof(ARC_ParserLanguageTag_AddDataFn));
|
||||
currentTag->addDataFn = (ARC_ParserTag_AddDataFn *)malloc(sizeof(ARC_ParserTag_AddDataFn));
|
||||
*(currentTag->addDataFn) = *(languageTag->addDataFn);
|
||||
}
|
||||
}
|
||||
|
|
@ -80,13 +80,33 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * @brief creates a parser from a vector of ARC_ParserTag entries
 *
 * @note the tags are flattened into a temporary array that is handed to ARC_Parser_Create and
 *       released before returning, the vector itself is not modified or freed here
 *
 * @note the temporary array lives on the heap instead of in a variable length array, a VLA with
 *       a size of zero is undefined behaviour and a large language could overflow the stack
 *
 * @param parser           where ARC_Parser_Create stores the new parser
 * @param language         vector whose elements are read as ARC_ParserTag
 * @param initLexerRulesFn forwarded unchanged to ARC_Parser_Create
 * @param createDataFn     forwarded unchanged to ARC_Parser_Create
 * @param destroyDataFn    forwarded unchanged to ARC_Parser_Create
*/
void ARC_Parser_CreateFromVector(ARC_Parser **parser, ARC_Vector *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){
    //creates the variables to copy the vector into
    const uint32_t languageSize = ARC_Vector_GetSize(language);
    ARC_ParserTag *languageArray = NULL;

    //only allocate when there is something to copy, an empty language keeps a NULL data pointer
    if(languageSize != 0){
        //calloc checks the count * size multiplication for overflow
        languageArray = calloc(languageSize, sizeof(*languageArray));
        if(languageArray == NULL){
            //NOTE(review): no error convention is visible from here, the parser is set to NULL so callers can detect the failure, confirm against the library's error handling
            *parser = NULL;
            return;
        }
    }

    //copy the language from a vector into an array
    for(uint32_t index = 0; index < languageSize; index++){
        languageArray[index] = *(ARC_ParserTag *)ARC_Vector_Get(language, index);
    }

    //set the vector data as an ARC_Array
    ARC_Array languageAsArray = {
        languageSize,
        languageArray
    };

    //create the parser
    ARC_Parser_Create(parser, &languageAsArray, initLexerRulesFn, createDataFn, destroyDataFn);

    //the temporary array is no longer needed, the original stack array also went out of scope here
    free(languageArray);
}
|
||||
|
||||
/**
 * @brief placeholder for creating a parser from a language string
 *
 * @note TODO: not implemented yet, the function currently does nothing and leaves *parser untouched
 *
 * @param parser           unused for now
 * @param languageString   unused for now
 * @param initLexerRulesFn unused for now
*/
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
    //mark the parameters as intentionally unused until the body is written
    (void)parser;
    (void)languageString;
    (void)initLexerRulesFn;
}
|
||||
|
||||
void ARC_Parser_Destroy(ARC_Parser *parser){
|
||||
//clear all the copied token or tags from memory
|
||||
for(uint32_t index = 0; index < parser->language.size; index++){
|
||||
ARC_ParserLanguageTag *currentTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;
|
||||
ARC_ParserTag *currentTag = ((ARC_ParserTag *)parser->language.data) + index;
|
||||
|
||||
//free the orIndex values
|
||||
for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
|
||||
|
|
@ -123,9 +143,9 @@ void ARC_Parser_Destroy(ARC_Parser *parser){
|
|||
//private recursive function to parse a tag
|
||||
ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t tagId){
|
||||
//get the current tag
|
||||
ARC_ParserLanguageTag *tag = NULL;
|
||||
ARC_ParserTag *tag = NULL;
|
||||
for(uint32_t index = 0; index < parser->language.size; index++){
|
||||
ARC_ParserLanguageTag *foundTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;
|
||||
ARC_ParserTag *foundTag = ((ARC_ParserTag *)parser->language.data) + index;
|
||||
if(foundTag->tagId == tagId){
|
||||
tag = foundTag;
|
||||
break;
|
||||
|
|
@ -205,7 +225,7 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
|
|||
//iterate through the tokens with the add callback
|
||||
for(uint32_t index = 0; index < ARC_Vector_GetSize(foundTokens); index++){
|
||||
ARC_LexerToken *token = (ARC_LexerToken *)ARC_Vector_Get(foundTokens, index);
|
||||
(*(tag->addDataFn))(&(parser->data), tagId, token, tag->addUserData);
|
||||
(*(tag->addDataFn))(&(parser->data), tagId, index, token, tag->addUserData);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -241,7 +261,7 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
|
|||
|
||||
//set the lexer index to start and get the first tag
|
||||
uint32_t lexerIndex = 0;
|
||||
ARC_ParserLanguageTag *startTag = parser->language.data;
|
||||
ARC_ParserTag *startTag = parser->language.data;
|
||||
|
||||
//TODO: handle error checks for if parsing fails
|
||||
//recursively parse from the initial start tag
|
||||
|
|
|
|||
|
|
@ -2,8 +2,51 @@
|
|||
#include "arc/std/lexer.h"
|
||||
#include "arc/std/parser.h"
|
||||
#include "arc/std/string.h"
|
||||
#include "arc/std/vector.h"
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
|
||||
<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
|
||||
<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
|
||||
|
||||
<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
|
||||
<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
|
||||
<tagOrConstant> -> <tag> | <constant>
|
||||
|
||||
<constant> -> ALPHA_UPPER_CHAR <constantBody>
|
||||
<constantBody> -> <constantChar> <constantBody> | LAMBDA
|
||||
<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
|
||||
|
||||
<tag> -> LESS_THAN <variable> GREATER_THAN
|
||||
<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
|
||||
<variableBody> -> <variableChar> <variableBody> | LAMBDA
|
||||
<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
|
||||
<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
|
||||
*/
|
||||
|
||||
/*
 * @brief NOTE(review): looks like the data collected for a <line> rule of the parser language, confirm once the add callbacks use it
 */
|
||||
typedef struct ARC_ParserLangLineData {
|
||||
//NOTE(review): presumably one entry per parsed <body>, the element type is not visible here
ARC_Vector *body;
|
||||
} ARC_ParserLangLineData;
|
||||
|
||||
//NOTE(review): appears to hold the pieces of one <body> rule (<tag> ARROW <arguments>), confirm against the add callbacks
typedef struct ARC_ParserLangBodyData {
|
||||
//presumably the name of the tag on the left side of the arrow, TODO confirm
ARC_String *tagName;
|
||||
//presumably the parsed <arguments> of the rule, the element type is not visible here
ARC_Vector *arguments;
|
||||
} ARC_ParserLangBodyData;
|
||||
|
||||
//NOTE(review): appears to hold the pieces of one <argument> rule, confirm against the add callbacks
typedef struct ARC_ParserLangArgumentData {
|
||||
//presumably the <tagOrConstant> entries of the argument in order, TODO confirm the element type
ARC_Vector *tagsOrConstants;
|
||||
} ARC_ParserLangArgumentData;
|
||||
|
||||
//pairs a string that is still being built with the vector that receives it once it is complete
typedef struct ARC_ParserLangVectorStringData {
|
||||
//the pending string, ARC_ParserLang_AddFirstCharFn appends it after the first character, destroys it and resets it to NULL
ARC_String *string;
|
||||
//the vector ARC_ParserLang_AddFirstCharFn adds the finished string to
ARC_Vector *vector;
|
||||
} ARC_ParserLangVectorStringData;
|
||||
|
||||
//private function to initialize the lexer rules for the language
|
||||
void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
|
||||
//null
|
||||
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0));
|
||||
|
|
@ -35,6 +78,81 @@ void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
|
|||
ARC_String_Destroy(arrowString);
|
||||
}
|
||||
|
||||
void ARC_ParserLang_VectorDestroyParserTagFn(void *data){
|
||||
ARC_ParserTag *currentTag = (ARC_ParserTag *)data;
|
||||
|
||||
//free the orIndex vlues
|
||||
for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
|
||||
free(currentTag->tokensOrTags[orIndex]);
|
||||
}
|
||||
|
||||
if(currentTag->addDataFn != NULL){
|
||||
free(currentTag->addDataFn);
|
||||
}
|
||||
|
||||
//free the tokens or tags
|
||||
free(currentTag->tokensOrTags);
|
||||
|
||||
//free the tag itself
|
||||
free(currentTag);
|
||||
}
|
||||
|
||||
//private function to create the saved data for the language
|
||||
void ARC_ParserLang_CreateDataFn(void **data){
|
||||
//function callback to cleanup added tags
|
||||
ARC_Vector_DestroyDataFn destroyParserTagFn = ARC_ParserLang_VectorDestroyParserTagFn;
|
||||
|
||||
//I don't see a reason to have a comparison function right now. this might change in the future
|
||||
ARC_Vector_Create((ARC_Vector **)data, NULL, &destroyParserTagFn);
|
||||
}
|
||||
|
||||
//private function to destroy the saved data for the language
|
||||
void ARC_ParserLang_DestroyDataFn(void *data){
|
||||
ARC_Vector_Destroy(data);
|
||||
}
|
||||
|
||||
//private function to add char to constant name
|
||||
void ARC_ParserLang_AddCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
|
||||
if(userData == NULL){
|
||||
return;
|
||||
}
|
||||
|
||||
//recast the addData to make it easier to use
|
||||
ARC_String **variable = (ARC_String **)userData;
|
||||
if(*variable == NULL){
|
||||
return;
|
||||
}
|
||||
|
||||
//create the const string if it is null
|
||||
if(variable == NULL){
|
||||
//this will be freed in the main parser lang add
|
||||
ARC_String_Create(variable, NULL, 0);
|
||||
}
|
||||
|
||||
ARC_String_Append(variable, token->data);
|
||||
}
|
||||
|
||||
//private function to get details from a constant
|
||||
void ARC_ParserLang_AddFirstCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
|
||||
if(userData == NULL){
|
||||
return;
|
||||
}
|
||||
|
||||
//recast the addData to make it easier to use
|
||||
ARC_ParserLangVectorStringData *vectorStringData = (ARC_ParserLangVectorStringData *)userData;
|
||||
|
||||
//add the first character to the temp const
|
||||
ARC_String *tokenData = NULL;
|
||||
ARC_String_Copy(&tokenData, token->data);
|
||||
ARC_String_Append(&tokenData, vectorStringData->string);
|
||||
|
||||
//cleanup the string as it will be added to the vector
|
||||
ARC_String_Destroy(vectorStringData->string);
|
||||
vectorStringData->string = NULL;
|
||||
|
||||
ARC_Vector_Add(vectorStringData->vector, tokenData);
|
||||
}
|
||||
|
||||
void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
|
||||
//<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
|
||||
uint32_t *line[] = { (uint32_t[]){ 3, ARC_PARSERLANG_BODY, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_BODY }, (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA } };
|
||||
|
|
@ -75,7 +193,7 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
|
|||
//<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
|
||||
uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }};
|
||||
|
||||
ARC_ParserLanguageTag parserLangTags[13] = {
|
||||
ARC_ParserTag parserLangTags[13] = {
|
||||
{ ARC_PARSERLANG_LINE , line , 4, NULL, NULL },
|
||||
{ ARC_PARSERLANG_BODY , body , 1, NULL, NULL },
|
||||
{ ARC_PARSERLANG_ARGUMENTS , arguments , 2, NULL, NULL },
|
||||
|
|
@ -96,6 +214,9 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
|
|||
parserLangTags //data
|
||||
};
|
||||
|
||||
ARC_ParserData_CreateFn createDataFn = ARC_ParserLang_CreateDataFn;
|
||||
ARC_ParserData_DestroyFn destroyDataFn = ARC_ParserLang_DestroyDataFn;
|
||||
|
||||
//TODO: add the create, destroy, and add callbacks
|
||||
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, NULL, NULL);
|
||||
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, &createDataFn, &destroyDataFn);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue