Still working on the parser; the plan is to rework it to parse first, then call the struct-creation callback afterwards with a vector of tokens and tags.

This commit is contained in:
herbglitch 2024-11-23 19:27:30 -07:00
parent fcc07493d3
commit d69844dab1
9 changed files with 251 additions and 27 deletions

View file

@ -287,6 +287,10 @@ uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){
return ARC_Vector_GetSize(lexer->tokens);
}
//reports whether the lexer's token rules are flagged as continuous
//lexer is dereferenced without a NULL check, the returned value is the lexer's tokenRulesAreContinuous field
ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer){
    const ARC_Bool rulesAreContinuous = lexer->tokenRulesAreContinuous;

    return rulesAreContinuous;
}
ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id){
//if the rules are continuous we can just check if it is less than the max rules value
if(lexer->tokenRulesAreContinuous == ARC_True){

View file

@ -27,11 +27,11 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
//if the language exists, copy the language
if(language != NULL){
(*parser)->language.size = language->size;
(*parser)->language.data = malloc(sizeof(ARC_ParserLanguageTag) * language->size);
(*parser)->language.data = malloc(sizeof(ARC_ParserTag) * language->size);
for(uint32_t index = 0; index < language->size; index++){
ARC_ParserLanguageTag *languageTag = ((ARC_ParserLanguageTag *)language->data) + index;
ARC_ParserLanguageTag *currentTag = ((ARC_ParserLanguageTag *)(*parser)->language.data) + index;
ARC_ParserTag *languageTag = ((ARC_ParserTag *)language->data) + index;
ARC_ParserTag *currentTag = ((ARC_ParserTag *)(*parser)->language.data) + index;
//copy the language tag into the current tag
currentTag->tagId = languageTag->tagId;
@ -52,7 +52,7 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
//add the add function
currentTag->addDataFn = NULL;
if(languageTag->addDataFn != NULL){
currentTag->addDataFn = (ARC_ParserLanguageTag_AddDataFn *)malloc(sizeof(ARC_ParserLanguageTag_AddDataFn));
currentTag->addDataFn = (ARC_ParserTag_AddDataFn *)malloc(sizeof(ARC_ParserTag_AddDataFn));
*(currentTag->addDataFn) = *(languageTag->addDataFn);
}
}
@ -80,13 +80,33 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
}
}
//creates a parser from a language stored in an ARC_Vector instead of an ARC_Array
//
//every element of the vector is read as an ARC_ParserTag * and copied by value into a temporary contiguous array, that array
//is wrapped in an ARC_Array, passed to ARC_Parser_Create, and released before returning
//
//parser           receives the created parser, it is set to NULL if the temporary array cannot be allocated
//language         vector of ARC_ParserTag pointers, a NULL vector is forwarded to ARC_Parser_Create as a NULL language
//initLexerRulesFn forwarded unchanged to ARC_Parser_Create
//createDataFn     forwarded unchanged to ARC_Parser_Create
//destroyDataFn    forwarded unchanged to ARC_Parser_Create
//
//NOTE(review): like the stack array this replaces, the temporary array does not outlive this call, so this relies on
//              ARC_Parser_Create copying the language rather than keeping the pointer - confirm against ARC_Parser_Create
void ARC_Parser_CreateFromVector(ARC_Parser **parser, ARC_Vector *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){
    //without a vector there is nothing to flatten, ARC_Parser_Create does its own check for a NULL language
    if(language == NULL){
        ARC_Parser_Create(parser, NULL, initLexerRulesFn, createDataFn, destroyDataFn);
        return;
    }

    const uint32_t languageSize = ARC_Vector_GetSize(language);

    //heap storage is used instead of a variable length array: a zero length VLA is undefined behavior and a large
    //language could overflow the stack. an empty vector keeps a NULL data pointer with a size of zero
    ARC_ParserTag *languageArray = NULL;
    if(languageSize != 0){
        languageArray = malloc(sizeof(ARC_ParserTag) * languageSize);
        if(languageArray == NULL){
            *parser = NULL;
            return;
        }
    }

    //copy the language from the vector into the array
    for(uint32_t index = 0; index < languageSize; index++){
        languageArray[index] = *(ARC_ParserTag *)ARC_Vector_Get(language, index);
    }

    //set the copied data as an ARC_Array (size first, then data)
    ARC_Array languageAsArray = {
        languageSize,
        languageArray
    };

    //create the parser
    ARC_Parser_Create(parser, &languageAsArray, initLexerRulesFn, createDataFn, destroyDataFn);

    //the temporary flat copy is no longer needed
    free(languageArray);
}
//TODO: not implemented yet, the body is empty so parser is left untouched and languageString and initLexerRulesFn are ignored
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
}
void ARC_Parser_Destroy(ARC_Parser *parser){
//clear all the copied token or tags from memory
for(uint32_t index = 0; index < parser->language.size; index++){
ARC_ParserLanguageTag *currentTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;
ARC_ParserTag *currentTag = ((ARC_ParserTag *)parser->language.data) + index;
//free the orIndex values
for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
@ -123,9 +143,9 @@ void ARC_Parser_Destroy(ARC_Parser *parser){
//private recursive function to parse a tag
ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t tagId){
//get the current tag
ARC_ParserLanguageTag *tag = NULL;
ARC_ParserTag *tag = NULL;
for(uint32_t index = 0; index < parser->language.size; index++){
ARC_ParserLanguageTag *foundTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;
ARC_ParserTag *foundTag = ((ARC_ParserTag *)parser->language.data) + index;
if(foundTag->tagId == tagId){
tag = foundTag;
break;
@ -205,7 +225,7 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
//iterate through the tokens with the add callback
for(uint32_t index = 0; index < ARC_Vector_GetSize(foundTokens); index++){
ARC_LexerToken *token = (ARC_LexerToken *)ARC_Vector_Get(foundTokens, index);
(*(tag->addDataFn))(&(parser->data), tagId, token, tag->addUserData);
(*(tag->addDataFn))(&(parser->data), tagId, index, token, tag->addUserData);
}
}
@ -241,7 +261,7 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
//set the lexer index to start and get the first tag
uint32_t lexerIndex = 0;
ARC_ParserLanguageTag *startTag = parser->language.data;
ARC_ParserTag *startTag = parser->language.data;
//TODO: handle error checks for if parsing fails
//recursively parse from the initial start tag

View file

@ -2,8 +2,51 @@
#include "arc/std/lexer.h"
#include "arc/std/parser.h"
#include "arc/std/string.h"
#include "arc/std/vector.h"
#include <stddef.h>
#include <stdlib.h>
/*
<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
<tagOrConstant> -> <tag> | <constant>
<constant> -> ALPHA_UPPER_CHAR <constantBody>
<constantBody> -> <constantChar> <constantBody> | LAMBDA
<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
<tag> -> LESS_THAN <variable> GREATER_THAN
<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
<variableBody> -> <variableChar> <variableBody> | LAMBDA
<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
*/
/*
* @brief holds a single vector named body
*
* @note presumably this is the data gathered for the <line> rule of the grammar above, with body collecting the parsed
*       <body> entries - inferred from the names only, nothing in the visible code reads or writes it yet, TODO confirm
*/
typedef struct ARC_ParserLangLineData {
ARC_Vector *body;
} ARC_ParserLangLineData;
//pairs a tag name string with a vector of arguments
//NOTE(review): presumably mirrors the rule <body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments> from the grammar above,
//              inferred from the names only as nothing in the visible code uses this struct yet - TODO confirm
typedef struct ARC_ParserLangBodyData {
ARC_String *tagName;
ARC_Vector *arguments;
} ARC_ParserLangBodyData;
//holds a single vector named tagsOrConstants
//NOTE(review): presumably one <argument> from the grammar above, a sequence of <tagOrConstant> entries, inferred from the
//              names only as nothing in the visible code uses this struct yet - TODO confirm
typedef struct ARC_ParserLangArgumentData {
ARC_Vector *tagsOrConstants;
} ARC_ParserLangArgumentData;
//user data consumed by ARC_ParserLang_AddFirstCharFn
//string is the pending text that gets appended after the first token's data, then destroyed and reset to NULL
//vector is where the combined string is added
typedef struct ARC_ParserLangVectorStringData {
ARC_String *string;
ARC_Vector *vector;
} ARC_ParserLangVectorStringData;
//private function to initialize the lexer rules for the language
void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
//null
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0));
@ -35,6 +78,81 @@ void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
ARC_String_Destroy(arrowString);
}
//vector destroy callback that releases an ARC_ParserTag along with everything it points at
//data is treated as an ARC_ParserTag *, its tokensOrTags entries, addDataFn, tokensOrTags array, and the tag itself are freed
void ARC_ParserLang_VectorDestroyParserTagFn(void *data){
    ARC_ParserTag *tag = (ARC_ParserTag *)data;

    //release each or-option, in order from first to last
    uint32_t optionIndex = 0;
    while(optionIndex < tag->tokensOrTagsSize){
        free(tag->tokensOrTags[optionIndex]);
        optionIndex++;
    }

    //release the add callback holder, free accepts NULL so an unset callback needs no separate check
    free(tag->addDataFn);

    //release the array that held the or-options
    free(tag->tokensOrTags);

    //release the tag last, everything it referenced is already gone
    free(tag);
}
//private function to create the saved data for the language
void ARC_ParserLang_CreateDataFn(void **data){
//function callback to cleanup added tags
ARC_Vector_DestroyDataFn destroyParserTagFn = ARC_ParserLang_VectorDestroyParserTagFn;
//I don't see a reason to have a comparison function right now. this might change in the future
ARC_Vector_Create((ARC_Vector **)data, NULL, &destroyParserTagFn);
}
//private function to destroy the saved data for the language
void ARC_ParserLang_DestroyDataFn(void *data){
ARC_Vector_Destroy(data);
}
//private function to add char to constant name
//
//data     unused by this callback
//tagId    unused by this callback
//tagIndex unused by this callback
//token    the matched lexer token, its data string is appended to the string being built
//userData an ARC_String ** naming the string to build, nothing happens when userData is NULL. if the string it points at
//         is NULL an empty string is created first
void ARC_ParserLang_AddCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
    if(userData == NULL){
        return;
    }

    //recast the userData to make it easier to use
    ARC_String **variable = (ARC_String **)userData;

    //create the string if it does not exist yet. the checks used to be swapped (returning when *variable was NULL, then
    //testing variable itself, which cannot be NULL here) so the string was never created and nothing was ever appended
    if(*variable == NULL){
        //this will be freed in the main parser lang add
        ARC_String_Create(variable, NULL, 0);

        //nothing to append to if the string could not be created
        if(*variable == NULL){
            return;
        }
    }

    ARC_String_Append(variable, token->data);
}
//private function that finishes a name: the first character's token data is put in front of the already collected string and
//the result is added to the vector
//
//data     unused by this callback
//tagId    unused by this callback
//tagIndex unused by this callback
//token    the matched lexer token holding the first character, its data is copied and not modified
//userData an ARC_ParserLangVectorStringData *, nothing happens when it is NULL. its string (if any) is appended, destroyed,
//         and reset to NULL, its vector receives the newly built string
void ARC_ParserLang_AddFirstCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
    if(userData == NULL){
        return;
    }

    //recast the userData to make it easier to use
    ARC_ParserLangVectorStringData *vectorStringData = (ARC_ParserLangVectorStringData *)userData;

    //start the finished string with the first character
    ARC_String *tokenData = NULL;
    ARC_String_Copy(&tokenData, token->data);

    //the rest of the name is optional: string is still NULL when no further characters were collected since this function
    //last reset it, so only append and clean it up when it exists
    if(vectorStringData->string != NULL){
        ARC_String_Append(&tokenData, vectorStringData->string);

        //cleanup the string as its contents now live in tokenData
        ARC_String_Destroy(vectorStringData->string);
        vectorStringData->string = NULL;
    }

    ARC_Vector_Add(vectorStringData->vector, tokenData);
}
void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
//<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
uint32_t *line[] = { (uint32_t[]){ 3, ARC_PARSERLANG_BODY, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_BODY }, (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA } };
@ -75,7 +193,7 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
//<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }};
ARC_ParserLanguageTag parserLangTags[13] = {
ARC_ParserTag parserLangTags[13] = {
{ ARC_PARSERLANG_LINE , line , 4, NULL, NULL },
{ ARC_PARSERLANG_BODY , body , 1, NULL, NULL },
{ ARC_PARSERLANG_ARGUMENTS , arguments , 2, NULL, NULL },
@ -96,6 +214,9 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
parserLangTags //data
};
ARC_ParserData_CreateFn createDataFn = ARC_ParserLang_CreateDataFn;
ARC_ParserData_DestroyFn destroyDataFn = ARC_ParserLang_DestroyDataFn;
//TODO: add the create, destroy, and add callbacks
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, NULL, NULL);
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, &createDataFn, &destroyDataFn);
}