still working on parser, plan to rework to parsing first, then calling struct creation callback after with vector of tokens and tags

This commit is contained in:
herbglitch 2024-11-23 19:27:30 -07:00
parent fcc07493d3
commit d69844dab1
9 changed files with 251 additions and 27 deletions

View file

@ -130,6 +130,15 @@ ARC_LexerToken *ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index);
*/
uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer);
/**
* @brief returns a boolean based on whether a lexer's registered token rules are continuous
*
* @note NOTE(review): "continuous" appears to mean the registered rule ids form a range without gaps,
*       no matter what order they were registered in - confirm against the rule registration code
*
* @param[in] lexer the lexer to check if its rules are continuous
*
* @return ARC_True if the set rules are continuous, otherwise ARC_False
*/
ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer);
/**
* @brief returns a boolean based on if a given id is a stored token rule id
*

View file

@ -6,8 +6,8 @@ extern "C" {
#endif
#include "arc/std/array.h"
//#include "arc/std/bool.h"
#include "arc/std/lexer.h"
#include "arc/std/vector.h"
#include <stdint.h>
/**
@ -28,20 +28,20 @@ typedef void (* ARC_ParserData_DestroyFn)(void *data);
/**
* @brief TODO: write this
*/
typedef void (* ARC_ParserLanguageTag_AddDataFn)(void **data, uint32_t tagId, ARC_LexerToken *token, void *userData);
typedef void (* ARC_ParserTag_AddDataFn)(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData);
/**
* @brief a language tag type for the parser //TODO: explain this better
*/
typedef struct ARC_ParserLanguageTag {
typedef struct ARC_ParserTag {
uint32_t tagId;
uint32_t **tokensOrTags;
uint32_t tokensOrTagsSize;
ARC_ParserLanguageTag_AddDataFn *addDataFn;
ARC_ParserTag_AddDataFn *addDataFn;
void *addUserData;
} ARC_ParserLanguageTag;
} ARC_ParserTag;
/**
* @brief a callback function to initialize the lexer the parser uses with rules
@ -72,6 +72,15 @@ typedef void (* ARC_Parser_InitLexerRulesFn)(ARC_Lexer *lexer);
*/
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn);
/**
* @brief creates an ARC_Parser type from an arc vector
*
* @note the tags in the vector are copied into a temporary array which is then handed to ARC_Parser_Create
*
* @param[out] parser           ARC_Parser to create
* @param[in]  language         a vector of ARC_ParserTag pointers defining a language
* @param[in]  initLexerRulesFn a callback used to initialize the token rules the lexer within the parser will use
* @param[in]  createDataFn     passed through unchanged to ARC_Parser_Create
* @param[in]  destroyDataFn    passed through unchanged to ARC_Parser_Create
*/
void ARC_Parser_CreateFromVector(ARC_Parser **parser, ARC_Vector *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn);
/**
* @brief creates an ARC_Parser type from a string
*

View file

@ -13,7 +13,7 @@ extern "C" {
<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
<tagOrConstant> -> <parserLangageTag> | <constant>
<tagOrConstant> -> <tag> | <constant>
<constant> -> ALPHA_UPPER_CHAR <constantBody>
<constantBody> -> <constantChar> <constantBody> | LAMBDA
@ -26,10 +26,16 @@ extern "C" {
<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
*/
/*
* @brief a callback type that takes a constant's string and returns a uint32_t id
*
* NOTE(review): presumably used to resolve the name of a constant in a parsed language to its
*               token id - nothing in view uses it yet, confirm once it is wired in
*/
typedef uint32_t (* ARC_ParserLang_GetConstId)(ARC_String *constant);
/*
* @brief creates a parser for the Parser Lang
*
* @note the rules will be inited for the parser lang
* @note the parsed data will be saved as a vector of ARC_ParserLanguageTag
*
* @param[out] parser the parser to create
*/

View file

@ -287,6 +287,10 @@ uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){
return ARC_Vector_GetSize(lexer->tokens);
}
//reports the continuous flag stored on the lexer, nothing is recalculated here
ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer){
    const ARC_Bool rulesAreContinuous = lexer->tokenRulesAreContinuous;

    return rulesAreContinuous;
}
ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id){
//if the rules are continuous we can just check if it is less than the max rules value
if(lexer->tokenRulesAreContinuous == ARC_True){

View file

@ -27,11 +27,11 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
//if the language exists, copy the language
if(language != NULL){
(*parser)->language.size = language->size;
(*parser)->language.data = malloc(sizeof(ARC_ParserLanguageTag) * language->size);
(*parser)->language.data = malloc(sizeof(ARC_ParserTag) * language->size);
for(uint32_t index = 0; index < language->size; index++){
ARC_ParserLanguageTag *languageTag = ((ARC_ParserLanguageTag *)language->data) + index;
ARC_ParserLanguageTag *currentTag = ((ARC_ParserLanguageTag *)(*parser)->language.data) + index;
ARC_ParserTag *languageTag = ((ARC_ParserTag *)language->data) + index;
ARC_ParserTag *currentTag = ((ARC_ParserTag *)(*parser)->language.data) + index;
//copy the language tag into the current tag
currentTag->tagId = languageTag->tagId;
@ -52,7 +52,7 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
//add the add function
currentTag->addDataFn = NULL;
if(languageTag->addDataFn != NULL){
currentTag->addDataFn = (ARC_ParserLanguageTag_AddDataFn *)malloc(sizeof(ARC_ParserLanguageTag_AddDataFn));
currentTag->addDataFn = (ARC_ParserTag_AddDataFn *)malloc(sizeof(ARC_ParserTag_AddDataFn));
*(currentTag->addDataFn) = *(languageTag->addDataFn);
}
}
@ -80,13 +80,33 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
}
}
//creates a parser from a vector of ARC_ParserTag pointers by copying the tags into a temporary
//ARC_Array and handing that array to ARC_Parser_Create
//
//parser is set to NULL if the temporary copy cannot be allocated
void ARC_Parser_CreateFromVector(ARC_Parser **parser, ARC_Vector *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){
    const uint32_t languageSize = ARC_Vector_GetSize(language);

    //the temporary copy lives on the heap, a variable length array is undefined behavior when
    //the vector is empty and can run the stack out when the vector is large
    ARC_ParserTag *languageTags = NULL;
    if(languageSize != 0){
        languageTags = malloc(sizeof(*languageTags) * languageSize);
        if(languageTags == NULL){
            //out of memory, so no parser can be created
            *parser = NULL;
            return;
        }
    }

    //copy the language from a vector into an array
    for(uint32_t index = 0; index < languageSize; index++){
        languageTags[index] = *(ARC_ParserTag *)ARC_Vector_Get(language, index);
    }

    //set the vector data as an ARC_Array
    ARC_Array languageAsArray = {
        languageSize,
        languageTags
    };

    //create the parser, it copies the language so the temporary copy is released right after
    ARC_Parser_Create(parser, &languageAsArray, initLexerRulesFn, createDataFn, destroyDataFn);
    free(languageTags);
}
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
}
void ARC_Parser_Destroy(ARC_Parser *parser){
//clear all the copied token or tags from memory
for(uint32_t index = 0; index < parser->language.size; index++){
ARC_ParserLanguageTag *currentTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;
ARC_ParserTag *currentTag = ((ARC_ParserTag *)parser->language.data) + index;
//free the orIndex vlues
for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
@ -123,9 +143,9 @@ void ARC_Parser_Destroy(ARC_Parser *parser){
//private recusive function to parse a tag
ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t tagId){
//get the current tag
ARC_ParserLanguageTag *tag = NULL;
ARC_ParserTag *tag = NULL;
for(uint32_t index = 0; index < parser->language.size; index++){
ARC_ParserLanguageTag *foundTag = ((ARC_ParserLanguageTag *)parser->language.data) + index;
ARC_ParserTag *foundTag = ((ARC_ParserTag *)parser->language.data) + index;
if(foundTag->tagId == tagId){
tag = foundTag;
break;
@ -205,7 +225,7 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
//iterate through the tokens with the add callback
for(uint32_t index = 0; index < ARC_Vector_GetSize(foundTokens); index++){
ARC_LexerToken *token = (ARC_LexerToken *)ARC_Vector_Get(foundTokens, index);
(*(tag->addDataFn))(&(parser->data), tagId, token, tag->addUserData);
(*(tag->addDataFn))(&(parser->data), tagId, index, token, tag->addUserData);
}
}
@ -241,7 +261,7 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
//set the lexer index to start and get the first tag
uint32_t lexerIndex = 0;
ARC_ParserLanguageTag *startTag = parser->language.data;
ARC_ParserTag *startTag = parser->language.data;
//TODO: handle error checks for if parsing fails
//recursivly parse from the inital start tag

View file

@ -2,8 +2,51 @@
#include "arc/std/lexer.h"
#include "arc/std/parser.h"
#include "arc/std/string.h"
#include "arc/std/vector.h"
#include <stddef.h>
#include <stdlib.h>
/*
<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
<tagOrConstant> -> <tag> | <constant>
<constant> -> ALPHA_UPPER_CHAR <constantBody>
<constantBody> -> <constantChar> <constantBody> | LAMBDA
<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
<tag> -> LESS_THAN <variable> GREATER_THAN
<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
<variableBody> -> <variableChar> <variableBody> | LAMBDA
<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
*/
/*
* @brief data for a <line> of the parser lang
*
* NOTE(review): body presumably holds the parsed <body> entries gathered for the line - nothing
*               in view fills or reads it yet, confirm once it is used
*/
typedef struct ARC_ParserLangLineData {
ARC_Vector *body;
} ARC_ParserLangLineData;
//NOTE(review): presumably the data for a <body> rule (<tag> WHITESPACE ARROW WHITESPACE <arguments>),
//              with tagName being the name of the tag and arguments its parsed arguments - nothing in
//              view fills or reads it yet, confirm once it is used
typedef struct ARC_ParserLangBodyData {
ARC_String *tagName;
ARC_Vector *arguments;
} ARC_ParserLangBodyData;
//NOTE(review): presumably the data for an <argument> rule, with tagsOrConstants holding the tags and
//              constants that make up the argument - nothing in view fills or reads it yet, confirm
//              once it is used
typedef struct ARC_ParserLangArgumentData {
ARC_Vector *tagsOrConstants;
} ARC_ParserLangArgumentData;
//user data read by ARC_ParserLang_AddFirstCharFn, it pairs a string with the vector that receives
//the finished string
typedef struct ARC_ParserLangVectorStringData {
ARC_String *string; //appended after the first char's token data, then destroyed and set to NULL
ARC_Vector *vector; //the combined string is added to this vector
} ARC_ParserLangVectorStringData;
//private function to initalize the lexer rules for the language
void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
//null
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0));
@ -35,6 +78,81 @@ void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
ARC_String_Destroy(arrowString);
}
//vector destroy callback that releases an ARC_ParserTag together with the memory it holds:
//each or option, the array of or options, the add callback storage, and the tag itself
void ARC_ParserLang_VectorDestroyParserTagFn(void *data){
    ARC_ParserTag *tag = (ARC_ParserTag *)data;

    //every or option is its own allocation and has to go before the array holding them
    for(uint32_t option = 0; option < tag->tokensOrTagsSize; option++){
        free(tag->tokensOrTags[option]);
    }
    free(tag->tokensOrTags);

    //free does nothing for NULL, so a tag without an add callback needs no special case
    free(tag->addDataFn);

    //lastly the tag itself
    free(tag);
}
//private function to create the saved data for the language
void ARC_ParserLang_CreateDataFn(void **data){
//function callback to cleanup added tags
ARC_Vector_DestroyDataFn destroyParserTagFn = ARC_ParserLang_VectorDestroyParserTagFn;
//I don't see a reason to have a comparison function right now. this might change in the future
ARC_Vector_Create((ARC_Vector **)data, NULL, &destroyParserTagFn);
}
//private function to destroy the saved data for the language
void ARC_ParserLang_DestroyDataFn(void *data){
ARC_Vector_Destroy(data);
}
//private function to add char to constant name
//
//userData is read as a pointer to an ARC_String pointer, the token's data is appended to that
//string and the string is created first when it does not exist yet
//data, tagId, and tagIndex are not used
void ARC_ParserLang_AddCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
    //without user data there is nowhere to store the char
    if(userData == NULL){
        return;
    }

    //recast the userData to make it easier to use
    ARC_String **variable = (ARC_String **)userData;

    //create the const string if it is null (the pointer itself was checked above, so the
    //string it points at is what needs checking here)
    if(*variable == NULL){
        //this will be freed in the main parser lang add
        ARC_String_Create(variable, NULL, 0);
    }

    ARC_String_Append(variable, token->data);
}
//private function that puts the token's data in front of the string held in the user data and
//adds the combined string to the user data's vector
//
//userData is read as an ARC_ParserLangVectorStringData, its string is destroyed and set to NULL
//data, tagId, and tagIndex are not used
void ARC_ParserLang_AddFirstCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
    //nothing can be done without the user data
    if(!userData){
        return;
    }

    ARC_ParserLangVectorStringData *collected = (ARC_ParserLangVectorStringData *)userData;

    //start from a copy of the token's data, then put the held string after it
    ARC_String *combined = NULL;
    ARC_String_Copy(&combined, token->data);
    ARC_String_Append(&combined, collected->string);

    //the held string has been appended to combined, so it is destroyed and cleared
    ARC_String_Destroy(collected->string);
    collected->string = NULL;

    //the vector receives the combined string
    ARC_Vector_Add(collected->vector, combined);
}
void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
//<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
uint32_t *line[] = { (uint32_t[]){ 3, ARC_PARSERLANG_BODY, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_BODY }, (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA } };
@ -75,7 +193,7 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
//<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }};
ARC_ParserLanguageTag parserLangTags[13] = {
ARC_ParserTag parserLangTags[13] = {
{ ARC_PARSERLANG_LINE , line , 4, NULL, NULL },
{ ARC_PARSERLANG_BODY , body , 1, NULL, NULL },
{ ARC_PARSERLANG_ARGUMENTS , arguments , 2, NULL, NULL },
@ -96,6 +214,9 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
parserLangTags //data
};
ARC_ParserData_CreateFn createDataFn = ARC_ParserLang_CreateDataFn;
ARC_ParserData_DestroyFn destroyDataFn = ARC_ParserLang_DestroyDataFn;
//TODO: add the create, destroy, and add callbacks
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, NULL, NULL);
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, &createDataFn, &destroyDataFn);
}

View file

@ -66,3 +66,5 @@ defineIntLine
│ └─────────────────
└───────────────────

View file

@ -94,3 +94,33 @@ ARC_TEST(Lexer_Check_Id_Unordered_Not_Continious){
ARC_Lexer_Destroy(lexer);
}
//rule ids 0 through 4 registered out of order are expected to be reported as continuous
ARC_TEST(Lexer_Check_Continious){
    ARC_Lexer *lexer;
    ARC_Lexer_Create(&lexer);

    //the ids are out of order on purpose, each id is paired with the char its rule matches
    const uint32_t ruleIds[]   = {  2 , 0,  3 ,  1 ,  4  };
    const char     ruleChars[] = { ':', 0, ':', ':', ':' };

    for(uint32_t index = 0; index < sizeof(ruleIds) / sizeof(ruleIds[0]); index++){
        ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ruleIds[index], ruleChars[index]));
    }

    ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_True);

    ARC_Lexer_Destroy(lexer);
}
//rule ids 1, 2, 3, 4, and 8 leave gaps, so the rules are expected to be reported as not continuous
ARC_TEST(Lexer_Check_Not_Continious){
    ARC_Lexer *lexer;
    ARC_Lexer_Create(&lexer);

    //the ids are out of order on purpose, each id is paired with the char its rule matches
    const uint32_t ruleIds[]   = {  2 , 8,  3 ,  1 ,  4  };
    const char     ruleChars[] = { ':', 0, ':', ':', ':' };

    for(uint32_t index = 0; index < sizeof(ruleIds) / sizeof(ruleIds[0]); index++){
        ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ruleIds[index], ruleChars[index]));
    }

    ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_False);

    ARC_Lexer_Destroy(lexer);
}

View file

@ -2,6 +2,7 @@
#include "arc/std/errno.h"
#include "arc/std/parser.h"
#include "arc/std/lexer.h"
#include "arc/std/vector.h"
#include "arc/std/parser/parserlang.h"
#include <stddef.h>
@ -12,16 +13,16 @@
#define VARIABLE_NAME 24
#define VARIABLE 25
void TEST_ParserLanguageTag_CreateStringFn(void **data){
void TEST_ParserData_CreateStringFn(void **data){
ARC_String_Create((ARC_String **)data, NULL, 0);
}
void TEST_ParserLanguageTag_DestroyStringFn(void *data){
void TEST_ParserData_DestroyStringFn(void *data){
ARC_String_Destroy((ARC_String *)data);
}
//for this very basic example, the tagId does not matter
void TEST_ParserLanguageTag_AddFirstCharFn(void **data, uint32_t tagId, ARC_LexerToken *token, void *userData){
void TEST_ParserTag_AddFirstCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
if(*data == NULL){
return;
}
@ -34,7 +35,7 @@ void TEST_ParserLanguageTag_AddFirstCharFn(void **data, uint32_t tagId, ARC_Lexe
}
//for this very basic example, the tagId does not matter
void TEST_ParserLanguageTag_AddCharFn(void **data, uint32_t tagId, ARC_LexerToken *token, void *userData){
void TEST_ParserTag_AddCharFn(void **data, uint32_t tagId, uint32_t tagIndex, ARC_LexerToken *token, void *userData){
if(*data == NULL){
return;
}
@ -47,13 +48,13 @@ uint32_t *variableNameTags[] = { (uint32_t[]){ 2, CHAR_OR_NUM, VARIABLE_NAME
uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
//TODO: note how language function callbacks work, and how they use the parentData if createDataFn is NULL
ARC_ParserData_CreateFn createStringFn = TEST_ParserLanguageTag_CreateStringFn;
ARC_ParserData_DestroyFn destroyStringFn = TEST_ParserLanguageTag_DestroyStringFn;
ARC_ParserData_CreateFn createStringFn = TEST_ParserData_CreateStringFn;
ARC_ParserData_DestroyFn destroyStringFn = TEST_ParserData_DestroyStringFn;
ARC_ParserLanguageTag_AddDataFn addCharFn = TEST_ParserLanguageTag_AddCharFn;
ARC_ParserLanguageTag_AddDataFn addFirstCharFn = TEST_ParserLanguageTag_AddFirstCharFn;
ARC_ParserTag_AddDataFn addCharFn = TEST_ParserTag_AddCharFn;
ARC_ParserTag_AddDataFn addFirstCharFn = TEST_ParserTag_AddFirstCharFn;
ARC_ParserLanguageTag testTags[3] = {
ARC_ParserTag testTags[3] = {
{
VARIABLE, //tagId
variableTokensOrTags, //tokensOrTags
@ -213,7 +214,6 @@ ARC_TEST(Parser_Basic_GetParsedValue){
ARC_Parser_Destroy(parser);
}
ARC_TEST(Parser_ParserLang_BasicTest){
ARC_Parser *parser;
ARC_Parser_CreateAsParserLang(&parser);
@ -229,3 +229,26 @@ ARC_TEST(Parser_ParserLang_BasicTest){
ARC_CHECK(arc_errno == 0);
}
//creates a parser from a vector holding the three test tags, parses a variable name with it,
//and checks that arc_errno is still 0 after everything is cleaned up
ARC_TEST(Parser_ParserLang_BasicVector){
    //the vector stores pointers into testTags and has no destroy callback
    ARC_Vector *testLanguage;
    ARC_Vector_Create(&testLanguage, NULL, NULL);

    for(uint32_t tagIndex = 0; tagIndex < 3; tagIndex++){
        ARC_Vector_Add(testLanguage, &testTags[tagIndex]);
    }

    ARC_Parser *parser;
    ARC_Parser_CreateFromVector(&parser, testLanguage, TEST_Parser_InitLexerRulesFn, NULL, NULL);

    ARC_String *tempString;
    ARC_String_CreateWithStrlen(&tempString, "variablename");

    //this destroys string, so no need for cleanup
    ARC_Parser_Parse(parser, &tempString);

    //cleanup
    ARC_Parser_Destroy(parser);
    ARC_Vector_Destroy(testLanguage);

    ARC_CHECK(arc_errno == 0);
}