Working on CSV data: the parser works but needs more testing. Might add a bool to the ParserData_DestroyFn callback for clearing.

This commit is contained in:
herbglitch 2024-12-04 02:43:58 -07:00
parent 4c3d357cb9
commit ca6a9c118f
13 changed files with 384 additions and 279 deletions

View file

@ -477,8 +477,8 @@ void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer){
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_NUMBER, '0', '9'));
//alpha char
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHALOWERCHAR, 'a', 'z'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHAUPPERCHAR, 'A', 'Z'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHA_LOWER_CHAR, 'a', 'z'));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z'));
//whitespace
//TODO: fix this

View file

@ -323,6 +323,7 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
//error if anything went wrong
if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
ARC_Lexer_Clear(parser->lexer);
ARC_ParserTagToken_Destroy(tagToken);
arc_errno = ARC_ERRNO_DATA;
ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);

View file

@ -0,0 +1,71 @@
#include "arc/std/parser/csv.h"
#include "arc/std/parser/helpers.h"
#include "arc/std/parser.h"
#include <stddef.h>
#include <stdlib.h>
/**
 * @brief settings bundle passed as the user data of the CSV parser
 *
 * @note allocated and filled by ARC_ParserCSV_CreateAsParser and freed by
 *       ARC_ParserCSVData_DestroyFn
 */
typedef struct ARC_ParserCSVUserData {
//copied from the header argument of ARC_ParserCSV_CreateAsParser
//presumably marks whether the first csv row holds column names, TODO confirm
ARC_Bool header;
//caller supplied callback, presumably converts parsed csv text into the caller's type, TODO confirm
ARC_ParserCSV_CastTypeFn castTypeFn;
//caller supplied callback, presumably releases a value produced by castTypeFn, TODO confirm
ARC_ParserCSV_DestroyTypeFn destroyTypeFn;
} ARC_ParserCSVUserData;
/**
 * @brief lexer rule setup callback used by the CSV parser
 *
 * @note placeholder, no token rules are registered yet
 *
 * @param[in] lexer lexer that is meant to receive the CSV token rules, currently unused
 */
void ARC_ParserCSV_InitLexerRulesFn(ARC_Lexer *lexer){
    //nothing is registered yet, the cast only marks the parameter as intentionally unused
    (void)lexer;
}
/**
 * @brief string to id lookup callback used by the CSV parser
 *
 * @note placeholder, every string currently maps to the id 0
 *
 * @param[in] string name to look up, currently unused
 *
 * @return always 0
 */
uint32_t ARC_ParserCSV_GetStringIdFn(ARC_String *string){
    //the name is not inspected yet, the cast only marks the parameter as intentionally unused
    (void)string;

    const uint32_t placeholderId = 0;
    return placeholderId;
}
/**
 * @brief data creation callback used by the CSV parser
 *
 * @note placeholder, nothing is built from the parsed tokens yet and *data is left untouched
 *
 * @param[out] data       location meant to receive the created data, ignored when NULL
 * @param[in]  parsedData parsed tag token tree, currently unused
 * @param[in]  userData   user data given to the parser, currently unused
 */
void ARC_ParserCSVData_CreateFn(void **data, ARC_ParserTagToken *parsedData, void *userData){
    //neither input is consumed yet, the casts only mark them as intentionally unused
    (void)parsedData;
    (void)userData;

    //without an output location there is nothing to do
    if(!data){
        return;
    }
}
/**
 * @brief destroy callback for CSV parser data, releases the parsed table contents and the user data
 *
 * @note destroys every header string and frees the header array, hands every row to the
 *       data's destroyTypeFn and frees the row array, then frees userData
 *
 * @note NOTE(review): the ARC_ParserCSVData struct itself is not freed here, confirm who owns
 *       that allocation once ARC_ParserCSVData_CreateFn is implemented
 *
 * @note NOTE(review): userData is freed on every call, if the parser invokes this callback more
 *       than once (for example when clearing) the later calls would see a dangling pointer, confirm
 *
 * @param[in] data     ARC_ParserCSVData to tear down, may be NULL
 * @param[in] userData ARC_ParserCSVUserData allocated by ARC_ParserCSV_CreateAsParser, may be NULL
 */
void ARC_ParserCSVData_DestroyFn(void *data, void *userData){
    ARC_ParserCSVData *csvData = (ARC_ParserCSVData *)data;

    if(csvData != NULL){
        //the header array may be absent (presumably when no header row was parsed), only walk it when it exists
        if(csvData->headers != NULL){
            for(uint32_t x = 0; x < csvData->width; x++){
                ARC_String_Destroy(csvData->headers[x]);
            }

            free(csvData->headers);

            //reset so a repeated call cannot free the array twice
            csvData->headers = NULL;
        }

        if(csvData->data != NULL){
            //without a destroy callback the rows are left alone and only the row array is released
            if(csvData->destroyTypeFn != NULL){
                for(uint32_t y = 0; y < csvData->height; y++){
                    csvData->destroyTypeFn(csvData->data[y]);
                }
            }

            free(csvData->data);

            //reset so a repeated call cannot free the array twice
            csvData->data = NULL;
        }
    }

    //free(NULL) is a no-op, so no guard is needed
    free(userData);
}
/**
 * @brief creates a parser for CSV text
 *
 * @note builds the CSV grammar as a parser language string, allocates an ARC_ParserCSVUserData
 *       holding the given settings, and forwards both to ARC_Parser_CreateFromString together
 *       with the CSV lexer, string id, create and destroy callbacks
 *
 * @note the user data is freed by ARC_ParserCSVData_DestroyFn
 *
 * @note if the user data cannot be allocated no parser is created and *parser is set to NULL
 *
 * @param[out] parser        location that receives the created parser
 * @param[in]  header        stored in the user data, presumably marks whether the first row holds column names, TODO confirm
 * @param[in]  castTypeFn    callback stored in the user data
 * @param[in]  destroyTypeFn callback stored in the user data
 */
void ARC_ParserCSV_CreateAsParser(ARC_Parser **parser, ARC_Bool header, ARC_ParserCSV_CastTypeFn castTypeFn, ARC_ParserCSV_DestroyTypeFn destroyTypeFn){
    /* ~ init the userdata ~ */
    ARC_ParserCSVUserData *userdata = malloc(sizeof *userdata);
    if(userdata == NULL){
        //NOTE(review): the file's errno header is not visibly included here, so the failure is only signaled through *parser
        if(parser != NULL){
            *parser = NULL;
        }
        return;
    }

    userdata->header        = header;
    userdata->castTypeFn    = castTypeFn;
    userdata->destroyTypeFn = destroyTypeFn;

    /* ~ define the language as a string ~ */
    //adjacent string literals are joined with nothing between them, so every rule but the last
    //carries an explicit '\n' to keep the rules on separate lines of the language
    char *languageCString =
        "<line> -> <data> NEWLINE <line> | <data> | NEWLINE <line> | LAMBDA\n"
        "<data> -> <string> COMMA <data> | <string>\n"
        "<string> -> COMMON_CHAR <string> | COMMON_CHAR";

    ARC_String *languageString;
    ARC_String_CreateWithStrlen(&languageString, languageCString);

    /* ~ create the language ~ */
    //the callbacks are passed by address, so they need named locals to point at
    ARC_ParserData_CreateFn  createDataFn  = ARC_ParserCSVData_CreateFn;
    ARC_ParserData_DestroyFn destroyDataFn = ARC_ParserCSVData_DestroyFn;
    ARC_Parser_CreateFromString(parser, languageString, ARC_ParserCSV_InitLexerRulesFn, ARC_ParserCSV_GetStringIdFn, &createDataFn, &destroyDataFn, userdata);

    //cleanup
    ARC_String_Destroy(languageString);
}

View file

@ -0,0 +1,26 @@
#include "arc/std/parser/helpers.h"
#include "arc/std/errno.h"
/**
 * @brief appends the data of every token found in a tag token tree to an existing string
 *
 * @note a tag token that holds a token with data is treated as a leaf, its data is appended and
 *       its children are not visited, otherwise each child tag token is visited in vector order
 *
 * @note sets arc_errno to ARC_ERRNO_NULL and returns without appending when data, *data,
 *       or tagToken is NULL
 *
 * @param[in,out] data     string to append to, must already exist
 * @param[in]     tagToken root of the tag token tree to read from
 */
void ARC_ParserData_HelperRecurseStringAdd(ARC_String **data, ARC_ParserTagToken *tagToken){
    //check the handle before looking through it
    if(data == NULL || *data == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_ParserData_HelperRecurseStringAdd(data, tagToken), string data was NULL");
        return;
    }

    if(tagToken == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_ParserData_HelperRecurseStringAdd(data, tagToken), tagToken was NULL");
        return;
    }

    //only adds tokens with data (check the header for more information)
    if(tagToken->token != NULL && tagToken->token->data != NULL){
        ARC_String_Append(data, tagToken->token->data);
        return;
    }

    //TODO: probs don't need this
    if(tagToken->tagTokens == NULL){
        return;
    }

    //recurse through all the tag tokens, the vector is not modified here so its size is read once
    uint32_t tagTokensSize = ARC_Vector_GetSize(tagToken->tagTokens);
    for(uint32_t index = 0; index < tagTokensSize; index++){
        ARC_ParserData_HelperRecurseStringAdd(data, (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index));
    }
}

View file

@ -324,14 +324,14 @@ void ARC_ParserLang_CreateAsParser(ARC_Parser **parser, ARC_Parser_GetStringIdFn
//<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
uint32_t *line[] = { (uint32_t[]){ 3, ARC_PARSERLANG_BODY, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_BODY }, (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE }, (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA } };
//<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
uint32_t *body[] = { (uint32_t[]){ 5, ARC_PARSERLANG_TAG, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_ARROW_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS } };
//<body> -> <tag> <whitespace> ARROW <whitespace> <arguments>
uint32_t *body[] = { (uint32_t[]){ 5, ARC_PARSERLANG_TAG, ARC_PARSERLANG_WHITESPACE, ARC_PARSERLANG_TOKEN_ARROW_ID, ARC_PARSERLANG_WHITESPACE, ARC_PARSERLANG_ARGUMENTS } };
//<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <argument>
uint32_t *arguments[] = { (uint32_t[]){ 5, ARC_PARSERLANG_ARGUMENT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_OR_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS }, (uint32_t[]){ 1, ARC_PARSERLANG_ARGUMENT } };
//<arguments> -> <argument> <whitespace> OR <whitespace> <arguments> | <argument>
uint32_t *arguments[] = { (uint32_t[]){ 5, ARC_PARSERLANG_ARGUMENT, ARC_PARSERLANG_WHITESPACE, ARC_PARSERLANG_TOKEN_OR_ID, ARC_PARSERLANG_WHITESPACE, ARC_PARSERLANG_ARGUMENTS }, (uint32_t[]){ 1, ARC_PARSERLANG_ARGUMENT } };
//<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
uint32_t *argument[] = { (uint32_t[]){ 3, ARC_PARSERLANG_TAG_OR_CONSTANT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENT }, (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT } };
//<argument> -> <tagOrConstant> <whitespace> <argument> | <tagOrConstant>
uint32_t *argument[] = { (uint32_t[]){ 3, ARC_PARSERLANG_TAG_OR_CONSTANT, ARC_PARSERLANG_WHITESPACE, ARC_PARSERLANG_ARGUMENT }, (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT } };
//<tagOrConstant> -> <parserLangageTag> | <constant>
uint32_t *tagOrConstant[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TAG }, (uint32_t[]){ 1, ARC_PARSERLANG_CONSTANT } };
@ -360,7 +360,10 @@ void ARC_ParserLang_CreateAsParser(ARC_Parser **parser, ARC_Parser_GetStringIdFn
//<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }};
ARC_ParserTag parserLangTags[13] = {
//<whitespace> -> WHITESPACE <whitespace> | WHITESPACE
uint32_t *whitespace[] = { (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_WHITESPACE }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_WHITESPACE }};
ARC_ParserTag parserLangTags[14] = {
{ ARC_PARSERLANG_LINE , line , 4 },
{ ARC_PARSERLANG_BODY , body , 1 },
{ ARC_PARSERLANG_ARGUMENTS , arguments , 2 },
@ -373,11 +376,12 @@ void ARC_ParserLang_CreateAsParser(ARC_Parser **parser, ARC_Parser_GetStringIdFn
{ ARC_PARSERLANG_VARIABLE , variable , 2 },
{ ARC_PARSERLANG_VARIABLE_BODY , variableBody , 2 },
{ ARC_PARSERLANG_VARIABLE_CHAR , variableChar , 3 },
{ ARC_PARSERLANG_ALPHA_CHAR , alphaChar , 2 }
{ ARC_PARSERLANG_ALPHA_CHAR , alphaChar , 2 },
{ ARC_PARSERLANG_WHITESPACE , whitespace , 2 }
};
ARC_Array parserLanguageArray = {
13, //size
14, //size
parserLangTags //data
};