moved doxygen to doc folder and still working on csv

This commit is contained in:
herbglitch 2024-12-08 22:19:43 -07:00
parent ca6a9c118f
commit 280a70c6e8
24 changed files with 282 additions and 105 deletions

View file

@ -1,7 +1,9 @@
#include "arc/std/parser/csv.h"
#include "arc/std/parser/helpers.h"
#include "arc/std/bool.h"
#include "arc/std/parser.h"
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
typedef struct ARC_ParserCSVUserData {
@ -11,22 +13,205 @@ typedef struct ARC_ParserCSVUserData {
} ARC_ParserCSVUserData;
/**
 * registers the token rules the csv parser's lexer uses
 *
 * four rules are registered on the given lexer: one for the comma, one for the newline,
 * and two character ranges that sit on either side of the comma in the ascii table
 *
 * lexer the lexer to register the csv token rules to
 */
void ARC_ParserCSV_InitLexerRulesFn(ARC_Lexer *lexer){
//single character tokens, ',' and '\n'
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSER_CSV_CHAR_COMMA , ',' ));
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSER_CSV_CHAR_NEWLINE, '\n'));
//NOTE: used an ascii table to get these values
//0x21 is '!' and 0x2b is '+', the characters just below ',' (0x2c) in the ascii table
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSER_CSV_CHAR_BEFORE_COMMA, 0x21, 0x2b));
//0x2d is '-' and 0x7e is '~', the characters just above ',' (0x2c) in the ascii table
//NOTE(review): 0x20 (space), tab and '\r' fall outside both ranges, confirm that is intended
//NOTE(review): presumably the range bounds are inclusive, verify against the lexer rule
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_PARSER_CSV_CHAR_AFTER_COMMA , 0x2d, 0x7e));
}
/**
 * maps a token or tag name used in the csv language string to its numeric id
 *
 * the leftover unconditional `return 0;` at the top of the function made every
 * comparison below unreachable, it has been removed so the names resolve
 *
 * string the name to look up, either a token name (ex: "COMMA") or a tag name (ex: "<line>")
 *
 * returns the matching ARC_PARSER_CSV_* id, or ~(uint32_t)0 when the name is not recognized
 */
uint32_t ARC_ParserCSV_GetStringIdFn(ARC_String *string){
    /* ~ lexer tokens ~ */
    if(ARC_String_EqualsCStringWithStrlen(string, "COMMA")){
        return ARC_PARSER_CSV_CHAR_COMMA;
    }

    if(ARC_String_EqualsCStringWithStrlen(string, "NEWLINE")){
        return ARC_PARSER_CSV_CHAR_NEWLINE;
    }

    if(ARC_String_EqualsCStringWithStrlen(string, "CHAR_BEFORE_COMMA")){
        return ARC_PARSER_CSV_CHAR_BEFORE_COMMA;
    }

    if(ARC_String_EqualsCStringWithStrlen(string, "CHAR_AFTER_COMMA")){
        return ARC_PARSER_CSV_CHAR_AFTER_COMMA;
    }

    /* ~ grammar tags ~ */
    if(ARC_String_EqualsCStringWithStrlen(string, "<line>")){
        return ARC_PARSER_CSV_LINE;
    }

    if(ARC_String_EqualsCStringWithStrlen(string, "<data>")){
        return ARC_PARSER_CSV_DATA;
    }

    if(ARC_String_EqualsCStringWithStrlen(string, "<string>")){
        return ARC_PARSER_CSV_STRING;
    }

    if(ARC_String_EqualsCStringWithStrlen(string, "<nonCommaChar>")){
        return ARC_PARSER_CSV_NON_COMMA_CHAR;
    }

    //no name matched, return the all-ones value
    return ~(uint32_t)0;
}
void ARC_ParserCSVData_CreateFn(void **data, ARC_ParserTagToken *parsedData, void *userData){
if(data == NULL){
return;
//destroy callback for a vector whose elements are themselves vectors, the element is
//handed over as a void pointer and is destroyed as an ARC_Vector
void ARC_ParserCSVData_VectorDestroyVectorFn(void *data){
    ARC_Vector_Destroy((ARC_Vector *)data);
}
/**
 * walks a <data> tag and appends each <string> it contains to the last row of dataVector
 *
 * <data> -> <string> COMMA <data> | <string>
 *
 * dataVector a vector of row vectors, the value of each <string> is added to the last row
 * tagToken   the <data> tag token whose children are walked
 * userData   the csv user data, its castTypeFn turns the field string into the stored value
 */
void ARC_ParserCSVData_GetDataTag(ARC_Vector *dataVector, ARC_ParserTagToken *tagToken, ARC_ParserCSVUserData *userData){
    //loop through the tags either recursing to the next <data> or adding a field to the last row
    for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){
        ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index);

        switch(childTagToken->id){
            //a single field, build its string, cast it, and store it
            case ARC_PARSER_CSV_STRING: {
                //without a row there is nowhere to store the field, and (size - 1) would wrap around
                uint32_t rowCount = ARC_Vector_GetSize(dataVector);
                if(rowCount == 0){
                    continue;
                }

                //get the string of the data
                ARC_String *stringData = NULL;
                ARC_String_Create(&stringData, NULL, 0);
                ARC_ParserData_HelperRecurseStringAdd(&stringData, childTagToken);

                //move data string and cleanup, data starts as NULL so it is never read uninitialized
                void *data = NULL;
                userData->castTypeFn(&data, stringData);
                ARC_String_Destroy(stringData);

                //add the data to the last row vector
                ARC_Vector *row = (ARC_Vector *)ARC_Vector_Get(dataVector, rowCount - 1);
                ARC_Vector_Add(row, data);
                continue;
            }

            //recurse to run the rest of the fields on this line
            case ARC_PARSER_CSV_DATA:
                ARC_ParserCSVData_GetDataTag(dataVector, childTagToken, userData);
                continue;

            default:
                //this should only be the case for the comma
                continue;
        }
    }
}
void ARC_ParserCSVData_DestroyFn(void *data, void *userData){
//<line> -> <data> NEWLINE <line> | <data> | NEWLINE <line> | LAMBDA
/**
 * walks a <line> tag, filling dataVector with one row vector per line of the csv
 *
 * the <data> case used to do nothing, so no field was ever stored, it now hands the
 * <data> tag to ARC_ParserCSVData_GetDataTag which adds the fields to the last row
 *
 * dataVector a vector of row vectors, a new row is added for each NEWLINE token
 * tagToken   the <line> tag token whose children are walked
 * userData   the csv user data, passed through to ARC_ParserCSVData_GetDataTag
 */
void ARC_ParserCSVData_RunLineTag(ARC_Vector *dataVector, ARC_ParserTagToken *tagToken, ARC_ParserCSVUserData *userData){
    //loop through the tags either going to the next line or the next body
    for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){
        ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index);

        switch(childTagToken->id){
            //recurse to run the next line
            case ARC_PARSER_CSV_LINE:
                ARC_ParserCSVData_RunLineTag(dataVector, childTagToken, userData);
                continue;

            //add the fields of this line to the last row
            case ARC_PARSER_CSV_DATA:
                ARC_ParserCSVData_GetDataTag(dataVector, childTagToken, userData);
                continue;

            //add a new row for each new line
            case ARC_PARSER_CSV_CHAR_NEWLINE: {
                //create a new row, no destroy function is given for its contents
                ARC_Vector *row = NULL;
                ARC_Vector_Create(&row, NULL, NULL);
                ARC_Vector_Add(dataVector, (void *)row);
                continue;
            }

            default:
                continue;
        }
    }
}
/**
 * builds an ARC_ParserCSVData from the parsed csv tag tree
 *
 * the fields are first collected into a vector of row vectors, checked to make sure every
 * row has the same width, and then copied into a 2d array of void pointers
 *
 * fixes over the previous version:
 *  - csvData->data was indexed without ever being allocated, the row pointer array is now created
 *  - every malloc is checked, on failure everything allocated here is released and *data is left untouched
 *  - csvData->headers is set to NULL instead of being left uninitialized
 *
 * data       where the created ARC_ParserCSVData is stored, only written to on success
 * parsedData the root tag token of the parsed csv
 * userData   an ARC_ParserCSVUserData, holds the header flag and the type callbacks
 */
void ARC_ParserCSVData_CreateFn(void **data, ARC_ParserTagToken *parsedData, void *userData){
    if(data == NULL || userData == NULL){
        //TODO: error here?
        return;
    }

    //cast the csv data back to its original type
    ARC_ParserCSVUserData *csvUserData = (ARC_ParserCSVUserData *)userData;

    //create a vector for the data to be stored in before being copied to a 2d array
    ARC_Vector *dataVector;
    ARC_Vector_DestroyDataFn destroyVectorFn = ARC_ParserCSVData_VectorDestroyVectorFn;
    ARC_Vector_Create(&dataVector, NULL, &destroyVectorFn);

    //create the first row vector, does not need a destroy function as its contents will be moved
    ARC_Vector *dataRowVector;
    ARC_Vector_Create(&dataRowVector, NULL, NULL);

    //add the row to the dataVector
    ARC_Vector_Add(dataVector, (void *)dataRowVector);

    //recursively add data from the parsedData to the data vector
    ARC_ParserCSVData_RunLineTag(dataVector, parsedData, csvUserData);

    if(ARC_Vector_GetSize(dataVector) == 0){
        //TODO: iterate and clear the vector
        ARC_Vector_Destroy(dataVector);
        return;
    }

    //get the first row of dataVector for its width
    dataRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, 0);

    //check that all the rows are the same size
    for(uint32_t rowIndex = 1; rowIndex < ARC_Vector_GetSize(dataVector); rowIndex++){
        ARC_Vector *currentRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex);

        //TODO: probs want to error
        //cleanup and exit if they don't match
        if(ARC_Vector_GetSize(dataRowVector) != ARC_Vector_GetSize(currentRowVector)){
            //TODO: iterate and clear the vector
            ARC_Vector_Destroy(dataVector);
            return;
        }
    }

    //when there is a header row the copied data starts one row later
    uint32_t dataStartIndex = 0;
    if(csvUserData->header == ARC_True){
        //TODO: headers
        dataStartIndex++;
    }

    //create the data that will be saved
    ARC_ParserCSVData *csvData = (ARC_ParserCSVData *)malloc(sizeof(ARC_ParserCSVData));
    if(csvData == NULL){
        //TODO: iterate and clear the vector
        ARC_Vector_Destroy(dataVector);
        return;
    }

    //NOTE(review): headers are not populated yet (see the TODO above), set to NULL so the
    //              field is never read uninitialized, the destroy path needs to tolerate this
    csvData->headers = NULL;
    csvData->height  = ARC_Vector_GetSize(dataVector) - dataStartIndex;
    csvData->width   = ARC_Vector_GetSize(dataRowVector);
    csvData->data    = NULL;

    //create the array of row pointers, skipped when there are no rows so malloc(0) is never relied on
    if(csvData->height > 0){
        csvData->data = malloc(sizeof(*csvData->data) * csvData->height);
        if(csvData->data == NULL){
            //TODO: iterate and clear the vector
            free(csvData);
            ARC_Vector_Destroy(dataVector);
            return;
        }
    }

    //copy the data
    for(uint32_t y = 0; y < csvData->height; y++){
        csvData->data[y] = (void **)malloc(sizeof(void *) * csvData->width);

        //a zero width row may legitimately get NULL back from malloc, only a non-zero width is a failure
        if(csvData->data[y] == NULL && csvData->width > 0){
            //release the rows copied so far, the elements themselves still belong to the row vectors
            for(uint32_t createdY = 0; createdY < y; createdY++){
                free(csvData->data[createdY]);
            }

            //TODO: iterate and clear the vector
            free(csvData->data);
            free(csvData);
            ARC_Vector_Destroy(dataVector);
            return;
        }

        ARC_Vector *currentRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, y + dataStartIndex);
        for(uint32_t x = 0; x < csvData->width; x++){
            csvData->data[y][x] = ARC_Vector_Get(currentRowVector, x);
        }
    }

    //the elements now live in csvData, the row vectors were created without an element destroy function
    ARC_Vector_Destroy(dataVector);

    *data = (void *)csvData;
}
void ARC_ParserCSVData_DestroyFn(void *data, ARC_Bool clear, void *userData){
if(userData == NULL){ return; }
ARC_ParserCSVUserData *csvUserData = (ARC_ParserCSVUserData *)userData;
if(data != NULL){
ARC_ParserCSVData *csvData = (ARC_ParserCSVData *)data;
//cleanup the headers
for(uint32_t x = 0; x < csvData->width; x++){
ARC_String *string = csvData->headers[x];
ARC_String_Destroy(string);
@ -34,22 +219,30 @@ void ARC_ParserCSVData_DestroyFn(void *data, void *userData){
free(csvData->headers);
for(uint32_t y = 0; y < csvData->height; y++){
csvData->destroyTypeFn(csvData->data[y]);
//cleanup each element in the data
for(uint32_t x = 0; x < csvData->width; x++){
csvUserData->destroyTypeFn(csvData->data[y] + x);
}
//cleanup each row of data
free(csvData->data[y]);
}
//cleanup the data
free(csvData->data);
}
if(userData != NULL){
free((ARC_ParserCSVUserData *)userData);
if(clear == ARC_False){
free(csvUserData);
}
}
void ARC_ParserCSV_CreateAsParser(ARC_Parser **parser, ARC_Bool header, ARC_ParserCSV_CastTypeFn castTypeFn, ARC_ParserCSV_DestroyTypeFn destroyTypeFn){
/* ~ define the language as a string ~ */
char *languageCString =
"<line> -> <data> NEWLINE <line> | <data> | NEWLINE <line> | LAMBDA"
"<data> -> <string> COMMA <data> | <string>"
"<string> -> COMMON_CHAR <string> | COMON_CHAR";
"<line> -> <data> NEWLINE <line> | <data> | NEWLINE <line> | LAMBDA\n"
"<data> -> <string> COMMA <data> | <string>\n"
"<string> -> <nonCommaChar> <string> | <nonCommaChar>\n"
"<nonCommaChar> -> CHAR_BEFORE_COMMA | CHAR_AFTER_COMMA\n";
ARC_String *languageString;
ARC_String_CreateWithStrlen(&languageString, languageCString);

View file

@ -1,4 +1,5 @@
#include "arc/std/parser/parserlang.h"
#include "arc/std/bool.h"
#include "arc/std/errno.h"
#include "arc/std/lexer.h"
#include "arc/std/parser.h"
@ -39,12 +40,12 @@ void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
ARC_String_Destroy(arrowString);
}
void ARC_ParserLang_VectorDestroyVector(void *data){
//destroy callback for a vector that stores other vectors, the element comes in as a
//void pointer and is destroyed as an ARC_Vector
void ARC_ParserLang_VectorDestroyVectorFn(void *data){
    ARC_Vector_Destroy((ARC_Vector *)data);
}
void ARC_ParserLang_VectorDestroyUInt32(void *data){
//destroy callback for a vector that stores uint32_t pointers, the element comes in as a
//void pointer and is released with free
void ARC_ParserLang_VectorDestroyUInt32Fn(void *data){
    free((uint32_t *)data);
}
@ -161,7 +162,7 @@ void ARC_ParserLangParsedData_GetArgumentsTag(ARC_Vector *tokensOrTags, ARC_Pars
uint32_t tokensOrTagsIndex = 0;
ARC_Vector *orTokensOrTags = NULL;
ARC_Vector_DestroyDataFn destroyUint32Fn = ARC_ParserLang_VectorDestroyUInt32;
ARC_Vector_DestroyDataFn destroyUint32Fn = ARC_ParserLang_VectorDestroyUInt32Fn;
switch(childTagToken->id){
case ARC_PARSERLANG_ARGUMENT:
@ -213,12 +214,12 @@ void ARC_ParserLangParsedData_CreateBodyTag(ARC_ParserTag **tag, ARC_ParserTagTo
/* ~ Tokens Or Tags Array ~ */
//create a vector to store another vector of data
ARC_Vector *tokensOrTags;
ARC_Vector_DestroyDataFn destroyVectorFn = ARC_ParserLang_VectorDestroyVector;
ARC_Vector_DestroyDataFn destroyVectorFn = ARC_ParserLang_VectorDestroyVectorFn;
ARC_Vector_Create(&tokensOrTags, NULL, &destroyVectorFn);
//create vector within the tokens or tags vector to store the or rule in
ARC_Vector *orTokensOrTags;
ARC_Vector_DestroyDataFn destroyUint32Fn = ARC_ParserLang_VectorDestroyUInt32;
ARC_Vector_DestroyDataFn destroyUint32Fn = ARC_ParserLang_VectorDestroyUInt32Fn;
ARC_Vector_Create(&orTokensOrTags, NULL, &destroyUint32Fn);
//add the first or vector to the tokensOrTags
@ -308,14 +309,14 @@ void ARC_ParserLang_CreateDataFn(void **data, ARC_ParserTagToken *parsedData, vo
}
//private function to destroy the saved data for the language
void ARC_ParserLang_DestroyDataFn(void *data, void *userData){
/**
 * releases what the parser language create function stored
 *
 * the superseded `if((ARC_Vector *)data != NULL){` check was left next to its replacement,
 * which nested the two conditions and left a brace unclosed, only the new check is kept
 *
 * data     the stored data, destroyed as an ARC_Vector when clear is ARC_False and data is not NULL
 * clear    ARC_False destroys the data, any other value leaves the data alone
 * userData freed as the ARC_Parser_GetStringIdFn pointer it was stored as, NULL is allowed
 *
 * NOTE(review): userData is freed even when clear is not ARC_False, confirm a later call
 *               with the same userData cannot happen
 */
void ARC_ParserLang_DestroyDataFn(void *data, ARC_Bool clear, void *userData){
    //free does nothing when given NULL, so no separate check is needed
    free((ARC_Parser_GetStringIdFn *)userData);

    //if not clearing (full destroy) check if there is data to free
    if(clear == ARC_False && (ARC_Vector *)data != NULL){
        ARC_Vector_Destroy((ARC_Vector *)data);
    }
}