#include "arc/std/parser/csv.h"
#include "arc/std/parser/helpers.h"
#include "arc/std/bool.h"
#include "arc/std/parser.h"
//NOTE(review): the names of the three system headers were missing from the reviewed copy of this file,
//              these are the ones the code below needs (NULL, uint32_t, malloc/free) - confirm against the original
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

/**
 * @brief settings handed to the parser as userdata and read back by the create and destroy callbacks
*/
typedef struct ARC_ParserCSVUserData {
    //ARC_True when the first row holds headers, header cells are kept as ARC_String and are not cast
    ARC_Bool header;

    //callback that turns the string of a cell into the stored value
    ARC_ParserCSV_CastTypeFn castTypeFn;

    //callback that destroys a value created by castTypeFn
    ARC_ParserCSV_DestroyTypeFn destroyTypeFn;
} ARC_ParserCSVUserData;

/**
 * @brief lexer rule init callback passed to ARC_Parser_CreateFromString, it currently registers no rules
 *
 * @param lexer the lexer that would receive the rules, unused
*/
void ARC_ParserCSV_InitLexerRulesFn(ARC_Lexer *lexer){
    //TODO: no lexer rules are added yet
    (void)lexer;
}

/**
 * @brief maps a name used in the language string to its id
 *
 * @param string the name to look up
 *
 * @return ARC_PARSER_TAG_LAMBDA for "LAMBDA", otherwise ~(uint32_t)0
*/
uint32_t ARC_ParserCSV_GetStringIdFn(ARC_String *string){
    if(ARC_String_EqualsCStringWithStrlen(string, "LAMBDA")){
        return ARC_PARSER_TAG_LAMBDA;
    }

    //every other name is currently unknown
    return ~(uint32_t)0;
}

/**
 * @brief destroy callback for the outer data vector, each of its elements is a row vector
 *
 * @param data the row vector to destroy
*/
void ARC_ParserCSVData_VectorDestroyVectorFn(void *data){
    ARC_Vector *vector = (ARC_Vector *)data;
    ARC_Vector_Destroy(vector);
}

/**
 * @brief walks a data tag and appends every string it finds to the last row of dataVector
 *
 * @note a cell is stored as its ARC_String when it is in row 0 and userData->header is ARC_True,
 *       otherwise the string is passed to userData->castTypeFn and then destroyed
 *
 * @param dataVector vector of row vectors, it needs to hold at least one row
 * @param tagToken   the tag token whose children are walked
 * @param userData   the csv settings (header flag and cast callback)
*/
void ARC_ParserCSVData_GetDataTag(ARC_Vector *dataVector, ARC_ParserTagToken *tagToken, ARC_ParserCSVUserData *userData){
    //loop through the tags either recursing into nested data or adding a cell to the current row
    for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){
        ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index);

        ARC_String *stringData = NULL;
        ARC_Vector *row        = NULL;
        uint32_t    rowIndex   = 0;
        void       *cell       = NULL;

        switch(childTagToken->id){
            //a complete cell, build its string and store it
            case ARC_PARSER_CSV_STRING:
                //get the string of the data
                ARC_String_Create(&stringData, NULL, 0);
                ARC_ParserData_HelperRecurseStringAdd(&stringData, childTagToken);

                //by default the string itself is stored (this is what happens for a header cell)
                cell = (void *)stringData;

                //get the last row vector
                rowIndex = ARC_Vector_GetSize(dataVector) - 1;
                row = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex);

                //only call the cast type callback if the cell is not a header
                if(rowIndex != 0 || userData->header == ARC_False){
                    cell = NULL;
                    userData->castTypeFn(&cell, stringData);
                    ARC_String_Destroy(stringData);
                }

                //add the cell to the row vector
                ARC_Vector_Add(row, cell);
                continue;

            //nested data, recurse to pick up the remaining cells
            case ARC_PARSER_CSV_DATA:
                ARC_ParserCSVData_GetDataTag(dataVector, childTagToken, userData);
                continue;

            default:
                //this should only be the case for the comma
                continue;
        }
    }
}

/**
 * @brief walks a line tag, filling the last row with data and starting a new row on each newline
 *
 * @note a new row is only started when the current last row is not empty
 *
 * @param dataVector vector of row vectors, it needs to hold at least one row
 * @param tagToken   the tag token whose children are walked
 * @param userData   the csv settings, passed through to ARC_ParserCSVData_GetDataTag
*/
void ARC_ParserCSVData_RunLineTag(ARC_Vector *dataVector, ARC_ParserTagToken *tagToken, ARC_ParserCSVUserData *userData){
    //loop through the tags either going to the next line or the next body
    for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){
        ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index);

        ARC_Vector *row      = NULL;
        uint32_t    rowIndex = 0;

        switch(childTagToken->id){
            //recurse to run the next line
            case ARC_PARSER_CSV_LINE:
                ARC_ParserCSVData_RunLineTag(dataVector, childTagToken, userData);
                continue;

            //get the row data
            case ARC_PARSER_CSV_DATA:
                ARC_ParserCSVData_GetDataTag(dataVector, childTagToken, userData);
                continue;

            //add a new row for each new line
            case ARC_PARSER_CSV_CHAR_NEWLINE:
                //check if current is not empty (no need to create another empty row)
                rowIndex = ARC_Vector_GetSize(dataVector) - 1;
                row = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex);
                if(ARC_Vector_GetSize(row) == 0){
                    continue;
                }

                //create and add a new row
                ARC_Vector_Create(&row, NULL, NULL);
                ARC_Vector_Add(dataVector, (void *)row);
                continue;

            default:
                continue;
        }
    }
}

/**
 * @brief destroys the data vector together with every cell still stored in its rows
 *
 * @note only for the failure paths of ARC_ParserCSVData_CreateFn, on success the cells are owned by
 *       the created ARC_ParserCSVData and only the vectors themselves are destroyed
 *
 * @param dataVector vector of row vectors to destroy
 * @param userData   the csv settings, used to tell header cells (ARC_String) from cast cells
*/
static void ARC_ParserCSVData_DestroyDataVector(ARC_Vector *dataVector, ARC_ParserCSVUserData *userData){
    for(uint32_t rowIndex = 0; rowIndex < ARC_Vector_GetSize(dataVector); rowIndex++){
        ARC_Vector *row = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex);

        for(uint32_t x = 0; x < ARC_Vector_GetSize(row); x++){
            void *cell = ARC_Vector_Get(row, x);

            //header cells were never cast, so they are still strings
            if(rowIndex == 0 && userData->header == ARC_True){
                ARC_String_Destroy((ARC_String *)cell);
                continue;
            }

            userData->destroyTypeFn(cell);
        }
    }

    //the row vectors are destroyed by the destroy callback of the data vector
    ARC_Vector_Destroy(dataVector);
}

/**
 * @brief create callback for the parser, turns the parsed tag tree into an ARC_ParserCSVData
 *
 * @note *data is set to NULL when userData is NULL, when there is no data in the first row, when the rows
 *       do not all have the same width, or when an allocation fails
 * @note empty rows are skipped and do not count towards the height
 *
 * @param data       where the created ARC_ParserCSVData is stored, nothing is done if this is NULL
 * @param parsedData the root tag token of the parsed csv
 * @param userData   the ARC_ParserCSVUserData given when the parser was created
*/
void ARC_ParserCSVData_CreateFn(void **data, ARC_ParserTagToken *parsedData, void *userData){
    //data has to be checked before it is written to
    if(data == NULL){
        return;
    }

    *data = NULL;
    if(userData == NULL){
        //TODO: error here?
        return;
    }

    //cast the csv data back to its original type
    ARC_ParserCSVUserData *csvUserData = (ARC_ParserCSVUserData *)userData;

    //create a vector for the data to be stored in before being copied to a 2d array
    ARC_Vector *dataVector;
    ARC_Vector_DestroyDataFn destroyVectorFn = ARC_ParserCSVData_VectorDestroyVectorFn;
    ARC_Vector_Create(&dataVector, NULL, &destroyVectorFn);

    //create the first row vector, does not need a destroy function as its contents will be moved
    ARC_Vector *dataRowVector;
    ARC_Vector_Create(&dataRowVector, NULL, NULL);

    //add the row to the dataVector
    ARC_Vector_Add(dataVector, (void *)dataRowVector);

    //recursively add data from the parsedData to the data vector
    ARC_ParserCSVData_RunLineTag(dataVector, parsedData, csvUserData);

    //get the first line to check if it has any values, its size is the width every row has to match
    dataRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, 0);
    uint32_t width = ARC_Vector_GetSize(dataRowVector);
    if(width == 0){
        ARC_ParserCSVData_DestroyDataVector(dataVector, csvUserData);
        return;
    }

    //create the starting index (to offset the header if it exists)
    uint32_t heightStartIndex = 0;
    if(csvUserData->header == ARC_True){
        heightStartIndex = 1;
    }

    //count the rows and check that they are all the same size, nothing is allocated until this passes
    uint32_t height = 0;
    for(uint32_t rowIndex = heightStartIndex; rowIndex < ARC_Vector_GetSize(dataVector); rowIndex++){
        ARC_Vector *currentRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex);

        //skip an empty line (this should only be the case for the last line)
        uint32_t currentRowVectorSize = ARC_Vector_GetSize(currentRowVector);
        if(currentRowVectorSize == 0){
            continue;
        }

        //TODO: probs want to error
        //cleanup and exit if they don't match
        if(currentRowVectorSize != width){
            ARC_ParserCSVData_DestroyDataVector(dataVector, csvUserData);
            return;
        }

        height++;
    }

    //create the data that will be saved
    ARC_ParserCSVData *csvData = (ARC_ParserCSVData *)malloc(sizeof(*csvData));
    if(csvData == NULL){
        ARC_ParserCSVData_DestroyDataVector(dataVector, csvUserData);
        return;
    }

    //height counts the rows copied so far, so a failed allocation knows how many rows to free
    csvData->hasHeader = csvUserData->header;
    csvData->headers   = NULL;
    csvData->height    = 0;
    csvData->width     = width;
    csvData->data      = NULL;

    //store the headers if they exist
    if(csvData->hasHeader == ARC_True){
        csvData->headers = (ARC_String **)malloc(sizeof(ARC_String *) * width);
        if(csvData->headers == NULL){
            goto allocationFailed;
        }

        //move the headers
        for(uint32_t headerIndex = 0; headerIndex < width; headerIndex++){
            csvData->headers[headerIndex] = (ARC_String *)ARC_Vector_Get(dataRowVector, headerIndex);
        }
    }

    //init location to copy data to, left as NULL when there are no rows to avoid a zero sized allocation
    if(height != 0){
        csvData->data = (void ***)malloc(sizeof(void **) * height);
        if(csvData->data == NULL){
            goto allocationFailed;
        }
    }

    //move the data
    for(uint32_t rowIndex = heightStartIndex; rowIndex < ARC_Vector_GetSize(dataVector); rowIndex++){
        ARC_Vector *currentRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex);

        //skip an empty line (should only be the case for the last line)
        if(ARC_Vector_GetSize(currentRowVector) == 0){
            continue;
        }

        //create the current row
        void **rowData = (void **)malloc(sizeof(void *) * width);
        if(rowData == NULL){
            goto allocationFailed;
        }

        for(uint32_t x = 0; x < width; x++){
            rowData[x] = ARC_Vector_Get(currentRowVector, x);
        }

        csvData->data[csvData->height] = rowData;
        csvData->height++;
    }

    //the cells now belong to csvData, so only the vectors are destroyed
    ARC_Vector_Destroy(dataVector);
    *data = (void *)csvData;
    return;

allocationFailed:
    //TODO: probs want to error
    //only the arrays are freed here, the cells are still stored in the vector and are destroyed with it
    for(uint32_t y = 0; y < csvData->height; y++){
        free(csvData->data[y]);
    }

    free(csvData->data);
    free(csvData->headers);
    free(csvData);

    ARC_ParserCSVData_DestroyDataVector(dataVector, csvUserData);
}

/**
 * @brief destroy callback for the parser, frees an ARC_ParserCSVData and optionally the userdata
 *
 * @note nothing is freed when userData is NULL, as its destroyTypeFn is needed to destroy the cells
 *
 * @param data     the ARC_ParserCSVData to destroy, can be NULL
 * @param clear    the userdata is only freed when this is ARC_False
 *                 NOTE(review): presumably ARC_True means the parser is only clearing its data - confirm
 * @param userData the ARC_ParserCSVUserData given when the parser was created
*/
void ARC_ParserCSVData_DestroyFn(void *data, ARC_Bool clear, void *userData){
    if(userData == NULL){
        return;
    }

    ARC_ParserCSVUserData *csvUserData = (ARC_ParserCSVUserData *)userData;

    if(data != NULL){
        ARC_ParserCSVData *csvData = (ARC_ParserCSVData *)data;

        //cleanup the headers if they exist
        if(csvData->hasHeader == ARC_True && csvData->headers != NULL){
            for(uint32_t x = 0; x < csvData->width; x++){
                ARC_String *string = csvData->headers[x];
                ARC_String_Destroy(string);
            }

            free(csvData->headers);
        }

        for(uint32_t y = 0; y < csvData->height; y++){
            //cleanup each element in the data
            for(uint32_t x = 0; x < csvData->width; x++){
                csvUserData->destroyTypeFn(csvData->data[y][x]);
            }

            //cleanup each row of data
            free(csvData->data[y]);
        }

        //cleanup the data
        free(csvData->data);
        free(csvData);
    }

    if(clear == ARC_False){
        free(csvUserData);
    }
}

/**
 * @brief creates a parser for csv data
 *
 * @note the userdata allocated here is freed by ARC_ParserCSVData_DestroyFn when it is called with clear
 *       set to ARC_False
 * @note if the userdata cannot be allocated no parser is created and *parser is set to NULL
 *
 * @param parser        where the created parser is stored
 * @param header        ARC_True if the first row of the csv holds headers
 * @param castTypeFn    callback that turns the string of a cell into the stored value
 * @param destroyTypeFn callback that destroys a value created by castTypeFn
*/
void ARC_ParserCSV_CreateAsParser(ARC_Parser **parser, ARC_Bool header, ARC_ParserCSV_CastTypeFn castTypeFn, ARC_ParserCSV_DestroyTypeFn destroyTypeFn){
    /* ~ init the userdata ~ */
    ARC_ParserCSVUserData *userdata = (ARC_ParserCSVUserData *)malloc(sizeof(*userdata));
    if(userdata == NULL){
        //TODO: probs want to error
        if(parser != NULL){
            *parser = NULL;
        }

        return;
    }

    userdata->header        = header;
    userdata->castTypeFn    = castTypeFn;
    userdata->destroyTypeFn = destroyTypeFn;

    /* ~ define the language as a string ~ */
    //NOTE(review): the rule and tag names look like they are missing from these strings (nothing is left of
    //              "->" and some alternatives are empty), they are kept exactly as found - confirm against the original
    char *languageCString =
        " -> NEWLINE | | NEWLINE | LAMBDA\n"
        " -> COMMA | \n"
        " -> | \n"
        " -> CHAR_BEFORE_COMMA | CHAR_AFTER_COMMA\n";

    ARC_String *languageString;
    ARC_String_CreateWithStrlen(&languageString, languageCString);

    /* ~ create the language ~ */
    ARC_ParserData_CreateFn  createCharFn  = ARC_ParserCSVData_CreateFn;
    ARC_ParserData_DestroyFn destroyCharFn = ARC_ParserCSVData_DestroyFn;
    ARC_Parser_CreateFromString(parser, languageString, ARC_ParserCSV_InitLexerRulesFn, ARC_ParserCSV_GetStringIdFn, &createCharFn, &destroyCharFn, userdata);

    //cleanup
    ARC_String_Destroy(languageString);
}