diff --git a/src/std/parser/csv.c b/src/std/parser/csv.c index b23ccb8..4f5e31b 100644 --- a/src/std/parser/csv.c +++ b/src/std/parser/csv.c @@ -67,6 +67,9 @@ void ARC_ParserCSVData_VectorDestroyVectorFn(void *data){ } void ARC_ParserCSVData_GetDataTag(ARC_Vector *dataVector, ARC_ParserTagToken *tagToken, ARC_ParserCSVUserData *userData){ + //cast the csv data back to its original type + ARC_ParserCSVUserData *csvUserData = (ARC_ParserCSVUserData *)userData; + //loop through the tags either recursing to next body or adding data to vector for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){ ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index); @@ -83,14 +86,19 @@ void ARC_ParserCSVData_GetDataTag(ARC_Vector *dataVector, ARC_ParserTagToken *ta ARC_ParserData_HelperRecurseStringAdd(&stringData, childTagToken); //move data string and cleanup - void *data; - userData->castTypeFn(&data, stringData); - ARC_String_Destroy(stringData); + void *data = (void *)stringData; //get the last row vector rowIndex = ARC_Vector_GetSize(dataVector) - 1; row = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex); + //only call the userData cast type callback if it is not a header + if(rowIndex != 0 || csvUserData->header == ARC_False){ + data = NULL; + userData->castTypeFn(&data, stringData); + ARC_String_Destroy(stringData); + } + //add the data to the row vector ARC_Vector_Add(row, data); continue; @@ -112,7 +120,8 @@ void ARC_ParserCSVData_RunLineTag(ARC_Vector *dataVector, ARC_ParserTagToken *ta for(uint32_t index = 0; index < ARC_Vector_GetSize(tagToken->tagTokens); index++){ ARC_ParserTagToken *childTagToken = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, index); - ARC_Vector *row; + ARC_Vector *row = NULL; + uint32_t rowIndex = 0; switch(childTagToken->id){ //recuse to run the next line @@ -127,7 +136,14 @@ void ARC_ParserCSVData_RunLineTag(ARC_Vector *dataVector, ARC_ParserTagToken *ta //add 
a new row for each new line case ARC_PARSER_CSV_CHAR_NEWLINE: - //create a new row + //check if current is not empty (no need to create another empty row) + rowIndex = ARC_Vector_GetSize(dataVector) - 1; + row = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex); + if(ARC_Vector_GetSize(row) == 0){ + continue; + } + + //create and add a new row ARC_Vector_Create(&row, NULL, NULL); ARC_Vector_Add(dataVector, (void *)row); continue; @@ -163,90 +179,71 @@ void ARC_ParserCSVData_CreateFn(void **data, ARC_ParserTagToken *parsedData, voi //recursively add data from the parsedData to the data vector ARC_ParserCSVData_RunLineTag(dataVector, parsedData, csvUserData); - if(ARC_Vector_GetSize(dataVector) == 0){ + + //get the first line to check if it has any values + dataRowVector = ARC_Vector_Get(dataVector, 0); + if(ARC_Vector_GetSize(dataRowVector) == 0){ //TODO: iterate and clear the vector ARC_Vector_Destroy(dataVector); return; } - uint32_t dataStartIndex = 0; - if(csvUserData->header == ARC_True){ - //TODO: headers - dataStartIndex++; - } + //create the data that will be saved + ARC_ParserCSVData *csvData = (ARC_ParserCSVData *)malloc(sizeof(ARC_ParserCSVData)); + csvData->hasHeader = csvUserData->header; + csvData->headers = NULL; + csvData->height = 0; + csvData->width = ARC_Vector_GetSize(dataRowVector); + csvData->data = NULL; - //init the height and width of all found rows, height starts at 1 as the first row is already found - uint32_t dataHeight = 1; - uint32_t dataWidth = 0; + //create the starting index (to offset the header if it exists) + uint32_t heightStartIndex = 0; - //TODO: fix this for headers - //get the first non-empty row of dataVector for its width - for(; dataStartIndex < ARC_Vector_GetSize(dataVector); dataStartIndex++){ - dataRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, dataStartIndex); - dataWidth = ARC_Vector_GetSize(dataRowVector); + //store the headers if they exist + if(csvData->hasHeader == ARC_True){ + heightStartIndex++; - 
//breakout if a valid row is found - if(dataWidth != 0){ - break; + //copy the headers + csvData->headers = (ARC_String **)malloc(sizeof(ARC_String *) * csvData->width); + for(uint32_t headerIndex = 0; headerIndex < csvData->width; headerIndex++){ + csvData->headers[headerIndex] = (ARC_String *)ARC_Vector_Get(dataRowVector, headerIndex); + } } } - //fix this for headers - //check if a valid row if found - if(dataWidth == 0){ - //TODO: iterate and clear the vector - //TODO: error here? - *data = NULL; - ARC_Vector_Destroy(dataVector); - return; - } - //check that all the rows are the same size - for(uint32_t rowIndex = dataStartIndex + 1; rowIndex < ARC_Vector_GetSize(dataVector); rowIndex++){ + for(uint32_t rowIndex = heightStartIndex; rowIndex < ARC_Vector_GetSize(dataVector); rowIndex++){ ARC_Vector *currentRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex); - //skip an empty line + //skip an empty line (this should only be the case for the last line) uint32_t currentRowVectorSize = ARC_Vector_GetSize(currentRowVector); if(currentRowVectorSize == 0){ continue; } //a row was found so update the height - dataHeight++; + csvData->height++; //TODO: probs want to error //cleanup and exit if they don't match - if(dataWidth != currentRowVectorSize){ + if(csvData->width != currentRowVectorSize){ //TODO: iterate and clear the vector ARC_Vector_Destroy(dataVector); + //release the partially built csvData so this early exit does not leak it (only the header pointer array is freed, the header strings are still referenced by the row vector) + free(csvData->headers); + free(csvData); return; } } - //create the data that will be saved - ARC_ParserCSVData *csvData = (ARC_ParserCSVData *)malloc(sizeof(ARC_ParserCSVData)); - csvData->hasHeader = csvUserData->header; - //TODO: fix this - csvData->headers = NULL; - csvData->height = dataHeight; - csvData->width = dataWidth; - - if(csvData->height == 0 || csvData->width == 0){ - //TODO: error here? 
- free(csvData); - *data = NULL; - return; - } - //init location to copy data to csvData->data = (void ***)malloc(sizeof(void **) * csvData->height); //copy the data uint32_t rowIndex = 0; for(uint32_t y = 0; y < csvData->height; y++, rowIndex++){ - ARC_Vector *currentRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex + dataStartIndex); + ARC_Vector *currentRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex + heightStartIndex); - //skip an empty line + //skip an empty line (should only be the case for the last line) uint32_t currentRowVectorSize = ARC_Vector_GetSize(currentRowVector); if(currentRowVectorSize == 0){ y--; diff --git a/tests/std/parser/csv.c b/tests/std/parser/csv.c index 6ea3333..5270b1b 100644 --- a/tests/std/parser/csv.c +++ b/tests/std/parser/csv.c @@ -44,3 +44,38 @@ ARC_TEST(Parser_ParserCSV_BasicTest){ ARC_Parser_Destroy(parser); } + +ARC_TEST(Parser_ParserCSV_BasicHeaderTest){ + ARC_Parser *parser; + ARC_ParserCSV_CreateAsParser(&parser, ARC_True, TEST_ParserCSV_CastTypeFn, TEST_ParserCSV_DestroyTypeFn); + + const char *tempCString = + "a,b,c,d\n" + "4,3,2,1\n" + "7,3,2,1\n" + "4,2,4,1\n" + "7,7,7,7\n"; + + ARC_String *tempString; + ARC_String_CreateWithStrlen(&tempString, (char *)tempCString); + + //this destroys string, so no need for cleanup + ARC_Parser_Parse(parser, &tempString); + ARC_CHECK(arc_errno == 0); + + ARC_ParserCSVData *data = (ARC_ParserCSVData *)ARC_Parser_GetData(parser); + + for(uint32_t x = 0; x < data->width; x++){ + printf("%s ", data->headers[x]->data); + } + printf("\n"); + + for(uint32_t y = 0; y < data->height; y++){ + for(uint32_t x = 0; x < data->width; x++){ + printf("%d ", *(int32_t *)(data->data[y][x])); + } + printf("\n"); + } + + ARC_Parser_Destroy(parser); +}