From 4264f5c5ca9f31155d340cbdf936d9c86138bee1 Mon Sep 17 00:00:00 2001
From: herbglitch <herbglitch@protonmail.com>
Date: Sun, 22 Dec 2024 23:31:37 -0700
Subject: [PATCH] started working on chemical (arc config), also removed
 files/config as that is now handled by parser/csv

---
 include/arc/std/chemical.h |  50 +++++++
 src/std/chemical.c         | 289 +++++++++++++++++++++++++++++++++++++
 tests/std/chemical.c       |  14 ++
 3 files changed, 353 insertions(+)
 create mode 100644 include/arc/std/chemical.h
 create mode 100644 src/std/chemical.c
 create mode 100644 tests/std/chemical.c
diff --git a/include/arc/std/chemical.h b/include/arc/std/chemical.h
new file mode 100644
index 0000000..2d44df3
--- /dev/null
+++ b/include/arc/std/chemical.h
@@ -0,0 +1,50 @@
+#ifndef ARC_STD_PARSER_CHEMICAL_H_
+#define ARC_STD_PARSER_CHEMICAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/*
+<language>     -> <group> <language> | <variableLine> <language> | <whitespace> <language>
+
+<group>        -> <groupName> <whitespace> <variable> <whitespace> LEFT_CURLY_BRACE <whitespace> <variableLine> <whitespace> RIGHT_CURLY_BRACE
+
+<variableLine> -> <type> <whitespace> <variable> <whitespace> EQUALS <whitespace> <value> <whitespace> SEMICOLON <whitespace>
+
+<groupName>    -> <variable>
+<type>         -> <variable>
+<value>        -> <variable> | <number> | <nestedValue>
+<nestedValue>  -> LEFT_CURLY_BRACE <whitespace> <valueArgs> <whitespace> RIGHT_CURLY_BRACE
+<valueArgs>    -> <value> | <value> COMMA <valueArgs>
+
+<variable>     -> ALPHA_UPPER_CHAR <variableName> | ALPHA_LOWER_CHAR <variableName> | UNDERSCORE <variableName>
+<variableName> -> <charOrNum> <variableName> | LAMBDA
+<charOrNum>    -> ALPHA_UPPER_CHAR | ALPHA_LOWER_CHAR | UNDERSCORE | NUMBER
+
+<number>       -> NUMBER <number> | NUMBER LAMBDA
+
+<whitespace>   -> SPACE <whitespace> | TAB <whitespace> | NEWLINE <whitespace> | LAMBDA
+*/
+
+typedef struct ARC_Chemical ARC_Chemical; //opaque, the struct itself is not defined in this header
+
+/**
+ * @brief declares the create function for an ARC_Chemical, presumably the new instance is stored in *parser (TODO: confirm, src/std/chemical.c does not define this yet)
+*/
+void ARC_Chemical_Create(ARC_Chemical **parser);
+
+
+/**
+ * @brief declares the destroy function for an ARC_Chemical, presumably for one made by ARC_Chemical_Create (TODO: confirm, src/std/chemical.c does not define this yet)
+*/
+void ARC_Chemical_Destroy(ARC_Chemical *parser);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //ARC_STD_PARSER_CHEMICAL_H_
diff --git a/src/std/chemical.c b/src/std/chemical.c
new file mode 100644
index 0000000..f586235
--- /dev/null
+++ b/src/std/chemical.c
@@ -0,0 +1,289 @@
+#include "arc/std/parser/csv.h"
+#include "arc/std/parser/helpers.h"
+#include "arc/std/bool.h"
+#include "arc/std/parser.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+typedef struct ARC_ParserCSVUserData { //settings handed as userData to the create and destroy callbacks in this file
+    ARC_Bool header; //when ARC_True the cells of the first row are kept as strings and stored as the headers
+    ARC_ParserCSV_CastTypeFn    castTypeFn; //called with the string of every cell that is not a header to get the stored value
+    ARC_ParserCSV_DestroyTypeFn destroyTypeFn; //called on every stored cell when the csv data is destroyed
+} ARC_ParserCSVUserData;
+
+void ARC_ParserCSV_InitLexerRulesFn(ARC_Lexer *lexer){
+    //TODO: no lexer rules are registered yet, the lexer is left exactly as it was passed in
+}
+
+uint32_t ARC_ParserCSV_GetStringIdFn(ARC_String *string){
+    //LAMBDA is the only name with an id so far, any other string gets the all ones id
+    const uint32_t unmatchedId = ~(uint32_t)0;
+
+    //pick the lambda tag on a match, otherwise fall back to the unmatched id
+    return (ARC_String_EqualsCStringWithStrlen(string, "LAMBDA")) ? ARC_PARSER_TAG_LAMBDA : unmatchedId;
+}
+
+//destroy callback given to the outer data vector, every element it holds is a row vector
+void ARC_ParserCSVData_VectorDestroyVectorFn(void *data){
+    ARC_Vector_Destroy((ARC_Vector *)data);
+}
+
+/**
+ * @brief adds the cells found under a data tag to the last row of the data vector
+ *
+ * @note cells of row zero are stored as their ARC_String when the header flag is set, every other cell is stored as what the cast callback wrote
+ *
+ * @param dataVector vector of row vectors, cells are added to its last row so it is expected to hold at least one
+ * @param tagToken   the tag whose children are walked, child data tags are recursed into
+ * @param userData   holds the header flag and the cast callback used on cells that are not headers
+*/
+void ARC_ParserCSVData_GetDataTag(ARC_Vector *dataVector, ARC_ParserTagToken *tagToken, ARC_ParserCSVUserData *userData){
+    for(uint32_t childIndex = 0; childIndex < ARC_Vector_GetSize(tagToken->tagTokens); childIndex++){
+        ARC_ParserTagToken *child = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, childIndex);
+
+        //a child data tag holds the remaining cells, so recurse into it
+        if(child->id == ARC_PARSER_CSV_DATA){
+            ARC_ParserCSVData_GetDataTag(dataVector, child, userData);
+            continue;
+        }
+
+        //anything else that is not a string adds nothing (this should only be the comma)
+        if(child->id != ARC_PARSER_CSV_STRING){
+            continue;
+        }
+
+        //get the string of the cell from the tokens under the string tag
+        ARC_String *cellString = NULL;
+        ARC_String_Create(&cellString, NULL, 0);
+        ARC_ParserData_HelperRecurseStringAdd(&cellString, child);
+
+        //get the last row vector, that is the row the cell belongs to
+        uint32_t    lastRowIndex = ARC_Vector_GetSize(dataVector) - 1;
+        ARC_Vector *lastRow      = (ARC_Vector *)ARC_Vector_Get(dataVector, lastRowIndex);
+
+        //a header cell (row zero when headers are on) is stored as the string itself
+        void *cell = (void *)cellString;
+
+        //any other cell is passed through the cast callback and the string is destroyed afterwards
+        if(lastRowIndex != 0 || userData->header == ARC_False){
+            cell = NULL;
+            userData->castTypeFn(&cell, cellString);
+            ARC_String_Destroy(cellString);
+        }
+
+        //add the cell to the row vector
+        ARC_Vector_Add(lastRow, cell);
+    }
+}
+
+/**
+ * @brief walks a line tag and fills the data vector with rows, the language for a line is
+ *        <line>   -> <data> NEWLINE <line> | <data> | NEWLINE <line> | LAMBDA
+ * @param dataVector vector of row vectors, it is expected to hold at least one row already
+ * @param tagToken   the line tag whose children are walked
+ * @param userData   passed on to ARC_ParserCSVData_GetDataTag
+*/
+void ARC_ParserCSVData_RunLineTag(ARC_Vector *dataVector, ARC_ParserTagToken *tagToken, ARC_ParserCSVUserData *userData){
+    for(uint32_t childIndex = 0; childIndex < ARC_Vector_GetSize(tagToken->tagTokens); childIndex++){
+        ARC_ParserTagToken *child = (ARC_ParserTagToken *)ARC_Vector_Get(tagToken->tagTokens, childIndex);
+
+        //the following lines are held in a child line tag, so recurse into it
+        if(child->id == ARC_PARSER_CSV_LINE){
+            ARC_ParserCSVData_RunLineTag(dataVector, child, userData);
+            continue;
+        }
+
+        //the cells of a line are added to the last row
+        if(child->id == ARC_PARSER_CSV_DATA){
+            ARC_ParserCSVData_GetDataTag(dataVector, child, userData);
+            continue;
+        }
+
+        //only a newline is left to handle, any other tag is skipped
+        if(child->id != ARC_PARSER_CSV_CHAR_NEWLINE){
+            continue;
+        }
+
+        //check if the last row is empty (no need to create another empty row)
+        uint32_t    lastRowIndex = ARC_Vector_GetSize(dataVector) - 1;
+        ARC_Vector *lastRow      = (ARC_Vector *)ARC_Vector_Get(dataVector, lastRowIndex);
+        if(ARC_Vector_GetSize(lastRow) != 0){
+            //create and add a new row for the next line
+            ARC_Vector *newRow = NULL;
+            ARC_Vector_Create(&newRow, NULL, NULL);
+            ARC_Vector_Add(dataVector, (void *)newRow);
+        }
+    }
+}
+
+/**
+ * @brief creates an ARC_ParserCSVData from the parsed tags and stores it in data, NULL is stored
+ *        if userData is NULL, if the first row is empty, or if the rows do not all match in width
+ * @param data       where the created ARC_ParserCSVData is stored, nothing is done if this is NULL
+ * @param parsedData the tag token the rows are read from by ARC_ParserCSVData_RunLineTag
+ * @param userData   an ARC_ParserCSVUserData, its header flag and cast callback are used
+*/
+void ARC_ParserCSVData_CreateFn(void **data, ARC_ParserTagToken *parsedData, void *userData){
+    //data needs to be checked before it is written to
+    if(data == NULL){
+        return;
+    }
+
+    *data = NULL;
+    if(userData == NULL){
+        //TODO: error here?
+        return;
+    }
+
+    //cast the csv data back to its original type
+    ARC_ParserCSVUserData *csvUserData = (ARC_ParserCSVUserData *)userData;
+
+    //create a vector for the data to be stored in before being copied to a 2d array
+    ARC_Vector *dataVector;
+    ARC_Vector_DestroyDataFn destroyVectorFn = ARC_ParserCSVData_VectorDestroyVectorFn;
+    ARC_Vector_Create(&dataVector, NULL, &destroyVectorFn);
+
+    //create and add the first row vector, it does not need a destroy function as its contents will be moved
+    ARC_Vector *dataRowVector;
+    ARC_Vector_Create(&dataRowVector, NULL, NULL);
+    ARC_Vector_Add(dataVector, (void *)dataRowVector);
+
+    //recursively add data from the parsedData to the data vector
+    ARC_ParserCSVData_RunLineTag(dataVector, parsedData, csvUserData);
+
+    //get the first line to check if it has any values
+    dataRowVector = ARC_Vector_Get(dataVector, 0);
+    if(ARC_Vector_GetSize(dataRowVector) == 0){
+        //TODO: iterate and clear the vector
+        ARC_Vector_Destroy(dataVector);
+        return;
+    }
+
+    //create the data that will be saved
+    ARC_ParserCSVData *csvData = (ARC_ParserCSVData *)malloc(sizeof(ARC_ParserCSVData));
+    csvData->hasHeader = csvUserData->header;
+    csvData->headers   = NULL;
+    csvData->height    = 0;
+    csvData->width     = ARC_Vector_GetSize(dataRowVector);
+    csvData->data      = NULL;
+
+    //the data starts one row later if a header exists, the header strings are moved out of the first row
+    uint32_t heightStartIndex = (csvData->hasHeader == ARC_True) ? 1 : 0;
+    if(csvData->hasHeader == ARC_True){
+        csvData->headers = (ARC_String **)malloc(sizeof(ARC_String *) * csvData->width);
+        for(uint32_t headerIndex = 0; headerIndex < csvData->width; headerIndex++){
+            csvData->headers[headerIndex] = (ARC_String *)ARC_Vector_Get(dataRowVector, headerIndex);
+        }
+    }
+
+    //check that all the rows are the same size
+    for(uint32_t rowIndex = heightStartIndex; rowIndex < ARC_Vector_GetSize(dataVector); rowIndex++){
+        ARC_Vector *currentRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex);
+        //skip an empty line (this should only be the case for the last line)
+        uint32_t currentRowVectorSize = ARC_Vector_GetSize(currentRowVector);
+        if(currentRowVectorSize == 0){
+            continue;
+        }
+
+        //a row was found so update the height
+        csvData->height++;
+
+        //TODO: probs want to error, for now cleanup (csvData and its header array are not stored anywhere else) and exit
+        if(csvData->width != currentRowVectorSize){
+            //TODO: iterate and clear the vector
+            free(csvData->headers);
+            free(csvData);
+            ARC_Vector_Destroy(dataVector);
+            return;
+        }
+    }
+
+    //init location to copy data to
+    csvData->data = (void ***)malloc(sizeof(void **) * csvData->height);
+
+    //copy the data, y only moves to the next row of data after a row with values was copied
+    uint32_t y = 0;
+    for(uint32_t rowIndex = heightStartIndex; rowIndex < ARC_Vector_GetSize(dataVector); rowIndex++){
+        ARC_Vector *currentRowVector = (ARC_Vector *)ARC_Vector_Get(dataVector, rowIndex);
+        //skip an empty line (should only be the case for the last line)
+        if(ARC_Vector_GetSize(currentRowVector) == 0){
+            continue;
+        }
+
+        //create the current row
+        csvData->data[y] = (void **)malloc(sizeof(void *) * csvData->width);
+        for(uint32_t x = 0; x < csvData->width; x++){
+            csvData->data[y][x] = ARC_Vector_Get(currentRowVector, x);
+        }
+        y++;
+    }
+
+    ARC_Vector_Destroy(dataVector);
+    *data = (void *)csvData;
+}
+
+/**
+ * @brief frees an ARC_ParserCSVData along with its headers and cells, the user data is freed as well
+ *        when clear is ARC_False
+ * @param data     the ARC_ParserCSVData to free, this can be NULL
+ * @param clear    the user data is only freed if this is ARC_False
+ * @param userData the ARC_ParserCSVUserData with the destroy callback, nothing is freed if this is NULL
+*/
+void ARC_ParserCSVData_DestroyFn(void *data, ARC_Bool clear, void *userData){
+    ARC_ParserCSVUserData *settings = (ARC_ParserCSVUserData *)userData;
+    ARC_ParserCSVData     *csv      = (ARC_ParserCSVData *)data;
+    if(settings == NULL){
+        return;
+    }
+
+    //the header strings and the array holding them only exist for a csv with a header
+    if(csv != NULL && csv->hasHeader == ARC_True){
+        for(uint32_t column = 0; column < csv->width; column++){
+            ARC_String_Destroy(csv->headers[column]);
+        }
+        free(csv->headers);
+    }
+
+    if(csv != NULL){
+        //each cell is passed to the destroy callback before the row that held it is freed
+        for(uint32_t row = 0; row < csv->height; row++){
+            for(uint32_t column = 0; column < csv->width; column++){
+                settings->destroyTypeFn(csv->data[row][column]);
+            }
+            free(csv->data[row]);
+        }
+        free(csv->data);
+        free(csv);
+    }
+
+    if(clear == ARC_False){
+        free(settings);
+    }
+}
+
+//creates a parser for the csv language, the user data allocated here is passed to the parser together with the callbacks
+void ARC_ParserCSV_CreateAsParser(ARC_Parser **parser, ARC_Bool header, ARC_ParserCSV_CastTypeFn castTypeFn, ARC_ParserCSV_DestroyTypeFn destroyTypeFn){
+    /* ~ store the settings the create and destroy callbacks read ~ */
+    ARC_ParserCSVUserData *settings = (ARC_ParserCSVUserData *)malloc(sizeof(ARC_ParserCSVUserData));
+    settings->header        = header;
+    settings->castTypeFn    = castTypeFn;
+    settings->destroyTypeFn = destroyTypeFn;
+
+    /* ~ define the language as a string ~ */
+    ARC_String *grammar;
+    ARC_String_CreateWithStrlen(&grammar,
+        "<line>         -> <data> NEWLINE <line> | <data> | NEWLINE <line> | LAMBDA\n"
+        "<data>         -> <string> COMMA <data> | <string>\n"
+        "<string>       -> <nonCommaChar> <string> | <nonCommaChar>\n"
+        "<nonCommaChar> -> CHAR_BEFORE_COMMA | CHAR_AFTER_COMMA\n"
+    );
+
+    /* ~ create the parser, the callbacks are passed by address so they are stored in variables first ~ */
+    ARC_ParserData_CreateFn  createDataFn  = ARC_ParserCSVData_CreateFn;
+    ARC_ParserData_DestroyFn destroyDataFn = ARC_ParserCSVData_DestroyFn;
+    ARC_Parser_CreateFromString(parser, grammar, ARC_ParserCSV_InitLexerRulesFn, ARC_ParserCSV_GetStringIdFn, &createDataFn, &destroyDataFn, settings);
+
+    //cleanup
+    ARC_String_Destroy(grammar);
+}
diff --git a/tests/std/chemical.c b/tests/std/chemical.c
new file mode 100644
index 0000000..00fe960
--- /dev/null
+++ b/tests/std/chemical.c
@@ -0,0 +1,14 @@
+#include "../../test.h"
+#include "arc/std/errno.h"
+#include "arc/std/chemical.h"
+//#include <stdlib.h>
+
+
+ARC_TEST(Chemical_BasicTest){
+    ARC_Chemical *chemical;
+    ARC_Chemical_Create(&chemical);
+
+    //TODO: nothing is checked yet, this only runs create followed by destroy
+    //cleanup
+    ARC_Chemical_Destroy(chemical);
+}