basic vector complete but untested, added destroy callback to vector
This commit is contained in:
parent 6e814f12e6
commit 4c0c5d377d
4 changed files with 445 additions and 97 deletions
@@ -9,35 +9,6 @@ extern "C" {
#include "arc/std/string.h"
#include <stdint.h>

/**
 * @brief
 */
typedef enum ARC_Lexer_BasicTokens {
    LEXER_TOKEN_LAMBDA            = 0,
    LEXER_TOKEN_COLON             = ':',
    LEXER_TOKEN_SEMICOLON         = ';',
    LEXER_TOKEN_COMMA             = ',',
    LEXER_TOKEN_PERIOD            = '.',
    LEXER_TOKEN_FORWARD_SLASH     = '/',
    LEXER_TOKEN_BACK_SLASH        = '\\',
    LEXER_TOKEN_LEFT_PARENTHESIS  = '(',
    LEXER_TOKEN_RIGHT_PARENTHESIS = ')',
    LEXER_TOKEN_LEFT_CURLY_BRACE  = '{',
    LEXER_TOKEN_RIGHT_CURLY_BRACE = '}',
} ARC_Lexer_BasicTokens;

/**
 * @brief checks to see if a string is a type of token
 *
 * @param[in/out] string a string to be checked to see if it matches a token,
 *                       this needs to strip the token out for the lexer to avoid an infinite loop
 * @param[out] tokenData a place to store token data (like a variable name), can be NULL if not needed
 * @param[in] automataData any data that needs to be used for the ARC_Lexer_AutomataFn
 *
 * @return ARC_True if a token was successfully found, otherwise ARC_False
 */
typedef ARC_Bool (* ARC_Lexer_AutomataFn)(ARC_String **string, ARC_String **tokenData, void *automataData);

/**
 * @brief a lexer type
 */
@@ -47,46 +18,110 @@ typedef struct ARC_Lexer ARC_Lexer;
 * @brief a lexer token type
 */
typedef struct ARC_LexerToken {
    uint32_t id;

    ARC_Lexer_AutomataFn automataFn;
    void *automataData;
    //TODO: automataData free callback
    uint32_t rule;
    ARC_String *data;
} ARC_LexerToken;

/**
 * @brief
 * @brief checks to see if a string is a type of token
 *
 * @note do not set tokenData if this function returns ARC_False, doing so will create a memory leak
 *
 * @param[in/out] string a string to be checked to see if it matches a token,
 *                       this needs to strip the token out for the lexer to avoid an infinite loop
 * @param[out] tokenData a place to store token data (like a variable name), can be NULL if not needed
 * @param[in] automataData any data that needs to be used for the ARC_Lexer_AutomataFn
 *
 * @return ARC_True if a token was successfully found, otherwise ARC_False
 */
typedef ARC_Bool (* ARC_LexerTokenRule_AutomataFn)(ARC_String **string, ARC_String **tokenData, void *automataData);

/**
 * @brief a callback function to clean up ARC_LexerTokenRule's automataData
 *
 * @param automataData the void * automataData to destroy
 */
typedef void (* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData);

/**
 * @brief a lexer token rule type
 */
typedef struct ARC_LexerTokenRule {
    uint32_t id;

    void *automataData;

    ARC_LexerTokenRule_AutomataFn automataFn;
    ARC_LexerTokenRule_DestroyAutomataDataFn destroyAutomataDataFn;
} ARC_LexerTokenRule;

/**
 * @brief creates an ARC_Lexer type
 *
 * @param[out] lexer
 */
void ARC_Lexer_Create(ARC_Lexer **lexer);

/**
 * @brief
 * @brief destroys an ARC_Lexer type
 *
 * @param[in] lexer ARC_Lexer to free
 */
void ARC_Lexer_Destroy(ARC_Lexer *lexer);

/**
 * @brief
 * @brief adds a token rule to a lexer
 *
 * @param [in] lexer
 * @param [in] token
 * @param [in] lexer the lexer to add a token rule to
 * @param [in] tokenRule the token rule to add
 */
void ARC_Lexer_RegisterToken(ARC_Lexer *lexer, ARC_LexerToken token);
void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule);

/**
 * @brief
 * @brief creates tokens using a given string with ARC_LexerToken rules
 *
 * @param[in] lexer
 * @param[in] path
 * @param[in] lexer the lexer to get the ARC_LexerTokens from
 * @param[in/out] data the string to lex, will be freed and set to NULL by the end of this function
 */
void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data);

/**
 * @brief reads in and lexes a file
 *
 * @note this function will call ARC_Lexer_LexString, so its notes are applicable to this function
 *
 * @param[in] lexer the lexer which holds the rules to use
 * @param[in] path path of file to read in and lex
 */
void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path);

/**
 * @brief gets a token at a given index from a lexer
 *
 * @note unless you have a very good reason, you probably don't want to mess with the token's string,
 *       as that will probably change the token's string inside the lexer
 *
 * @param[in] lexer the lexer to get the token from
 * @param[in] index the index of the token in the lexer to get
 *
 * @return a copy of the token, or a token with max value for rule and NULL for data on error
 */
ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index);

/**
 * @brief gets the number of tokens stored in a lexer
 *
 * @param[in] lexer the lexer to get the tokens size from
 *
 * @return the size of the token array in a lexer
 */
uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer);

/**
 * @brief checks if the first character of string matches the automataData cast as a char
 *
 * @note this is intended as a helper callback
 * @note this function is an ARC_Lexer_AutomataFn callback
 *
 * @param[in/out] string a string to be checked to see if it matches a token,
@@ -101,6 +136,7 @@ ARC_Bool ARC_Lexer_AutomataMatchCharFn(ARC_String **string, ARC_String **tokenDa
/**
 * @brief checks if the substring automataData as an ARC_String matches the first part of string
 *
 * @note this is intended as a helper callback
 * @note this function is an ARC_Lexer_AutomataFn callback
 *
 * @param[in/out] string a string to be checked to see if it matches a token,
@@ -112,6 +148,81 @@ ARC_Bool ARC_Lexer_AutomataMatchCharFn(ARC_String **string, ARC_String **tokenDa
 */
ARC_Bool ARC_Lexer_AutomataMatchStringFn(ARC_String **string, ARC_String **tokenData, void *automataData);

/**
 * @brief creates an ARC_LexerTokenRule with a given id and character
 *
 * @note this is intended as a helper function
 *
 * @param[in] id a token's id (basically the token value)
 * @param[in] character the character to match against
 *
 * @return a token rule based on the id and character
 */
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id, char character);

/**
 * @brief creates an ARC_LexerTokenRule with a given id and string
 *
 * @note this is intended as a helper function
 * @note string will not be freed (it will be copied and the copy will be freed)
 *
 * @param[in] id a token's id (basically the token value)
 * @param[in] string the string to match against, will be copied
 *
 * @return a token rule based on the id and string
 */
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule(uint32_t id, ARC_String *string);

/**
 * @brief basic token type ids, chars, and tags
 */
#define LEXER_TOKEN_COLON_ID 1
#define LEXER_TOKEN_COLON_CHAR ':'
#define LEXER_TOKEN_COLON_TAG "COLON"
#define LEXER_TOKEN_SEMICOLON_ID 2
#define LEXER_TOKEN_SEMICOLON_CHAR ';'
#define LEXER_TOKEN_SEMICOLON_TAG "SEMICOLON"
#define LEXER_TOKEN_COMMA_ID 3
#define LEXER_TOKEN_COMMA_CHAR ','
#define LEXER_TOKEN_COMMA_TAG "COMMA"
#define LEXER_TOKEN_PERIOD_ID 4
#define LEXER_TOKEN_PERIOD_CHAR '.'
#define LEXER_TOKEN_PERIOD_TAG "PERIOD"
#define LEXER_TOKEN_FORWARD_SLASH_ID 5
#define LEXER_TOKEN_FORWARD_SLASH_CHAR '/'
#define LEXER_TOKEN_FORWARD_SLASH_TAG "FORWARD_SLASH"
#define LEXER_TOKEN_BACK_SLASH_ID 6
#define LEXER_TOKEN_BACK_SLASH_CHAR '\\'
#define LEXER_TOKEN_BACK_SLASH_TAG "BACK_SLASH"
#define LEXER_TOKEN_LEFT_PARENTHESIS_ID 7
#define LEXER_TOKEN_LEFT_PARENTHESIS_CHAR '('
#define LEXER_TOKEN_LEFT_PARENTHESIS_TAG "LEFT_PARENTHESIS"
#define LEXER_TOKEN_RIGHT_PARENTHESIS_ID 8
#define LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR ')'
#define LEXER_TOKEN_RIGHT_PARENTHESIS_TAG "RIGHT_PARENTHESIS"
#define LEXER_TOKEN_LEFT_CURLY_BRACE_ID 9
#define LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR '{'
#define LEXER_TOKEN_LEFT_CURLY_BRACE_TAG "LEFT_CURLY_BRACE"
#define LEXER_TOKEN_RIGHT_CURLY_BRACE_ID 10
#define LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR '}'
#define LEXER_TOKEN_RIGHT_CURLY_BRACE_TAG "RIGHT_CURLY_BRACE"
#define LEXER_TOKEN_BANG_ID 11
#define LEXER_TOKEN_BANG_CHAR '!'
#define LEXER_TOKEN_BANG_TAG "BANG"
#define LEXER_TOKEN_AT_ID 12
#define LEXER_TOKEN_AT_CHAR '@'
#define LEXER_TOKEN_AT_TAG "AT"
#define LEXER_TOKEN_HASH_ID 13
#define LEXER_TOKEN_HASH_CHAR '#'
#define LEXER_TOKEN_HASH_TAG "HASH"
#define LEXER_TOKEN_PERCENT_ID 14
#define LEXER_TOKEN_PERCENT_CHAR '%'
#define LEXER_TOKEN_PERCENT_TAG "PERCENT"

/**
 * @brief adds a bunch of basic token rules (matching the basic tokens defined above)
 */
void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer);

#ifdef __cplusplus
}
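
A minimal usage sketch of the API declared above, so the intended flow is visible in one place: create a lexer, register the basic single character rules, lex a string, and walk the resulting tokens. ARC_String_CreateWithCString is an assumed constructor name (it is not part of this diff); substitute whatever arc/std/string.h actually provides.

#include "arc/std/lexer.h"
#include "arc/std/string.h"

#include <stdint.h>
#include <stdio.h>

int main(void){
    //create a lexer and register the basic single character rules from this header
    ARC_Lexer *lexer;
    ARC_Lexer_Create(&lexer);
    ARC_Lexer_InitBasicTokenRules(lexer);

    //ARC_String_CreateWithCString is an assumed constructor, not part of this commit
    ARC_String *source;
    ARC_String_CreateWithCString(&source, "(,);");

    //lex the string; it is freed and set to NULL by ARC_Lexer_LexString
    ARC_Lexer_LexString(lexer, &source);

    //walk the produced tokens by index
    for(uint32_t index = 0; index < ARC_Lexer_GetTokensSize(lexer); index++){
        ARC_LexerToken token = ARC_Lexer_GetToken(lexer, index);
        printf("token %u has rule id %u\n", (unsigned)index, (unsigned)token.rule);
    }

    ARC_Lexer_Destroy(lexer);
    return 0;
}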
@@ -26,9 +26,9 @@ typedef ARC_Bool (* ARC_Vector_CompareDataFn)(void *dataA, void *dataB);
/**
 * @brief a callback that cleans up memory when it is removed from the vector
 *
 * @param[in] data the item to delete
 * @param[in] data the item to destroy
 */
typedef void (* ARC_Vector_DeleteDataFn)(void *data);
typedef void (* ARC_Vector_DestroyDataFn)(void *data);

/**
 * @brief creates an ARC_Vector which is an "expandable" array
@@ -39,9 +39,9 @@ typedef void (* ARC_Vector_DeleteDataFn)(void *data);
 * @param[out] vector ARC_Vector to initialize
 * @param[in] compareDataFn a callback that checks if data stored in the array matches,
 *            if set to NULL and ARC_Vector_Remove is called, the pointer addresses will be compared
 * @param[in] deleteDataFn a callback that frees an item on remove or clear, can be set to NULL to do nothing
 * @param[in] destroyDataFn a callback that frees an item on remove or clear, can be set to NULL to do nothing
 */
void ARC_Vector_Create(ARC_Vector **vector, ARC_Vector_CompareDataFn *compareDataFn, ARC_Vector_DeleteDataFn *deleteDataFn);
void ARC_Vector_Create(ARC_Vector **vector, ARC_Vector_CompareDataFn *compareDataFn, ARC_Vector_DestroyDataFn *destroyDataFn);

/**
 * @brief destroys an ARC_Vector
@@ -86,6 +86,15 @@ void ARC_Vector_Remove(ARC_Vector *vector, void *data);
 */
void ARC_Vector_RemoveIndex(ARC_Vector *vector, uint32_t index);

/**
 * @brief clears all items from a vector
 *
 * @note this function will call ARC_Vector_RemoveIndex, so its notes are also applicable to this function
 *
 * @param[in] vector ARC_Vector to clear
 */
void ARC_Vector_Clear(ARC_Vector *vector);

/**
 * @brief gets the current size of an ARC_Vector as an unsigned 32 bit integer
 *
298 src/std/lexer.c
@@ -1,73 +1,293 @@
#include "arc/std/lexer.h"

#include "arc/std/bool.h"
#include "arc/std/errno.h"
#include "arc/std/string.h"
#include "arc/std/vector.h"
#include "arc/std/io.h"
#include <stdlib.h>

struct ARC_Lexer {
    ARC_Vector *tokenRules;
    ARC_Vector *tokens;
};

void ARC_Lexer_Create(ARC_Lexer **lexer){
    *lexer = (ARC_Lexer *)malloc(sizeof(ARC_Lexer));
//private function for checking if two lexer token rules are the same in a vector (based on id)
ARC_Bool ARC_LexerTokenRule_VectorCompareDataFn(void *dataA, void *dataB){
    ARC_LexerTokenRule *tokenRuleA = (ARC_LexerTokenRule *)dataA;
    ARC_LexerTokenRule *tokenRuleB = (ARC_LexerTokenRule *)dataB;

    //TODO: add compare and delete callbacks
    ARC_Vector_Create(&(*lexer)->tokens, NULL, NULL);
}

void ARC_Lexer_Destroy(ARC_Lexer *lexer){
    ARC_Vector_Destroy(lexer->tokens);

    free(lexer);
}

void ARC_Lexer_RegisterToken(ARC_Lexer *lexer, ARC_LexerToken token){
    ARC_LexerToken *storedToken = (ARC_LexerToken *)malloc(sizeof(ARC_LexerToken));
    *storedToken = token;
    ARC_Vector_Add(lexer->tokens, storedToken);
}

void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path){
    ARC_String *data;
    ARC_IO_FileToStr(path, &data);

    if(data != NULL){
        ARC_String_Destroy(data);
    }
}

ARC_Bool ARC_Lexer_AutomataMatchCharFn(ARC_String **string, ARC_String **tokenData, void *automataData){
    *tokenData = NULL;

    if((*string)->data[0] == *(char *)automataData){
        if((*string)->length == 1){
            ARC_String_Destroy(*string);
            *string = NULL;
        }

        ARC_String_ReplaceWithSubstring(string, 1, (*string)->length - 1);
    if(tokenRuleA->id == tokenRuleB->id){
        return ARC_True;
    }

    return ARC_False;
}

ARC_Bool ARC_Lexer_AutomataMatchStringFn(ARC_String **string, ARC_String **tokenData, void *automataData){
//private function for destroying a lexer token rule from a vector
void ARC_LexerTokenRule_VectorDestroyDataFn(void *data){
    ARC_LexerTokenRule *tokenRule = (ARC_LexerTokenRule *)data;
    tokenRule->destroyAutomataDataFn(tokenRule->automataData);
    free(tokenRule);
}

//private function for destroying a lexer token from a vector
void ARC_LexerToken_VectorDestroyDataFn(void *data){
    ARC_LexerToken *token = (ARC_LexerToken *)data;
    free(token);
}

void ARC_Lexer_Create(ARC_Lexer **lexer){
    //create the lexer
    *lexer = (ARC_Lexer *)malloc(sizeof(ARC_Lexer));

    //setup token rules vector with compare and destroy functions
    ARC_Vector_CompareDataFn tokenRulesVectorCompareDataFn = ARC_LexerTokenRule_VectorCompareDataFn;
    ARC_Vector_DestroyDataFn tokenRulesVectorDestroyDataFn = ARC_LexerTokenRule_VectorDestroyDataFn;
    ARC_Vector_Create(&(*lexer)->tokenRules, &tokenRulesVectorCompareDataFn, &tokenRulesVectorDestroyDataFn);

    //setup tokens vector with a destroy function, we don't want a compareDataFn because the token's index will be used as its id
    ARC_Vector_DestroyDataFn tokenVectorDestroyDataFn = ARC_LexerToken_VectorDestroyDataFn;
    ARC_Vector_Create(&(*lexer)->tokens, NULL, &tokenVectorDestroyDataFn);
}

void ARC_Lexer_Destroy(ARC_Lexer *lexer){
    //free the tokens (there is a vectorDestroyDataFn, so tokens should be freed)
    ARC_Vector_Destroy(lexer->tokens);

    //free the token rules (there is a vectorDestroyDataFn, so token rules should be freed)
    ARC_Vector_Destroy(lexer->tokenRules);

    //free the lexer
    free(lexer);
}

void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule){
    //copy the token rule to a new pointer
    ARC_LexerTokenRule *storedTokenRule = (ARC_LexerTokenRule *)malloc(sizeof(ARC_LexerTokenRule));
    *storedTokenRule = tokenRule;

    //add to the vector and check for error (I'd be surprised if the error ever happened because that would most likely mean overflow)
    ARC_Vector_Add(lexer->tokenRules, storedTokenRule);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_RegisterTokenRule(lexer, tokenRule), errored when running ARC_Vector_Add(lexer->tokenRules, storedTokenRule);. check logs for more info");
        free(storedTokenRule);
    }
}

void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
    //check if there are any token rules to use
    if(ARC_Vector_GetSize(lexer->tokenRules) == 0){
        arc_errno = ARC_ERRNO_DATA;
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexString(lexer, data), no token rules registered to the lexer to use");
        return;
    }

    //this will run until every token is stripped or there is an error
    while(*data != NULL){
        ARC_Bool tokenFound = ARC_False;
        for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
            //check if the token rule is found
            ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);

            //tokenData should only exist if tokenFound is ARC_True as stated in the header
            ARC_String *tokenData;
            tokenFound = tokenRule->automataFn(data, &tokenData, tokenRule->automataData);

            //check if a token was found, if it wasn't, continue. I'm doing this to try to cut down on the amount of indentation
            if(tokenFound != ARC_True){
                continue;
            }

            //create the token to add
            ARC_LexerToken *token = (ARC_LexerToken *)malloc(sizeof(ARC_LexerToken));
            token->rule = tokenRule->id;
            token->data = tokenData;

            //add to the vector and check for error (I'd be surprised if the error ever happened because that would most likely mean overflow)
            ARC_Vector_Add(lexer->tokens, token);
            if(arc_errno){
                ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexString(lexer, data), errored when running ARC_Vector_Add(lexer->tokens, token);. check logs for more info");
                free(token);

                //clean up errored string
                ARC_String_Destroy(*data);
                *data = NULL;
                return;
            }

            //the token was added, so break to start checking tokens again
            break;
        }

        //if no token was found, throw an error
        if(tokenFound == ARC_False){
            arc_errno = ARC_ERRNO_DATA;
            ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Lexer_LexString(lexer, data), no tokens found with current string: \"%s\"", (*data)->data);

            //clean up errored string
            ARC_String_Destroy(*data);
            *data = NULL;

            //TODO: might want to do something with already tokenized data
            return;
        }
    }
}

void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path){
    //read file and clean up if it errors
    ARC_String *data;
    ARC_IO_FileToStr(path, &data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexFile(lexer, path), errored when running ARC_IO_FileToStr(path, &data);. check logs for more info");
        if(data != NULL){
            ARC_String_Destroy(data);
        }

        return;
    }

    //lex the string and log if there is an error, ARC_Lexer_LexString will clean up the string
    ARC_Lexer_LexString(lexer, &data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexFile(lexer, path), errored when running ARC_Lexer_LexString(lexer, data);. check logs for more info");
    }
}

ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
    //get the token and log if there is an error
    ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Lexer_GetToken(lexer, index), errored when running ARC_Vector_Get(lexer->tokens, index);. check logs for more info");

        //return a token with max rule value, and NULL for the string to signify an error
        return (ARC_LexerToken){
            ~(uint32_t)0,
            NULL
        };
    }

    //the token was found, so return a copy of it
    return *token;
}

uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){
    return ARC_Vector_GetSize(lexer->tokens);
}

ARC_Bool ARC_Lexer_AutomataMatchCharFn(ARC_String **string, ARC_String **tokenData, void *automataData){
    //if there is a match the token will be the same as automataData, so we don't need to store it again
    *tokenData = NULL;

    //check to see if there is a match with automataData as a char
    if((*string)->data[0] == *(char *)automataData){
        //to keep from erroring, instead of stripping from a 1 character string we can just destroy it
        if((*string)->length == 1){
            ARC_String_Destroy(*string);
            *string = NULL;
            return ARC_True;
        }

        //strip the character from the front of the string and return that a match was found
        ARC_String_ReplaceWithSubstring(string, 1, (*string)->length - 1);
        return ARC_True;
    }

    //no match was found
    return ARC_False;
}

ARC_Bool ARC_Lexer_AutomataMatchStringFn(ARC_String **string, ARC_String **tokenData, void *automataData){
    //if there is a match the token will be the same as automataData, so we don't need to store it again
    *tokenData = NULL;

    //check to see if there is a match with automataData as a string
    ARC_String *automataDataString = (ARC_String *)automataData;
    //to keep from erroring, instead of stripping from a same length string we can just destroy it
    if(ARC_String_Equals(*string, automataDataString)){
        if((*string)->length == automataDataString->length){
            ARC_String_Destroy(*string);
            *string = NULL;
            return ARC_True;
        }

        //strip the token string from the front of the string and return that a match was found
        ARC_String_ReplaceWithSubstring(string, automataDataString->length, (*string)->length - automataDataString->length);
        return ARC_True;
    }

    //no match was found
    return ARC_False;
}

//private function to free automataData stored as a char
void ARC_LexerTokenRule_DestroyCharAutomataDataFn(void *automataData){
    free((char *)automataData);
}

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id, char character){
    //create the token rule
    ARC_LexerTokenRule tokenRule;

    //set the id
    tokenRule.id = id;

    //create and store the automataData (which is just a char)
    char *automataData = (char *)malloc(sizeof(char));
    *automataData = character;
    tokenRule.automataData = (void *)automataData;

    //we can use the ARC_Lexer_AutomataMatchCharFn for this
    tokenRule.automataFn = ARC_Lexer_AutomataMatchCharFn;

    //add the private destroy function
    tokenRule.destroyAutomataDataFn = ARC_LexerTokenRule_DestroyCharAutomataDataFn;

    //return the created tokenRule
    return tokenRule;
}

//private function to free automataData stored as an ARC_String
void ARC_LexerTokenRule_DestroyStringAutomataDataFn(void *automataData){
    ARC_String_Destroy((ARC_String *)automataData);
}

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule(uint32_t id, ARC_String *string){
    //create the token rule
    ARC_LexerTokenRule tokenRule;

    //set the id
    tokenRule.id = id;

    //copy and store the automataData (which is just an ARC_String)
    ARC_String *automataData;
    ARC_String_Copy(&automataData, string);
    tokenRule.automataData = (void *)automataData;

    //we can use the ARC_Lexer_AutomataMatchStringFn for this
    tokenRule.automataFn = ARC_Lexer_AutomataMatchStringFn;

    //add the private destroy function
    tokenRule.destroyAutomataDataFn = ARC_LexerTokenRule_DestroyStringAutomataDataFn;

    //return the created tokenRule
    return tokenRule;
}

void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer){
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_COLON_ID            , LEXER_TOKEN_COLON_CHAR            ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_SEMICOLON_ID        , LEXER_TOKEN_SEMICOLON_CHAR        ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_COMMA_ID            , LEXER_TOKEN_COMMA_CHAR            ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_PERIOD_ID           , LEXER_TOKEN_PERIOD_CHAR           ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_FORWARD_SLASH_ID    , LEXER_TOKEN_FORWARD_SLASH_CHAR    ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_BACK_SLASH_ID       , LEXER_TOKEN_BACK_SLASH_CHAR       ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_LEFT_PARENTHESIS_ID , LEXER_TOKEN_LEFT_PARENTHESIS_CHAR ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_RIGHT_PARENTHESIS_ID, LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_LEFT_CURLY_BRACE_ID , LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_RIGHT_CURLY_BRACE_ID, LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_BANG_ID             , LEXER_TOKEN_BANG_CHAR             ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_AT_ID               , LEXER_TOKEN_AT_CHAR               ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_HASH_ID             , LEXER_TOKEN_HASH_CHAR             ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(LEXER_TOKEN_PERCENT_ID          , LEXER_TOKEN_PERCENT_CHAR          ));
}
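
A sketch of how a hand-rolled rule could plug into the callback typedefs above, for a case the two helper constructors don't cover; the digit matcher and LEXER_TOKEN_DIGIT_ID below are illustrative and not part of this commit. One thing it highlights: ARC_LexerTokenRule_VectorDestroyDataFn calls destroyAutomataDataFn unconditionally, so even a rule with no automataData appears to need a no-op destroy callback.

#include "arc/std/lexer.h"
#include "arc/std/bool.h"
#include "arc/std/string.h"

//illustrative id, not one of the LEXER_TOKEN_* defines from this commit
#define LEXER_TOKEN_DIGIT_ID 100

//matches a single ASCII digit at the front of string, leaving tokenData empty
static ARC_Bool ARC_Lexer_AutomataMatchDigitFn(ARC_String **string, ARC_String **tokenData, void *automataData){
    (void)automataData;

    char front = (*string)->data[0];
    if(front < '0' || front > '9'){
        //per the header note, tokenData is left untouched when returning ARC_False
        return ARC_False;
    }

    //no extra data is stored for this token
    *tokenData = NULL;

    //mirror the helper callbacks: destroy a 1 character string outright, otherwise strip the front
    if((*string)->length == 1){
        ARC_String_Destroy(*string);
        *string = NULL;
        return ARC_True;
    }

    ARC_String_ReplaceWithSubstring(string, 1, (*string)->length - 1);
    return ARC_True;
}

//nothing to free, but the rule destroy path calls this unconditionally
static void ARC_Lexer_DestroyNoAutomataDataFn(void *automataData){
    (void)automataData;
}

static void registerDigitRule(ARC_Lexer *lexer){
    ARC_LexerTokenRule rule;
    rule.id                    = LEXER_TOKEN_DIGIT_ID;
    rule.automataData          = NULL;
    rule.automataFn            = ARC_Lexer_AutomataMatchDigitFn;
    rule.destroyAutomataDataFn = ARC_Lexer_DestroyNoAutomataDataFn;

    ARC_Lexer_RegisterTokenRule(lexer, rule);
}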
@@ -13,7 +13,7 @@ struct ARC_Vector {
    void **data;

    ARC_Vector_CompareDataFn compareDataFn;
    ARC_Vector_DeleteDataFn *deleteDataFn;
    ARC_Vector_DestroyDataFn *destroyDataFn;
};

//this is a private function used as the default check for removing data from a given pointer
@@ -25,7 +25,7 @@ ARC_Bool ARC_Vector_CompareDataDefaultFn(void *dataA, void *dataB){
    return ARC_False;
}

void ARC_Vector_Create(ARC_Vector **vector, ARC_Vector_CompareDataFn *compareDataFn, ARC_Vector_DeleteDataFn *deleteDataFn){
void ARC_Vector_Create(ARC_Vector **vector, ARC_Vector_CompareDataFn *compareDataFn, ARC_Vector_DestroyDataFn *destroyDataFn){
    //create the vector
    *vector = (ARC_Vector *)malloc(sizeof(ARC_Vector));
@@ -41,19 +41,20 @@ void ARC_Vector_Create(ARC_Vector **vector, ARC_Vector_CompareDataFn *compareDat
    }

    //set NULL as a default for deleteDataFn, then copy the delete data function callback if it exists
    (*vector)->deleteDataFn = NULL;
    if(deleteDataFn != NULL){
        (*vector)->deleteDataFn = (ARC_Vector_DeleteDataFn *)malloc(sizeof(ARC_Vector_DeleteDataFn));
        *((*vector)->deleteDataFn) = *deleteDataFn;
    (*vector)->destroyDataFn = NULL;
    if(destroyDataFn != NULL){
        (*vector)->destroyDataFn = (ARC_Vector_DestroyDataFn *)malloc(sizeof(ARC_Vector_DestroyDataFn));
        *((*vector)->destroyDataFn) = *destroyDataFn;
    }
}

void ARC_Vector_Destroy(ARC_Vector *vector){
    //TODO: clear vector before destroying
    //remove all the contents before destroying the vector
    ARC_Vector_Clear(vector);

    //free the delete data function if it exists
    if(vector->deleteDataFn){
        free(vector->deleteDataFn);
    if(vector->destroyDataFn){
        free(vector->destroyDataFn);
    }

    //free everything stored in the vector
@@ -112,8 +113,8 @@ void ARC_Vector_RemoveIndex(ARC_Vector *vector, uint32_t index){
    }

    //call delete data to clean up item if delete data function exists
    if(vector->deleteDataFn != NULL){
        (*(vector->deleteDataFn))(vector->data[index]);
    if(vector->destroyDataFn != NULL){
        (*(vector->destroyDataFn))(vector->data[index]);
    }

    //we will be using index to iterate as we will not use it again, so we can skip the first part of the for loop
@@ -135,6 +136,13 @@ void ARC_Vector_RemoveIndex(ARC_Vector *vector, uint32_t index){
    vector->data = (void **)realloc(vector->data, sizeof(void *) * vector->currentCapacity);
}

void ARC_Vector_Clear(ARC_Vector *vector){
    //remove each item in the vector until the vector is empty
    while(ARC_Vector_GetSize(vector) != 0){
        ARC_Vector_RemoveIndex(vector, 0);
    }
}

uint32_t ARC_Vector_GetSize(ARC_Vector *vector){
    return vector->currentSize;
}
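
A short sketch of the new destroy-callback path in use, with free as the per-item cleanup; ARC_Vector_Add is assumed to take (vector, data) as it is called in lexer.c above, and the rest follows the signatures in this diff.

#include "arc/std/vector.h"

#include <stdlib.h>

int main(void){
    //pass free as the destroyDataFn so Remove, Clear, and Destroy release each stored item
    ARC_Vector *vector;
    ARC_Vector_DestroyDataFn destroyDataFn = free;
    ARC_Vector_Create(&vector, NULL, &destroyDataFn);

    //store a few heap allocated ints
    for(int i = 0; i < 4; i++){
        int *value = (int *)malloc(sizeof(int));
        *value = i;
        ARC_Vector_Add(vector, value);
    }

    //Clear calls the destroy callback (free here) on every item, Destroy then frees the vector itself
    ARC_Vector_Clear(vector);
    ARC_Vector_Destroy(vector);
    return 0;
}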