#ifndef ARC_STD_LEXER_H_
#define ARC_STD_LEXER_H_

#ifdef __cplusplus
extern "C" {
#endif
#include "arc/std/string.h"
#include <stdint.h>

/**
 * @brief an opaque lexer type, only the forward declaration is visible in this header
 *
 * @note instances are created with ARC_Lexer_Create and freed with ARC_Lexer_Destroy
*/
typedef struct ARC_Lexer ARC_Lexer;

/**
 * @brief a lexer token type, this is what ARC_Lexer_GetToken returns by value
 *
 * @note ARC_Lexer_GetToken documents its error result as a token with the max value for rule and NULL for data
*/
typedef struct ARC_LexerToken {
    // NOTE(review): presumably the id of the ARC_LexerTokenRule that produced this token, confirm in the implementation
    uint32_t rule;
    // NOTE(review): presumably the tokenData stored by the rule's automataFn (like a variable name), confirm in the implementation
    ARC_String *data;
} ARC_LexerToken;

/**
 * @brief a callback that checks to see if a string is a type of token
 *
 * @note do not set tokenData if this function returns 0, doing so will create a memory leak
 *
 * @param[out] tokenData    a place to store token data (like a variable name), can be NULL if not needed
 * @param[in]  string       a string to be checked to see if it matches a token
 * @param[in]  automataData any data that needs to be used by the ARC_LexerTokenRule_AutomataFn
 *
 * @return the size of the token found, or 0 if the token was not found
*/
typedef uint32_t (* ARC_LexerTokenRule_AutomataFn)(ARC_String **tokenData, ARC_String *string, void *automataData);

/**
 * @brief a callback function to clean up an ARC_LexerTokenRule's automataData
 *
 * @param[in] automataData the void * automataData to destroy
*/
typedef void (* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData);

/**
 * @brief a lexer token rule type, passed by value to ARC_Lexer_RegisterTokenRule
*/
typedef struct ARC_LexerTokenRule {
    // the id of the rule (basically the token value)
    uint32_t id;

    // data handed to automataFn as its automataData parameter, cleaned up by destroyAutomataDataFn
    void *automataData;

    // callback that checks if a string matches this rule, see ARC_LexerTokenRule_AutomataFn
    ARC_LexerTokenRule_AutomataFn automataFn;
    // callback that cleans up automataData, see ARC_LexerTokenRule_DestroyAutomataDataFn
    ARC_LexerTokenRule_DestroyAutomataDataFn destroyAutomataDataFn;
} ARC_LexerTokenRule;

/**
 * @brief creates an ARC_Lexer type
 *
 * @note the created lexer should be freed with ARC_Lexer_Destroy
 *
 * @param[out] lexer address of the ARC_Lexer pointer to create
*/
void ARC_Lexer_Create(ARC_Lexer **lexer);

/**
 * @brief destroys an ARC_Lexer type
 *
 * @note the lexer pointer is passed by value, so the caller's pointer is not set to NULL by this function
 *
 * @param[in] lexer ARC_Lexer to free
*/
void ARC_Lexer_Destroy(ARC_Lexer *lexer);

/**
 * @brief adds a token rule to a lexer
 *
 * @param[in] lexer     the lexer to add a token rule to
 * @param[in] tokenRule the token rule to add, passed by value
*/
void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule);

/**
 * @brief clears all tokens from a lexer (will not clear token rules)
 *
 * @param[in] lexer the lexer to clear tokens from
*/
void ARC_Lexer_Clear(ARC_Lexer *lexer);

/**
 * @brief creates tokens using a given string with the lexer's ARC_LexerTokenRules
 *
 * @note ownership of data is taken by this function, do not use the string after this call
 *
 * @param[in]     lexer the lexer to store the created ARC_LexerTokens in
 * @param[in,out] data  the string to lex, will be freed and set to NULL by the end of this function
*/
void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data);

/**
 * @brief reads in and lexes a file
 *
 * @note this function will call ARC_Lexer_LexString, so its notes are applicable to this function
 *
 * @param[in] lexer the lexer which holds the rules to use
 * @param[in] path  path of file to read in and lex
*/
void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path);

/**
 * @brief prints the rule id and the hex of the function name for each token rule
 *
 * @note this is mostly used for debugging
 *
 * @param[in] lexer the lexer to print rules from
*/
void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer);

/**
 * @brief gets a token at a given index from a lexer
 *
 * @note unless you have a very good reason, you probably don't want to mess with the token's string.
 *       that will probably change the token's string inside the lexer
 *
 * @param[in] lexer the lexer to get the token from
 * @param[in] index the index of the token in the lexer to get
 *
 * @return a copy of the token, or a token with max value for rule and NULL for data on error
*/
ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index);

/**
 * @brief gets the size of the token array in a lexer
 *
 * @param[in] lexer the lexer to get the tokens size from
 *
 * @return the size of the token array in a lexer
*/
uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer);

/**
 * @brief returns a boolean based on if a given id is a stored token rule id
 *
 * @note NOTE(review): ARC_Bool is not declared in this header, presumably it is pulled in through
 *       "arc/std/string.h", confirm or include the header that declares it directly
 *
 * @param[in] lexer the lexer to check stored token rule ids
 * @param[in] id    the id to check against the token rules
 *
 * @return ARC_True if the id is a rule id, ARC_False otherwise
*/
ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id);

/**
 * @brief checks if the first character of string matches the automataData cast as a char
 *
 * @note this is intended as a helper callback
 * @note this function is an ARC_LexerTokenRule_AutomataFn callback
 *
 * @param[out] tokenData    a place to store token data (like a variable name), can be NULL if not needed
 * @param[in]  string       a string to be checked to see if it matches a token
 * @param[in]  automataData the character to match against, cast to a void *
 *
 * @return the size of the token found, or 0 if the token was not found
*/
uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *string, void *automataData);

/**
 * @brief checks if the substring automataData as an ARC_String matches the first part of string
 *
 * @note this is intended as a helper callback
 * @note this function is an ARC_LexerTokenRule_AutomataFn callback
 *
 * @param[out] tokenData    a place to store token data (like a variable name), can be NULL if not needed
 * @param[in]  string       a string to be checked to see if it matches a token
 * @param[in]  automataData the substring to match against, used as an ARC_String
 *
 * @return the size of the token found, or 0 if the token was not found
*/
uint32_t ARC_Lexer_AutomataMatchStringFn(ARC_String **tokenData, ARC_String *string, void *automataData);

/**
 * @brief checks if the first part of string is a character in substring
 *
 * @note this is intended as a helper callback
 * @note this function is an ARC_LexerTokenRule_AutomataFn callback
 * @note NOTE(review): the substring is presumably automataData used as an ARC_String (as in
 *       ARC_Lexer_AutomataMatchStringFn), confirm in the implementation
 *
 * @param[out] tokenData    a place to store token data (like a variable name), can be NULL if not needed
 * @param[in]  string       a string to be checked to see if it matches a token
 * @param[in]  automataData any data that needs to be used for the ARC_LexerTokenRule_AutomataFn
 *
 * @return the size of the token found, or 0 if the token was not found
*/
uint32_t ARC_Lexer_AutomataMatchCharInStringFn(ARC_String **tokenData, ARC_String *string, void *automataData);

/**
 * @brief creates an ARC_LexerTokenRule with a given id and character
 *
 * @note this is intended as a helper function
 *
 * @param[in] id        a token's id (basically the token value)
 * @param[in] character the character to match against
 *
 * @return a token rule based on the id and character
*/
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id, char character);

/**
 * @brief creates an ARC_LexerTokenRule with a given id and string
 *
 * @note this is intended as a helper function
 * @note string will not be freed (it will be copied and the copy will be freed)
 *
 * @param[in] id     a token's id (basically the token value)
 * @param[in] string the string to match against, will be copied
 *
 * @return a token rule based on the id and string
*/
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule(uint32_t id, ARC_String *string);

/**
 * @brief creates an ARC_LexerTokenRule with a given id and string
 *
 * @note this is intended as a helper function
 * @note string will not be freed (it will be copied and the copy will be freed)
 * @note NOTE(review): going by the name, the rule presumably matches a single character found in
 *       string (see ARC_Lexer_AutomataMatchCharInStringFn), confirm in the implementation
 *
 * @param[in] id     a token's id (basically the token value)
 * @param[in] string the string to match against, will be copied
 *
 * @return a token rule based on the id and string
*/
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint32_t id, ARC_String *string);

/**
 * @brief basic tokens
 *
 * @note these take the values 0 through 4, the ARC_LEXER_TOKEN_*_ID values in this header continue from 5
*/
#define ARC_LEXER_TOKEN_NULL           0
#define ARC_LEXER_TOKEN_NUMBER         1
#define ARC_LEXER_TOKEN_ALPHALOWERCHAR 2
#define ARC_LEXER_TOKEN_ALPHAUPPERCHAR 3
#define ARC_LEXER_TOKEN_WHITESPACE     4

/**
 * @brief basic token type ids, chars, and tags
 *
 * @note each token has an *_ID (a unique value continuing from the basic tokens, 5 through 19),
 *       a *_CHAR (the single character the token stands for), and a *_TAG (the token's name as a string)
 * @note NEWLINE only has an id and a char, there is no ARC_LEXER_TOKEN_NEWLINE_TAG
*/
#define ARC_LEXER_TOKEN_NEWLINE_ID             5
#define ARC_LEXER_TOKEN_NEWLINE_CHAR           '\n'
#define ARC_LEXER_TOKEN_COLON_ID               6
#define ARC_LEXER_TOKEN_COLON_CHAR             ':'
#define ARC_LEXER_TOKEN_COLON_TAG              "COLON"
#define ARC_LEXER_TOKEN_SEMICOLON_ID           7
#define ARC_LEXER_TOKEN_SEMICOLON_CHAR         ';'
#define ARC_LEXER_TOKEN_SEMICOLON_TAG          "SEMICOLON"
#define ARC_LEXER_TOKEN_COMMA_ID               8
#define ARC_LEXER_TOKEN_COMMA_CHAR             ','
#define ARC_LEXER_TOKEN_COMMA_TAG              "COMMA"
#define ARC_LEXER_TOKEN_PERIOD_ID              9
#define ARC_LEXER_TOKEN_PERIOD_CHAR            '.'
#define ARC_LEXER_TOKEN_PERIOD_TAG             "PERIOD"
#define ARC_LEXER_TOKEN_FORWARD_SLASH_ID       10
#define ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR     '/'
#define ARC_LEXER_TOKEN_FORWARD_SLASH_TAG      "FORWARD_SLASH"
#define ARC_LEXER_TOKEN_BACK_SLASH_ID          11
#define ARC_LEXER_TOKEN_BACK_SLASH_CHAR        '\\'
#define ARC_LEXER_TOKEN_BACK_SLASH_TAG         "BACK_SLASH"
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID    12
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR  '('
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_TAG   "LEFT_PARENTHESIS"
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID   13
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR ')'
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_TAG  "RIGHT_PARENTHESIS"
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID    14
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR  '{'
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_TAG   "LEFT_CURLY_BRACE"
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID   15
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR '}'
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_TAG  "RIGHT_CURLY_BRACE"
#define ARC_LEXER_TOKEN_BANG_ID                16
#define ARC_LEXER_TOKEN_BANG_CHAR              '!'
#define ARC_LEXER_TOKEN_BANG_TAG               "BANG"
#define ARC_LEXER_TOKEN_AT_ID                  17
// the at sign, this was previously '!' which duplicated ARC_LEXER_TOKEN_BANG_CHAR
#define ARC_LEXER_TOKEN_AT_CHAR                '@'
#define ARC_LEXER_TOKEN_AT_TAG                 "AT"
#define ARC_LEXER_TOKEN_HASH_ID                18
#define ARC_LEXER_TOKEN_HASH_CHAR              '#'
#define ARC_LEXER_TOKEN_HASH_TAG               "HASH"
#define ARC_LEXER_TOKEN_PERCENT_ID             19
#define ARC_LEXER_TOKEN_PERCENT_CHAR           '%'
#define ARC_LEXER_TOKEN_PERCENT_TAG            "PERCENT"

/**
 * @brief adds a bunch of basic token rules (matching the basic tokens defined in this header)
 *
 * @param[in] lexer the lexer to add the basic token rules to
*/
void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer);

#ifdef __cplusplus
}
#endif

#endif // !ARC_STD_LEXER_H_