1#ifndef ARC_STD_LEXER_H_
2#define ARC_STD_LEXER_H_
/*
 * Basic lexer token definitions.
 *
 * The first group defines bare token ids (no _CHAR / _TAG companion is
 * defined for them in this header section).
 * NOTE(review): presumably these identify character-class tokens (digits,
 * lowercase letters, uppercase letters, whitespace) - confirm against
 * ARC_Lexer_InitBasicTokenRules.
 */
#define ARC_LEXER_TOKEN_NULL 0
#define ARC_LEXER_TOKEN_NUMBER 1
#define ARC_LEXER_TOKEN_ALPHA_LOWER_CHAR 2
#define ARC_LEXER_TOKEN_ALPHA_UPPER_CHAR 3
#define ARC_LEXER_TOKEN_WHITESPACE 4

/*
 * Single-character tokens. For each token:
 *   <NAME>_ID   - the numeric token id (unique, continuing from the ids above)
 *   <NAME>_CHAR - the character literal associated with the token
 *   <NAME>_TAG  - a string name for the token
 * NEWLINE defines no _TAG here.
 */
#define ARC_LEXER_TOKEN_NEWLINE_ID 5
#define ARC_LEXER_TOKEN_NEWLINE_CHAR '\n'

#define ARC_LEXER_TOKEN_COLON_ID 6
#define ARC_LEXER_TOKEN_COLON_CHAR ':'
#define ARC_LEXER_TOKEN_COLON_TAG "COLON"

#define ARC_LEXER_TOKEN_SEMICOLON_ID 7
#define ARC_LEXER_TOKEN_SEMICOLON_CHAR ';'
#define ARC_LEXER_TOKEN_SEMICOLON_TAG "SEMICOLON"

#define ARC_LEXER_TOKEN_COMMA_ID 8
#define ARC_LEXER_TOKEN_COMMA_CHAR ','
#define ARC_LEXER_TOKEN_COMMA_TAG "COMMA"

#define ARC_LEXER_TOKEN_PERIOD_ID 9
#define ARC_LEXER_TOKEN_PERIOD_CHAR '.'
#define ARC_LEXER_TOKEN_PERIOD_TAG "PERIOD"

#define ARC_LEXER_TOKEN_FORWARD_SLASH_ID 10
#define ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR '/'
#define ARC_LEXER_TOKEN_FORWARD_SLASH_TAG "FORWARD_SLASH"

#define ARC_LEXER_TOKEN_BACK_SLASH_ID 11
#define ARC_LEXER_TOKEN_BACK_SLASH_CHAR '\\'
#define ARC_LEXER_TOKEN_BACK_SLASH_TAG "BACK_SLASH"

#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID 12
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR '('
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_TAG "LEFT_PARENTHESIS"

#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID 13
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR ')'
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_TAG "RIGHT_PARENTHESIS"

#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID 14
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR '{'
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_TAG "LEFT_CURLY_BRACE"

#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID 15
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR '}'
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_TAG "RIGHT_CURLY_BRACE"

#define ARC_LEXER_TOKEN_BANG_ID 16
#define ARC_LEXER_TOKEN_BANG_CHAR '!'
#define ARC_LEXER_TOKEN_BANG_TAG "BANG"

/* The at sign; must be '@' so the AT token is distinct from BANG's '!'. */
#define ARC_LEXER_TOKEN_AT_ID 17
#define ARC_LEXER_TOKEN_AT_CHAR '@'
#define ARC_LEXER_TOKEN_AT_TAG "AT"

#define ARC_LEXER_TOKEN_HASH_ID 18
#define ARC_LEXER_TOKEN_HASH_CHAR '#'
#define ARC_LEXER_TOKEN_HASH_TAG "HASH"

#define ARC_LEXER_TOKEN_PERCENT_ID 19
#define ARC_LEXER_TOKEN_PERCENT_CHAR '%'
#define ARC_LEXER_TOKEN_PERCENT_TAG "PERCENT"
void(* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData)
a callback function to clean up ARC_LexerTokenRule's automataData
struct ARC_LexerTokenRule ARC_LexerTokenRule
a lexer token rule type
void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path)
reads in and lexes a file
ARC_LexerToken * ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index)
gets a token at a given index from a lexer
ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id)
returns a boolean based on if a given id is a stored token rule id
struct ARC_LexerToken ARC_LexerToken
a lexer token type
void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule)
adds a token rule to a lexer
uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the first character of string matches the automataData cast as a char
void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data)
creates tokens using a given string with ARC_LexerToken rules
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule(uint32_t id, ARC_String *string)
creates an ARC_LexerTokenRule with a given id that matches a given string
void ARC_Lexer_Clear(ARC_Lexer *lexer)
clears all tokens from a lexer (will not clear token rules)
void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer)
adds a bunch of basic token rules (matching the BasicTokens above)
void ARC_Lexer_Create(ARC_Lexer **lexer)
creates an ARC_Lexer type
uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer)
gets the number of tokens stored in a lexer
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id, char character)
creates an ARC_LexerTokenRule with a given id and character
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(uint32_t id, char start, char end)
creates an ARC_LexerTokenRule with a given id and character range
uint32_t(* ARC_LexerTokenRule_AutomataFn)(ARC_String **tokenData, ARC_String *string, void *automataData)
checks to see if a string is a type of token
uint32_t ARC_Lexer_AutomataMatchStringFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the substring automataData as an ARC_String matches the first part of string
ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer)
returns a boolean based on whether a lexer's rules are continuous
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint32_t id, ARC_String *string)
creates an ARC_LexerTokenRule with a given id that matches any single character found in a given string
uint32_t ARC_Lexer_AutomataMatchCharInStringFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the first part of string is a character in substring
void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer)
prints rule id and hex of the function name
struct ARC_Lexer ARC_Lexer
a lexer type
void ARC_Lexer_Destroy(ARC_Lexer *lexer)
destroys an ARC_Lexer type
ARC_LexerTokenRule_AutomataFn automataFn
ARC_LexerTokenRule_DestroyAutomataDataFn destroyAutomataDataFn
substring position within a string