Archeus 0.0.0
A C library and game engine that focuses on documentation
Loading...
Searching...
No Matches
lexer.h
Go to the documentation of this file.
1#ifndef ARC_STD_LEXER_H_
2#define ARC_STD_LEXER_H_
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7#include "arc/std/string.h"
8#include <stdint.h>
9
10/**
11 * @brief a lexer type
12*/
13typedef struct ARC_Lexer ARC_Lexer;
14
15/**
16 * @brief a lexer token type
17*/
22
23/**
24 * @brief checks to see if a string is a type of token
25 *
26 * @note do not set tokenData if this function returns 0, doing so will create a memory leak
27 *
28 * @param[out] tokenData a place to store token data (like a variable name), can be NULL if not needed
29 * @param[in] string a string to be checked to see if it matches a token
30 * @param[in] automataData any data that needs to be used for the ARC_LexerTokenRule_AutomataFn
31 *
32 * @return the size of the token found, or 0 if the token was not found
33*/
34typedef uint32_t (* ARC_LexerTokenRule_AutomataFn)(ARC_String **tokenData, ARC_String *string, void *automataData);
35
36/**
37 * @brief a callback function to clean up ARC_LexerTokenRule's automataData
38 *
39 * @param automataData the void * automataData to destroy
40*/
41typedef void (* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData);
42
43/**
44 * @brief a lexer token rule type
45*/
54
55/**
56 * @brief creates an ARC_Lexer type
57 *
58 * @param[out] lexer ARC_Lexer to create
59*/
61
62/**
63 * @brief destroys an ARC_Lexer type
64 *
65 * @param[in] lexer ARC_Lexer to free
66*/
68
69/**
70 * @brief adds a token rule to a lexer
71 *
72 * @param[in] lexer the lexer to add a token rule to
73 * @param[in] tokenRule the token rule to add
74*/
76
77/**
78 * @brief clears all tokens from a lexer (will not clear token rules)
79 *
80 * @param lexer the lexer to clear tokens from
81*/
83
84/**
85 * @brief creates tokens using a given string with ARC_LexerToken rules
86 *
87 * @param[in] lexer the lexer to get the ARC_LexerTokens from
88 * @param[in,out] data the string to lex, will be freed and set to NULL by the end of this function
89*/
91
92/**
93 * @brief reads in and lexes a file
94 *
95 * @note this function will call ARC_Lexer_LexString, so its notes are applicable to this function
96 *
97 * @param[in] lexer the lexer which holds the rules to use
98 * @param[in] path path of file to read in and lex
99*/
101
102/**
103 * @brief prints rule id and hex of the function name
104 *
105 * @note this is mostly used for debugging
106 *
107 * @param[in] lexer the lexer to print rules from
108*/
110
111/**
112 * @brief gets a token at a given index from a lexer
113 *
114 * @note unless you have a very good reason, you probably don't want to mess with the token's string,
115 * as that will probably change the token's string inside the lexer
116 *
117 * @param[in] lexer the lexer to get the token from
118 * @param[in] index the index of the token in the lexer to get
119 *
120 * @return a token at the lexer index on success, otherwise NULL
121*/
123
124/**
125 * @brief gets the number of tokens stored in a lexer
126 *
127 * @param[in] lexer the lexer to get the tokens size from
128 *
129 * @return the size of the token array in a lexer
130*/
132
133/**
134 * @brief returns a boolean based on if a lexer's rules are continuous
135 *
136 * @param[in] lexer the lexer to check if its rules are continuous
137 *
138 * @return ARC_True if the set rules are continuous
139*/
141
142/**
143 * @brief returns a boolean based on if a given id is a stored token rule id
144 *
145 * @param[in] lexer the lexer to check stored token rule ids
146 * @param[in] id the id to check against the token rules
147 *
148 * @return ARC_True if the id is a rule id, ARC_False otherwise
149*/
151
152/**
153 * @brief checks if the first character of string matches the automataData cast as a char
154 *
155 * @note this is intended as a helper callback
156 * @note this function is an ARC_LexerTokenRule_AutomataFn callback
157 *
158 * @param[out] tokenData a place to store token data (like a variable name), can be NULL if not needed
159 * @param[in] string a string to be checked to see if it matches a token
160 * @param[in] automataData any data that needs to be used for the ARC_LexerTokenRule_AutomataFn
161 *
162 * @return the size of the token found, or 0 if the token was not found
163*/
164uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *string, void *automataData);
165
166/**
167 * @brief checks if the substring automataData as an ARC_String matches the first part of string
168 *
169 * @note this is intended as a helper callback
170 * @note this function is an ARC_LexerTokenRule_AutomataFn callback
171 *
172 * @param[out] tokenData a place to store token data (like a variable name), can be NULL if not needed
173 * @param[in] string a string to be checked to see if it matches a token
174 * @param[in] automataData any data that needs to be used for the ARC_LexerTokenRule_AutomataFn
175 *
176 * @return the size of the token found, or 0 if the token was not found
177*/
178uint32_t ARC_Lexer_AutomataMatchStringFn(ARC_String **tokenData, ARC_String *string, void *automataData);
179
180/**
181 * @brief checks if the first part of string is a character in substring
182 *
183 * @note this is intended as a helper callback
184 * @note this function is an ARC_LexerTokenRule_AutomataFn callback
185 *
186 * @param[out] tokenData a place to store token data (like a variable name), can be NULL if not needed
187 * @param[in] string a string to be checked to see if it matches a token
188 * @param[in] automataData any data that needs to be used for the ARC_LexerTokenRule_AutomataFn
189 *
190 * @return the size of the token found, or 0 if the token was not found
191*/
192uint32_t ARC_Lexer_AutomataMatchCharInStringFn(ARC_String **tokenData, ARC_String *string, void *automataData);
193
194/**
195 * @brief creates an ARC_LexerTokenRule with a given id and character
196 *
197 * @note this is intended as a helper function
198 *
199 * @param[in] id a token's id (basically the token value)
200 * @param[in] character the character to match against
201 *
202 * @return a token rule based on the id and character
203*/
205
206/**
207 * @brief creates an ARC_LexerTokenRule with a given id and character range
208 *
209 * @note this is intended as a helper function
210 *
211 * @param[in] id a token's id (basically the token value)
212 * @param[in] start the minimum character value to match against
213 * @param[in] end the maximum character value to match against
214 *
215 * @return a token rule based on the id and character range
216*/
218
219/**
220 * @brief creates an ARC_LexerTokenRule with a given id and string
221 *
222 * @note this is intended as a helper function
223 * @note string will not be freed (it will be copied and the copy will be freed)
224 *
225 * @param[in] id a token's id (basically the token value)
226 * @param[in] string the string to match against, will be copied
227 *
228 * @return a token rule based on the id and string
229*/
231
232/**
233 * @brief creates an ARC_LexerTokenRule with a given id and string
234 *
235 * @note this is intended as a helper function
236 * @note string will not be freed (it will be copied and the copy will be freed)
237 *
238 * @param[in] id a token's id (basically the token value)
239 * @param[in] string the string to match against, will be copied
240 *
241 * @return a token rule based on the id and string
242*/
244
245/**
246 * @brief basic tokens
247*/
248#define ARC_LEXER_TOKEN_NULL 0
249#define ARC_LEXER_TOKEN_NUMBER 1
250#define ARC_LEXER_TOKEN_ALPHA_LOWER_CHAR 2
251#define ARC_LEXER_TOKEN_ALPHA_UPPER_CHAR 3
252#define ARC_LEXER_TOKEN_WHITESPACE 4
253
254/**
255 * @brief basic token type ids, chars, and tags
256*/
257#define ARC_LEXER_TOKEN_NEWLINE_ID 5
258#define ARC_LEXER_TOKEN_NEWLINE_CHAR '\n'
259#define ARC_LEXER_TOKEN_COLON_ID 6
260#define ARC_LEXER_TOKEN_COLON_CHAR ':'
261#define ARC_LEXER_TOKEN_COLON_TAG "COLON"
262#define ARC_LEXER_TOKEN_SEMICOLON_ID 7
263#define ARC_LEXER_TOKEN_SEMICOLON_CHAR ';'
264#define ARC_LEXER_TOKEN_SEMICOLON_TAG "SEMICOLON"
265#define ARC_LEXER_TOKEN_COMMA_ID 8
266#define ARC_LEXER_TOKEN_COMMA_CHAR ','
267#define ARC_LEXER_TOKEN_COMMA_TAG "COMMA"
268#define ARC_LEXER_TOKEN_PERIOD_ID 9
269#define ARC_LEXER_TOKEN_PERIOD_CHAR '.'
270#define ARC_LEXER_TOKEN_PERIOD_TAG "PERIOD"
271#define ARC_LEXER_TOKEN_FORWARD_SLASH_ID 10
272#define ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR '/'
273#define ARC_LEXER_TOKEN_FORWARD_SLASH_TAG "FORWARD_SLASH"
274#define ARC_LEXER_TOKEN_BACK_SLASH_ID 11
275#define ARC_LEXER_TOKEN_BACK_SLASH_CHAR '\\'
276#define ARC_LEXER_TOKEN_BACK_SLASH_TAG "BACK_SLASH"
277#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID 12
278#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR '('
279#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_TAG "LEFT_PARENTHESIS"
280#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID 13
281#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR ')'
282#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_TAG "RIGHT_PARENTHESIS"
283#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID 14
284#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR '{'
285#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_TAG "LEFT_CURLY_BRACE"
286#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID 15
287#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR '}'
288#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_TAG "RIGHT_CURLY_BRACE"
289#define ARC_LEXER_TOKEN_BANG_ID 16
290#define ARC_LEXER_TOKEN_BANG_CHAR '!'
291#define ARC_LEXER_TOKEN_BANG_TAG "BANG"
292#define ARC_LEXER_TOKEN_AT_ID 17
293#define ARC_LEXER_TOKEN_AT_CHAR '@'
294#define ARC_LEXER_TOKEN_AT_TAG "AT"
295#define ARC_LEXER_TOKEN_HASH_ID 18
296#define ARC_LEXER_TOKEN_HASH_CHAR '#'
297#define ARC_LEXER_TOKEN_HASH_TAG "HASH"
298#define ARC_LEXER_TOKEN_PERCENT_ID 19
299#define ARC_LEXER_TOKEN_PERCENT_CHAR '%'
300#define ARC_LEXER_TOKEN_PERCENT_TAG "PERCENT"
301
302/**
303 * @brief adds a bunch of basic token rules (matching the BasicTokens above)
304*/
306
307#ifdef __cplusplus
308}
309#endif
310
311#endif // !ARC_STD_LEXER_H_
#define ARC_Bool
Definition bool.h:10
void(* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData)
a callback function to clean up ARC_LexerTokenRule's automataData
Definition lexer.h:41
struct ARC_LexerTokenRule ARC_LexerTokenRule
a lexer token rule type
void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path)
reads in and lexes a file
ARC_LexerToken * ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index)
gets a token at a given index from a lexer
ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id)
returns a boolean based on if a given id is a stored token rule id
struct ARC_LexerToken ARC_LexerToken
a lexer token type
void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule)
adds a token rule to a lexer
uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the first character of string matches the automataData cast as a char
void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data)
creates tokens using a given string with ARC_LexerToken rules
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule(uint32_t id, ARC_String *string)
creates a ARC_LexerTokenRule with a given id and string
void ARC_Lexer_Clear(ARC_Lexer *lexer)
clears all tokens from a lexer (will not clear token rules)
void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer)
adds a bunch of basic token rules (matching the BasicTokens above)
void ARC_Lexer_Create(ARC_Lexer **lexer)
creates an ARC_Lexer type
uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer)
gets the number of tokens stored in a lexer
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id, char character)
creates a ARC_LexerTokenRule with a given id and character
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(uint32_t id, char start, char end)
creates a ARC_LexerTokenRule with a given id and character range
uint32_t(* ARC_LexerTokenRule_AutomataFn)(ARC_String **tokenData, ARC_String *string, void *automataData)
checks to see if a string is a type of token
Definition lexer.h:34
uint32_t ARC_Lexer_AutomataMatchStringFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the substring automataData as an ARC_String matches the first part of string
ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer)
returns a boolean based on if a lexer's rules are continuous
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint32_t id, ARC_String *string)
creates a ARC_LexerTokenRule with a given id and string
uint32_t ARC_Lexer_AutomataMatchCharInStringFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the first part of string is a character in substring
void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer)
prints rule id and hex of the function name
struct ARC_Lexer ARC_Lexer
a lexer type
Definition lexer.h:13
void ARC_Lexer_Destroy(ARC_Lexer *lexer)
destroys an ARC_Lexer type
a lexer token rule type
Definition lexer.h:46
uint32_t id
Definition lexer.h:47
void * automataData
Definition lexer.h:49
ARC_LexerTokenRule_AutomataFn automataFn
Definition lexer.h:51
ARC_LexerTokenRule_DestroyAutomataDataFn destroyAutomataDataFn
Definition lexer.h:52
a lexer token type
Definition lexer.h:18
uint32_t rule
Definition lexer.h:19
ARC_String * data
Definition lexer.h:20
substring position within a string
Definition string.h:14