Archeus 0.0.0
A C library and game engine that focuses on documentation
Loading...
Searching...
No Matches
lexer.c File Reference
#include "arc/std/lexer.h"
#include "arc/std/bool.h"
#include "arc/std/errno.h"
#include "arc/std/string.h"
#include "arc/std/vector.h"
#include "arc/std/io.h"
#include <stdlib.h>

Go to the source code of this file.

Data Structures

struct  ARC_Lexer
 

Functions

ARC_Bool ARC_LexerTokenRule_VectorCompareDataFn (void *dataA, void *dataB)
 
void ARC_LexerTokenRule_VectorDestroyDataFn (void *data)
 
void ARC_LexerToken_VectorDestroyDataFn (void *data)
 
void ARC_Lexer_Create (ARC_Lexer **lexer)
 creates an ARC_Lexer type
 
void ARC_Lexer_Destroy (ARC_Lexer *lexer)
 destroys an ARC_Lexer type
 
void ARC_Lexer_RegisterTokenRule (ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule)
 adds a token rule to a lexer
 
void ARC_Lexer_Clear (ARC_Lexer *lexer)
 clears all tokens from a lexer (will not clear token rules)
 
void ARC_Lexer_LexString (ARC_Lexer *lexer, ARC_String **data)
 creates tokens using a given string with ARC_LexerToken rules
 
void ARC_Lexer_LexFile (ARC_Lexer *lexer, ARC_String *path)
 reads in and lexes a file
 
void ARC_Lexer_PrintTokenRules (ARC_Lexer *lexer)
 prints each rule id and the address of its automata function
 
ARC_LexerToken * ARC_Lexer_GetToken (ARC_Lexer *lexer, uint32_t index)
 gets a token at a given index from a lexer
 
uint32_t ARC_Lexer_GetTokensSize (ARC_Lexer *lexer)
 gets the number of tokens stored in a lexer
 
ARC_Bool ARC_Lexer_IsContinious (ARC_Lexer *lexer)
 returns a boolean based on whether a lexer's rules are continuous
 
ARC_Bool ARC_Lexer_IsTokenId (ARC_Lexer *lexer, uint32_t id)
 returns a boolean based on if a given id is a stored token rule id
 
uint32_t ARC_Lexer_AutomataMatchCharFn (ARC_String **tokenData, ARC_String *string, void *automataData)
 checks if the first character of string matches the automataData cast as a char
 
uint32_t ARC_Lexer_AutomataMatchCharOrBetweenFn (ARC_String **tokenData, ARC_String *string, void *automataData)
 
uint32_t ARC_Lexer_AutomataMatchStringFn (ARC_String **tokenData, ARC_String *string, void *automataData)
 checks if the substring automataData as an ARC_String matches the first part of string
 
uint32_t ARC_Lexer_AutomataMatchCharInStringFn (ARC_String **tokenData, ARC_String *string, void *automataData)
 checks if the first part of string is a character in substring
 
void ARC_LexerTokenRule_DestroyCharAutomataDataFn (void *automataData)
 
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule (uint32_t id, char character)
 creates a ARC_LexerTokenRule with a given id and character
 
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween (uint32_t id, char start, char end)
 creates a ARC_LexerTokenRule with a given id and character range
 
void ARC_LexerTokenRule_DestroyStringAutomataDataFn (void *automataData)
 
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule (uint32_t id, ARC_String *string)
 creates a ARC_LexerTokenRule with a given id and string
 
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule (uint32_t id, ARC_String *string)
 creates a ARC_LexerTokenRule with a given id and string
 
void ARC_Lexer_InitBasicTokenRules (ARC_Lexer *lexer)
 adds a bunch of basic token rules (matching the BasicTokens above)
 

Function Documentation

◆ ARC_Lexer_AutomataMatchCharFn()

uint32_t ARC_Lexer_AutomataMatchCharFn ( ARC_String ** tokenData,
ARC_String * string,
void * automataData )

checks if the first character of string matches the automataData cast as a char

Note
this is intended as a helper callback
this function is a ARC_Lexer_AutomataFn callback
Parameters
[out] tokenData: a place to store token data (like a variable name), can be NULL if not needed
[in] string: a string to be checked to see if it matches a token
[in] automataData: any data that needs to be used for the ARC_Lexer_AutomataFn
Returns
the size of the token found, or 0 if the token was not found

Definition at line 311 of file lexer.c.

311 {
312 //if there is a match the token will be the same as automataData, so we don't need to store it again
313 *tokenData = NULL;
314
315 //check to see if there is a match with automataData as a char
316 if(string->data[0] == *(char *)automataData){
317 //return the token was found of length 1
318 return 1;
319 }
320
321 //no match was found
322 return 0;
323}
char * data
Definition string.h:15

References ARC_String::data.

Referenced by ARC_LexerTokenRule_CreateAndReturnMatchCharRule().

◆ ARC_Lexer_AutomataMatchCharInStringFn()

uint32_t ARC_Lexer_AutomataMatchCharInStringFn ( ARC_String ** tokenData,
ARC_String * string,
void * automataData )

checks if the first part of string is a character in substring

Note
this is intended as a helper callback
this function is a ARC_Lexer_AutomataFn callback
Parameters
[out] tokenData: a place to store token data (like a variable name), can be NULL if not needed
[in] string: a string to be checked to see if it matches a token
[in] automataData: any data that needs to be used for the ARC_Lexer_AutomataFn
Returns
the size of the token found, or 0 if the token was not found

Definition at line 356 of file lexer.c.

356 {
357 //if there is a match the token will be the same as automataData, so we don't need to store it again
358 *tokenData = NULL;
359
360 //check to see if there is a char match in automataData as a string
361 ARC_String *automataDataString = (ARC_String *)automataData;
362 for(uint64_t index = 0; index < automataDataString->length; index++){
363 if(string->data[0] == automataDataString->data[index]){
364 //return the token was found in the string of length 1
365 return 1;
366 }
367 }
368
369 //no match was found
370 return 0;
371}
substring position within a string
Definition string.h:14
uint64_t length
Definition string.h:16

References ARC_String::data, and ARC_String::length.

Referenced by ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule().

◆ ARC_Lexer_AutomataMatchCharOrBetweenFn()

uint32_t ARC_Lexer_AutomataMatchCharOrBetweenFn ( ARC_String ** tokenData,
ARC_String * string,
void * automataData )

Definition at line 325 of file lexer.c.

325 {
326 //if there is a match the token will be the same as automataData, so we don't need to store it again
327 *tokenData = NULL;
328
329 //check to see if there is a match with automataData as a range of chars
330 char *automataDataChars = (char *)automataData;
331 if(string->data[0] >= automataDataChars[0] && string->data[0] <= ((char *)automataData)[1]){
332 //return the token as token data and the token was found of length 1
333 ARC_String_Create(tokenData, string->data, 1);
334 return 1;
335 }
336
337 //no match was found
338 return 0;
339}
void ARC_String_Create(ARC_String **string, char *data, uint64_t length)
creates ARC_String type
Definition string.c:9

References ARC_String_Create(), and ARC_String::data.

Referenced by ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween().

◆ ARC_Lexer_AutomataMatchStringFn()

uint32_t ARC_Lexer_AutomataMatchStringFn ( ARC_String ** tokenData,
ARC_String * string,
void * automataData )

checks if the substring automataData as an ARC_String matches the first part of string

Note
this is intended as a helper callback
this function is a ARC_Lexer_AutomataFn callback
Parameters
[out] tokenData: a place to store token data (like a variable name), can be NULL if not needed
[in] string: a string to be checked to see if it matches a token
[in] automataData: any data that needs to be used for the ARC_Lexer_AutomataFn
Returns
the size of the token found, or 0 if the token was not found

Definition at line 341 of file lexer.c.

341 {
342 //if there is a match the token will be the same as automataData, so we don't need to store it again
343 *tokenData = NULL;
344
345 //check to see if there is a match with automataData as a string
346 ARC_String *automataDataString = (ARC_String *)automataData;
347 if(ARC_String_SubstringEquals(string, 0, automataDataString)){
348 //return the token was found of the string length
349 return automataDataString->length;
350 }
351
352 //no match was found
353 return 0;
354}
ARC_Bool ARC_String_SubstringEquals(ARC_String *first, uint64_t offset, ARC_String *second)
check if substring of first equals second string
Definition string.c:175

References ARC_String_SubstringEquals(), and ARC_String::length.

Referenced by ARC_LexerTokenRule_CreateAndReturnMatchStringRule().

◆ ARC_Lexer_Clear()

void ARC_Lexer_Clear ( ARC_Lexer * lexer)

clears all tokens from a lexer (will not clear token rules)

Parameters
lexer: the lexer to clear tokens from

Definition at line 161 of file lexer.c.

161 {
162 //clear the tokens vector
163 ARC_Vector_Clear(lexer->tokens);
164}
ARC_Vector * tokens
Definition lexer.c:12
void ARC_Vector_Clear(ARC_Vector *vector)
clears all items from a vector
Definition vector.c:142

References ARC_Vector_Clear(), and ARC_Lexer::tokens.

Referenced by ARC_Parser_Parse().

◆ ARC_Lexer_Create()

void ARC_Lexer_Create ( ARC_Lexer ** lexer)

creates an ARC_Lexer type

Parameters
[out] lexer: ARC_Lexer to create

Definition at line 50 of file lexer.c.

50 {
51 //create the lexer
52 *lexer = (ARC_Lexer *)malloc(sizeof(ARC_Lexer));
53
54 //setup token rules vector with compare and delete functions
57 ARC_Vector_Create(&(*lexer)->tokenRules, &tokenRulesVectorCompareDataFn, &tokenRulesVectorDestroyDataFn);
58
59 //setup tokens vector with delete funtion, we don't want a deleteDataFn because their index will be used as the id
61 ARC_Vector_Create(&(*lexer)->tokens, NULL, &tokenVectorDestroyDataFn);
62
63 //set token rules to continuous and initialize the token rules max value
64 (*lexer)->tokenRulesAreContinuous = ARC_True;
65 (*lexer)->tokenRulesMaxVal = 0;
66}
#define ARC_True
Definition bool.h:11
ARC_Bool ARC_LexerTokenRule_VectorCompareDataFn(void *dataA, void *dataB)
Definition lexer.c:20
void ARC_LexerTokenRule_VectorDestroyDataFn(void *data)
Definition lexer.c:32
void ARC_LexerToken_VectorDestroyDataFn(void *data)
Definition lexer.c:39
void(* ARC_Vector_DestroyDataFn)(void *data)
a callback that cleans up memory when it is removed from the vector
Definition vector.h:31
ARC_Bool(* ARC_Vector_CompareDataFn)(void *dataA, void *dataB)
a callback that allows the user to define a way to check the data stored in a vector for a match
Definition vector.h:24
void ARC_Vector_Create(ARC_Vector **vector, ARC_Vector_CompareDataFn *compareDataFn, ARC_Vector_DestroyDataFn *destroyDataFn)
creates an ARC_Vector which is an "expandable" array
Definition vector.c:31

References ARC_LexerToken_VectorDestroyDataFn(), ARC_LexerTokenRule_VectorCompareDataFn(), ARC_LexerTokenRule_VectorDestroyDataFn(), ARC_True, and ARC_Vector_Create().

Referenced by ARC_Parser_Create(), ARC_TEST(), ARC_TEST(), ARC_TEST(), ARC_TEST(), ARC_TEST(), and ARC_TEST().

◆ ARC_Lexer_Destroy()

void ARC_Lexer_Destroy ( ARC_Lexer * lexer)

destroys an ARC_Lexer type

Parameters
[in] lexer: ARC_Lexer to free

Definition at line 68 of file lexer.c.

68 {
69 //free the tokens (there is a vectorDeleteDataFn, so tokens should be freed)
71
72 //free the token rules (there is a vectorDeleteDataFn, so token rules should be freed)
74
75 //free the lexer
76 free(lexer);
77}
ARC_Vector * tokenRules
Definition lexer.c:11
void ARC_Vector_Destroy(ARC_Vector *vector)
destroys an ARC_Vector
Definition vector.c:54

References ARC_Vector_Destroy(), ARC_Lexer::tokenRules, and ARC_Lexer::tokens.

Referenced by ARC_Parser_Destroy(), ARC_TEST(), ARC_TEST(), ARC_TEST(), ARC_TEST(), ARC_TEST(), and ARC_TEST().

◆ ARC_Lexer_GetToken()

ARC_LexerToken * ARC_Lexer_GetToken ( ARC_Lexer * lexer,
uint32_t index )

gets a token at a given index from a lexer

Note
unless you have a very good reason, you probably don't want to mess with the token's string; doing so will probably change the token's string inside the lexer
Parameters
[in] lexer: the lexer to get the token from
[in] index: the index of the token in the lexer to get
Returns
a token at the lexer index on success, otherwise NULL

Definition at line 272 of file lexer.c.

272 {
273 //get the token and log if there is an error
274 ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);
275 if(arc_errno){
276 ARC_DEBUG_LOG_ERROR("ARC_Lexer_GetToken(lexer, index), errored when running ARC_Vector_Get(lexer->tokens, index);. check logs for more info");
277
278 //return a token with max rule value, and NULL for the string to signify an error
279 return NULL;
280 }
281
282 //the token was found, so return it
283 return token;
284}
int32_t arc_errno
Definition errno.c:5
#define ARC_DEBUG_LOG_ERROR(STR)
Definition errno.h:39
a lexer token type
Definition lexer.h:18
void * ARC_Vector_Get(ARC_Vector *vector, uint32_t index)
gets an item from an ARC_Vector at a position index
Definition vector.c:153

References ARC_DEBUG_LOG_ERROR, arc_errno, ARC_Vector_Get(), and ARC_Lexer::tokens.

Referenced by ARC_Parser_ParseTag(), and ARC_TEST().

◆ ARC_Lexer_GetTokensSize()

uint32_t ARC_Lexer_GetTokensSize ( ARC_Lexer * lexer)

gets the number of tokens stored in a lexer

Parameters
[in] lexer: the lexer to get the tokens size from
Returns
the size of the token array in a lexer

Definition at line 286 of file lexer.c.

286 {
287 return ARC_Vector_GetSize(lexer->tokens);
288}
uint32_t ARC_Vector_GetSize(ARC_Vector *vector)
gets the current size of an ARC_Vector as an unsigned 32 bit integer
Definition vector.c:149

References ARC_Vector_GetSize(), and ARC_Lexer::tokens.

Referenced by ARC_Parser_Parse(), and ARC_Parser_ParseTag().

◆ ARC_Lexer_InitBasicTokenRules()

void ARC_Lexer_InitBasicTokenRules ( ARC_Lexer * lexer)

adds a bunch of basic token rules (matching the BasicTokens above)

Definition at line 472 of file lexer.c.

472 {
473 //null
475
476 //number
478
479 //alpha char
482
483 //whitespace
484 //TODO: fix this
485 ARC_String *whitespaceString;
486 ARC_String_CreateWithStrlen(&whitespaceString, " \t");
488 ARC_String_Destroy(whitespaceString);
489
490 //single char tokens
506}
#define ARC_LEXER_TOKEN_WHITESPACE
Definition lexer.h:252
#define ARC_LEXER_TOKEN_FORWARD_SLASH_ID
Definition lexer.h:271
#define ARC_LEXER_TOKEN_ALPHA_UPPER_CHAR
Definition lexer.h:251
#define ARC_LEXER_TOKEN_PERIOD_CHAR
Definition lexer.h:269
#define ARC_LEXER_TOKEN_PERCENT_ID
Definition lexer.h:298
#define ARC_LEXER_TOKEN_PERCENT_CHAR
Definition lexer.h:299
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR
Definition lexer.h:278
#define ARC_LEXER_TOKEN_PERIOD_ID
Definition lexer.h:268
#define ARC_LEXER_TOKEN_NULL
basic tokens
Definition lexer.h:248
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR
Definition lexer.h:287
#define ARC_LEXER_TOKEN_COMMA_CHAR
Definition lexer.h:266
#define ARC_LEXER_TOKEN_AT_ID
Definition lexer.h:292
#define ARC_LEXER_TOKEN_AT_CHAR
Definition lexer.h:293
#define ARC_LEXER_TOKEN_COLON_CHAR
Definition lexer.h:260
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID
Definition lexer.h:283
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID
Definition lexer.h:280
#define ARC_LEXER_TOKEN_BANG_CHAR
Definition lexer.h:290
#define ARC_LEXER_TOKEN_NEWLINE_ID
basic token type ids, chars, and tags
Definition lexer.h:257
#define ARC_LEXER_TOKEN_COMMA_ID
Definition lexer.h:265
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID
Definition lexer.h:286
#define ARC_LEXER_TOKEN_HASH_ID
Definition lexer.h:295
#define ARC_LEXER_TOKEN_NEWLINE_CHAR
Definition lexer.h:258
#define ARC_LEXER_TOKEN_BACK_SLASH_ID
Definition lexer.h:274
#define ARC_LEXER_TOKEN_BANG_ID
Definition lexer.h:289
#define ARC_LEXER_TOKEN_SEMICOLON_ID
Definition lexer.h:262
#define ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR
Definition lexer.h:272
#define ARC_LEXER_TOKEN_ALPHA_LOWER_CHAR
Definition lexer.h:250
#define ARC_LEXER_TOKEN_BACK_SLASH_CHAR
Definition lexer.h:275
#define ARC_LEXER_TOKEN_COLON_ID
Definition lexer.h:259
#define ARC_LEXER_TOKEN_SEMICOLON_CHAR
Definition lexer.h:263
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID
Definition lexer.h:277
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR
Definition lexer.h:284
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR
Definition lexer.h:281
#define ARC_LEXER_TOKEN_NUMBER
Definition lexer.h:249
#define ARC_LEXER_TOKEN_HASH_CHAR
Definition lexer.h:296
void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule)
adds a token rule to a lexer
Definition lexer.c:79
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id, char character)
creates a ARC_LexerTokenRule with a given id and character
Definition lexer.c:378
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(uint32_t id, char start, char end)
creates a ARC_LexerTokenRule with a given id and character range
Definition lexer.c:400
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint32_t id, ARC_String *string)
creates a ARC_LexerTokenRule with a given id and string
Definition lexer.c:450
void ARC_String_CreateWithStrlen(ARC_String **string, char *data)
creates ARC_String type with strlen
Definition string.c:32
void ARC_String_Destroy(ARC_String *string)
destroys ARC_String type
Definition string.c:52

References ARC_Lexer_RegisterTokenRule(), ARC_LEXER_TOKEN_ALPHA_LOWER_CHAR, ARC_LEXER_TOKEN_ALPHA_UPPER_CHAR, ARC_LEXER_TOKEN_AT_CHAR, ARC_LEXER_TOKEN_AT_ID, ARC_LEXER_TOKEN_BACK_SLASH_CHAR, ARC_LEXER_TOKEN_BACK_SLASH_ID, ARC_LEXER_TOKEN_BANG_CHAR, ARC_LEXER_TOKEN_BANG_ID, ARC_LEXER_TOKEN_COLON_CHAR, ARC_LEXER_TOKEN_COLON_ID, ARC_LEXER_TOKEN_COMMA_CHAR, ARC_LEXER_TOKEN_COMMA_ID, ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR, ARC_LEXER_TOKEN_FORWARD_SLASH_ID, ARC_LEXER_TOKEN_HASH_CHAR, ARC_LEXER_TOKEN_HASH_ID, ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR, ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID, ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR, ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID, ARC_LEXER_TOKEN_NEWLINE_CHAR, ARC_LEXER_TOKEN_NEWLINE_ID, ARC_LEXER_TOKEN_NULL, ARC_LEXER_TOKEN_NUMBER, ARC_LEXER_TOKEN_PERCENT_CHAR, ARC_LEXER_TOKEN_PERCENT_ID, ARC_LEXER_TOKEN_PERIOD_CHAR, ARC_LEXER_TOKEN_PERIOD_ID, ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR, ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID, ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR, ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID, ARC_LEXER_TOKEN_SEMICOLON_CHAR, ARC_LEXER_TOKEN_SEMICOLON_ID, ARC_LEXER_TOKEN_WHITESPACE, ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(), ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(), ARC_LexerTokenRule_CreateAndReturnMatchCharRule(), ARC_String_CreateWithStrlen(), and ARC_String_Destroy().

Referenced by ARC_TEST(), and TEST_Parser_InitLexerRulesFn().

◆ ARC_Lexer_IsContinious()

ARC_Bool ARC_Lexer_IsContinious ( ARC_Lexer * lexer)

returns a boolean based on whether a lexer's rules are continuous

Parameters
[in] lexer: the lexer to check if its rules are continuous
Returns
ARC_True if the set rules are continuous

Definition at line 290 of file lexer.c.

290 {
291 return lexer->tokenRulesAreContinuous;
292}
ARC_Bool tokenRulesAreContinuous
Definition lexer.c:15

References ARC_Lexer::tokenRulesAreContinuous.

Referenced by ARC_TEST(), and ARC_TEST().

◆ ARC_Lexer_IsTokenId()

ARC_Bool ARC_Lexer_IsTokenId ( ARC_Lexer * lexer,
uint32_t id )

returns a boolean based on if a given id is a stored token rule id

Parameters
[in] lexer: the lexer to check stored token rule ids
[in] id: the id to check against the token rules
Returns
ARC_True if the id is a rule id, ARC_False otherwise

Definition at line 294 of file lexer.c.

294 {
295 //if the rules are continuous we can just check if it is less than the max rules value
296 if(lexer->tokenRulesAreContinuous == ARC_True){
297 return id <= lexer->tokenRulesMaxVal;
298 }
299
300 //the rules are not continuous so we need to check each individually
301 for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
302 ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, index);
303 if(currentTokenRule->id == id){
304 return ARC_True;
305 }
306 }
307
308 return ARC_False;
309}
#define ARC_False
Definition bool.h:12
a lexer token rule type
Definition lexer.h:46
uint32_t id
Definition lexer.h:47
uint32_t tokenRulesMaxVal
Definition lexer.c:16

References ARC_False, ARC_True, ARC_Vector_Get(), ARC_Vector_GetSize(), ARC_LexerTokenRule::id, ARC_Lexer::tokenRules, ARC_Lexer::tokenRulesAreContinuous, and ARC_Lexer::tokenRulesMaxVal.

Referenced by ARC_Parser_ParseTag(), ARC_TEST(), ARC_TEST(), and ARC_TEST().

◆ ARC_Lexer_LexFile()

void ARC_Lexer_LexFile ( ARC_Lexer * lexer,
ARC_String * path )

reads in and lexes a file

Note
this function will call ARC_Lexer_LexString, so its notes are applicable to this function
Parameters
[in] lexer: the lexer which holds the rules to use
[in] path: path of file to read in and lex

Definition at line 245 of file lexer.c.

245 {
246 //read file and clean up if it errors
247 ARC_String *data;
248 ARC_IO_FileToStr(path, &data);
249 if(arc_errno){
250 ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexFile(lexer, path), errored when running ARC_IO_FileToStr(path, &data);. check logs for more info");
251 if(data != NULL){
252 ARC_String_Destroy(data);
253 }
254
255 return;
256 }
257
258 //lex the string and log if there is an error, ARC_Lexer_LexString will clean up the string
259 ARC_Lexer_LexString(lexer, &data);
260 if(arc_errno){
261 ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexFile(lexer, path), errored when running ARC_Lexer_LexString(lexer, data);. check logs for more info");
262 }
263}
void ARC_IO_FileToStr(ARC_String *path, ARC_String **data)
get string and size from file
Definition io.c:43
void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data)
creates tokens using a given string with ARC_LexerToken rules
Definition lexer.c:166

References ARC_DEBUG_LOG_ERROR, arc_errno, ARC_IO_FileToStr(), ARC_Lexer_LexString(), and ARC_String_Destroy().

◆ ARC_Lexer_LexString()

void ARC_Lexer_LexString ( ARC_Lexer * lexer,
ARC_String ** data )

creates tokens using a given string with ARC_LexerToken rules

Parameters
[in] lexer: the lexer to get the ARC_LexerTokens from
[in/out] data: the string to lex, will be freed and set to NULL by the end of this function

Definition at line 166 of file lexer.c.

166 {
167 //check if there are any token rules to use
168 if(ARC_Vector_GetSize(lexer->tokenRules) == 0){
170 ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexString(lexer, data), no tokens registered to lexer to use");
171 return;
172 }
173
174 //this will run untill everything token is stripped or there is an error
175 while(*data != NULL){
176 uint32_t tokenLength = 0;
177 uint32_t lastTokenLength = 0;
178 ARC_LexerToken *token = NULL;
179
180 for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
181 //check if the token rule is found
182 ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
183
184 //tokenData should only exist if tokenLength is ARC_True as stated in the header
185 ARC_String *tokenData = NULL;
186 tokenLength = tokenRule->automataFn(&tokenData, *data, tokenRule->automataData);
187
188 //check if a token was found if it wasn't continue. I'm doing this to try to cut down on the ammount of indentation
189 if(tokenLength == 0){
190 continue;
191 }
192
193 //check to see if we found a better match
194 if(tokenLength > lastTokenLength){
195 //free the current token if it exists
196 if(token != NULL){
198 }
199
200 //create the token to add
201 token = (ARC_LexerToken *)malloc(sizeof(ARC_LexerToken));
202 token->rule = tokenRule->id;
203 token->data = tokenData;
204
205 //update the last found tokenLength to the max length
206 lastTokenLength = tokenLength;
207 }
208 }
209
210 //if no token was found, throw an error
211 if(token == NULL){
213 ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Lexer_LexString(lexer, data), no tokens found with current string: \"%s\"", (*data)->data);
214
215 //clean up errored string
216 ARC_String_Destroy(*data);
217 *data = NULL;
218 return;
219 }
220
221 //token exists (something must have gone very wrong if it doesn't), so add it and check for overflow (which I'd be surprised if that happens)
222 ARC_Vector_Add(lexer->tokens, (void *)token);
223 if(arc_errno){
224 ARC_DEBUG_LOG_ERROR("ARC_Lexer_LexString(lexer, data), errored when running ARC_Vector_Add(lexer->tokens, token);. check logs for more info");
225 free(token);
226
227 //clean up errored string
228 ARC_String_Destroy(*data);
229 *data = NULL;
230 return;
231 }
232
233 //if the last token was found, destroy the string and return
234 if(lastTokenLength == (*data)->length){
235 ARC_String_Destroy(*data);
236 *data = NULL;
237 return;
238 }
239
240 //strip the string
241 ARC_String_ReplaceWithSubstring(data, lastTokenLength, (*data)->length - lastTokenLength);
242 }
243}
#define ARC_DEBUG_LOG_ERROR_WITH_VARIABLES(STR,...)
Definition errno.h:40
#define ARC_ERRNO_DATA
Definition errno.h:7
void ARC_String_ReplaceWithSubstring(ARC_String **string, uint64_t start, uint64_t length)
replaces a string with a section of itself
Definition string.c:100
void * automataData
Definition lexer.h:49
ARC_LexerTokenRule_AutomataFn automataFn
Definition lexer.h:51
uint32_t rule
Definition lexer.h:19
ARC_String * data
Definition lexer.h:20
void ARC_Vector_Add(ARC_Vector *vector, void *data)
adds an item to an ARC_Vector
Definition vector.c:70

References ARC_DEBUG_LOG_ERROR, ARC_DEBUG_LOG_ERROR_WITH_VARIABLES, arc_errno, ARC_ERRNO_DATA, ARC_LexerToken_VectorDestroyDataFn(), ARC_String_Destroy(), ARC_String_ReplaceWithSubstring(), ARC_Vector_Add(), ARC_Vector_Get(), ARC_Vector_GetSize(), ARC_LexerTokenRule::automataData, ARC_LexerTokenRule::automataFn, ARC_LexerToken::data, ARC_LexerTokenRule::id, ARC_String::length, ARC_LexerToken::rule, ARC_Lexer::tokenRules, and ARC_Lexer::tokens.

Referenced by ARC_Lexer_LexFile(), ARC_Parser_Parse(), and ARC_TEST().

◆ ARC_Lexer_PrintTokenRules()

void ARC_Lexer_PrintTokenRules ( ARC_Lexer * lexer)

prints each rule id and the address of its automata function

Note
this is mostly used for debugging
Parameters
[in] lexer: the lexer to print rules from

Definition at line 265 of file lexer.c.

265 {
266 for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
267 ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
268 printf("Rule: %02i\tFunction: %p\n", tokenRule->id, tokenRule->automataFn);
269 }
270}

References ARC_Vector_Get(), ARC_Vector_GetSize(), ARC_LexerTokenRule::automataFn, ARC_LexerTokenRule::id, and ARC_Lexer::tokenRules.

◆ ARC_Lexer_RegisterTokenRule()

void ARC_Lexer_RegisterTokenRule ( ARC_Lexer * lexer,
ARC_LexerTokenRule tokenRule )

adds a token rule to a lexer

Parameters
[in] lexer: the lexer to add a token rule to
[in] tokenRule: the token rule to add

Definition at line 79 of file lexer.c.

79 {
80 //copy the token to a new pointer
81 ARC_LexerTokenRule *storedTokenRule = (ARC_LexerTokenRule *)malloc(sizeof(ARC_LexerTokenRule));
82 *storedTokenRule = tokenRule;
83
84 //TODO: add warning here for if arc_errno is already set
85
86 //add to the vector and check for error (I'd be surprised if the error ever happened because that would most likely mean overflow)
87 ARC_Vector_Add(lexer->tokenRules, storedTokenRule);
88 if(arc_errno){
89 ARC_DEBUG_LOG_ERROR("ARC_Lexer_RegisterTokenRule(lexer, tokenRule), errored when running ARC_Vector_Add(lexer->tokenRules, storedTokenRule);. check logs for more info");
90 free(storedTokenRule);
91 }
92
93 //check if the value still is continuous
95 //if it is already continuous we just check if it is one value above the tokens already in the vector
96 for(uint32_t tokenRuleIndex = ARC_Vector_GetSize(lexer->tokenRules) - 1; tokenRuleIndex > 0; tokenRuleIndex--){
97 //get the current token rule
98 ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, tokenRuleIndex - 1);
99
100 //check if the token rule is continuous (then next max value by one)
101 if(tokenRule.id - currentTokenRule->id == 1){
102 //the token rule is already continuous so we can update the max value and return
103 lexer->tokenRulesMaxVal = tokenRule.id;
104 return;
105 }
106 }
107
108 //the token is no longer continous
110 return;
111 }
112
113 //check to see if this value makes the token rule continuous again
114 //TODO: might want to optomize this
115 uint32_t minValue = ~(uint32_t)0;
116 for(uint32_t tokenRuleIndex = 0; tokenRuleIndex < ARC_Vector_GetSize(lexer->tokenRules); tokenRuleIndex++){
117 //get the current token rule
118 ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, tokenRuleIndex);
119
120 //check each token to find the minimum one
121 if(currentTokenRule->id < minValue){
122 minValue = currentTokenRule->id;
123 }
124 }
125
126 //loop through untill either all the values are checked and in order or the token rule is not continuous
127 //TODO: might want to optomize this
128 for(uint32_t foundSize = 0; foundSize != ARC_Vector_GetSize(lexer->tokenRules); foundSize++){
129 //check all current rules
130 ARC_Bool currentAreContinuous = ARC_False;
131 for(uint32_t tokenRuleIndex = 0; tokenRuleIndex < ARC_Vector_GetSize(lexer->tokenRules); tokenRuleIndex++){
132 //get the current token rule
133 ARC_LexerTokenRule *currentTokenRule = (ARC_LexerTokenRule *)ARC_Vector_Get(lexer->tokenRules, tokenRuleIndex);
134
135 //check if the value is smaller than or equal to the minimum value and if it is we can skip it
136 if(currentTokenRule->id <= minValue){
137 continue;
138 }
139
140 //check if the value is continous
141 if(currentTokenRule->id - minValue == 1){
142 //set the token rule max val to the next most continuous value
143 lexer->tokenRulesMaxVal = currentTokenRule->id;
144
145 //set the next smallest value to check to the the next most continuous value
146 minValue = currentTokenRule->id;
147 currentAreContinuous = ARC_True;
148 break;
149 }
150 }
151
152 //the current values are not continuous so we can return as token rules are continuous is already set to false
153 if(currentAreContinuous == ARC_False){
154 return;
155 }
156
157 //a continuous value was found so loop to next value
158 }
159}
#define ARC_Bool
Definition bool.h:10

References ARC_Bool, ARC_DEBUG_LOG_ERROR, arc_errno, ARC_False, ARC_True, ARC_Vector_Add(), ARC_Vector_Get(), ARC_Vector_GetSize(), ARC_LexerTokenRule::id, ARC_Lexer::tokenRules, ARC_Lexer::tokenRulesAreContinuous, and ARC_Lexer::tokenRulesMaxVal.

Referenced by ARC_Lexer_InitBasicTokenRules(), ARC_ParserCSV_InitLexerRulesFn(), ARC_ParserLang_InitLexerRulesFn(), ARC_TEST(), ARC_TEST(), ARC_TEST(), ARC_TEST(), ARC_TEST(), and TEST_Parser_InitBasicLexerTokenRules().

◆ ARC_LexerToken_VectorDestroyDataFn()

void ARC_LexerToken_VectorDestroyDataFn ( void * data)

Definition at line 39 of file lexer.c.

39 {
40 ARC_LexerToken *token = (ARC_LexerToken *)data;
41
42 //deletes the token data string if it exists
43 if(token->data != NULL){
45 }
46
47 free(token);
48}

References ARC_String_Destroy(), and ARC_LexerToken::data.

Referenced by ARC_Lexer_Create(), and ARC_Lexer_LexString().

◆ ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule()

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule ( uint32_t id,
ARC_String * string )

creates a ARC_LexerTokenRule with a given id and string

Note
this is intended as a helper function; note that string will not be freed (it will be copied and the copy will be freed)
Parameters
[in] id: a token's id (basically the token value)
[in] string: the string to match against, will be copied
Returns
a token rule based on the id and string

Definition at line 450 of file lexer.c.

450 {
451 //create the token rule
452 ARC_LexerTokenRule tokenRule;
453
454 //set the id
455 tokenRule.id = id;
456
457 //copy and store the automataData (which is just an ARC_String)
458 ARC_String *automataData;
459 ARC_String_Copy(&automataData, string);
460 tokenRule.automataData = (void *)automataData;
461
462 //we can use the ARC_Lexer_AutomataMatchCharInStringFn for this
464
465 //add the private destroy function
467
468 //return the created tokenRule
469 return tokenRule;
470}
void ARC_LexerTokenRule_DestroyStringAutomataDataFn(void *automataData)
Definition lexer.c:424
uint32_t ARC_Lexer_AutomataMatchCharInStringFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the first part of string is a character in substring
Definition lexer.c:356
void ARC_String_Copy(ARC_String **copy, ARC_String *original)
copy a ARC_String
Definition string.c:62
ARC_LexerTokenRule_DestroyAutomataDataFn destroyAutomataDataFn
Definition lexer.h:52

References ARC_Lexer_AutomataMatchCharInStringFn(), ARC_LexerTokenRule_DestroyStringAutomataDataFn(), ARC_String_Copy(), ARC_LexerTokenRule::automataData, ARC_LexerTokenRule::automataFn, ARC_LexerTokenRule::destroyAutomataDataFn, and ARC_LexerTokenRule::id.

Referenced by ARC_Lexer_InitBasicTokenRules(), and ARC_ParserLang_InitLexerRulesFn().

◆ ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween()

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween ( uint32_t id,
char start,
char end )

creates a ARC_LexerTokenRule with a given id and character range

Note
this is intended as a helper function
Parameters
[in] id: a token's id (basically the token value)
[in] start: the minimum character value to match against
[in] end: the maximum character value to match against
Returns
a token rule based on the id and character range

Definition at line 400 of file lexer.c.

400 {
401 //create the token rule
402 ARC_LexerTokenRule tokenRule;
403
404 //set the id
405 tokenRule.id = id;
406
407 //create and store the automataData (which is just two chars (the minimum and maximum))
408 char *automataData = (char *)malloc(sizeof(char) * 2);
409 automataData[0] = start;
410 automataData[1] = end;
411 tokenRule.automataData = (void *)automataData;
412
413 //we can use the ARC_Lexer_AutomataMatchCharOrBetweenFn for this
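414 tokenRule.automataFn = ARC_Lexer_AutomataMatchCharOrBetweenFn;
415
416 //add the private destroy function (we can use the char as it destroys a char pointer of any size)
417 tokenRule.destroyAutomataDataFn = ARC_LexerTokenRule_DestroyCharAutomataDataFn;
418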
419 //return the created tokenRule
420 return tokenRule;
421}
uint32_t ARC_Lexer_AutomataMatchCharOrBetweenFn(ARC_String **tokenData, ARC_String *string, void *automataData)
Definition lexer.c:325
void ARC_LexerTokenRule_DestroyCharAutomataDataFn(void *automataData)
Definition lexer.c:374

References ARC_Lexer_AutomataMatchCharOrBetweenFn(), ARC_LexerTokenRule_DestroyCharAutomataDataFn(), ARC_LexerTokenRule::automataData, ARC_LexerTokenRule::automataFn, ARC_LexerTokenRule::destroyAutomataDataFn, and ARC_LexerTokenRule::id.

Referenced by ARC_Lexer_InitBasicTokenRules(), ARC_ParserCSV_InitLexerRulesFn(), ARC_ParserLang_InitLexerRulesFn(), and TEST_Parser_InitBasicLexerTokenRules().

◆ ARC_LexerTokenRule_CreateAndReturnMatchCharRule()

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule ( uint32_t id,
char character )

creates a ARC_LexerTokenRule with a given id and character

Note
this is intended as a helper function
Parameters
[in] id: a token's id (basically the token value)
[in] character: the character to match against
Returns
a token rule based on the id and character

Definition at line 378 of file lexer.c.

378 {
379 //create the token rule
380 ARC_LexerTokenRule tokenRule;
381
382 //set the id
383 tokenRule.id = id;
384
385 //create and store the automataData (which is just a char)
386 char *automataData = (char *)malloc(sizeof(char));
387 *automataData = character;
388 tokenRule.automataData = (void *)automataData;
389
390 //we can use the ARC_Lexer_AutomataMatchCharFn for this
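391 tokenRule.automataFn = ARC_Lexer_AutomataMatchCharFn;
392
393 //add the private destroy function
394 tokenRule.destroyAutomataDataFn = ARC_LexerTokenRule_DestroyCharAutomataDataFn;
395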
396 //return the created tokenRule
397 return tokenRule;
398}
uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the first character of string matches the automataData cast as a char
Definition lexer.c:311

References ARC_Lexer_AutomataMatchCharFn(), ARC_LexerTokenRule_DestroyCharAutomataDataFn(), ARC_LexerTokenRule::automataData, ARC_LexerTokenRule::automataFn, ARC_LexerTokenRule::destroyAutomataDataFn, and ARC_LexerTokenRule::id.

Referenced by ARC_Lexer_InitBasicTokenRules(), ARC_ParserCSV_InitLexerRulesFn(), ARC_ParserLang_InitLexerRulesFn(), ARC_TEST(), ARC_TEST(), ARC_TEST(), ARC_TEST(), ARC_TEST(), and TEST_Parser_InitBasicLexerTokenRules().

◆ ARC_LexerTokenRule_CreateAndReturnMatchStringRule()

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule ( uint32_t id,
ARC_String * string )

creates a ARC_LexerTokenRule with a given id and string

Note
this is intended as a helper function. Note: string will not be freed (it will be copied and the copy will be freed)
Parameters
[in] id: a token's id (basically the token value)
[in] string: the string to match against, will be copied
Returns
a token rule based on the id and string

Definition at line 428 of file lexer.c.

428 {
429 //create the token rule
430 ARC_LexerTokenRule tokenRule;
431
432 //set the id
433 tokenRule.id = id;
434
435 //copy and store the automataData (which is just an ARC_String)
436 ARC_String *automataData;
437 ARC_String_Copy(&automataData, string);
438 tokenRule.automataData = (void *)automataData;
439
440 //we can use the ARC_Lexer_AutomataMatchStringFn for this
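441 tokenRule.automataFn = ARC_Lexer_AutomataMatchStringFn;
442
443 //add the private destroy function
444 tokenRule.destroyAutomataDataFn = ARC_LexerTokenRule_DestroyStringAutomataDataFn;
445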
446 //return the created tokenRule
447 return tokenRule;
448}
uint32_t ARC_Lexer_AutomataMatchStringFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the substring automataData as an ARC_String matches the first part of string
Definition lexer.c:341

References ARC_Lexer_AutomataMatchStringFn(), ARC_LexerTokenRule_DestroyStringAutomataDataFn(), ARC_String_Copy(), ARC_LexerTokenRule::automataData, ARC_LexerTokenRule::automataFn, ARC_LexerTokenRule::destroyAutomataDataFn, and ARC_LexerTokenRule::id.

Referenced by ARC_ParserLang_InitLexerRulesFn().

◆ ARC_LexerTokenRule_DestroyCharAutomataDataFn()

void ARC_LexerTokenRule_DestroyCharAutomataDataFn ( void * automataData)

Definition at line 374 of file lexer.c.

374 {
375 free((char *)automataData);
376}

Referenced by ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(), and ARC_LexerTokenRule_CreateAndReturnMatchCharRule().

◆ ARC_LexerTokenRule_DestroyStringAutomataDataFn()

void ARC_LexerTokenRule_DestroyStringAutomataDataFn ( void * automataData)

◆ ARC_LexerTokenRule_VectorCompareDataFn()

ARC_Bool ARC_LexerTokenRule_VectorCompareDataFn ( void * dataA,
void * dataB )

Definition at line 20 of file lexer.c.

20 {
21 ARC_LexerTokenRule *tokenRuleA = (ARC_LexerTokenRule *)dataA;
22 ARC_LexerTokenRule *tokenRuleB = (ARC_LexerTokenRule *)dataB;
23
24 if(tokenRuleA->id == tokenRuleB->id){
25 return ARC_True;
26 }
27
28 return ARC_False;
29}

References ARC_False, ARC_True, and ARC_LexerTokenRule::id.

Referenced by ARC_Lexer_Create().

◆ ARC_LexerTokenRule_VectorDestroyDataFn()

void ARC_LexerTokenRule_VectorDestroyDataFn ( void * data)

Definition at line 32 of file lexer.c.

32 {
33 ARC_LexerTokenRule *tokenRule = (ARC_LexerTokenRule *)data;
34 tokenRule->destroyAutomataDataFn(tokenRule->automataData);
35 free(tokenRule);
36}

References ARC_LexerTokenRule::automataData, and ARC_LexerTokenRule::destroyAutomataDataFn.

Referenced by ARC_Lexer_Create().