d5/df3/lexer_8h_source.html

#ifndef ARC_STD_LEXER_H_

#define ARC_STD_LEXER_H_


#ifdef __cplusplus

extern "C" {

#endif

#include "arc/std/string.h"

#include <stdint.h>


/**
 * @brief a lexer type
 *
 * @note only forward declared in this header, so it is always handled through a pointer
*/

typedef struct ARC_Lexer ARC_Lexer;


/**
 * @brief a lexer token type
 *
 * @note tokens are handed out by ARC_Lexer_GetToken
*/


typedef struct ARC_LexerToken {

    // presumably the id of the ARC_LexerTokenRule that produced this token - TODO confirm
    uint32_t rule;

    // string data stored for the token (like a variable name); presumably NULL when the rule stored none - TODO confirm
    ARC_String *data;

} ARC_LexerToken;


/**
 * @brief checks to see if the start of a string is a type of token
 *
 * @note do not set tokenData if this function returns 0, doing so will create a memory leak
 *
 * @param[out] tokenData    a place to store token data (like a variable name), can be NULL if not needed
 * @param[in]  string       a string to be checked to see if it matches a token
 * @param[in]  automataData any data that needs to be used by the ARC_LexerTokenRule_AutomataFn
 *
 * @return the size of the token found, or 0 if the token was not found
*/

typedef uint32_t (* ARC_LexerTokenRule_AutomataFn)(ARC_String **tokenData, ARC_String *string, void *automataData);


/**
 * @brief a callback function to clean up an ARC_LexerTokenRule's automataData
 *
 * @param[in] automataData the void * automataData to destroy
*/

typedef void (* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData);


/**
 * @brief a lexer token rule type
 *
 * @note bundles a token id with the callback used to match it, the data that callback needs,
 *       and the callback used to clean that data up
*/


typedef struct ARC_LexerTokenRule {

    // the token's id (basically the token value)
    uint32_t id;


    // any data that needs to be used by automataFn
    void *automataData;


    // callback that checks if a string is this rule's token
    ARC_LexerTokenRule_AutomataFn automataFn;

    // callback that cleans up automataData
    ARC_LexerTokenRule_DestroyAutomataDataFn destroyAutomataDataFn;

} ARC_LexerTokenRule;


/**
 * @brief creates an ARC_Lexer type
 *
 * @param[out] lexer address of the ARC_Lexer pointer that receives the created lexer
*/

void ARC_Lexer_Create(ARC_Lexer **lexer);


/**
 * @brief destroys an ARC_Lexer type
 *
 * @param[in] lexer the ARC_Lexer to free
*/

void ARC_Lexer_Destroy(ARC_Lexer *lexer);


/**
 * @brief adds a token rule to a lexer
 *
 * @param[in] lexer     the lexer to add a token rule to
 * @param[in] tokenRule the token rule to add, passed by value
*/

void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule);


/**
 * @brief clears all tokens from a lexer (will not clear token rules)
 *
 * @param[in] lexer the lexer to clear tokens from
*/

void ARC_Lexer_Clear(ARC_Lexer *lexer);


/**
 * @brief creates tokens from a given string using the lexer's ARC_LexerTokenRule rules
 *
 * @param[in]     lexer the lexer to store the created ARC_LexerTokens in
 * @param[in,out] data  the string to lex, will be freed and set to NULL by the end of this function
*/

void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data);


/**
 * @brief reads in and lexes a file
 *
 * @note this function will call ARC_Lexer_LexString, so its notes are applicable to this function
 *
 * @param[in] lexer the lexer which holds the rules to use
 * @param[in] path  path of the file to read in and lex
*/

void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path);


/**
 * @brief prints the rule id and hex of the function name for the lexer's token rules
 *
 * @note this is mostly used for debugging
 *
 * @param[in] lexer the lexer to print rules from
*/

void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer);


/**
 * @brief gets a token at a given index from a lexer
 *
 * @note unless you have a very good reason, you probably don't want to mess with the token's string,
 *       that will probably change the token's string inside the lexer
 *
 * @param[in] lexer the lexer to get the token from
 * @param[in] index the index of the token in the lexer to get
 *
 * @return a pointer to the token at the given index on success, otherwise NULL
*/

ARC_LexerToken *ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index);


/**
 * @brief gets the size of the token array in a lexer
 *
 * @param[in] lexer the lexer to get the tokens size from
 *
 * @return the size of the token array in a lexer
*/

uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer);


/**
 * @brief returns a boolean based on if a lexer's rules are continuous
 *
 * @note the function name keeps its original spelling ("Continious")
 *
 * @param[in] lexer the lexer to check if its rules are continuous
 *
 * @return ARC_True if the set rules are continuous, presumably ARC_False otherwise - TODO confirm
*/
/* NOTE(review): ARC_Bool is not declared by any header this file includes directly,
 *               presumably it arrives through arc/std/string.h - confirm */

ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer);


/**
 * @brief returns a boolean based on if a given id is a stored token rule id
 *
 * @param[in] lexer the lexer whose stored token rule ids are checked
 * @param[in] id    the id to check against the token rules
 *
 * @return ARC_True if the id is a rule id, ARC_False otherwise
*/

ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id);


/**
 * @brief checks if the first character of string matches the automataData cast as a char
 *
 * @note this is intended as a helper callback
 * @note this function is an ARC_LexerTokenRule_AutomataFn callback
 *
 * @param[out] tokenData    a place to store token data (like a variable name), can be NULL if not needed
 * @param[in]  string       a string to be checked to see if it matches a token
 * @param[in]  automataData the data to match against, cast as a char
 *
 * @return the size of the token found, or 0 if the token was not found
*/

uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *string, void *automataData);


/**
 * @brief checks if the substring automataData as an ARC_String matches the first part of string
 *
 * @note this is intended as a helper callback
 * @note this function is an ARC_LexerTokenRule_AutomataFn callback
 *
 * @param[out] tokenData    a place to store token data (like a variable name), can be NULL if not needed
 * @param[in]  string       a string to be checked to see if it matches a token
 * @param[in]  automataData the substring to match against, as an ARC_String
 *
 * @return the size of the token found, or 0 if the token was not found
*/

uint32_t ARC_Lexer_AutomataMatchStringFn(ARC_String **tokenData, ARC_String *string, void *automataData);


/**
 * @brief checks if the first part of string is a character in substring
 *
 * @note this is intended as a helper callback
 * @note this function is an ARC_LexerTokenRule_AutomataFn callback
 *
 * @param[out] tokenData    a place to store token data (like a variable name), can be NULL if not needed
 * @param[in]  string       a string to be checked to see if it matches a token
 * @param[in]  automataData any data that needs to be used by the callback,
 *                          presumably the substring as an ARC_String - TODO confirm
 *
 * @return the size of the token found, or 0 if the token was not found
*/

uint32_t ARC_Lexer_AutomataMatchCharInStringFn(ARC_String **tokenData, ARC_String *string, void *automataData);


/**
 * @brief creates an ARC_LexerTokenRule with a given id and character
 *
 * @note this is intended as a helper function
 *
 * @param[in] id        a token's id (basically the token value)
 * @param[in] character the character to match against
 *
 * @return a token rule based on the id and character
*/

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id, char character);


/**
 * @brief creates an ARC_LexerTokenRule with a given id and character range
 *
 * @note this is intended as a helper function
 *
 * @param[in] id    a token's id (basically the token value)
 * @param[in] start the minimum character value to match against
 * @param[in] end   the maximum character value to match against
 *
 * @return a token rule based on the id and character range
*/

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(uint32_t id, char start, char end);


/**
 * @brief creates an ARC_LexerTokenRule with a given id and string
 *
 * @note this is intended as a helper function
 * @note string will not be freed (it will be copied and the copy will be freed)
 *
 * @param[in] id     a token's id (basically the token value)
 * @param[in] string the string to match against, will be copied
 *
 * @return a token rule based on the id and string
*/

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule(uint32_t id, ARC_String *string);


/**
 * @brief creates an ARC_LexerTokenRule with a given id and string
 *
 * @note this is intended as a helper function
 * @note string will not be freed (it will be copied and the copy will be freed)
 * @note presumably the rule matches any single character found in string - TODO confirm
 *
 * @param[in] id     a token's id (basically the token value)
 * @param[in] string the string to match against, will be copied
 *
 * @return a token rule based on the id and string
*/

ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint32_t id, ARC_String *string);


/**
 * @brief basic tokens
 *
 * @note these values continue into the ARC_LEXER_TOKEN_*_ID values, which start at 5
*/

#define ARC_LEXER_TOKEN_NULL             0

#define ARC_LEXER_TOKEN_NUMBER           1

#define ARC_LEXER_TOKEN_ALPHA_LOWER_CHAR 2

#define ARC_LEXER_TOKEN_ALPHA_UPPER_CHAR 3

#define ARC_LEXER_TOKEN_WHITESPACE       4


/**
 * @brief basic token type ids, chars, and tags
 *
 * @note each token has an _ID (numeric token id), a _CHAR (the character it stands for),
 *       and a _TAG (the token's name as a string); the newline token has no _TAG
 * @note ids continue from the basic tokens (0 to 4), so they start at 5
*/
#define ARC_LEXER_TOKEN_NEWLINE_ID             5
#define ARC_LEXER_TOKEN_NEWLINE_CHAR           '\n'

#define ARC_LEXER_TOKEN_COLON_ID               6
#define ARC_LEXER_TOKEN_COLON_CHAR             ':'
#define ARC_LEXER_TOKEN_COLON_TAG              "COLON"

#define ARC_LEXER_TOKEN_SEMICOLON_ID           7
#define ARC_LEXER_TOKEN_SEMICOLON_CHAR         ';'
#define ARC_LEXER_TOKEN_SEMICOLON_TAG          "SEMICOLON"

#define ARC_LEXER_TOKEN_COMMA_ID               8
#define ARC_LEXER_TOKEN_COMMA_CHAR             ','
#define ARC_LEXER_TOKEN_COMMA_TAG              "COMMA"

#define ARC_LEXER_TOKEN_PERIOD_ID              9
#define ARC_LEXER_TOKEN_PERIOD_CHAR            '.'
#define ARC_LEXER_TOKEN_PERIOD_TAG             "PERIOD"

#define ARC_LEXER_TOKEN_FORWARD_SLASH_ID       10
#define ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR     '/'
#define ARC_LEXER_TOKEN_FORWARD_SLASH_TAG      "FORWARD_SLASH"

#define ARC_LEXER_TOKEN_BACK_SLASH_ID          11
#define ARC_LEXER_TOKEN_BACK_SLASH_CHAR        '\\'
#define ARC_LEXER_TOKEN_BACK_SLASH_TAG         "BACK_SLASH"

#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID    12
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR  '('
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_TAG   "LEFT_PARENTHESIS"

#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID   13
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR ')'
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_TAG  "RIGHT_PARENTHESIS"

#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID    14
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR  '{'
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_TAG   "LEFT_CURLY_BRACE"

#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID   15
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR '}'
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_TAG  "RIGHT_CURLY_BRACE"

#define ARC_LEXER_TOKEN_BANG_ID                16
#define ARC_LEXER_TOKEN_BANG_CHAR              '!'
#define ARC_LEXER_TOKEN_BANG_TAG               "BANG"

/* the AT token is the '@' character, it must not share '!' with the BANG token */
#define ARC_LEXER_TOKEN_AT_ID                  17
#define ARC_LEXER_TOKEN_AT_CHAR                '@'
#define ARC_LEXER_TOKEN_AT_TAG                 "AT"

#define ARC_LEXER_TOKEN_HASH_ID                18
#define ARC_LEXER_TOKEN_HASH_CHAR              '#'
#define ARC_LEXER_TOKEN_HASH_TAG               "HASH"

#define ARC_LEXER_TOKEN_PERCENT_ID             19
#define ARC_LEXER_TOKEN_PERCENT_CHAR           '%'
#define ARC_LEXER_TOKEN_PERCENT_TAG            "PERCENT"


/**
 * @brief adds a bunch of basic token rules (matching the basic token defines above) to a lexer
 *
 * @param[in] lexer the lexer to add the basic token rules to
*/

void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer);


#ifdef __cplusplus

}

#endif


#endif // !ARC_STD_LEXER_H_

ARC_Bool
#define ARC_Bool
Definition bool.h:10

ARC_LexerTokenRule_DestroyAutomataDataFn
void(* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData)
a callback function to clean up ARC_LexerTokenRule's automataData
Definition lexer.h:41

ARC_LexerTokenRule
struct ARC_LexerTokenRule ARC_LexerTokenRule
a lexer token rule type

ARC_Lexer_LexFile
void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path)
reads in and lexes a file

ARC_Lexer_GetToken
ARC_LexerToken * ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index)
gets a token at a given index from a lexer

ARC_Lexer_IsTokenId
ARC_Bool ARC_Lexer_IsTokenId(ARC_Lexer *lexer, uint32_t id)
returns a boolean based on if a given id is a stored token rule id

ARC_LexerToken
struct ARC_LexerToken ARC_LexerToken
a lexer token type

ARC_Lexer_RegisterTokenRule
void ARC_Lexer_RegisterTokenRule(ARC_Lexer *lexer, ARC_LexerTokenRule tokenRule)
adds a token rule to a lexer

ARC_Lexer_AutomataMatchCharFn
uint32_t ARC_Lexer_AutomataMatchCharFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the first character of string matches the automataData cast as a char

ARC_Lexer_LexString
void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data)
creates tokens using a given string with ARC_LexerToken rules

ARC_LexerTokenRule_CreateAndReturnMatchStringRule
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchStringRule(uint32_t id, ARC_String *string)
creates a ARC_LexerTokenRule with a given id and string

ARC_Lexer_Clear
void ARC_Lexer_Clear(ARC_Lexer *lexer)
clears all tokens from a lexer (will not clear token rules)

ARC_Lexer_InitBasicTokenRules
void ARC_Lexer_InitBasicTokenRules(ARC_Lexer *lexer)
adds a bunch of basic token rules (matching the BasicTokens above)

ARC_Lexer_Create
void ARC_Lexer_Create(ARC_Lexer **lexer)
creates an ARC_Lexer type

ARC_Lexer_GetTokensSize
uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer)
gets the size of the token array in a lexer

ARC_LexerTokenRule_CreateAndReturnMatchCharRule
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharRule(uint32_t id, char character)
creates a ARC_LexerTokenRule with a given id and character

ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(uint32_t id, char start, char end)
creates a ARC_LexerTokenRule with a given id and character range

ARC_LexerTokenRule_AutomataFn
uint32_t(* ARC_LexerTokenRule_AutomataFn)(ARC_String **tokenData, ARC_String *string, void *automataData)
checks to see if a string is a type of token
Definition lexer.h:34

ARC_Lexer_AutomataMatchStringFn
uint32_t ARC_Lexer_AutomataMatchStringFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the substring automataData as an ARC_String matches the first part of string

ARC_Lexer_IsContinious
ARC_Bool ARC_Lexer_IsContinious(ARC_Lexer *lexer)
returns a boolean based on if a lexer's rules are continuous

ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule
ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(uint32_t id, ARC_String *string)
creates a ARC_LexerTokenRule with a given id and string

ARC_Lexer_AutomataMatchCharInStringFn
uint32_t ARC_Lexer_AutomataMatchCharInStringFn(ARC_String **tokenData, ARC_String *string, void *automataData)
checks if the first part of string is a character in substring

ARC_Lexer_PrintTokenRules
void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer)
prints rule id and hex of the function name

ARC_Lexer
struct ARC_Lexer ARC_Lexer
a lexer type
Definition lexer.h:13

ARC_Lexer_Destroy
void ARC_Lexer_Destroy(ARC_Lexer *lexer)
destroys an ARC_Lexer type

string.h

ARC_LexerTokenRule
a lexer token rule type
Definition lexer.h:46

ARC_LexerTokenRule::id
uint32_t id
Definition lexer.h:47

ARC_LexerTokenRule::automataData
void * automataData
Definition lexer.h:49

ARC_LexerTokenRule::automataFn
ARC_LexerTokenRule_AutomataFn automataFn
Definition lexer.h:51

ARC_LexerTokenRule::destroyAutomataDataFn
ARC_LexerTokenRule_DestroyAutomataDataFn destroyAutomataDataFn
Definition lexer.h:52

ARC_LexerToken
a lexer token type
Definition lexer.h:18

ARC_LexerToken::rule
uint32_t rule
Definition lexer.h:19

ARC_LexerToken::data
ARC_String * data
Definition lexer.h:20

ARC_String
substring position within a string
Definition string.h:14