2024-12-30 05:22:12 -07:00
|
|
|
\page standard-lexer ARC_Lexer
|
2025-03-28 04:12:57 -06:00
|
|
|
|
|
|
|
|
# Basic Overview
|
|
|
|
|
|
|
|
|
|
The ::ARC_Lexer type takes rules describing what a token is, then takes a string and splits it into tokens based on those rules.
|
|
|
|
|
|
|
|
|
|
The API Reference for ::ARC_Lexer can be found here: arc/std/lexer.h
|
|
|
|
|
|
|
|
|
|
Note: @ref standard-config "ARC_Config" uses a lot of functions from the lexer
|
|
|
|
|
|
|
|
|
|
# Basic Example
|
|
|
|
|
|
|
|
|
|
```c
|
|
|
|
|
#include <archeus.h>
|
|
|
|
|
|
|
|
|
|
//ids handed to the lexer rules below, each token reports the id of the rule that produced it
const uint32_t TOKEN_NULL             = 0; //used by the rule for the character with value 0
const uint32_t TOKEN_NUMBER           = 1; //used by the rule built from '0' and '9'
const uint32_t TOKEN_ALPHA_LOWER_CHAR = 2; //used by the rule built from 'a' and 'z'
const uint32_t TOKEN_ALPHA_UPPER_CHAR = 3; //used by the rule built from 'A' and 'Z'
const uint32_t TOKEN_HYPHEN           = 4; //used by the rule for '-'
|
|
|
|
|
|
|
|
|
|
//private function to initialize the lexer rules for the language
|
|
|
|
|
void ARC_Example_InitLexerRulesFn(ARC_Lexer *lexer){
|
|
|
|
|
//null
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(TOKEN_NULL, 0));
|
|
|
|
|
|
|
|
|
|
//number
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(TOKEN_NUMBER, '0', '9'));
|
|
|
|
|
|
|
|
|
|
//alphabetic characters
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(TOKEN_ALPHA_LOWER_CHAR, 'a', 'z'));
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z'));
|
|
|
|
|
|
|
|
|
|
//hyphen
|
|
|
|
|
ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(TOKEN_HYPHEN, '-'));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int main(){
|
|
|
|
|
ARC_Lexer *lexer;
|
|
|
|
|
ARC_Lexer_Create(&lexer);
|
|
|
|
|
|
|
|
|
|
//add the rules to the lexer
|
|
|
|
|
ARC_Example_InitLexerRulesFn(lexer);
|
|
|
|
|
|
|
|
|
|
//create the example string to lex
|
|
|
|
|
ARC_String *exampleString;
|
|
|
|
|
ARC_String_CreateWithStrlen(&exampleString, "T-1000");
|
|
|
|
|
|
|
|
|
|
//this function handles cleanup for the string
|
|
|
|
|
ARC_Lexer_LexString(lexer, &exampleString);
|
|
|
|
|
|
|
|
|
|
//loop through all the tokens printing the lexed contents
|
|
|
|
|
for(uint32_t index = 0; index < ARC_Lexer_GetTokenSize(lexer); index++){
|
|
|
|
|
ARC_LexerToken *token = ARC_Lexer_GetToken(lexer, index);
|
|
|
|
|
|
|
|
|
|
printf("%u) Token Rule: %u\n", index, token->rule);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//cleanup
|
|
|
|
|
ARC_Lexer_Destroy(lexer);
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
```
|