archeus/doc/pages/standard/lexer.md

2 KiB

\page standard-lexer ARC_Lexer

Basic Overview

The ::ARC_Lexer type takes rules that define what a token is, then takes a string and splits it up into tokens based on those rules.

The API Reference for ::ARC_Lexer can be found here: arc/std/lexer.h

Note: @ref standard-config "ARC_Config" uses a lot of functions from the lexer

Basic Example

#include <archeus.h>

//ids for the example's token rules, each id is passed as the first argument when its rule is created below
const uint32_t TOKEN_NULL             = 0; //registered with the character value 0
const uint32_t TOKEN_NUMBER           = 1; //registered with the bounds '0' and '9'
const uint32_t TOKEN_ALPHA_LOWER_CHAR = 2; //registered with the bounds 'a' and 'z'
const uint32_t TOKEN_ALPHA_UPPER_CHAR = 3; //registered with the bounds 'A' and 'Z'
const uint32_t TOKEN_HYPHEN           = 4; //registered with the character '-'

//helper for this example: registers one token rule per TOKEN_* id on the given lexer
//the rules are registered in the order null, number, lowercase, uppercase, hyphen
void ARC_Example_InitLexerRulesFn(ARC_Lexer *lexer){
    //rule for the null character (character value 0)
    ARC_Lexer_RegisterTokenRule(
        lexer,
        ARC_LexerTokenRule_CreateAndReturnMatchCharRule(TOKEN_NULL, 0)
    );

    //rule for the digits, bounded by '0' and '9'
    ARC_Lexer_RegisterTokenRule(
        lexer,
        ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(TOKEN_NUMBER, '0', '9')
    );

    //rule for lowercase letters, bounded by 'a' and 'z'
    ARC_Lexer_RegisterTokenRule(
        lexer,
        ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(TOKEN_ALPHA_LOWER_CHAR, 'a', 'z')
    );

    //rule for uppercase letters, bounded by 'A' and 'Z'
    ARC_Lexer_RegisterTokenRule(
        lexer,
        ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z')
    );

    //rule for the single character '-'
    ARC_Lexer_RegisterTokenRule(
        lexer,
        ARC_LexerTokenRule_CreateAndReturnMatchCharRule(TOKEN_HYPHEN, '-')
    );
}

int main(){
    //create the lexer and load the example rules into it
    ARC_Lexer *exampleLexer;
    ARC_Lexer_Create(&exampleLexer);
    ARC_Example_InitLexerRulesFn(exampleLexer);

    //build the string that will be split into tokens
    ARC_String *exampleString;
    ARC_String_CreateWithStrlen(&exampleString, "T-1000");

    //lex the string, this function handles cleanup for the string
    ARC_Lexer_LexString(exampleLexer, &exampleString);

    //walk every token in order and print the rule stored on it
    uint32_t index = 0;
    while(index < ARC_Lexer_GetTokenSize(exampleLexer)){
        ARC_LexerToken *current = ARC_Lexer_GetToken(exampleLexer, index);
        printf("%u) Token Rule: %u\n", index, current->rule);

        index++;
    }

    //release the lexer before exiting
    ARC_Lexer_Destroy(exampleLexer);

    return 0;
}