#include "../test.h"
#include "arc/std/lexer.h"

#define ARC_LEXER_TOKEN_NULL 0
#define ARC_LEXER_TOKEN_NUMBER 1
#define ARC_LEXER_TOKEN_ALPHA_LOWER_CHAR 2
#define ARC_LEXER_TOKEN_ALPHA_UPPER_CHAR 3
#define ARC_LEXER_TOKEN_WHITESPACE 4
#define ARC_LEXER_TOKEN_NEWLINE_ID 5
#define ARC_LEXER_TOKEN_NEWLINE_CHAR '\n'
#define ARC_LEXER_TOKEN_COLON_ID 6
#define ARC_LEXER_TOKEN_COLON_CHAR ':'
#define ARC_LEXER_TOKEN_COLON_TAG "COLON"
#define ARC_LEXER_TOKEN_SEMICOLON_ID 7
#define ARC_LEXER_TOKEN_SEMICOLON_CHAR ';'
#define ARC_LEXER_TOKEN_SEMICOLON_TAG "SEMICOLON"
#define ARC_LEXER_TOKEN_COMMA_ID 8
#define ARC_LEXER_TOKEN_COMMA_CHAR ','
#define ARC_LEXER_TOKEN_COMMA_TAG "COMMA"
#define ARC_LEXER_TOKEN_PERIOD_ID 9
#define ARC_LEXER_TOKEN_PERIOD_CHAR '.'
#define ARC_LEXER_TOKEN_PERIOD_TAG "PERIOD"
#define ARC_LEXER_TOKEN_FORWARD_SLASH_ID 10
#define ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR '/'
#define ARC_LEXER_TOKEN_FORWARD_SLASH_TAG "FORWARD_SLASH"
#define ARC_LEXER_TOKEN_BACK_SLASH_ID 11
#define ARC_LEXER_TOKEN_BACK_SLASH_CHAR '\\'
#define ARC_LEXER_TOKEN_BACK_SLASH_TAG "BACK_SLASH"
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID 12
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR '('
#define ARC_LEXER_TOKEN_LEFT_PARENTHESIS_TAG "LEFT_PARENTHESIS"
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID 13
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR ')'
#define ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_TAG "RIGHT_PARENTHESIS"
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID 14
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR '{'
#define ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_TAG "LEFT_CURLY_BRACE"
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID 15
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR '}'
#define ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_TAG "RIGHT_CURLY_BRACE"
#define ARC_LEXER_TOKEN_BANG_ID 16
#define ARC_LEXER_TOKEN_BANG_CHAR '!'
#define ARC_LEXER_TOKEN_BANG_TAG "BANG"
#define ARC_LEXER_TOKEN_AT_ID 17
#define ARC_LEXER_TOKEN_AT_CHAR '@'
#define ARC_LEXER_TOKEN_AT_TAG "AT"
#define ARC_LEXER_TOKEN_HASH_ID 18
#define ARC_LEXER_TOKEN_HASH_CHAR '#'
#define ARC_LEXER_TOKEN_HASH_TAG "HASH"
#define ARC_LEXER_TOKEN_PERCENT_ID 19
#define ARC_LEXER_TOKEN_PERCENT_CHAR '%'
#define ARC_LEXER_TOKEN_PERCENT_TAG "PERCENT"
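
//registers the shared rule set used by the lexing tests below: a null rule, digit and
//alpha ranges, a whitespace char set, and one single char rule per punctuation token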
void ARC_Test_InitBasicLexerTokenRules(ARC_Lexer *lexer){
    //null
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_NULL, 0));

    //number
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_NUMBER, '0', '9'));

    //alpha char
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHA_LOWER_CHAR, 'a', 'z'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(ARC_LEXER_TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z'));

    //whitespace
    //TODO: fix this
    ARC_String *whitespaceString;
    ARC_String_CreateWithStrlen(&whitespaceString, " \t");
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_LEXER_TOKEN_WHITESPACE, whitespaceString));
    ARC_String_Destroy(whitespaceString);

    //single char tokens
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_NEWLINE_ID          , ARC_LEXER_TOKEN_NEWLINE_CHAR          ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COLON_ID            , ARC_LEXER_TOKEN_COLON_CHAR            ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_SEMICOLON_ID        , ARC_LEXER_TOKEN_SEMICOLON_CHAR        ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_COMMA_ID            , ARC_LEXER_TOKEN_COMMA_CHAR            ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_PERIOD_ID           , ARC_LEXER_TOKEN_PERIOD_CHAR           ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_FORWARD_SLASH_ID    , ARC_LEXER_TOKEN_FORWARD_SLASH_CHAR    ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_BACK_SLASH_ID       , ARC_LEXER_TOKEN_BACK_SLASH_CHAR       ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_LEFT_PARENTHESIS_ID , ARC_LEXER_TOKEN_LEFT_PARENTHESIS_CHAR ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_ID, ARC_LEXER_TOKEN_RIGHT_PARENTHESIS_CHAR));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID , ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_CHAR ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID, ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_CHAR));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_BANG_ID             , ARC_LEXER_TOKEN_BANG_CHAR             ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_AT_ID               , ARC_LEXER_TOKEN_AT_CHAR               ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_HASH_ID             , ARC_LEXER_TOKEN_HASH_CHAR             ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_LEXER_TOKEN_PERCENT_ID          , ARC_LEXER_TOKEN_PERCENT_CHAR          ));
}
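
//lexes a short punctuation string with the shared rules and checks that every char
//comes back as the expected single char token, in order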
ARC_TEST(Lexer_Char_Match){
    ARC_Lexer *lexer;
    ARC_Lexer_Create(&lexer);

    ARC_Test_InitBasicLexerTokenRules(lexer);

    ARC_String *simple;
    ARC_String_CreateWithStrlen(&simple, "::{}!/.");

    ARC_Lexer_LexString(lexer, &simple);

    ARC_LexerToken *token;

    token = ARC_Lexer_GetToken(lexer, 0);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_COLON_ID);

    token = ARC_Lexer_GetToken(lexer, 1);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_COLON_ID);

    token = ARC_Lexer_GetToken(lexer, 2);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID);

    token = ARC_Lexer_GetToken(lexer, 3);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID);

    token = ARC_Lexer_GetToken(lexer, 4);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_BANG_ID);

    token = ARC_Lexer_GetToken(lexer, 5);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_FORWARD_SLASH_ID);

    token = ARC_Lexer_GetToken(lexer, 6);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_PERIOD_ID);

    ARC_Lexer_Destroy(lexer);
}
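
//illustrative sketch, not part of the original suite: exercises the range based number
//and alpha rules through the same ARC_Lexer_LexString/ARC_Lexer_GetToken flow used in
//Lexer_Char_Match above; the test name is made up here and the input is chosen so that
//every char hits a different rule, so the checks hold whether or not consecutive chars
//of the same rule would be grouped into one token
ARC_TEST(Lexer_Range_Rule_Match){
    ARC_Lexer *lexer;
    ARC_Lexer_Create(&lexer);

    ARC_Test_InitBasicLexerTokenRules(lexer);

    //each char of "a1Z" falls in a different registered range
    ARC_String *mixed;
    ARC_String_CreateWithStrlen(&mixed, "a1Z");

    ARC_Lexer_LexString(lexer, &mixed);

    ARC_LexerToken *token;

    token = ARC_Lexer_GetToken(lexer, 0);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_ALPHA_LOWER_CHAR);

    token = ARC_Lexer_GetToken(lexer, 1);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_NUMBER);

    token = ARC_Lexer_GetToken(lexer, 2);
    ARC_CHECK(token->rule == ARC_LEXER_TOKEN_ALPHA_UPPER_CHAR);

    ARC_Lexer_Destroy(lexer);
}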
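
//ids 0 through 4 registered in ascending order: the id range is continuous and only the
//registered ids pass the token id check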
ARC_TEST(Lexer_Check_Id_Basic){
    ARC_Lexer *lexer;
    ARC_Lexer_Create(&lexer);

    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(0, 0  ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(1, ':'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(2, ':'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(3, ':'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':'));

    ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_True);

    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 0) == ARC_True );
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 7) == ARC_False);
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 2) == ARC_True );
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 4) == ARC_True );
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 5) == ARC_False);

    ARC_Lexer_Destroy(lexer);
}
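
//same ids as above but registered out of order: the id range should still count as continuous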
ARC_TEST(Lexer_Check_Id_Unordered_But_Continious){
    ARC_Lexer *lexer;
    ARC_Lexer_Create(&lexer);

    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(2, ':'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(0, 0  ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(3, ':'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(1, ':'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':'));

    ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_True);

    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 0) == ARC_True );
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 7) == ARC_False);
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 2) == ARC_True );
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 4) == ARC_True );
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 5) == ARC_False);

    ARC_Lexer_Destroy(lexer);
}
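
//ids 1 through 4 plus 8 leave a gap, so the id range is not continuous, but every
//registered id is still recognised by the token id check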
ARC_TEST(Lexer_Check_Id_Unordered_Not_Continious){
    ARC_Lexer *lexer;
    ARC_Lexer_Create(&lexer);

    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(2, ':'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(8, 0  ));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(3, ':'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(1, ':'));
    ARC_Lexer_RegisterTokenRule(lexer, ARC_LexerTokenRule_CreateAndReturnMatchCharRule(4, ':'));

    ARC_CHECK(ARC_Lexer_IsContinious(lexer) == ARC_False);

    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 8) == ARC_True );
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 7) == ARC_False);
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 2) == ARC_True );
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 4) == ARC_True );
    ARC_CHECK(ARC_Lexer_IsTokenId(lexer, 5) == ARC_False);

    ARC_Lexer_Destroy(lexer);
}