diff --git a/.gitignore b/.gitignore
index c7eb1b3..cd76ae7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -526,4 +526,6 @@ FodyWeavers.xsd
 doxygen/html
 
 [Bb][Uu][Ii][Ll][Dd]
-.ccls
\ No newline at end of file
+.ccls
+.vscode
+tests/test_error_out.txt
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0c56225..55ec589 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -63,6 +63,7 @@ set(ARCHEUS_STD_SOURCES
     src/std/hashtable.c
     src/std/io.c
     src/std/lexer.c
+    src/std/parser.c
     src/std/queue.c
     src/std/stack.c
     src/std/string.c
@@ -127,7 +128,8 @@ if(ARCHEUS_STD_TESTS)
         tests/test.c
 
         #tests/std/vector.c
-        tests/std/lexer.c
+        #tests/std/lexer.c
+        tests/std/parser.c
 
         ${ARCHEUS_STD_SOURCES}
     )
diff --git a/include/arc/std/lexer.h b/include/arc/std/lexer.h
index d5743ec..248a710 100644
--- a/include/arc/std/lexer.h
+++ b/include/arc/std/lexer.h
@@ -41,7 +41,6 @@ typedef uint32_t (* ARC_LexerTokenRule_AutomataFn)(ARC_String **tokenData, ARC_S
  */
 typedef void (* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData);
 
-
 /**
  * @brief a lexer token rule type
  */
@@ -57,7 +56,7 @@ typedef struct ARC_LexerTokenRule {
 /**
  * @brief creates an ARC_Lexer type
  *
- * @param[out] lexer
+ * @param[out] lexer ARC_Lexer to create
  */
 void ARC_Lexer_Create(ARC_Lexer **lexer);
 
diff --git a/include/arc/std/parser.h b/include/arc/std/parser.h
index 37657bf..8a4dd8f 100644
--- a/include/arc/std/parser.h
+++ b/include/arc/std/parser.h
@@ -5,7 +5,10 @@
 extern "C" {
 #endif
 
-#include "arc/std/string.h"
+#include "arc/std/array.h"
+//#include "arc/std/bool.h"
+#include "arc/std/lexer.h"
+#include <stdint.h>
 
 /**
  * @brief a parser type
@@ -13,17 +16,56 @@ extern "C" {
 typedef struct ARC_Parser ARC_Parser;
 
 /**
- * @brief a parser node
+ * @brief a language tag type for the parser //TODO: explain this better
  */
-typedef struct ARC_ParserNode ARC_ParserNode;
+typedef struct ARC_ParserLanguageTag {
+    uint32_t tagId;
+
+    uint32_t **tokensOrTags;
+    uint32_t tokensOrTagsSize;
+} ARC_ParserLanguageTag;
+
+/**
+ * @brief a callback function to initialize the lexer the parser uses with rules
+ *
+ * @param lexer the lexer used by the parser that rules should be added to
+ */
+typedef void (* ARC_Parser_InitLexerRulesFn)(ARC_Lexer *lexer);
 
 /**
  * @brief creates an ARC_Parser type
  *
- * @param[out] parser
- * @param[in] language ..., can be NULL
+ * @TODO: probs want to move the note to another file
+ * @note array of tokens for a language, each inner array starts with its own length, like
+ *       ARC_ParserLanguageTag tag = {
+ *           VARIABLE_NAME, //tagId
+ *           {
+ *               { 2, CHAR_OR_NUM, VARIABLE_NAME },
+ *               { 1, LAMBDA },
+ *           }, //tokensOrTags
+ *           2 //tokensOrTagsSize
+ *       };
+ *
+ * @param[out] parser ARC_Parser to create
+ * @param[in] language an array of ARC_ParserLanguageTags defining a language, can be NULL
+ * @param[in] initLexerRulesFn a callback used to initialize the token rules the lexer within the parser will use
  */
-void ARC_Parser_Create(ARC_Parser **parser, ARC_String *language);
+void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn);
+
+/**
+ * @brief creates an ARC_Parser type from a string
+ *
+ * @TODO: probs want to move the note to another file
+ * @note the syntax looks like:
+ *       <variable> -> CHAR <variableName> EOF
+ *       <variableName> -> <charOrNum> <variableName> | LAMBDA
+ *       <charOrNum> -> CHAR | NUM
+ *
+ * @param[out] parser ARC_Parser to create
+ * @param[in] languageString a string defining a language, written in the syntax shown in the note
+ * @param[in] initLexerRulesFn a callback used to initialize the token rules the lexer within the parser will use
+ */
+void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn);
 
 /**
  * @brief destroys an ARC_Parser type
@@ -33,26 +75,18 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_String *language);
 void ARC_Parser_Destroy(ARC_Parser *parser);
 
 /**
- * @brief sets the definition of the parser, the language itself is parsed and will throw an error if invalid
+ * @brief parses a string with the parser //TODO: describe the result once this is implemented
  *
- * @param[in] parser ARC_Parser to set the language to
- * @param[in] language the language as a string the parser should use
-*/
-void ARC_Parser_SetLanguage(ARC_Parser *parser, ARC_String *language);
-
-/**
- * @brief sets the definition of the parser, the language itself is parsed and will throw an error if invalid
- *
- * @param[in] parser ARC_Parser to set the language to
- * @param[in] language the language as a string the parser should use
+ * @param[in] parser ARC_Parser to parse with
+ * @param[in] data the string to parse
  */
 void ARC_Parser_Parse(ARC_Parser *parser, ARC_String *data);
 
 /**
- * @brief sets the definition of the parser, the language itself is parsed and will throw an error if invalid
+ * @brief parses a file with the parser //TODO: describe the result once this is implemented
  *
- * @param[in] parser ARC_Parser to set the language to
- * @param[in] language the language as a string the parser should use
+ * @param[in] parser ARC_Parser to parse with
+ * @param[in] path the path of the file to parse
  */
 void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
 
@@ -60,4 +94,4 @@ void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
 }
 #endif
 
-#endif // !ARC_STD_LEXER_H_
+#endif // !ARC_STD_PARSER_H_
diff --git a/include/arc/std/string.h b/include/arc/std/string.h
index fc1b281..56d36eb 100644
--- a/include/arc/std/string.h
+++ b/include/arc/std/string.h
@@ -176,9 +176,18 @@ ARC_Bool ARC_String_SubstringEqualsCString(ARC_String *string, uint64_t offset,
  *
  * @param string string to check
  *
- * @return 1 if alphabetic, 0 if not alphabetic
+ * @return ARC_True if alphabetic, ARC_False if not alphabetic
  */
-uint8_t ARC_String_Alpha(ARC_String *string);
+ARC_Bool ARC_String_IsAlpha(ARC_String *string);
+
+/**
+ * @brief checks if string is made out of only numbers
+ *
+ * @param string string to check
+ *
+ * @return ARC_True if it is numeric, ARC_False if it is not numeric
+ */
+ARC_Bool ARC_String_IsNumeric(ARC_String *string);
 
 /**
  * @brief converst substring from string to uint64_t
diff --git a/src/std/parser.c b/src/std/parser.c
index e69de29..26054cb 100644
--- a/src/std/parser.c
+++ b/src/std/parser.c
@@ -0,0 +1,48 @@
+#include "arc/std/parser.h"
+#include "arc/std/lexer.h"
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct ARC_Parser {
+    ARC_Array language;
+
+    ARC_Lexer *lexer;
+};
+
+void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
+    *parser = (ARC_Parser *)malloc(sizeof(ARC_Parser));
+
+    //set the language size to 0 and data to NULL in case the language is NULL
+    (*parser)->language.size = 0;
+    (*parser)->language.data = NULL;
+
+    //if the language exists, shallow copy its tags (each tag's tokensOrTags still points at the caller's arrays)
+    if(language != NULL){
+        (*parser)->language.size = language->size;
+        (*parser)->language.data = malloc(sizeof(ARC_ParserLanguageTag) * language->size);
+
+        memcpy((*parser)->language.data, language->data, sizeof(ARC_ParserLanguageTag) * language->size);
+    }
+
+    //create the lexer
+    ARC_Lexer_Create(&((*parser)->lexer));
+
+    //register instructions to the lexer
+    initLexerRulesFn(((*parser)->lexer));
+}
+
+void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
+}
+
+void ARC_Parser_Destroy(ARC_Parser *parser){
+    free(parser->language.data);
+
+    ARC_Lexer_Destroy(parser->lexer);
+
+    free(parser);
+}
+
+void ARC_Parser_Parse(ARC_Parser *parser, ARC_String *data);
+
+void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
diff --git a/src/std/string.c b/src/std/string.c
index 310ccc8..d03e911 100644
--- a/src/std/string.c
+++ b/src/std/string.c
@@ -173,7 +173,8 @@ ARC_Bool ARC_String_SubstringEqualsCString(ARC_String *string, uint64_t offset,
     return ARC_True;
 }
 
-uint8_t ARC_String_Alpha(ARC_String *string){
+//returns ARC_False as soon as a character fails the checks in the loop, ARC_True if none do
+ARC_Bool ARC_String_IsAlpha(ARC_String *string){
     for(uint64_t length = string->length; length; length--){
         if(string->data[length - 1] >= 'a' && string->data[length - 1] <= 'z'){
             continue;
@@ -183,10 +184,20 @@ uint8_t ARC_String_Alpha(ARC_String *string){
             continue;
         }
 
-        return 1;
+        return ARC_False;
     }
 
-    return 0;
+    return ARC_True;
+}
+
+ARC_Bool ARC_String_IsNumeric(ARC_String *string){
+    for(uint64_t index = 0; index < string->length; index++){
+        if(string->data[index] < '0' || string->data[index] > '9'){
+            return ARC_False;
+        }
+    }
+
+    return ARC_True;
 }
 
 uint64_t ARC_String_ToUint64_t(ARC_String *string){
diff --git a/tests/std/parser.c b/tests/std/parser.c
new file mode 100644
index 0000000..8a67599
--- /dev/null
+++ b/tests/std/parser.c
@@ -0,0 +1,51 @@
+#include "../test.h"
+#include "arc/std/lexer.h"
+#include "arc/std/parser.h"
+
+#define LAMBDA        0
+#define CHAR          1
+#define NUM           2
+#define CHAR_OR_NUM   3
+#define VARIABLE_NAME 4
+#define VARIABLE      5
+
+//the test only creates and destroys the parser, so the callback registers no token rules
+static void TEST_Parser_InitLexerRulesFn(ARC_Lexer *lexer){
+    (void)lexer;
+}
+
+ARC_TEST(Lexer_Char_Match){
+    ARC_Parser *parser;
+
+    uint32_t *charOrNumTokens[] = { (uint32_t[]){ 1, CHAR }, (uint32_t[]){ 1, NUM } };
+    uint32_t *variableNameTags[] = { (uint32_t[]){ 2, CHAR_OR_NUM, VARIABLE_NAME }, (uint32_t[]){ 1, LAMBDA } };
+    uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
+    ARC_ParserLanguageTag testTags[3] = {
+        {
+            CHAR_OR_NUM,     //tagId
+            charOrNumTokens, //tokensOrTags
+            2                //tokenOrTagsSize
+        },
+        {
+            VARIABLE_NAME,    //tagId
+            variableNameTags, //tokensOrTags
+            2                 //tokenOrTagsSize
+        },
+        {
+            VARIABLE,             //tagId
+            variableTokensOrTags, //tokensOrTags
+            1                     //tokenOrTagsSize
+        }
+    };
+
+    ARC_Array languageArray = {
+        3,       //size
+        testTags //data
+    };
+
+    ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
+
+    ARC_Parser_Destroy(parser);
+
+    ARC_CHECK(1 == 1);
+}