This commit is contained in:
herbglitch 2024-11-12 13:07:40 -07:00
commit 2d70208978
5 changed files with 154 additions and 94 deletions

View file

@ -64,6 +64,7 @@ set(ARCHEUS_STD_SOURCES
src/std/io.c src/std/io.c
src/std/lexer.c src/std/lexer.c
src/std/parser.c src/std/parser.c
src/std/parser/parserlang.c
src/std/queue.c src/std/queue.c
src/std/stack.c src/std/stack.c
src/std/string.c src/std/string.c
@ -130,6 +131,7 @@ if(ARCHEUS_STD_TESTS)
#tests/std/vector.c #tests/std/vector.c
tests/std/lexer.c tests/std/lexer.c
tests/std/parser.c tests/std/parser.c
tests/std/temp_parserlang.c
${ARCHEUS_STD_SOURCES} ${ARCHEUS_STD_SOURCES}
) )

View file

@ -5,9 +5,35 @@
extern "C" { extern "C" {
#endif #endif
#include <arc/std/lexer.h> #include "arc/std/parser.h"
#include <arc/std/parser.h>
#include <arc/std/string.h> /*
<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
<tagOrConstant> -> <tag> | <constant>
<constant> -> ALPHA_UPPER_CHAR <constantBody>
<constantBody> -> <constantChar> <constantBody> | LAMBDA
<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
<tag> -> LESS_THAN <variable> GREATER_THAN
<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
<variableBody> -> <variableChar> <variableBody> | LAMBDA
<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
*/
/*
* @brief creates a parser for the Parser Lang
*
* @note the rules will be inited for the parser lang
*
* @param[out] parser the parser to create
*/
void ARC_Parser_CreateAsParserLang(ARC_Parser **parser);
#define ARC_PARSERLANG_TOKEN_NULL 0 #define ARC_PARSERLANG_TOKEN_NULL 0
#define ARC_PARSERLANG_TOKEN_NUMBER 1 #define ARC_PARSERLANG_TOKEN_NUMBER 1
@ -44,91 +70,6 @@ extern "C" {
#define ARC_PARSERLANG_VARIABLE_CHAR 22 #define ARC_PARSERLANG_VARIABLE_CHAR 22
#define ARC_PARSERLANG_ALPHA_CHAR 23 #define ARC_PARSERLANG_ALPHA_CHAR 23
/*
 * @brief registers the token rules used by the parser lang on a lexer
 *
 * @note rules are registered in this order: null, number, lower alpha, upper alpha,
 *       whitespace, newline, less than, greater than, or, underscore, arrow
 *
 * @param[in] lexer the lexer to register the rules to
 */
void ARC_Language_InitLexerRulesFn(ARC_Lexer *lexer){
    //null
    ARC_LexerTokenRule nullRule = ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0);
    ARC_Lexer_RegisterTokenRule(lexer, nullRule);

    //number, then lower and upper alpha chars, each matched as an inclusive char range
    const struct { uint32_t id; char first; char last; } charRanges[] = {
        { ARC_PARSERLANG_TOKEN_NUMBER          , '0', '9' },
        { ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR, 'a', 'z' },
        { ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z' }
    };
    for(uint32_t rangeIndex = 0; rangeIndex < sizeof(charRanges) / sizeof(charRanges[0]); rangeIndex++){
        ARC_LexerTokenRule rangeRule = ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(charRanges[rangeIndex].id, charRanges[rangeIndex].first, charRanges[rangeIndex].last);
        ARC_Lexer_RegisterTokenRule(lexer, rangeRule);
    }

    //whitespace, the temporary string is destroyed once the rule has been registered
    ARC_String *blanks;
    ARC_String_CreateWithStrlen(&blanks, " \t");
    ARC_LexerTokenRule whitespaceRule = ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_LEXER_TOKEN_WHITESPACE, blanks);
    ARC_Lexer_RegisterTokenRule(lexer, whitespaceRule);
    ARC_String_Destroy(blanks);

    //single char tokens
    const struct { uint32_t id; char value; } singleChars[] = {
        { ARC_PARSERLANG_TOKEN_NEWLINE_ID     , ARC_PARSERLANG_TOKEN_NEWLINE_CHAR      },
        { ARC_PARSERLANG_TOKEN_LESS_THAN_ID   , ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR    },
        { ARC_PARSERLANG_TOKEN_GREATER_THAN_ID, ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR },
        { ARC_PARSERLANG_TOKEN_OR_ID          , ARC_PARSERLANG_TOKEN_OR_CHAR           },
        { ARC_PARSERLANG_TOKEN_UNDERSCORE_ID  , ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR   }
    };
    for(uint32_t charIndex = 0; charIndex < sizeof(singleChars) / sizeof(singleChars[0]); charIndex++){
        ARC_LexerTokenRule charRule = ARC_LexerTokenRule_CreateAndReturnMatchCharRule(singleChars[charIndex].id, singleChars[charIndex].value);
        ARC_Lexer_RegisterTokenRule(lexer, charRule);
    }

    //arrow, the temporary string is destroyed once the rule has been registered
    ARC_String *arrow;
    ARC_String_CreateWithStrlen(&arrow, ARC_PARSERLANG_TOKEN_ARROW_CSTRING);
    ARC_LexerTokenRule arrowRule = ARC_LexerTokenRule_CreateAndReturnMatchStringRule(ARC_PARSERLANG_TOKEN_ARROW_ID, arrow);
    ARC_Lexer_RegisterTokenRule(lexer, arrowRule);
    ARC_String_Destroy(arrow);
}
/*
<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
<body> -> <tag> WHITESPACE ARROW <arguments>
<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
<tagOrConstant> -> <parserLangageTag> | <constant>
<constant> -> ALPHA_UPPER_CHAR <constantBody>
<constantBody> -> <constantChar> <constantBody> | LAMBDA
<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
<tag> -> LESS_THAN <variable> GREATER_THAN
<variable> -> <noCaseAlphaChar> <variableBody> | UNDERSCORE <variableBody>
<variableBody> -> <variableChar> <variableBody> | LAMBDA
<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
*/
//<variableBody> -> <variableChar> <variableBody> | LAMBDA
uint32_t *variableBody[] = { (uint32_t[]){ 2, ARC_PARSERLANG_VARIABLE_CHAR, ARC_PARSERLANG_VARIABLE_BODY }, (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }};
//<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
uint32_t *variableChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_ALPHA_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_NUMBER }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_NUMBER }};
//<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }};
/*
ARC_ParserLanguageTag testTags[3] = {
{
VARIABLE, //tagId
variableTokensOrTags, //tokensOrTags
1 //tokenOrTagsSize
},
{
VARIABLE_NAME, //tagId
variableNameTags, //tokensOrTags
2 //tokenOrTagsSize
},
{
CHAR_OR_NUM, //tagId
charOrNumTokens, //tokensOrTags
2 //tokenOrTagsSize
}
};
ARC_Array languageArray = {
3, //size
testTags //data
};
*/
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -176,11 +176,6 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
//check if the token rule is found //check if the token rule is found
ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index); ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
//set the last token length if the last token had a length
if(tokenLength > 0){
lastTokenLength = tokenLength;
}
//tokenData should only exist if tokenLength is ARC_True as stated in the header //tokenData should only exist if tokenLength is ARC_True as stated in the header
ARC_String *tokenData; ARC_String *tokenData;
tokenLength = tokenRule->automataFn(&tokenData, *data, tokenRule->automataData); tokenLength = tokenRule->automataFn(&tokenData, *data, tokenRule->automataData);
@ -201,6 +196,9 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
token = (ARC_LexerToken *)malloc(sizeof(ARC_LexerToken)); token = (ARC_LexerToken *)malloc(sizeof(ARC_LexerToken));
token->rule = tokenRule->id; token->rule = tokenRule->id;
token->data = tokenData; token->data = tokenData;
//update the last found tokenLength to the max length
lastTokenLength = tokenLength;
} }
} }
@ -415,6 +413,7 @@ ARC_LexerTokenRule ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(uint32_t
//return the created tokenRule //return the created tokenRule
return tokenRule; return tokenRule;
} }
//private function to free automataData stored as an ARC_String //private function to free automataData stored as an ARC_String
void ARC_LexerTokenRule_DestroyStringAutomataDataFn(void *automataData){ void ARC_LexerTokenRule_DestroyStringAutomataDataFn(void *automataData){
ARC_String_Destroy((ARC_String *)automataData); ARC_String_Destroy((ARC_String *)automataData);

View file

@ -0,0 +1,99 @@
#include "arc/std/parser/parserlang.h"
#include "arc/std/lexer.h"
#include "arc/std/parser.h"
#include "arc/std/string.h"
/*
 * @brief registers the token rules used by the parser lang on a lexer
 *
 * @note rules are registered in this order: null, number, lower alpha, upper alpha,
 *       whitespace, newline, less than, greater than, or, underscore, arrow
 *
 * @param[in] lexer the lexer to register the rules to
 */
void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
    //null
    ARC_LexerTokenRule nullRule = ARC_LexerTokenRule_CreateAndReturnMatchCharRule(ARC_PARSERLANG_TOKEN_NULL, 0);
    ARC_Lexer_RegisterTokenRule(lexer, nullRule);

    //number, then lower and upper alpha chars, each matched as an inclusive char range
    const struct { uint32_t id; char first; char last; } charRanges[] = {
        { ARC_PARSERLANG_TOKEN_NUMBER          , '0', '9' },
        { ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR, 'a', 'z' },
        { ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, 'A', 'Z' }
    };
    for(uint32_t rangeIndex = 0; rangeIndex < sizeof(charRanges) / sizeof(charRanges[0]); rangeIndex++){
        ARC_LexerTokenRule rangeRule = ARC_LexerTokenRule_CreateAndReturnMatchCharOrBetween(charRanges[rangeIndex].id, charRanges[rangeIndex].first, charRanges[rangeIndex].last);
        ARC_Lexer_RegisterTokenRule(lexer, rangeRule);
    }

    //whitespace, the temporary string is destroyed once the rule has been registered
    ARC_String *blanks;
    ARC_String_CreateWithStrlen(&blanks, " \t");
    ARC_LexerTokenRule whitespaceRule = ARC_LexerTokenRule_CreateAndReturnMatchCharInStringRule(ARC_LEXER_TOKEN_WHITESPACE, blanks);
    ARC_Lexer_RegisterTokenRule(lexer, whitespaceRule);
    ARC_String_Destroy(blanks);

    //single char tokens
    const struct { uint32_t id; char value; } singleChars[] = {
        { ARC_PARSERLANG_TOKEN_NEWLINE_ID     , ARC_PARSERLANG_TOKEN_NEWLINE_CHAR      },
        { ARC_PARSERLANG_TOKEN_LESS_THAN_ID   , ARC_PARSERLANG_TOKEN_LESS_THAN_CHAR    },
        { ARC_PARSERLANG_TOKEN_GREATER_THAN_ID, ARC_PARSERLANG_TOKEN_GREATER_THAN_CHAR },
        { ARC_PARSERLANG_TOKEN_OR_ID          , ARC_PARSERLANG_TOKEN_OR_CHAR           },
        { ARC_PARSERLANG_TOKEN_UNDERSCORE_ID  , ARC_PARSERLANG_TOKEN_UNDERSCORE_CHAR   }
    };
    for(uint32_t charIndex = 0; charIndex < sizeof(singleChars) / sizeof(singleChars[0]); charIndex++){
        ARC_LexerTokenRule charRule = ARC_LexerTokenRule_CreateAndReturnMatchCharRule(singleChars[charIndex].id, singleChars[charIndex].value);
        ARC_Lexer_RegisterTokenRule(lexer, charRule);
    }

    //arrow, the temporary string is destroyed once the rule has been registered
    ARC_String *arrow;
    ARC_String_CreateWithStrlen(&arrow, ARC_PARSERLANG_TOKEN_ARROW_CSTRING);
    ARC_LexerTokenRule arrowRule = ARC_LexerTokenRule_CreateAndReturnMatchStringRule(ARC_PARSERLANG_TOKEN_ARROW_ID, arrow);
    ARC_Lexer_RegisterTokenRule(lexer, arrowRule);
    ARC_String_Destroy(arrow);
}
/*
 * @brief creates a parser for the Parser Lang
 *
 * @note every production below is laid out as { N, id_1, ..., id_N }, the leading value
 *       is the number of token or tag ids that follow it
 * @note every entry of parserLangTags is { tag id, productions, number of productions },
 *       so the count has to equal the number of alternatives in the matching array
 *
 * @param[out] parser the parser to create
 */
void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
    //<line> -> <body> NEWLINE <line> | <body> | NEWLINE <line> | LAMBDA
    //the first alternative recurses into <line> (it used to end in the ARROW token, which does not match the grammar)
    uint32_t *line[] = {
        (uint32_t[]){ 3, ARC_PARSERLANG_BODY, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE },
        (uint32_t[]){ 1, ARC_PARSERLANG_BODY },
        (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_NEWLINE_ID, ARC_PARSERLANG_LINE },
        (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
    };

    //<body> -> <tag> WHITESPACE ARROW WHITESPACE <arguments>
    //NOTE(review): the lexer rules in ARC_ParserLang_InitLexerRulesFn register whitespace under ARC_LEXER_TOKEN_WHITESPACE,
    //              confirm ARC_PARSERLANG_TOKEN_WHITESPACE has the same value or whitespace will never match
    uint32_t *body[] = {
        (uint32_t[]){ 5, ARC_PARSERLANG_TAG, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_ARROW_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS }
    };

    //<arguments> -> <argument> WHITESPACE OR WHITESPACE <arguments> | <tagOrConstant>
    //NOTE(review): the fallback is a single <tagOrConstant>, so a final alternative made of several
    //              whitespace separated symbols is not covered, confirm whether <argument> was intended
    uint32_t *arguments[] = {
        (uint32_t[]){ 5, ARC_PARSERLANG_ARGUMENT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_TOKEN_OR_ID, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENTS },
        (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT }
    };

    //<argument> -> <tagOrConstant> WHITESPACE <argument> | <tagOrConstant>
    uint32_t *argument[] = {
        (uint32_t[]){ 3, ARC_PARSERLANG_TAG_OR_CONSTANT, ARC_PARSERLANG_TOKEN_WHITESPACE, ARC_PARSERLANG_ARGUMENT },
        (uint32_t[]){ 1, ARC_PARSERLANG_TAG_OR_CONSTANT }
    };

    //<tagOrConstant> -> <tag> | <constant>
    uint32_t *tagOrConstant[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_TAG },
        (uint32_t[]){ 1, ARC_PARSERLANG_CONSTANT }
    };

    //<constant> -> ALPHA_UPPER_CHAR <constantBody>
    uint32_t *constant[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR, ARC_PARSERLANG_CONSTANT_BODY }
    };

    //<constantBody> -> <constantChar> <constantBody> | LAMBDA
    uint32_t *constantBody[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_CONSTANT_CHAR, ARC_PARSERLANG_CONSTANT_BODY },
        (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
    };

    //<constantChar> -> ALPHA_UPPER_CHAR | UNDERSCORE
    //two alternatives of one symbol each, a single two symbol production would mean "upper char followed by underscore"
    //and would leave the table below reading a second production that does not exist
    uint32_t *constantChar[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID }
    };

    //<tag> -> LESS_THAN <variable> GREATER_THAN
    uint32_t *tag[] = {
        (uint32_t[]){ 3, ARC_PARSERLANG_TOKEN_LESS_THAN_ID, ARC_PARSERLANG_VARIABLE, ARC_PARSERLANG_TOKEN_GREATER_THAN_ID }
    };

    //<variable> -> <alphaChar> <variableBody> | UNDERSCORE <variableBody>
    uint32_t *variable[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_ALPHA_CHAR, ARC_PARSERLANG_VARIABLE_BODY },
        (uint32_t[]){ 2, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID, ARC_PARSERLANG_VARIABLE_BODY }
    };

    //<variableBody> -> <variableChar> <variableBody> | LAMBDA
    uint32_t *variableBody[] = {
        (uint32_t[]){ 2, ARC_PARSERLANG_VARIABLE_CHAR, ARC_PARSERLANG_VARIABLE_BODY },
        (uint32_t[]){ 1, ARC_PARSERLANG_LAMBDA }
    };

    //<variableChar> -> <alphaChar> | NUMBER | UNDERSCORE
    uint32_t *variableChar[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_ALPHA_CHAR },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_NUMBER },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_UNDERSCORE_ID }
    };

    //<alphaChar> -> ALPHA_LOWER_CHAR | ALPHA_UPPER_CHAR
    uint32_t *alphaChar[] = {
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR },
        (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }
    };

    //one entry per tag: the tag id, its productions, and how many productions it has
    ARC_ParserLanguageTag parserLangTags[13] = {
        { ARC_PARSERLANG_LINE           , line         , 4 },
        { ARC_PARSERLANG_BODY           , body         , 1 },
        { ARC_PARSERLANG_ARGUMENTS      , arguments    , 2 },
        { ARC_PARSERLANG_ARGUMENT       , argument     , 2 },
        { ARC_PARSERLANG_TAG_OR_CONSTANT, tagOrConstant, 2 },
        { ARC_PARSERLANG_CONSTANT       , constant     , 1 },
        { ARC_PARSERLANG_CONSTANT_BODY  , constantBody , 2 },
        { ARC_PARSERLANG_CONSTANT_CHAR  , constantChar , 2 },
        { ARC_PARSERLANG_TAG            , tag          , 1 },
        { ARC_PARSERLANG_VARIABLE       , variable     , 2 },
        { ARC_PARSERLANG_VARIABLE_BODY  , variableBody , 2 },
        { ARC_PARSERLANG_VARIABLE_CHAR  , variableChar , 3 },
        { ARC_PARSERLANG_ALPHA_CHAR     , alphaChar    , 2 }
    };

    ARC_Array parserLanguageArray = {
        13,            //size
        parserLangTags //data
    };

    //NOTE(review): every table above has automatic storage and ends with this function, this assumes
    //              ARC_Parser_Create copies the language instead of keeping these pointers, confirm
    ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn);
}

View file

@ -1,8 +1,8 @@
#include "../test.h" #include "../test.h"
#include "arc/std/errno.h" #include "arc/std/errno.h"
#include "arc/std/parser.h" #include "arc/std/parser.h"
#include "arc/std/parser/parserlang.h"
//TODO: fix lambda
#define LAMBDA ARC_PARSER_TAG_LAMBDA #define LAMBDA ARC_PARSER_TAG_LAMBDA
#define CHAR ARC_LEXER_TOKEN_ALPHALOWERCHAR #define CHAR ARC_LEXER_TOKEN_ALPHALOWERCHAR
#define NUM ARC_LEXER_TOKEN_NUMBER #define NUM ARC_LEXER_TOKEN_NUMBER
@ -150,4 +150,23 @@ ARC_TEST(Parser_Basic_ParseError){
ARC_Parser_Parse(parser, &tempString); ARC_Parser_Parse(parser, &tempString);
ARC_CHECK(arc_errno == ARC_ERRNO_DATA); ARC_CHECK(arc_errno == ARC_ERRNO_DATA);
//reset for next test
arc_errno = 0;
} }
//ARC_TEST(Parser_ParserLang_BasicTest){
// ARC_Parser *parser;
// ARC_Parser_CreateAsParserLang(&parser);
//
// ARC_String *tempString;
// ARC_String_CreateWithStrlen(&tempString, "<test> -> <testingStuffs>\n");
//
// //this destroys string, so no need for cleanup
// ARC_Parser_Parse(parser, &tempString);
//
// ARC_Parser_Destroy(parser);
//
// ARC_CHECK(arc_errno == 0);
//}
//