parser mostly fixed, still needs a lot more testing though, and need to store parsed values in a datatype

This commit is contained in:
herbglitch 2024-11-01 04:39:45 -06:00
parent 963b99c6bd
commit 006b4c63f7
5 changed files with 49 additions and 13 deletions

View file

@ -99,6 +99,15 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data);
*/
void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path);
/**
* @brief prints the id and the automata function pointer of every token rule stored in a lexer
*
* @note this is mostly used for debugging, one line is written to stdout per rule
*
* @param[in] lexer the lexer to print rules from
*/
void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer);
/**
* @brief gets a token at a given index from a lexer
*

View file

@ -90,6 +90,11 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data);
*/
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
/**
* @brief basic tag for letting the parser know it is ok to end
*
* @note the value is 0, ARC_Parser_ParseTag compares each entry of a rule against this tag to detect lambda
*/
#define ARC_PARSER_TAG_LAMBDA 0
#ifdef __cplusplus
}
#endif

View file

@ -257,6 +257,13 @@ void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path){
}
}
/*
 * prints one line per token rule held by the lexer: the rule id followed by the
 * pointer value of the rule's automata function
 *
 * this is a debugging aid, output goes to stdout
 *
 * lexer: the lexer whose token rules are printed
 */
void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer){
    for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
        ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);

        //%p requires a void * argument, passing the automata function uncast through varargs is undefined behavior, so cast explicitly
        printf("Rule: %02i\tFunction: %p\n", tokenRule->id, (void *)tokenRule->automataFn);
    }
}
ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
//get the token and log if there is an error
ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);

View file

@ -72,19 +72,29 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
ARC_Bool foundRule = ARC_True;
for(uint32_t tokenOrTagIndex = 1; tokenOrTagIndex < tag->tokensOrTags[orIndex][0] + 1; tokenOrTagIndex++){
//check if it is lambda (can return safely)
if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == 0){
return ARC_True;
if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == ARC_PARSER_TAG_LAMBDA){
break;
}
//if the value isn't a token it is a tag, so recurse if it isn't a token
ARC_Bool isToken = ARC_Lexer_IsTokenId(parser->lexer, tag->tokensOrTags[orIndex][tokenOrTagIndex]);
uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
if(isToken == ARC_False){
//check if the tag works if not break to continue checking next or
ARC_Bool tagFound = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
if(tagFound == ARC_False){
uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
foundRule = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
if(foundRule == ARC_False){
break;
}
//this will probably never be called as lambda is usually the last instruction, but just in case we can continue instead of break
continue;
}
//check if there is another token that can be used
if(lexerCheckIndex >= ARC_Lexer_GetTokensSize(parser->lexer)){
//out of tokens, so the current or does not work, break
foundRule = ARC_False;
break;
}
//get the next token in the lexer and increment the lexers index
@ -106,9 +116,7 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
}
}
//no rule was found, so set an error and log
arc_errno = ARC_ERRNO_DATA;
ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, lexerIndex, tagId), tag id: %u could not find a matching rule at token index %u", tagId, *lexerIndex);
//no rule was found, so return false
return ARC_False;
}
@ -131,11 +139,14 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
uint32_t lexerIndex = 0;
ARC_ParserLanguageTag *startTag = parser->language.data;
//TODO: handle error checks for if parsing fails
//recursively parse from the initial start tag
ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
ARC_Bool parsed = ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);
ARC_Lexer_Clear(parser->lexer);
if(arc_errno){
ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not parse the given data");
if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
arc_errno = ARC_ERRNO_DATA;
ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);
return;
}
}

View file

@ -3,7 +3,7 @@
#include "arc/std/parser.h"
//TODO: fix lambda
#define LAMBDA 0
#define LAMBDA ARC_PARSER_TAG_LAMBDA
#define CHAR ARC_LEXER_TOKEN_ALPHALOWERCHAR
#define NUM ARC_LEXER_TOKEN_NUMBER
#define CHAR_OR_NUM 23
@ -82,7 +82,8 @@ ARC_TEST(Parser_Basic_Parse){
ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
ARC_String *tempString;
ARC_String_CreateWithStrlen(&tempString, "myVar1");
//ARC_String_CreateWithStrlen(&tempString, "myvar1");
ARC_String_CreateWithStrlen(&tempString, "m");
//this destroys string, so no need for cleanup
ARC_Parser_Parse(parser, &tempString);
@ -119,6 +120,9 @@ ARC_TEST(Parser_Basic_ParseError){
testTags //data
};
//TODO: remove this
arc_errno = 0;
ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
ARC_String *tempString;