parser mostly fixed, still needs a lot more testing though, and need to store parsed values in a datatype
This commit is contained in:
parent 963b99c6bd
commit 006b4c63f7
5 changed files with 49 additions and 13 deletions
@@ -99,6 +99,15 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data);
  */
 void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path);
 
+/**
+ * @brief prints the id and the automata function address of each registered token rule
+ *
+ * @note this is mostly used for debugging
+ *
+ * @param[in] lexer the lexer to print rules from
+ */
+void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer);
+
 /**
  * @brief gets a token at a given index from a lexer
  *
@@ -90,6 +90,11 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data);
  */
 void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
 
+/**
+ * @brief basic tag for letting the parser know it is ok to end
+ */
+#define ARC_PARSER_TAG_LAMBDA 0
+
 #ifdef __cplusplus
 }
 #endif
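
A small caller-side sketch for the file-based entry point declared above; the ARC_Parser_Create arguments are borrowed from the test file further down, the path is hypothetical, and whether ARC_Parser_ParseFile takes ownership of the path string is not shown in this diff.

    ARC_Parser *parser;
    ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn); //setup as in the test below

    ARC_String *path;
    ARC_String_CreateWithStrlen(&path, "test.arc"); //hypothetical source file path
    ARC_Parser_ParseFile(parser, path);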
@@ -257,6 +257,13 @@ void ARC_Lexer_LexFile(ARC_Lexer *lexer, ARC_String *path){
     }
 }
 
+void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer){
+    for(uint32_t index = 0; index < ARC_Vector_GetSize(lexer->tokenRules); index++){
+        ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
+        printf("Rule: %02i\tFunction: %p\n", tokenRule->id, tokenRule->automataFn);
+    }
+}
+
 ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
     //get the token and log if there is an error
     ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);
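
A minimal sketch of how the new debug helper might be driven, assuming a lexer whose token rules were registered elsewhere (rule registration is not part of this diff); the helper name is hypothetical, and ARC_Lexer_LexString is the declaration from the lexer header hunk above.

    //hypothetical debug helper: lex a source string, then dump every registered token rule
    void TEST_DebugLexAndDump(ARC_Lexer *lexer, ARC_String **source){
        ARC_Lexer_LexString(lexer, source);  //tokenize the input
        ARC_Lexer_PrintTokenRules(lexer);    //prints one "Rule: <id>  Function: <automata fn address>" line per rule
    }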
@@ -72,19 +72,29 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
         ARC_Bool foundRule = ARC_True;
         for(uint32_t tokenOrTagIndex = 1; tokenOrTagIndex < tag->tokensOrTags[orIndex][0] + 1; tokenOrTagIndex++){
             //check if it is lambda (can safely stop checking this rule)
-            if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == 0){
-                return ARC_True;
+            if(tag->tokensOrTags[orIndex][tokenOrTagIndex] == ARC_PARSER_TAG_LAMBDA){
+                break;
             }
 
             //if the value isn't a token it is a tag, so recurse if it isn't a token
             ARC_Bool isToken = ARC_Lexer_IsTokenId(parser->lexer, tag->tokensOrTags[orIndex][tokenOrTagIndex]);
-            uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
             if(isToken == ARC_False){
                 //check if the tag works; if not, break to continue checking the next or
-                ARC_Bool tagFound = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
-                if(tagFound == ARC_False){
+                uint32_t nextTagId = tag->tokensOrTags[orIndex][tokenOrTagIndex];
+                foundRule = ARC_Parser_ParseTag(parser, lexerIndex, nextTagId);
+                if(foundRule == ARC_False){
                     break;
                 }
+
+                //this will probably never be called as lambda is usually the last instruction, but just in case we can continue instead of break
+                continue;
+            }
+
+            //check if there is another token that can be used
+            if(lexerCheckIndex >= ARC_Lexer_GetTokensSize(parser->lexer)){
+                //out of tokens, so the current or does not work; break
+                foundRule = ARC_False;
+                break;
+            }
 
             //get the next token in the lexer and increment the lexer's index
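
The loop above treats tag->tokensOrTags[orIndex][0] as the entry count and the elements after it as token ids or tag ids, with ARC_PARSER_TAG_LAMBDA (0) marking an empty alternative. An illustrative encoding of one tag's alternatives, not taken from the repo (the uint32_t element type is an assumption; CHAR and CHAR_OR_NUM are the defines from the test file below):

    //hypothetical alternatives for one tag: "CHAR CHAR_OR_NUM" or the empty (lambda) production
    uint32_t exampleOr0[] = {2, CHAR, CHAR_OR_NUM};     //[0] = entry count, then the ids
    uint32_t exampleOr1[] = {1, ARC_PARSER_TAG_LAMBDA}; //lambda lets the tag match nothing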
@@ -106,9 +116,7 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
         }
     }
 
-    //no rule was found, so set an error and log
-    arc_errno = ARC_ERRNO_DATA;
-    ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, lexerIndex, tagId), tag id: %u could not find a matching rule at token index %u", tagId, *lexerIndex);
+    //no rule was found, so return false
     return ARC_False;
 }
 
@@ -131,11 +139,14 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
     uint32_t lexerIndex = 0;
     ARC_ParserLanguageTag *startTag = parser->language.data;
+
     //TODO: handle error checks for if parsing fails
     //recursively parse from the initial start tag
-    ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
+    ARC_Bool parsed = ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);
+    ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);
     ARC_Lexer_Clear(parser->lexer);
-    if(arc_errno){
-        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not parse the given data");
+    if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
+        arc_errno = ARC_ERRNO_DATA;
+        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);
         return;
     }
 }
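
With this change the caller learns about failure through arc_errno: ARC_Parser_Parse returns void, sets ARC_ERRNO_DATA when ParseTag fails or tokens are left over, and logs the failing lexer index. A minimal caller-side sketch mirroring the test further down (the parser is assumed to be created as in that test):

    ARC_String *source;
    ARC_String_CreateWithStrlen(&source, "m");

    //ARC_Parser_Parse destroys the string, so no separate cleanup is needed
    ARC_Parser_Parse(parser, &source);

    if(arc_errno){
        //either no rule matched or not every token was consumed
    }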
@@ -3,7 +3,7 @@
 #include "arc/std/parser.h"
 
 //TODO: fix lambda
-#define LAMBDA 0
+#define LAMBDA ARC_PARSER_TAG_LAMBDA
 #define CHAR ARC_LEXER_TOKEN_ALPHALOWERCHAR
 #define NUM ARC_LEXER_TOKEN_NUMBER
 #define CHAR_OR_NUM 23
@@ -82,7 +82,8 @@ ARC_TEST(Parser_Basic_Parse){
     ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
 
     ARC_String *tempString;
-    ARC_String_CreateWithStrlen(&tempString, "myVar1");
+    //ARC_String_CreateWithStrlen(&tempString, "myvar1");
+    ARC_String_CreateWithStrlen(&tempString, "m");
 
     //this destroys string, so no need for cleanup
     ARC_Parser_Parse(parser, &tempString);
@@ -119,6 +120,9 @@ ARC_TEST(Parser_Basic_ParseError){
         testTags //data
     };
 
+    //TODO: remove this
+    arc_errno = 0;
+
     ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
 
     ARC_String *tempString;