archeus/src/std/parser.c

339 lines
13 KiB
C
Raw Normal View History

#include "arc/std/parser.h"
#include "arc/std/bool.h"
2024-10-16 23:46:16 -06:00
#include "arc/std/errno.h"
#include "arc/std/lexer.h"
#include "arc/std/vector.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
//parser state shared by the ARC_Parser_* functions in this file
struct ARC_Parser {
//copy of the language (an array of ARC_ParserTag) made by ARC_Parser_Create, size is 0 and data is NULL when no language was given
ARC_Array language;
//lexer created in ARC_Parser_Create, it tokenizes the input before the tags are matched
ARC_Lexer *lexer;
//result written by createDataFn after a successful parse, NULL until then and after ARC_Parser_ClearData
void *data;
//caller supplied pointer that is handed to both the create and the destroy callback
void *userData;
//heap allocated copy of the create callback, or NULL
ARC_ParserData_CreateFn *createDataFn;
//heap allocated copy of the destroy callback, or NULL
ARC_ParserData_DestroyFn *destroyDataFn;
};
//creates a parser that owns a deep copy of the given language
//
//parser           - out parameter that receives the newly allocated parser
//language         - array of ARC_ParserTag to copy, may be NULL which leaves the parser with an empty language
//initLexerRulesFn - callback that registers the lexer rules on the parser's freshly created lexer
//createDataFn     - optional callback used by ARC_Parser_Parse to build the parsed data, may be NULL
//destroyDataFn    - optional callback used to release the parsed data, may be NULL
//userData         - pointer stored as is and passed to both callbacks
//
//note: the create and destroy callbacks are copied independently of each other, either one may be NULL
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn, void *userData){
    *parser = (ARC_Parser *)malloc(sizeof(ARC_Parser));

    //set the language size to 0 and data to NULL in case the language is NULL
    (*parser)->language.size = 0;
    (*parser)->language.data = NULL;

    //if the language exists, copy the language
    if(language != NULL){
        (*parser)->language.size = language->size;
        (*parser)->language.data = malloc(sizeof(ARC_ParserTag) * language->size);

        for(uint32_t index = 0; index < language->size; index++){
            ARC_ParserTag *languageTag = ((ARC_ParserTag *)language->data) + index;
            ARC_ParserTag *currentTag = ((ARC_ParserTag *)(*parser)->language.data) + index;

            //copy the language tag into the current tag
            currentTag->tagId = languageTag->tagId;
            currentTag->tokensOrTagsSize = languageTag->tokensOrTagsSize;

            //create place to store tokens or tags
            currentTag->tokensOrTags = (uint32_t **)malloc(sizeof(uint32_t *) * languageTag->tokensOrTagsSize);

            //copy each or section, element 0 holds the section length so length + 1 values are copied
            for(uint32_t orIndex = 0; orIndex < languageTag->tokensOrTagsSize; orIndex++){
                currentTag->tokensOrTags[orIndex] = (uint32_t *)malloc(sizeof(uint32_t) * (languageTag->tokensOrTags[orIndex][0] + 1));

                for(uint32_t tokenOrTagIndex = 0; tokenOrTagIndex < languageTag->tokensOrTags[orIndex][0] + 1; tokenOrTagIndex++){
                    currentTag->tokensOrTags[orIndex][tokenOrTagIndex] = languageTag->tokensOrTags[orIndex][tokenOrTagIndex];
                }
            }
        }
    }

    //create the lexer
    ARC_Lexer_Create(&((*parser)->lexer));

    //register instructions to the lexer
    initLexerRulesFn(((*parser)->lexer));

    //set the data to null (the parse function is what creates it)
    (*parser)->data = NULL;

    //set the userData for the callback functions
    (*parser)->userData = userData;

    //init the create function callback with null, then copy the callback if it exists
    (*parser)->createDataFn = NULL;
    if(createDataFn != NULL){
        (*parser)->createDataFn = (ARC_ParserData_CreateFn *)malloc(sizeof(ARC_ParserData_CreateFn));
        *((*parser)->createDataFn) = *createDataFn;
    }

    //init the destroy function callback with null, then copy the callback if it exists
    //this has to check destroyDataFn itself, checking createDataFn would dereference a NULL destroyDataFn or drop a valid one
    (*parser)->destroyDataFn = NULL;
    if(destroyDataFn != NULL){
        (*parser)->destroyDataFn = (ARC_ParserData_DestroyFn *)malloc(sizeof(ARC_ParserData_DestroyFn));
        *((*parser)->destroyDataFn) = *destroyDataFn;
    }
}
//creates a parser from a language stored in an ARC_Vector of ARC_ParserTag
//
//the vector contents are copied into a temporary heap array and passed on to ARC_Parser_Create, the other
//parameters are forwarded unchanged
//
//note: a heap array is used instead of a variable length array, a zero sized variable length array is undefined
//      behavior and a large language could exhaust the stack
//note: on allocation failure *parser is set to NULL and arc_errno is set to ARC_ERRNO_NULL
void ARC_Parser_CreateFromVector(ARC_Parser **parser, ARC_Vector *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn, void *userData){
    //creates the variables to copy the vector into
    const uint32_t languageSize = ARC_Vector_GetSize(language);
    ARC_ParserTag *languageArray = NULL;

    //only allocate when there is something to copy, an empty vector is passed on as an empty array
    if(languageSize != 0){
        languageArray = (ARC_ParserTag *)malloc(sizeof(ARC_ParserTag) * languageSize);
        if(languageArray == NULL){
            *parser = NULL;
            arc_errno = ARC_ERRNO_NULL;
            ARC_DEBUG_LOG_ERROR("ARC_Parser_CreateFromVector(parser, language, initLexerRulesFn, createDataFn, destroyDataFn, userData), could not allocate the temporary language array");
            return;
        }
    }

    //copy the language from a vector into an array
    for(uint32_t index = 0; index < languageSize; index++){
        languageArray[index] = *(ARC_ParserTag *)ARC_Vector_Get(language, index);
    }

    //set the vector data as an ARC_Array
    ARC_Array languageAsArray = {
        languageSize,
        languageArray
    };

    //create the parser
    ARC_Parser_Create(parser, &languageAsArray, initLexerRulesFn, createDataFn, destroyDataFn, userData);

    //ARC_Parser_Create copies the tags and their or sections, so the temporary array is no longer needed
    free(languageArray);
}
//TODO: not implemented yet, the body is empty so *parser is left untouched and languageString and initLexerRulesFn are unused
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
}
//destroys a parser along with everything it owns: the copied language, the callback copies, any parsed data
//(through the destroy callback) and the lexer
//
//parser - the parser to destroy, passing NULL does nothing
//
//note: the destroy callback is only run when parsed data exists, the same condition ARC_Parser_ClearData uses,
//      so the callback is never handed a NULL data pointer
void ARC_Parser_Destroy(ARC_Parser *parser){
    //nothing to clean up
    if(parser == NULL){
        return;
    }

    //clear all the copied token or tags from memory
    for(uint32_t index = 0; index < parser->language.size; index++){
        ARC_ParserTag *currentTag = ((ARC_ParserTag *)parser->language.data) + index;

        //free the orIndex values
        for(uint32_t orIndex = 0; orIndex < currentTag->tokensOrTagsSize; orIndex++){
            free(currentTag->tokensOrTags[orIndex]);
        }

        //free the tokens or tags
        free(currentTag->tokensOrTags);
    }

    //free the creation function callback (free on NULL is a no-op)
    free(parser->createDataFn);

    //free the data and the deletion function callback
    if(parser->destroyDataFn != NULL){
        if(parser->data != NULL){
            (*(parser->destroyDataFn))(parser->data, parser->userData);
        }

        free(parser->destroyDataFn);
    }

    //clear the copied language from memory
    free(parser->language.data);

    ARC_Lexer_Destroy(parser->lexer);

    free(parser);
}
//private constructor for the ARC_ParserTagToken type
//
//tagToken - out parameter that receives the newly allocated tag token
//id       - id stored in the new tag token
//
//note: token and tagTokens start out as NULL, tagTokens may be created by the caller afterwards because
//      ARC_ParserTagToken_Destroy cleans it up when it is set
void ARC_ParserTagToken_Create(ARC_ParserTagToken **tagToken, uint32_t id){
    ARC_ParserTagToken *created = (ARC_ParserTagToken *)malloc(sizeof(ARC_ParserTagToken));

    created->id        = id;
    created->token     = NULL;
    created->tagTokens = NULL;

    *tagToken = created;
}
//private destructor for the ARC_ParserTagToken type
//
//destroys the vector of child tag tokens when one exists, then frees the tag token itself
//note: the token pointer is not freed here
void ARC_ParserTagToken_Destroy(ARC_ParserTagToken *tagToken){
    ARC_Vector *childTagTokens = tagToken->tagTokens;

    //only tag tokens that were parsed as a tag hold a vector of children
    if(childTagTokens != NULL){
        ARC_Vector_Destroy(childTagTokens);
    }

    free(tagToken);
}
//private function to cleanup a parser tag token from a vector
void ARC_ParserTagToken_VectorDestroyDataFn(void *data){
//we can just use the destroy function with casted data
ARC_ParserTagToken_Destroy((ARC_ParserTagToken *)data);
}
2024-10-16 23:46:16 -06:00
//private recursive function to parse a tag
//
//parser     - parser holding the language and the already lexed tokens
//tagToken   - tag token to fill, its id selects the language tag to match
//lexerIndex - in: index of the first lexer token to match, out: index after the last matched token
//
//each or section of the tag is tried in order and the first one that matches wins, on a match the children are
//left in tagToken->tagTokens, *lexerIndex is advanced and ARC_True is returned
//
//when no or section matches tagToken->tagTokens is destroyed and set to NULL, *lexerIndex is left unchanged and
//ARC_False is returned, when the tag id is not part of the language arc_errno is set to ARC_ERRNO_NULL
ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, ARC_ParserTagToken *tagToken, uint32_t *lexerIndex){
    //find the language tag that matches the id of the tag token
    ARC_ParserTag *languageTags = (ARC_ParserTag *)parser->language.data;
    ARC_ParserTag *tag = NULL;
    for(uint32_t tagIndex = 0; tagIndex < parser->language.size && tag == NULL; tagIndex++){
        if(languageTags[tagIndex].tagId == tagToken->id){
            tag = languageTags + tagIndex;
        }
    }

    //without a tag there is nothing to match against, so throw an error
    if(tag == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_ParseTag(parser, subdata, tagId), could not find tag with id: %u", tagToken->id);
        return ARC_False;
    }

    //vector that collects the matched children, no comparison function is needed as it is only iterated through
    ARC_Vector_DestroyDataFn destroyTokenTagFn = ARC_ParserTagToken_VectorDestroyDataFn;
    ARC_Vector_Create(&(tagToken->tagTokens), NULL, &destroyTokenTagFn);

    //try every or section until one of them matches
    for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){
        //element 0 of an or section is its length, the ids to match follow it
        uint32_t *orSection = tag->tokensOrTags[orIndex];
        uint32_t orSectionLength = orSection[0];

        //drop whatever a previous or section collected
        ARC_Vector_Clear(tagToken->tagTokens);

        //work on a copy of the lexer index so a failed or section does not consume tokens
        uint32_t cursor = *lexerIndex;
        ARC_Bool matched = ARC_True;

        for(uint32_t sectionIndex = 1; sectionIndex < orSectionLength + 1; sectionIndex++){
            uint32_t expectedId = orSection[sectionIndex];

            //lambda ends the or section early, everything matched so far stands
            if(expectedId == ARC_PARSER_TAG_LAMBDA){
                break;
            }

            if(ARC_Lexer_IsTokenId(parser->lexer, expectedId) == ARC_False){
                //the id is not a token so it is a tag, recurse with a temporary index in case it does not match
                uint32_t nestedCursor = cursor;

                ARC_ParserTagToken *nestedTagToken;
                ARC_ParserTagToken_Create(&nestedTagToken, expectedId);

                matched = ARC_Parser_ParseTag(parser, nestedTagToken, &nestedCursor);
                if(matched == ARC_False){
                    //the nested tag did not match, clean it up and move on to the next or section
                    ARC_ParserTagToken_Destroy(nestedTagToken);
                    break;
                }

                //keep the nested tag and take over the tokens it consumed
                ARC_Vector_Add(tagToken->tagTokens, nestedTagToken);
                cursor = nestedCursor;
            }
            else {
                //the id is a token, make sure there is still a lexed token to compare against
                if(cursor >= ARC_Lexer_GetTokensSize(parser->lexer)){
                    matched = ARC_False;
                    break;
                }

                //consume the next lexed token
                ARC_LexerToken *token = ARC_Lexer_GetToken(parser->lexer, cursor);
                cursor++;

                //a different rule means this or section does not match
                if(token->rule != expectedId){
                    matched = ARC_False;
                    break;
                }

                //the rule matched, wrap the token in a tag token and keep it
                ARC_ParserTagToken *leafTagToken;
                ARC_ParserTagToken_Create(&leafTagToken, expectedId);
                leafTagToken->token = token;
                ARC_Vector_Add(tagToken->tagTokens, leafTagToken);
            }
        }

        //the first matching or section wins, report how far the lexer index moved
        if(matched == ARC_True){
            *lexerIndex = cursor;
            return ARC_True;
        }
    }

    //no or section matched, so throw away the collected children
    ARC_Vector_Destroy(tagToken->tagTokens);
    tagToken->tagTokens = NULL;

    return ARC_False;
}
//lexes the given string and parses the resulting tokens, starting from the first tag of the language
//
//parser - parser to run, it needs a non empty language
//data   - string that is handed to the lexer
//
//on success the create callback (when one exists) is run with the parsed tag token tree to fill parser->data
//on failure arc_errno is set to ARC_ERRNO_DATA, this happens when the start tag does not match, when tokens are
//left over, or when arc_errno was set while parsing
//
//the lexer is cleared and the tag token tree is destroyed on both the success and the failure path
//NOTE(review): data left in parser->data by an earlier parse is not released here before the create callback
//              runs, confirm callers use ARC_Parser_ClearData between parses
void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
    //make sure the parser has a language
    if(parser->language.size == 0){
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), no parser language defined");
        return;
    }

    //lex the subdata
    ARC_Lexer_LexString(parser->lexer, data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR("ARC_Parser_Parse(parser, data), could not lex the given data");
        ARC_Lexer_Clear(parser->lexer);
        return;
    }

    //set the lexer index to start and get the first tag
    uint32_t lexerIndex = 0;
    ARC_ParserTag *startTag = parser->language.data;

    //setup a tag token that will be passed to the creation callback on success
    ARC_ParserTagToken *tagToken;
    ARC_ParserTagToken_Create(&tagToken, startTag->tagId);

    //recursively parse from the initial start tag
    ARC_Bool parsed = ARC_Parser_ParseTag(parser, tagToken, &lexerIndex);
    ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);

    if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
        //error if anything went wrong
        arc_errno = ARC_ERRNO_DATA;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES("ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u", lexerIndex);
    }
    else if(parser->createDataFn != NULL){
        //create the data if the creation callback exists
        (*(parser->createDataFn))(&(parser->data), tagToken, parser->userData);
    }

    //cleanup, shared by the success and the failure path so the tag token tree is never leaked
    ARC_Lexer_Clear(parser->lexer);
    ARC_ParserTagToken_Destroy(tagToken);
}
//TODO: not implemented yet, the body is empty so nothing is read from path and the parser is left untouched
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){
}
//releases the parsed data of the parser and resets it to NULL
//
//the destroy callback is only run when both the data and the callback exist, the data pointer is set to NULL
//either way
void ARC_Parser_ClearData(ARC_Parser *parser){
    void *parsedData = parser->data;
    ARC_ParserData_DestroyFn *destroyFn = parser->destroyDataFn;

    //the callback needs something to destroy and has to exist to be run
    if(parsedData != NULL){
        if(destroyFn != NULL){
            (*destroyFn)(parsedData, parser->userData);
        }
    }

    //TODO: might want to error here
    parser->data = NULL;
}
//returns the data pointer currently stored in the parser, it is NULL when no data has been created or it was cleared
void *ARC_Parser_GetData(ARC_Parser *parser){
    void *parsedData = parser->data;
    return parsedData;
}