2024-10-16 05:14:53 -06:00
# include "arc/std/parser.h"
2024-10-24 19:56:26 -06:00
# include "arc/std/bool.h"
2024-10-16 23:46:16 -06:00
# include "arc/std/errno.h"
2024-10-16 05:14:53 -06:00
# include "arc/std/lexer.h"
2024-11-20 10:27:17 -07:00
# include "arc/std/vector.h"
2024-10-16 05:14:53 -06:00
# include <stdint.h>
# include <stdlib.h>
# include <string.h>
struct ARC_Parser {
ARC_Array language ;
ARC_Lexer * lexer ;
2024-11-20 10:27:17 -07:00
void * data ;
ARC_ParserData_CreateFn * createDataFn ;
ARC_ParserData_DestroyFn * destroyDataFn ;
2024-10-16 05:14:53 -06:00
} ;
2024-11-20 10:27:17 -07:00
/*
 * creates a parser that holds its own deep copy of the given language
 *
 * parser           - out parameter, receives the new parser, or NULL if the parser itself could not be allocated
 * language         - array of ARC_ParserTag to copy, may be NULL (the parser then has an empty language)
 * initLexerRulesFn - called once with the parser's new lexer so the caller can register lexer rules
 * createDataFn     - optional pointer to a callback that creates the user data, it is copied and called immediately
 * destroyDataFn    - optional pointer to a callback that destroys the user data, it is copied for later use
 *
 * NOTE: the allocations made while copying the language are not checked
 */
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){
    *parser = (ARC_Parser *)malloc(sizeof(ARC_Parser));
    if(*parser == NULL){
        //nothing can be set up without the parser, the caller sees the failure as a NULL parser
        return;
    }

    //set the language size to 0 and data to NULL in case the language is NULL
    (*parser)->language.size = 0;
    (*parser)->language.data = NULL;

    //if the language exists, copy the language
    if(language != NULL){
        (*parser)->language.size = language->size;
        (*parser)->language.data = malloc(sizeof(ARC_ParserTag) * language->size);

        for(uint32_t index = 0; index < language->size; index++){
            ARC_ParserTag *languageTag = ((ARC_ParserTag *)language->data) + index;
            ARC_ParserTag *currentTag = ((ARC_ParserTag *)(*parser)->language.data) + index;

            //copy the language tag into the current tag
            currentTag->tagId = languageTag->tagId;
            currentTag->tokensOrTagsSize = languageTag->tokensOrTagsSize;

            //create place to store tokens or tags
            currentTag->tokensOrTags = (uint32_t **)malloc(sizeof(uint32_t *) * languageTag->tokensOrTagsSize);

            //copy each or section into the tokensOrTags
            for(uint32_t orIndex = 0; orIndex < languageTag->tokensOrTagsSize; orIndex++){
                //element 0 of an or section holds the number of entries that follow it, so that many values plus the count itself are copied
                uint32_t orSectionLength = languageTag->tokensOrTags[orIndex][0] + 1;

                currentTag->tokensOrTags[orIndex] = (uint32_t *)malloc(sizeof(uint32_t) * orSectionLength);
                for(uint32_t tokenOrTagIndex = 0; tokenOrTagIndex < orSectionLength; tokenOrTagIndex++){
                    currentTag->tokensOrTags[orIndex][tokenOrTagIndex] = languageTag->tokensOrTags[orIndex][tokenOrTagIndex];
                }
            }
        }
    }

    //create the lexer
    ARC_Lexer_Create(&((*parser)->lexer));

    //register instructions to the lexer
    initLexerRulesFn((*parser)->lexer);

    //create the data and copy the creation function if the creation function exists
    (*parser)->data = NULL;
    (*parser)->createDataFn = NULL;
    if(createDataFn != NULL){
        (*parser)->createDataFn = (ARC_ParserData_CreateFn *)malloc(sizeof(ARC_ParserData_CreateFn));
        *((*parser)->createDataFn) = *createDataFn;
        (*createDataFn)(&((*parser)->data));
    }

    //copy the destroy function if the destroy function exists, the check has to be on destroyDataFn itself as that is the pointer being dereferenced
    (*parser)->destroyDataFn = NULL;
    if(destroyDataFn != NULL){
        (*parser)->destroyDataFn = (ARC_ParserData_DestroyFn *)malloc(sizeof(ARC_ParserData_DestroyFn));
        *((*parser)->destroyDataFn) = *destroyDataFn;
    }
}
2024-11-23 19:27:30 -07:00
/*
 * creates a parser from a language stored in an ARC_Vector of ARC_ParserTag
 *
 * the tags are gathered into a temporary contiguous array which is handed to ARC_Parser_Create, the temporary array
 * is released again before returning
 *
 * parser           - out parameter, receives the new parser, or NULL if the temporary array could not be allocated
 * language         - vector whose elements are read as ARC_ParserTag
 * initLexerRulesFn - passed through to ARC_Parser_Create
 * createDataFn     - passed through to ARC_Parser_Create
 * destroyDataFn    - passed through to ARC_Parser_Create
 */
void ARC_Parser_CreateFromVector(ARC_Parser **parser, ARC_Vector *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){
    const uint32_t languageSize = ARC_Vector_GetSize(language);

    //the temporary array lives on the heap, a variable length array would be undefined for an empty vector and could exhaust the stack for a large one
    ARC_ParserTag *languageArray = NULL;
    if(languageSize > 0){
        languageArray = (ARC_ParserTag *)malloc(sizeof(ARC_ParserTag) * languageSize);
        if(languageArray == NULL){
            *parser = NULL;
            return;
        }
    }

    //copy the language from a vector into an array
    for(uint32_t index = 0; index < languageSize; index++){
        languageArray[index] = *(ARC_ParserTag *)ARC_Vector_Get(language, index);
    }

    //set the vector data as an ARC_Array
    ARC_Array languageAsArray = {
        languageSize,
        languageArray
    };

    //create the parser
    ARC_Parser_Create(parser, &languageAsArray, initLexerRulesFn, createDataFn, destroyDataFn);

    //only the temporary array is released here, the tokensOrTags pointers inside the tags still belong to the vector
    free(languageArray);
}
2024-10-16 18:00:52 -06:00
/*
 * TODO: not implemented yet, the body is empty
 *
 * nothing is read from the arguments and nothing is written to *parser
 */
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
    //the parameters are intentionally unused until this is implemented
    (void)parser;
    (void)languageString;
    (void)initLexerRulesFn;
}
/*
 * releases everything a parser holds: the copied language, the stored callbacks, the user data, the lexer
 * and finally the parser itself
 *
 * parser - the parser to destroy, it is dereferenced without a NULL check
 */
void ARC_Parser_Destroy(ARC_Parser *parser){
    //walk the copied tags and release the storage of every or section
    ARC_ParserTag *tags = (ARC_ParserTag *)parser->language.data;
    for(uint32_t tagIndex = 0; tagIndex < parser->language.size; tagIndex++){
        uint32_t **orSections = tags[tagIndex].tokensOrTags;

        //each or section was allocated on its own, so each one is freed on its own
        for(uint32_t sectionIndex = 0; sectionIndex < tags[tagIndex].tokensOrTagsSize; sectionIndex++){
            free(orSections[sectionIndex]);
        }

        //then the table that held the or sections
        free(orSections);
    }

    //the stored creation callback, free on NULL does nothing so no check is needed
    free(parser->createDataFn);

    //when a destroy callback was stored, let it clean up the user data before the callback copy is released
    if(parser->destroyDataFn != NULL){
        (*(parser->destroyDataFn))(parser->data);
        free(parser->destroyDataFn);
    }

    //the tag array itself, followed by the lexer and the parser
    free(parser->language.data);
    ARC_Lexer_Destroy(parser->lexer);
    free(parser);
}
2024-10-16 18:00:52 -06:00
2024-10-16 23:46:16 -06:00
//private recursive function to parse a tag
2024-10-31 19:58:11 -06:00
/*
 * tries to match the tag with the given id against the lexed tokens, starting at *lexerIndex
 *
 * the or sections of the tag are tried in order and the first one that matches wins, entries that are not
 * lexer token ids are treated as tag ids and matched by recursion
 *
 * parser     - parser holding the language and the lexer
 * lexerIndex - in: token index to start at, out: index after the matched tokens (only written on success)
 * tagId      - id of the tag to match
 *
 * returns ARC_True when an or section matched, ARC_False otherwise. When no tag has the given id arc_errno is
 * set to ARC_ERRNO_NULL and an error is logged
 */
ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t tagId){
    //look up the tag that belongs to the id
    ARC_ParserTag *tag = NULL;
    for(uint32_t tagIndex = 0; tagIndex < parser->language.size; tagIndex++){
        ARC_ParserTag *candidate = ((ARC_ParserTag *)parser->language.data) + tagIndex;
        if(candidate->tagId == tagId){
            tag = candidate;
            break;
        }
    }

    //without a tag there is nothing to match against
    if(tag == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES(" ARC_Parser_ParseTag(parser, subdata, tagId), could not find tag with id: %u ", tagId);
        return ARC_False;
    }

    //tokens matched by the or section being tried, no compare or destroy callback is given as the entries are pointers to tokens owned by the lexer
    ARC_Vector *matchedTokens;
    ARC_Vector_Create(&matchedTokens, NULL, NULL);

    ARC_Bool result = ARC_False;

    for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){
        //every or section starts with no matched tokens and at the caller's position
        ARC_Vector_Clear(matchedTokens);

        uint32_t *orSection = tag->tokensOrTags[orIndex];
        uint32_t position = *lexerIndex;
        ARC_Bool matched = ARC_True;

        //element 0 is the entry count, the entries themselves start at element 1
        for(uint32_t entryIndex = 1; entryIndex < orSection[0] + 1; entryIndex++){
            uint32_t entry = orSection[entryIndex];

            //lambda ends the or section with whatever has been matched so far
            if(entry == ARC_PARSER_TAG_LAMBDA){
                break;
            }

            //anything that is not a token id is a tag id, so recurse into it
            if(ARC_Lexer_IsTokenId(parser->lexer, entry) == ARC_False){
                //the recursion works on a copy so a failed attempt does not move the position
                uint32_t nestedPosition = position;
                matched = ARC_Parser_ParseTag(parser, &nestedPosition, entry);
                if(matched == ARC_False){
                    break;
                }

                //the nested tag matched, carry on after the tokens it used
                position = nestedPosition;
                continue;
            }

            //a token is expected, so there has to be one left
            if(position >= ARC_Lexer_GetTokensSize(parser->lexer)){
                matched = ARC_False;
                break;
            }

            //take the next token, the position moves on even when the token turns out not to match
            ARC_LexerToken *token = ARC_Lexer_GetToken(parser->lexer, position);
            position++;

            if(token->rule != entry){
                matched = ARC_False;
                break;
            }

            //the token matched, remember it
            ARC_Vector_Add(matchedTokens, (void *)token);
        }

        //the first or section that matches decides the result
        if(matched == ARC_True){
            *lexerIndex = position;
            result = ARC_True;
            break;
        }
    }

    //cleanup, shared by the matched and the unmatched case
    ARC_Vector_Destroy(matchedTokens);
    return result;
}
2024-10-16 18:00:52 -06:00
2024-10-24 19:56:26 -06:00
/*
 * lexes the given string and checks that all of it matches the parser's language, starting from the first tag
 * of the language
 *
 * parser - parser holding the language and the lexer
 * data   - string to lex and parse, passed on to ARC_Lexer_LexString
 *
 * nothing is returned, failure is reported through arc_errno and the debug log. arc_errno is set to
 * ARC_ERRNO_DATA when the parser has no language, when the start tag does not match, or when tokens are left
 * over after matching. The lexer is cleared once ARC_Lexer_LexString has been called, but not on the no-language path
 */
void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
    //make sure the parser has a language, arc_errno is set so callers can detect the failure
    if(parser->language.size == 0){
        arc_errno = ARC_ERRNO_DATA;
        ARC_DEBUG_LOG_ERROR(" ARC_Parser_Parse(parser, data), no parser language defined ");
        return;
    }

    //lex the subdata
    ARC_Lexer_LexString(parser->lexer, data);
    if(arc_errno){
        ARC_DEBUG_LOG_ERROR(" ARC_Parser_Parse(parser, data), could not lex the given data ");
        ARC_Lexer_Clear(parser->lexer);
        return;
    }

    //set the lexer index to start and get the first tag
    uint32_t lexerIndex = 0;
    ARC_ParserTag *startTag = parser->language.data;

    //recursively parse from the initial start tag
    ARC_Bool parsed = ARC_Parser_ParseTag(parser, &lexerIndex, startTag->tagId);

    //the token count has to be read before the lexer is cleared
    ARC_Bool allTokensParsed = lexerIndex == ARC_Lexer_GetTokensSize(parser->lexer);
    ARC_Lexer_Clear(parser->lexer);

    //matching only part of the input counts as a failure too
    if(parsed == ARC_False || allTokensParsed == ARC_False || arc_errno){
        arc_errno = ARC_ERRNO_DATA;
        ARC_DEBUG_LOG_ERROR_WITH_VARIABLES(" ARC_Parser_Parse(parser, data), could not parse the given data at lexer index: %u ", lexerIndex);
        return;
    }
}
/*
 * TODO: not implemented yet, the body is empty and neither argument is used
 */
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){
    //the parameters are intentionally unused until this is implemented
    (void)parser;
    (void)path;
}
2024-11-20 10:27:17 -07:00
/*
 * resets the parser's user data by destroying the current data and creating it again
 *
 * a callback that was not stored (NULL) is skipped
 *
 * parser - the parser whose data is reset
 */
void ARC_Parser_ClearData(ARC_Parser *parser){
    //destroy the current data if there is a way to destroy it
    if(parser->destroyDataFn != NULL){
        (*(parser->destroyDataFn))(parser->data);

        //the old pointer must not be kept around after it was destroyed
        parser->data = NULL;
    }

    //create fresh data, the callback receives the address of the data pointer (the same way ARC_Parser_Create calls it) so the new data is stored in the parser
    if(parser->createDataFn != NULL){
        (*(parser->createDataFn))(&(parser->data));
    }
}
/*
 * gives access to the user data held by the parser
 *
 * parser - the parser to read from
 *
 * returns the stored data pointer as is, the parser keeps holding the same pointer
 */
void *ARC_Parser_GetData(ARC_Parser *parser){
    void *userData = parser->data;
    return userData;
}