parser working, needs more tests and an actual language to make sure that is true though
This commit is contained in:
parent
63dfb98aad
commit
606f8e4bad
10 changed files with 365 additions and 148 deletions
|
|
@ -35,10 +35,15 @@ void ARC_LexerTokenRule_VectorDestroyDataFn(void *data){
|
|||
free(tokenRule);
|
||||
}
|
||||
|
||||
|
||||
//private function for destroying a lexer token from a vector
|
||||
void ARC_LexerToken_VectorDestroyDataFn(void *data){
|
||||
ARC_LexerToken *token = (ARC_LexerToken *)data;
|
||||
|
||||
//deletes the token data string if it exists
|
||||
if(token->data != NULL){
|
||||
ARC_String_Destroy(token->data);
|
||||
}
|
||||
|
||||
free(token);
|
||||
}
|
||||
|
||||
|
|
@ -177,7 +182,7 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
|
|||
ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
|
||||
|
||||
//tokenData should only exist if tokenLength is ARC_True as stated in the header
|
||||
ARC_String *tokenData;
|
||||
ARC_String *tokenData = NULL;
|
||||
tokenLength = tokenRule->automataFn(&tokenData, *data, tokenRule->automataData);
|
||||
|
||||
//check if a token was found, if it wasn't then continue. I'm doing this to try to cut down on the amount of indentation
|
||||
|
|
@ -189,7 +194,7 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
|
|||
if(tokenLength > lastTokenLength){
|
||||
//free the current token if it exists
|
||||
if(token != NULL){
|
||||
ARC_LexerTokenRule_VectorDestroyDataFn((void *)token);
|
||||
ARC_LexerToken_VectorDestroyDataFn((void *)token);
|
||||
}
|
||||
|
||||
//create the token to add
|
||||
|
|
@ -264,21 +269,18 @@ void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer){
|
|||
}
|
||||
}
|
||||
|
||||
ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
|
||||
ARC_LexerToken *ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
|
||||
//get the token and log if there is an error
|
||||
ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);
|
||||
if(arc_errno){
|
||||
ARC_DEBUG_LOG_ERROR("ARC_Lexer_GetToken(lexer, index), errored when running ARC_Vector_Get(lexer->tokens, index);. check logs for more info");
|
||||
|
||||
//return a token with max rule value, and NULL for the string to signify an error
|
||||
return (ARC_LexerToken){
|
||||
~(uint32_t)0,
|
||||
NULL
|
||||
};
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//the token was found, so return a copy to that
|
||||
return *token;
|
||||
//the token was found, so return it
|
||||
return token;
|
||||
}
|
||||
|
||||
uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){
|
||||
|
|
@ -324,8 +326,7 @@ uint32_t ARC_Lexer_AutomataMatchCharOrBetweenFn(ARC_String **tokenData, ARC_Stri
|
|||
char *automataDataChars = (char *)automataData;
|
||||
if(string->data[0] >= automataDataChars[0] && string->data[0] <= ((char *)automataData)[1]){
|
||||
//return the token as token data and the token was found of length 1
|
||||
//TODO: fix this
|
||||
//ARC_String_Create(tokenData, string->data, 1);
|
||||
ARC_String_Create(tokenData, string->data, 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#include "arc/std/bool.h"
|
||||
#include "arc/std/errno.h"
|
||||
#include "arc/std/lexer.h"
|
||||
//#include "arc/std/vector.h"
|
||||
#include "arc/std/vector.h"
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
|
@ -11,9 +11,13 @@ struct ARC_Parser {
|
|||
ARC_Array language;
|
||||
|
||||
ARC_Lexer *lexer;
|
||||
|
||||
void *data;
|
||||
ARC_ParserData_CreateFn *createDataFn;
|
||||
ARC_ParserData_DestroyFn *destroyDataFn;
|
||||
};
|
||||
|
||||
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
|
||||
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){
|
||||
*parser = (ARC_Parser *)malloc(sizeof(ARC_Parser));
|
||||
|
||||
//set the language size to 0 and data to NULL in case the language is NULL
|
||||
|
|
@ -44,6 +48,13 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
|
|||
currentTag->tokensOrTags[orIndex][tokenOrTagIndex] = languageTag->tokensOrTags[orIndex][tokenOrTagIndex];
|
||||
}
|
||||
}
|
||||
|
||||
//add the add function
|
||||
currentTag->addDataFn = NULL;
|
||||
if(languageTag->addDataFn != NULL){
|
||||
currentTag->addDataFn = (ARC_ParserLanguageTag_AddDataFn *)malloc(sizeof(ARC_ParserLanguageTag_AddDataFn));
|
||||
*(currentTag->addDataFn) = *(languageTag->addDataFn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -52,6 +63,21 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
|
|||
|
||||
//register instructions to the lexer
|
||||
initLexerRulesFn(((*parser)->lexer));
|
||||
|
||||
//create the data and copy the creation function if the creation function exists
|
||||
(*parser)->data = NULL;
|
||||
(*parser)->createDataFn = NULL;
|
||||
if(createDataFn != NULL){
|
||||
(*parser)->createDataFn = (ARC_ParserData_CreateFn *)malloc(sizeof(ARC_ParserData_CreateFn));
|
||||
*((*parser)->createDataFn) = *createDataFn;
|
||||
(*createDataFn)(&((*parser)->data));
|
||||
}
|
||||
|
||||
(*parser)->destroyDataFn = NULL;
|
||||
if(createDataFn != NULL){
|
||||
(*parser)->destroyDataFn = (ARC_ParserData_DestroyFn *)malloc(sizeof(ARC_ParserData_DestroyFn));
|
||||
*((*parser)->destroyDataFn) = *destroyDataFn;
|
||||
}
|
||||
}
|
||||
|
||||
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
|
||||
|
|
@ -67,9 +93,25 @@ void ARC_Parser_Destroy(ARC_Parser *parser){
|
|||
free(currentTag->tokensOrTags[orIndex]);
|
||||
}
|
||||
|
||||
if(currentTag->addDataFn != NULL){
|
||||
free(currentTag->addDataFn);
|
||||
}
|
||||
|
||||
//free the tokens or tags
|
||||
free(currentTag->tokensOrTags);
|
||||
}
|
||||
|
||||
//free the creation function callback
|
||||
if(parser->createDataFn != NULL){
|
||||
free(parser->createDataFn);
|
||||
}
|
||||
|
||||
//free the data and the deletion function callback
|
||||
if(parser->destroyDataFn != NULL){
|
||||
(*(parser->destroyDataFn))(parser->data);
|
||||
free(parser->destroyDataFn);
|
||||
}
|
||||
|
||||
//clear the copied language from memory
|
||||
free(parser->language.data);
|
||||
|
||||
|
|
@ -97,8 +139,15 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
|
|||
return ARC_False;
|
||||
}
|
||||
|
||||
//create a vector of found tokens to use if a rule is validated, a comparison function is not needed as it will be iterated through, the destroy function is not needed as well because they will be pointers to lexer tokens (the lexer owns the tokens)
|
||||
ARC_Vector *foundTokens;
|
||||
ARC_Vector_Create(&foundTokens, NULL, NULL);
|
||||
|
||||
//loop through each or section of the tags and tokens
|
||||
for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){
|
||||
//reset the tokens for each or index
|
||||
ARC_Vector_Clear(foundTokens);
|
||||
|
||||
//loop through each token or tag to check if the lexed data matches
|
||||
uint32_t lexerCheckIndex = *lexerIndex;
|
||||
ARC_Bool foundRule = ARC_True;
|
||||
|
|
@ -121,10 +170,8 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
|
|||
break;
|
||||
}
|
||||
|
||||
//increase the lexer check index as a recursed rule was found
|
||||
//increase the lexer check index as a recursed rule was found, and continue checking
|
||||
lexerCheckIndex = tempLexerCheckIndex;
|
||||
|
||||
//this will probably never be called as lambda is usually the last instruction, but just in case we can continue instead of break
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -136,24 +183,43 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
|
|||
}
|
||||
|
||||
//get the next token in the lexer and increment the lexers index
|
||||
ARC_LexerToken token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex);
|
||||
ARC_LexerToken *token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex);
|
||||
lexerCheckIndex++;
|
||||
|
||||
//if the token rule does not match the current token in the current or statement the token rule could not be found for the current or index so break
|
||||
if(token.rule != tag->tokensOrTags[orIndex][tokenOrTagIndex]){
|
||||
if(token->rule != tag->tokensOrTags[orIndex][tokenOrTagIndex]){
|
||||
foundRule = ARC_False;
|
||||
break;
|
||||
}
|
||||
|
||||
//the rule was a match so add it to the found tokens
|
||||
ARC_Vector_Add(foundTokens, (void *)token);
|
||||
}
|
||||
|
||||
//if the rule is found we don't need to check anymore so we can return out
|
||||
if(foundRule == ARC_True){
|
||||
*lexerIndex = lexerCheckIndex;
|
||||
//TODO: set tag into datastructure
|
||||
|
||||
//if there is an addDataFunction for the tag, add the tokens
|
||||
if(tag->addDataFn != NULL){
|
||||
//iterate through the tokens with the add callback
|
||||
for(uint32_t index = 0; index < ARC_Vector_GetSize(foundTokens); index++){
|
||||
ARC_LexerToken *token = (ARC_LexerToken *)ARC_Vector_Get(foundTokens, index);
|
||||
(*(tag->addDataFn))(&(parser->data), tagId, token);
|
||||
}
|
||||
}
|
||||
|
||||
//free the found tokens vector
|
||||
ARC_Vector_Destroy(foundTokens);
|
||||
|
||||
//cleanup
|
||||
return ARC_True;
|
||||
}
|
||||
}
|
||||
|
||||
//cleanup
|
||||
ARC_Vector_Destroy(foundTokens);
|
||||
|
||||
//no rule was found, so return false
|
||||
return ARC_False;
|
||||
}
|
||||
|
|
@ -192,3 +258,12 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
|
|||
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){
|
||||
|
||||
}
|
||||
|
||||
//resets the parser's user data by destroying the current data and recreating it with the stored callbacks
//both callbacks are optional (they are NULL when none were given on creation), a missing callback is skipped
void ARC_Parser_ClearData(ARC_Parser *parser){
    //destroy the current data and drop the pointer so nothing dangles if no create callback runs afterwards
    if(parser->destroyDataFn != NULL){
        (*(parser->destroyDataFn))(parser->data);
        parser->data = NULL;
    }

    //recreate the data, the create callback is given the address of the data pointer (the same way ARC_Parser_Create calls it) so it can store the new data
    if(parser->createDataFn != NULL){
        (*(parser->createDataFn))(&(parser->data));
    }
}
|
||||
|
||||
//accessor for the user data pointer currently stored in the parser, the pointer is returned as is
void *ARC_Parser_GetData(ARC_Parser *parser){
    void *parserData = parser->data;
    return parserData;
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "arc/std/lexer.h"
|
||||
#include "arc/std/parser.h"
|
||||
#include "arc/std/string.h"
|
||||
#include <stddef.h>
|
||||
|
||||
void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
|
||||
//null
|
||||
|
|
@ -75,19 +76,19 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
|
|||
uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }};
|
||||
|
||||
ARC_ParserLanguageTag parserLangTags[13] = {
|
||||
{ ARC_PARSERLANG_LINE , line , 4 },
|
||||
{ ARC_PARSERLANG_BODY , body , 1 },
|
||||
{ ARC_PARSERLANG_ARGUMENTS , arguments , 2 },
|
||||
{ ARC_PARSERLANG_ARGUMENT , argument , 2 },
|
||||
{ ARC_PARSERLANG_TAG_OR_CONSTANT, tagOrConstant, 2 },
|
||||
{ ARC_PARSERLANG_CONSTANT , constant , 1 },
|
||||
{ ARC_PARSERLANG_CONSTANT_BODY , constantBody , 2 },
|
||||
{ ARC_PARSERLANG_CONSTANT_CHAR , constantChar , 2 },
|
||||
{ ARC_PARSERLANG_TAG , tag , 1 },
|
||||
{ ARC_PARSERLANG_VARIABLE , variable , 2 },
|
||||
{ ARC_PARSERLANG_VARIABLE_BODY , variableBody , 2 },
|
||||
{ ARC_PARSERLANG_VARIABLE_CHAR , variableChar , 3 },
|
||||
{ ARC_PARSERLANG_ALPHA_CHAR , alphaChar , 2 }
|
||||
{ ARC_PARSERLANG_LINE , line , 4, NULL },
|
||||
{ ARC_PARSERLANG_BODY , body , 1, NULL },
|
||||
{ ARC_PARSERLANG_ARGUMENTS , arguments , 2, NULL },
|
||||
{ ARC_PARSERLANG_ARGUMENT , argument , 2, NULL },
|
||||
{ ARC_PARSERLANG_TAG_OR_CONSTANT, tagOrConstant, 2, NULL },
|
||||
{ ARC_PARSERLANG_CONSTANT , constant , 1, NULL },
|
||||
{ ARC_PARSERLANG_CONSTANT_BODY , constantBody , 2, NULL },
|
||||
{ ARC_PARSERLANG_CONSTANT_CHAR , constantChar , 2, NULL },
|
||||
{ ARC_PARSERLANG_TAG , tag , 1, NULL },
|
||||
{ ARC_PARSERLANG_VARIABLE , variable , 2, NULL },
|
||||
{ ARC_PARSERLANG_VARIABLE_BODY , variableBody , 2, NULL },
|
||||
{ ARC_PARSERLANG_VARIABLE_CHAR , variableChar , 3, NULL },
|
||||
{ ARC_PARSERLANG_ALPHA_CHAR , alphaChar , 2, NULL }
|
||||
};
|
||||
|
||||
ARC_Array parserLanguageArray = {
|
||||
|
|
@ -95,5 +96,6 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
|
|||
parserLangTags //data
|
||||
};
|
||||
|
||||
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn);
|
||||
//TODO: add the create, destroy, and add callbacks
|
||||
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, NULL, NULL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,40 +7,55 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
//creates an ARC_String holding a copy of up to length chars of data followed by a '\0'
//on any failure *string is set to NULL
void ARC_String_Create(ARC_String **string, char *data, uint64_t length){
    //length + 1 chars are allocated for the terminator, so a max uint64_t length would wrap around to zero
    if(length == ~(uint64_t)0){
        arc_errno = ARC_ERRNO_OVERFLOW;
        ARC_DEBUG_LOG_ERROR("ARC_String_Create(string, data, length), length was max uint64_t which is bigger than allowed");
        *string = NULL;
        return;
    }

    //create the string container, bail out instead of dereferencing NULL if the allocation failed
    //NOTE(review): no out of memory value for arc_errno is visible from here, set arc_errno in the two failure paths below once the right constant is confirmed
    ARC_String *created = (ARC_String *)malloc(sizeof(ARC_String));
    if(created == NULL){
        ARC_DEBUG_LOG_ERROR("ARC_String_Create(string, data, length), failed to allocate the string container");
        *string = NULL;
        return;
    }

    //malloc the char array (length + 1 is always bigger than zero so there is no issue if zero is passed in)
    created->data = (char *)malloc(sizeof(char) * (length + 1));
    if(created->data == NULL){
        ARC_DEBUG_LOG_ERROR("ARC_String_Create(string, data, length), failed to allocate the string data");
        free(created);
        *string = NULL;
        return;
    }
    created->length = length;

    //if the string has a size, copy the string (strncpy stops reading data at its first '\0' and zero fills the rest)
    if(length > 0){
        strncpy(created->data, data, length);
    }

    //set the end of the string to \0 (to mirror how cstrings work)
    created->data[length] = '\0';

    //only hand the string back once it is fully built
    *string = created;
}
|
||||
|
||||
void ARC_String_CreateWithStrlen(ARC_String **string, char *data){
|
||||
*string = (ARC_String *)malloc(sizeof(ARC_String));
|
||||
(*string)->length = strlen(data);
|
||||
(*string)->data = (char *)malloc(sizeof(char) * ((*string)->length + 1));
|
||||
|
||||
strncpy((*string)->data, data, (*string)->length);
|
||||
(*string)->data[(*string)->length] = '\0';
|
||||
//create the string passing in the strlen of data for the length
|
||||
ARC_String_Create(string, data, strlen(data));
|
||||
}
|
||||
|
||||
void ARC_String_CreateEmpty(ARC_String **string, uint64_t length){
|
||||
*string = (ARC_String *)malloc(sizeof(ARC_String));
|
||||
(*string)->data = (char *)malloc(sizeof(char) * (length + 1));
|
||||
(*string)->length = length;
|
||||
|
||||
(*string)->data[0] = '\0';
|
||||
for(uint64_t index = 0; index <= length; index++){
|
||||
(*string)->data[index] = '\0';
|
||||
//check if the size is too big to create and error if so
|
||||
if(length == ~(uint64_t)0){
|
||||
arc_errno = ARC_ERRNO_OVERFLOW;
|
||||
ARC_DEBUG_LOG_ERROR("ARC_String_Create(string, data, length), length was max uint64_t which is bigger than allowed");
|
||||
*string = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
//create the string container and initialize data with all 0s for the size
|
||||
*string = (ARC_String *)malloc(sizeof(ARC_String));
|
||||
(*string)->data = (char *)calloc(sizeof(char), length + 1);
|
||||
(*string)->length = length;
|
||||
}
|
||||
|
||||
void ARC_String_Destroy(ARC_String *string){
|
||||
if(string->data){
|
||||
//check if the string's data exists and if so free it
|
||||
if(string->data != NULL){
|
||||
free(string->data);
|
||||
}
|
||||
|
||||
//free the string itself
|
||||
free(string);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue