parser working, needs more tests and an actual language to make sure that is true though

This commit is contained in:
herbglitch 2024-11-20 10:27:17 -07:00
parent 63dfb98aad
commit 606f8e4bad
10 changed files with 365 additions and 148 deletions

View file

@ -117,9 +117,9 @@ void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer);
* @param[in] lexer the lexer to get the token from * @param[in] lexer the lexer to get the token from
* @param[in] index the index of the token in the lexer to get * @param[in] index the index of the token in the lexer to get
* *
* @return a copy of the token, or a token with max value for rule and NULL for data on error * @return a token at the lexer index on success, otherwise NULL
*/ */
ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index); ARC_LexerToken *ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index);
/** /**
* @brief gets a token at a given index from a lexer * @brief gets a token at a given index from a lexer

View file

@ -15,6 +15,21 @@ extern "C" {
*/ */
typedef struct ARC_Parser ARC_Parser; typedef struct ARC_Parser ARC_Parser;
/**
* @brief a callback that creates the user data a parser stores
*
* @note ARC_Parser_Create calls this with the address of the parser's data pointer, which is NULL at the time of the call, so the callback is expected to write the newly created data through that address
*
* @param[out] data the address of the pointer that will hold the created data
*/
typedef void (* ARC_ParserData_CreateFn)(void **data);
/**
* @brief a callback that destroys the user data a parser stores
*
* @note the parser calls this with its stored data pointer when the parser is destroyed and when its data is cleared
*
* @param[in] data the data pointer held by the parser that should be cleaned up
*/
typedef void (* ARC_ParserData_DestroyFn)(void *data);
/**
* @brief a callback a language tag can provide to fold matched tokens into the parser's user data
*
* @note when a rule of the tag matches, the parser calls this once for each token that rule matched directly, in the order the tokens were matched
* @note the token is owned by the parser's lexer, so the callback should copy anything it wants to keep and must not free the token
*
* @param[in,out] data  the address of the parser's data pointer, the callback may modify or replace the data
* @param[in]     tagId the id of the tag whose rule was matched
* @param[in]     token the matched lexer token
*/
typedef void (* ARC_ParserLanguageTag_AddDataFn)(void **data, uint32_t tagId, ARC_LexerToken *token);
/** /**
* @brief a langue tag type for the parser //TODO: explain this better * @brief a langue tag type for the parser //TODO: explain this better
*/ */
@ -23,6 +38,8 @@ typedef struct ARC_ParserLanguageTag {
uint32_t **tokensOrTags; uint32_t **tokensOrTags;
uint32_t tokensOrTagsSize; uint32_t tokensOrTagsSize;
ARC_ParserLanguageTag_AddDataFn *addDataFn;
} ARC_ParserLanguageTag; } ARC_ParserLanguageTag;
/** /**
@ -35,6 +52,8 @@ typedef void (* ARC_Parser_InitLexerRulesFn)(ARC_Lexer *lexer);
/** /**
* @brief creates an ARC_Parser type * @brief creates an ARC_Parser type
* *
* @TODO: fix this documentation to reflect changes
*
* @TODO: probs want to move the note to another file * @TODO: probs want to move the note to another file
* @note array of tokens for langauge? like * @note array of tokens for langauge? like
* ARC_ParserTag tag = { * ARC_ParserTag tag = {
@ -50,7 +69,7 @@ typedef void (* ARC_Parser_InitLexerRulesFn)(ARC_Lexer *lexer);
* @param[in] language an arry of ARC_ParserLanguageTags defining a langauge * @param[in] language an arry of ARC_ParserLanguageTags defining a langauge
* @param[in] initLexerRulesFn a callback used to initalize the token rules the lexer within the parser will use * @param[in] initLexerRulesFn a callback used to initalize the token rules the lexer within the parser will use
*/ */
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn); void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn);
/** /**
* @brief creates an ARC_Parser type from a string * @brief creates an ARC_Parser type from a string
@ -90,6 +109,20 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data);
*/ */
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path); void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
/**
* @brief resets the user data stored in a parser
*
* @note this runs the parser's destroy data callback on the stored data and then runs the create data callback again, it is intended for parsers that were created with both callbacks
*
* @param[in] parser the parser whose data should be reset
*/
void ARC_Parser_ClearData(ARC_Parser *parser);
/**
* @brief gets the user data stored in a parser
*
* @note the returned pointer is the one the parser itself holds (it is not a copy), the parser runs its destroy data callback on it when the parser is destroyed
*
* @param[in] parser the parser to get the data from
*
* @return the parser's stored data pointer, this is NULL if the data was never created
*/
void *ARC_Parser_GetData(ARC_Parser *parser);
/** /**
* @brief basic tag for letting the parser know it is ok to end * @brief basic tag for letting the parser know it is ok to end
*/ */

View file

@ -54,4 +54,4 @@ uint32_t ARC_Queue_Size(ARC_Queue *queue);
} }
#endif #endif
#endif //ARC_STD_QUEUE_H_ #endif //ARC_STD_QUEUE_H_

View file

@ -35,10 +35,15 @@ void ARC_LexerTokenRule_VectorDestroyDataFn(void *data){
free(tokenRule); free(tokenRule);
} }
//private function for destroying a lexer token from a vector //private function for destroying a lexer token from a vector
void ARC_LexerToken_VectorDestroyDataFn(void *data){ void ARC_LexerToken_VectorDestroyDataFn(void *data){
ARC_LexerToken *token = (ARC_LexerToken *)data; ARC_LexerToken *token = (ARC_LexerToken *)data;
//deletes the token data string if it exists
if(token->data != NULL){
ARC_String_Destroy(token->data);
}
free(token); free(token);
} }
@ -177,7 +182,7 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index); ARC_LexerTokenRule *tokenRule = ARC_Vector_Get(lexer->tokenRules, index);
//tokenData should only exist if tokenLength is ARC_True as stated in the header //tokenData should only exist if tokenLength is ARC_True as stated in the header
ARC_String *tokenData; ARC_String *tokenData = NULL;
tokenLength = tokenRule->automataFn(&tokenData, *data, tokenRule->automataData); tokenLength = tokenRule->automataFn(&tokenData, *data, tokenRule->automataData);
//check if a token was found if it wasn't continue. I'm doing this to try to cut down on the ammount of indentation //check if a token was found if it wasn't continue. I'm doing this to try to cut down on the ammount of indentation
@ -189,7 +194,7 @@ void ARC_Lexer_LexString(ARC_Lexer *lexer, ARC_String **data){
if(tokenLength > lastTokenLength){ if(tokenLength > lastTokenLength){
//free the current token if it exists //free the current token if it exists
if(token != NULL){ if(token != NULL){
ARC_LexerTokenRule_VectorDestroyDataFn((void *)token); ARC_LexerToken_VectorDestroyDataFn((void *)token);
} }
//create the token to add //create the token to add
@ -264,21 +269,18 @@ void ARC_Lexer_PrintTokenRules(ARC_Lexer *lexer){
} }
} }
ARC_LexerToken ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){ ARC_LexerToken *ARC_Lexer_GetToken(ARC_Lexer *lexer, uint32_t index){
//get the token and log if there is an error //get the token and log if there is an error
ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index); ARC_LexerToken *token = ARC_Vector_Get(lexer->tokens, index);
if(arc_errno){ if(arc_errno){
ARC_DEBUG_LOG_ERROR("ARC_Lexer_GetToken(lexer, index), errored when running ARC_Vector_Get(lexer->tokens, index);. check logs for more info"); ARC_DEBUG_LOG_ERROR("ARC_Lexer_GetToken(lexer, index), errored when running ARC_Vector_Get(lexer->tokens, index);. check logs for more info");
//return a token with max rule value, and NULL for the string to signify an error //return a token with max rule value, and NULL for the string to signify an error
return (ARC_LexerToken){ return NULL;
~(uint32_t)0,
NULL
};
} }
//the token was found, so return a copy to that //the token was found, so return it
return *token; return token;
} }
uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){ uint32_t ARC_Lexer_GetTokensSize(ARC_Lexer *lexer){
@ -324,8 +326,7 @@ uint32_t ARC_Lexer_AutomataMatchCharOrBetweenFn(ARC_String **tokenData, ARC_Stri
char *automataDataChars = (char *)automataData; char *automataDataChars = (char *)automataData;
if(string->data[0] >= automataDataChars[0] && string->data[0] <= ((char *)automataData)[1]){ if(string->data[0] >= automataDataChars[0] && string->data[0] <= ((char *)automataData)[1]){
//return the token as token data and the token was found of length 1 //return the token as token data and the token was found of length 1
//TODO: fix this ARC_String_Create(tokenData, string->data, 1);
//ARC_String_Create(tokenData, string->data, 1);
return 1; return 1;
} }

View file

@ -2,7 +2,7 @@
#include "arc/std/bool.h" #include "arc/std/bool.h"
#include "arc/std/errno.h" #include "arc/std/errno.h"
#include "arc/std/lexer.h" #include "arc/std/lexer.h"
//#include "arc/std/vector.h" #include "arc/std/vector.h"
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@ -11,9 +11,13 @@ struct ARC_Parser {
ARC_Array language; ARC_Array language;
ARC_Lexer *lexer; ARC_Lexer *lexer;
void *data;
ARC_ParserData_CreateFn *createDataFn;
ARC_ParserData_DestroyFn *destroyDataFn;
}; };
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn){ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn, ARC_ParserData_CreateFn *createDataFn, ARC_ParserData_DestroyFn *destroyDataFn){
*parser = (ARC_Parser *)malloc(sizeof(ARC_Parser)); *parser = (ARC_Parser *)malloc(sizeof(ARC_Parser));
//set the language size to 0 and data to NULL in case the language is NULL //set the language size to 0 and data to NULL in case the language is NULL
@ -44,6 +48,13 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
currentTag->tokensOrTags[orIndex][tokenOrTagIndex] = languageTag->tokensOrTags[orIndex][tokenOrTagIndex]; currentTag->tokensOrTags[orIndex][tokenOrTagIndex] = languageTag->tokensOrTags[orIndex][tokenOrTagIndex];
} }
} }
//add the add function
currentTag->addDataFn = NULL;
if(languageTag->addDataFn != NULL){
currentTag->addDataFn = (ARC_ParserLanguageTag_AddDataFn *)malloc(sizeof(ARC_ParserLanguageTag_AddDataFn));
*(currentTag->addDataFn) = *(languageTag->addDataFn);
}
} }
} }
@ -52,6 +63,21 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_Init
//register instructions to the lexer //register instructions to the lexer
initLexerRulesFn(((*parser)->lexer)); initLexerRulesFn(((*parser)->lexer));
//create the data and copy the creation function if the creation function exists
(*parser)->data = NULL;
(*parser)->createDataFn = NULL;
if(createDataFn != NULL){
    (*parser)->createDataFn = (ARC_ParserData_CreateFn *)malloc(sizeof(ARC_ParserData_CreateFn));
    *((*parser)->createDataFn) = *createDataFn;

    //the callback is handed the address of the data pointer so it can store what it creates
    (*createDataFn)(&((*parser)->data));
}

//copy the destroy function if the destroy function exists
//NOTE: this has to test destroyDataFn (not createDataFn), otherwise a NULL destroyDataFn is dereferenced when only a create callback is given, and a valid destroy callback is dropped when no create callback is given
(*parser)->destroyDataFn = NULL;
if(destroyDataFn != NULL){
    (*parser)->destroyDataFn = (ARC_ParserData_DestroyFn *)malloc(sizeof(ARC_ParserData_DestroyFn));
    *((*parser)->destroyDataFn) = *destroyDataFn;
}
} }
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){ void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
@ -67,9 +93,25 @@ void ARC_Parser_Destroy(ARC_Parser *parser){
free(currentTag->tokensOrTags[orIndex]); free(currentTag->tokensOrTags[orIndex]);
} }
if(currentTag->addDataFn != NULL){
free(currentTag->addDataFn);
}
//free the tokens or tags
free(currentTag->tokensOrTags); free(currentTag->tokensOrTags);
} }
//free the creation function callback
if(parser->createDataFn != NULL){
free(parser->createDataFn);
}
//free the data and the deletion function callback
if(parser->destroyDataFn != NULL){
(*(parser->destroyDataFn))(parser->data);
free(parser->destroyDataFn);
}
//clear the copied language from memory //clear the copied language from memory
free(parser->language.data); free(parser->language.data);
@ -97,8 +139,15 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
return ARC_False; return ARC_False;
} }
//create a vector of found tokens to use if a rule is validated, a comparison function is not needed as it will be iterated through, the destroy function is not needed as well because they will be pointers to lexer tokens (the lexer owns the tokens)
ARC_Vector *foundTokens;
ARC_Vector_Create(&foundTokens, NULL, NULL);
//loop through each or section of the tags and tokens //loop through each or section of the tags and tokens
for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){ for(uint32_t orIndex = 0; orIndex < tag->tokensOrTagsSize; orIndex++){
//reset the tokens for each or index
ARC_Vector_Clear(foundTokens);
//loop through each token or tag to check if the lexed data matches //loop through each token or tag to check if the lexed data matches
uint32_t lexerCheckIndex = *lexerIndex; uint32_t lexerCheckIndex = *lexerIndex;
ARC_Bool foundRule = ARC_True; ARC_Bool foundRule = ARC_True;
@ -121,10 +170,8 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
break; break;
} }
//increase the lexer check index as a recursed rule was found //increase the lexer check index as a recursed rule was found, and continue checking
lexerCheckIndex = tempLexerCheckIndex; lexerCheckIndex = tempLexerCheckIndex;
//this will probably never be called as lambda is usually the last instruction, but just in case we can continue instead of break
continue; continue;
} }
@ -136,24 +183,43 @@ ARC_Bool ARC_Parser_ParseTag(ARC_Parser *parser, uint32_t *lexerIndex, uint32_t
} }
//get the next token in the lexer and increment the lexers index //get the next token in the lexer and increment the lexers index
ARC_LexerToken token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex); ARC_LexerToken *token = ARC_Lexer_GetToken(parser->lexer, lexerCheckIndex);
lexerCheckIndex++; lexerCheckIndex++;
//if the token rule does not match the current token in the current or statement the token rule could not be found for the current or index so break //if the token rule does not match the current token in the current or statement the token rule could not be found for the current or index so break
if(token.rule != tag->tokensOrTags[orIndex][tokenOrTagIndex]){ if(token->rule != tag->tokensOrTags[orIndex][tokenOrTagIndex]){
foundRule = ARC_False; foundRule = ARC_False;
break; break;
} }
//the rule was a match so add it to the found tokens
ARC_Vector_Add(foundTokens, (void *)token);
} }
//if the rule is found we don't need to check anymore so we can return out //if the rule is found we don't need to check anymore so we can return out
if(foundRule == ARC_True){ if(foundRule == ARC_True){
*lexerIndex = lexerCheckIndex; *lexerIndex = lexerCheckIndex;
//TODO: set tag into datastructure
//if there is an addDataFunction for the tag, add the tokens
if(tag->addDataFn != NULL){
//iterate through the tokens with the add callback
for(uint32_t index = 0; index < ARC_Vector_GetSize(foundTokens); index++){
ARC_LexerToken *token = (ARC_LexerToken *)ARC_Vector_Get(foundTokens, index);
(*(tag->addDataFn))(&(parser->data), tagId, token);
}
}
//free the found tokens vector
ARC_Vector_Destroy(foundTokens);
//cleanup
return ARC_True; return ARC_True;
} }
} }
//cleanup
ARC_Vector_Destroy(foundTokens);
//no rule was found, so return false //no rule was found, so return false
return ARC_False; return ARC_False;
} }
@ -192,3 +258,12 @@ void ARC_Parser_Parse(ARC_Parser *parser, ARC_String **data){
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){ void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path){
} }
//resets the parser's user data by destroying the current data and creating it again
//either callback may be missing (the parser can be created with NULL for both), in which case that step is skipped
void ARC_Parser_ClearData(ARC_Parser *parser){
    //destroy the current data, and drop the now dangling pointer so it can't be used or destroyed a second time
    if(parser->destroyDataFn != NULL){
        (*(parser->destroyDataFn))(parser->data);
        parser->data = NULL;
    }

    //recreate the data, the create callback takes the address of the data pointer (void **), passing the data pointer itself would make the callback write through the memory that was just freed
    if(parser->createDataFn != NULL){
        (*(parser->createDataFn))(&(parser->data));
    }
}
//gives back the data pointer the parser is holding, no copy is made
void *ARC_Parser_GetData(ARC_Parser *parser){
    void *storedData = parser->data;
    return storedData;
}

View file

@ -2,6 +2,7 @@
#include "arc/std/lexer.h" #include "arc/std/lexer.h"
#include "arc/std/parser.h" #include "arc/std/parser.h"
#include "arc/std/string.h" #include "arc/std/string.h"
#include <stddef.h>
void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){ void ARC_ParserLang_InitLexerRulesFn(ARC_Lexer *lexer){
//null //null
@ -75,19 +76,19 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }}; uint32_t *alphaChar[] = { (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_LOWER_CHAR }, (uint32_t[]){ 1, ARC_PARSERLANG_TOKEN_ALPHA_UPPER_CHAR }};
ARC_ParserLanguageTag parserLangTags[13] = { ARC_ParserLanguageTag parserLangTags[13] = {
{ ARC_PARSERLANG_LINE , line , 4 }, { ARC_PARSERLANG_LINE , line , 4, NULL },
{ ARC_PARSERLANG_BODY , body , 1 }, { ARC_PARSERLANG_BODY , body , 1, NULL },
{ ARC_PARSERLANG_ARGUMENTS , arguments , 2 }, { ARC_PARSERLANG_ARGUMENTS , arguments , 2, NULL },
{ ARC_PARSERLANG_ARGUMENT , argument , 2 }, { ARC_PARSERLANG_ARGUMENT , argument , 2, NULL },
{ ARC_PARSERLANG_TAG_OR_CONSTANT, tagOrConstant, 2 }, { ARC_PARSERLANG_TAG_OR_CONSTANT, tagOrConstant, 2, NULL },
{ ARC_PARSERLANG_CONSTANT , constant , 1 }, { ARC_PARSERLANG_CONSTANT , constant , 1, NULL },
{ ARC_PARSERLANG_CONSTANT_BODY , constantBody , 2 }, { ARC_PARSERLANG_CONSTANT_BODY , constantBody , 2, NULL },
{ ARC_PARSERLANG_CONSTANT_CHAR , constantChar , 2 }, { ARC_PARSERLANG_CONSTANT_CHAR , constantChar , 2, NULL },
{ ARC_PARSERLANG_TAG , tag , 1 }, { ARC_PARSERLANG_TAG , tag , 1, NULL },
{ ARC_PARSERLANG_VARIABLE , variable , 2 }, { ARC_PARSERLANG_VARIABLE , variable , 2, NULL },
{ ARC_PARSERLANG_VARIABLE_BODY , variableBody , 2 }, { ARC_PARSERLANG_VARIABLE_BODY , variableBody , 2, NULL },
{ ARC_PARSERLANG_VARIABLE_CHAR , variableChar , 3 }, { ARC_PARSERLANG_VARIABLE_CHAR , variableChar , 3, NULL },
{ ARC_PARSERLANG_ALPHA_CHAR , alphaChar , 2 } { ARC_PARSERLANG_ALPHA_CHAR , alphaChar , 2, NULL }
}; };
ARC_Array parserLanguageArray = { ARC_Array parserLanguageArray = {
@ -95,5 +96,6 @@ void ARC_Parser_CreateAsParserLang(ARC_Parser **parser){
parserLangTags //data parserLangTags //data
}; };
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn); //TODO: add the create, destroy, and add callbacks
ARC_Parser_Create(parser, &parserLanguageArray, ARC_ParserLang_InitLexerRulesFn, NULL, NULL);
} }

View file

@ -7,40 +7,55 @@
#include <stdlib.h> #include <stdlib.h>
void ARC_String_Create(ARC_String **string, char *data, uint64_t length){ void ARC_String_Create(ARC_String **string, char *data, uint64_t length){
//check if the size is too big to create and error if so
if(length == ~(uint64_t)0){
arc_errno = ARC_ERRNO_OVERFLOW;
ARC_DEBUG_LOG_ERROR("ARC_String_Create(string, data, length), length was max uint64_t which is bigger than allowed");
*string = NULL;
return;
}
//create the string container and malloc the char array (this will allways be bigger than zero so there will be no issue if zero is passed in)
*string = (ARC_String *)malloc(sizeof(ARC_String)); *string = (ARC_String *)malloc(sizeof(ARC_String));
(*string)->data = (char *)malloc(sizeof(char) * (length + 1)); (*string)->data = (char *)malloc(sizeof(char) * (length + 1));
(*string)->length = length; (*string)->length = length;
//if the string has a size, copy the string
if(length > 0){ if(length > 0){
strncpy((*string)->data, data, length); strncpy((*string)->data, data, length);
} }
//set the end of the string to \0 (to mirror how cstrings work)
(*string)->data[length] = '\0'; (*string)->data[length] = '\0';
} }
void ARC_String_CreateWithStrlen(ARC_String **string, char *data){ void ARC_String_CreateWithStrlen(ARC_String **string, char *data){
*string = (ARC_String *)malloc(sizeof(ARC_String)); //create the string passing in the strlen of data for the length
(*string)->length = strlen(data); ARC_String_Create(string, data, strlen(data));
(*string)->data = (char *)malloc(sizeof(char) * ((*string)->length + 1));
strncpy((*string)->data, data, (*string)->length);
(*string)->data[(*string)->length] = '\0';
} }
void ARC_String_CreateEmpty(ARC_String **string, uint64_t length){ void ARC_String_CreateEmpty(ARC_String **string, uint64_t length){
*string = (ARC_String *)malloc(sizeof(ARC_String)); //check if the size is too big to create and error if so
(*string)->data = (char *)malloc(sizeof(char) * (length + 1)); if(length == ~(uint64_t)0){
(*string)->length = length; arc_errno = ARC_ERRNO_OVERFLOW;
ARC_DEBUG_LOG_ERROR("ARC_String_Create(string, data, length), length was max uint64_t which is bigger than allowed");
(*string)->data[0] = '\0'; *string = NULL;
for(uint64_t index = 0; index <= length; index++){ return;
(*string)->data[index] = '\0';
} }
//create the string container and initialize data with all 0s for the size
*string = (ARC_String *)malloc(sizeof(ARC_String));
(*string)->data = (char *)calloc(sizeof(char), length + 1);
(*string)->length = length;
} }
void ARC_String_Destroy(ARC_String *string){ void ARC_String_Destroy(ARC_String *string){
if(string->data){ //check if the string's data exists and if so free it
if(string->data != NULL){
free(string->data); free(string->data);
} }
//free the string itself
free(string); free(string);
} }

68
temp_parser.txt Normal file
View file

@ -0,0 +1,68 @@
int i = 32;
<defineIntLine> -> INT <variable> EQUALS <value> SEMICOLON
<variable> -> <alphaChar> <variableBody>
<variableBody> -> <alphaCharOrNum> <variableBody> | LAMBDA
<alphaCharOrNum> -> UPPER_ALPHA_CHAR | LOWER_ALPHA_CHAR | NUMBER
<value> -> NUMBER <valueBody>
<valueBody> -> NUMBER <valueBody> | LAMBDA
typedef struct TEMP_LangVar {
ARC_String *name;
void *data;
} TEMP_LangVar;
void *(* ARC_Parser_ConstantCallbackFn)(void **data, void *parentdata);
defineIntLine
┌─────────────────── TEMP_LangVar var = { NULL, NULL }
│ INT
│ ┌───────────────── <- NULL
│ │ int
│ └─────────────────
│ variable
│ ┌───────────────── ARC_String_Create(&(var.name));
│ │ alphaChar
│ │ ┌─────────────── ARC_String_Append(var.name, "i");
│ │ │ i
│ │ └───────────────
│ │
│ │ variableBody
│ │ ┌───────────────
│ │ │ LAMBDA
│ │ └───────────────
│ └─────────────────
│ EQUALS
│ ┌─────────────────
│ │ =
│ └─────────────────
│ value
│ ┌───────────────── var.data = malloc(sizeof(int32_t));
│ │ NUMBER
│ │ ┌─────────────── *(int32_t *)(var.data) = 3
│ │ │ 3
│ │ └───────────────
│ │
│ │ valueBody
│ │ ┌───────────────
│ │ │ NUMBER
│ │ │ ┌───────────── *(int32_t *)(var.data) = *(int32_t *)(var.data) * 10 + 2
│ │ │ │ 2
│ │ │ └─────────────
│ │ │
│ │ │ valueBody
│ │ │ ┌─────────────
│ │ │ │ LAMBDA
│ │ │ └─────────────
│ │ └───────────────
│ └─────────────────
│ SEMICOLON
│ ┌─────────────────
│ │ ;
│ └─────────────────
└───────────────────

View file

@ -12,28 +12,28 @@ ARC_TEST(Lexer_Char_Match){
ARC_Lexer_LexString(lexer, &simple); ARC_Lexer_LexString(lexer, &simple);
ARC_LexerToken token; ARC_LexerToken *token;
token = ARC_Lexer_GetToken(lexer, 0); token = ARC_Lexer_GetToken(lexer, 0);
ARC_CHECK(token.rule == ARC_LEXER_TOKEN_COLON_ID); ARC_CHECK(token->rule == ARC_LEXER_TOKEN_COLON_ID);
token = ARC_Lexer_GetToken(lexer, 1); token = ARC_Lexer_GetToken(lexer, 1);
ARC_CHECK(token.rule == ARC_LEXER_TOKEN_COLON_ID); ARC_CHECK(token->rule == ARC_LEXER_TOKEN_COLON_ID);
token = ARC_Lexer_GetToken(lexer, 2); token = ARC_Lexer_GetToken(lexer, 2);
ARC_CHECK(token.rule == ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID); ARC_CHECK(token->rule == ARC_LEXER_TOKEN_LEFT_CURLY_BRACE_ID);
token = ARC_Lexer_GetToken(lexer, 3); token = ARC_Lexer_GetToken(lexer, 3);
ARC_CHECK(token.rule == ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID); ARC_CHECK(token->rule == ARC_LEXER_TOKEN_RIGHT_CURLY_BRACE_ID);
token = ARC_Lexer_GetToken(lexer, 4); token = ARC_Lexer_GetToken(lexer, 4);
ARC_CHECK(token.rule == ARC_LEXER_TOKEN_BANG_ID); ARC_CHECK(token->rule == ARC_LEXER_TOKEN_BANG_ID);
token = ARC_Lexer_GetToken(lexer, 5); token = ARC_Lexer_GetToken(lexer, 5);
ARC_CHECK(token.rule == ARC_LEXER_TOKEN_FORWARD_SLASH_ID); ARC_CHECK(token->rule == ARC_LEXER_TOKEN_FORWARD_SLASH_ID);
token = ARC_Lexer_GetToken(lexer, 6); token = ARC_Lexer_GetToken(lexer, 6);
ARC_CHECK(token.rule == ARC_LEXER_TOKEN_PERIOD_ID); ARC_CHECK(token->rule == ARC_LEXER_TOKEN_PERIOD_ID);
ARC_Lexer_Destroy(lexer); ARC_Lexer_Destroy(lexer);
} }

View file

@ -1,7 +1,9 @@
#include "../test.h" #include "../test.h"
#include "arc/std/errno.h" #include "arc/std/errno.h"
#include "arc/std/parser.h" #include "arc/std/parser.h"
#include "arc/std/lexer.h"
#include "arc/std/parser/parserlang.h" #include "arc/std/parser/parserlang.h"
#include <stddef.h>
#define LAMBDA ARC_PARSER_TAG_LAMBDA #define LAMBDA ARC_PARSER_TAG_LAMBDA
#define CHAR ARC_LEXER_TOKEN_ALPHALOWERCHAR #define CHAR ARC_LEXER_TOKEN_ALPHALOWERCHAR
@ -10,6 +12,73 @@
#define VARIABLE_NAME 24 #define VARIABLE_NAME 24
#define VARIABLE 25 #define VARIABLE 25
void TEST_ParserLanguageTag_CreateStringFn(void **data){
ARC_String_Create((ARC_String **)data, NULL, 0);
}
void TEST_ParserLanguageTag_DestroyStringFn(void *data){
ARC_String_Destroy((ARC_String *)data);
}
//add data callback that puts the token's text in front of the string held in data
//the tagId is not needed for this very basic example
void TEST_ParserLanguageTag_AddFirstCharFn(void **data, uint32_t tagId, ARC_LexerToken *token){
    (void)tagId;

    //only prepend when there is a string to prepend to
    if(*data != NULL){
        ARC_String *currentString = (ARC_String *)*data;

        //start from a copy of the token's text and put the current string after it
        ARC_String *combined = NULL;
        ARC_String_Copy(&combined, token->data);
        ARC_String_Append(&combined, currentString);

        //the combined string takes the place of the old one
        ARC_String_Destroy(currentString);
        *data = combined;
    }
}
//add data callback that puts the token's text at the end of the string held in data
//the tagId is not needed for this very basic example
void TEST_ParserLanguageTag_AddCharFn(void **data, uint32_t tagId, ARC_LexerToken *token){
    (void)tagId;

    //only append when there is a string to append to
    if(*data != NULL){
        ARC_String **parserString = (ARC_String **)data;
        ARC_String_Append(parserString, token->data);
    }
}
//the rules of the small test language, each inner array is one alternative of a tag
//NOTE(review): the first number of each inner array looks like the count of tokens/tags that follow it, confirm against the parser header

//<charOrNum> -> CHAR | NUM
uint32_t *charOrNumTokens[] = { (uint32_t[]){ 1, CHAR }, (uint32_t[]){ 1, NUM } };
//<variableName> -> <charOrNum> <variableName> | LAMBDA
uint32_t *variableNameTags[] = { (uint32_t[]){ 2, CHAR_OR_NUM, VARIABLE_NAME }, (uint32_t[]){ 1, LAMBDA } };
//<variable> -> CHAR <variableName>
uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
//TODO: note how language function callbacks work, and how they use the parentData if createDataFn is NULL
ARC_ParserData_CreateFn createStringFn = TEST_ParserLanguageTag_CreateStringFn;
ARC_ParserData_DestroyFn destroyStringFn = TEST_ParserLanguageTag_DestroyStringFn;
ARC_ParserLanguageTag_AddDataFn addCharFn = TEST_ParserLanguageTag_AddCharFn;
ARC_ParserLanguageTag_AddDataFn addFirstCharFn = TEST_ParserLanguageTag_AddFirstCharFn;
ARC_ParserLanguageTag testTags[3] = {
{
VARIABLE, //tagId
variableTokensOrTags, //tokensOrTags
1, //tokenOrTagsSize
&addFirstCharFn //addDataFn
},
{
VARIABLE_NAME, //tagId
variableNameTags, //tokensOrTags
2, //tokenOrTagsSize
NULL, //addDataFn
},
{
CHAR_OR_NUM, //tagId
charOrNumTokens, //tokensOrTags
2, //tokenOrTagsSize
&addCharFn //addDataFn
}
};
ARC_Array languageArray = {
3, //size
testTags //data
};
void TEST_Parser_InitLexerRulesFn(ARC_Lexer *lexer){ void TEST_Parser_InitLexerRulesFn(ARC_Lexer *lexer){
ARC_Lexer_InitBasicTokenRules(lexer); ARC_Lexer_InitBasicTokenRules(lexer);
} }
@ -17,33 +86,7 @@ void TEST_Parser_InitLexerRulesFn(ARC_Lexer *lexer){
ARC_TEST(Parser_Init){ ARC_TEST(Parser_Init){
ARC_Parser *parser; ARC_Parser *parser;
uint32_t *charOrNumTokens[] = { (uint32_t[]){ 1, CHAR }, (uint32_t[]){ 1, NUM } }; ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, NULL, NULL);
uint32_t *variableNameTags[] = { (uint32_t[]){ 2, CHAR_OR_NUM, VARIABLE_NAME }, (uint32_t[]){ 1, LAMBDA } };
uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
ARC_ParserLanguageTag testTags[3] = {
{
VARIABLE, //tagId
variableTokensOrTags, //tokensOrTags
1 //tokenOrTagsSize
},
{
VARIABLE_NAME, //tagId
variableNameTags, //tokensOrTags
2 //tokenOrTagsSize
},
{
CHAR_OR_NUM, //tagId
charOrNumTokens, //tokensOrTags
2 //tokenOrTagsSize
}
};
ARC_Array languageArray = {
3, //size
testTags //data
};
ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
ARC_Parser_Destroy(parser); ARC_Parser_Destroy(parser);
@ -53,37 +96,11 @@ ARC_TEST(Parser_Init){
ARC_TEST(Parser_Basic_Parse){ ARC_TEST(Parser_Basic_Parse){
ARC_Parser *parser; ARC_Parser *parser;
uint32_t *charOrNumTokens[] = { (uint32_t[]){ 1, CHAR }, (uint32_t[]){ 1, NUM } }; ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, NULL, NULL);
uint32_t *variableNameTags[] = { (uint32_t[]){ 2, CHAR_OR_NUM, VARIABLE_NAME }, (uint32_t[]){ 1, LAMBDA } };
uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
ARC_ParserLanguageTag testTags[3] = {
{
VARIABLE, //tagId
variableTokensOrTags, //tokensOrTags
1 //tokenOrTagsSize
},
{
VARIABLE_NAME, //tagId
variableNameTags, //tokensOrTags
2 //tokenOrTagsSize
},
{
CHAR_OR_NUM, //tagId
charOrNumTokens, //tokensOrTags
2 //tokenOrTagsSize
}
};
ARC_Array languageArray = {
3, //size
testTags //data
};
ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
ARC_String *tempString; ARC_String *tempString;
//first variable test /* ~ first test ~ */
ARC_String_CreateWithStrlen(&tempString, "myvar1"); ARC_String_CreateWithStrlen(&tempString, "myvar1");
//this destroys string, so no need for cleanup //this destroys string, so no need for cleanup
@ -92,7 +109,7 @@ ARC_TEST(Parser_Basic_Parse){
ARC_CHECK(arc_errno == 0); ARC_CHECK(arc_errno == 0);
//second variable test /* ~ second test ~ */
ARC_String_CreateWithStrlen(&tempString, "z1xwvq"); ARC_String_CreateWithStrlen(&tempString, "z1xwvq");
//this destroys string, so no need for cleanup //this destroys string, so no need for cleanup
@ -101,7 +118,7 @@ ARC_TEST(Parser_Basic_Parse){
ARC_CHECK(arc_errno == 0); ARC_CHECK(arc_errno == 0);
//third variable test /* ~ third test ~ */
ARC_String_CreateWithStrlen(&tempString, "z1234"); ARC_String_CreateWithStrlen(&tempString, "z1234");
//this destroys string, so no need for cleanup //this destroys string, so no need for cleanup
@ -110,7 +127,7 @@ ARC_TEST(Parser_Basic_Parse){
ARC_CHECK(arc_errno == 0); ARC_CHECK(arc_errno == 0);
//fourth variable test /* ~ fourth test ~ */
ARC_String_CreateWithStrlen(&tempString, "aaaaa"); ARC_String_CreateWithStrlen(&tempString, "aaaaa");
//this destroys string, so no need for cleanup //this destroys string, so no need for cleanup
@ -119,44 +136,19 @@ ARC_TEST(Parser_Basic_Parse){
ARC_CHECK(arc_errno == 0); ARC_CHECK(arc_errno == 0);
/* ~ cleanup ~ */
ARC_Parser_Destroy(parser); ARC_Parser_Destroy(parser);
} }
ARC_TEST(Parser_Basic_ParseError){ ARC_TEST(Parser_Basic_ParseError){
ARC_Parser *parser; ARC_Parser *parser;
uint32_t *charOrNumTokens[] = { (uint32_t[]){ 1, CHAR }, (uint32_t[]){ 1, NUM } }; ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, NULL, NULL);
uint32_t *variableNameTags[] = { (uint32_t[]){ 2, CHAR_OR_NUM, VARIABLE_NAME }, (uint32_t[]){ 1, LAMBDA } };
uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
ARC_ParserLanguageTag testTags[3] = {
{
VARIABLE, //tagId
variableTokensOrTags, //tokensOrTags
1 //tokenOrTagsSize
},
{
VARIABLE_NAME, //tagId
variableNameTags, //tokensOrTags
2 //tokenOrTagsSize
},
{
CHAR_OR_NUM, //tagId
charOrNumTokens, //tokensOrTags
2 //tokenOrTagsSize
}
};
ARC_Array languageArray = {
3, //size
testTags //data
};
//TODO: remove this
arc_errno = 0;
ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn);
ARC_String *tempString; ARC_String *tempString;
/* ~ first test ~ */
ARC_String_CreateWithStrlen(&tempString, "!myVar1"); ARC_String_CreateWithStrlen(&tempString, "!myVar1");
//this destroys string, so no need for cleanup //this destroys string, so no need for cleanup
@ -164,6 +156,8 @@ ARC_TEST(Parser_Basic_ParseError){
ARC_CHECK(arc_errno == ARC_ERRNO_DATA); ARC_CHECK(arc_errno == ARC_ERRNO_DATA);
/* ~ second test ~ */
//check again with moved character //check again with moved character
arc_errno = 0; arc_errno = 0;
ARC_String_CreateWithStrlen(&tempString, "my!Var1"); ARC_String_CreateWithStrlen(&tempString, "my!Var1");
@ -173,6 +167,8 @@ ARC_TEST(Parser_Basic_ParseError){
ARC_CHECK(arc_errno == ARC_ERRNO_DATA); ARC_CHECK(arc_errno == ARC_ERRNO_DATA);
/* ~ third test ~ */
//check again with moved character //check again with moved character
arc_errno = 0; arc_errno = 0;
ARC_String_CreateWithStrlen(&tempString, "myVar1!"); ARC_String_CreateWithStrlen(&tempString, "myVar1!");
@ -182,12 +178,39 @@ ARC_TEST(Parser_Basic_ParseError){
ARC_CHECK(arc_errno == ARC_ERRNO_DATA); ARC_CHECK(arc_errno == ARC_ERRNO_DATA);
/* ~ cleanup ~ */
ARC_Parser_Destroy(parser); ARC_Parser_Destroy(parser);
//reset for next test //reset for next test
arc_errno = 0; arc_errno = 0;
} }
//checks that the add data callbacks rebuild the parsed variable name inside the parser's data
ARC_TEST(Parser_Basic_GetParsedValue){
    //the create and destroy callbacks make the parser's data an ARC_String
    ARC_Parser *parser;
    ARC_Parser_Create(&parser, &languageArray, TEST_Parser_InitLexerRulesFn, &createStringFn, &destroyStringFn);

    /* ~ parse a simple variable name ~ */
    ARC_String *input;
    ARC_String_CreateWithStrlen(&input, "myvar1");

    //parsing destroys the input string, so it does not need to be cleaned up here
    ARC_Parser_Parse(parser, &input);
    ARC_CHECK(arc_errno == 0);

    /* ~ the stored data should match what was parsed ~ */
    ARC_String *parsedName = (ARC_String *)ARC_Parser_GetData(parser);
    ARC_CHECK(ARC_String_EqualsCStringWithStrlen(parsedName, "myvar1"));

    /* ~ cleanup ~ */
    ARC_Parser_Destroy(parser);
}
ARC_TEST(Parser_ParserLang_BasicTest){ ARC_TEST(Parser_ParserLang_BasicTest){
ARC_Parser *parser; ARC_Parser *parser;
ARC_Parser_CreateAsParserLang(&parser); ARC_Parser_CreateAsParserLang(&parser);