merged with old parser stuff, and worked on parser a bit more

This commit is contained in:
herbglitch 2024-10-16 18:00:52 -06:00
commit e4aa4a8b6d
8 changed files with 180 additions and 30 deletions

4
.gitignore vendored
View file

@ -526,4 +526,6 @@ FodyWeavers.xsd
doxygen/html doxygen/html
[Bb][Uu][Ii][Ll][Dd] [Bb][Uu][Ii][Ll][Dd]
.ccls .ccls
.vscode
tests/test_error_out.txt

View file

@ -63,6 +63,7 @@ set(ARCHEUS_STD_SOURCES
src/std/hashtable.c src/std/hashtable.c
src/std/io.c src/std/io.c
src/std/lexer.c src/std/lexer.c
src/std/parser.c
src/std/queue.c src/std/queue.c
src/std/stack.c src/std/stack.c
src/std/string.c src/std/string.c
@ -127,7 +128,8 @@ if(ARCHEUS_STD_TESTS)
tests/test.c tests/test.c
#tests/std/vector.c #tests/std/vector.c
tests/std/lexer.c #tests/std/lexer.c
tests/std/parser.c
${ARCHEUS_STD_SOURCES} ${ARCHEUS_STD_SOURCES}
) )

View file

@ -41,7 +41,6 @@ typedef uint32_t (* ARC_LexerTokenRule_AutomataFn)(ARC_String **tokenData, ARC_S
*/ */
typedef void (* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData); typedef void (* ARC_LexerTokenRule_DestroyAutomataDataFn)(void *automataData);
/** /**
* @brief a lexer token rule type * @brief a lexer token rule type
*/ */
@ -57,7 +56,7 @@ typedef struct ARC_LexerTokenRule {
/** /**
* @brief creates an ARC_Lexer type * @brief creates an ARC_Lexer type
* *
* @param[out] lexer * @param[out] lexer ARC_Lexer to create
*/ */
void ARC_Lexer_Create(ARC_Lexer **lexer); void ARC_Lexer_Create(ARC_Lexer **lexer);

View file

@ -5,7 +5,10 @@
extern "C" { extern "C" {
#endif #endif
#include "arc/std/string.h" #include "arc/std/array.h"
//#include "arc/std/bool.h"
//#include "arc/std/lexer.h"
#include <stdint.h>
/** /**
* @brief a parser type * @brief a parser type
@ -13,17 +16,56 @@ extern "C" {
typedef struct ARC_Parser ARC_Parser; typedef struct ARC_Parser ARC_Parser;
/** /**
* @brief a parser node * @brief a language tag type for the parser //TODO: explain this better
*/ */
typedef struct ARC_ParserNode ARC_ParserNode; typedef struct ARC_ParserLanguageTag {
uint32_t tagId;
uint32_t **tokensOrTags;
uint32_t tokensOrTagsSize;
} ARC_ParserLanguageTag;
/**
 * @brief a callback function used to register token rules on the lexer that a parser uses
 *
 * @note ARC_Parser_Create and ARC_Parser_CreateFromString both take a callback of this type
 *
 * @param lexer the lexer used by the parser that rules should be added to
 */
typedef void (* ARC_Parser_InitLexerRulesFn)(ARC_Lexer *lexer);
/** /**
* @brief creates an ARC_Parser type * @brief creates an ARC_Parser type
* *
* @param[out] parser * @TODO: probs want to move the note to another file
* @param[in] language ..., can be NULL * @note array of tokens for langauge? like
*     ARC_ParserLanguageTag tag = {
*         VARIABLE_NAME, //tagId
*         {
*             { 2, CHAR_OR_NUM, VARIABLE_NAME },
*             { 1, LAMBDA },
*         }, //tokensOrTags
*         2 //tokensOrTagsSize
*     };
*
* @param[out] parser ARC_Parser to create
* @param[in] language an array of ARC_ParserLanguageTags defining a language
* @param[in] initLexerRulesFn a callback used to initialize the token rules the lexer within the parser will use
*/ */
void ARC_Parser_Create(ARC_Parser **parser, ARC_String *language); void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn);
/**
 * @brief creates an ARC_Parser type from a string that describes the language
 *
 * @TODO: probs want to move the note to another file
 * @note the syntax looks like:
 *       <variable> -> CHAR <variableName> EOF
 *       <variableName> -> <charOrNum> <variableName> | LAMBDA
 *       <charOrNum> -> CHAR | NUM
 *
 * @param[out] parser           ARC_Parser to create
 * @param[in]  languageString   a string defining the language, written in the syntax shown in the note
 * @param[in]  initLexerRulesFn a callback used to initialize the token rules the lexer within the parser will use
 */
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn);
/** /**
* @brief destroys an ARC_Parser type * @brief destroys an ARC_Parser type
@ -33,26 +75,18 @@ void ARC_Parser_Create(ARC_Parser **parser, ARC_String *language);
void ARC_Parser_Destroy(ARC_Parser *parser); void ARC_Parser_Destroy(ARC_Parser *parser);
/** /**
* @brief sets the definition of the parser, the language itself is parsed and will throw an error if invalid * @brief
* *
* @param[in] parser ARC_Parser to set the language to * @param[in] parser
* @param[in] language the language as a string the parser should use * @param[in] language
*/
void ARC_Parser_SetLanguage(ARC_Parser *parser, ARC_String *language);
/**
* @brief sets the definition of the parser, the language itself is parsed and will throw an error if invalid
*
* @param[in] parser ARC_Parser to set the language to
* @param[in] language the language as a string the parser should use
*/ */
void ARC_Parser_Parse(ARC_Parser *parser, ARC_String *data); void ARC_Parser_Parse(ARC_Parser *parser, ARC_String *data);
/** /**
* @brief sets the definition of the parser, the language itself is parsed and will throw an error if invalid * @brief
* *
* @param[in] parser ARC_Parser to set the language to * @param[in] parser
* @param[in] language the language as a string the parser should use * @param[in] language
*/ */
void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path); void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
@ -60,4 +94,4 @@ void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);
} }
#endif #endif
#endif // !ARC_STD_LEXER_H_ #endif // !ARC_STD_PARSER_H_

View file

@ -176,9 +176,18 @@ ARC_Bool ARC_String_SubstringEqualsCString(ARC_String *string, uint64_t offset,
* *
* @param string string to check * @param string string to check
* *
* @return 1 if alphabetic, 0 if not alphabetic * @return ARC_True if alphabetic, ARC_False if not alphabetic
*/ */
uint8_t ARC_String_Alpha(ARC_String *string); ARC_Bool ARC_String_IsAlpha(ARC_String *string);
/**
 * @brief checks if a string is made up of only the decimal digits '0' through '9'
 *
 * @note a string with a length of 0 contains no non-digit characters, so it is reported as numeric
 *
 * @param string string to check
 *
 * @return ARC_True if it is numeric, ARC_False if it is not numeric
 */
ARC_Bool ARC_String_IsNumeric(ARC_String *string);
/** /**
* @brief converts substring from string to uint64_t * @brief converts substring from string to uint64_t

View file

@ -0,0 +1,48 @@
#include "arc/std/parser.h"
#include "arc/std/lexer.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/**
 * @brief the private definition behind the opaque ARC_Parser type
 */
struct ARC_Parser {
    //the parser's own copy of the language tag array, size is 0 and data is NULL when no language was given
    ARC_Array language;

    //the lexer created in ARC_Parser_Create and destroyed in ARC_Parser_Destroy
    ARC_Lexer *lexer;
};
/**
 * @brief creates an ARC_Parser, copying the given language and setting up its lexer
 *
 * @note the language array is copied shallowly: the ARC_ParserLanguageTag entries are duplicated,
 *       but the tokensOrTags pointers inside them still refer to the caller's memory, so that
 *       memory needs to stay valid for as long as the parser uses the language
 * @note if memory cannot be allocated, *parser is set to NULL
 *
 * @param[out] parser           ARC_Parser to create
 * @param[in]  language         an array of ARC_ParserLanguageTags defining a language, can be NULL
 * @param[in]  initLexerRulesFn a callback used to add token rules to the parser's lexer, skipped if NULL
 */
void ARC_Parser_Create(ARC_Parser **parser, ARC_Array *language, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
    ARC_Parser *created = malloc(sizeof *created);
    if(created == NULL){
        *parser = NULL;
        return;
    }

    //start with an empty language in case the language is NULL or has no tags
    created->language.size = 0;
    created->language.data = NULL;
    created->lexer = NULL;

    //if the language exists, copy every tag in it
    if(language != NULL && language->data != NULL && language->size != 0){
        //refuse sizes whose byte count would overflow instead of allocating a short buffer
        if(language->size > SIZE_MAX / sizeof(ARC_ParserLanguageTag)){
            free(created);
            *parser = NULL;
            return;
        }

        //the copy length is in bytes, so it has to be the tag count times the size of one tag
        size_t languageBytes = sizeof(ARC_ParserLanguageTag) * language->size;

        created->language.data = malloc(languageBytes);
        if(created->language.data == NULL){
            free(created);
            *parser = NULL;
            return;
        }

        memcpy(created->language.data, language->data, languageBytes);
        created->language.size = language->size;
    }

    //create the lexer
    ARC_Lexer_Create(&(created->lexer));

    //register instructions to the lexer
    if(initLexerRulesFn != NULL){
        initLexerRulesFn(created->lexer);
    }

    *parser = created;
}
/**
 * @brief creates an ARC_Parser type from a language string (not implemented yet)
 *
 * @note until the language string is actually parsed, no parser is created and *parser is set to NULL
 *
 * @param[out] parser           ARC_Parser to create, currently always set to NULL
 * @param[in]  languageString   a string defining the language, currently unused
 * @param[in]  initLexerRulesFn a callback for adding token rules to the lexer, currently unused
 */
void ARC_Parser_CreateFromString(ARC_Parser **parser, ARC_String *languageString, ARC_Parser_InitLexerRulesFn initLexerRulesFn){
    //TODO: parse languageString into ARC_ParserLanguageTags and build the parser from them
    (void)languageString;
    (void)initLexerRulesFn;

    //nothing is created yet, so hand back NULL instead of leaving the caller's pointer indeterminate
    *parser = NULL;
}
/**
 * @brief destroys an ARC_Parser type, freeing its language copy and its lexer
 *
 * @param[in] parser ARC_Parser to destroy, does nothing if NULL
 */
void ARC_Parser_Destroy(ARC_Parser *parser){
    //a parser that was never created has nothing to release
    if(parser == NULL){
        return;
    }

    //language.data is NULL when no language was copied, free(NULL) is a no-op
    free(parser->language.data);

    ARC_Lexer_Destroy(parser->lexer);

    free(parser);
}
//NOTE: these two lines only re-declare the parse entry points, they end in ';' and have no body here
//TODO: confirm where (or whether) ARC_Parser_Parse and ARC_Parser_ParseFile get defined
void ARC_Parser_Parse(ARC_Parser *parser, ARC_String *data);

void ARC_Parser_ParseFile(ARC_Parser *parser, ARC_String *path);

View file

@ -173,7 +173,8 @@ ARC_Bool ARC_String_SubstringEqualsCString(ARC_String *string, uint64_t offset,
return ARC_True; return ARC_True;
} }
uint8_t ARC_String_Alpha(ARC_String *string){ //TODO: fix this
ARC_Bool ARC_String_IsAlpha(ARC_String *string){
for(uint64_t length = string->length; length; length--){ for(uint64_t length = string->length; length; length--){
if(string->data[length - 1] >= 'a' && string->data[length - 1] <= 'z'){ if(string->data[length - 1] >= 'a' && string->data[length - 1] <= 'z'){
continue; continue;
@ -183,10 +184,20 @@ uint8_t ARC_String_Alpha(ARC_String *string){
continue; continue;
} }
return 1; return ARC_True;
} }
return 0; return ARC_False;
}
/**
 * @brief checks if a string is made up of only the decimal digits '0' through '9'
 *
 * @param string string to check
 *
 * @return ARC_True if every character is a digit (including when the length is 0), otherwise ARC_False
 */
ARC_Bool ARC_String_IsNumeric(ARC_String *string){
    uint64_t position = 0;

    //walk the string front to back and stop at the first character outside of '0' through '9'
    while(position < string->length){
        if(!(string->data[position] >= '0' && string->data[position] <= '9')){
            return ARC_False;
        }

        position++;
    }

    return ARC_True;
}
uint64_t ARC_String_ToUint64_t(ARC_String *string){ uint64_t ARC_String_ToUint64_t(ARC_String *string){

45
tests/std/parser.c Normal file
View file

@ -0,0 +1,45 @@
#include "../test.h"
#include "arc/std/parser.h"
//ids used to build the test language in this file
//LAMBDA, CHAR, and NUM are never used as a tagId, they only appear inside the tokensOrTags lists
//(presumably they are lexer token ids, TODO: confirm against the lexer rules once they are registered)
#define LAMBDA 0
#define CHAR 1
#define NUM 2

//CHAR_OR_NUM, VARIABLE_NAME, and VARIABLE are each used as the tagId of an ARC_ParserLanguageTag
#define CHAR_OR_NUM 3
#define VARIABLE_NAME 4
#define VARIABLE 5
ARC_TEST(Lexer_Char_Match){
ARC_Parser *parser;
uint32_t *charOrNumTokens[] = { (uint32_t[]){ 1, CHAR }, (uint32_t[]){ 1, NUM } };
uint32_t *variableNameTags[] = { (uint32_t[]){ 2, CHAR_OR_NUM, VARIABLE_NAME }, (uint32_t[]){ 1, LAMBDA } };
uint32_t *variableTokensOrTags[] = { (uint32_t[]){ 2, CHAR, VARIABLE_NAME } };
ARC_ParserLanguageTag testTags[3] = {
{
CHAR_OR_NUM, //tagId
charOrNumTokens, //tokensOrTags
2 //tokenOrTagsSize
},
{
VARIABLE_NAME, //tagId
variableNameTags, //tokensOrTags
2 //tokenOrTagsSize
},
{
VARIABLE, //tagId
variableTokensOrTags, //tokensOrTags
1 //tokenOrTagsSize
}
};
ARC_Array languageArray = {
3, //size
testTags //data
};
ARC_Parser_Create(&parser, &languageArray);
ARC_Parser_Destroy(parser);
ARC_CHECK(1 == 1);
}