From e29b8cc0857e7de9c7a65e1c36ce8af5c4b26817 Mon Sep 17 00:00:00 2001 From: Michael Uleysky Date: Sun, 30 Aug 2015 18:01:03 +1000 Subject: [PATCH] Grammatical parser --- src/Makefile | 30 ++++-- src/debug.h | 2 +- src/globals.cpp | 24 +++++ src/globals.h | 22 ++++ src/init.cpp | 11 +- src/init.h | 4 + src/main.cpp | 7 +- src/object.cpp | 6 ++ src/object.h | 227 +++++++++++++++++++++++++++++++++++++++ src/parser/grammatical.y | 107 ++++++++++++++++++ src/parser/lexical.l | 56 ++++++---- src/parser/parser.h | 2 + src/parser/yyloc.h | 19 ++++ 13 files changed, 482 insertions(+), 35 deletions(-) create mode 100644 src/globals.cpp create mode 100644 src/globals.h create mode 100644 src/object.cpp create mode 100644 src/object.h create mode 100644 src/parser/grammatical.y create mode 100644 src/parser/yyloc.h diff --git a/src/Makefile b/src/Makefile index 04aa6ef..ca51610 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,21 +1,33 @@ -CFLAGS=-O2 -g - -OBJECTS=main.o debug.o init.o parser/lexical.o +CFLAGS=-O2 -g -std=gnu++11 +LDFLAGS= +CC=g++ +SOURCE = $(wildcard *.cpp) parser/lexical.cpp parser/grammatical.cpp +DEPENDS = $(subst .cpp,.d,$(SOURCE)) +OBJECTS = $(subst .cpp,.o,$(SOURCE)) makemap: $(OBJECTS) - g++ $(CFLAGS) -o $@ $(OBJECTS) + $(CC) $(LDFLAGS) -o $@ $(OBJECTS) + +include $(DEPENDS) + +%.o: %.cpp + $(CC) -c $(CFLAGS) -o $@ $< -main.o: debug.h init.h -init.o: parser/lexical.h parser/parser.h debug.h init.h -parser/lexical.o: parser/parser.h debug.h +%.d: %.cpp + $(CC) $(CFLAGS) -MM -MT $(subst .cpp,.o,$<) $< | sed 's%\(^.*\):%\1 $@ :%g' >$@ + +parser/grammatical.d: parser/lexical.h parser/lexical.h parser/lexical.cpp: parser/lexical.l cd parser && flex lexical.l +parser/grammatical.h parser/grammatical.cpp: parser/grammatical.y + cd parser && bison grammatical.y + clean: - rm -f *.o parser/*.o parser/lexical.{cpp,h} + rm -f *.o *.d parser/*.{o,d} parser/{lexical,grammatical}.{cpp,h} distclean: clean - rm -f makemap \ No newline at end of file + rm -f makemap \ No newline at end of file diff --git a/src/debug.h b/src/debug.h index 3bfdf3c..b5f6896 100644 --- a/src/debug.h +++ b/src/debug.h @@ -2,7 +2,7 @@ #define DEBUG_H #include -enum debug_level {INTERNALREQUEST,DEBUG,INFO,WARNING,ERROR}; +enum debug_level {INTERNALREQUEST,MOREDEBUG,DEBUG,INFO,WARNING,ERROR}; std::ostream& COUT(debug_level dl); diff --git a/src/globals.cpp b/src/globals.cpp new file mode 100644 index 0000000..c5031db --- /dev/null +++ b/src/globals.cpp @@ -0,0 +1,24 @@ +#include "globals.h" + +// Variables definitions +std::map G_vars; + +// Functions addresses +std::multimap G_funcs; + +// List of objects to save +std::list G_tosave; + +// List of objects to print +std::list G_toprint; + +void ClearGlobals() +{ + for(auto& it:G_vars) delete it.second; + for(auto& it:G_tosave) delete it; + for(auto& it:G_toprint) delete it; + + G_vars.clear(); + G_tosave.clear(); + G_toprint.clear(); +} diff --git a/src/globals.h b/src/globals.h new file mode 100644 index 0000000..c468ec9 --- /dev/null +++ b/src/globals.h @@ -0,0 +1,22 @@ +#ifndef GLOBALS_H +#define GLOBALS_H +#include +#include +#include +#include "object.h" + +// Variables definitions +extern std::map G_vars; + +// Functions addresses +typedef ObjectBase* (*Func)(ObjectList*); +extern std::multimap G_funcs; + +// List of objects to save +extern std::list G_tosave; + +// List of objects to print +extern std::list G_toprint; + +void ClearGlobals(); +#endif diff --git a/src/init.cpp b/src/init.cpp index d5879c9..11d57e3 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -1,7 +1,10 @@ +#include #include "init.h" #include "debug.h" -#include "parser/lexical.h" +#include "object.h" #include "parser/parser.h" +#include "parser/grammatical.h" +#include "parser/lexical.h" int ParseConfigFile(char* config) { @@ -18,9 +21,13 @@ int ParseConfigFile(char* config) extra.filename=config; extra.inclevel=0; extra.maxinclevel=10; + extra.curline=1; + extra.curpos=extra.curoffset=0; conflex_init_extra(&extra,&scanner); confset_in(conffd,scanner); - conflex(scanner); +// {YYSTYPE qqq; while(conflex(&qqq,scanner)>0);} + confparse(scanner); conflex_destroy(scanner); fclose(conffd); + return 0; } diff --git a/src/init.h b/src/init.h index 3a86955..539ba37 100644 --- a/src/init.h +++ b/src/init.h @@ -1,5 +1,9 @@ #ifndef INIT_H #define INIT_H +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif int ParseConfigFile(char* config); diff --git a/src/main.cpp b/src/main.cpp index 8b42e67..ea540b1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,13 +1,18 @@ #include "debug.h" #include "init.h" +#include "globals.h" int main(int argc, char** argv) { if(argc!=2) return 1; - SetDebugLevel(DEBUG); + SetDebugLevel(INFO); ParseConfigFile(argv[1]); + COUT(INFO)< std::string ObjectSimple::type="bool"; +template<> std::string ObjectSimple::type="integer"; +template<> std::string ObjectSimple::type="real"; +template<> std::string ObjectSimple::type="string"; diff --git a/src/object.h b/src/object.h new file mode 100644 index 0000000..7ce27ee --- /dev/null +++ b/src/object.h @@ -0,0 +1,227 @@ +#ifndef OBJECT_H +#define OBJECT_H +#include +#include +#include +#include +#include +#include +#include +#include +#include "debug.h" + +// Check if pointer is ObjectBase derivative class +#define IS_OTYPE(quo,equ) (std::type_index(typeid(*quo))==std::type_index(typeid(equ))) + +// Base class for all objects +class ObjectBase +{ +protected: +// No save by default +virtual const int8_t* Blob(size_t* size) const { *size=0; return 0; } +virtual void DeallocBlob(const void* ptr) const {}; +public: + + ObjectBase() = default; + ObjectBase(const ObjectBase&) = delete; + virtual ~ObjectBase(){} + virtual std::string Type() const=0; + virtual bool Print() const=0; + bool Save(const char* fname) const + { + size_t size,offset=0,wr; + const int8_t* dptr; + FILE* fd; + int serrno; + + fd=fopen(fname,"w"); + serrno=errno; + if(0==fd) + { + COUT(ERROR)<<"Can't open file "< +class ObjectSimple: public ObjectBase +{ +private: + T val; + static std::string type; + + const int8_t* Blob(size_t* size) const override + { + *size=sizeof(T); + return reinterpret_cast(&val); + } + +public: + ObjectSimple(T t):val(t) {} + ObjectSimple(const T* t):val(*t) {} + ~ObjectSimple() {} + bool Print() const override + { + COUT(INFO)<<"Object type: "< ObjectBool; +typedef ObjectSimple ObjectInt; +typedef ObjectSimple ObjectReal; +typedef ObjectSimple ObjectString; + +template<> +inline const int8_t* ObjectSimple::Blob(size_t* size) const +{ + *size=val.length(); + return reinterpret_cast(val.c_str()); +} + +// Class for name-value pair +class ObjectPair: public ObjectBase +{ +private: + std::string name; + ObjectBase* val; + +public: + ObjectPair():val(0) {} + ObjectPair(const std::string& n, ObjectBase* v):name(n),val(v) {} + ObjectPair(const std::string* n, ObjectBase* v):name(*n),val(v) {} + ~ObjectPair() + { + if(val!=0) delete val; + val=0; + } + bool Exist() const {return 0==val;} + + bool Print() const override + { + if(!Exist()) return false; + COUT(INFO)<<"Object type: "<Type()<::size_type Size() const {return vals.size();} + std::string Type() const override {return "list";} + ObjectList* PushFront(ObjectBase* p) {vals.push_front(p); return this;} + ObjectList* PushBack(ObjectBase* p) {vals.push_back(p); return this;} + std::string Dump() const override + { + std::string s("("); + for(auto& i: vals) s+=i->Dump()+", "; + if(vals.size()!=0) s.resize(s.length()-2); + return s+")"; + } +}; + +// Class for storing identifiers +class OId: public ObjectBase +{ + std::string name; +public: + OId(const std::string* t):name(*t) {} + ~OId() {} + bool Print() const override {return false;} + std::string Type() const override {return "IDENT";} + std::string Name() const {return name;} + void SetName(std::string s) {name=s;} + std::string Dump() const override {return Name();}; +}; + +// Class for storing functions +class OFunc: public ObjectBase +{ + std::string name; + ObjectList* args; +public: + OFunc(const std::string* t, ObjectBase* p):name(*t) + { + if(IS_OTYPE(p,ObjectList)) args=dynamic_cast(p); + else args=new ObjectList(p); + } + OFunc(const char* t, ObjectBase* p):name(t) + { + if(IS_OTYPE(p,ObjectList)) args=dynamic_cast(p); + else args=new ObjectList(p); + } + ~OFunc() {if(args!=0) delete args;} + bool Print() const override {return false;} + std::string Type() const override {return "FUNC";} + std::string Name() const {return name;} + void SetName(std::string s) {name=s;} + std::string Dump() const override {return Name()+args->Dump();}; +}; + +#endif diff --git a/src/parser/grammatical.y b/src/parser/grammatical.y new file mode 100644 index 0000000..adfe095 --- /dev/null +++ b/src/parser/grammatical.y @@ -0,0 +1,107 @@ +%name-prefix "conf" +%language "c" +%output "grammatical.cpp" +%defines "grammatical.h" +%param {yyscan_t scanner} +%define api.pure full +%define parse.lac full +%locations +//%no-lines + +%{ +#include +#include +#include "parser.h" +#include "../debug.h" +#include "../object.h" +#include "../globals.h" +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif +#include "yyloc.h" +#include "grammatical.h" +inline void conferror(YYLTYPE *locp, yyscan_t sc, const char * str) +{ + COUT(ERROR)< REAL +%token BOOL +%token INTEGER +%token NAME +%token IDENTIFIER +%token STRING + +%left '-' '+' +%left '*' '/' +%precedence UNARY /* negation--unary minus */ +%right '^' /* exponentiation */ + +%type expression +%type call +%type object +%type pair +%type list +%% +input: + %empty {COUT(DEBUG)<<"Empty input\n";} + | input line {COUT(DEBUG)<<" input line\n";} + ; + +line: + NAME ASSIGN object ENDL {COUT(DEBUG)<<" NAME ASSIGN object ENDL\n"; if(G_vars.count(*$1)!=0) delete G_vars[*$1]; G_vars[*$1]=$3; delete $1;} + | NAME ASSIGN list ENDL {COUT(DEBUG)<<" NAME ASSIGN list ENDL\n"; if(G_vars.count(*$1)!=0) delete G_vars[*$1]; G_vars[*$1]=$3; delete $1;} + | NAME OBRACE list CBRACE ENDL {COUT(DEBUG)<<" NAME OBRACE list CBRACE ENDL\n"; transform($1->begin(),$1->end(),$1->begin(),::tolower); if(*$1=="save") G_tosave.push_back($3); if(*$1=="print") G_toprint.push_back($3); delete $1;} + | NAME OBRACE object CBRACE ENDL {COUT(DEBUG)<<" NAME OBRACE object CBRACE ENDL\n"; transform($1->begin(),$1->end(),$1->begin(),::tolower); if(*$1=="save") G_tosave.push_back($3); if(*$1=="print") G_toprint.push_back($3); delete $1;} + ; + +list: + object object {COUT(DEBUG)<<" object object\n"; $$=(new ObjectList($1))->PushBack($2);} + | list object {COUT(DEBUG)<<" list object\n"; dynamic_cast($1)->PushBack($2); $$=$1;} + ; + +pair: + IDENTIFIER ASSIGN object {COUT(DEBUG)<<" IDENTIFIER ASSIGN object\n"; $$=new ObjectPair($1,$3);} + ; + +object: + STRING {COUT(DEBUG)<<" STRING\n"; $$=new ObjectString($1);} + | BOOL {COUT(DEBUG)<<" BOOL\n"; $$=new ObjectBool($1);} + | OBRACE list CBRACE {COUT(DEBUG)<<" OBRACE list CBRACE\n"; $$=$2;} + | expression {COUT(DEBUG)<<" expression\n"; $$=$1;} + | pair {COUT(DEBUG)<<" pair\n"; $$=$1;} + | OBRACE object CBRACE {COUT(DEBUG)<<" OBRACE object CBRACE\n"; $$=$2;} + ; + +call: + IDENTIFIER OBRACE object CBRACE {COUT(DEBUG)<<" IDENTIFIER OBRACE object CBRACE\n"; $$=new OFunc($1,$3); delete $1;} + | IDENTIFIER OBRACE list CBRACE {COUT(DEBUG)<<" IDENTIFIER OBRACE list CBRACE\n"; $$=new OFunc($1,$3); delete $1;} + +expression: + IDENTIFIER {COUT(DEBUG)<<" IDENTIFIER\n"; $$=new OId($1); delete $1;} + | REAL {COUT(DEBUG)<<" REAL\n"; $$=new ObjectReal($1);} + | INTEGER {COUT(DEBUG)<<" INTEGER\n"; $$=new ObjectInt($1);} + | expression '-' expression {COUT(DEBUG)<<" -\n"; $$=new OFunc("SUB",(new ObjectList($1))->PushBack($3));} + | expression '+' expression {COUT(DEBUG)<<" +\n"; $$=new OFunc("ADD",(new ObjectList($1))->PushBack($3));} + | expression '/' expression {COUT(DEBUG)<<" /\n"; $$=new OFunc("DIV",(new ObjectList($1))->PushBack($3));} + | expression '*' expression {COUT(DEBUG)<<" *\n"; $$=new OFunc("MUL",(new ObjectList($1))->PushBack($3));} + | expression '^' expression {COUT(DEBUG)<<" ^\n"; $$=new OFunc("POW",(new ObjectList($1))->PushBack($3));} + | '-' expression %prec UNARY {COUT(DEBUG)<<" unary -\n"; $$=new OFunc("NEG",$2);} + | '+' expression %prec UNARY {COUT(DEBUG)<<" unary +\n"; $$=new OFunc("POS",$2);} + | OBRACE expression CBRACE {COUT(DEBUG)<<" OBRACE expression CBRACE\n"; $$=$2;} + | call {COUT(DEBUG)<<" call\n"; $$=$1;} + ; diff --git a/src/parser/lexical.l b/src/parser/lexical.l index 624aad0..fe85317 100644 --- a/src/parser/lexical.l +++ b/src/parser/lexical.l @@ -6,26 +6,36 @@ %option header-file="lexical.h" %option outfile="lexical.cpp" %option prefix="conf" -%option extra-type="const struct lexical_extra*" -%x STRING +%option extra-type="struct lexical_extra*" +%option bison-bridge +%option bison-locations +%x PSTRING %x PARSE %{ +#if __cplusplus > 199711L +#define register // Deprecated in C++11. +#endif // #if __cplusplus > 199711L +#include #include #include "../debug.h" +#include "../object.h" #include "parser.h" -int nc; +#include "grammatical.h" +static std::string str; %} %% -include\(\".+\"\); { +\n yyextra->curline++; yyextra->curpos=0; yyextra->curoffset++; REJECT; +. yyextra->curpos++; yyextra->curoffset++; REJECT; +include\(\".+\"\); {/* if(yyextra->inclevel>=yyextra->maxinclevel) { COUT(ERROR)<<"Max include level reached in file "<filename<<" at line "<inclevel+1; @@ -36,23 +46,25 @@ include\(\".+\"\); { yylex_destroy(scanner); fclose(fd); - } -[a-zA-Z][a-zA-Z0-9_]* printf("NAME\n"); BEGIN(PARSE); -[+-]?[0-9]+ printf("INTEGER\n"); -[+-]?[0-9]+(\.[0-9]*)?([eE][+-][0-9]+)? printf("REAL\n"); -\( printf("OBRACE\n"); -\) printf("CBRACE\n"); -\; printf("ENDL\n"); BEGIN(0); -= printf("ASSIGN\n"); -[a-zA-Z][a-zA-Z0-9_]* printf("IDENTIFIER\n"); + */} +[a-zA-Z][a-zA-Z0-9_]* COUT(MOREDEBUG)<<"NAME("<str=new std::string(yytext); return NAME; +[+\-*/^] COUT(MOREDEBUG)<<" OPERATION("<[0-9]+ COUT(MOREDEBUG)<<" INTEGER("<i=atoll(yytext); return INTEGER; +[0-9]+(\.[0-9]*)?([eE][+-][0-9]+)? COUT(MOREDEBUG)<<" REAL("<r=atof(yytext); return REAL; +[TF] COUT(MOREDEBUG)<<" BOOL("<b=(yytext[0]=='T')?true:false; return BOOL; +\( COUT(MOREDEBUG)<<" OBRACE()"; return OBRACE; +\) COUT(MOREDEBUG)<<" CBRACE()"; return CBRACE; +\; COUT(MOREDEBUG)<<" ENDL()"<= COUT(MOREDEBUG)<<" ASSIGN()"; return ASSIGN; +([a-zA-Z][a-zA-Z0-9_]*\.)*[a-zA-Z][a-zA-Z0-9_]* COUT(MOREDEBUG)<<" IDENTIFIER("<str=new std::string(yytext); return IDENTIFIER; [ ,\n\t] \#.* -\" BEGIN(STRING); nc=0; -. COUT(ERROR)<<"Unknown symbol "<filename<<" at line "<\\\\ nc++; -\\\" nc++; -\" BEGIN(PARSE); printf("STRING%d\n",nc); -. nc++; -<> COUT(ERROR)<<"Unclosed quote!"<> yyterminate(); +\" BEGIN(PSTRING); str.erase(); +. COUT(ERROR)<<"Unknown symbol "<filename<<" at line "<\\\\ str+='\\'; +\\\" str+='\"'; +\" BEGIN(PARSE); COUT(MOREDEBUG)<<" STRING("<str=&str; return STRING; +. str+=yytext[0]; +<> COUT(ERROR)<<"Unclosed quote!"<> str.erase(); yyterminate(); return 0; %% diff --git a/src/parser/parser.h b/src/parser/parser.h index b5960ab..088032d 100644 --- a/src/parser/parser.h +++ b/src/parser/parser.h @@ -5,5 +5,7 @@ struct lexical_extra { const char* filename; unsigned int inclevel,maxinclevel; + unsigned int curline,curpos,curoffset; }; + #endif diff --git a/src/parser/yyloc.h b/src/parser/yyloc.h new file mode 100644 index 0000000..fc61489 --- /dev/null +++ b/src/parser/yyloc.h @@ -0,0 +1,19 @@ +#ifndef PARSER_YYLOC_H +#define PARSER_YYLOC_H +#include + +#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED +typedef struct YYLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; + std::string filename; + std::string token_type; + std::string token_value; +} YYLTYPE; +#define YYLTYPE_IS_DECLARED 1 +#endif + +#endif