diff --git a/CMakeLists.txt b/CMakeLists.txt index ed593fd..3de8890 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,4 +3,12 @@ project(syntaxParser) set(CMAKE_CXX_STANDARD 14) -add_executable(syntaxParser main.cpp) \ No newline at end of file +set(CMAKE_CXX_FLAGS_RELEASE "/Ox") + +include_directories(./include) + +aux_source_directory(src SRC_FILES) + +add_executable(LR1Generator LR1Generator.cpp ${SRC_FILES}) + +add_executable(syntaxParser LR1Generator.cpp ${SRC_FILES}) \ No newline at end of file diff --git a/LR1Generator.cpp b/LR1Generator.cpp new file mode 100644 index 0000000..efa5436 --- /dev/null +++ b/LR1Generator.cpp @@ -0,0 +1,57 @@ +#include +#include + +#include +#include + +#include + +using std::vector; +using std::wstring; + +using std::wcout; +using std::endl; + + +#include +#include + + +int main() { + clock_t start,end;//定义clock_t变量 + start = clock(); //开始时间 + + const GrammarResourcePool *pool; + + const AnalyseTableGenerator *atg; + + + LR1Generator generator; + + generator.getProductions(); + + generator.run(); + + generator.output(pool, atg); + + //输出时间 + end = clock(); //结束时间 + double times = double(end-start)/CLOCKS_PER_SEC; + wcout<<"LR1Generator Run time = "<< times <<"s MicroSeconds" << " = " << times * 1000 <<"ms" << endl; + + start = clock(); //开始时间 + + SyntaxParser syntaxParser(pool, atg); + + syntaxParser.getToken(); + + syntaxParser.parse(); + + //输出时间 + end = clock(); //结束时间 + times = double(end-start)/CLOCKS_PER_SEC; + wcout<<"SyntaxParser Run time = "< "struct" "ID" "{" member_list "}" "EOF" -member_list-> type_spec declarators ";" +@struct_type -> "struct" "ID" "{" member_list "}" more_struct_type +more_struct_type -> "EOF" +more_struct_type -> 蔚 + +member_list -> type_spec declarators ";" member_list +member_list -> 蔚 + type_spec -> base_type_spec -type_spec -> struct_type +type_spec -> @struct_type base_type_spec -> floating_pt_type base_type_spec -> integer_type base_type_spec -> "char" @@ -23,13 +28,13 @@ signed_int -> 
"int8" unsigned_int -> "unsigned" "short" unsigned_int -> "unsigned" "long" unsigned_int -> "unsigned" "long" "long" -unsigned_int -> "unit16" -unsigned_int -> "unit32" -unsigned_int -> "unit64" -unsigned_int -> "unit8" +unsigned_int -> "uint16" +unsigned_int -> "uint32" +unsigned_int -> "uint64" +unsigned_int -> "uint8" declarators -> declarator more_declarators -more_declarators -> "," declarator +more_declarators -> "," declarator more_declarators more_declarators -> 蔚 declarator -> "ID" more_declarator @@ -37,32 +42,48 @@ more_declarator -> exp_list more_declarator -> 蔚 exp_list -> "[" or_expr more_or_expr "]" -more_or_expr -> "," or_expr +more_or_expr -> "," or_expr more_or_expr more_or_expr -> 蔚 + +or_expr -> xor_expr more_xor_expr +more_xor_expr -> "|" xor_expr more_xor_expr +more_xor_expr -> 蔚 + xor_expr -> and_expr more_and_expr -more_and_expr -> "^" and_expr +more_and_expr -> "^" and_expr more_and_expr more_and_expr -> 蔚 + and_expr -> shift_expr more_shift_expr -more_shift_expr -> "&" shift_expr +more_shift_expr -> "&" shift_expr more_shift_expr more_shift_expr -> 蔚 + shift_expr -> add_expr more_add_expr -more_add_expr -> shift_sign add_expr +more_add_expr -> shift_sign add_expr more_add_expr shift_sign -> ">>" shift_sign -> "<<" +more_add_expr -> 蔚 + add_expr -> multi_expr more_multi_expr -more_multi_expr -> multi_sign multi_expr +more_multi_expr -> multi_sign multi_expr more_multi_expr multi_sign -> "+" multi_sign -> "-" +more_multi_expr -> 蔚 + multi_expr -> unary_expr more_unary_expr -more_unary_expr -> unary_sign unary_expr -more_unary_expr -> 蔚 +more_unary_expr -> unary_sign unary_expr more_unary_expr unary_sign -> "*" unary_sign -> "/" unary_sign -> "%" +more_unary_expr -> 蔚 + unary_expr -> unary_sign_2 unary_declare unary_sign_2 -> "-" unary_sign_2 -> "+" unary_sign_2 -> "~" +unary_sign_2 -> 蔚 unary_declare -> "INTEGER" unary_declare -> "STRING" -unary_declare -> "BOOLEAN" \ No newline at end of file +unary_declare -> BOOLEAN + +BOOLEAN -> "TRUE" 
+BOOLEAN -> "FALSE" \ No newline at end of file diff --git a/cmake-build-debug/tokenOut.txt b/cmake-build-debug/tokenOut.txt new file mode 100644 index 0000000..a2e1df2 --- /dev/null +++ b/cmake-build-debug/tokenOut.txt @@ -0,0 +1,36 @@ +1 STRUCT(struct) ID(test) OPENING_BRACE({) +2 FLOAT(float) ID(a1) SEMICOLON(;) +3 DOUBLE(double) ID(a2) COMMA(,) ID(a3) COMMA(,) ID(a4) SEMICOLON(;) +4 LONG(long) DOUBLE(double) ID(a5) SEMICOLON(;) +5 STRUCT(struct) ID(warp_int) OPENING_BRACE({) +6 INT8(int8) ID(i1) SEMICOLON(;) +7 INT16(int16) ID(i2) SEMICOLON(;) +8 INT32(int32) ID(i3) COMMA(,) ID(i4) SEMICOLON(;) +9 INT64(int64) ID(i5) SEMICOLON(;) +10 SHORT(short) ID(i6) SEMICOLON(;) +11 LONG(long) ID(i7) SEMICOLON(;) +12 LONG(long) LONG(long) ID(i8) SEMICOLON(;) +13 CLOSING_BRACE(}) ID(int1) COMMA(,) ID(int2) SEMICOLON(;) +14 STRUCT(struct) ID(warp_signInt) OPENING_BRACE({) +15 UINT8(uint8) ID(s1) SEMICOLON(;) +16 UINT16(uint16) ID(s2) SEMICOLON(;) +17 UINT32(uint32) ID(s3) COMMA(,) ID(s4) SEMICOLON(;) +18 UINT64(uint64) ID(s5) SEMICOLON(;) +19 UNSIGNED(unsigned) SHORT(short) ID(s6) COMMA(,) ID(s7) SEMICOLON(;) +20 UNSIGNED(unsigned) LONG(long) ID(s8) SEMICOLON(;) +21 UNSIGNED(unsigned) LONG(long) LONG(long) ID(s9) SEMICOLON(;) +22 CLOSING_BRACE(}) ID(sign1) COMMA(,) ID(SIGN2) SEMICOLON(;) +23 CHAR(char) ID(a_6) LEFT_BRACKET([) STRING("compile") PLUS(+) STRING("studying") COMMA(,) STRING("\40") COMMA(,) STRING("abs\b\t\n\f\r\"\\abs") RIGHT_BRACKET(]) SEMICOLON(;) +24 BOOLEAN(boolean) ID(a_bool_7) LEFT_BRACKET([) INTEGER(10) INSERT(^) INTEGER(2) COMMA(,) INTEGER(1) AND(&) INTEGER(2) AND(&) INTEGER(3) COMMA(,) TRUE(TRUE) DELIMITER(|) FALSE(FALSE) COMMA(,) TILDE(~) FALSE(FALSE) DELIMITER(|) TILDE(~) TRUE(TRUE) RIGHT_BRACKET(]) SEMICOLON(;) +25 LONG(long) ID(a8) LEFT_BRACKET([) INTEGER(1024) RIGHT_SHIFT(>>) INTEGER(10) COMMA(,) INTEGER(0) LEFT_SHIFT(<<) INTEGER(10) COMMA(,) INTEGER(100) MULT(*) INTEGER(2) SLASH(/) INTEGER(10) PERCENT(%) INTEGER(2) COMMA(,) INTEGER(100) PLUS(+) 
INTEGER(21) SUB(-) INTEGER(19) RIGHT_BRACKET(]) SEMICOLON(;) +26 BOOLEAN(boolean) ID(a9) LEFT_BRACKET([) INTEGER(10) INSERT(^) INTEGER(2) AND(&) INTEGER(3) DELIMITER(|) SUB(-) INTEGER(1) RIGHT_SHIFT(>>) INTEGER(10) AND(&) INTEGER(100) LEFT_SHIFT(<<) SUB(-) INTEGER(10) SUB(-) INTEGER(10) PLUS(+) INTEGER(100) MULT(*) INTEGER(2) SLASH(/) INTEGER(10) PERCENT(%) INTEGER(2) RIGHT_BRACKET(]) SEMICOLON(;) +27 STRUCT(struct) ID(warp_1) OPENING_BRACE({) +28 FLOAT(float) ID(w1) SEMICOLON(;) +29 LONG(long) ID(w2) SEMICOLON(;) +30 STRUCT(struct) ID(warp_2) OPENING_BRACE({) +31 BOOLEAN(boolean) ID(w3) LEFT_BRACKET([) INTEGER(111) AND(&) INTEGER(2) RIGHT_BRACKET(]) SEMICOLON(;) +32 CHAR(char) ID(w4) LEFT_BRACKET([) STRING("\40\b\t\n\f\r\"\\\40") RIGHT_BRACKET(]) SEMICOLON(;) +33 CLOSING_BRACE(}) ID(w5) COMMA(,) ID(w6) SEMICOLON(;) +34 CLOSING_BRACE(}) ID(w7) COMMA(,) ID(w8) SEMICOLON(;) +35 CLOSING_BRACE(}) +36 EOF() \ No newline at end of file diff --git a/include/AnalyseTableGenerator.h b/include/AnalyseTableGenerator.h new file mode 100644 index 0000000..88aac18 --- /dev/null +++ b/include/AnalyseTableGenerator.h @@ -0,0 +1,73 @@ +// +// Created by Administrator on 2021/4/30. 
+// + +#ifndef SYNTAXPARSER_ANALYSETABLEGENERATOR_H +#define SYNTAXPARSER_ANALYSETABLEGENERATOR_H + +#include +#include + +#include +#include + + +using Action = enum { + MOVE, REDUCE, ACC, STEP_GOTO +}; + + +class AnalyseTableGenerator { + + struct Step { + + const Action action; + union Target{ + int index; + const Production *production; + } target{}; + + Step(Action action, int index) : action(action), target(Target{index}){} + Step(Action action, const Production *p_pdt) : action(action) { + target.production = p_pdt; + } + }; + + std::map ACTION; + + std::map GOTO; + + const ItemCollectionManager *icm; + + const GrammarResourcePool *pool; + + template + inline void hash_combine(std::size_t& seed, const T& v) const + { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); + } + + void add_action(int index, int terminator_symbol, Action action, const Production *target_pdt); + + void add_action(int index, int terminator_symbol, Action action, int target_index); + + void add_goto(int index, int non_terminator_symbol, int target_index); + +public: + + explicit AnalyseTableGenerator(const GrammarResourcePool *p_pool, const ItemCollectionManager *p_icm) + :pool(p_pool) , icm(p_icm) {} + + void generate(); + + const Step *findActionStep(int index, int terminator_symbol) const; + + const Step *findGotoStep(int index, int non_terminator_symbol) const; + + void print() const; + +}; + + +#endif //SYNTAXPARSER_ANALYSETABLEGENERATOR_H diff --git a/include/GrammarResourcePool.h b/include/GrammarResourcePool.h new file mode 100644 index 0000000..738d415 --- /dev/null +++ b/include/GrammarResourcePool.h @@ -0,0 +1,93 @@ +// +// Created by Administrator on 2021/4/30. 
+// + +#ifndef SYNTAXPARSER_GRAMMARRESOURCEPOOL_H +#define SYNTAXPARSER_GRAMMARRESOURCEPOOL_H + + +#include +#include +#include + +#include +#include + +// 语法资源池 +class GrammarResourcePool { + + int pdt_index = 0; + + // 符号表 + SymbolTable symbolTable; + + // 产生式 + std::vector productions; + + // FIRST结果存储表 + std::map *> firsts; + + // FOLLOW结果存储表 + std::map *> follows; + + // 去掉首尾空格 + static std::wstring& trim(std::wstring &&str) { + if (str.empty()) { + return str; + } + + str.erase(0,str.find_first_not_of(' ')); + str.erase(str.find_last_not_of(' ') + 1); + return str; + } + +public: + + const std::set *FIRST(const std::vector &symbols, int start_index); + + const std::set* FIRST(int symbol); + + const std::set *FOLLOW(int symbol); + + void FOLLOW(); + + std::set* get_follow_set(int symbol); + + + void print_symbols(const std::set &symbols_index); + + void parse_production_string_line(const std::wstring &temp_line); + + [[nodiscard]] const std::vector &get_productions() const { + return productions; + } + + [[nodiscard]] const Symbol *getSymbol(int symbol_index) const { + return symbolTable.getSymbol(symbol_index); + } + + [[nodiscard]] const Symbol *getStartSymbol() const { + return symbolTable.getStartSymbol(); + } + + int addSymbol(const std::wstring &name, bool terminator) { + return symbolTable.addSymbol(name, terminator); + } + + const Production *addProduction(int left, std::initializer_list right); + + [[nodiscard]] const std::vector &getAllSymbols() const { + return symbolTable.getAllSymbols(); + } + + void modifySymbol(int index, const std::wstring &name, bool terminator, bool start) { + symbolTable.modifySymbol(index, name, terminator, start); + } + + [[nodiscard]] int getSymbolIndex(const std::wstring &name) const { + return symbolTable.getSymbolIndex(name); + } +}; + + +#endif //SYNTAXPARSER_GRAMMARRESOURCEPOOL_H diff --git a/include/Item.h b/include/Item.h new file mode 100644 index 0000000..e327378 --- /dev/null +++ b/include/Item.h @@ -0,0 +1,53 
@@ +// +// Created by Administrator on 2021/4/30. +// + +#ifndef SYNTAXPARSER_ITEM_H +#define SYNTAXPARSER_ITEM_H + +#include + +#include + +// 项 +class Item{ + // 对应的产生式 + const Production* const production; + + // 点的位置 + int dot_index = 0; + + const int terminator = 0; + +public: + + const bool generated = false; + + explicit Item(const Production *p_pdt, int m_terminator, bool m_generated = false) + : production(p_pdt), terminator(m_terminator), generated(m_generated) {} + + void set_dot_index(int m_dot_index); + + [[nodiscard]] int get_dot_index() const { + return dot_index; + } + + [[nodiscard]] size_t get_right_size() { + return production->right.size(); + } + + int get_dot_next_symbol() const; + + int get_dot_next_i_symbol(int i) const; + + [[nodiscard]] int get_terminator() const { + return terminator; + } + + [[nodiscard]] const Production *get_production() const { + return production; + } +}; + + +#endif //SYNTAXPARSER_ITEM_H diff --git a/include/ItemCollection.h b/include/ItemCollection.h new file mode 100644 index 0000000..0cd2015 --- /dev/null +++ b/include/ItemCollection.h @@ -0,0 +1,64 @@ +// +// Created by Administrator on 2021/4/30. 
+// + +#ifndef SYNTAXPARSER_ITEMCOLLECTION_H +#define SYNTAXPARSER_ITEMCOLLECTION_H + +#include +#include +#include +#include + + +#include +#include + +class ItemCollectionManager; + +class ItemCollection{ + + int index = 0; + + std::map items; + + std::vector cache; + + GrammarResourcePool *pool; + + friend ItemCollectionManager; + + template + inline void hash_combine(std::size_t& seed, const T& v) const + { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); + } + + static bool compare_item_ptr(const Item* lhs, const Item* rhs); + +public: + + explicit ItemCollection(GrammarResourcePool *pool) : pool(pool) { + + } + + [[nodiscard]] const std::vector &getItems() const { + return cache; + } + + [[nodiscard]] int getIndex() const { + return index; + } + + + bool addItem(const Production *p_pdt, int dot_index, int terminator, bool generated = false); + + void CLOSURE(); + + void print(std::wofstream &output) const; + + [[nodiscard]] size_t getHash() const; +}; + +#endif //SYNTAXPARSER_ITEMCOLLECTION_H diff --git a/include/ItemCollectionManager.h b/include/ItemCollectionManager.h new file mode 100644 index 0000000..23accd2 --- /dev/null +++ b/include/ItemCollectionManager.h @@ -0,0 +1,73 @@ +// +// Created by Administrator on 2021/4/30. 
+// + +#ifndef SYNTAXPARSER_ITEMCOLLECTIONMANAGER_H +#define SYNTAXPARSER_ITEMCOLLECTIONMANAGER_H + +#include +#include + +#include + + +class ItemCollectionManager{ + + std::wofstream output; + + int index = 0; + + std::map ic_map; + + std::map ic_content_map; + + std::vector ics; + + GrammarResourcePool *pool; + + const Production *start_pdt{}; + + template + inline void hash_combine(std::size_t& seed, const T& v) const + { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); + } + +public: + + explicit ItemCollectionManager(GrammarResourcePool *resource_pool) : + pool(resource_pool), + output("LR1Automata.txt", std::ios::binary) { + + auto* codeCvtToUTF8= new std::codecvt_utf8; + output.imbue(std::locale(output.getloc(), codeCvtToUTF8)); + + } + + ~ItemCollectionManager(){ + output.close(); + } + + void buildItems(); + + [[nodiscard]] const Production *getStartProduction() const { + return start_pdt; + } + + [[nodiscard]] const std::vector &getItemCollections() const{ + return ics; + } + + ItemCollection *getItemCollectionByHash(size_t hash); + + bool addItemCollection(int idx, int symbol, ItemCollection *p_ic); + + [[nodiscard]] const ItemCollection* getGOTO(int idx, int symbol) const; + + bool GOTO(const ItemCollection *p_ic, int symbol); + +}; + + +#endif //SYNTAXPARSER_ITEMCOLLECTIONMANAGER_H diff --git a/include/LR1Generator.h b/include/LR1Generator.h new file mode 100644 index 0000000..c6fefe8 --- /dev/null +++ b/include/LR1Generator.h @@ -0,0 +1,60 @@ +// +// Created by Administrator on 2021/4/30. 
+// + +#ifndef SYNTAXPARSER_LR1GENERATOR_H +#define SYNTAXPARSER_LR1GENERATOR_H + +#include + +#include +#include +#include + + +class LR1Generator{ + + // 文件输入 + std::wifstream input; + + GrammarResourcePool *pool; + + ItemCollectionManager *icm; + + AnalyseTableGenerator *atg; + +public: + + LR1Generator(): input("syntaxInput.txt", std::ios::binary), + pool(new GrammarResourcePool()), + icm(new ItemCollectionManager(pool)), + atg(new AnalyseTableGenerator(pool, icm)){ + + auto* codeCvtToUTF8= new std::codecvt_utf8; + + input.imbue(std::locale(input.getloc(), codeCvtToUTF8)); + } + + ~LR1Generator() { + input.close(); + } + + void run() { + pool->FOLLOW(); + icm->buildItems(); + atg->generate(); + atg->print(); + } + + // 得到所有的产生式 + void getProductions(); + + void output(const GrammarResourcePool *&m_pool, const AnalyseTableGenerator *&m_atg) { + m_pool = this->pool; + m_atg = this->atg; + } + +}; + + +#endif //SYNTAXPARSER_LR1GENERATOR_H diff --git a/include/Production.h b/include/Production.h new file mode 100644 index 0000000..d832b7d --- /dev/null +++ b/include/Production.h @@ -0,0 +1,22 @@ +// +// Created by Administrator on 2021/4/30. +// + +#ifndef SYNTAXPARSER_PRODUCTION_H +#define SYNTAXPARSER_PRODUCTION_H + +#include + +// 产生式 +struct Production { + + const int index; + const int left; + const std::vector right; + + Production(int index, int left, std::vector right): index(index), left(left), right(std::move(right)) {} + +}; + + +#endif //SYNTAXPARSER_PRODUCTION_H diff --git a/include/Symbol.h b/include/Symbol.h new file mode 100644 index 0000000..7909e5b --- /dev/null +++ b/include/Symbol.h @@ -0,0 +1,27 @@ +// +// Created by Administrator on 2021/4/30. 
+// + +#ifndef SYNTAXPARSER_SYMBOL_H +#define SYNTAXPARSER_SYMBOL_H + +#include + +struct Symbol { + + const int index; + std::wstring name; + bool terminator; + bool start; + + Symbol(int index, std::wstring name, bool terminator, bool start): + index(index), + name(std::move(name)), + terminator(terminator), + start(start) + {} + +}; + + +#endif //SYNTAXPARSER_SYMBOL_H diff --git a/include/SymbolTable.h b/include/SymbolTable.h new file mode 100644 index 0000000..0698174 --- /dev/null +++ b/include/SymbolTable.h @@ -0,0 +1,47 @@ +// +// Created by Administrator on 2021/4/30. +// + +#ifndef SYNTAXPARSER_SYMBOLTABLE_H +#define SYNTAXPARSER_SYMBOLTABLE_H + +#include +#include +#include +#include + +#include + + +class SymbolTable { + + int index = 1; + + std::map table; + + std::map cache; + + std::vector line; + +public: + + SymbolTable(); + + [[nodiscard]] const std::vector &getAllSymbols() const { + return line; + } + + int addSymbol(const std::wstring& name, bool terminator); + + [[nodiscard]] const Symbol *getSymbol(int symbol_index) const; + + [[nodiscard]] int getSymbolIndex(const std::wstring &name) const; + + void modifySymbol(int idx, const std::wstring &name, bool terminator, bool start); + + [[nodiscard]] const Symbol *getStartSymbol() const; + +}; + + +#endif //SYNTAXPARSER_SYMBOLTABLE_H diff --git a/include/SyntaxParser.h b/include/SyntaxParser.h new file mode 100644 index 0000000..1b4b32c --- /dev/null +++ b/include/SyntaxParser.h @@ -0,0 +1,78 @@ +// +// Created by Administrator on 2021/4/30. 
+// + +#ifndef SYNTAXPARSER_SYNTAXPARSER_H +#define SYNTAXPARSER_SYNTAXPARSER_H + +#include +#include +#include +#include + +#include +#include + + + +class SyntaxParser { + + // 文件输入 + std::wifstream input; + + std::wofstream output; + + const GrammarResourcePool *pool; + + const AnalyseTableGenerator *atg; + + std::queue tokens_queue; + + std::stack analyse_stack; + + std::stack status_stack; + + std::vector lines_index; + + std::wstringstream string_buffer; + + size_t now_line = 1; + + static std::vector ws_split(const std::wstring& in, const std::wstring& delim); + + static std::pair get_token_info(const std::wstring &token); + +public: + + SyntaxParser(const GrammarResourcePool *pool, const AnalyseTableGenerator *atg): + input("tokenOut.txt", std::ios::binary), + pool(pool), + atg(atg), + output("SyntaxOut.txt", std::ios::binary){ + + auto* codeCvtToUTF8= new std::codecvt_utf8; + input.imbue(std::locale(input.getloc(), codeCvtToUTF8)); + output.imbue(std::locale(output.getloc(), codeCvtToUTF8)); + } + + ~SyntaxParser() { + output.close(); + } + + // 得到所有的产生式 + void getToken(); + + void printSymbol(int symbol_index); + + void printProduction(const Production *p_pdt); + + // 自底向上语法分析 + void parse(); + + void printError(); + + void printDone(); +}; + + +#endif //SYNTAXPARSER_SYNTAXPARSER_H diff --git a/main.cpp b/main.cpp deleted file mode 100644 index 71747f2..0000000 --- a/main.cpp +++ /dev/null @@ -1,1235 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; - -using std::vector; -using std::wstring; -using std::wstringstream; -using std::pair; -using std::wcout; -using std::endl; -using std::to_string; -using std::hash; -using std::setw; -using std::stack; -using std::queue; - -struct Symbol { - - const int index; - wstring name; - bool terminator; - bool start; - - Symbol(int index, wstring name, bool terminator, bool start): - 
index(index), - name(std::move(name)), - terminator(terminator), - start(start) - {} - -}; - -class SymbolTable { - int index = 1; - - map table; - - map cache; - - vector line; - -public: - - SymbolTable() { - - auto symbol = new Symbol(0, L"ε", true, false); - table.insert(pair(L"ε", symbol)); - cache.insert(pair(0, symbol)); - line.push_back(symbol); - - symbol = new Symbol(-1, L"$", true, false); - table.insert(pair(L"$", symbol)); - cache.insert(pair(-1, symbol)); - line.push_back(symbol); - } - - [[nodiscard]] const vector &getAllSymbols() const { - return line; - } - - int addSymbol(const wstring& name, bool terminator) { - - Symbol *symbol = nullptr; - - if(name == L"ε") { - return 0; - } else if (name[0] == L'@') { - symbol = new Symbol(index, name, terminator, true); - } else { - symbol = new Symbol(index, name, terminator, false); - } - - const auto &it = table.find(name); - if (it != table.end()) { - return it->second->index; - } - table.insert(pair(symbol->name, symbol)); - cache.insert(pair(symbol->index, symbol)); - line.push_back(symbol); - - index++; - - return symbol->index; - } - - [[nodiscard]] const Symbol *getSymbol(int symbol_index) const { - const auto &it = cache.find(symbol_index); - if(it != cache.end()) { - return it->second; - } else { - throw runtime_error("symbol " + to_string(symbol_index) + " NOT Found"); - } - } - - [[nodiscard]] int getSymbolIndex(const wstring &name) const { - const auto &it = table.find(name); - if(it != table.end()) { - return it->second->index; - } else { - throw runtime_error("symbol NOT Found"); - } - } - - void modifySymbol(int idx, const wstring &name, bool terminator, bool start) { - auto it = cache.find(idx); - if(it != cache.end()) { - auto p_sym = it->second; - p_sym->name = name; - p_sym->terminator = terminator; - p_sym->start = start; - } - } - - [[nodiscard]] const Symbol *getStartSymbol() const { - for(const auto & symbol : getAllSymbols()) { - if(symbol->start) { - return symbol; - } - } - - 
throw runtime_error("start symbol NOT Found"); - } - - - -}; - -// 产生式 -struct Production { - const int index; - const int left; - const vector right; - - Production(int index, int left, vector right): index(index), left(left), right(std::move(right)) {} - -}; - -// 语法资源池 -class GrammarResourcePool { - - int pdt_index = 0; - - // 符号表 - SymbolTable symbolTable; - - // 产生式 - vector productions; - - // FIRST结果存储表 - map *> firsts; - - // FOLLOW结果存储表 - map *> follows; - - // 去掉首尾空格 - static wstring& trim(wstring &&str) { - if (str.empty()) { - return str; - } - - str.erase(0,str.find_first_not_of(' ')); - str.erase(str.find_last_not_of(' ') + 1); - return str; - } - -public: - - const set *FIRST(const vector &symbols, int start_index) { - - // 生成集合 - auto *non_terminator_symbols = new set(); - - for(int i = start_index; i < symbols.size(); i++) { - - const auto p_non_term_set = FIRST(symbols[i]); - - non_terminator_symbols->insert(p_non_term_set->begin(), p_non_term_set->end()); - - const auto sec_it = p_non_term_set->find(0); - if(sec_it != p_non_term_set->end()) { - continue; - } else { - break; - } - } - - return non_terminator_symbols; - } - - const set* FIRST(int symbol) { - - // 查找缓存 - const auto it = firsts.find(symbol); - if(it != firsts.end()) { - return it->second; - } - - // 生成集合 - auto *non_terminator_symbols = new set(); - - // 如果是终结符 - if(symbolTable.getSymbol(symbol)->terminator) { - non_terminator_symbols->insert(symbol); - } else { - - bool production_found = false; - - // 遍历每一产生式 - for (const auto &production : productions) { - const Production *p_pdt = production; - - if (p_pdt->left != symbol) continue; - - production_found = true; - - for (const auto &right_symbol : p_pdt->right) { - - const auto p_non_term_set = FIRST(right_symbol); - - non_terminator_symbols->insert(p_non_term_set->begin(), p_non_term_set->end()); - - const auto sec_it = p_non_term_set->find(0); - - if(sec_it != p_non_term_set->end()) { - continue; - } else { - break; - } - - } - 
} - - if (!production_found) non_terminator_symbols->insert(0); - } - - this->firsts.insert(pair *>(symbol, non_terminator_symbols)); - - return non_terminator_symbols; - } - - const set *FOLLOW(int symbol) { - if(follows.empty()) { - FOLLOW(); - } - - const auto it = follows.find(symbol); - if(it != follows.end()) { - return it->second; - } else { - throw runtime_error("symbol NOT Found"); - } - } - - void FOLLOW() { - - for (const auto &symbol : symbolTable.getAllSymbols()) { - if (!symbol->terminator) { - if (symbol->start) { - set *non_terminator_symbols = get_follow_set(symbol->index); - non_terminator_symbols->insert(-1); - } - } - } - - // 指导没有新的符号被添加到任意FOLLOW集合 - bool ifAdded = true; - - while(ifAdded) { - - ifAdded = false; - - - set *non_terminator_symbols = nullptr; - - - for (const auto &production : productions) { - - const auto &right_symbols = production->right; - - set equal_left_non_terminators; - - for (int i = 0; i < right_symbols.size() - 1; i++) { - - // 非终结符 - if (!symbolTable.getSymbol(right_symbols[i])->terminator) { - - const auto p_non_term_set = FIRST(right_symbols, i + 1); - - // 获得FOLLOW集 - non_terminator_symbols = get_follow_set(right_symbols[i]); - - const size_t set_size = non_terminator_symbols->size(); - - non_terminator_symbols->insert(p_non_term_set->begin(), p_non_term_set->end()); - - // 在集合中发现空字符 - if(non_terminator_symbols->find(0) != non_terminator_symbols->end()) { - non_terminator_symbols->erase(0); - equal_left_non_terminators.insert(right_symbols[i]); - } - - // 检查是否有新的终结符号被添加 - if(set_size < non_terminator_symbols->size()) { - ifAdded = true; - } - } - } - - if(!right_symbols.empty()) { - if (!symbolTable.getSymbol(right_symbols[right_symbols.size() - 1])->terminator) { - equal_left_non_terminators.insert(right_symbols[right_symbols.size() - 1]); - } - } - - for(const auto symbol : equal_left_non_terminators) { - // 获得左边非终结符的FOLLOW集 - const auto left_non_terminator_symbols = get_follow_set(production->left); - // 
获得FOLLOW集 - non_terminator_symbols = get_follow_set(symbol); - - const size_t set_size = non_terminator_symbols->size(); - - non_terminator_symbols->insert( - left_non_terminator_symbols->begin(), - left_non_terminator_symbols->end()); - - if(non_terminator_symbols->find(0) != non_terminator_symbols->end()) { - non_terminator_symbols->erase(0); - } - - // 检查是否有新的终结符号被添加 - if(set_size < non_terminator_symbols->size()) { - ifAdded = true; - } - } - - } - - } - - } - - set* get_follow_set(int symbol) { - - set *non_terminator_symbols = nullptr; - - // 查找缓存 - auto it = follows.find(symbol); - if(it != follows.end()) { - non_terminator_symbols = it->second; - } else { - non_terminator_symbols = new set(); - this->follows.insert(pair *>(symbol, non_terminator_symbols)); - } - - return non_terminator_symbols; - - } - - - void print_symbols(const set &symbols_index) { - wcout << L"{ "; - for(const auto & symbol_index : symbols_index) { - auto *p_sym = symbolTable.getSymbol(symbol_index); - - if(p_sym->terminator) { - if (p_sym->name == L"ε") { - wcout << L" [Epsilon] "; - } - else wcout << L" \"" << p_sym->name << L"\" "; - } else { - wcout << L" " << p_sym->name << L" "; - } - - } - wcout << L"}" << endl; - } - - void parse_production_string_line(const wstring &temp_line) { - auto middle_index = temp_line.find(L"->", 0); - - - if(middle_index == string::npos) { - throw runtime_error("-> NOT FOUND"); - } - - wstring front = trim(temp_line.substr(0, middle_index)); - int left = symbolTable.addSymbol(front, false); - - wstring back = trim(temp_line.substr(middle_index + 2, temp_line.size() - middle_index - 2)); - - wstringstream terminator, non_terminator; - vector symbols; - bool is_terminator = false; - for(const auto &c : back) { - if (c == L'\"') { - if(is_terminator) { - symbols.push_back(symbolTable.addSymbol(trim(terminator.str()), true)); - terminator.str(L""); - terminator.clear(); - } - is_terminator = !is_terminator; - continue; - } - if(c == L' ' || c == L'\r') { 
- wstring temp_symbol = trim(non_terminator.str()); - if(!temp_symbol.empty()) { - symbols.push_back(symbolTable.addSymbol(trim(non_terminator.str()), false)); - non_terminator.str(L""); - non_terminator.clear(); - } - continue; - } - if(is_terminator) { - terminator << c; - } else { - non_terminator << c; - } - } - wstring temp_symbol = trim(non_terminator.str()); - if(!temp_symbol.empty()) { - symbols.push_back(symbolTable.addSymbol(trim(non_terminator.str()), false)); - } - - auto p_pdt = new Production(pdt_index++, left, symbols); - - productions.push_back(p_pdt); - } - - [[nodiscard]] const vector &get_productions() const { - return productions; - } - - [[nodiscard]] const Symbol *getSymbol(int symbol_index) const { - return symbolTable.getSymbol(symbol_index); - } - - [[nodiscard]] const Symbol *getStartSymbol() const { - return symbolTable.getStartSymbol(); - } - - int addSymbol(const wstring &name, bool terminator) { - return symbolTable.addSymbol(name, terminator); - } - - const Production *addProduction(int left, initializer_list right) { - vector right_vector; - for(int symbol : right) { - right_vector.push_back(symbol); - } - auto p_pdt = new Production(pdt_index++, left, right_vector); - productions.push_back(p_pdt); - return p_pdt; - } - - [[nodiscard]] const vector &getAllSymbols() const { - return symbolTable.getAllSymbols(); - } - - void modifySymbol(int index, const wstring &name, bool terminator, bool start) { - symbolTable.modifySymbol(index, name, terminator, start); - } -}; - -// 项 -class Item{ - // 对应的产生式 - const Production* const production; - - // 点的位置 - int dot_index = 0; - - const int terminator = 0; - -public: - - const bool generated = false; - - explicit Item(const Production *p_pdt, int m_terminator, bool m_generated = false) - : production(p_pdt), terminator(m_terminator), generated(m_generated) {} - - void set_dot_index(int m_dot_index) { - if(m_dot_index > production->right.size()) { - throw runtime_error("DOT_INDEX out of range"); 
- } - this->dot_index = m_dot_index; - } - - [[nodiscard]] int get_dot_index() const { - return dot_index; - } - - [[nodiscard]] int get_dot_next_symbol() const { - if(get_dot_index() == production->right.size()) { - return 0; - } else { - return production->right[dot_index]; - } - } - - [[nodiscard]] int get_dot_next_i_symbol(int i) const { - if(get_dot_index() + i >= production->right.size()) { - return 0; - } else { - return production->right[dot_index + i]; - } - } - - [[nodiscard]] int get_terminator() const { - return terminator; - } - - [[nodiscard]] const Production *get_production() const { - return production; - } -}; - -class ItemCollectionManager; - -class ItemCollection{ - - int index = 0; - - map items; - - vector cache; - - GrammarResourcePool *pool; - - friend ItemCollectionManager; - - template - inline void hash_combine(std::size_t& seed, const T& v) const - { - std::hash hasher; - seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); - } - - static bool compare_item_ptr(const Item* lhs, const Item* rhs) - { - if(lhs->get_production() != rhs->get_production()) - return lhs->get_production() < rhs->get_production(); - else if(lhs->get_dot_index() != rhs->get_dot_index()) - return lhs->get_dot_index() < rhs->get_dot_index(); - else - return lhs->get_terminator() < rhs->get_terminator(); - } - -public: - - explicit ItemCollection(GrammarResourcePool *pool) : pool(pool) { - - } - - [[nodiscard]] const vector &getItems() const { - return cache; - } - - [[nodiscard]] int getIndex() const { - return index; - } - - - bool addItem(const Production *p_pdt, int dot_index, int terminator, bool generated = false) { - auto hasher = hash(); - size_t seed = hasher(reinterpret_cast(p_pdt)); - hash_combine(seed, dot_index); - hash_combine(seed, terminator); - - auto it = items.find(seed); - if(it != items.end()) { - return false; - } - - auto *p_item = new Item(p_pdt, terminator, generated); - p_item->set_dot_index(dot_index); - items.insert(pair(seed, p_item)); 
- cache.push_back(p_item); - - return true; - } - - void CLOSURE() { - - bool ifAdd = true; - - while(ifAdd) { - ifAdd = false; - - for(const auto & item : items) { - int next_symbol = item.second->get_dot_next_symbol(); - - if(next_symbol == 0 - || pool->getSymbol(next_symbol)->terminator) { - continue; - } - - for(auto *production : pool->get_productions()) { - if(production->left == next_symbol) { - vector first_args; - first_args.push_back(item.second->get_dot_next_i_symbol(1)); - first_args.push_back(item.second->get_terminator()); - - const auto first_set = pool->FIRST(first_args, 0); - for(auto terminator : *first_set) { - if(terminator == 0) continue; - if(this->addItem(production, 0, terminator, true)) { - ifAdd = true; - } - } - } - } - - } - - } - } - - void print() const { - - wcout << L"I" << index << L": "; - - for(const auto item : cache) { - const auto *p_pdt = item->get_production(); - int dot_index = item->get_dot_index(); - wcout << pool->getSymbol(p_pdt->left)->name << L" -> " ; - int i = 0; - for(const auto &symbol_index : p_pdt->right) { - - if(i > 0) wcout << " "; - if(i++ == dot_index) wcout << "·"; - - auto *symbol = pool->getSymbol(symbol_index); - - if(!symbol->index) { - wcout << L"[Epsilon]"; - continue; - } - - if(!symbol->terminator) - wcout << pool->getSymbol(symbol_index)->name; - else - wcout << L'"' << pool->getSymbol(symbol_index)->name << L'"'; - } - - if(i++ == dot_index) wcout << "·"; - - wcout << L", \"" << pool->getSymbol(item->get_terminator())->name << "\"" << endl; - } - cout << endl; - } - - [[nodiscard]] size_t getHash() const { - size_t seed = 0; - - vector cache_sorted(cache.begin(), cache.end()); - sort(cache_sorted.begin(), cache_sorted.end(), compare_item_ptr); - - for(const auto item : cache_sorted) { - - if(item->generated) { - continue; - } - - hash_combine(seed, item->get_production()); - hash_combine(seed, item->get_dot_index()); - hash_combine(seed, item->get_terminator()); - } - return seed; - } -}; - -class 
ItemCollectionManager{ - - int index = 0; - - map ic_map; - - map ic_content_map; - - vector ics; - - GrammarResourcePool *pool; - - const Production *start_pdt{}; - - template - inline void hash_combine(std::size_t& seed, const T& v) const - { - std::hash hasher; - seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); - } - -public: - - explicit ItemCollectionManager(GrammarResourcePool *resource_pool) : pool(resource_pool) { - - } - - void buildItems() { - - const auto startSymbol = pool->getStartSymbol(); - - wstring new_symbol_name = startSymbol->name + L"'"; - - int new_symbol_index = pool->addSymbol(new_symbol_name, startSymbol->terminator); - - pool->modifySymbol(startSymbol->index, startSymbol->name.substr(1), false, false); - - const auto *p_pdt = pool->addProduction(new_symbol_index, {startSymbol->index}); - - this->start_pdt = p_pdt; - - auto *pi_ic = new ItemCollection(pool); - - // -1 代表 $ - pi_ic->addItem(p_pdt, 0, -1); - - pi_ic->CLOSURE(); - - addItemCollection(0, 0, pi_ic); - - bool ifAdd = true; - - while(ifAdd) { - - ifAdd = false; - const auto &r_ics = getItemCollections(); - vector temp_ics(r_ics.begin(), r_ics.end()); - for(const auto ic : temp_ics) { - for(const auto symbol : pool->getAllSymbols()) { - if(symbol->index <= 0) { - continue; - } - if(GOTO(ic, symbol->index)) { - ifAdd = true; - } - } - } - - } - - } - - [[nodiscard]] const Production *getStartProduction() const { - return start_pdt; - } - - [[nodiscard]] const vector &getItemCollections() const{ - return ics; - } - - ItemCollection *getItemCollectionByHash(size_t hash) { - ItemCollection *p_ic = nullptr; - auto it = ic_content_map.find(hash); - if(it != ic_content_map.end()) { - p_ic = it->second; - } - return p_ic; - } - - bool addItemCollection(int idx, int symbol, ItemCollection *p_ic){ - - size_t ic_hash = p_ic->getHash(); - auto it = ic_content_map.find(ic_hash); - if (it != ic_content_map.end()) { - p_ic = it->second; - } else { - p_ic->index = this->index++; - 
ic_content_map.insert(pair(ic_hash, p_ic)); - ics.push_back(p_ic); - } - - auto hasher = hash(); - size_t seed = hasher(idx); - hash_combine(seed, symbol); - - auto it2 = ic_map.find(seed); - if(it2 != ic_map.end()) { - return false; - } - - if(symbol != 0) { - auto p_symbol = pool->getSymbol(symbol); - if(p_symbol->terminator) - wcout << L"GOTO(" << idx << L", \"" << p_symbol->name << L"\")" << endl; - else - wcout << L"GOTO(" << idx << L", " << p_symbol->name << L")" << endl; - } else { - wcout << L"GOTO(" << idx << L", [Epsilon])" << endl; - } - - ic_map.insert(pair(seed, p_ic)); - p_ic->print(); - return true; - - } - - [[nodiscard]] const ItemCollection* getGOTO(int idx, int symbol) const { - - auto hasher = hash(); - size_t seed = hasher(idx); - hash_combine(seed, symbol); - - auto it = ic_map.find(seed); - if(it != ic_map.end()) { - return it->second; - } else { - return nullptr; - } - } - - bool GOTO(const ItemCollection *p_ic, int symbol) { - auto *pt_ic = new ItemCollection(pool); - - for(const auto &item : p_ic->cache) { - if(item->get_dot_next_symbol() == symbol) { - pt_ic->addItem(item->get_production(), item->get_dot_index() + 1, item->get_terminator()); - } - } - auto p_temp_ic = this->getItemCollectionByHash(pt_ic->getHash()); - if(p_temp_ic == nullptr) - pt_ic->CLOSURE(); - else pt_ic = p_temp_ic; - - if(!pt_ic->items.empty()) { - return this->addItemCollection(p_ic->index, symbol, pt_ic); - } else { - return false; - } - - } - -}; - -class AnalyseTableGenerator { - - using Action = enum { - MOVE, STATUTE, ACC, STEP_GOTO - }; - - struct Step { - - const Action action; - union Target{ - int index; - const Production *production; - } target{}; - - Step(Action action, int index) : action(action), target(Target{index}){} - Step(Action action, const Production *p_pdt) : action(action) { - target.production = p_pdt; - } - }; - - map ACTION; - - map GOTO; - - const ItemCollectionManager *icm; - - const GrammarResourcePool *pool; - - template - inline void 
hash_combine(std::size_t& seed, const T& v) const - { - std::hash hasher; - seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); - } - - void add_action(int index, int terminator_symbol, Action action, const Production *target_pdt) { - size_t seed = 0; - hash_combine(seed, index); - hash_combine(seed, terminator_symbol); - - auto it = ACTION.find(seed); - if(it == ACTION.end()) { - auto step = new Step(action, target_pdt); - ACTION.insert(pair(seed, step)); - } else { - if(it->second->action != action || it->second->target.production != target_pdt) - throw runtime_error("Conflict Occurred, Syntax NOT LR(1)"); - } -} - -void add_action(int index, int terminator_symbol, Action action, int target_index) { - size_t seed = 0; - hash_combine(seed, index); - hash_combine(seed, terminator_symbol); - - auto it = ACTION.find(seed); - if(it == ACTION.end()) { - auto step = new Step(action, target_index); - ACTION.insert(pair(seed, step)); - } else { - if(it->second->action != action || it->second->target.index != target_index) - throw runtime_error("Conflict Occurred, Syntax NOT LR(1)"); - } - } - - void add_goto(int index, int non_terminator_symbol, int target_index) { - size_t seed = 0; - hash_combine(seed, index); - hash_combine(seed, non_terminator_symbol); - - auto it = GOTO.find(seed); - if(it == GOTO.end()) { - auto step = new Step(STEP_GOTO, target_index); - GOTO.insert(pair(seed, step)); - } else { - if(it->second->target.index != target_index) - throw runtime_error("Conflict Occurred, Syntax NOT LR(1)"); - } - } - -public: - - explicit AnalyseTableGenerator(const GrammarResourcePool *p_pool, const ItemCollectionManager *p_icm) - :pool(p_pool) , icm(p_icm) { - - } - - void generate() { - const auto &ics = icm->getItemCollections(); - for(const auto *ic : ics) { - for(const auto *item : ic->getItems()) { - if(item->get_production() == icm->getStartProduction() - && item->get_dot_next_symbol() == 0 - && item->get_terminator() == -1) { - 
this->add_action(ic->getIndex(), -1, ACC, 0); - } - int next_symbol = item->get_dot_next_symbol(); - if(next_symbol != 0) { - const auto *p_ic = icm->getGOTO(ic->getIndex(), next_symbol); - if(pool->getSymbol(next_symbol)->terminator) { - if (p_ic != nullptr) { - this->add_action(ic->getIndex(), next_symbol, MOVE, p_ic->getIndex()); - } - } else { - if (p_ic != nullptr) { - this->add_goto(ic->getIndex(), next_symbol, p_ic->getIndex()); - } - } - } else { - if(pool->getSymbol(next_symbol)->terminator) { - if (item->get_production()->left != pool->getStartSymbol()->index) { - this->add_action(ic->getIndex(), item->get_terminator(), STATUTE, item->get_production()); - } - } - } - } - } - } - - const Step *findActionStep(int index, int terminator_symbol) { - size_t seed = 0; - hash_combine(seed, index); - hash_combine(seed, terminator_symbol); - - auto it = ACTION.find(seed); - if(it != ACTION.end()) { - return it->second; - } else { - return nullptr; - } - } - - const Step *findGotoStep(int index, int non_terminator_symbol) { - size_t seed = 0; - hash_combine(seed, index); - hash_combine(seed, non_terminator_symbol); - - auto it = GOTO.find(seed); - if (it != GOTO.end()) { - return it->second; - } else { - return nullptr; - } - } - - void print() { - - std::wofstream output("tables.txt"); - - size_t space = 4; - - output << L"ACTION" << endl; - vector symbols; - - output << std::left << std::setw(space) << " "; - for(const auto *symbol : pool->getAllSymbols()) { - if(symbol->index == 0) continue; - if(symbol->terminator) { - space = std::max(space, symbol->name.size() + 2); - symbols.push_back(symbol->index); - } - } - - for(const auto symbol_index : symbols) { - output << std::left << std::setw(space) << pool->getSymbol(symbol_index)->name; - } - - output << endl; - - for(int i = 0; i < icm->getItemCollections().size(); i++){ - output << std::left << std::setw(space) << i; - for(int symbol : symbols) { - auto p_step = this->findActionStep(i, symbol); - if(p_step == 
nullptr) { - output << std::left << std::setw(space) << " "; - } else { - if(p_step->action == MOVE) - output << std::left << std::setw(space) << wstring(L"s") + to_wstring(p_step->target.index); - else if(p_step->action == ACC) - output << std::left << std::setw(space) << L"acc"; - else if(p_step->action == STATUTE) - output << std::left << std::setw(space) << L"r" + to_wstring(p_step->target.production->index); - } - } - output << endl; - - } - - output << endl; - - space = 4; - - output << "GOTO" << endl; - symbols.clear(); - - output << std::left << std::setw(space) << " "; - for(const auto *symbol : pool->getAllSymbols()) { - if(symbol->index == 0) continue; - if(!symbol->terminator && !symbol->start) { - space = std::max(space, symbol->name.size() + 2); - symbols.push_back(symbol->index); - } - } - - for(const auto symbol_index : symbols) { - output << std::left << std::setw(space) << pool->getSymbol(symbol_index)->name; - } - - output <getItemCollections().size(); k++) { - output << std::left << std::setw(space) << k; - for (int symbol : symbols) { - auto p_step = this->findGotoStep(k, symbol); - if(p_step == nullptr) { - output << std::left << std::setw(space) << " "; - } else { - output << std::left << std::setw(space) << to_wstring(p_step->target.index); - } - } - output << endl; - } - - output << endl << endl; - - output.close(); - } - -}; - -class LR0Generator{ - - // 文件输入 - wifstream input; - - GrammarResourcePool *pool; - - ItemCollectionManager *icm; - - AnalyseTableGenerator *atg; - -public: - - LR0Generator(): input("syntaxInput.txt", std::ios::binary), - pool(new GrammarResourcePool()), - icm(new ItemCollectionManager(pool)), - atg(new AnalyseTableGenerator(pool, icm)){ - - auto* codeCvtToUTF8= new std::codecvt_utf8; - - input.imbue(std::locale(input.getloc(), codeCvtToUTF8)); - } - - ~LR0Generator() { - input.close(); - } - - void run() { - pool->FOLLOW(); - icm->buildItems(); - atg->generate(); - atg->print(); - } - - // 得到所有的产生式 - void 
getProductions() { - - // 读入文法文件 - wstring temp_line; - - while (getline(input, temp_line)) { - if(temp_line.size() > 2 && temp_line[0] != '#') { - pool->parse_production_string_line(temp_line); - } - } - } - - void output(const GrammarResourcePool *&pool, const AnalyseTableGenerator *&atg) { - pool = this->pool; - atg = this->atg; - } - -}; - -class SyntaxParser { - - // 文件输入 - wifstream input; - - const GrammarResourcePool *pool; - - const AnalyseTableGenerator *atg; - - queue token_queue; - - vector ws_split(const std::wstring& in, const std::wstring& delim) { - std::wregex re{ delim }; - return std::vector { - std::wsregex_token_iterator(in.begin(), in.end(), re, -1), - std::wsregex_token_iterator() - }; - } - - pair get_token_info(const wstring &token) { - - auto pre_index = token.find(L'('); - - auto back_index = token.find(L')'); - - wstring name = token.substr(pre_index); - wstring value = token.substr(pre_index + 1, back_index - 1); - - return pair(name, value); - - } - -public: - - SyntaxParser(const GrammarResourcePool *pool, const AnalyseTableGenerator *atg): - input("outputToken.txt", std::ios::binary), - pool(pool), - atg(atg){ - - auto* codeCvtToUTF8= new std::codecvt_utf8; - - input.imbue(std::locale(input.getloc(), codeCvtToUTF8)); - } - - ~SyntaxParser() { - input.close(); - } - - // 得到所有的产生式 - void getToken() { - - // 读入文法文件 - wstring temp_line; - - wstring line_index; - while (getline(input, temp_line)) { - if(temp_line.size() > 2 && temp_line[0] != '#') { - vector tokens = ws_split(temp_line, L" "); - - line_index = tokens[0]; - - for(int i = 1; i < tokens.size(); i++) { - auto token_info = get_token_info(tokens[i]); - } - - } - } - } - - -}; - - -int main() { - clock_t start,end;//定义clock_t变量 - start = clock(); //开始时间 - - const GrammarResourcePool *pool; - - const AnalyseTableGenerator *atg; - - - LR0Generator generator; - - generator.getProductions(); - - generator.run(); - - generator.output(pool, atg); - - //输出时间 - end = clock(); //结束时间 - 
double times = double(end-start)/CLOCKS_PER_SEC; - cout<<"The Run time = "<(seed, step)); + } else { + if(it->second->action != action || it->second->target.production != target_pdt) + throw std::runtime_error("Conflict Occurred, Syntax NOT LR(1)"); + } +} + +void AnalyseTableGenerator::add_action(int index, int terminator_symbol, Action action, int target_index) { + size_t seed = 0; + hash_combine(seed, index); + hash_combine(seed, terminator_symbol); + + auto it = ACTION.find(seed); + if(it == ACTION.end()) { + auto step = new Step(action, target_index); + ACTION.insert(std::pair(seed, step)); + } else { + if(it->second->action != action || it->second->target.index != target_index) + throw std::runtime_error("Conflict Occurred, Syntax NOT LR(1)"); + } +} + +void AnalyseTableGenerator::add_goto(int index, int non_terminator_symbol, int target_index) { + size_t seed = 0; + hash_combine(seed, index); + hash_combine(seed, non_terminator_symbol); + + auto it = GOTO.find(seed); + if(it == GOTO.end()) { + auto step = new Step(STEP_GOTO, target_index); + GOTO.insert(std::pair(seed, step)); + } else { + if(it->second->target.index != target_index) + throw std::runtime_error("Conflict Occurred, Syntax NOT LR(1)"); + } +} + +void AnalyseTableGenerator::generate() { + const auto &ics = icm->getItemCollections(); + for(const auto *ic : ics) { + for(const auto *item : ic->getItems()) { + if(item->get_production() == icm->getStartProduction() + && item->get_dot_next_symbol() == 0 + && item->get_terminator() == -1) { + this->add_action(ic->getIndex(), -1, ACC, 0); + } + int next_symbol = item->get_dot_next_symbol(); + if(next_symbol != 0) { + const auto *p_ic = icm->getGOTO(ic->getIndex(), next_symbol); + if(pool->getSymbol(next_symbol)->terminator) { + if (p_ic != nullptr) { + this->add_action(ic->getIndex(), next_symbol, MOVE, p_ic->getIndex()); + } + } else { + if (p_ic != nullptr) { + this->add_goto(ic->getIndex(), next_symbol, p_ic->getIndex()); + } + } + } else { + 
if(pool->getSymbol(next_symbol)->terminator) { + if (item->get_production()->left != pool->getStartSymbol()->index) { + this->add_action(ic->getIndex(), item->get_terminator(), REDUCE, item->get_production()); + } + } + } + } + } +} + +const AnalyseTableGenerator::Step *AnalyseTableGenerator::findActionStep(int index, int terminator_symbol) const { + size_t seed = 0; + hash_combine(seed, index); + hash_combine(seed, terminator_symbol); + + auto it = ACTION.find(seed); + if(it != ACTION.end()) { + return it->second; + } else { + return nullptr; + } +} + +const AnalyseTableGenerator::Step *AnalyseTableGenerator::findGotoStep(int index, int non_terminator_symbol) const { + size_t seed = 0; + hash_combine(seed, index); + hash_combine(seed, non_terminator_symbol); + + auto it = GOTO.find(seed); + if (it != GOTO.end()) { + return it->second; + } else { + return nullptr; + } +} + +void AnalyseTableGenerator::print() const { + + std::wofstream output("tables.txt"); + + size_t space = 4; + + output << L"ACTION" << std::endl; + std::vector symbols; + + + for(const auto *symbol : pool->getAllSymbols()) { + if(symbol->index == 0) continue; + if(symbol->terminator) { + space = std::max(space, symbol->name.size() + 2); + symbols.push_back(symbol->index); + } + } + + output << std::left << std::setw(space) << " "; + for(const auto symbol_index : symbols) { + output << std::left << std::setw(space) << pool->getSymbol(symbol_index)->name; + } + + output << std::endl; + + for(int i = 0; i < icm->getItemCollections().size(); i++){ + output << std::left << std::setw(space) << i; + for(int symbol : symbols) { + auto p_step = this->findActionStep(i, symbol); + if(p_step == nullptr) { + output << std::left << std::setw(space) << " "; + } else { + if(p_step->action == MOVE) + output << std::left << std::setw(space) + << std::wstring(L"s") + std::to_wstring(p_step->target.index); + else if(p_step->action == ACC) + output << std::left << std::setw(space) << L"acc"; + else if(p_step->action 
== REDUCE) + output << std::left << std::setw(space) + << L"r" + std::to_wstring(p_step->target.production->index); + } + } + output << std::endl; + + } + + output << std::endl; + + space = 4; + + output << "GOTO" << std::endl; + symbols.clear(); + + for(const auto *symbol : pool->getAllSymbols()) { + if(symbol->index == 0) continue; + if(!symbol->terminator && !symbol->start) { + space = std::max(space, symbol->name.size() + 2); + symbols.push_back(symbol->index); + } + } + + output << std::left << std::setw(space) << " "; + for(const auto symbol_index : symbols) { + output << std::left << std::setw(space) << pool->getSymbol(symbol_index)->name; + } + + output <getItemCollections().size(); k++) { + output << std::left << std::setw(space) << k; + for (int symbol : symbols) { + auto p_step = this->findGotoStep(k, symbol); + if(p_step == nullptr) { + output << std::left << std::setw(space) << " "; + } else { + output << std::left << std::setw(space) << std::to_wstring(p_step->target.index); + } + } + output << std::endl; + } + + output << std::endl << std::endl; + + output.close(); +} diff --git a/src/GrammarResourcePool.cpp b/src/GrammarResourcePool.cpp new file mode 100644 index 0000000..61b3398 --- /dev/null +++ b/src/GrammarResourcePool.cpp @@ -0,0 +1,276 @@ +// +// Created by Administrator on 2021/4/30. 
+// + +#include "GrammarResourcePool.h" + +const std::set *GrammarResourcePool::FIRST(const std::vector &symbols, int start_index) { + + // 生成集合 + auto *non_terminator_symbols = new std::set(); + + for(int i = start_index; i < symbols.size(); i++) { + + const auto p_non_term_set = FIRST(symbols[i]); + + non_terminator_symbols->insert(p_non_term_set->begin(), p_non_term_set->end()); + + const auto sec_it = p_non_term_set->find(0); + if(sec_it != p_non_term_set->end()) { + continue; + } else { + break; + } + } + + return non_terminator_symbols; +} + +const std::set *GrammarResourcePool::FIRST(int symbol) { + + // 查找缓存 + const auto it = firsts.find(symbol); + if(it != firsts.end()) { + return it->second; + } + + // 生成集合 + auto *non_terminator_symbols = new std::set(); + + // 如果是终结符 + if(symbolTable.getSymbol(symbol)->terminator) { + non_terminator_symbols->insert(symbol); + } else { + + bool production_found = false; + + // 遍历每一产生式 + for (const auto &production : productions) { + const Production *p_pdt = production; + + if (p_pdt->left != symbol) continue; + + production_found = true; + + for (const auto &right_symbol : p_pdt->right) { + + const auto p_non_term_set = FIRST(right_symbol); + + non_terminator_symbols->insert(p_non_term_set->begin(), p_non_term_set->end()); + + const auto sec_it = p_non_term_set->find(0); + + if(sec_it != p_non_term_set->end()) { + continue; + } else { + break; + } + + } + } + + if (!production_found) non_terminator_symbols->insert(0); + } + + this->firsts.insert(std::pair *>(symbol, non_terminator_symbols)); + + return non_terminator_symbols; +} + +const std::set *GrammarResourcePool::FOLLOW(int symbol) { + if(follows.empty()) { + FOLLOW(); + } + + const auto it = follows.find(symbol); + if(it != follows.end()) { + return it->second; + } else { + throw std::runtime_error("symbol NOT Found"); + } +} + +void GrammarResourcePool::FOLLOW() { + + for (const auto &symbol : symbolTable.getAllSymbols()) { + if (!symbol->terminator) { + if 
(symbol->start) { + std::set *non_terminator_symbols = get_follow_set(symbol->index); + non_terminator_symbols->insert(-1); + } + } + } + + // 指导没有新的符号被添加到任意FOLLOW集合 + bool ifAdded = true; + + while(ifAdded) { + + ifAdded = false; + + + std::set *non_terminator_symbols = nullptr; + + + for (const auto &production : productions) { + + const auto &right_symbols = production->right; + + std::set equal_left_non_terminators; + + for (int i = 0; i < right_symbols.size() - 1; i++) { + + // 非终结符 + if (!symbolTable.getSymbol(right_symbols[i])->terminator) { + + const auto p_non_term_set = FIRST(right_symbols, i + 1); + + // 获得FOLLOW集 + non_terminator_symbols = get_follow_set(right_symbols[i]); + + const size_t set_size = non_terminator_symbols->size(); + + non_terminator_symbols->insert(p_non_term_set->begin(), p_non_term_set->end()); + + // 在集合中发现空字符 + if(non_terminator_symbols->find(0) != non_terminator_symbols->end()) { + non_terminator_symbols->erase(0); + equal_left_non_terminators.insert(right_symbols[i]); + } + + // 检查是否有新的终结符号被添加 + if(set_size < non_terminator_symbols->size()) { + ifAdded = true; + } + } + } + + if(!right_symbols.empty()) { + if (!symbolTable.getSymbol(right_symbols[right_symbols.size() - 1])->terminator) { + equal_left_non_terminators.insert(right_symbols[right_symbols.size() - 1]); + } + } + + for(const auto symbol : equal_left_non_terminators) { + // 获得左边非终结符的FOLLOW集 + const auto left_non_terminator_symbols = get_follow_set(production->left); + // 获得FOLLOW集 + non_terminator_symbols = get_follow_set(symbol); + + const size_t set_size = non_terminator_symbols->size(); + + non_terminator_symbols->insert( + left_non_terminator_symbols->begin(), + left_non_terminator_symbols->end()); + + if(non_terminator_symbols->find(0) != non_terminator_symbols->end()) { + non_terminator_symbols->erase(0); + } + + // 检查是否有新的终结符号被添加 + if(set_size < non_terminator_symbols->size()) { + ifAdded = true; + } + } + + } + + } + +} + +std::set 
*GrammarResourcePool::get_follow_set(int symbol) { + + std::set *non_terminator_symbols = nullptr; + + // 查找缓存 + auto it = follows.find(symbol); + if(it != follows.end()) { + non_terminator_symbols = it->second; + } else { + non_terminator_symbols = new std::set(); + this->follows.insert(std::pair *>(symbol, non_terminator_symbols)); + } + + return non_terminator_symbols; + +} + +void GrammarResourcePool::print_symbols(const std::set &symbols_index) { + std::wcout << L"{ "; + for(const auto & symbol_index : symbols_index) { + auto *p_sym = symbolTable.getSymbol(symbol_index); + + if(p_sym->terminator) { + if (p_sym->name == L"ε") { + std::wcout << L" [Epsilon] "; + } + else std::wcout << L" \"" << p_sym->name << L"\" "; + } else { + std::wcout << L" " << p_sym->name << L" "; + } + + } + std::wcout << L"}" << std::endl; +} + +void GrammarResourcePool::parse_production_string_line(const std::wstring &temp_line) { + auto middle_index = temp_line.find(L"->", 0); + + + if(middle_index == std::wstring::npos) { + throw std::runtime_error("-> NOT FOUND"); + } + + std::wstring front = trim(temp_line.substr(0, middle_index)); + int left = symbolTable.addSymbol(front, false); + + std::wstring back = trim(temp_line.substr(middle_index + 2, temp_line.size() - middle_index - 2)); + + std::wstringstream terminator, non_terminator; + std::vector symbols; + bool is_terminator = false; + for(const auto &c : back) { + if (c == L'\"') { + if(is_terminator) { + symbols.push_back(symbolTable.addSymbol(trim(terminator.str()), true)); + terminator.str(L""); + terminator.clear(); + } + is_terminator = !is_terminator; + continue; + } + if(c == L' ' || c == L'\r') { + std::wstring temp_symbol = trim(non_terminator.str()); + if(!temp_symbol.empty()) { + symbols.push_back(symbolTable.addSymbol(trim(non_terminator.str()), false)); + non_terminator.str(L""); + non_terminator.clear(); + } + continue; + } + if(is_terminator) { + terminator << c; + } else { + non_terminator << c; + } + } + 
std::wstring temp_symbol = trim(non_terminator.str()); + if(!temp_symbol.empty()) { + symbols.push_back(symbolTable.addSymbol(trim(non_terminator.str()), false)); + } + + auto p_pdt = new Production(pdt_index++, left, symbols); + + productions.push_back(p_pdt); +} + +const Production *GrammarResourcePool::addProduction(int left, std::initializer_list right) { + std::vector right_vector; + for(int symbol : right) { + right_vector.push_back(symbol); + } + auto p_pdt = new Production(pdt_index++, left, right_vector); + productions.push_back(p_pdt); + return p_pdt; +} diff --git a/src/Item.cpp b/src/Item.cpp new file mode 100644 index 0000000..a680111 --- /dev/null +++ b/src/Item.cpp @@ -0,0 +1,28 @@ +// +// Created by Administrator on 2021/4/30. +// + +#include "Item.h" + +void Item::set_dot_index(int m_dot_index) { + if(m_dot_index > production->right.size()) { + throw std::runtime_error("DOT_INDEX out of range"); + } + this->dot_index = m_dot_index; +} + +int Item::get_dot_next_symbol() const { + if(get_dot_index() == production->right.size()) { + return 0; + } else { + return production->right[dot_index]; + } +} + +int Item::get_dot_next_i_symbol(int i) const { + if(get_dot_index() + i >= production->right.size()) { + return 0; + } else { + return production->right[dot_index + i]; + } +} diff --git a/src/ItemCollection.cpp b/src/ItemCollection.cpp new file mode 100644 index 0000000..33b7242 --- /dev/null +++ b/src/ItemCollection.cpp @@ -0,0 +1,124 @@ +// +// Created by Administrator on 2021/4/30. 
+// +#include "ItemCollection.h" + +size_t ItemCollection::getHash() const { + size_t seed = 0; + + std::vector cache_sorted(cache.begin(), cache.end()); + std::sort(cache_sorted.begin(), cache_sorted.end(), compare_item_ptr); + + for(const auto item : cache_sorted) { + + if(item->generated) { + continue; + } + + hash_combine(seed, item->get_production()); + hash_combine(seed, item->get_dot_index()); + hash_combine(seed, item->get_terminator()); + } + return seed; +} + +void ItemCollection::print(std::wofstream &output) const { + + output << L"I" << index << L": "; + + for(const auto item : cache) { + const auto *p_pdt = item->get_production(); + int dot_index = item->get_dot_index(); + output << pool->getSymbol(p_pdt->left)->name << L" -> "; + int i = 0; + for(const auto &symbol_index : p_pdt->right) { + + if(i > 0) output << " "; + if(i++ == dot_index) output << "*"; + + auto *symbol = pool->getSymbol(symbol_index); + + if(!symbol->index) { + output << L"[Epsilon]"; + continue; + } + + if(!symbol->terminator) + output << pool->getSymbol(symbol_index)->name; + else + output << L'"' << pool->getSymbol(symbol_index)->name << L'"'; + } + + if(i++ == dot_index) output << "*"; + + output << L", \"" << pool->getSymbol(item->get_terminator())->name << "\"" << std::endl; + } + output << std::endl; +} + +void ItemCollection::CLOSURE() { + + bool ifAdd = true; + + while(ifAdd) { + ifAdd = false; + + for(const auto & item : items) { + int next_symbol = item.second->get_dot_next_symbol(); + + if(next_symbol == 0 + || pool->getSymbol(next_symbol)->terminator) { + continue; + } + + for(auto *production : pool->get_productions()) { + if(production->left == next_symbol) { + std::vector first_args; + auto p_ic = item.second; + const auto last_right_symbol_count = p_ic->get_right_size() - p_ic->get_dot_index(); + for(int i = 1; i <= last_right_symbol_count; i++) + first_args.push_back(p_ic->get_dot_next_i_symbol(i)); + first_args.push_back(p_ic->get_terminator()); + + const auto 
first_set = pool->FIRST(first_args, 0); + for(auto terminator : *first_set) { + if(terminator == 0) continue; + if(this->addItem(production, 0, terminator, true)) { + ifAdd = true; + } + } + } + } + + } + + } +} + +bool ItemCollection::addItem(const Production *p_pdt, int dot_index, int terminator, bool generated) { + auto hasher = std::hash(); + size_t seed = hasher(reinterpret_cast(p_pdt)); + hash_combine(seed, dot_index); + hash_combine(seed, terminator); + + auto it = items.find(seed); + if(it != items.end()) { + return false; + } + + auto *p_item = new Item(p_pdt, terminator, generated); + p_item->set_dot_index(dot_index); + items.insert(std::pair(seed, p_item)); + cache.push_back(p_item); + + return true; +} + +bool ItemCollection::compare_item_ptr(const Item *lhs, const Item *rhs) { + if(lhs->get_production() != rhs->get_production()) + return lhs->get_production() < rhs->get_production(); + else if(lhs->get_dot_index() != rhs->get_dot_index()) + return lhs->get_dot_index() < rhs->get_dot_index(); + else + return lhs->get_terminator() < rhs->get_terminator(); +} diff --git a/src/ItemCollectionManager.cpp b/src/ItemCollectionManager.cpp new file mode 100644 index 0000000..bd6b90f --- /dev/null +++ b/src/ItemCollectionManager.cpp @@ -0,0 +1,131 @@ +// +// Created by Administrator on 2021/4/30. 
+//
+
+#include "ItemCollectionManager.h"
+
+// Augments the grammar with a fresh start production, seeds item
+// collection 0 from it, then applies GOTO to every known collection and
+// every symbol until a full pass records nothing new.
+void ItemCollectionManager::buildItems() {
+    const auto startSymbol = pool->getStartSymbol();
+
+    // The new start symbol is named after the old one with "'" appended.
+    const std::wstring new_symbol_name = startSymbol->name + L"'";
+    const int new_symbol_index = pool->addSymbol(new_symbol_name, startSymbol->terminator);
+
+    // Rename the old start symbol without its first character; both flags are false.
+    pool->modifySymbol(startSymbol->index, startSymbol->name.substr(1), false, false);
+
+    const auto *p_pdt = pool->addProduction(new_symbol_index, {startSymbol->index});
+    this->start_pdt = p_pdt;
+
+    auto *pi_ic = new ItemCollection(pool);
+    // -1 stands for $
+    pi_ic->addItem(p_pdt, 0, -1);
+    pi_ic->CLOSURE();
+    addItemCollection(0, 0, pi_ic);
+
+    bool ifAdd = true;
+    while (ifAdd) {
+        ifAdd = false;
+        // Iterate over a copy: GOTO can append to `ics` through addItemCollection.
+        const auto &r_ics = getItemCollections();
+        std::vector<const ItemCollection *> temp_ics(r_ics.begin(), r_ics.end());
+        for (const auto ic : temp_ics) {
+            for (const auto symbol : pool->getAllSymbols()) {
+                if (symbol->index <= 0) {
+                    continue;
+                }
+                if (GOTO(ic, symbol->index)) {
+                    ifAdd = true;
+                }
+            }
+        }
+    }
+}
+
+// Returns the collection registered under `hash`, or nullptr if none is.
+ItemCollection *ItemCollectionManager::getItemCollectionByHash(size_t hash) {
+    const auto it = ic_content_map.find(hash);
+    return it != ic_content_map.end() ? it->second : nullptr;
+}
+
+// Records the transition (idx, symbol) -> p_ic and logs it to `output`.
+// A collection whose hash is already registered is swapped for the
+// registered instance; otherwise p_ic is given the next free index.
+// Returns false when the transition (idx, symbol) was recorded before.
+bool ItemCollectionManager::addItemCollection(int idx, int symbol, ItemCollection *p_ic) {
+    const size_t ic_hash = p_ic->getHash();
+    const auto known = ic_content_map.find(ic_hash);
+    if (known != ic_content_map.end()) {
+        p_ic = known->second;
+    } else {
+        p_ic->index = this->index++;
+        ic_content_map.emplace(ic_hash, p_ic);
+        ics.push_back(p_ic);
+    }
+
+    // Transitions are keyed by the combined hash of idx and symbol.
+    size_t seed = std::hash<int>()(idx);
+    hash_combine(seed, symbol);
+    if (ic_map.find(seed) != ic_map.end()) {
+        return false;
+    }
+
+    if (symbol != 0) {
+        const auto p_symbol = pool->getSymbol(symbol);
+        if (p_symbol->terminator)
+            output << L"GOTO(" << idx << L", \"" << p_symbol->name << L"\")" << std::endl;
+        else
+            output << L"GOTO(" << idx << L", " << p_symbol->name << L")" << std::endl;
+    } else {
+        output << L"GOTO(" << idx << L", [Epsilon])" << std::endl;
+    }
+
+    ic_map.emplace(seed, p_ic);
+    p_ic->print(output);
+    return true;
+}
+
+// Returns the target recorded for transition (idx, symbol), or nullptr.
+const ItemCollection *ItemCollectionManager::getGOTO(int idx, int symbol) const {
+    size_t seed = std::hash<int>()(idx);
+    hash_combine(seed, symbol);
+
+    const auto it = ic_map.find(seed);
+    return it != ic_map.end() ? it->second : nullptr;
+}
+
+// Builds the collection reached from p_ic over `symbol` and records the
+// transition. Returns what addItemCollection returns, or false when no
+// item of p_ic has `symbol` right after its dot.
+bool ItemCollectionManager::GOTO(const ItemCollection *p_ic, int symbol) {
+    auto *pt_ic = new ItemCollection(pool);
+
+    // Move the dot over `symbol` in every item that expects it next.
+    for (const auto &item : p_ic->cache) {
+        if (item->get_dot_next_symbol() == symbol) {
+            pt_ic->addItem(item->get_production(), item->get_dot_index() + 1, item->get_terminator());
+        }
+    }
+
+    const auto p_temp_ic = this->getItemCollectionByHash(pt_ic->getHash());
+    if (p_temp_ic == nullptr) {
+        pt_ic->CLOSURE();
+    } else {
+        // Reuse the registered instance; the scratch copy never escaped.
+        delete pt_ic;
+        pt_ic = p_temp_ic;
+    }
+
+    if (!pt_ic->items.empty()) {
+        return this->addItemCollection(p_ic->index, symbol, pt_ic);
+    }
+
+    // Nothing to register: free the scratch collection if it is still ours.
+    if (p_temp_ic == nullptr) {
+        delete pt_ic;
+    }
+    return false;
+}
diff --git a/src/LR1Generator.cpp b/src/LR1Generator.cpp
new file mode 100644
index 0000000..58dbdc7
--- /dev/null
+++ b/src/LR1Generator.cpp
@@ -0,0 +1,17 @@
+//
+// Created by Administrator on 2021/4/30.
+//
+
+#include "LR1Generator.h"
+
+// Hands every line read from `input` to the pool's production parser.
+// Lines of at most two characters and lines starting with '#' are skipped.
+void LR1Generator::getProductions() {
+    std::wstring temp_line;
+
+    while (getline(input, temp_line)) {
+        if (temp_line.size() > 2 && temp_line[0] != '#') {
+            pool->parse_production_string_line(temp_line);
+        }
+    }
+}
diff --git a/src/Production.cpp b/src/Production.cpp
new file mode 100644
index 0000000..c2b0cbe
--- /dev/null
+++ b/src/Production.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Administrator on 2021/4/30.
+//
+
+#include "Production.h"
diff --git a/src/Symbol.cpp b/src/Symbol.cpp
new file mode 100644
index 0000000..b48cffc
--- /dev/null
+++ b/src/Symbol.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Administrator on 2021/4/30.
+//
+
+#include "Symbol.h"
diff --git a/src/SymbolTable.cpp b/src/SymbolTable.cpp
new file mode 100644
index 0000000..b6cd765
--- /dev/null
+++ b/src/SymbolTable.cpp
@@ -0,0 +1,81 @@
+//
+// Created by Administrator on 2021/4/30.
+//
+
+#include "SymbolTable.h"
+
+// Registers the two built-in symbols: ε under index 0 and $ under index -1.
+SymbolTable::SymbolTable() {
+    auto symbol = new Symbol(0, L"ε", true, false);
+    table.emplace(L"ε", symbol);
+    cache.emplace(0, symbol);
+    line.push_back(symbol);
+
+    symbol = new Symbol(-1, L"$", true, false);
+    table.emplace(L"$", symbol);
+    cache.emplace(-1, symbol);
+    line.push_back(symbol);
+}
+
+// Returns the index of the symbol `name`, registering it first when it is
+// new. L"ε" always yields 0; a leading '@' presumably marks the start symbol.
+int SymbolTable::addSymbol(const std::wstring &name, bool terminator) {
+    if (name == L"ε") {
+        return 0;
+    }
+
+    // Look up before allocating so a repeated name does not leak a Symbol.
+    const auto &it = table.find(name);
+    if (it != table.end()) {
+        return it->second->index;
+    }
+
+    auto *symbol = new Symbol(index, name, terminator, name[0] == L'@');
+    table.emplace(symbol->name, symbol);
+    cache.emplace(symbol->index, symbol);
+    line.push_back(symbol);
+
+    index++;
+    return symbol->index;
+}
+
+// Returns the symbol at `symbol_index`; throws std::runtime_error if unknown.
+const Symbol *SymbolTable::getSymbol(int symbol_index) const {
+    const auto &it = cache.find(symbol_index);
+    if (it == cache.end()) {
+        throw std::runtime_error("symbol " + std::to_string(symbol_index) + " NOT Found");
+    }
+    return it->second;
+}
+
+// Returns the index of symbol `name`; throws std::runtime_error if unknown.
+int SymbolTable::getSymbolIndex(const std::wstring &name) const {
+    const auto &it = table.find(name);
+    if (it == table.end()) {
+        throw std::runtime_error("symbol NOT Found");
+    }
+    return it->second->index;
+}
+
+// Overwrites the fields of the symbol at `idx`; an unknown index is ignored.
+void SymbolTable::modifySymbol(int idx, const std::wstring &name, bool terminator, bool start) {
+    const auto it = cache.find(idx);
+    if (it == cache.end()) {
+        return;
+    }
+    auto p_sym = it->second;
+    p_sym->name = name;
+    p_sym->terminator = terminator;
+    p_sym->start = start;
+}
+
+// Returns the first symbol with `start` set; throws std::runtime_error if none.
+const Symbol *SymbolTable::getStartSymbol() const {
+    for (const auto &symbol : getAllSymbols()) {
+        if (symbol->start) {
+            return symbol;
+        }
+    }
+
+    throw std::runtime_error("start symbol NOT Found");
+}
diff --git a/src/SyntaxParser.cpp b/src/SyntaxParser.cpp
new file mode 100644
index 0000000..d5520cf
--- /dev/null
+++ b/src/SyntaxParser.cpp
@@ -0,0 +1,173 @@
+//
+// Created by Administrator on 2021/4/30.
+//
+
+#include "SyntaxParser.h"
+
+// Runs the shift/reduce loop over tokens_queue with the tables of `atg` and
+// logs every step to `output`. Ends with printDone() on ACC, or with
+// printError() when no action or goto entry is found.
+void SyntaxParser::parse() {
+    status_stack.push(0);
+
+    now_line = 1;
+    size_t _line_index = 0;
+    // Falls back to 0 instead of reading lines_index[0] of an empty container.
+    size_t max_line_index = lines_index.empty() ? 0 : lines_index[now_line - 1];
+    while (!tokens_queue.empty()) {
+        auto *p_step = atg->findActionStep(status_stack.top(), tokens_queue.front());
+        if (p_step == nullptr) {
+            printError();
+            return;
+        }
+
+        if (p_step->action == MOVE) {
+            output << "MOVE IN" << "(AUTOMATA STATUS " << status_stack.top() <<"): ";
+            printSymbol(tokens_queue.front());
+
+            status_stack.push(p_step->target.index);
+            analyse_stack.push(tokens_queue.front());
+
+            if (_line_index > max_line_index) {
+                // Past the end of the current line: restart the line buffer.
+                string_buffer.str(L"");
+                string_buffer.clear();
+                max_line_index = lines_index[now_line++];
+            }
+            string_buffer << pool->getSymbol(tokens_queue.front())->name << " ";
+            tokens_queue.pop();
+            _line_index++;
+        } else if (p_step->action == REDUCE) {
+            auto *p_pdt = p_step->target.production;
+            output << "REDUCE BY" << "(AUTOMATA STATUS " << status_stack.top() <<"): [";
+            printProduction(p_pdt);
+            output << "]";
+
+            // Pop one stack entry per right-hand symbol, except for 0 (ε).
+            for (int i : p_pdt->right) {
+                if (i == 0)
+                    continue;
+                analyse_stack.pop();
+                status_stack.pop();
+            }
+
+            auto *p_goto_step = atg->findGotoStep(status_stack.top(), p_pdt->left);
+            if (p_goto_step == nullptr) {
+                printError();
+                return;
+            }
+
+            analyse_stack.push(p_pdt->left);
+            status_stack.push(p_goto_step->target.index);
+        } else if (p_step->action == ACC) {
+            output << "ACC";
+            printDone();
+            return;
+        } else {
+            printError();
+            return;
+        }
+
+        output << std::endl;
+    }
+}
+
+// Writes `left -> sym sym ...` for p_pdt to `output`.
+void SyntaxParser::printProduction(const Production *p_pdt) {
+    output << pool->getSymbol(p_pdt->left)->name << L" -> " ;
+
+    bool is_first = true;
+    for (const auto &symbol_index : p_pdt->right) {
+        if (!is_first) output << " ";
+        is_first = false;
+        printSymbol(symbol_index);
+    }
+}
+
+// Writes [Epsilon] for index 0, a quoted name for a terminal, else the bare name.
+void SyntaxParser::printSymbol(int symbol_index) {
+    auto *symbol = pool->getSymbol(symbol_index);
+
+    if (symbol->index == 0) {
+        output << L"[Epsilon]";
+    } else if (symbol->terminator) {
+        output << L'"' << symbol->name << L'"';
+    } else {
+        output << symbol->name;
+    }
+}
+
+// Reads token lines from `input`, skipping lines of at most two characters
+// and lines starting with '#'. Queues the symbol index of every field after
+// the first, records each line's last token index, then queues $ (-1).
+void SyntaxParser::getToken() {
+    std::wstring temp_line;
+
+    size_t _line_index = 0;
+    while (getline(input, temp_line)) {
+        if (temp_line.size() > 2 && temp_line[0] != '#') {
+            std::vector<std::wstring> tokens = ws_split(temp_line, L" ");
+
+            // Field 0 is skipped. size_t matches the type of tokens.size().
+            for (size_t i = 1; i < tokens.size(); i++) {
+                if (tokens[i] == L"\r") continue;
+                auto token_info = get_token_info(tokens[i]);
+
+                // ID, EOF, INTEGER and STRING resolve by class name, the rest by value.
+                const bool by_class = token_info.first == L"ID"
+                                      || token_info.first == L"EOF"
+                                      || token_info.first == L"INTEGER"
+                                      || token_info.first == L"STRING";
+                const int symbol_index = pool->getSymbolIndex(by_class ? token_info.first : token_info.second);
+
+                tokens_queue.push(symbol_index);
+                _line_index++;
+            }
+            // NOTE(review): this wraps around if no token has been read yet.
+            lines_index.push_back(_line_index - 1);
+        }
+    }
+
+    // Append the end marker $
+    tokens_queue.push(-1);
+}
+
+// Splits `in` at every match of the regular expression `delim`.
+std::vector<std::wstring> SyntaxParser::ws_split(const std::wstring &in, const std::wstring &delim) {
+    const std::wregex re{ delim };
+    return std::vector<std::wstring> {
+            std::wsregex_token_iterator(in.begin(), in.end(), re, -1),
+            std::wsregex_token_iterator()
+    };
+}
+
+// Splits a token of the form NAME(VALUE) into the pair {NAME, VALUE}.
+std::pair<std::wstring, std::wstring> SyntaxParser::get_token_info(const std::wstring &token) {
+    const auto pre_index = token.find(L'(');
+    const auto back_index = token.find(L')');
+
+    std::wstring name = token.substr(0, pre_index);
+    std::wstring value = token.substr(pre_index + 1, back_index - pre_index - 1);
+
+    return std::pair<std::wstring, std::wstring>(name, value);
+}
+
+// Writes the success banner to `output`.
+void SyntaxParser::printDone() {
+    output << std::endl;
+    output << "------------------------------------------------------" << std::endl;
+    output << "Syntax Parser Work Done, No Error Found." << std::endl << std::endl;
+}
+
+// Writes the error banner: line number, buffered tokens, next token and state.
+void SyntaxParser::printError() {
+    std::wstring temp_line = string_buffer.str();
+    output << std::endl;
+    output << "------------------------------------------------------" << std::endl;
+    output.fill('-');
+    output.width(24);
+    output << "Syntax Parser Found Error: " << std::endl
+           << "At [Line " << now_line << "]: " << temp_line
+           << "<- Next Token{" << pool->getSymbol(tokens_queue.front())->name << "}" << std::endl;
+    output << "AUTOMATA STATUS " << status_stack.top() << std::endl;
+}