From c2742de73fbf3d014d780a65e11d3578380871da Mon Sep 17 00:00:00 2001 From: satunreric Date: Fri, 30 Apr 2021 13:33:22 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E5=96=84=E4=BB=A3=E7=A0=81;?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmake-build-debug/syntaxInput.txt | 71 ++++++++- main.cpp | 233 +++++++++++++++++++++--------- 2 files changed, 232 insertions(+), 72 deletions(-) diff --git a/cmake-build-debug/syntaxInput.txt b/cmake-build-debug/syntaxInput.txt index 84aafd0..f687c80 100644 --- a/cmake-build-debug/syntaxInput.txt +++ b/cmake-build-debug/syntaxInput.txt @@ -1,3 +1,68 @@ -@S -> C C -C -> "c" C -C -> "d" \ No newline at end of file +@struct_type -> "struct" "ID" "{" member_list "}" "EOF" +member_list-> type_spec declarators ";" +type_spec -> base_type_spec +type_spec -> struct_type +base_type_spec -> floating_pt_type +base_type_spec -> integer_type +base_type_spec -> "char" +base_type_spec -> "boolean" +floating_pt_type -> "float" +floating_pt_type -> "double" +floating_pt_type -> "long" "double" +integer_type -> signed_int +integer_type -> unsigned_int + +signed_int -> "short" +signed_int -> "int16" +signed_int -> "long" +signed_int -> "int32" +signed_int -> "long" "long" +signed_int -> "int64" +signed_int -> "int8" + +unsigned_int -> "unsigned" "short" +unsigned_int -> "unsigned" "long" +unsigned_int -> "unsigned" "long" "long" +unsigned_int -> "unit16" +unsigned_int -> "unit32" +unsigned_int -> "unit64" +unsigned_int -> "unit8" + +declarators -> declarator more_declarators +more_declarators -> "," declarator +more_declarators -> ε + +declarator -> "ID" more_declarator +more_declarator -> exp_list +more_declarator -> ε + +exp_list -> "[" or_expr more_or_expr "]" +more_or_expr -> "," or_expr +more_or_expr -> ε +xor_expr -> and_expr more_and_expr +more_and_expr -> "^" and_expr +more_and_expr -> ε +and_expr -> shift_expr more_shift_expr +more_shift_expr -> "&" shift_expr +more_shift_expr -> ε +shift_expr -> add_expr more_add_expr +more_add_expr -> shift_sign add_expr +shift_sign -> ">>" +shift_sign -> "<<" +add_expr -> multi_expr more_multi_expr +more_multi_expr -> multi_sign multi_expr +multi_sign -> "+" +multi_sign -> "-" +multi_expr -> unary_expr more_unary_expr +more_unary_expr -> unary_sign unary_expr +more_unary_expr -> ε +unary_sign -> "*" +unary_sign -> "/" +unary_sign -> "%" +unary_expr -> unary_sign_2 unary_declare +unary_sign_2 -> "-" +unary_sign_2 -> "+" +unary_sign_2 -> "~" +unary_declare -> "INTEGER" +unary_declare -> "STRING" +unary_declare -> "BOOLEAN" \ No newline at end of file diff --git a/main.cpp b/main.cpp index 8e81f02..df4f468 100644 --- a/main.cpp +++ b/main.cpp @@ -52,8 +52,8 @@ public: SymbolTable() { - auto symbol = new Symbol(0, L"ε", true, false); - table.insert(pair(L"ε", symbol)); + auto symbol = new Symbol(0, L"", true, false); + table.insert(pair(L"", symbol)); cache.insert(pair(0, symbol)); line.push_back(symbol); @@ -71,7 +71,7 @@ public: Symbol *symbol = nullptr; - if(name == L"ε") { + if(name == L"") { return 0; } else if (name[0] == L'@') { symbol = new Symbol(index, name, terminator, true); @@ -134,31 +134,34 @@ public: }; -// 产生式 +// ʽ struct Production { + const int index; const int left; const vector right; - Production(int left, vector right): left(left), right(std::move(right)) {} + Production(int index, int left, vector right): index(index), left(left), right(std::move(right)) {} }; -// 语法资源池 +// ﷨Դ class GrammarResourcePool { - // 符号表 + int pdt_index = 0; + + // ű SymbolTable symbolTable; - // 产生式 + // ʽ vector productions; - // FIRST结果存储表 + // FIRST洢 map *> firsts; - // FOLLOW结果存储表 + // FOLLOW洢 map *> follows; - // 去掉首尾空格 + // ȥβո static wstring& trim(wstring &&str) { if (str.empty()) { return str; @@ -173,7 +176,7 @@ public: const set *FIRST(const vector &symbols, int start_index) { - // 生成集合 + // ɼ auto *non_terminator_symbols = new set(); for(int i = start_index; i < symbols.size(); i++) { @@ -195,23 +198,23 @@ public: const set* FIRST(int symbol) { - // 查找缓存 + // һ const auto it = firsts.find(symbol); if(it != firsts.end()) { return it->second; } - // 生成集合 + // ɼ auto *non_terminator_symbols = new set(); - // 如果是终结符 + // ս if(symbolTable.getSymbol(symbol)->terminator) { non_terminator_symbols->insert(symbol); } else { bool production_found = false; - // 遍历每一产生式 + // ÿһʽ for (const auto &production : productions) { const Production *p_pdt = production; @@ -268,7 +271,7 @@ public: } } - // 指导没有新的符号被添加到任意FOLLOW集合 + // ָûµķűӵFOLLOW bool ifAdded = true; while(ifAdded) { @@ -287,25 +290,25 @@ public: for (int i = 0; i < right_symbols.size() - 1; i++) { - // 非终结符 + // ս if (!symbolTable.getSymbol(right_symbols[i])->terminator) { const auto p_non_term_set = FIRST(right_symbols, i + 1); - // 获得FOLLOW集 + // FOLLOW non_terminator_symbols = get_follow_set(right_symbols[i]); const size_t set_size = non_terminator_symbols->size(); non_terminator_symbols->insert(p_non_term_set->begin(), p_non_term_set->end()); - // 在集合中发现空字符 + // ڼзַֿ if(non_terminator_symbols->find(0) != non_terminator_symbols->end()) { non_terminator_symbols->erase(0); equal_left_non_terminators.insert(right_symbols[i]); } - // 检查是否有新的终结符号被添加 + // Ƿµսű if(set_size < non_terminator_symbols->size()) { ifAdded = true; } @@ -319,9 +322,9 @@ public: } for(const auto symbol : equal_left_non_terminators) { - // 获得左边非终结符的FOLLOW集 + // ߷սFOLLOW const auto left_non_terminator_symbols = get_follow_set(production->left); - // 获得FOLLOW集 + // FOLLOW non_terminator_symbols = get_follow_set(symbol); const size_t set_size = non_terminator_symbols->size(); @@ -334,7 +337,7 @@ public: non_terminator_symbols->erase(0); } - // 检查是否有新的终结符号被添加 + // Ƿµսű if(set_size < non_terminator_symbols->size()) { ifAdded = true; } @@ -350,7 +353,7 @@ public: set *non_terminator_symbols = nullptr; - // 查找缓存 + // һ auto it = follows.find(symbol); if(it != follows.end()) { non_terminator_symbols = it->second; @@ -370,7 +373,7 @@ public: auto *p_sym = symbolTable.getSymbol(symbol_index); if(p_sym->terminator) { - if (p_sym->name == L"ε") { + if (p_sym->name == L"") { wcout << L" [Epsilon] "; } else wcout << L" \"" << p_sym->name << L"\" "; @@ -423,8 +426,12 @@ public: non_terminator << c; } } + wstring temp_symbol = trim(non_terminator.str()); + if(!temp_symbol.empty()) { + symbols.push_back(symbolTable.addSymbol(trim(non_terminator.str()), false)); + } - auto p_pdt = new Production(left, symbols); + auto p_pdt = new Production(pdt_index++, left, symbols); productions.push_back(p_pdt); } @@ -450,7 +457,7 @@ public: for(int symbol : right) { right_vector.push_back(symbol); } - auto p_pdt = new Production(left, right_vector); + auto p_pdt = new Production(pdt_index++, left, right_vector); productions.push_back(p_pdt); return p_pdt; } @@ -464,12 +471,12 @@ public: } }; -// 项 +// class Item{ - // 对应的产生式 + // ӦIJʽ const Production* const production; - // 点的位置 + // λ int dot_index = 0; const int terminator = 0; @@ -627,14 +634,27 @@ public: int dot_index = item->get_dot_index(); wcout << pool->getSymbol(p_pdt->left)->name << L" -> " ; int i = 0; - for(const auto &symbol : p_pdt->right) { - if(i++ == dot_index) wcout << "·"; - wcout << pool->getSymbol(symbol)->name; + for(const auto &symbol_index : p_pdt->right) { + + if(i > 0) wcout << " "; + if(i++ == dot_index) wcout << ""; + + auto *symbol = pool->getSymbol(symbol_index); + + if(!symbol->index) { + wcout << L"[Epsilon]"; + continue; + } + + if(!symbol->terminator) + wcout << pool->getSymbol(symbol_index)->name; + else + wcout << L'"' << pool->getSymbol(symbol_index)->name << L'"'; } - if(i++ == dot_index) wcout << "·"; + if(i++ == dot_index) wcout << ""; - wcout << L',' << pool->getSymbol(item->get_terminator())->name << endl; + wcout << L", \"" << pool->getSymbol(item->get_terminator())->name << "\"" << endl; } cout << endl; } @@ -702,7 +722,7 @@ public: auto *pi_ic = new ItemCollection(pool); - // -1 代表 $ + // -1 $ pi_ic->addItem(p_pdt, 0, -1); pi_ic->CLOSURE(); @@ -769,8 +789,15 @@ public: return false; } - if(symbol != 0) - wcout << L"GOTO(" << idx << L", " << pool->getSymbol(symbol)->name << L")" << endl; + if(symbol != 0) { + auto p_symbol = pool->getSymbol(symbol); + if(p_symbol->terminator) + wcout << L"GOTO(" << idx << L", \"" << p_symbol->name << L"\")" << endl; + else + wcout << L"GOTO(" << idx << L", " << p_symbol->name << L")" << endl; + } else { + wcout << L"GOTO(" << idx << L", [Epsilon])" << endl; + } ic_map.insert(pair(seed, p_ic)); p_ic->print(); @@ -961,80 +988,97 @@ public: } void print() { - wcout << L"ACTION" << endl; + + std::wofstream output("tables.txt"); + + size_t space = 4; + + output << L"ACTION" << endl; vector symbols; - wcout << std::left << std::setw(4) << " "; + output << std::left << std::setw(space) << " "; for(const auto *symbol : pool->getAllSymbols()) { if(symbol->index == 0) continue; if(symbol->terminator) { - wcout << std::left << std::setw(4) << symbol->name; + space = std::max(space, symbol->name.size() + 2); symbols.push_back(symbol->index); } } - wcout << endl; + + for(const auto symbol_index : symbols) { + output << std::left << std::setw(space) << pool->getSymbol(symbol_index)->name; + } + + output << endl; for(int i = 0; i < icm->getItemCollections().size(); i++){ - wcout << std::left << std::setw(4) << i; + output << std::left << std::setw(space) << i; for(int symbol : symbols) { auto p_step = this->findActionStep(i, symbol); if(p_step == nullptr) { - wcout << std::left << std::setw(4) << " "; + output << std::left << std::setw(space) << " "; } else { if(p_step->action == MOVE) - wcout << std::left << std::setw(4) << wstring(L"s") + to_wstring(p_step->target.index); + output << std::left << std::setw(space) << wstring(L"s") + to_wstring(p_step->target.index); else if(p_step->action == ACC) - wcout << std::left << std::setw(4) << L"acc"; + output << std::left << std::setw(space) << L"acc"; else if(p_step->action == STATUTE) - wcout << std::left << std::setw(4) << L"r"; + output << std::left << std::setw(space) << L"r" + to_wstring(p_step->target.production->index); } } - wcout << endl; + output << endl; } - wcout << endl; + output << endl; - wcout << "GOTO" << endl; + space = 4; + + output << "GOTO" << endl; symbols.clear(); - wcout << std::left << std::setw(4) << " "; + output << std::left << std::setw(space) << " "; for(const auto *symbol : pool->getAllSymbols()) { if(symbol->index == 0) continue; if(!symbol->terminator && !symbol->start) { - wcout << std::left << std::setw(4) << symbol->name; + space = std::max(space, symbol->name.size() + 2); symbols.push_back(symbol->index); } } - wcout <getSymbol(symbol_index)->name; + } + + output <getItemCollections().size(); k++) { - wcout << std::left << std::setw(4) << k; + output << std::left << std::setw(space) << k; for (int symbol : symbols) { auto p_step = this->findGotoStep(k, symbol); if(p_step == nullptr) { - wcout << std::left << std::setw(4) << " "; + output << std::left << std::setw(space) << " "; } else { - wcout << std::left << std::setw(4) << to_wstring(p_step->target.index); + output << std::left << std::setw(space) << to_wstring(p_step->target.index); } } - wcout << endl; + output << endl; } - wcout << endl << endl; + output << endl << endl; + + output.close(); } }; class Generator{ - // 文件输入 + // ļ wifstream input; GrammarResourcePool *pool; - map C; - ItemCollectionManager *icm; AnalyseTableGenerator *atg; @@ -1051,6 +1095,10 @@ public: input.imbue(std::locale(input.getloc(), codeCvtToUTF8)); } + ~Generator() { + input.close(); + } + void run() { pool->FOLLOW(); icm->buildItems(); @@ -1058,23 +1106,70 @@ public: atg->print(); } - // 得到所有的产生式 + // õеIJʽ void getProductions() { - // 读入文法文件 + // ķļ wstring temp_line; while (getline(input, temp_line)) { - pool->parse_production_string_line(temp_line); + if(temp_line.size() > 2 && temp_line[0] != '#') { + pool->parse_production_string_line(temp_line); + } } } + void output(const GrammarResourcePool *&pool, const AnalyseTableGenerator *&atg) { + pool = this->pool; + atg = this->atg; + } + +}; + +class SyntaxParser { + + // ļ + wifstream input; + + const GrammarResourcePool *pool; + + const AnalyseTableGenerator *atg; + + SyntaxParser(const GrammarResourcePool *pool, const AnalyseTableGenerator *atg): + input("outputToken.txt", std::ios::binary), + pool(pool), + atg(atg){ + + auto* codeCvtToUTF8= new std::codecvt_utf8; + + input.imbue(std::locale(input.getloc(), codeCvtToUTF8)); + } + + ~SyntaxParser() { + input.close(); + } + + // õеIJʽ + void getToken() { + + // ķļ + wstring temp_line; + + int line_index = 0; + while (getline(input, temp_line)) { + if(temp_line.size() > 2 && temp_line[0] != '#') { + input >> line_index; + } + } + } + + }; int main() { - clock_t start,end;//定义clock_t变量 - start = clock(); //开始时间 + clock_t start,end;//clock_t + start = clock(); //ʼʱ Generator generator; @@ -1082,8 +1177,8 @@ int main() { generator.getProductions(); generator.run(); - //输出时间 - end = clock(); //结束时间 + //ʱ + end = clock(); //ʱ double times = double(end-start)/CLOCKS_PER_SEC; cout<<"The Run time = "<