完善代码;

This commit is contained in:
satunreric 2021-04-30 13:33:22 +08:00
parent e778be3772
commit c2742de73f
2 changed files with 232 additions and 72 deletions

View File

@ -1,3 +1,68 @@
@S -> C C @struct_type -> "struct" "ID" "{" member_list "}" "EOF"
C -> "c" C member_list-> type_spec declarators ";"
C -> "d" type_spec -> base_type_spec
type_spec -> struct_type
base_type_spec -> floating_pt_type
base_type_spec -> integer_type
base_type_spec -> "char"
base_type_spec -> "boolean"
floating_pt_type -> "float"
floating_pt_type -> "double"
floating_pt_type -> "long" "double"
integer_type -> signed_int
integer_type -> unsigned_int
signed_int -> "short"
signed_int -> "int16"
signed_int -> "long"
signed_int -> "int32"
signed_int -> "long" "long"
signed_int -> "int64"
signed_int -> "int8"
unsigned_int -> "unsigned" "short"
unsigned_int -> "unsigned" "long"
unsigned_int -> "unsigned" "long" "long"
unsigned_int -> "uint16"
unsigned_int -> "uint32"
unsigned_int -> "uint64"
unsigned_int -> "uint8"
declarators -> declarator more_declarators
more_declarators -> "," declarator
more_declarators -> ε
declarator -> "ID" more_declarator
more_declarator -> exp_list
more_declarator -> ε
exp_list -> "[" or_expr more_or_expr "]"
more_or_expr -> "," or_expr
more_or_expr -> ε
or_expr -> xor_expr more_xor_expr
more_xor_expr -> "|" xor_expr
more_xor_expr -> ε
xor_expr -> and_expr more_and_expr
more_and_expr -> "^" and_expr
more_and_expr -> ε
and_expr -> shift_expr more_shift_expr
more_shift_expr -> "&" shift_expr
more_shift_expr -> ε
shift_expr -> add_expr more_add_expr
more_add_expr -> shift_sign add_expr
more_add_expr -> ε
shift_sign -> ">>"
shift_sign -> "<<"
add_expr -> multi_expr more_multi_expr
more_multi_expr -> multi_sign multi_expr
more_multi_expr -> ε
multi_sign -> "+"
multi_sign -> "-"
multi_expr -> unary_expr more_unary_expr
more_unary_expr -> unary_sign unary_expr
more_unary_expr -> ε
unary_sign -> "*"
unary_sign -> "/"
unary_sign -> "%"
unary_expr -> unary_sign_2 unary_declare
unary_sign_2 -> "-"
unary_sign_2 -> "+"
unary_sign_2 -> "~"
unary_declare -> "INTEGER"
unary_declare -> "STRING"
unary_declare -> "BOOLEAN"

233
main.cpp
View File

@ -52,8 +52,8 @@ public:
SymbolTable() { SymbolTable() {
auto symbol = new Symbol(0, L"ε", true, false); auto symbol = new Symbol(0, L"ε", true, false);
table.insert(pair<wstring, Symbol *>(L"ε", symbol)); table.insert(pair<wstring, Symbol *>(L"ε", symbol));
cache.insert(pair<int, Symbol *>(0, symbol)); cache.insert(pair<int, Symbol *>(0, symbol));
line.push_back(symbol); line.push_back(symbol);
@ -71,7 +71,7 @@ public:
Symbol *symbol = nullptr; Symbol *symbol = nullptr;
if(name == L"ε") { if(name == L"ε") {
return 0; return 0;
} else if (name[0] == L'@') { } else if (name[0] == L'@') {
symbol = new Symbol(index, name, terminator, true); symbol = new Symbol(index, name, terminator, true);
@ -134,31 +134,34 @@ public:
}; };
// 产生式 // 产生式
struct Production { struct Production {
const int index;
const int left; const int left;
const vector<int> right; const vector<int> right;
Production(int left, vector<int> right): left(left), right(std::move(right)) {} Production(int index, int left, vector<int> right): index(index), left(left), right(std::move(right)) {}
}; };
// 语法资源池 // 语法资源池
class GrammarResourcePool { class GrammarResourcePool {
// 符号表 int pdt_index = 0;
// 符号表
SymbolTable symbolTable; SymbolTable symbolTable;
// 产生式 // 产生式
vector<const Production *> productions; vector<const Production *> productions;
// FIRST结果存储表 // FIRST结果存储表
map<int, const set<int> *> firsts; map<int, const set<int> *> firsts;
// FOLLOW结果存储表 // FOLLOW结果存储表
map<int, set<int> *> follows; map<int, set<int> *> follows;
// 去掉首尾空格 // 去掉首尾空格
static wstring& trim(wstring &&str) { static wstring& trim(wstring &&str) {
if (str.empty()) { if (str.empty()) {
return str; return str;
@ -173,7 +176,7 @@ public:
const set<int > *FIRST(const vector<int> &symbols, int start_index) { const set<int > *FIRST(const vector<int> &symbols, int start_index) {
// 生成集合 // 生成集合
auto *non_terminator_symbols = new set<int>(); auto *non_terminator_symbols = new set<int>();
for(int i = start_index; i < symbols.size(); i++) { for(int i = start_index; i < symbols.size(); i++) {
@ -195,23 +198,23 @@ public:
const set<int>* FIRST(int symbol) { const set<int>* FIRST(int symbol) {
// 查找缓存 // 查找缓存
const auto it = firsts.find(symbol); const auto it = firsts.find(symbol);
if(it != firsts.end()) { if(it != firsts.end()) {
return it->second; return it->second;
} }
// 生成集合 // 生成集合
auto *non_terminator_symbols = new set<int>(); auto *non_terminator_symbols = new set<int>();
// 如果是终结符 // 如果是终结符
if(symbolTable.getSymbol(symbol)->terminator) { if(symbolTable.getSymbol(symbol)->terminator) {
non_terminator_symbols->insert(symbol); non_terminator_symbols->insert(symbol);
} else { } else {
bool production_found = false; bool production_found = false;
// 遍历每一产生式 // 遍历每一产生式
for (const auto &production : productions) { for (const auto &production : productions) {
const Production *p_pdt = production; const Production *p_pdt = production;
@ -268,7 +271,7 @@ public:
} }
} }
// 指导没有新的符号被添加到任意FOLLOW集合 // 指导没有新的符号被添加到任意FOLLOW集合
bool ifAdded = true; bool ifAdded = true;
while(ifAdded) { while(ifAdded) {
@ -287,25 +290,25 @@ public:
for (int i = 0; i < right_symbols.size() - 1; i++) { for (int i = 0; i < right_symbols.size() - 1; i++) {
// 非终结符 // 非终结符
if (!symbolTable.getSymbol(right_symbols[i])->terminator) { if (!symbolTable.getSymbol(right_symbols[i])->terminator) {
const auto p_non_term_set = FIRST(right_symbols, i + 1); const auto p_non_term_set = FIRST(right_symbols, i + 1);
// 获得FOLLOW集 // 获得FOLLOW集
non_terminator_symbols = get_follow_set(right_symbols[i]); non_terminator_symbols = get_follow_set(right_symbols[i]);
const size_t set_size = non_terminator_symbols->size(); const size_t set_size = non_terminator_symbols->size();
non_terminator_symbols->insert(p_non_term_set->begin(), p_non_term_set->end()); non_terminator_symbols->insert(p_non_term_set->begin(), p_non_term_set->end());
// 在集合中发现空字符 // 在集合中发现空字符
if(non_terminator_symbols->find(0) != non_terminator_symbols->end()) { if(non_terminator_symbols->find(0) != non_terminator_symbols->end()) {
non_terminator_symbols->erase(0); non_terminator_symbols->erase(0);
equal_left_non_terminators.insert(right_symbols[i]); equal_left_non_terminators.insert(right_symbols[i]);
} }
// 检查是否有新的终结符号被添加 // 检查是否有新的终结符号被添加
if(set_size < non_terminator_symbols->size()) { if(set_size < non_terminator_symbols->size()) {
ifAdded = true; ifAdded = true;
} }
@ -319,9 +322,9 @@ public:
} }
for(const auto symbol : equal_left_non_terminators) { for(const auto symbol : equal_left_non_terminators) {
// 获得左边非终结符的FOLLOW集 // 获得左边非终结符的FOLLOW集
const auto left_non_terminator_symbols = get_follow_set(production->left); const auto left_non_terminator_symbols = get_follow_set(production->left);
// 获得FOLLOW集 // 获得FOLLOW集
non_terminator_symbols = get_follow_set(symbol); non_terminator_symbols = get_follow_set(symbol);
const size_t set_size = non_terminator_symbols->size(); const size_t set_size = non_terminator_symbols->size();
@ -334,7 +337,7 @@ public:
non_terminator_symbols->erase(0); non_terminator_symbols->erase(0);
} }
// 检查是否有新的终结符号被添加 // 检查是否有新的终结符号被添加
if(set_size < non_terminator_symbols->size()) { if(set_size < non_terminator_symbols->size()) {
ifAdded = true; ifAdded = true;
} }
@ -350,7 +353,7 @@ public:
set<int> *non_terminator_symbols = nullptr; set<int> *non_terminator_symbols = nullptr;
// 查找缓存 // 查找缓存
auto it = follows.find(symbol); auto it = follows.find(symbol);
if(it != follows.end()) { if(it != follows.end()) {
non_terminator_symbols = it->second; non_terminator_symbols = it->second;
@ -370,7 +373,7 @@ public:
auto *p_sym = symbolTable.getSymbol(symbol_index); auto *p_sym = symbolTable.getSymbol(symbol_index);
if(p_sym->terminator) { if(p_sym->terminator) {
if (p_sym->name == L"ε") { if (p_sym->name == L"ε") {
wcout << L" [Epsilon] "; wcout << L" [Epsilon] ";
} }
else wcout << L" \"" << p_sym->name << L"\" "; else wcout << L" \"" << p_sym->name << L"\" ";
@ -423,8 +426,12 @@ public:
non_terminator << c; non_terminator << c;
} }
} }
wstring temp_symbol = trim(non_terminator.str());
if(!temp_symbol.empty()) {
symbols.push_back(symbolTable.addSymbol(trim(non_terminator.str()), false));
}
auto p_pdt = new Production(left, symbols); auto p_pdt = new Production(pdt_index++, left, symbols);
productions.push_back(p_pdt); productions.push_back(p_pdt);
} }
@ -450,7 +457,7 @@ public:
for(int symbol : right) { for(int symbol : right) {
right_vector.push_back(symbol); right_vector.push_back(symbol);
} }
auto p_pdt = new Production(left, right_vector); auto p_pdt = new Production(pdt_index++, left, right_vector);
productions.push_back(p_pdt); productions.push_back(p_pdt);
return p_pdt; return p_pdt;
} }
@ -464,12 +471,12 @@ public:
} }
}; };
// 项 // 项
class Item{ class Item{
// 对应的产生式 // 对应的产生式
const Production* const production; const Production* const production;
// 点的位置 // 点的位置
int dot_index = 0; int dot_index = 0;
const int terminator = 0; const int terminator = 0;
@ -627,14 +634,27 @@ public:
int dot_index = item->get_dot_index(); int dot_index = item->get_dot_index();
wcout << pool->getSymbol(p_pdt->left)->name << L" -> " ; wcout << pool->getSymbol(p_pdt->left)->name << L" -> " ;
int i = 0; int i = 0;
for(const auto &symbol : p_pdt->right) { for(const auto &symbol_index : p_pdt->right) {
if(i++ == dot_index) wcout << "·";
wcout << pool->getSymbol(symbol)->name; if(i > 0) wcout << " ";
if(i++ == dot_index) wcout << "·";
auto *symbol = pool->getSymbol(symbol_index);
if(!symbol->index) {
wcout << L"[Epsilon]";
continue;
}
if(!symbol->terminator)
wcout << pool->getSymbol(symbol_index)->name;
else
wcout << L'"' << pool->getSymbol(symbol_index)->name << L'"';
} }
if(i++ == dot_index) wcout << "·"; if(i++ == dot_index) wcout << "·";
wcout << L',' << pool->getSymbol(item->get_terminator())->name << endl; wcout << L", \"" << pool->getSymbol(item->get_terminator())->name << "\"" << endl;
} }
cout << endl; cout << endl;
} }
@ -702,7 +722,7 @@ public:
auto *pi_ic = new ItemCollection(pool); auto *pi_ic = new ItemCollection(pool);
// -1 代表 $ // -1 代表 $
pi_ic->addItem(p_pdt, 0, -1); pi_ic->addItem(p_pdt, 0, -1);
pi_ic->CLOSURE(); pi_ic->CLOSURE();
@ -769,8 +789,15 @@ public:
return false; return false;
} }
if(symbol != 0) if(symbol != 0) {
wcout << L"GOTO(" << idx << L", " << pool->getSymbol(symbol)->name << L")" << endl; auto p_symbol = pool->getSymbol(symbol);
if(p_symbol->terminator)
wcout << L"GOTO(" << idx << L", \"" << p_symbol->name << L"\")" << endl;
else
wcout << L"GOTO(" << idx << L", " << p_symbol->name << L")" << endl;
} else {
wcout << L"GOTO(" << idx << L", [Epsilon])" << endl;
}
ic_map.insert(pair<size_t, ItemCollection *>(seed, p_ic)); ic_map.insert(pair<size_t, ItemCollection *>(seed, p_ic));
p_ic->print(); p_ic->print();
@ -961,80 +988,97 @@ public:
} }
void print() { void print() {
wcout << L"ACTION" << endl;
std::wofstream output("tables.txt");
size_t space = 4;
output << L"ACTION" << endl;
vector<int> symbols; vector<int> symbols;
wcout << std::left << std::setw(4) << " "; output << std::left << std::setw(space) << " ";
for(const auto *symbol : pool->getAllSymbols()) { for(const auto *symbol : pool->getAllSymbols()) {
if(symbol->index == 0) continue; if(symbol->index == 0) continue;
if(symbol->terminator) { if(symbol->terminator) {
wcout << std::left << std::setw(4) << symbol->name; space = std::max(space, symbol->name.size() + 2);
symbols.push_back(symbol->index); symbols.push_back(symbol->index);
} }
} }
wcout << endl;
for(const auto symbol_index : symbols) {
output << std::left << std::setw(space) << pool->getSymbol(symbol_index)->name;
}
output << endl;
for(int i = 0; i < icm->getItemCollections().size(); i++){ for(int i = 0; i < icm->getItemCollections().size(); i++){
wcout << std::left << std::setw(4) << i; output << std::left << std::setw(space) << i;
for(int symbol : symbols) { for(int symbol : symbols) {
auto p_step = this->findActionStep(i, symbol); auto p_step = this->findActionStep(i, symbol);
if(p_step == nullptr) { if(p_step == nullptr) {
wcout << std::left << std::setw(4) << " "; output << std::left << std::setw(space) << " ";
} else { } else {
if(p_step->action == MOVE) if(p_step->action == MOVE)
wcout << std::left << std::setw(4) << wstring(L"s") + to_wstring(p_step->target.index); output << std::left << std::setw(space) << wstring(L"s") + to_wstring(p_step->target.index);
else if(p_step->action == ACC) else if(p_step->action == ACC)
wcout << std::left << std::setw(4) << L"acc"; output << std::left << std::setw(space) << L"acc";
else if(p_step->action == STATUTE) else if(p_step->action == STATUTE)
wcout << std::left << std::setw(4) << L"r"; output << std::left << std::setw(space) << L"r" + to_wstring(p_step->target.production->index);
} }
} }
wcout << endl; output << endl;
} }
wcout << endl; output << endl;
wcout << "GOTO" << endl; space = 4;
output << "GOTO" << endl;
symbols.clear(); symbols.clear();
wcout << std::left << std::setw(4) << " "; output << std::left << std::setw(space) << " ";
for(const auto *symbol : pool->getAllSymbols()) { for(const auto *symbol : pool->getAllSymbols()) {
if(symbol->index == 0) continue; if(symbol->index == 0) continue;
if(!symbol->terminator && !symbol->start) { if(!symbol->terminator && !symbol->start) {
wcout << std::left << std::setw(4) << symbol->name; space = std::max(space, symbol->name.size() + 2);
symbols.push_back(symbol->index); symbols.push_back(symbol->index);
} }
} }
wcout <<endl;
for(const auto symbol_index : symbols) {
output << std::left << std::setw(space) << pool->getSymbol(symbol_index)->name;
}
output <<endl;
for(int k = 0; k < icm->getItemCollections().size(); k++) { for(int k = 0; k < icm->getItemCollections().size(); k++) {
wcout << std::left << std::setw(4) << k; output << std::left << std::setw(space) << k;
for (int symbol : symbols) { for (int symbol : symbols) {
auto p_step = this->findGotoStep(k, symbol); auto p_step = this->findGotoStep(k, symbol);
if(p_step == nullptr) { if(p_step == nullptr) {
wcout << std::left << std::setw(4) << " "; output << std::left << std::setw(space) << " ";
} else { } else {
wcout << std::left << std::setw(4) << to_wstring(p_step->target.index); output << std::left << std::setw(space) << to_wstring(p_step->target.index);
} }
} }
wcout << endl; output << endl;
} }
wcout << endl << endl; output << endl << endl;
output.close();
} }
}; };
class Generator{ class Generator{
// 文件输入 // 文件输入
wifstream input; wifstream input;
GrammarResourcePool *pool; GrammarResourcePool *pool;
map<string, ItemCollection *> C;
ItemCollectionManager *icm; ItemCollectionManager *icm;
AnalyseTableGenerator *atg; AnalyseTableGenerator *atg;
@ -1051,6 +1095,10 @@ public:
input.imbue(std::locale(input.getloc(), codeCvtToUTF8)); input.imbue(std::locale(input.getloc(), codeCvtToUTF8));
} }
// Explicitly close the grammar input stream when the generator is destroyed.
~Generator() {
    this->input.close();
}
void run() { void run() {
pool->FOLLOW(); pool->FOLLOW();
icm->buildItems(); icm->buildItems();
@ -1058,23 +1106,70 @@ public:
atg->print(); atg->print();
} }
// 得到所有的产生式 // 得到所有的产生式
void getProductions() { void getProductions() {
// 读入文法文件 // 读入文法文件
wstring temp_line; wstring temp_line;
while (getline(input, temp_line)) { while (getline(input, temp_line)) {
pool->parse_production_string_line(temp_line); if(temp_line.size() > 2 && temp_line[0] != '#') {
pool->parse_production_string_line(temp_line);
}
} }
} }
// Hand this generator's grammar pool and analysis-table generator to the caller
// through the two out-parameters. Only the pointers are copied; the caller gets
// read-only (pointer-to-const) access.
void output(const GrammarResourcePool *&pool, const AnalyseTableGenerator *&atg) {
    // `this->` is required: both parameters shadow the members of the same name.
    atg = this->atg;
    pool = this->pool;
}
};
// Syntax-parser stage (still a skeleton): opens "outputToken.txt" with a UTF-8
// conversion facet and keeps read-only handles to the grammar pool and the
// analysis-table generator passed in by the caller.
class SyntaxParser {

    // Token file input stream
    wifstream input;

    // Read-only handles supplied by the caller; this class never frees them
    const GrammarResourcePool *pool;

    const AnalyseTableGenerator *atg;

public:
    // Everything below is public: with the default `class` access the constructor
    // was private, so no code outside the class could create a SyntaxParser.

    // Open the token file in binary mode and decode it as UTF-8.
    SyntaxParser(const GrammarResourcePool *pool, const AnalyseTableGenerator *atg):
            input("outputToken.txt", std::ios::binary),
            pool(pool),
            atg(atg){
        // The std::locale constructed below takes ownership of the facet,
        // so it must not be deleted here.
        auto* codeCvtToUTF8= new std::codecvt_utf8<wchar_t>;
        input.imbue(std::locale(input.getloc(), codeCvtToUTF8));
    }

    // Close the token file.
    ~SyntaxParser() {
        input.close();
    }

    // Scan the token file line by line.
    void getToken() {
        wstring temp_line;
        int line_index = 0;
        while (getline(input, temp_line)) {
            // Ignore lines of two characters or fewer and lines starting with '#'
            if(temp_line.size() > 2 && temp_line[0] != '#') {
                // NOTE(review): this extracts from the stream position AFTER the line
                // just read (i.e. the start of the next line), not from temp_line, and
                // a non-numeric prefix sets failbit, which ends the loop. The value is
                // currently unused. Confirm the token file layout before relying on it.
                input >> line_index;
            }
        }
    }

};
int main() { int main() {
clock_t start,end;//定义clock_t变量 clock_t start,end;//定义clock_t变量
start = clock(); //开始时间 start = clock(); //开始时间
Generator generator; Generator generator;
@ -1082,8 +1177,8 @@ int main() {
generator.getProductions(); generator.getProductions();
generator.run(); generator.run();
//输出时间 //输出时间
end = clock(); //结束时间 end = clock(); //结束时间
double times = double(end-start)/CLOCKS_PER_SEC; double times = double(end-start)/CLOCKS_PER_SEC;
cout<<"The Run time = "<<times<<"s" << " = " <<times * 1000 <<"ms" << endl; cout<<"The Run time = "<<times<<"s" << " = " <<times * 1000 <<"ms" << endl;
return 0; return 0;