继续添加相关函数
This commit is contained in:
parent
4d73c6c6f2
commit
70ac1bb025
@ -1,6 +1,6 @@
|
||||
cmake_minimum_required(VERSION 3.17)
|
||||
project(syntaxParser)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
|
||||
add_executable(syntaxParser main.cpp)
|
@ -1,8 +1,3 @@
|
||||
@E -> T E'
|
||||
E' -> "+" T E'
|
||||
E' -> ε
|
||||
T -> F T'
|
||||
T' -> "*" F T'
|
||||
T' -> ε
|
||||
F -> "(" @E ")"
|
||||
F -> "id"
|
||||
@S -> C C
|
||||
C -> "c" C
|
||||
C -> "d"
|
853
main.cpp
853
main.cpp
@ -8,6 +8,7 @@
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -19,13 +20,14 @@ using std::wcout;
|
||||
using std::endl;
|
||||
using std::to_string;
|
||||
using std::hash;
|
||||
using std::setw;
|
||||
|
||||
struct Symbol {
|
||||
|
||||
const int index;
|
||||
const wstring name;
|
||||
const bool terminator;
|
||||
const bool start;
|
||||
wstring name;
|
||||
bool terminator;
|
||||
bool start;
|
||||
|
||||
Symbol(int index, wstring name, bool terminator, bool start):
|
||||
index(index),
|
||||
@ -60,7 +62,7 @@ public:
|
||||
line.push_back(symbol);
|
||||
}
|
||||
|
||||
const vector<const Symbol *> &getAllSymbols() {
|
||||
const vector<const Symbol *> &getAllSymbols() const {
|
||||
return line;
|
||||
}
|
||||
|
||||
@ -89,7 +91,7 @@ public:
|
||||
return symbol->index;
|
||||
}
|
||||
|
||||
const Symbol *getSymbol(int symbol_index) {
|
||||
const Symbol *getSymbol(int symbol_index) const {
|
||||
const auto &it = cache.find(symbol_index);
|
||||
if(it != cache.end()) {
|
||||
return it->second;
|
||||
@ -98,7 +100,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
int getSymbolIndex(const wstring &name) {
|
||||
int getSymbolIndex(const wstring &name) const {
|
||||
const auto &it = table.find(name);
|
||||
if(it != table.end()) {
|
||||
return it->second->index;
|
||||
@ -107,6 +109,28 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void modifySymbol(int idx, const wstring &name, bool terminator, bool start) {
|
||||
auto it = cache.find(idx);
|
||||
if(it != cache.end()) {
|
||||
auto p_sym = it->second;
|
||||
p_sym->name = name;
|
||||
p_sym->terminator = terminator;
|
||||
p_sym->start = start;
|
||||
}
|
||||
}
|
||||
|
||||
const Symbol *getStartSymbol() const {
|
||||
for(const auto & symbol : getAllSymbols()) {
|
||||
if(symbol->start) {
|
||||
return symbol;
|
||||
}
|
||||
}
|
||||
|
||||
throw runtime_error("start symbol NOT Found");
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
// 产生式
|
||||
@ -118,137 +142,21 @@ struct Production {
|
||||
|
||||
};
|
||||
|
||||
// 项
|
||||
class Item{
|
||||
// 对应的产生式
|
||||
const Production* const production;
|
||||
|
||||
// 点的位置
|
||||
int dot_index = 0;
|
||||
|
||||
const int terminator = 0;
|
||||
|
||||
|
||||
public:
|
||||
explicit Item(Production *p_pdt, int m_terminator) : production(p_pdt), terminator(m_terminator) {}
|
||||
|
||||
void set_dot_index(int m_dot_index) {
|
||||
if(m_dot_index > production->right.size()) {
|
||||
throw runtime_error("DOT_INDEX out of range");
|
||||
}
|
||||
this->dot_index = m_dot_index;
|
||||
}
|
||||
|
||||
int get_dot_index() const {
|
||||
return dot_index;
|
||||
}
|
||||
|
||||
int get_dot_next_symbol() {
|
||||
if(get_dot_index() == production->right.size()) {
|
||||
return 0;
|
||||
} else {
|
||||
return production->right[dot_index];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class ItemCollectionManager;
|
||||
|
||||
class ItemCollection{
|
||||
|
||||
int index = 0;
|
||||
|
||||
vector<Item *> items;
|
||||
|
||||
Production *G;
|
||||
|
||||
friend ItemCollectionManager;
|
||||
|
||||
public:
|
||||
|
||||
void addItem(Production *p_pdt, int dot_index, int terminator) {
|
||||
auto *p_item = new Item(p_pdt, terminator);
|
||||
p_item->set_dot_index(dot_index);
|
||||
items.push_back(p_item);
|
||||
}
|
||||
|
||||
void CLOSURE() {
|
||||
bool ifAdd = true;
|
||||
|
||||
while(ifAdd) {
|
||||
ifAdd = false;
|
||||
|
||||
for(const auto & item : items) {
|
||||
if(item->get_dot_next_symbol() == 0) {
|
||||
continue;
|
||||
}
|
||||
// TODO
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class ItemCollectionManager{
|
||||
|
||||
int index = 1;
|
||||
|
||||
map<size_t, ItemCollection *> ic_map;
|
||||
|
||||
vector<const ItemCollection *> ics;
|
||||
|
||||
template <class T>
|
||||
inline void hash_combine(std::size_t& seed, const T& v)
|
||||
{
|
||||
std::hash<T> hasher;
|
||||
seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
const vector<const ItemCollection *> &getItemCollections() {
|
||||
return ics;
|
||||
}
|
||||
|
||||
bool addItemCollection(int idx, int terminator, ItemCollection *p_ic){
|
||||
auto hasher = hash<int>();
|
||||
size_t seed = hasher(idx);
|
||||
hash_combine(seed, terminator);
|
||||
|
||||
auto it = ic_map.find(seed);
|
||||
if(it != ic_map.end()) {
|
||||
return false;
|
||||
} else {
|
||||
p_ic->index = this->index++;
|
||||
ic_map.insert(pair<size_t, ItemCollection *>(seed, p_ic));
|
||||
ics.push_back(p_ic);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class Generator{
|
||||
|
||||
// 文件输入
|
||||
wifstream input;
|
||||
// 语法资源池
|
||||
class GrammarResourcePool {
|
||||
|
||||
// 符号表
|
||||
SymbolTable symbolTable;
|
||||
|
||||
// 产生式
|
||||
vector<Production *> productions;
|
||||
vector<const Production *> productions;
|
||||
|
||||
// FIRST结果存储表
|
||||
map<int, set<int> *> firsts;
|
||||
map<int, const set<int> *> firsts;
|
||||
|
||||
// FOLLOW结果存储表
|
||||
map<int, set<int> *> follows;
|
||||
|
||||
map<string, ItemCollection *> C;
|
||||
|
||||
// 去掉首尾空格
|
||||
static wstring& trim(wstring &&str) {
|
||||
if (str.empty()) {
|
||||
@ -260,7 +168,9 @@ class Generator{
|
||||
return str;
|
||||
}
|
||||
|
||||
set<int > *FIRST(const vector<int> &symbols, int start_index) {
|
||||
public:
|
||||
|
||||
const set<int > *FIRST(const vector<int> &symbols, int start_index) {
|
||||
|
||||
// 生成集合
|
||||
auto *non_terminator_symbols = new set<int>();
|
||||
@ -282,7 +192,7 @@ class Generator{
|
||||
return non_terminator_symbols;
|
||||
}
|
||||
|
||||
set<int>* FIRST(int symbol) {
|
||||
const set<int>* FIRST(int symbol) {
|
||||
|
||||
// 查找缓存
|
||||
const auto it = firsts.find(symbol);
|
||||
@ -302,7 +212,7 @@ class Generator{
|
||||
|
||||
// 遍历每一产生式
|
||||
for (const auto &production : productions) {
|
||||
Production *p_pdt = production;
|
||||
const Production *p_pdt = production;
|
||||
|
||||
if (p_pdt->left != symbol) continue;
|
||||
|
||||
@ -328,7 +238,7 @@ class Generator{
|
||||
if (!production_found) non_terminator_symbols->insert(0);
|
||||
}
|
||||
|
||||
this->firsts.insert(pair<int, set<int> *>(symbol, non_terminator_symbols));
|
||||
this->firsts.insert(pair<int, const set<int> *>(symbol, non_terminator_symbols));
|
||||
|
||||
return non_terminator_symbols;
|
||||
}
|
||||
@ -471,84 +381,641 @@ class Generator{
|
||||
wcout << L"}" << endl;
|
||||
}
|
||||
|
||||
void parse_production_string_line(const wstring &temp_line) {
|
||||
auto middle_index = temp_line.find(L"->", 0);
|
||||
|
||||
|
||||
if(middle_index == string::npos) {
|
||||
throw runtime_error("-> NOT FOUND");
|
||||
}
|
||||
|
||||
wstring front = trim(temp_line.substr(0, middle_index));
|
||||
int left = symbolTable.addSymbol(front, false);
|
||||
|
||||
wstring back = trim(temp_line.substr(middle_index + 2, temp_line.size() - middle_index - 2));
|
||||
|
||||
wstringstream terminator, non_terminator;
|
||||
vector<int> symbols;
|
||||
bool is_terminator = false;
|
||||
for(const auto &c : back) {
|
||||
if (c == L'\"') {
|
||||
if(is_terminator) {
|
||||
symbols.push_back(symbolTable.addSymbol(trim(terminator.str()), true));
|
||||
terminator.str(L"");
|
||||
terminator.clear();
|
||||
}
|
||||
is_terminator = !is_terminator;
|
||||
continue;
|
||||
}
|
||||
if(c == L' ' || c == L'\r') {
|
||||
wstring temp_symbol = trim(non_terminator.str());
|
||||
if(!temp_symbol.empty()) {
|
||||
symbols.push_back(symbolTable.addSymbol(trim(non_terminator.str()), false));
|
||||
non_terminator.str(L"");
|
||||
non_terminator.clear();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if(is_terminator) {
|
||||
terminator << c;
|
||||
} else {
|
||||
non_terminator << c;
|
||||
}
|
||||
}
|
||||
|
||||
auto p_pdt = new Production(left, symbols);
|
||||
|
||||
productions.push_back(p_pdt);
|
||||
}
|
||||
|
||||
const vector<const Production *> &get_productions() const {
|
||||
return productions;
|
||||
}
|
||||
|
||||
const Symbol *getSymbol(int symbol_index) const {
|
||||
return symbolTable.getSymbol(symbol_index);
|
||||
}
|
||||
|
||||
const Symbol *getStartSymbol() const {
|
||||
return symbolTable.getStartSymbol();
|
||||
}
|
||||
|
||||
int addSymbol(const wstring &name, bool terminator) {
|
||||
return symbolTable.addSymbol(name, terminator);
|
||||
}
|
||||
|
||||
const Production *addProduction(int left, initializer_list<int> right) {
|
||||
vector<int> right_vector;
|
||||
for(int symbol : right) {
|
||||
right_vector.push_back(symbol);
|
||||
}
|
||||
auto p_pdt = new Production(left, right_vector);
|
||||
productions.push_back(p_pdt);
|
||||
return p_pdt;
|
||||
}
|
||||
|
||||
const vector<const Symbol *> &getAllSymbols() const {
|
||||
return symbolTable.getAllSymbols();
|
||||
}
|
||||
|
||||
void modifySymbol(int index, const wstring &name, bool terminator, bool start) {
|
||||
symbolTable.modifySymbol(index, name, terminator, start);
|
||||
}
|
||||
};
|
||||
|
||||
// 项
|
||||
class Item{
|
||||
// 对应的产生式
|
||||
const Production* const production;
|
||||
|
||||
// 点的位置
|
||||
int dot_index = 0;
|
||||
|
||||
const int terminator = 0;
|
||||
|
||||
public:
|
||||
|
||||
Generator(): input("syntaxInput.txt", std::ios::binary) {
|
||||
const bool generated = false;
|
||||
|
||||
explicit Item(const Production *p_pdt, int m_terminator, bool m_generated = false)
|
||||
: production(p_pdt), terminator(m_terminator), generated(m_generated) {}
|
||||
|
||||
void set_dot_index(int m_dot_index) {
|
||||
if(m_dot_index > production->right.size()) {
|
||||
throw runtime_error("DOT_INDEX out of range");
|
||||
}
|
||||
this->dot_index = m_dot_index;
|
||||
}
|
||||
|
||||
int get_dot_index() const {
|
||||
return dot_index;
|
||||
}
|
||||
|
||||
int get_dot_next_symbol() const {
|
||||
if(get_dot_index() == production->right.size()) {
|
||||
return 0;
|
||||
} else {
|
||||
return production->right[dot_index];
|
||||
}
|
||||
}
|
||||
|
||||
int get_dot_next_i_symbol(int i) const {
|
||||
if(get_dot_index() + i >= production->right.size()) {
|
||||
return 0;
|
||||
} else {
|
||||
return production->right[dot_index + i];
|
||||
}
|
||||
}
|
||||
|
||||
int get_terminator() const {
|
||||
return terminator;
|
||||
}
|
||||
|
||||
const Production *get_production() const {
|
||||
return production;
|
||||
}
|
||||
};
|
||||
|
||||
class ItemCollectionManager;
|
||||
|
||||
class ItemCollection{
|
||||
|
||||
int index = 0;
|
||||
|
||||
map<size_t, Item *> items;
|
||||
|
||||
vector<const Item *> cache;
|
||||
|
||||
GrammarResourcePool *pool;
|
||||
|
||||
friend ItemCollectionManager;
|
||||
|
||||
template <class T>
|
||||
inline void hash_combine(std::size_t& seed, const T& v) const
|
||||
{
|
||||
std::hash<T> hasher;
|
||||
seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
explicit ItemCollection(GrammarResourcePool *pool) : pool(pool) {
|
||||
|
||||
}
|
||||
|
||||
const vector<const Item *> &getItems() const {
|
||||
return cache;
|
||||
}
|
||||
|
||||
int getIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
bool addItem(const Production *p_pdt, int dot_index, int terminator, bool generated = false) {
|
||||
auto hasher = hash<int>();
|
||||
size_t seed = hasher(reinterpret_cast<const int>(p_pdt));
|
||||
hash_combine(seed, dot_index);
|
||||
hash_combine(seed, terminator);
|
||||
|
||||
auto it = items.find(seed);
|
||||
if(it != items.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto *p_item = new Item(p_pdt, terminator, generated);
|
||||
p_item->set_dot_index(dot_index);
|
||||
items.insert(pair<size_t, Item *>(seed, p_item));
|
||||
cache.push_back(p_item);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void CLOSURE() {
|
||||
|
||||
bool ifAdd = true;
|
||||
|
||||
while(ifAdd) {
|
||||
ifAdd = false;
|
||||
|
||||
for(const auto & item : items) {
|
||||
int next_symbol = item.second->get_dot_next_symbol();
|
||||
|
||||
if(next_symbol == 0
|
||||
|| pool->getSymbol(next_symbol)->terminator) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for(auto *production : pool->get_productions()) {
|
||||
if(production->left == next_symbol) {
|
||||
vector<int> first_args;
|
||||
first_args.push_back(item.second->get_dot_next_i_symbol(1));
|
||||
first_args.push_back(item.second->get_terminator());
|
||||
|
||||
const auto first_set = pool->FIRST(first_args, 0);
|
||||
for(auto terminator : *first_set) {
|
||||
if(terminator == 0) continue;
|
||||
if(this->addItem(production, 0, terminator, true)) {
|
||||
ifAdd = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void print() const {
|
||||
|
||||
wcout << L"I" << index << L": ";
|
||||
|
||||
for(const auto item : cache) {
|
||||
const auto *p_pdt = item->get_production();
|
||||
int dot_index = item->get_dot_index();
|
||||
wcout << pool->getSymbol(p_pdt->left)->name << L" -> " ;
|
||||
int i = 0;
|
||||
for(const auto &symbol : p_pdt->right) {
|
||||
if(i++ == dot_index) cout << "·";
|
||||
wcout << pool->getSymbol(symbol)->name;
|
||||
}
|
||||
|
||||
if(i++ == dot_index) cout << "·";
|
||||
|
||||
wcout << L"," << pool->getSymbol(item->get_terminator())->name << endl;
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
size_t getHash() const {
|
||||
size_t seed = 0;
|
||||
for(const auto item : cache) {
|
||||
|
||||
if(item->generated) {
|
||||
continue;
|
||||
}
|
||||
|
||||
hash_combine(seed, item->get_production());
|
||||
hash_combine(seed, item->get_dot_index());
|
||||
hash_combine(seed, item->get_terminator());
|
||||
}
|
||||
return seed;
|
||||
}
|
||||
};
|
||||
|
||||
class ItemCollectionManager{
|
||||
|
||||
int index = 0;
|
||||
|
||||
map<size_t, ItemCollection *> ic_map;
|
||||
|
||||
map<size_t, ItemCollection *> ic_content_map;
|
||||
|
||||
vector<const ItemCollection *> ics;
|
||||
|
||||
GrammarResourcePool *pool;
|
||||
|
||||
const Production *start_pdt;
|
||||
|
||||
template <class T>
|
||||
inline void hash_combine(std::size_t& seed, const T& v) const
|
||||
{
|
||||
std::hash<T> hasher;
|
||||
seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
explicit ItemCollectionManager(GrammarResourcePool *resource_pool) : pool(resource_pool) {
|
||||
|
||||
}
|
||||
|
||||
void buildItems() {
|
||||
|
||||
const auto startSymbol = pool->getStartSymbol();
|
||||
|
||||
wstring new_symbol_name = startSymbol->name + L"'";
|
||||
|
||||
int new_symbol_index = pool->addSymbol(new_symbol_name, startSymbol->terminator);
|
||||
|
||||
pool->modifySymbol(startSymbol->index, startSymbol->name.substr(1), false, false);
|
||||
|
||||
const auto *p_pdt = pool->addProduction(new_symbol_index, {startSymbol->index});
|
||||
|
||||
this->start_pdt = p_pdt;
|
||||
|
||||
auto *pi_ic = new ItemCollection(pool);
|
||||
|
||||
// -1 代表 $
|
||||
pi_ic->addItem(p_pdt, 0, -1);
|
||||
|
||||
pi_ic->CLOSURE();
|
||||
|
||||
addItemCollection(0, 0, pi_ic);
|
||||
|
||||
bool ifAdd = true;
|
||||
|
||||
while(ifAdd) {
|
||||
|
||||
ifAdd = false;
|
||||
const auto &r_ics = getItemCollections();
|
||||
vector<const ItemCollection *> temp_ics(r_ics.begin(), r_ics.end());
|
||||
for(const auto ic : temp_ics) {
|
||||
for(const auto symbol : pool->getAllSymbols()) {
|
||||
if(symbol->index <= 0) {
|
||||
continue;
|
||||
}
|
||||
if(GOTO(ic, symbol->index)) {
|
||||
ifAdd = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const Production *getStartProduction() const {
|
||||
return start_pdt;
|
||||
}
|
||||
|
||||
const vector<const ItemCollection *> &getItemCollections() const{
|
||||
return ics;
|
||||
}
|
||||
|
||||
ItemCollection *getItemCollectionByHash(size_t hash) {
|
||||
ItemCollection *p_ic = nullptr;
|
||||
auto it = ic_content_map.find(hash);
|
||||
if(it != ic_content_map.end()) {
|
||||
p_ic = it->second;
|
||||
}
|
||||
return p_ic;
|
||||
}
|
||||
|
||||
bool addItemCollection(int idx, int symbol, ItemCollection *p_ic){
|
||||
|
||||
size_t ic_hash = p_ic->getHash();
|
||||
auto it = ic_content_map.find(ic_hash);
|
||||
if (it != ic_content_map.end()) {
|
||||
p_ic = it->second;
|
||||
} else {
|
||||
p_ic->index = this->index++;
|
||||
ic_content_map.insert(pair<size_t, ItemCollection *>(ic_hash, p_ic));
|
||||
ics.push_back(p_ic);
|
||||
}
|
||||
|
||||
auto hasher = hash<int>();
|
||||
size_t seed = hasher(idx);
|
||||
hash_combine(seed, symbol);
|
||||
|
||||
auto it2 = ic_map.find(seed);
|
||||
if(it2 != ic_map.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if(symbol != 0)
|
||||
wcout << L"GOTO(" << idx << L", " << pool->getSymbol(symbol)->name << L")" << endl;
|
||||
|
||||
ic_map.insert(pair<size_t, ItemCollection *>(seed, p_ic));
|
||||
p_ic->print();
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
const ItemCollection* getGOTO(int idx, int symbol) const {
|
||||
|
||||
auto hasher = hash<int>();
|
||||
size_t seed = hasher(idx);
|
||||
hash_combine(seed, symbol);
|
||||
|
||||
auto it = ic_map.find(seed);
|
||||
if(it != ic_map.end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
bool GOTO(const ItemCollection *p_ic, int symbol) {
|
||||
auto *pt_ic = new ItemCollection(pool);
|
||||
|
||||
for(const auto &item : p_ic->cache) {
|
||||
if(item->get_dot_next_symbol() == symbol) {
|
||||
pt_ic->addItem(item->get_production(), item->get_dot_index() + 1, item->get_terminator());
|
||||
}
|
||||
}
|
||||
auto p_temp_ic = this->getItemCollectionByHash(pt_ic->getHash());
|
||||
if(p_temp_ic == nullptr)
|
||||
pt_ic->CLOSURE();
|
||||
else pt_ic = p_temp_ic;
|
||||
|
||||
if(!pt_ic->items.empty()) {
|
||||
return this->addItemCollection(p_ic->index, symbol, pt_ic);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class AnalyseTableGenerator {
|
||||
|
||||
using Action = enum {
|
||||
MOVE, STATUTE, ACC, STEP_GOTO
|
||||
};
|
||||
|
||||
struct Step {
|
||||
|
||||
const Action action;
|
||||
const union Target{
|
||||
int index;
|
||||
const Production *production;
|
||||
} target;
|
||||
|
||||
Step(Action action, int index) : action(action), target(Target{index}){}
|
||||
Step(Action action, const Production *p_pdt) : action(action), target(Target{.production = p_pdt}){}
|
||||
};
|
||||
|
||||
map<size_t, Step *> ACTION;
|
||||
|
||||
map<size_t, Step *> GOTO;
|
||||
|
||||
const ItemCollectionManager *icm;
|
||||
|
||||
const GrammarResourcePool *pool;
|
||||
|
||||
template <class T>
|
||||
inline void hash_combine(std::size_t& seed, const T& v) const
|
||||
{
|
||||
std::hash<T> hasher;
|
||||
seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
|
||||
}
|
||||
|
||||
void add_action(int index, int terminator_symbol, Action action, const Production *target_pdt) {
|
||||
size_t seed = 0;
|
||||
hash_combine(seed, index);
|
||||
hash_combine(seed, terminator_symbol);
|
||||
|
||||
auto it = ACTION.find(seed);
|
||||
if(it == ACTION.end()) {
|
||||
auto step = new Step(action, target_pdt);
|
||||
ACTION.insert(pair<size_t, Step *>(seed, step));
|
||||
} else {
|
||||
if(it->second->action != action || it->second->target.production != target_pdt)
|
||||
throw runtime_error("Conflict Occurred, Syntax NOT LR(1)");
|
||||
}
|
||||
}
|
||||
|
||||
void add_action(int index, int terminator_symbol, Action action, int target_index) {
|
||||
size_t seed = 0;
|
||||
hash_combine(seed, index);
|
||||
hash_combine(seed, terminator_symbol);
|
||||
|
||||
auto it = ACTION.find(seed);
|
||||
if(it == ACTION.end()) {
|
||||
auto step = new Step(action, target_index);
|
||||
ACTION.insert(pair<size_t, Step *>(seed, step));
|
||||
} else {
|
||||
if(it->second->action != action || it->second->target.index != target_index)
|
||||
throw runtime_error("Conflict Occurred, Syntax NOT LR(1)");
|
||||
}
|
||||
}
|
||||
|
||||
void add_goto(int index, int non_terminator_symbol, int target_index) {
|
||||
size_t seed = 0;
|
||||
hash_combine(seed, index);
|
||||
hash_combine(seed, non_terminator_symbol);
|
||||
|
||||
auto it = GOTO.find(seed);
|
||||
if(it == GOTO.end()) {
|
||||
auto step = new Step(STEP_GOTO, target_index);
|
||||
GOTO.insert(pair<size_t, Step *>(seed, step));
|
||||
} else {
|
||||
if(it->second->target.index != target_index)
|
||||
throw runtime_error("Conflict Occurred, Syntax NOT LR(1)");
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
explicit AnalyseTableGenerator(const GrammarResourcePool *p_pool, const ItemCollectionManager *p_icm)
|
||||
:pool(p_pool) , icm(p_icm) {
|
||||
|
||||
}
|
||||
|
||||
void generate() {
|
||||
const auto &ics = icm->getItemCollections();
|
||||
for(const auto *ic : ics) {
|
||||
for(const auto *item : ic->getItems()) {
|
||||
if(item->get_production() == icm->getStartProduction()
|
||||
&& item->get_dot_next_symbol() == 0
|
||||
&& item->get_terminator() == -1) {
|
||||
this->add_action(ic->getIndex(), -1, ACC, 0);
|
||||
}
|
||||
int next_symbol = item->get_dot_next_symbol();
|
||||
if(next_symbol != 0) {
|
||||
const auto *p_ic = icm->getGOTO(ic->getIndex(), next_symbol);
|
||||
if(pool->getSymbol(next_symbol)->terminator) {
|
||||
if (p_ic != nullptr) {
|
||||
this->add_action(ic->getIndex(), next_symbol, MOVE, p_ic->getIndex());
|
||||
}
|
||||
} else {
|
||||
if (p_ic != nullptr) {
|
||||
this->add_goto(ic->getIndex(), next_symbol, p_ic->getIndex());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(item->get_production()->left != pool->getStartSymbol()->index) {
|
||||
this->add_action(ic->getIndex(), next_symbol, STATUTE, item->get_production());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const Step *findActionStep(int index, int terminator_symbol) {
|
||||
size_t seed = 0;
|
||||
hash_combine(seed, index);
|
||||
hash_combine(seed, terminator_symbol);
|
||||
|
||||
auto it = ACTION.find(seed);
|
||||
if(it != ACTION.end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
const Step *findGotoStep(int index, int non_terminator_symbol) {
|
||||
size_t seed = 0;
|
||||
hash_combine(seed, index);
|
||||
hash_combine(seed, non_terminator_symbol);
|
||||
|
||||
auto it = GOTO.find(seed);
|
||||
if (it != GOTO.end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void print() {
|
||||
wcout << L"ACTION" << endl;
|
||||
vector<int> symbols;
|
||||
|
||||
wcout << std::left << std::setw(4) << " ";
|
||||
for(const auto *symbol : pool->getAllSymbols()) {
|
||||
if(symbol->index == 0) continue;
|
||||
if(symbol->terminator) {
|
||||
wcout << std::left << std::setw(4) << symbol->name;
|
||||
symbols.push_back(symbol->index);
|
||||
}
|
||||
}
|
||||
wcout << endl;
|
||||
|
||||
for(int i = 0; i < icm->getItemCollections().size(); i++){
|
||||
wcout << std::left << std::setw(4) << i;
|
||||
for(int symbol : symbols) {
|
||||
auto p_step = this->findActionStep(i, symbol);
|
||||
if(p_step == nullptr) {
|
||||
wcout << std::left << std::setw(4) << " ";
|
||||
} else {
|
||||
if(p_step->action == MOVE)
|
||||
wcout << std::left << std::setw(4) << wstring(L"s") + to_wstring(p_step->target.index);
|
||||
else if(p_step->action == ACC)
|
||||
wcout << std::left << std::setw(4) << L"acc";
|
||||
else
|
||||
wcout << std::left << std::setw(4) << L"r";
|
||||
}
|
||||
}
|
||||
wcout << endl;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class Generator{
|
||||
|
||||
// 文件输入
|
||||
wifstream input;
|
||||
|
||||
GrammarResourcePool *pool;
|
||||
|
||||
map<string, ItemCollection *> C;
|
||||
|
||||
ItemCollectionManager *icm;
|
||||
|
||||
AnalyseTableGenerator *atg;
|
||||
|
||||
public:
|
||||
|
||||
Generator(): input("syntaxInput.txt", std::ios::binary),
|
||||
pool(new GrammarResourcePool()),
|
||||
icm(new ItemCollectionManager(pool)),
|
||||
atg(new AnalyseTableGenerator(pool, icm)){
|
||||
|
||||
auto* codeCvtToUTF8= new std::codecvt_utf8<wchar_t>;
|
||||
|
||||
input.imbue(std::locale(input.getloc(), codeCvtToUTF8));
|
||||
}
|
||||
|
||||
void test() {
|
||||
auto p_non_term_set = FIRST(1);
|
||||
print_symbols(*p_non_term_set);
|
||||
|
||||
p_non_term_set = FIRST(3);
|
||||
print_symbols(*p_non_term_set);
|
||||
|
||||
p_non_term_set = FIRST(6);
|
||||
print_symbols(*p_non_term_set);
|
||||
|
||||
FOLLOW();
|
||||
|
||||
print_symbols(*FOLLOW(2));
|
||||
print_symbols(*FOLLOW(6));
|
||||
print_symbols(*FOLLOW(3));
|
||||
void run() {
|
||||
pool->FOLLOW();
|
||||
icm->buildItems();
|
||||
atg->generate();
|
||||
atg->print();
|
||||
}
|
||||
|
||||
// 得到所有的产生式
|
||||
void getProductions() {
|
||||
|
||||
//读入文法文件
|
||||
// 读入文法文件
|
||||
wstring temp_line;
|
||||
|
||||
while (getline(input, temp_line)) {
|
||||
auto middle_index = temp_line.find(L"->", 0);
|
||||
|
||||
|
||||
if(middle_index == string::npos) {
|
||||
throw runtime_error("-> NOT FOUND");
|
||||
}
|
||||
|
||||
wstring front = trim(temp_line.substr(0, middle_index));
|
||||
int left = symbolTable.addSymbol(front, false);
|
||||
|
||||
wstring back = trim(temp_line.substr(middle_index + 2, temp_line.size() - middle_index - 2));
|
||||
|
||||
wstringstream terminator, non_terminator;
|
||||
vector<int> symbols;
|
||||
bool is_terminator = false;
|
||||
for(const auto &c : back) {
|
||||
if (c == L'\"') {
|
||||
if(is_terminator) {
|
||||
symbols.push_back(symbolTable.addSymbol(trim(terminator.str()), true));
|
||||
terminator.str(L"");
|
||||
terminator.clear();
|
||||
}
|
||||
is_terminator = !is_terminator;
|
||||
continue;
|
||||
}
|
||||
if(c == L' ' || c == L'\r') {
|
||||
wstring temp_symbol = trim(non_terminator.str());
|
||||
if(!temp_symbol.empty()) {
|
||||
symbols.push_back(symbolTable.addSymbol(trim(non_terminator.str()), false));
|
||||
non_terminator.str(L"");
|
||||
non_terminator.clear();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if(is_terminator) {
|
||||
terminator << c;
|
||||
} else {
|
||||
non_terminator << c;
|
||||
}
|
||||
}
|
||||
|
||||
auto p_pdt = new Production(left, symbols);
|
||||
|
||||
productions.push_back(p_pdt);
|
||||
|
||||
pool->parse_production_string_line(temp_line);
|
||||
}
|
||||
}
|
||||
|
||||
@ -566,7 +1033,7 @@ int main() {
|
||||
|
||||
generator.getProductions();
|
||||
|
||||
generator.test();
|
||||
generator.run();
|
||||
//输出时间
|
||||
end = clock(); //结束时间
|
||||
cout << endl;
|
||||
|
Loading…
Reference in New Issue
Block a user