词法分析整合;代码调整与完善;

This commit is contained in:
Saturneric 2021-05-01 01:30:50 +08:00
parent 1714e6f560
commit afd82ebd74
8 changed files with 634 additions and 28 deletions

View File

@ -9,6 +9,4 @@ include_directories(./include)
aux_source_directory(src SRC_FILES)
add_executable(LR1Generator LR1Generator.cpp ${SRC_FILES})
add_executable(syntaxParser LR1Generator.cpp ${SRC_FILES})
add_executable(LR1Compiler Compiler.cpp ${SRC_FILES})

View File

@ -1,11 +1,15 @@
#include <iostream>
#include <ctime>
#include <Automata.h>
#include <SymbolTable.h>
#include <GrammarResourcePool.h>
#include <AnalyseTableGenerator.h>
#include <LR1Generator.h>
#include <SyntaxParser.h>
using std::vector;
using std::wstring;
@ -13,14 +17,33 @@ using std::wcout;
using std::endl;
#include <LR1Generator.h>
#include <SyntaxParser.h>
int main(int argc, const char* argv[]) {
int main() {
try {
clock_t start, end;//定义clock_t变量
start = clock(); //开始时间
printf("Compile Program Based on LR(1) Written By Saturneric\n");
if (argc < 2) {
printf("Usage: <Input Path>\n");
return -1;
}
clock_t start, end;
start = clock();
Automata atm(argv[1]);
atm.parse();
atm.output();
// Output the elapsed time
end = clock();
double times = double(end - start) / CLOCKS_PER_SEC;
wcout << "Token Automata Run time = " << times << "s MicroSeconds" << " = " << times * 1000 << "ms" << endl;
// LR(1) generation
start = clock();
const GrammarResourcePool *pool;
@ -37,9 +60,11 @@ int main() {
//输出时间
end = clock(); //结束时间
double times = double(end - start) / CLOCKS_PER_SEC;
wcout << "LR1Generator Run time = " << times << "s MicroSeconds" << " = " << times * 1000 << "ms" << endl;
times = double(end - start) / CLOCKS_PER_SEC;
wcout << "LR(1) Generator Run time = " << times << "s MicroSeconds" << " = " << times * 1000 << "ms" << endl;
// Syntax analysis
start = clock(); //开始时间
SyntaxParser syntaxParser(pool, atg);
@ -51,7 +76,8 @@ int main() {
//输出时间
end = clock(); //结束时间
times = double(end - start) / CLOCKS_PER_SEC;
wcout << "SyntaxParser Run time = " << times << "s MicroSeconds " << " = " << times * 1000 << "ms" << endl;
wcout << "Syntax Parser Run time = " << times << "s MicroSeconds " << " = " << times * 1000 << "ms" << endl;
} catch(std::runtime_error &e) {
std::wcout << "Runtime Error: " << e.what() << endl;
}

View File

@ -1,7 +0,0 @@
//
// Created by Administrator on 2021/4/30.
//
// Empty program entry point: the body contains no statements, so the
// program returns 0 implicitly as soon as it starts.
int main() {
}

View File

@ -0,0 +1,89 @@
@struct_type -> "STRUCT" "ID" "OPENING_BRACE" member_list "CLOSING_BRACE" more_struct_type
more_struct_type -> "EOF"
more_struct_type -> ε
member_list -> type_spec declarators "SEMICOLON" member_list
member_list -> ε
type_spec -> base_type_spec
type_spec -> @struct_type
base_type_spec -> floating_pt_type
base_type_spec -> integer_type
base_type_spec -> "CHAR"
base_type_spec -> "BOOLEAN"
floating_pt_type -> "FLOAT"
floating_pt_type -> "DOUBLE"
floating_pt_type -> "LONG" "DOUBLE"
integer_type -> signed_int
integer_type -> unsigned_int
signed_int -> "SHORT"
signed_int -> "INT16"
signed_int -> "LONG"
signed_int -> "INT32"
signed_int -> "LONG" "LONG"
signed_int -> "INT64"
signed_int -> "INT8"
unsigned_int -> "UNSIGNED" "SHORT"
unsigned_int -> "UNSIGNED" "LONG"
unsigned_int -> "UNSIGNED" "LONG" "LONG"
unsigned_int -> "UINT16"
unsigned_int -> "UINT32"
unsigned_int -> "UINT64"
unsigned_int -> "UINT8"
declarators -> declarator more_declarators
more_declarators -> "COMMA" declarator more_declarators
more_declarators -> ε
declarator -> "ID" more_declarator
more_declarator -> exp_list
more_declarator -> ε
exp_list -> "LEFT_BRACKET" or_expr more_or_expr "RIGHT_BRACKET"
more_or_expr -> "COMMA" or_expr more_or_expr
more_or_expr -> ε
or_expr -> xor_expr more_xor_expr
more_xor_expr -> "DELIMITER" xor_expr more_xor_expr
more_xor_expr -> ε
xor_expr -> and_expr more_and_expr
more_and_expr -> "INSERT" and_expr more_and_expr
more_and_expr -> ε
and_expr -> shift_expr more_shift_expr
more_shift_expr -> "AND" shift_expr more_shift_expr
more_shift_expr -> ε
shift_expr -> add_expr more_add_expr
more_add_expr -> shift_sign add_expr more_add_expr
shift_sign -> "RIGHT_SHIFT"
shift_sign -> "LEFT_SHIFT"
more_add_expr -> ε
add_expr -> multi_expr more_multi_expr
more_multi_expr -> multi_sign multi_expr more_multi_expr
multi_sign -> "PLUS"
multi_sign -> "SUB"
more_multi_expr -> ε
multi_expr -> unary_expr more_unary_expr
more_unary_expr -> unary_sign unary_expr more_unary_expr
unary_sign -> "MULT"
unary_sign -> "SLASH"
unary_sign -> "PERCENT"
more_unary_expr -> ε
unary_expr -> unary_sign_2 unary_declare
unary_sign_2 -> "SUB"
unary_sign_2 -> "PLUS"
unary_sign_2 -> "TILDE"
unary_sign_2 -> ε
unary_declare -> "INTEGER"
unary_declare -> "STRING"
unary_declare -> BOOLEAN_VALUE
BOOLEAN_VALUE -> "TRUE"
BOOLEAN_VALUE -> "FALSE"

118
include/Automata.h Normal file
View File

@ -0,0 +1,118 @@
//
// Created by Administrator on 2021/5/1.
//
#ifndef SYNTAXPARSER_AUTOMATA_H
#define SYNTAXPARSER_AUTOMATA_H
#include <map>
#include <utility>
#include <vector>
#include <string>
#include <fstream>
#include <codecvt>
#include <sstream>
/**
 * Lexer driven by a hand-written finite automaton.
 *
 * The constructor opens the file at `path` as a wide-character stream whose
 * bytes are decoded as UTF-8. parse() then repeatedly calls nextToken() and
 * stores every token until the end-of-input flag is raised; output() writes
 * the collected tokens out.
 */
class Automata {
public:
    // Wide-character stream over the source file being tokenized.
    std::wifstream input;

    /**
     * Opens `path` in binary mode and installs a UTF-8 decoding facet.
     *
     * @param path path of the file to tokenize.
     */
    explicit Automata(const std::string& path) : input(path, std::ios::binary) {
        // std::locale takes ownership of a facet constructed with the default
        // reference count, so the facet must not be deleted here.
        auto* codeCvtToUTF8 = new std::codecvt_utf8<wchar_t>;
        input.imbue(std::locale(input.getloc(), codeCvtToUTF8));
    }

    ~Automata() {
        input.close();
    }

    /// Reads tokens with nextToken() until the end-of-input flag is set.
    void parse() {
        while (!ifeof) {
            tokens.push_back(nextToken());
        }
    }

    /// Writes the collected tokens out (defined in the implementation file).
    void output();

private:
    // Named enum (instead of an alias to an unnamed enum) so the type has a
    // proper name when it appears in member signatures and container types.
    enum TokenType {
        /* Reserve Words */
        STRUCT, BOOLEAN, SHORT, LONG,
        DOUBLE, FLOAT,
        INT8, INT16, INT32, INT64,
        UINT8, UINT16, UINT32, UINT64,
        CHAR,
        UNSIGNED,
        /* Special Symbols */
        OPENING_BRACE, CLOSING_BRACE,
        SEMICOLON,
        LEFT_BRACKET, RIGHT_BRACKET,
        MULT, PLUS, SUB, TILDE, SLASH,
        PERCENT, LEFT_SHIFT, RIGHT_SHIFT, AND, INSERT,
        DELIMITER, COMMA,
        /* Multicharacter Tokens */
        ID, LETTER, DIGIT, UNDERLINE, T_TRUE, T_FALSE,
        INTEGER, INTEGER_TYPE_SUFFIX, STRING, T_BOOLEAN,
        /* None & Error & EOF */
        NONE, ERROR, T_EOF
    };

    // States of the token automaton.
    enum StateType {
        START,
        S_LETTER, S_UNDERLINE, S_DIGIT,
        INT_0, INT_NOT_0, INT_TYPE_SUFFIX,
        STRING_START, STRING_END,
        S_SIGN,
        DONE,
        S_NONE
    };

    // One reserved word: its spelling and the token type it maps to.
    struct ReservedWord {
        std::wstring str;
        TokenType token;
    };

    // Immutable record describing one scanned token.
    struct TokenInfo {
        const int line;          // line counter value when the token was produced
        const TokenType token;   // token classification
        const std::wstring str;  // text collected for the token
        const StateType state;   // automaton state recorded for the token

        TokenInfo(const int line, const TokenType token, std::wstring str, StateType state)
            : line(line), token(token), str(std::move(str)), state(state) {}
    };

    // NOTE(review): not referenced by any code visible in this file; kept so
    // the class layout is unchanged.
    FILE* fp = nullptr;

    bool ifeof = false;             // set once the input is exhausted
    std::vector<TokenInfo> tokens;  // tokens collected by parse()
    int line = 1;                   // current line number, starting at 1

    const static std::vector<ReservedWord> reservedWords;
    const static std::map<TokenType, std::wstring> tokenTypeStrMap;
    const static std::map<StateType, std::wstring> stateTypeStrMap;

    // Declared without the redundant `Automata::` qualifier: an extra
    // qualification on an in-class member declaration is ill-formed and is
    // rejected by GCC and Clang.
    wchar_t nextChar();
    void pushBackChar();
    TokenInfo nextToken();
    static TokenType reservedLookup(const std::wstring& s);
};
#endif //SYNTAXPARSER_AUTOMATA_H

386
src/Automata.cpp Normal file
View File

@ -0,0 +1,386 @@
//
// Created by Administrator on 2021/5/1.
//
#include "Automata.h"
// Reserved-word table: pairs each keyword spelling with the token type it
// produces. reservedLookup() scans this table with an exact string
// comparison, so the spellings below are matched case-sensitively.
const std::vector<Automata::ReservedWord> Automata::reservedWords = {
{L"struct", STRUCT},
{L"boolean", BOOLEAN},
{L"short", SHORT},
{L"long", LONG},
{L"double", DOUBLE},
{L"float", FLOAT},
{L"int8", INT8},
{L"int16", INT16},
{L"int32", INT32},
{L"int64", INT64},
{L"uint8", UINT8},
{L"uint16", UINT16},
{L"uint32", UINT32},
{L"uint64", UINT64},
{L"char", CHAR},
{L"unsigned", UNSIGNED},
// Boolean literals are recognised only in upper case.
{L"TRUE", T_TRUE},
{L"FALSE", T_FALSE}
};
const std::map<Automata::StateType, std::wstring> Automata::stateTypeStrMap = {
{START, L"START"},
{S_LETTER, L"LETTER"},
{S_UNDERLINE, L"UNDERLINE"},
{S_DIGIT, L"DIGIT"},
{INT_0, L"INT_0"},
{INT_NOT_0, L"INT_NOT_0"},
{INT_TYPE_SUFFIX, L"INT_TYPE_SUFFIX"},
{STRING_START, L"STRING_START"},
{STRING_END, L"STRING_END"},
{DONE, L"DONE"},
{S_SIGN, L"SIGN"}
};
// Printable name of every token type, used by output() when writing tokens.
// Each enumerator is listed exactly once; a repeated key in a std::map
// initializer list is silently ignored, so duplicates only hide mistakes.
const std::map<Automata::TokenType, std::wstring> Automata::tokenTypeStrMap = {
    /* Reserved words */
    {STRUCT, L"STRUCT"},
    {BOOLEAN, L"BOOLEAN"},
    {SHORT, L"SHORT"},
    {LONG, L"LONG"},
    {FLOAT, L"FLOAT"},
    {DOUBLE, L"DOUBLE"},
    {INT8, L"INT8"},
    {INT16, L"INT16"},
    {INT32, L"INT32"},
    {INT64, L"INT64"},
    {UINT8, L"UINT8"},
    {UINT16, L"UINT16"},
    {UINT32, L"UINT32"},
    {UINT64, L"UINT64"},
    {CHAR, L"CHAR"},
    {UNSIGNED, L"UNSIGNED"},
    /* Special symbols */
    {OPENING_BRACE, L"OPENING_BRACE"},
    {CLOSING_BRACE, L"CLOSING_BRACE"},
    {SEMICOLON, L"SEMICOLON"},
    {LEFT_BRACKET, L"LEFT_BRACKET"},
    {RIGHT_BRACKET, L"RIGHT_BRACKET"},
    {MULT, L"MULT"},
    {PLUS, L"PLUS"},
    {SUB, L"SUB"},
    {TILDE, L"TILDE"},
    {SLASH, L"SLASH"},
    {PERCENT, L"PERCENT"},
    {LEFT_SHIFT, L"LEFT_SHIFT"},
    {RIGHT_SHIFT, L"RIGHT_SHIFT"},
    {AND, L"AND"},
    {INSERT, L"INSERT"},
    {DELIMITER, L"DELIMITER"},
    {COMMA, L"COMMA"},
    /* Multi-character tokens */
    {ID, L"ID"},
    {LETTER, L"LETTER"},
    {DIGIT, L"DIGIT"},
    {UNDERLINE, L"UNDERLINE"},
    {T_TRUE, L"TRUE"},
    {T_FALSE, L"FALSE"},
    {INTEGER, L"INTEGER"},
    {INTEGER_TYPE_SUFFIX, L"INTEGER_TYPE_SUFFIX"},
    {T_BOOLEAN, L"BOOLEAN"},
    {STRING, L"STRING"},
    /* None, error and end of input */
    {NONE, L"NONE"},
    {ERROR, L"ERROR"},
    {T_EOF, L"EOF"}
};
/**
 * Writes every collected token to "./tokenOut.txt" (truncating the file).
 *
 * Layout: each output line starts with the source line number followed by a
 * space, then the tokens recorded for that line separated by spaces. Regular
 * tokens are written as NAME(text); ERROR tokens are written as
 * ERROR{text, STATE}, where STATE is the automaton state stored with the token.
 *
 * @throws std::out_of_range if a token carries a type or state that has no
 *         entry in the name tables (instead of dereferencing an end iterator).
 */
void Automata::output() {
    // A narrow path is used because a wide-string file name constructor is
    // not available on every standard library implementation.
    std::wofstream stream("./tokenOut.txt", std::ios::binary | std::ios::trunc);
    // Encode the output as UTF-8, mirroring how the input is decoded. Without
    // a suitable facet, non-ASCII token text cannot be converted and the
    // stream stops writing. The locale takes ownership of the facet.
    stream.imbue(std::locale(stream.getloc(), new std::codecvt_utf8<wchar_t>));

    int temp_line = 1;
    stream << "1 ";
    for (const auto& token : tokens) {
        if (token.line > temp_line) {
            // First token of a later source line: start a new output line.
            temp_line = token.line;
            stream << L'\n' << temp_line << ' ';
        }
        const std::wstring& tokenName = tokenTypeStrMap.at(token.token);
        if (token.token == ERROR) {
            stream << tokenName << '{' << token.str << ", "
                   << stateTypeStrMap.at(token.state) << '}' << ' ';
        }
        else {
            stream << tokenName << '(' << token.str << ')' << ' ';
        }
    }
    // The stream flushes and closes itself when it goes out of scope.
}
/**
 * Reads the next character from the input stream; whitespace is not skipped.
 *
 * Any failed read raises the end-of-input flag `ifeof`, not only a read that
 * hits end of file. A stream that could not be opened or that fails while
 * decoding reports a failure without the EOF bit; if that were ignored, the
 * flag would never be set and parse() would never terminate.
 *
 * @return the character read, or L'\0' when no character could be read.
 */
wchar_t Automata::nextChar() {
    wchar_t c = L'\0';  // defined value even when the read fails
    if (!input.get(c)) {
        this->ifeof = true;
    }
    return c;
}
namespace {

// ASCII-only character classes used by the lexer. The <cctype> functions
// have undefined behaviour for arguments that are not representable as
// unsigned char, which any non-Latin wchar_t is; explicit range checks are
// defined for every input and agree with the "C" locale on ASCII.
bool isAsciiDigit(wchar_t ch) {
    return ch >= L'0' && ch <= L'9';
}

bool isAsciiLetter(wchar_t ch) {
    return (ch >= L'a' && ch <= L'z') || (ch >= L'A' && ch <= L'Z');
}

}  // namespace

/**
 * Scans and returns the next token from the input.
 *
 * The automaton starts in START and consumes characters until it reaches
 * DONE. Characters belonging to the token are appended to its text; the
 * character that terminates a token is pushed back so the next call sees it.
 * An identifier is checked against the reserved-word table before returning.
 *
 * If the input ends at any point, the loop stops immediately and the token
 * is reported as T_EOF together with whatever text was collected so far.
 * NOTE(review): a token that is cut off by the end of input is therefore
 * reported as T_EOF rather than with its own type - confirm this is intended.
 *
 * @return the token type, its text, the value of the line counter at return
 *         time, and the automaton state that was active before the final
 *         transition (output() prints that state for ERROR tokens).
 */
Automata::TokenInfo Automata::nextToken() {
    TokenType currentToken = NONE;
    StateType state = START, last_state = S_NONE;
    std::wstringstream ss;
    while (state != DONE) {
        last_state = state;
        wchar_t CH = nextChar();
        bool save = true;  // whether CH is appended to the token text
        if (ifeof) {
            currentToken = T_EOF;
            break;
        }
        switch (state) {
        case START:
            if (isAsciiDigit(CH)) {
                state = (CH == '0') ? INT_0 : INT_NOT_0;
            }
            else if (isAsciiLetter(CH)) {
                state = S_LETTER;
            }
            else if (CH == '\"') {
                state = STRING_START;
            }
            else if ((CH == ' ') || (CH == '\t') || (CH == '\r')) {
                save = false;  // skip blanks between tokens
            }
            else if (CH == '\n') {
                this->line++;
                save = false;
            }
            else {
                // Punctuation: the token type is decided here; the S_SIGN
                // state then finishes the token on the next character.
                state = S_SIGN;
                switch (CH) {
                case '{':
                    currentToken = OPENING_BRACE;
                    break;
                case '}':
                    currentToken = CLOSING_BRACE;
                    break;
                case ';':
                    currentToken = SEMICOLON;
                    break;
                case '[':
                    currentToken = LEFT_BRACKET;
                    break;
                case ']':
                    currentToken = RIGHT_BRACKET;
                    break;
                case '*':
                    currentToken = MULT;
                    break;
                case '+':
                    currentToken = PLUS;
                    break;
                case '-':
                    currentToken = SUB;
                    break;
                case '~':
                    currentToken = TILDE;
                    break;
                case '/':
                    currentToken = SLASH;
                    break;
                case '%':
                    currentToken = PERCENT;
                    break;
                case '>': {
                    // The lookahead lives in its own variable so CH keeps
                    // the operator character for the token text.
                    const wchar_t lookahead = nextChar();
                    if (!ifeof && lookahead == '>') {
                        currentToken = RIGHT_SHIFT;
                        ss << lookahead;
                    }
                    else {
                        if (!ifeof) {
                            pushBackChar();
                        }
                        currentToken = ERROR;  // a lone '>' is not a token
                    }
                    break;
                }
                case '<': {
                    const wchar_t lookahead = nextChar();
                    if (!ifeof && lookahead == '<') {
                        currentToken = LEFT_SHIFT;
                        ss << lookahead;
                    }
                    else {
                        if (!ifeof) {
                            pushBackChar();
                        }
                        currentToken = ERROR;  // a lone '<' is not a token
                    }
                    break;
                }
                case '&':
                    currentToken = AND;
                    break;
                case '^':
                    currentToken = INSERT;
                    break;
                case '|':
                    currentToken = DELIMITER;
                    break;
                case ',':
                    currentToken = COMMA;
                    break;
                default:
                    currentToken = ERROR;
                    break;
                }
            }
            break;
        case S_LETTER:
            if (CH == '_') {
                state = S_UNDERLINE;
            }
            else if (isAsciiDigit(CH)) {
                state = S_DIGIT;
            }
            else if (isAsciiLetter(CH)) {
                state = S_LETTER;
            }
            else {
                currentToken = ID;
                pushBackChar();
                state = DONE;
                save = false;
            }
            break;
        case S_DIGIT:
            // NOTE(review): '_' is not accepted after a digit, so "a1_b"
            // ends the identifier at "a1" - confirm against the token spec.
            if (isAsciiLetter(CH)) {
                state = S_LETTER;
            }
            else if (isAsciiDigit(CH)) {
                state = S_DIGIT;
            }
            else {
                currentToken = ID;
                pushBackChar();
                state = DONE;
                save = false;
            }
            break;
        case S_UNDERLINE:
            if (isAsciiDigit(CH)) {
                state = S_DIGIT;
            }
            else if (isAsciiLetter(CH)) {
                state = S_LETTER;
            }
            else {
                // An underscore must be followed by a letter or digit.
                // Finish the token here: staying in this state would
                // re-read the pushed-back character forever.
                pushBackChar();
                currentToken = ERROR;
                state = DONE;
                save = false;
            }
            break;
        case INT_0:
            if (CH == 'l' || CH == 'L') {
                state = INT_TYPE_SUFFIX;
            }
            else {
                currentToken = INTEGER;
                pushBackChar();
                state = DONE;
                save = false;
            }
            break;
        case INT_NOT_0:
            if (CH == 'l' || CH == 'L') {
                state = INT_TYPE_SUFFIX;
            }
            else if (isAsciiDigit(CH)) {
                state = INT_NOT_0;
            }
            else {
                currentToken = INTEGER;
                pushBackChar();
                state = DONE;
                save = false;
            }
            break;
        case INT_TYPE_SUFFIX:
            state = DONE;
            currentToken = INTEGER;
            pushBackChar();
            save = false;
            break;
        case STRING_START:
            if (CH == '\\') {
                const wchar_t escaped = nextChar();
                const bool knownEscape =
                    (escaped == 'b') || (escaped == 't') || (escaped == 'n') ||
                    (escaped == 'f') || (escaped == 'r') || (escaped == '"') ||
                    (escaped == '\\');
                if (!ifeof && knownEscape) {
                    ss << CH;      // the backslash
                    CH = escaped;  // appended by the shared save step below
                    state = STRING_START;
                }
                else {
                    currentToken = ERROR;
                    if (!ifeof) {
                        // Unknown escape: end the token as an error. CH is
                        // still the backslash, so the error text shows where
                        // the string went wrong; the offending character is
                        // pushed back and scanned again as a new token.
                        pushBackChar();
                        state = DONE;
                    }
                    // At end of input the check at the top of the loop
                    // reports T_EOF on the next pass.
                }
            }
            else if (CH == '\"') {
                state = STRING_END;
            }
            else if (CH == ' ') {
                // A space inside a string is stored as the three characters
                // \40; a plain space separates tokens in the token listing.
                ss << '\\' << '4';
                CH = '0';
                state = STRING_START;
            }
            else {
                state = STRING_START;
            }
            break;
        case STRING_END:
            state = DONE;
            currentToken = STRING;
            pushBackChar();
            save = false;
            break;
        case S_SIGN:
            state = DONE;
            pushBackChar();
            save = false;
            break;
        case DONE:
        case S_NONE:
            // Never active inside the loop; listed so the switch is exhaustive.
            break;
        }
        if (save) {
            ss << CH;
        }
        if (state == DONE && currentToken == ID) {
            // An identifier may actually be a reserved word.
            currentToken = reservedLookup(ss.str());
        }
    }
    return TokenInfo(this->line, currentToken, ss.str(), last_state);
}
/**
 * Classifies an identifier-shaped lexeme.
 *
 * @param s the text to look up.
 * @return the token type of the reserved word whose spelling equals `s`
 *         exactly, or ID when the table has no such entry.
 */
Automata::TokenType Automata::reservedLookup(const std::wstring &s) {
    for (auto entry = reservedWords.begin(); entry != reservedWords.end(); ++entry) {
        if (entry->str != s) {
            continue;
        }
        return entry->token;
    }
    return ID;
}
/**
 * Moves the read position of the input stream back by one unit so that the
 * most recently read character is delivered again by the next read.
 *
 * NOTE(review): the offset is applied to the underlying file position; with
 * the variable-width UTF-8 facet installed on the stream, a relative seek may
 * be rejected by some standard library implementations or land inside a
 * multi-byte sequence - verify on every targeted toolchain.
 */
void Automata::pushBackChar() {
    const long stepBack = -1L;
    input.seekg(stepBack, std::ios::cur);
}

View File

@ -110,11 +110,7 @@ void SyntaxParser::getToken() {
for(int i = 1; i < tokens.size(); i++) {
if(tokens[i] == L"\r") continue;;
auto token_info = get_token_info(tokens[i]);
int symbol_index;
symbol_index = pool->getSymbolIndex(token_info.first);
tokens_queue.push(symbol_index);
tokens_queue.push(pool->getSymbolIndex(token_info.first));
_line_index++;
}