词法分析整合;代码调整与完善;
This commit is contained in:
parent
1714e6f560
commit
afd82ebd74
@ -9,6 +9,4 @@ include_directories(./include)
|
||||
|
||||
aux_source_directory(src SRC_FILES)
|
||||
|
||||
add_executable(LR1Generator LR1Generator.cpp ${SRC_FILES})
|
||||
|
||||
add_executable(syntaxParser LR1Generator.cpp ${SRC_FILES})
|
||||
add_executable(LR1Compiler Compiler.cpp ${SRC_FILES})
|
@ -1,11 +1,15 @@
|
||||
#include <iostream>
|
||||
#include <ctime>
|
||||
|
||||
#include <Automata.h>
|
||||
|
||||
#include <SymbolTable.h>
|
||||
#include <GrammarResourcePool.h>
|
||||
|
||||
#include <AnalyseTableGenerator.h>
|
||||
|
||||
#include <LR1Generator.h>
|
||||
#include <SyntaxParser.h>
|
||||
|
||||
using std::vector;
|
||||
using std::wstring;
|
||||
|
||||
@ -13,14 +17,33 @@ using std::wcout;
|
||||
using std::endl;
|
||||
|
||||
|
||||
#include <LR1Generator.h>
|
||||
#include <SyntaxParser.h>
|
||||
int main(int argc, const char* argv[]) {
|
||||
|
||||
|
||||
int main() {
|
||||
try {
|
||||
clock_t start, end;//定义clock_t变量
|
||||
start = clock(); //开始时间
|
||||
|
||||
printf("Compile Program Based on LR(1) Written By Saturneric\n");
|
||||
|
||||
if (argc < 2) {
|
||||
printf("Usage: <Input Path>\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
clock_t start, end;
|
||||
start = clock();
|
||||
|
||||
Automata atm(argv[1]);
|
||||
|
||||
atm.parse();
|
||||
|
||||
atm.output();
|
||||
|
||||
// Output elapsed time
|
||||
end = clock();
|
||||
double times = double(end - start) / CLOCKS_PER_SEC;
|
||||
wcout << "Token Automata Run time = " << times << "s MicroSeconds" << " = " << times * 1000 << "ms" << endl;
|
||||
|
||||
// LR(1) generation
|
||||
start = clock();
|
||||
|
||||
const GrammarResourcePool *pool;
|
||||
|
||||
@ -37,9 +60,11 @@ int main() {
|
||||
|
||||
//输出时间
|
||||
end = clock(); //结束时间
|
||||
double times = double(end - start) / CLOCKS_PER_SEC;
|
||||
wcout << "LR1Generator Run time = " << times << "s MicroSeconds" << " = " << times * 1000 << "ms" << endl;
|
||||
times = double(end - start) / CLOCKS_PER_SEC;
|
||||
wcout << "LR(1) Generator Run time = " << times << "s MicroSeconds" << " = " << times * 1000 << "ms" << endl;
|
||||
|
||||
|
||||
// Syntax analysis
|
||||
start = clock(); //开始时间
|
||||
|
||||
SyntaxParser syntaxParser(pool, atg);
|
||||
@ -51,7 +76,8 @@ int main() {
|
||||
//输出时间
|
||||
end = clock(); //结束时间
|
||||
times = double(end - start) / CLOCKS_PER_SEC;
|
||||
wcout << "SyntaxParser Run time = " << times << "s MicroSeconds " << " = " << times * 1000 << "ms" << endl;
|
||||
wcout << "Syntax Parser Run time = " << times << "s MicroSeconds " << " = " << times * 1000 << "ms" << endl;
|
||||
|
||||
} catch(std::runtime_error &e) {
|
||||
std::wcout << "Runtime Error: " << e.what() << endl;
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
//
|
||||
// Created by Administrator on 2021/4/30.
|
||||
//
|
||||
|
||||
int main() {
|
||||
|
||||
}
|
@ -2,7 +2,7 @@
|
||||
2 FLOAT(float) ID(a1) SEMICOLON(;)
|
||||
3 DOUBLE(double) ID(a2) COMMA(,) ID(a3) COMMA(,) ID(a4) SEMICOLON(;)
|
||||
4 LONG(long) DOUBLE(double) ID(a5) SEMICOLON(;)
|
||||
5 STRUCT(struct) ID(warp_int) OPENING_BRACE({)
|
||||
5 STRUCT(struct) ID(warp_int) OPENING_BRACE({)
|
||||
6 INT8(int8) ID(i1) SEMICOLON(;)
|
||||
7 INT16(int16) ID(i2) SEMICOLON(;)
|
||||
8 INT32(int32) ID(i3) COMMA(,) ID(i4) SEMICOLON(;)
|
||||
@ -21,13 +21,13 @@
|
||||
21 UNSIGNED(unsigned) LONG(long) LONG(long) ID(s9) SEMICOLON(;)
|
||||
22 CLOSING_BRACE(}) ID(sign1) COMMA(,) ID(SIGN2) SEMICOLON(;)
|
||||
23 CHAR(char) ID(a_6) LEFT_BRACKET([) STRING("compile") PLUS(+) STRING("studying") COMMA(,) STRING("\40") COMMA(,) STRING("abs\b\t\n\f\r\"\\abs") RIGHT_BRACKET(]) SEMICOLON(;)
|
||||
24 BOOLEAN(boolean) ID(a_bool_7) LEFT_BRACKET([) INTEGER(10) INSERT(^) INTEGER(2) COMMA(,) INTEGER(1) AND(&) INTEGER(2) AND(&) INTEGER(3) COMMA(,) TRUE(TRUE) DELIMITER(|) FALSE(FALSE) COMMA(,) TILDE(~) FALSE(FALSE) DELIMITER(|) TILDE(~) TRUE(TRUE) RIGHT_BRACKET(]) SEMICOLON(;)
|
||||
24 BOOLEAN(boolean) ID(a_bool_7) LEFT_BRACKET([) INTEGER(10) INSERT(^) INTEGER(2) COMMA(,) INTEGER(1) AND(&) INTEGER(2) AND(&) INTEGER(3) COMMA(,) TRUE(TRUE) DELIMITER(|) FALSE(FALSE) COMMA(,) TILDE(~) FALSE(FALSE) DELIMITER(|) TILDE(~) TRUE(TRUE) RIGHT_BRACKET(]) SEMICOLON(;)
|
||||
25 LONG(long) ID(a8) LEFT_BRACKET([) INTEGER(1024) RIGHT_SHIFT(>>) INTEGER(10) COMMA(,) INTEGER(0) LEFT_SHIFT(<<) INTEGER(10) COMMA(,) INTEGER(100) MULT(*) INTEGER(2) SLASH(/) INTEGER(10) PERCENT(%) INTEGER(2) COMMA(,) INTEGER(100) PLUS(+) INTEGER(21) SUB(-) INTEGER(19) RIGHT_BRACKET(]) SEMICOLON(;)
|
||||
26 BOOLEAN(boolean) ID(a9) LEFT_BRACKET([) INTEGER(10) INSERT(^) INTEGER(2) AND(&) INTEGER(3) DELIMITER(|) SUB(-) INTEGER(1) RIGHT_SHIFT(>>) INTEGER(10) AND(&) INTEGER(100) LEFT_SHIFT(<<) SUB(-) INTEGER(10) SUB(-) INTEGER(10) PLUS(+) INTEGER(100) MULT(*) INTEGER(2) SLASH(/) INTEGER(10) PERCENT(%) INTEGER(2) RIGHT_BRACKET(]) SEMICOLON(;)
|
||||
27 STRUCT(struct) ID(warp_1) OPENING_BRACE({)
|
||||
28 FLOAT(float) ID(w1) SEMICOLON(;)
|
||||
29 LONG(long) ID(w2) SEMICOLON(;)
|
||||
30 STRUCT(struct) ID(warp_2) OPENING_BRACE({)
|
||||
30 STRUCT(struct) ID(warp_2) OPENING_BRACE({)
|
||||
31 BOOLEAN(boolean) ID(w3) LEFT_BRACKET([) INTEGER(111) AND(&) INTEGER(2) RIGHT_BRACKET(]) SEMICOLON(;)
|
||||
32 CHAR(char) ID(w4) LEFT_BRACKET([) STRING("\40\b\t\n\f\r\"\\\40") RIGHT_BRACKET(]) SEMICOLON(;)
|
||||
33 CLOSING_BRACE(}) ID(w5) COMMA(,) ID(w6) SEMICOLON(;)
|
||||
|
89
cmake-build-release/syntaxInput.txt
Normal file
89
cmake-build-release/syntaxInput.txt
Normal file
@ -0,0 +1,89 @@
|
||||
@struct_type -> "STRUCT" "ID" "OPENING_BRACE" member_list "CLOSING_BRACE" more_struct_type
|
||||
more_struct_type -> "EOF"
|
||||
more_struct_type -> ε
|
||||
|
||||
member_list -> type_spec declarators "SEMICOLON" member_list
|
||||
member_list -> ε
|
||||
|
||||
type_spec -> base_type_spec
|
||||
type_spec -> @struct_type
|
||||
base_type_spec -> floating_pt_type
|
||||
base_type_spec -> integer_type
|
||||
base_type_spec -> "CHAR"
|
||||
base_type_spec -> "BOOLEAN"
|
||||
floating_pt_type -> "FLOAT"
|
||||
floating_pt_type -> "DOUBLE"
|
||||
floating_pt_type -> "LONG" "DOUBLE"
|
||||
integer_type -> signed_int
|
||||
integer_type -> unsigned_int
|
||||
|
||||
signed_int -> "SHORT"
|
||||
signed_int -> "INT16"
|
||||
signed_int -> "LONG"
|
||||
signed_int -> "INT32"
|
||||
signed_int -> "LONG" "LONG"
|
||||
signed_int -> "INT64"
|
||||
signed_int -> "INT8"
|
||||
|
||||
unsigned_int -> "UNSIGNED" "SHORT"
|
||||
unsigned_int -> "UNSIGNED" "LONG"
|
||||
unsigned_int -> "UNSIGNED" "LONG" "LONG"
|
||||
unsigned_int -> "UINT16"
|
||||
unsigned_int -> "UINT32"
|
||||
unsigned_int -> "UINT64"
|
||||
unsigned_int -> "UINT8"
|
||||
|
||||
declarators -> declarator more_declarators
|
||||
more_declarators -> "COMMA" declarator more_declarators
|
||||
more_declarators -> ε
|
||||
|
||||
declarator -> "ID" more_declarator
|
||||
more_declarator -> exp_list
|
||||
more_declarator -> ε
|
||||
|
||||
exp_list -> "LEFT_BRACKET" or_expr more_or_expr "RIGHT_BRACKET"
|
||||
more_or_expr -> "COMMA" or_expr more_or_expr
|
||||
more_or_expr -> ε
|
||||
|
||||
or_expr -> xor_expr more_xor_expr
|
||||
more_xor_expr -> "DELIMITER" xor_expr more_xor_expr
|
||||
more_xor_expr -> ε
|
||||
|
||||
xor_expr -> and_expr more_and_expr
|
||||
more_and_expr -> "INSERT" and_expr more_and_expr
|
||||
more_and_expr -> ε
|
||||
|
||||
and_expr -> shift_expr more_shift_expr
|
||||
more_shift_expr -> "AND" shift_expr more_shift_expr
|
||||
more_shift_expr -> ε
|
||||
|
||||
shift_expr -> add_expr more_add_expr
|
||||
more_add_expr -> shift_sign add_expr more_add_expr
|
||||
shift_sign -> "RIGHT_SHIFT"
|
||||
shift_sign -> "LEFT_SHIFT"
|
||||
more_add_expr -> ε
|
||||
|
||||
add_expr -> multi_expr more_multi_expr
|
||||
more_multi_expr -> multi_sign multi_expr more_multi_expr
|
||||
multi_sign -> "PLUS"
|
||||
multi_sign -> "SUB"
|
||||
more_multi_expr -> ε
|
||||
|
||||
multi_expr -> unary_expr more_unary_expr
|
||||
more_unary_expr -> unary_sign unary_expr more_unary_expr
|
||||
unary_sign -> "MULT"
|
||||
unary_sign -> "SLASH"
|
||||
unary_sign -> "PERCENT"
|
||||
more_unary_expr -> ε
|
||||
|
||||
unary_expr -> unary_sign_2 unary_declare
|
||||
unary_sign_2 -> "SUB"
|
||||
unary_sign_2 -> "PLUS"
|
||||
unary_sign_2 -> "TILDE"
|
||||
unary_sign_2 -> ε
|
||||
unary_declare -> "INTEGER"
|
||||
unary_declare -> "STRING"
|
||||
unary_declare -> BOOLEAN_VALUE
|
||||
|
||||
BOOLEAN_VALUE -> "TRUE"
|
||||
BOOLEAN_VALUE -> "FALSE"
|
118
include/Automata.h
Normal file
118
include/Automata.h
Normal file
@ -0,0 +1,118 @@
|
||||
//
|
||||
// Created by Administrator on 2021/5/1.
|
||||
//
|
||||
|
||||
#ifndef SYNTAXPARSER_AUTOMATA_H
|
||||
#define SYNTAXPARSER_AUTOMATA_H
|
||||
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <codecvt>
|
||||
#include <sstream>
|
||||
|
||||
/**
 * Hand-written lexer: reads a source file as UTF-8, splits it into tokens
 * with a small state machine (see nextToken()) and can dump the result to a
 * text file (see output()).
 */
class Automata {
public:

    /// Source stream; bytes are decoded from UTF-8 into wide characters.
    std::wifstream input;

    /**
     * Opens the file to scan.
     *
     * @param path Path of the source file; it is opened in binary mode.
     */
    explicit Automata(const std::string& path) : input(path, std::ios::binary) {
        // A facet handed to std::locale with the default reference count is
        // owned and released by the locale, so this `new` has no matching
        // `delete` on purpose.
        auto* codeCvtToUTF8 = new std::codecvt_utf8<wchar_t>;
        input.imbue(std::locale(input.getloc(), codeCvtToUTF8));
    }

    ~Automata() {
        input.close();
    }

    /// Scans tokens into `tokens` until the end-of-input flag is raised.
    void parse() {
        while (!ifeof) {
            tokens.push_back(nextToken());
        }
    }

    /// Writes the collected tokens to a text file (defined in Automata.cpp).
    void output();

private:

    /// Kinds of token the lexer can report.
    enum TokenType {
        /* Reserve Words */
        STRUCT, BOOLEAN, SHORT, LONG,
        DOUBLE, FLOAT,
        INT8, INT16, INT32, INT64,
        UINT8, UINT16, UINT32, UINT64,
        CHAR,
        UNSIGNED,
        /* Special Symbols */
        OPENING_BRACE, CLOSING_BRACE,
        SEMICOLON,
        LEFT_BRACKET, RIGHT_BRACKET,
        MULT, PLUS, SUB, TILDE, SLASH,
        PERCENT, LEFT_SHIFT, RIGHT_SHIFT, AND, INSERT,
        DELIMITER, COMMA,
        /* Multicharacter Tokens */
        ID, LETTER, DIGIT, UNDERLINE, T_TRUE, T_FALSE,
        INTEGER, INTEGER_TYPE_SUFFIX, STRING, T_BOOLEAN,
        /* None & Error & EOF */
        NONE, ERROR, T_EOF
    };

    /// States of the scanning state machine.
    enum StateType {
        START,
        S_LETTER, S_UNDERLINE, S_DIGIT,
        INT_0, INT_NOT_0, INT_TYPE_SUFFIX,
        STRING_START, STRING_END,
        S_SIGN,
        DONE,
        S_NONE
    };

    /// Spelling of a reserved word paired with the token it produces.
    struct ReservedWord {
        std::wstring str;
        TokenType token;
    };

    /// One scanned token. All members are const, so a TokenInfo can be
    /// copied but not assigned.
    struct TokenInfo {

        const int line;          ///< value of Automata::line when the token was produced
        const TokenType token;   ///< token kind
        const std::wstring str;  ///< characters collected for the token
        const StateType state;   ///< state the machine was in before it finished

        TokenInfo(const int line, const TokenType token, std::wstring str, StateType state)
                : line(line), token(token), str(std::move(str)), state(state) {}

    };

    // NOTE(review): not referenced by any member visible in this header.
    FILE* fp = nullptr;

    /// Raised by nextChar() once the input is exhausted; ends parse().
    bool ifeof = false;

    /// Tokens collected by parse(), in source order.
    std::vector<TokenInfo> tokens;

    /// Current line number, starting at 1.
    int line = 1;

    const static std::vector<ReservedWord> reservedWords;

    const static std::map<TokenType, std::wstring> tokenTypeStrMap;

    const static std::map<StateType, std::wstring> stateTypeStrMap;

    // Declared without the `Automata::` prefix: a qualified name on an
    // in-class member declaration is ill-formed and rejected by GCC/Clang.
    wchar_t nextChar();

    void pushBackChar();

    TokenInfo nextToken();

    static TokenType reservedLookup(const std::wstring& s);

};
|
||||
|
||||
|
||||
#endif //SYNTAXPARSER_AUTOMATA_H
|
386
src/Automata.cpp
Normal file
386
src/Automata.cpp
Normal file
@ -0,0 +1,386 @@
|
||||
//
|
||||
// Created by Administrator on 2021/5/1.
|
||||
//
|
||||
|
||||
#include "Automata.h"
|
||||
|
||||
const std::vector<Automata::ReservedWord> Automata::reservedWords = {
|
||||
{L"struct", STRUCT},
|
||||
{L"boolean", BOOLEAN},
|
||||
{L"short", SHORT},
|
||||
{L"long", LONG},
|
||||
{L"double", DOUBLE},
|
||||
{L"float", FLOAT},
|
||||
{L"int8", INT8},
|
||||
{L"int16", INT16},
|
||||
{L"int32", INT32},
|
||||
{L"int64", INT64},
|
||||
{L"uint8", UINT8},
|
||||
{L"uint16", UINT16},
|
||||
{L"uint32", UINT32},
|
||||
{L"uint64", UINT64},
|
||||
{L"char", CHAR},
|
||||
{L"unsigned", UNSIGNED},
|
||||
{L"TRUE", T_TRUE},
|
||||
{L"FALSE", T_FALSE}
|
||||
};
|
||||
|
||||
const std::map<Automata::StateType, std::wstring> Automata::stateTypeStrMap = {
|
||||
{START, L"START"},
|
||||
{S_LETTER, L"LETTER"},
|
||||
{S_UNDERLINE, L"UNDERLINE"},
|
||||
{S_DIGIT, L"DIGIT"},
|
||||
{INT_0, L"INT_0"},
|
||||
{INT_NOT_0, L"INT_NOT_0"},
|
||||
{INT_TYPE_SUFFIX, L"INT_TYPE_SUFFIX"},
|
||||
{STRING_START, L"STRING_START"},
|
||||
{STRING_END, L"STRING_END"},
|
||||
{DONE, L"DONE"},
|
||||
{S_SIGN, L"SIGN"}
|
||||
};
|
||||
|
||||
const std::map<Automata::TokenType, std::wstring> Automata::tokenTypeStrMap = {
|
||||
{STRUCT, L"STRUCT"},
|
||||
{BOOLEAN, L"BOOLEAN"},
|
||||
{SHORT, L"SHORT"},
|
||||
{LONG, L"LONG"},
|
||||
{FLOAT, L"FLOAT"},
|
||||
{DOUBLE, L"DOUBLE"},
|
||||
{INT8, L"INT8"},
|
||||
{INT16, L"INT16"},
|
||||
{INT32, L"INT32"},
|
||||
{INT64, L"INT64"},
|
||||
{UINT8, L"UINT8"},
|
||||
{UINT16, L"UINT16"},
|
||||
{UINT32, L"UINT32"},
|
||||
{UINT64, L"UINT64"},
|
||||
{CHAR, L"CHAR"},
|
||||
{UNSIGNED, L"UNSIGNED"},
|
||||
{OPENING_BRACE, L"OPENING_BRACE"},
|
||||
{CLOSING_BRACE, L"CLOSING_BRACE"},
|
||||
{SEMICOLON, L"SEMICOLON"},
|
||||
{LEFT_BRACKET, L"LEFT_BRACKET"},
|
||||
{RIGHT_BRACKET, L"RIGHT_BRACKET"},
|
||||
{MULT, L"MULT"},
|
||||
{PLUS, L"PLUS"},
|
||||
{SUB, L"SUB"},
|
||||
{TILDE, L"TILDE"},
|
||||
{SLASH, L"SLASH"},
|
||||
{PERCENT, L"PERCENT"},
|
||||
{LEFT_SHIFT, L"LEFT_SHIFT"},
|
||||
{RIGHT_SHIFT, L"RIGHT_SHIFT"},
|
||||
{AND, L"AND"},
|
||||
{INSERT, L"INSERT"},
|
||||
{DELIMITER, L"DELIMITER"},
|
||||
{COMMA, L"COMMA"},
|
||||
{ID, L"ID"},
|
||||
{LETTER, L"LETTER"},
|
||||
{DIGIT, L"DIGIT"},
|
||||
{UNDERLINE, L"UNDERLINE"},
|
||||
{T_TRUE, L"TRUE"},
|
||||
{T_FALSE, L"FALSE"},
|
||||
{INTEGER, L"INTEGER"},
|
||||
{INTEGER_TYPE_SUFFIX, L"INTEGER_TYPE_SUFFIX"},
|
||||
{T_BOOLEAN, L"BOOLEAN"},
|
||||
{STRING, L"STRING"},
|
||||
{COMMA, L"COMMA"},
|
||||
{NONE, L"NONE"},
|
||||
{ERROR, L"ERROR"},
|
||||
{T_EOF, L"EOF"}
|
||||
};
|
||||
|
||||
void Automata::output() {
|
||||
|
||||
std::wofstream stream(L"./tokenOut.txt", std::ios::binary | std::ios::trunc);
|
||||
int temp_line = 1;
|
||||
stream << "1 ";
|
||||
|
||||
for (const auto& token : tokens) {
|
||||
if (token.line > temp_line) {
|
||||
temp_line = token.line;
|
||||
stream << std::endl << temp_line << ' ';
|
||||
}
|
||||
|
||||
if (token.token == ERROR) {
|
||||
stream << tokenTypeStrMap.find(token.token)->second << '{' << token.str << ", " << stateTypeStrMap.find(token.state)->second << '}' << ' ';
|
||||
}
|
||||
else {
|
||||
stream << tokenTypeStrMap.find(token.token)->second << '(' << token.str << ')' << ' ';
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
stream.close();
|
||||
}
|
||||
|
||||
wchar_t Automata::nextChar() {
|
||||
wchar_t c;
|
||||
input >> std::noskipws >> c;
|
||||
if (input.eof()) {
|
||||
this->ifeof = true;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
 * Scans and returns the next token.
 *
 * Characters are pulled with nextChar() and fed through the state machine
 * until the token is complete. The character that terminates a token is
 * handed back with pushBackChar() so the next call sees it again.
 *
 * @return The token, tagged with the current line number, its collected text
 *         and the state the machine was in just before it finished.
 *
 * End of input:
 *  - with no token in progress, a T_EOF token is returned and `ifeof` stays set;
 *  - in the middle of a token, that token is completed and returned, and
 *    `ifeof` is cleared so that parse() calls once more and gets the T_EOF
 *    token from the (still exhausted) stream.
 */
Automata::TokenInfo Automata::nextToken() {
    // ASCII-only classifiers. The <cctype> functions take an int that must be
    // representable as unsigned char, which a decoded wide character need not be.
    const auto isDigit = [](wchar_t ch) { return ch >= L'0' && ch <= L'9'; };
    const auto isLetter = [](wchar_t ch) {
        return (ch >= L'a' && ch <= L'z') || (ch >= L'A' && ch <= L'Z');
    };

    TokenType currentToken = NONE;
    StateType state = START;
    StateType last_state = S_NONE;
    std::wstringstream ss;
    bool save = true;

    // Completes the token *before* the character just read: that character is
    // returned to the input and is not added to the token text.
    const auto endBeforeCurrentChar = [&](TokenType type) {
        currentToken = type;
        pushBackChar();
        state = DONE;
        save = false;
    };

    while (state != DONE) {

        last_state = state;
        save = true;

        wchar_t CH = nextChar();

        if (ifeof) {
            if (state == START) {
                currentToken = T_EOF;
                break;
            }
            // Input ended inside a token: classify what has been collected.
            switch (state) {
                case S_LETTER:
                case S_DIGIT:
                    currentToken = reservedLookup(ss.str());
                    break;
                case INT_0:
                case INT_NOT_0:
                case INT_TYPE_SUFFIX:
                    currentToken = INTEGER;
                    break;
                case STRING_END:
                    currentToken = STRING;
                    break;
                case S_SIGN:
                    break;  // already classified when the sign was read
                default:
                    // S_UNDERLINE (trailing underscore) or STRING_START
                    // (unterminated string).
                    currentToken = ERROR;
                    break;
            }
            ifeof = false;  // one more call is needed to report T_EOF
            break;
        }

        switch (state) {
            case START:
                if (isDigit(CH)) {
                    state = (CH == L'0') ? INT_0 : INT_NOT_0;
                }
                else if (isLetter(CH)) {
                    state = S_LETTER;
                }
                else if (CH == L'"') {
                    state = STRING_START;
                }
                else if (CH == L' ' || CH == L'\t' || CH == L'\r') {
                    save = false;
                }
                else if (CH == L'\n') {
                    this->line++;
                    save = false;
                }
                else {
                    state = S_SIGN;
                    switch (CH) {
                        case L'{': currentToken = OPENING_BRACE; break;
                        case L'}': currentToken = CLOSING_BRACE; break;
                        case L';': currentToken = SEMICOLON;     break;
                        case L'[': currentToken = LEFT_BRACKET;  break;
                        case L']': currentToken = RIGHT_BRACKET; break;
                        case L'*': currentToken = MULT;          break;
                        case L'+': currentToken = PLUS;          break;
                        case L'-': currentToken = SUB;           break;
                        case L'~': currentToken = TILDE;         break;
                        case L'/': currentToken = SLASH;         break;
                        case L'%': currentToken = PERCENT;       break;
                        case L'>':
                        case L'<': {
                            // Only the doubled forms ">>" and "<<" are tokens.
                            const wchar_t second = nextChar();
                            if (!ifeof && second == CH) {
                                currentToken = (CH == L'>') ? RIGHT_SHIFT : LEFT_SHIFT;
                                ss << CH;  // first half; `save` below adds the second
                            }
                            else {
                                if (!ifeof) {
                                    pushBackChar();
                                }
                                // CH still holds the lone sign, so the error
                                // text names it rather than the next character.
                                currentToken = ERROR;
                            }
                            break;
                        }
                        case L'&': currentToken = AND;       break;
                        case L'^': currentToken = INSERT;    break;
                        case L'|': currentToken = DELIMITER; break;
                        case L',': currentToken = COMMA;     break;
                        default:   currentToken = ERROR;     break;
                    }
                }
                break;

            case S_LETTER:
                if (CH == L'_') {
                    state = S_UNDERLINE;
                }
                else if (isDigit(CH)) {
                    state = S_DIGIT;
                }
                else if (!isLetter(CH)) {
                    endBeforeCurrentChar(ID);
                }
                break;

            case S_DIGIT:
                // NOTE(review): unlike S_LETTER, an underscore after a digit
                // ends the identifier here - confirm this is intended.
                if (isLetter(CH)) {
                    state = S_LETTER;
                }
                else if (!isDigit(CH)) {
                    endBeforeCurrentChar(ID);
                }
                break;

            case S_UNDERLINE:
                if (isDigit(CH)) {
                    state = S_DIGIT;
                }
                else if (isLetter(CH)) {
                    state = S_LETTER;
                }
                else {
                    // An underscore must be followed by a letter or digit.
                    // The token has to be finished here: leaving the state
                    // unchanged would re-read the pushed-back character forever.
                    endBeforeCurrentChar(ERROR);
                }
                break;

            case INT_0:
                if (CH == L'l' || CH == L'L') {
                    state = INT_TYPE_SUFFIX;
                }
                else {
                    endBeforeCurrentChar(INTEGER);
                }
                break;

            case INT_NOT_0:
                if (CH == L'l' || CH == L'L') {
                    state = INT_TYPE_SUFFIX;
                }
                else if (!isDigit(CH)) {
                    endBeforeCurrentChar(INTEGER);
                }
                break;

            case INT_TYPE_SUFFIX:
                endBeforeCurrentChar(INTEGER);
                break;

            case STRING_START:
                if (CH == L'\\') {
                    const wchar_t backslash = CH;
                    CH = nextChar();

                    if (ifeof) {
                        // Input ended right after the backslash; the next
                        // iteration reports the unterminated string.
                        save = false;
                    }
                    else if (CH == L'b' || CH == L't' || CH == L'n' || CH == L'f' || CH == L'r'
                             || CH == L'"' || CH == L'\\') {
                        ss << backslash;  // the escaped character follows via `save`
                    }
                    else {
                        // NOTE(review): kept as in the original - the scan
                        // continues inside the string, so this ERROR is later
                        // replaced by STRING and the character is read again.
                        // Confirm the intended handling of unknown escapes.
                        pushBackChar();
                        currentToken = ERROR;
                    }
                }
                else if (CH == L'"') {
                    state = STRING_END;
                }
                else if (CH == L' ') {
                    // A space inside a string is recorded as the escape \40.
                    ss << L'\\' << L'4';
                    CH = L'0';
                }
                break;

            case STRING_END:
                endBeforeCurrentChar(STRING);
                break;

            case S_SIGN:
                endBeforeCurrentChar(currentToken);
                break;

            default:
                break;
        }

        if (save) {
            ss << CH;
        }

        // A finished identifier may actually be a reserved word.
        if (state == DONE && currentToken == ID) {
            currentToken = reservedLookup(ss.str());
        }
    }

    return TokenInfo(this->line, currentToken, ss.str(), last_state);
}
|
||||
|
||||
/**
 * Maps an identifier spelling to its reserved-word token.
 *
 * @param s Text of a scanned identifier.
 * @return The token of the first reservedWords entry whose spelling equals
 *         `s`, or ID when there is none.
 */
Automata::TokenType Automata::reservedLookup(const std::wstring &s) {
    auto entry = reservedWords.cbegin();
    const auto last = reservedWords.cend();

    // Walk the table until a matching spelling is found or it is exhausted.
    while (entry != last && entry->str != s) {
        ++entry;
    }

    if (entry == last) {
        return ID;
    }
    return entry->token;
}
|
||||
|
||||
/**
 * Returns the most recently read character to the input so that the next
 * nextChar() call yields it again.
 *
 * unget() steps back exactly one decoded character. The previous relative
 * seek of -1 is a byte offset, which a file buffer with a variable-width
 * (UTF-8) conversion is specified to reject, and which would land inside a
 * multi-byte sequence where it is accepted.
 */
void Automata::pushBackChar() {
    input.unget();
}
|
@ -110,11 +110,7 @@ void SyntaxParser::getToken() {
|
||||
for(int i = 1; i < tokens.size(); i++) {
|
||||
if(tokens[i] == L"\r") continue;;
|
||||
auto token_info = get_token_info(tokens[i]);
|
||||
int symbol_index;
|
||||
|
||||
symbol_index = pool->getSymbolIndex(token_info.first);
|
||||
|
||||
tokens_queue.push(symbol_index);
|
||||
tokens_queue.push(pool->getSymbolIndex(token_info.first));
|
||||
_line_index++;
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user