//
// Created by Administrator on 2021/5/1.
//

#ifndef SYNTAXPARSER_AUTOMATA_H
#define SYNTAXPARSER_AUTOMATA_H

// NOTE(review): the original include targets were not recoverable from the
// source text; this list covers every standard name the header uses, plus
// <iostream>, which the implementation file may rely on transitively.
#include <codecvt>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <locale>
#include <map>
#include <string>
#include <utility>
#include <vector>

/**
 * @brief Tokenizer front end: reads wide characters from a UTF-8 file and
 *        collects the tokens produced by nextToken().
 *
 * Only the constructor and parse() are defined here; every other member
 * function is declared here and must be defined in the implementation file.
 */
class Automata {
public:
    /// Input stream opened by the constructor; left public as in the original interface.
    std::wifstream input;

    /**
     * @brief Opens @p path in binary mode and installs a UTF-8 conversion facet.
     * @param path File to read. Failure to open is not reported here; callers
     *             can check `input.is_open()`.
     */
    explicit Automata(const std::string& path) : input(path, std::ios::binary) {
        // The std::locale constructor takes ownership of the facet (it is
        // reference counted by the locale), so this `new` is not a leak.
        // std::codecvt_utf8 is deprecated since C++17 but has no standard
        // replacement yet.
        // NOTE(review): template argument reconstructed as <wchar_t> to match
        // std::wifstream — confirm against the original file.
        input.imbue(std::locale(input.getloc(), new std::codecvt_utf8<wchar_t>));
    }

    // No user-declared destructor: std::wifstream closes the file in its own
    // destructor, so an explicit input.close() is redundant.

    /**
     * @brief Appends the result of nextToken() to `tokens` until `ifeof` is set.
     *
     * `ifeof` is not modified in this header; presumably the scanner helpers
     * set it when the input is exhausted — confirm in the implementation file.
     */
    void parse() {
        while (!ifeof) {
            tokens.push_back(nextToken());
        }
    }

    /// Declared here, defined in the implementation file.
    void output();

private:
    /// Token categories. Unscoped so the enumerators remain usable unqualified
    /// inside the class, exactly as before.
    enum TokenType {
        /* Reserve Words */
        STRUCT, BOOLEAN, SHORT, LONG, DOUBLE, FLOAT,
        INT8, INT16, INT32, INT64,
        UINT8, UINT16, UINT32, UINT64,
        CHAR, UNSIGNED,
        /* Special Symbols */
        OPENING_BRACE, CLOSING_BRACE, SEMICOLON,
        LEFT_BRACKET, RIGHT_BRACKET,
        MULT, PLUS, SUB, TILDE, SLASH, PERCENT,
        LEFT_SHIFT, RIGHT_SHIFT, AND, INSERT, DELIMITER, COMMA,
        /* Multicharacter Tokens */
        ID, LETTER, DIGIT, UNDERLINE, T_TRUE, T_FALSE,
        INTEGER, INTEGER_TYPE_SUFFIX, STRING, T_BOOLEAN,
        /* None & Error & EOF */
        NONE, ERROR, T_EOF
    };

    /// States of the scanner's state machine.
    enum StateType {
        START,
        S_LETTER, S_UNDERLINE, S_DIGIT,
        INT_0, INT_NOT_0, INT_TYPE_SUFFIX,
        STRING_START, STRING_END,
        S_SIGN,
        DONE,
        S_NONE
    };

    /// Pairs a reserved word's spelling with its token type.
    struct ReservedWord {
        std::wstring str;
        TokenType token;
    };

    /// Immutable record describing one token returned by nextToken().
    struct TokenInfo {
        const int line;
        const TokenType token;
        const std::wstring str;
        const StateType state;

        TokenInfo(const int line, const TokenType token, std::wstring str, StateType state)
            : line(line), token(token), str(std::move(str)), state(state) {}
    };

    // Not used anywhere in this header; kept because the implementation file
    // may reference it.
    FILE* fp = nullptr;
    // Loop condition of parse(); starts out false.
    bool ifeof = false;
    // Tokens accumulated by parse().
    std::vector<TokenInfo> tokens;
    // Initialised to 1; presumably the current input line number — confirm.
    int line = 1;

    // NOTE(review): the template arguments of the three containers below were
    // lost from the source text and are reconstructed from usage; they must
    // match the out-of-class definitions in the implementation file.
    const static std::vector<ReservedWord> reservedWords;
    const static std::map<TokenType, std::wstring> tokenTypeStrMap;
    const static std::map<StateType, std::wstring> stateTypeStrMap;

    // Scanner helpers, all defined in the implementation file.
    // The original declaration read `wchar_t Automata::nextChar();` — an
    // extra-qualified member declaration is ill-formed in standard C++
    // (GCC and Clang reject it), so the qualifier is dropped.
    wchar_t nextChar();
    void pushBackChar();
    TokenInfo nextToken();
    static TokenType reservedLookup(const std::wstring& s);
};

#endif //SYNTAXPARSER_AUTOMATA_H