词法分析器（c++）-Toy模板网

这篇具有很好参考价值的文章主要介绍了词法分析器（c++）。希望对大家有所帮助。如果存在错误或未考虑完全的地方，请大家不吝赐教，您也可以点击"举报违法"按钮提交疑问。

前景提示：

个人觉得单纯是用来完成实验报告的话还行，但仅做参考，因为本人的编程水平有限，怕误人子弟。

本次代码支持以下操作：

单行注释

多行注释

文件形式输入

种别码可以在文件中自由修改

单词字符串识别支持：

部分关键字（可在程序外部的 reference.txt 文件中手动添加），

标识符，

无符号整型数字（仅可识别整型，其他类型需要自行添加别的函数支持），

界符（; , ( ) { } [ ]），

全部运算符（+、=、-、*、/、:、<、>，以及这些符号的两两组合）

代码程序：

// 编译原理词法分析器


#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <map>
using namespace std;


// Reads the file at filePath and returns its contents as one string.
string getFormFile(string filePath);
// Single-character classifiers used by the tokenizer.
bool isLetter(char ch);
bool isNumber(char ch);
bool isOperator(char ch);
bool isDelimiter(char ch);
// Comment stripping: rmExegesis runs rmExegesis_1 ("//" comments) and then
// rmExegesis_2 ("/* */" comments) over the text.
string rmExegesis(const string);
string rmExegesis_1(const string str);
string rmExegesis_2(const string str);


// Tokenizer entry points; the zero-argument overload forwards fixed file names.
vector<string> compile();
vector<string> compile(const string originpath, const string targetpath);
// Loads the symbol -> category-code table from reference.txt.
map<string, string> getMap();
// Prints one row per token using the table produced by getMap().
void showTime(map<string, string> refer, vector<string> key);

int main()
{
    map<string, string> refer = getMap();                            //从配置文件中拿到键值对

    cout << "请在文件 input.txt 中输入待编译程序" << endl;
    system("pause");
    vector<string> key = compile();                                    //拿到待识别字符
                                                            
    showTime(refer, key);
    cout << "编译成功！" << endl;
}


// Reads the whole file at filePath into a string.
//
// Returns an empty string when the file cannot be opened (after printing the
// failure message) or when it is empty. Previously a failed open fell through
// to tellg() == -1 and str.resize(-1), which throws std::length_error.
std::string getFormFile(std::string filePath)
{
    std::ifstream ifs(filePath);
    if (!ifs.is_open())
    {
        std::cout << filePath << "打开失败！" << std::endl;
        return std::string();
    }

    // Measure the file by seeking to its end.
    ifs.seekg(0, std::ios::end);
    const std::streamoff size = ifs.tellg();
    if (size <= 0)
        return std::string();

    std::string str(static_cast<std::string::size_type>(size), '\0');
    ifs.seekg(0, std::ios::beg);
    // Read into the string's own writable buffer instead of casting away the
    // const of c_str().
    ifs.read(&str[0], static_cast<std::streamsize>(size));
    // The stream may deliver fewer characters than the measured size (e.g.
    // text-mode newline translation); drop the unread '\0' padding.
    str.resize(static_cast<std::string::size_type>(ifs.gcount()));

    return str;   // ifs is closed by its destructor
}

// Identifier character: an ASCII letter (either case) or an underscore.
bool isLetter(char ch)
{
    const bool lower = ch >= 'a' && ch <= 'z';
    const bool upper = ch >= 'A' && ch <= 'Z';
    return lower || upper || ch == '_';
}

// Decimal digit: '0' through '9'.
bool isNumber(char ch)
{
    return ch >= '0' && ch <= '9';
}

// Operator character: one of = + - * / : < >
bool isOperator(char ch)
{
    switch (ch)
    {
    case '=':
    case '+':
    case '-':
    case '*':
    case '/':
    case ':':
    case '<':
    case '>':
        return true;
    default:
        return false;
    }
}

// Delimiter character: one of ; , ( ) { } [ ]
bool isDelimiter(char ch)
{
    switch (ch)
    {
    case ';':
    case ',':
    case '(':
    case ')':
    case '{':
    case '}':
    case '[':
    case ']':
        return true;
    default:
        return false;
    }
}


// Removes every "//" line comment from str and returns the result.
//
// A comment runs from "//" up to, but not including, the next '\n'; a comment
// on the last line with no trailing newline runs to the end of the text.
//
// Fixes over the previous version:
//  - find("\n") + 1 wrapped npos around to 0, so a comment without a trailing
//    newline made the working string grow forever (endless loop).
//  - the terminating newline was deleted together with the comment, gluing the
//    token before the comment to the first token of the next line
//    ("a//x\nb" became "ab"). The newline is now kept.
std::string rmExegesis_1(const std::string str)
{
    std::string result;
    result.reserve(str.size());

    std::string::size_type pos = 0;
    while (pos < str.size())
    {
        const std::string::size_type be = str.find("//", pos);
        if (be == std::string::npos)
        {
            // No further comment: keep the remainder unchanged.
            result.append(str, pos, std::string::npos);
            break;
        }
        result.append(str, pos, be - pos);

        const std::string::size_type en = str.find('\n', be);
        if (en == std::string::npos)
            break;          // the comment extends to the end of the text
        pos = en;           // resume at the newline so it is preserved
    }
    return result;
}

// Removes every "/* ... */" block comment from str and returns the result.
//
// A "*/" that appears before any "/*" is ordinary text and is kept. An
// unterminated "/*" is left in place together with everything after it.
//
// Fixes over the previous version:
//  - find("*/") + 2 wrapped npos around to 1, so the "not found" check never
//    fired and an unterminated "/*" made the working string grow forever.
//  - the closing "*/" was searched from the start of the opener, so "/*/" was
//    treated as a complete comment; the search now starts after "/*".
std::string rmExegesis_2(const std::string str)
{
    std::string result;
    result.reserve(str.size());

    std::string::size_type pos = 0;
    for (;;)
    {
        const std::string::size_type be = str.find("/*", pos);
        if (be == std::string::npos)
            break;
        const std::string::size_type en = str.find("*/", be + 2);
        if (en == std::string::npos)
            break;                          // unterminated: keep the rest as is
        result.append(str, pos, be - pos);  // text in front of the comment
        pos = en + 2;                       // continue right after "*/"
    }
    result.append(str, pos, std::string::npos);
    return result;
}

//去单行和多行注释
string rmExegesis(const string str)
{
    string result = rmExegesis_1(str);
    return rmExegesis_2(result);
}

vector<string> compile()
{
    string origin = "input.txt";
    string target = "output.txt";

    return compile(origin, target);
}

vector<string> compile(const string originpath, const string targetpath)
{
    string str = getFormFile(originpath);
    str = rmExegesis(str);                                        //去注释
    cout << str << endl;                                        //输出去注释之后的文本

    vector<string> v;                                            //存储字符
    int pos = 0;
    int len = str.find("#~");

    v.push_back("");
    for (int i = 0; i < len; i++)
    {
        if (str[i] == ' ' || str[i] == '\t' || str[i] == '\n')    //空格，tab键，换行键作为分割符
        {
            if (v[pos] != "")
            {
                v.push_back("");
                pos++;
            }
        }
        else if (isLetter(str[i]))
        {
            v[pos] = v[pos] + str[i];
            if (i + 1 < len && !isLetter(str[i + 1]))
            {
                v[pos] = v[pos] + "$ident";
                v.push_back("");
                pos++;
            }
        }
        else if (isNumber(str[i]))
        {
            v[pos] = v[pos] + str[i];
            if (i + 1 < len && !isNumber(str[i + 1]))
            {
                v[pos] = v[pos] + "$idconst";
                v.push_back("");
                pos++;
            }
        }
        else if (isOperator(str[i]))
        {
            v[pos] = v[pos] + str[i];

            if (i + 1 < len && isOperator(str[i + 1]))
            {
                i = i + 1;
                v[pos] = v[pos] + str[i];

                v.push_back("");
                pos++;
            }
            else
            {
                v.push_back("");
                pos++;
            }

        }
        else if(isDelimiter(str[i]))
        {
            v[pos] = v[pos] + str[i];
            v.push_back("");
            pos++;
        }
        else
        {
            v[pos] = v[pos] + str[i];
            v.push_back("");
            pos++;
        }
    }
    return v;
}

map<string, string> getMap()
{
    string str = getFormFile("reference.txt");                    //读取匹配文件
    map<string, string> refer;
    pair<string, string> p;

    string bpattern = "<reference>";
    string epattern = "</reference>";
    int ben = str.find(bpattern) + bpattern.size();
    int end = str.find(epattern);

    string key;
    string value;
    int flag1;
    int flag2;
    for (int i = ben; i < end; i++)
    {
        if (str[i] == ' ' || str[i] == '\t' || str[i] == '\n')
            continue;
        flag1 = i;
        flag2 = (str.find(' ', i) < str.find('\t', i) ? str.find(' ', i) : str.find('\t', i));
        key.assign(str, flag1, flag2 - flag1);

        flag1 = str.find('$', flag2) + 1;
        flag2 = str.find('\n', i);
        value.assign(str, flag1, flag2 - flag1);
        p = make_pair(key, value);
        refer.insert(p);
        i = flag2;
    }
    return refer;
}

// Prints the token table: one header row, then one row per non-empty entry of
// key, with the columns separated by three tabs.
//
//   refer  symbol -> category-code table
//   key    tokens, either plain ("<<") or tagged as "<text>$<kind>"
//
// Row layout:
//   - tagged token whose text is in refer:  text, its code, "关键字"
//   - any other tagged token:               kind, the code of kind, text
//   - plain token:                          token, its code, token
// A name missing from refer is shown with an empty code.
//
// Fix: codes used to be looked up with refer[...], which inserts an empty
// entry for every unknown name. After the first such lookup an identifier
// spelled like the inserted name (e.g. "ident") was found in the table and
// reported as a keyword. Lookups no longer modify the table.
void showTime(std::map<std::string, std::string> refer, std::vector<std::string> key)
{
    // Read-only lookup: the code for name, or "" when it is not in the table.
    const auto codeOf = [&refer](const std::string& name) -> std::string {
        const auto hit = refer.find(name);
        return hit == refer.end() ? std::string() : hit->second;
    };

    std::cout << "符号\t\t\t" << "种别码\t\t\t" << "说明" << std::endl;
    for (const std::string& token : key)
    {
        if (token.empty())
            continue;

        const std::string::size_type split = token.find('$');
        if (split == std::string::npos)
        {
            std::cout << token << "\t\t\t" << codeOf(token) << "\t\t\t" << token << std::endl;
            continue;
        }

        const std::string text = token.substr(0, split);
        const std::string kind = token.substr(split + 1);

        const auto hit = refer.find(text);
        if (hit != refer.end())
        {
            std::cout << text << "\t\t\t" << hit->second << "\t\t\t" << "关键字" << std::endl;
        }
        else
        {
            std::cout << kind << "\t\t\t" << codeOf(kind) << "\t\t\t" << text << std::endl;
        }
    }
}

运行示例：

文件结构：

词法分析器（c++）

文章来源地址https://www.toymoban.com/news/detail-445415.html

示例1：

input.txt文件：

begin
    begin
    
        begin 
         a:=1
         end;
         
       begin 
         b:=1
    end;
    q:=10
    end;
    
    x:=9;
    y:=11;
    z:=12
end
#~

reference.txt文件

<reference>
    if        $0
    then    $1
    else    $2
    while    $3
    begin    $4
    do        $5
    end        $6
    a        $7
    :=        $8
    +        $9
    -        $10
    *        $11
    /        $12
    +=        $13
    -=        $14
    *=        $15
    /=        $16
    ,        $17
    ;        $22
    (        $18
    )        $19
    {        $20
    }        $21
    ident    $100
    idconst    $101
</reference>

输出

词法分析器（c++）

示例2：

input.txt文件：

#include <iostream>
using namespace std;

void main()
{
    cout<<"hello world!!!"<<endl;            //单行注释在这里！！！  /*eryedhds
}
#~                        //#~为结束标志符


/*
多行注释在这里！！
asbhfafs
afs
adfsdfcdc

cs
csCSZ
C
ZXC
Scxcds    sfasf saf sd f afd sfd a14 1 411:36 2022/11/711:36 2022/11/7

wr


#~
*/

reference.txt文件

<reference>
    if        $0
    then        $1
    else        $2
    while    $3
    begin    $4
    do        $5
    end        $6
    :=        $8
    +        $9
    -        $10
    *        $11
    /        $12
    +=        $13
    -=        $14
    *=        $15
    /=        $16
    >        $30
    <        $31
    ,        $17
    ;        $22
    (        $18
    )        $19
    {        $20
    }        $21
    #        $1111
    include    $1112
    iostream    $1113
    using    $1114
    namespace    $1115
    std        $1116
    int        $1117
    main        $1118
    cout        $1119
    endl        $1120
    void        $1121
    "        $1122
    ident    $100
    idconst    $101
    !        $1123
    <<        $1124
    >>        $1125
</reference>

输出

词法分析器（c++）