如何使用正则表达式在 txt 文件中搜索匹配的内容?

时间:2016-11-28 06:45:19

标签: c++ regex

我想用正则表达式统计一个 HTML 格式的文本文件中电子邮件地址的数量并输出。我可以打开并逐行读取该文件,但不确定如何用正则表达式在文件内容中搜索与模式匹配的文本。

更新:好的,我的程序在测试用的文本文件上可以正常工作,但在 HTML 格式的实际文本文件上不行:它能输出电话号码的数量,却无法输出电子邮件地址的数量。

// Scans "ltudirectory.txt" line by line and prints how many .edu e-mail
// addresses, .com e-mail addresses and phone numbers were found.
// Returns 0 on success, 1 if the input file cannot be opened.
int _tmain(int argc, _TCHAR* argv[])
{
    // Raw string literals keep the regex escapes intact. In an ordinary
    // literal "\." is not a valid escape sequence; compilers typically warn
    // and drop the backslash, turning the literal dot into "match anything".
    //
    // The patterns are deliberately NOT anchored with ^ and $: with
    // regex_search those anchors only succeed when the whole line is a bare
    // address, which never happens when the address is embedded in HTML
    // markup. The trailing \b stops ".com" from matching ".community".
    const std::regex r_edu(R"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.+-]+\.edu\b)");
    const std::regex r_com(R"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.+-]+\.com\b)");
    // NOTE(review): pattern kept unchanged; the leading {2} matches only the
    // last two digits of a three-digit area code -- confirm whether {3} was
    // intended.
    const std::regex r_phoneNumbers("[[:digit:]]{2}-[[:digit:]]{3}-[[:digit:]]{4}");

    // Counts every non-overlapping match of `pattern` in `text`, so a line
    // holding several addresses contributes more than one hit.
    const auto countMatches = [](const std::string& text, const std::regex& pattern) {
        int total = 0;
        for (std::sregex_iterator it(text.begin(), text.end(), pattern), last; it != last; ++it)
        {
            ++total;
        }
        return total;
    };

    std::ifstream htmlText("ltudirectory.txt");
    if (!htmlText)
    {
        // Report the failure instead of silently printing three zeros.
        std::cerr << "Could not open ltudirectory.txt" << std::endl;
        system("pause");
        return 1;
    }

    int eduEmails = 0;
    int nonEduEmails = 0;
    int num_phoneNumbers = 0;

    std::string line;
    while (std::getline(htmlText, line))
    {
        std::cout << line << '\n'; // echo the line being examined

        eduEmails += countMatches(line, r_edu);
        nonEduEmails += countMatches(line, r_com);
        num_phoneNumbers += countMatches(line, r_phoneNumbers);
    }

    std::cout << "Emails ending with .edu : " << eduEmails << std::endl;
    std::cout << "Emails ending with .com : " << nonEduEmails << std::endl;
    std::cout << "Number of Phone Numbers: " << num_phoneNumbers << std::endl;

    system("pause");
    return 0;
}

1 个答案:

答案 0 :(得分:1)

// Counts the e-mail addresses found in "ltudirectory.txt" and prints the total.
// Returns 0 on success, 1 if the input file cannot be opened.
int _tmain(int argc, _TCHAR* argv[])
{
    // Unanchored pattern, so addresses embedded in surrounding markup are
    // still found. Compiled once here rather than once per line.
    const std::regex emailPattern("(\\w+)(\\.|_)?(\\w*)@(\\w+)(\\.(\\w+))+");

    std::ifstream htmlText("ltudirectory.txt");
    if (!htmlText)
    {
        std::cerr << "Could not open ltudirectory.txt" << std::endl;
        system("pause");
        return 1;
    }

    int testNum = 0;
    std::string line;
    while (std::getline(htmlText, line))
    {
        // Walk every non-overlapping match so that a line containing several
        // addresses is counted once per address, not once per line.
        for (std::sregex_iterator it(line.begin(), line.end(), emailPattern), last; it != last; ++it)
        {
            ++testNum;
        }
    }

    // The tally was previously computed but never reported.
    std::cout << "Number of e-mail addresses: " << testNum << std::endl;

    system("pause");
    return 0;
}