我想用正则表达式统计一个HTML格式的文本文件中电子邮件地址的数量并输出。我已经可以打开并读取该文件,但不确定如何用正则表达式在文件内容中搜索匹配的模式。
更新:我的代码在测试文本文件上可以正常工作,但换成实际的HTML格式文本文件后就不行了:程序能输出电话号码的数量,却统计不到电子邮件地址的数量。
int _tmain(int argc, _TCHAR* argv[])
{
ifstream htmlText;
string line;
string eduEmail = "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.+-]+\.edu$";
string nonEduEmail = "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.+-]+\.com$";
string phoneNumbers = "[[:digit:]]{2}-[[:digit:]]{3}-[[:digit:]]{4}";
int eduEmails = 0;
int nonEduEmails = 0;
int num_phoneNumbers = 0;
htmlText.open("ltudirectory.txt");
if (htmlText.good())
{
while (getline(htmlText, line))
{
cout << line << endl;
regex r_edu(eduEmail); //the pattern to search for edu emails
regex r_com(nonEduEmail); //the pattern to search for .com emails
regex r_phoneNumbers(phoneNumbers); //the pattern to search for .com emails
bool eduEmail_match = regex_search(line, r_edu);
bool nonEmail_match = regex_search(line, r_com);
bool phoneNumber_match = regex_search(line, r_phoneNumbers);
if (eduEmail_match)
{
++eduEmails;
}
if (nonEmail_match)
{
++nonEduEmails;
}
if (phoneNumber_match)
{
++num_phoneNumbers;
}
}
}
htmlText.close();
cout << "Emails ending with .edu : " << eduEmails << endl;
cout << "Emails ending with .com : " << nonEduEmails << endl;
cout << "Number of Phone Numbers: " << num_phoneNumbers << endl;
system("pause");
return 0;
}
答案 0 :(得分:1)
int _tmain(int argc, _TCHAR* argv[])
{
ifstream htmlText;
string line;
string eduEmail = "(\\w+)(\\.|_)?(\\w*)@(\\w+)(\\.(\\w+))+";
int testNum = 0;
list<string> l;
htmlText.open("ltudirectory.txt");
if (htmlText.good())
{
while (getline(htmlText, line))
{
regex e(eduEmail); // the pattern
bool match = regex_search(line, e);
if (match) {
++testNum;
}
}
}
htmlText.close();
system("pause");
return 0;
}