我已经使用 Poco/Net/POP3ClientSession 下载了邮件,并想把电子邮件主题转换为人类可读的文本,所以我尝试了 neagoegab 在 https://stackoverflow.com/a/8104496/1350091 中给出的解决方案。不幸的是,它不起作用:
id: 1 size: 2792
Original subject: =?ISO-8859-2?Q?Re: M=F3j sen o JP II?=
converted: =?ISO-8859-2?Q?Re: M=F3j sen o JP II?=
输出结果为:
cout << "Encoded with POCO: " << MailMessage::encodeWord("Re: Mój sen o JP II", "ISO-8859-2") << endl; // output: Encoded with POCO: =?ISO-8859-2?q?Re=3A_M=C3=B3j_sen_o_JP_II?=
有趣的是,当我尝试用POCO转换主题时,它失败了:
drop
但我想得到的主题是:“Re: Mój sen o JP II”。我发现的唯一能成功转换主题的方法是 Python 的 email.header.decode_header: https://docs.python.org/2/library/email.header.html#email.header.decode_header
所以我的问题是:如何在 C++ 中将电子邮件主题转换为 UTF-8 之类的格式?
答案 0 :(得分:3)
与您的情况相关的 RFC 是 RFC 2047,它规定了如何在邮件消息头中编码非 ASCII 数据。基本要点是:在“Q”编码中,除可打印 ASCII 字符之外的所有字节都被转义为“=”字符后跟两个十六进制数字。由于“ó”在 ISO-8859-2 中由字节 0xF3 表示,而 0xF3 不是可打印的 ASCII 字符,因此它被编码为“=F3”。您需要解码消息中所有这样编码的字符。
答案 1 :(得分:0)
我找到了解决问题的方法(我不确定它是否是 100% 正确的解决方案),但看起来只需使用 Poco::UTF8Encoding::convert 将字符代码转换为 UTF-8 就足够了:
#include <iostream>
#include <stdexcept>
#include <string>
#include <Poco/Latin2Encoding.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/UTF8Encoding.h>
using namespace std;
using namespace Poco::Net;
class EncoderLatin2
{
public:
EncoderLatin2(const string& encodedSubject)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
int charsetBeginPosition = strlen("=?");
int charsetEndPosition = encodedSubject.find("?", charsetBeginPosition);
charset = encodedSubject.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedSubject[encodingPosition];
if ("ISO-8859-2" != charset)
throw std::invalid_argument("Invalid encoding!");
const int lenghtOfEncodedText = encodedSubject.length() - encodingPosition-strlen("?=")-2;
extractedEncodedSubjectToConvert = encodedSubject.substr(encodingPosition+2, lenghtOfEncodedText);
}
string convert()
{
size_t positionOfAssignment = -1;
while (true)
{
positionOfAssignment = extractedEncodedSubjectToConvert.find('=', positionOfAssignment+1);
if (string::npos != positionOfAssignment)
{
const string& charHexCode = extractedEncodedSubjectToConvert.substr(positionOfAssignment + 1, 2);
replaceAllSubstringsWithUnicode(extractedEncodedSubjectToConvert, charHexCode);
}
else
break;
}
return extractedEncodedSubjectToConvert;
}
void replaceAllSubstringsWithUnicode(string& s, const string& charHexCode)
{
const int charCode = stoi(charHexCode, nullptr, 16);
char buffer[10] = {};
encodingConverter.convert(charCode, (unsigned char*)buffer, sizeof(buffer));
replaceAll(s, '=' + charHexCode, buffer);
}
void replaceAll(string& s, const string& replaceFrom, const string& replaceTo)
{
size_t needlePosition = -1;
while (true)
{
needlePosition = s.find(replaceFrom, needlePosition + 1);
if (string::npos == needlePosition)
break;
s.replace(needlePosition, replaceFrom.length(), replaceTo);
}
}
private:
string charset;
char encoding;
Poco::UTF8Encoding encodingConverter;
string extractedEncodedSubjectToConvert;
};
int main()
{
POP3ClientSession session("poczta.o2.pl");
session.login("my mail", "my password");
POP3ClientSession::MessageInfoVec messages;
session.listMessages(messages);
MessageHeader header;
MailMessage message;
auto currentMessage = messages[0];
session.retrieveHeader(currentMessage.id, header);
session.retrieveMessage(currentMessage.id, message);
const string subject = message.getSubject();
EncoderLatin2 encoder(subject);
cout << "Original subject: " << subject << endl;
cout << "Encoded: " << encoder.convert() << endl;
}
答案 2 :(得分:-1)
我找到了另一个比之前更好的解决方案。我注意到不同的电子邮件主题会使用不同的编码:
因此,借助 POCO(Base64Decoder、Latin2Encoding、UTF8Encoding、QuotedPrintableDecoder),我设法处理了所有这些情况:
#include <iostream>
#include <string>
#include <sstream>
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/Base64Decoder.h>
#include <Poco/Latin2Encoding.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Net/QuotedPrintableDecoder.h>
using namespace std;
class Encoder
{
public:
Encoder(const string& encodedText)
{
isStringEncoded = isEncoded(encodedText);
if (!isStringEncoded)
{
extractedEncodedSubjectToConvert = encodedText;
return;
}
splitEncodedText(encodedText);
}
string convert()
{
if (isStringEncoded)
{
if (Poco::Latin2Encoding().isA(charset))
return decodeFromLatin2();
if (Poco::UTF8Encoding().isA(charset))
return decodeFromUtf8();
}
return extractedEncodedSubjectToConvert;
}
private:
void splitEncodedText(const string& encodedText)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
const int charsetBeginPosition = strlen(sequenceBeginEncodedText);
const int charsetEndPosition = encodedText.find("?", charsetBeginPosition);
charset = encodedText.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
const int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedText[encodingPosition];
const int lenghtOfEncodedText = encodedText.length() - encodingPosition-strlen(sequenceBeginEncodedText)-strlen(sequenceEndEncodedText);
extractedEncodedSubjectToConvert = encodedText.substr(encodingPosition+2, lenghtOfEncodedText);
}
bool isEncoded(const string& encodedSubject)
{
if (encodedSubject.size() < 4)
return false;
if (0 != encodedSubject.find(sequenceBeginEncodedText))
return false;
const unsigned positionOfLastTwoCharacters = encodedSubject.size() - strlen(sequenceEndEncodedText);
return positionOfLastTwoCharacters == encodedSubject.rfind(sequenceEndEncodedText);
}
string decodeFromLatin2()
{
size_t positionOfAssignment = -1;
while (true)
{
positionOfAssignment = extractedEncodedSubjectToConvert.find('=', positionOfAssignment+1);
if (string::npos != positionOfAssignment)
{
const string& charHexCode = extractedEncodedSubjectToConvert.substr(positionOfAssignment + 1, 2);
replaceAllSubstringsWithUnicode(extractedEncodedSubjectToConvert, charHexCode);
}
else
break;
}
return extractedEncodedSubjectToConvert;
}
void replaceAllSubstringsWithUnicode(string& s, const string& charHexCode)
{
static Poco::UTF8Encoding encodingConverter;
const int charCode = stoi(charHexCode, nullptr, 16);
char buffer[10] = {};
encodingConverter.convert(charCode, (unsigned char*)buffer, sizeof(buffer));
replaceAll(s, '=' + charHexCode, buffer);
}
void replaceAll(string& s, const string& replaceFrom, const string& replaceTo)
{
size_t needlePosition = -1;
while (true)
{
needlePosition = s.find(replaceFrom, needlePosition + 1);
if (string::npos == needlePosition)
break;
s.replace(needlePosition, replaceFrom.length(), replaceTo);
}
}
string decodeFromUtf8()
{
if('B' == toupper(encoding))
{
return decodeFromBase64();
}
else // if Q:
{
return decodeFromQuatedPrintable();
}
}
string decodeFromBase64()
{
istringstream is(extractedEncodedSubjectToConvert);
Poco::Base64Decoder e64(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(e64, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
string decodeFromQuatedPrintable()
{
replaceAll(extractedEncodedSubjectToConvert, "_", " ");
istringstream is(extractedEncodedSubjectToConvert);
Poco::Net::QuotedPrintableDecoder qp(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(qp, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
private:
string charset;
char encoding;
string extractedEncodedSubjectToConvert;
bool isStringEncoded;
static constexpr const char* sequenceBeginEncodedText = "=?";
static constexpr const char* sequenceEndEncodedText = "?=";
};
int main()
{
Poco::Net::POP3ClientSession session("poczta.o2.pl");
session.login("my mail", "my password");
Poco::Net::POP3ClientSession::MessageInfoVec messages;
session.listMessages(messages);
Poco::Net::MessageHeader header;
Poco::Net::MailMessage message;
auto currentMessage = messages[0];
session.retrieveHeader(currentMessage.id, header);
session.retrieveMessage(currentMessage.id, message);
const string subject = message.getSubject();
Encoder encoder(subject);
cout << "Original subject: " << subject << endl;
cout << "Encoded: " << encoder.convert() << endl;
}