是否有任何库可以解析电子邮件?我知道有很多。 除了基本的MIME解析之外,它还应该能够解析如下内容:
它应该支持其中之一:C / C ++,PHP,Python
答案 0 :(得分:1)
您很可能会在 SpamAssassin 的源代码中找到一些可用的代码。
答案 1 :(得分:1)
这是我在PHP中的简单示例:
<?php
/*
 * Minimal MIME demo: lists the leaf parts of a multipart/mixed,
 * multipart/related or multipart/alternative message and prints the
 * To / From / Subject header values.
 *
 * Output is wrapped in a single <pre> element and every value taken from
 * the message is HTML-escaped before it is printed.
 */

$file = 'mime-mixed-related-alternative.eml';
$str = file_get_contents($file);
if ($str === false) {
    // Without the message there is nothing meaningful to print.
    die('Unable to read ' . $file);
}

// Collect the quoted boundary parameter of each supported multipart
// Content-Type header. [^"\r\n]+ stops at the closing quote, so further
// parameters on the same line are not swallowed and an empty boundary is
// never returned.
preg_match_all(
    '/Content-Type: multipart\/(?:mixed|related|alternative); boundary="([^"\r\n]+)"/',
    $str,
    $boundaryMatches
);
$boundaries = $boundaryMatches[1];

echo "<pre>";
// print_r($boundaries);

// Raw (unescaped) pieces of every multipart section, in discovery order.
$rawParts = [];
foreach ($boundaries as $boundary) {
    echo "\n\n\nBoundary " . htmlentities($boundary) . "\r\n";

    // A section runs from just after the first "--boundary" delimiter up to
    // the last closing "--boundary--" delimiter. Plain string functions are
    // used so that boundary characters such as "/", "(", "+", "." or "?"
    // cannot be misread as regular-expression syntax.
    $delimiter = '--' . $boundary;
    $firstPos = strpos($str, $delimiter);
    $closePos = strrpos($str, $delimiter . '--');
    if ($firstPos === false || $closePos === false) {
        continue;
    }
    $start = $firstPos + strlen($delimiter);
    if ($closePos < $start) {
        // Only the closing delimiter exists: the section has no parts.
        continue;
    }

    $section = substr($str, $start, $closePos - $start);
    foreach (explode($delimiter, $section) as $piece) {
        $rawParts[] = $piece;
    }
}

// Keep only leaf parts: a piece that mentions any boundary string is a
// container for a nested multipart section whose own pieces are already in
// the list. The check runs on the raw text, before escaping, so boundaries
// containing characters that htmlentities() rewrites are still detected.
// Original indexes are kept, so the printed keys may have gaps.
$AllPartsUnique = [];
foreach ($rawParts as $key => $piece) {
    $isContainer = false;
    foreach ($boundaries as $boundary) {
        if (strpos($piece, $boundary) !== false) {
            $isContainer = true;
            break;
        }
    }
    if (!$isContainer) {
        // Escaped so that HTML parts are shown as source on the page.
        $AllPartsUnique[$key] = htmlentities($piece);
    }
}

print_r($AllPartsUnique);

// Print the first occurrence of each header that starts a line. A missing
// header prints an empty value instead of raising an undefined-offset
// warning. Values are escaped because addresses such as "Name <a@b.c>"
// would otherwise be treated as markup by the browser.
foreach (['To', 'From', 'Subject'] as $headerName) {
    $value = '';
    if (preg_match('/(?:^|\n)' . $headerName . ': (.*)/', $str, $headerMatch)) {
        $value = $headerMatch[1];
    }
    echo $headerName . ' ' . htmlentities($value);
}

echo "</pre>";

// end script
die();
?>
And here is an example MIME message:
From: ddd@email.coc
To: to@email.coc
Subject: Example Email
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="MixedBoundaryString"
--MixedBoundaryString
Content-Type: multipart/related; boundary="RelatedBoundaryString"
--RelatedBoundaryString
Content-Type: multipart/alternative; boundary="AlternativeBoundaryString"
--AlternativeBoundaryString
Content-Type: text/plain;charset="utf-8"
Content-Transfer-Encoding: quoted-printable
This is the plain text part of the email.
--AlternativeBoundaryString
Content-Type: text/html;charset="utf-8"
Content-Transfer-Encoding: quoted-printable
<html>
<body>=0D
<img src=3D=22cid:masthead.png=40email.coc=22 width 800 height=3D80=
=5C>=0D
<p>This is the html part of the email.</p>=0D
<img src=3D=22cid:logo.png=40email.coc=22 width 200 height=3D60 =5C=
>=0D
</body>=0D
</html>=0D
--AlternativeBoundaryString--
--RelatedBoundaryString
Content-Type: image/jpgeg;name="logo.png"
Content-Transfer-Encoding: base64
Content-Disposition: inline;filename="logo.png"
Content-ID: <logo.png@email.coc>
amtsb2hiaXVvbHJueXZzNXQ2XHVmdGd5d2VoYmFmaGpremxidTh2b2hydHVqd255aHVpbnRyZnhu
dWkgb2l1b3NydGhpdXRvZ2hqdWlyb2h5dWd0aXJlaHN1aWhndXNpaHhidnVqZmtkeG5qaG5iZ3Vy
a25qbW9nNXRwbF0nemVycHpvemlnc3k5aDZqcm9wdHo7amlodDhpOTA4N3U5Nnkwb2tqMm9sd3An
LGZ2cDBbZWRzcm85eWo1Zmtsc2xrZ3g=
--RelatedBoundaryString
Content-Type: image/jpgeg;name="masthead.png"
Content-Transfer-Encoding: base64
Content-Disposition: inline;filename="masthead.png"
Content-ID: <masthead.png@email.coc>
aXR4ZGh5Yjd1OHk3MzQ4eXFndzhpYW9wO2tibHB6c2tqOTgwNXE0aW9qYWJ6aXBqOTBpcjl2MC1t
dGlmOTA0cW05dGkwbWk0OXQwYVttaXZvcnBhXGtsbGo7emt2c2pkZnI7Z2lwb2F1amdpNTh1NDlh
eXN6dWdoeXhiNzhuZzdnaHQ3eW9zemlqb2FqZWt0cmZ1eXZnamhka3JmdDg3aXV2dWd5aGVidXdz
dhyuhehe76YTGSFGA=
--RelatedBoundaryString--
--MixedBoundaryString
Content-Type: application/pdf;name="Invoice_1.pdf"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;filename="Invoice_1.pdf"
aGZqZGtsZ3poZHVpeWZoemd2dXNoamRibngganZodWpyYWRuIHVqO0hmSjtyRVVPIEZSO05SVURF
SEx1aWhudWpoZ3h1XGh1c2loZWRma25kamlsXHpodXZpZmhkcnVsaGpnZmtsaGVqZ2xod2plZmdq
a2psajY1ZWxqanNveHV5ZXJ3NTQzYXRnZnJhZXdhcmV0eXRia2xhanNueXVpNjRvNWllc3l1c2lw
dWg4NTA0
--MixedBoundaryString
Content-Type: application/pdf;name="SpecialOffer.pdf"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;filename="SpecialOffer.pdf"
aXBvY21odWl0dnI1dWk4OXdzNHU5NTgwcDN3YTt1OTQwc3U4NTk1dTg0dTV5OGlncHE1dW4zOTgw
cS0zNHU4NTk0eWI4OTcwdjg5MHE4cHV0O3BvYTt6dWI7dWlvenZ1em9pdW51dDlvdTg5YnE4N3Z3
OTViOHk5cDV3dTh5bnB3dWZ2OHQ5dTh2cHVpO2p2Ymd1eTg5MGg3ajY4bjZ2ODl1ZGlvcjQ1amts
dfnhgjdfihn=
--MixedBoundaryString--