I'm using an Outlook library to work with emails. I receive a mail body like this:
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional //EN"><html><head=
>
<meta http-equiv=3D"Content-Type" content=3D"text/html; charset=3Dutf-8"><t=
itle>Facebook</title><style>@media all and (max-width: 480px){*[class].ib_t=
{min-width:100% !important}*[class].ib_row{display:block !important}*[class=
].ib_ext{display:block !important;padding:10px 0 5px 0;vertical-align:top !=
important;width:100% !important}*[class].ib_img,*[class].ib_mid{vertical-al=
ign:top !important}*[class].mb_blk{display:block !important;padding-bottom:=
10px;width:100% !important}*[class].mb_hide{display:none !important}*[class=
].mb_inl{display:inline !important}}.d_mb_show{display:none}.d_mb_show_cent=
er{display:table;margin:auto}@media only screen and (max-device-width: 480p=
x){.d_mb_hide{display:none !important}.d_mb_show{display:block !important}}=
.mb_text h1,.mb_text h2,.mb_text h3,.mb_text h4,.mb_text h5,.mb_text h6{lin=
e-height:normal}.mb_work_text h1{font-size:18px;line-height:normal;margin-t=
op:4px}.mb_work_text h2,.mb_work_text h3{font-size:16px;line-height:normal;=
margin-top:4px}.mb_work_text h4,.mb_work_text h5,.mb_work_text h6{font-size=
:14px;line-height:normal}.mb_work_text a{color:#1270e9}.mb_work_text p{marg=
in-top:4px}</style></head><body style=3D"margin:0;padding:0;" dir=3D"ltr" b=
gcolor=3D"#ffffff"><table border=3D"0" width=3D"100%;" cellspacing=3D"0" ce=
llpadding=3D"0" id=3D"email_table" style=3D"border-collapse:collapse;"><tr>=
<td id=3D"email_content" style=3D"font-family:Helvetica Neue,Helvetica,Luci=
da Grande,tahoma,verdana,arial,sans-serif;background:#ffffff;"><table borde=
r=3D"0" width=3D"100%" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-=
collapse:collapse;"><tr style=3D""><td height=3D"20" style=3D"line-height:2=
0px;" colspan=3D"3"> </td></tr><tr><td height=3D"1" colspan=3D"3" styl=
e=3D"line-height:1px;"></td></tr><tr><td style=3D""><table border=3D"0" wid=
th=3D"430" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-collapse:col=
lapse;margin:0 auto 0 auto;"><tr><td style=3D""><table border=3D"0" width=
=3D"430px" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-collapse:col=
lapse;margin:0 auto 0 auto;width:430px;"><tr style=3D""><td width=3D"15" st=
yle=3D"display:block;width:15px;"> </td><td style=3D""><ta=
ble border=3D"0" width=3D"100%" cellspacing=3D"0" cellpadding=3D"0" style=
=3D"border-collapse:collapse;"><tr><td style=3D""><img src=3D"https://stati=
c.xx.fbcdn.net/rsrc.php/v3/yv/r/ri-arh5nIkG.png" width=3D"430" style=3D"bor=
der:0;width:430px;"></td></tr><tr style=3D""><td height=3D"30" style=3D"lin=
e-height:30px;" colspan=3D"3"> </td></tr><tr><td style=3D""><table bor=
der=3D"0" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-collapse:coll=
apse;"><tr><td width=3D"30" style=3D"display:block;width:30px;">  =
; </td><td style=3D""><table border=3D"0" cellspacing=3D"0" cellpaddin=
g=3D"0" style=3D"border-collapse:collapse;"><tr><td style=3D""><p style=3D"=
padding:0;margin:0;text-align:center;color:#000000;font-size:25px;">Welcome=
to Instagram, sepoi7936</p><p style=3D"padding:0;margin:0;text-align:cente=
r;color:#565a5c;font-size:18px;">First, please confirm your email address. =
If you're ever locked out of your account, this will help us get you back i=
n.</p></td></tr><tr style=3D""><td height=3D"30" style=3D"line-height:30px;=
" colspan=3D"1"> </td></tr><tr><td style=3D""><a href=3D"https://insta=
gram.com/accounts/confirm_email/Ufsti1rj/bmltYWF6aGRhcmkxMkBvdXRsb29rLmNvbQ=
/?app_redirect=3DFalse" style=3D"color:#3b5998;text-decoration:none;display=
:block;width:370px;"><table border=3D"0" width=3D"100%" cellspacing=3D"0" c=
ellpadding=3D"0" style=3D"border-collapse:collapse;"><tr><td style=3D"borde=
r-collapse:collapse;border-radius:3px;text-align:center;display:block;borde=
r:solid 1px #009fdf;padding:10px 16px 14px 16px;margin:0 2px 0 auto;min-wid=
th:80px;background-color:#47A2EA;"><a href=3D"https://instagram.com/account=
s/confirm_email/Ufsti1rj/bmltYWF6aGRhcmkxMkBvdXRsb29rLmNvbQ/?app_redirect=
=3DFalse" style=3D"color:#3b5998;text-decoration:none;display:block;"><cent=
er><font size=3D"3"><span style=3D"font-family:Helvetica Neue,Helvetica,Rob=
oto,Arial,sans-serif;white-space:nowrap;font-weight:bold;vertical-align:mid=
dle;color:#fdfdfd;font-size:16px;line-height:16px;">Confirm your =
email address</span></font></center></a></td></tr></table></a></td></t=
r><tr style=3D""><td height=3D"30" style=3D"line-height:30px;" colspan=3D"3=
"> </td></tr><tr><td style=3D"border-top:solid 1px #c8c8c8;"></td></tr=
></table></td><td width=3D"30" style=3D"display:block;width:30px;"> &n=
bsp; </td></tr></table></td></tr><tr style=3D""><td height=3D"25" styl=
e=3D"line-height:25px;" colspan=3D"3"> </td></tr></table></td><td widt=
h=3D"15" style=3D"display:block;width:15px;"> </td></tr><t=
r style=3D""><td width=3D"15" style=3D"display:block;width:15px;"> &nb=
sp; </td><td style=3D""><table border=3D"0" width=3D"100%" cellspacing=
=3D"0" cellpadding=3D"0" style=3D"border-collapse:collapse;"><tr><td style=
=3D""><img src=3D"https://static.xx.fbcdn.net/rsrc.php/v3/yg/r/zACQd8KtsK7.=
png" width=3D"430" style=3D"border:0;width:430px;"></td></tr><tr style=3D""=
><td height=3D"30" style=3D"line-height:30px;" colspan=3D"3"> </td></t=
r><tr><td style=3D""><table border=3D"0" cellspacing=3D"0" cellpadding=3D"0=
" style=3D"border-collapse:collapse;"><tr><td width=3D"30" style=3D"display=
:block;width:30px;"> </td><td style=3D""><table border=3D"=
0" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-collapse:collapse;">=
<tr><td style=3D""><p style=3D"padding:0;margin:0;text-align:center;color:#=
000000;font-size:25px;">Choose What You See</p><p style=3D"padding:0;margin=
:0;text-align:center;color:#565a5c;font-size:18px;">Following someone means=
you'll see the photos and videos they post. The more accounts you follow, =
the more great stuff you'll see in your feed. Follow your friends or people=
who share your interests.</p></td></tr><tr style=3D""><td height=3D"30" st=
yle=3D"line-height:30px;" colspan=3D"1"> </td></tr><tr><td style=3D"">=
<a href=3D"https://instagram.com/accounts/confirm_email/Ufsti1rj/bmltYWF6aG=
RhcmkxMkBvdXRsb29rLmNvbQ/?app_redirect=3DFalse" style=3D"color:#3b5998;text=
-decoration:none;display:block;width:370px;"><table border=3D"0" width=3D"1=
00%" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-collapse:collapse;=
"><tr><td style=3D"border-collapse:collapse;border-radius:3px;text-align:ce=
nter;display:block;border:solid 1px #009fdf;padding:10px 16px 14px 16px;mar=
gin:0 2px 0 auto;min-width:80px;background-color:#47A2EA;"><a href=3D"https=
://instagram.com/accounts/confirm_email/Ufsti1rj/bmltYWF6aGRhcmkxMkBvdXRsb2=
9rLmNvbQ/?app_redirect=3DFalse" style=3D"color:#3b5998;text-decoration:none=
;display:block;"><center><font size=3D"3"><span style=3D"font-family:Helvet=
ica Neue,Helvetica,Roboto,Arial,sans-serif;white-space:nowrap;font-weight:b=
old;vertical-align:middle;color:#fdfdfd;font-size:16px;line-height:16px;">F=
ind People to Follow</span></font></center></a></td></tr></t=
able></a></td></tr><tr style=3D""><td height=3D"30" style=3D"line-height:30=
px;" colspan=3D"3"> </td></tr><tr><td style=3D"border-top:solid 1px #c=
8c8c8;"></td></tr></table></td><td width=3D"30" style=3D"display:block;widt=
h:30px;"> </td></tr></table></td></tr><tr style=3D""><td h=
eight=3D"25" style=3D"line-height:25px;" colspan=3D"3"> </td></tr></ta=
ble></td><td width=3D"15" style=3D"display:block;width:15px;"> &=
nbsp;</td></tr><tr style=3D""><td width=3D"15" style=3D"display:block;width=
:15px;"> </td><td style=3D""><table border=3D"0" width=3D"=
100%" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-collapse:collapse=
;"><tr><td style=3D""><img src=3D"https://static.xx.fbcdn.net/rsrc.php/v3/y=
4/r/twHu0ANul9l.png" width=3D"430" style=3D"border:0;width:430px;"></td></t=
r><tr style=3D""><td height=3D"30" style=3D"line-height:30px;" colspan=3D"3=
"> </td></tr><tr><td style=3D""><table border=3D"0" cellspacing=3D"0" =
cellpadding=3D"0" style=3D"border-collapse:collapse;"><tr><td width=3D"30" =
style=3D"display:block;width:30px;"> </td><td style=3D""><=
table border=3D"0" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-coll=
apse:collapse;"><tr><td style=3D""><p style=3D"padding:0;margin:0;text-alig=
n:center;color:#000000;font-size:25px;">Express Yourself</p><p style=3D"pad=
ding:0;margin:0;text-align:center;color:#565a5c;font-size:18px;">Share your=
perspective by capturing and sharing photos and videos from your day, whet=
her it's your morning routine or the trip of a lifetime. Instagram's free f=
ilters and tools make it easy to express yourself in new ways.</p></td></tr=
><tr style=3D""><td height=3D"30" style=3D"line-height:30px;" colspan=3D"1"=
> </td></tr><tr><td style=3D""><a href=3D"https://instagram.com/accoun=
ts/confirm_email/Ufsti1rj/bmltYWF6aGRhcmkxMkBvdXRsb29rLmNvbQ/?app_redirect=
=3DFalse" style=3D"color:#3b5998;text-decoration:none;display:block;width:3=
70px;"><table border=3D"0" width=3D"100%" cellspacing=3D"0" cellpadding=3D"=
0" style=3D"border-collapse:collapse;"><tr><td style=3D"border-collapse:col=
lapse;border-radius:3px;text-align:center;display:block;border:solid 1px #0=
09fdf;padding:10px 16px 14px 16px;margin:0 2px 0 auto;min-width:80px;backgr=
ound-color:#47A2EA;"><a href=3D"https://instagram.com/accounts/confirm_emai=
l/Ufsti1rj/bmltYWF6aGRhcmkxMkBvdXRsb29rLmNvbQ/?app_redirect=3DFalse" style=
=3D"color:#3b5998;text-decoration:none;display:block;"><center><font size=
=3D"3"><span style=3D"font-family:Helvetica Neue,Helvetica,Roboto,Arial,san=
s-serif;white-space:nowrap;font-weight:bold;vertical-align:middle;color:#fd=
fdfd;font-size:16px;line-height:16px;">Open Instagram</span></font></c=
enter></a></td></tr></table></a></td></tr><tr style=3D""><td height=3D"30" =
style=3D"line-height:30px;" colspan=3D"3"> </td></tr><tr><td style=3D"=
border-top:solid 1px #c8c8c8;"></td></tr></table></td><td width=3D"30" styl=
e=3D"display:block;width:30px;"> </td></tr></table></td></=
tr><tr style=3D""><td height=3D"25" style=3D"line-height:25px;" colspan=3D"=
3"> </td></tr></table></td><td width=3D"15" style=3D"display:block;wid=
th:15px;"> </td></tr><tr style=3D""><td width=3D"15" style=
=3D"display:block;width:15px;"> </td><td style=3D""><table=
border=3D"0" width=3D"100%" cellspacing=3D"0" cellpadding=3D"0" style=3D"b=
order-collapse:collapse;"><tr><td style=3D""><img src=3D"https://static.xx.=
fbcdn.net/rsrc.php/v3/yC/r/QbsnSndHS4m.png" width=3D"430" style=3D"border:0=
;width:430px;"></td></tr><tr style=3D""><td height=3D"30" style=3D"line-hei=
ght:30px;" colspan=3D"3"> </td></tr><tr><td style=3D""><table border=
=3D"0" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-collapse:collaps=
e;"><tr><td width=3D"30" style=3D"display:block;width:30px;"> &n=
bsp;</td><td style=3D""><table border=3D"0" cellspacing=3D"0" cellpadding=
=3D"0" style=3D"border-collapse:collapse;"><tr><td style=3D""><p style=3D"p=
adding:0;margin:0;text-align:center;color:#000000;font-size:25px;">Explore =
Your Interests</p><p style=3D"padding:0;margin:0;text-align:center;color:#5=
65a5c;font-size:18px;">Visit the Explore tab to find photos and videos from=
accounts you're not following yet. We'll show you posts you might like, ba=
sed on your interests and activity on Instagram. You can also find new acco=
unts to follow, so you'll see their posts in your feed.</p></td></tr><tr st=
yle=3D""><td height=3D"30" style=3D"line-height:30px;" colspan=3D"1"> =
</td></tr><tr><td style=3D""><a href=3D"https://instagram.com/accounts/conf=
irm_email/Ufsti1rj/bmltYWF6aGRhcmkxMkBvdXRsb29rLmNvbQ/?app_redirect=3DFalse=
" style=3D"color:#3b5998;text-decoration:none;display:block;width:370px;"><=
table border=3D"0" width=3D"100%" cellspacing=3D"0" cellpadding=3D"0" style=
=3D"border-collapse:collapse;"><tr><td style=3D"border-collapse:collapse;bo=
rder-radius:3px;text-align:center;display:block;border:solid 1px #009fdf;pa=
dding:10px 16px 14px 16px;margin:0 2px 0 auto;min-width:80px;background-col=
or:#47A2EA;"><a href=3D"https://instagram.com/accounts/confirm_email/Ufsti1=
rj/bmltYWF6aGRhcmkxMkBvdXRsb29rLmNvbQ/?app_redirect=3DFalse" style=3D"color=
:#3b5998;text-decoration:none;display:block;"><center><font size=3D"3"><spa=
n style=3D"font-family:Helvetica Neue,Helvetica,Roboto,Arial,sans-serif;whi=
te-space:nowrap;font-weight:bold;vertical-align:middle;color:#fdfdfd;font-s=
ize:16px;line-height:16px;">Visit Explore</span></font></center></a></=
td></tr></table></a></td></tr><tr style=3D""><td height=3D"30" style=3D"lin=
e-height:30px;" colspan=3D"3"> </td></tr><tr><td style=3D"border-top:s=
olid 1px #c8c8c8;"></td></tr></table></td><td width=3D"30" style=3D"display=
:block;width:30px;"> </td></tr></table></td></tr><tr style=
=3D""><td height=3D"25" style=3D"line-height:25px;" colspan=3D"3"> </t=
d></tr></table></td><td width=3D"15" style=3D"display:block;width:15px;">&n=
bsp; </td></tr><tr><td width=3D"15" style=3D"display:block;width=
:15px;"> </td><td style=3D""><p style=3D"padding:0;margin:=
0;text-align:center;color:#565a5c;font-size:18px;">Clicking any of the link=
s above will confirm nimaazhdari12@outlook.com on Instagram.</p></td><td wi=
dth=3D"15" style=3D"display:block;width:15px;"> </td></tr>=
</table></td></tr></table></td></tr><tr><td style=3D""><table border=3D"0" =
width=3D"430px" cellspacing=3D"0" cellpadding=3D"0" style=3D"border-collaps=
e:collapse;margin:0 auto 0 auto;width:430px;"><tr style=3D""><td height=3D"=
30" style=3D"line-height:30px;" colspan=3D"3"> </td></tr><tr><td width=
=3D"30" style=3D"display:block;width:30px;"> </td><td styl=
e=3D""><div style=3D"color:#abadae;font-size:12px;margin:0 auto 5px auto;">=
=C2=A9 Instagram, 1 Hacker Way, Menlo Park, CA 94022</div><div style=3D"col=
or:#abadae;font-size:12px;margin:0 auto 5px auto;">This message was sent to=
<a style=3D"color:#abadae;text-decoration:underline;">nimaazhdari12@outloo=
k.com</a> and intended for sepoi7936. Instagram sends updates like this to =
help you keep up with the latest on Instagram. You can unsubscribe from the=
se updates, or remove your email if this isn't your Instagram account. <a h=
ref=3D"https://instagram.com/emails/unsubscribe/tutorial?user_id=3D56847734=
80&sig=3DAU_lxK13iCXRWi8x" style=3D"color:#abadae;text-decoration:under=
line;">Unsubscribe</a> or <a href=3D"https://instagram.com/accounts/remove/=
report_wrong_email/2m0kfag/4nf-865fa936d95b370febb99c189175d18a/vVT7r6CQ/bm=
ltYWF6aGRhcmkxMkBvdXRsb29rLmNvbQ/" style=3D"color:#abadae;text-decoration:u=
nderline;">remove your email</a> from this account.<br></div></td><td width=
=3D"30" style=3D"display:block;width:30px;"> </td></tr></t=
able></td></tr><tr style=3D""><td height=3D"20" style=3D"line-height:20px;"=
colspan=3D"3"> </td></tr></table><span style=3D""><img src=3D"https:/=
/www.facebook.com/email_open_log_pic.php?mid=3DHMjY0NTExNjE1Om5pbWFhemhkYXJ=
pMTJAb3V0bG9vay5jb206ODU5" style=3D"border:0;width:1px;height:1px;"></span>=
</td></tr></table></body></html>
I need to extract the Instagram confirmation link so that I can open it and confirm the address automatically. The link looks like this:
https://instagram.com/accounts/confirm_email/Ufsti1rj/bmltYWF6aGRhcmkxMkBvdXRsb29rLmNvbQ/?app_redirect=False
It is repeated two or three times. How can I find just one of them?
I use the urlmarker regex:
# Case-insensitive pattern for web URLs with one capturing group (the whole
# URL).  It has two alternatives: a URL that starts with http(s): or with a
# "domain.tld/" prefix, and a bare "domain.tld" that is not part of an e-mail
# address (guarded by the (?<!@) lookbehind and the trailing (?!@) lookahead).
# The literal must stay on a single line: it is compiled without re.VERBOSE,
# so a line break inside it becomes part of the pattern and breaks the final
# (?!@) lookahead.
URL_REGEX = r"""(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)\b/?(?!@)))"""
import re

import urlmarker

# Collect every substring of the mail body that the urlmarker pattern matches.
# NOTE(review): the body shown above is quoted-printable encoded ("=3D" for "=",
# a trailing "=" as a soft line break), so each URL is split across lines and
# the pattern stops at the break. Presumably the body should be decoded once
# (e.g. with quopri.decodestring) before matching -- confirm against how
# mystring is obtained.
url_pattern = re.compile(urlmarker.URL_REGEX)
url_pattern.findall(mystring)
but it does not capture the link completely.
答案 0 :(得分:3)
看看 BeautifulSoup。
这是一个 HTML 解析器,可以很容易地找到指定的标签。
使用您的 HTML 代码作为参数实例化 BeautifulSoup 对象,然后使用其 find_all 方法查找超链接标签(<a> 对应 "a")。
标签的属性可以通过 dict 语法获得,因此 URL 可以在 tag['href'] 中找到。
import bs4

# Placeholder for the e-mail's HTML body.
# NOTE(review): the body in the question is quoted-printable encoded ("=3D",
# trailing "=" soft line breaks). It has to be decoded exactly once (e.g. with
# quopri.decodestring) before parsing, otherwise the href values are mangled.
html = """<body>...</body>"""

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# is installed, emits a warning, and may build a different tree per machine.
soup = bs4.BeautifulSoup(html, "html.parser")

# Every <a> element in the document, with or without an href attribute.
aTags = soup.find_all("a")

# Keep the href of each anchor that points at instagram.com. The same link
# appears several times in the mail, so the first entry is enough when only
# one is needed (check that the list is non-empty first).
urls = [tag['href'] for tag in aTags if 'href' in tag.attrs and "https://instagram.com" in tag['href']]
为清楚起见,以下是该列表推导式的展开形式:
# Expanded form of the list comprehension: collect the href of every anchor
# tag in aTags whose link points at instagram.com.
urls = []
for tag in aTags:
    # Anchors without an href (plain <a style=...>) are skipped.
    if 'href' in tag.attrs and "https://instagram.com" in tag['href']:
        # Store the URL string itself, not the tag object, so the result
        # matches what the comprehension produces.
        urls.append(tag['href'])