我使用以下HTML(HTML之后给出了我对它运行的Python正则表达式):
<!-- DW6 --><!-- kelly 10-28-16 the white box welcome email -->
<!-- e2ma-autonomous-cv -->
<!--[if gte mso 9]><xml>
<o:OfficeDocumentSettings>
<o:AllowPNG/>
<o:PixelsPerInch>96</o:PixelsPerInch>
</o:OfficeDocumentSettings>
</xml><![endif] -->
<!--[if !mso]><!-- --><link href="https://fonts.googleapis.com/css?family=Lato:400,700" rel="stylesheet" type="text/css"/><!--<![endif]-->
<style>
h1 {
font-family: Georgia, "Times New Roman", Times, serif;
font-size: 20px;
font-weight: normal;
color: #000
}
h2 {
font-family: "Lato", "Trebuchet MS", Arial, Helvetica, sans-serif;
font-size: 14px;
font-weight: bold;
color: #000;
text-transform: uppercase
}
h3 {
font-family: Georgia, "Times New Roman", Times, serif;
font-size: 16px;
font-weight: normal;
color: #000;
font-style: italic
}
h4 {
font-family: "Lato", "Trebuchet MS", Arial, Helvetica, sans-serif;
font-size: 17px;
font-weight: normal;
color: #000
}
.e2ma-content-block p, .e2ma-combo-block p, .e2ma-combo-content p, .e2ma-content-block li, .e2ma-combo-block li, .e2ma-combo-content li {
font-family: "Lato", "Trebuchet MS", Arial, Helvetica, sans-serif;
font-size: 14px;
color: #000
}
#template_container a, .link {
color: #333;
text-decoration: underline
}
blockquote {
padding-top: 10px;
padding-bottom: 10px;
border-top: 1px solid #000;
border-bottom: 1px solid #000;
font-style: italic
}
blockquote p {
margin-bottom: 0
}
.editable_image img {
display: block;
border: 0
}
.e2ma-unsubscribe {
padding-bottom: 10px
}
.business_rsvp {
font-family: Helvetica, Arial, sans-serif;
padding: 10px;
color: #666;
font-size: 10px
}
[style*="Lato"] {
font-family: "Lato", Arial, sans-serif !important
}
@media screen and (max-width: 480px) {
html {
-webkit-text-size-adjust: none
}
*[class].e2ma-content-block p, *[class].e2ma-combo-block p, *[class].e2ma-combo-content p, *[class].e2ma-content-block li, *[class].e2ma-combo-block li, *[class].e2ma-combo-content li {
font-size: 15px !important
}
*[class].e2ma-holder table, *[class].e2ma-holder table td {
display: table !important;
float: none !important;
width: 100% !important;
padding-left: 0 !important;
padding-right: 0 !important
}
*[class].e2ma-single-column-layout table {
float: none !important;
margin: 0 auto
}
*[class].e2ma-unsubscribe span {
font-size: 12px !important
}
*[class].business_rsvp {
font-size: 12px !important
}
*[class].social-sharing {
text-align: center !important;
padding-bottom: 10px
}
*[class].e2ma-layout-column-content img, *[class].e2ma-single-column-layout img, *[class].e2ma-layout-column-sidebar img, *[class].e2ma-layout-column-sidebar-2 img, *[class].e2ma-layout-column-sidebar-3 img {
max-width: 100%;
height: auto;
margin: 0 auto
}
*[class].footer-social img {
width: 44px !important;
height: 43px !important;
margin: 0 auto
}
*[class].share-block {
text-align: center !important;
margin: 0 auto !important
}
*[class].footer-text {
text-align: center !important
}
*[class].mobile-width {
width: 100% !important;
padding-left: 10px;
padding-right: 10px
}
*[class].mobile-width-nopad {
width: 100% !important
}
*[class].stack, *[class].e2ma-layout-column-content, *[class].e2ma-layout-column-sidebar, *[class].e2ma-layout-column-sidebar-2, *[class].e2ma-layout-column-sidebar-3 {
display: block !important;
width: 100% !important
}
*[class].hide {
display: none !important
}
*[class].center, *[class].center img {
text-align: center !important;
margin: 0 auto
}
*[class].scale img, *[class].editable_image img {
max-width: 100%;
height: auto;
margin: 0 auto
}
*[class].addpad {
padding: 10px !important
}
*[class].addpad-top {
padding-top: 10px !important
}
*[class].sanpad {
padding: 0 !important
}
*[class].sanborder {
border: none !important
}
*[class].nav212 {
width: 212 px !important;
height: auto !important;
margin: 0 auto !important
}
}
</style>
# Pattern from the question, kept verbatim. Every ".*" is greedy and, under
# re.DOTALL, "." also matches newlines, so a single match can run from the
# first "<!--" in the document through the last "endif]-->".
conditional_search = r"(<!--.*\[if.*\]>.*<!--.*\n*.*endif\]-->)"
pattern = re.compile(conditional_search, flags=re.DOTALL)
it = pattern.finditer(html)
# The pattern has exactly one capture group, so every entry is a 1-tuple.
print([match.groups() for match in it])
`print` 的输出匹配了从 `<!-- DW6 -->` 到 `</style>` 标记末尾的所有内容。
我只是想找到每一个HTML注释块(包括含有两个 `<!--` 的 `<!--[if` 条件注释块)。
答案 0 :(得分:1)
试试这个。基本上我只是把所有换行符去掉(也许你想保留它们?)并调整了正则表达式。如果正则表达式有不清楚的地方,请留言;我认为它相当直观。如果我没有理解错你的需求,这应该可以解决问题。下面是我的完整测试脚本:
import re
# Test fixture: the e-mail template from the question, reproduced unchanged.
# It contains two Outlook-style conditional comments ("<!--[if ...]>" ...
# "<![endif]") followed by a <style> block that must NOT be matched.
textblock = '''
<!-- DW6 --><!-- kelly 10-28-16 the white box welcome email -->
<!-- e2ma-autonomous-cv -->
<!--[if gte mso 9]><xml>
<o:OfficeDocumentSettings>
<o:AllowPNG/>
<o:PixelsPerInch>96</o:PixelsPerInch>
</o:OfficeDocumentSettings>
</xml><![endif] -->
<!--[if !mso]><!-- --><link href="https://fonts.googleapis.com/css?family=Lato:400,700" rel="stylesheet" type="text/css"/><!--<![endif]-->
<style>
h1 {
font-family: Georgia, "Times New Roman", Times, serif;
font-size: 20px;
font-weight: normal;
color: #000
}
h2 {
font-family: "Lato", "Trebuchet MS", Arial, Helvetica, sans-serif;
font-size: 14px;
font-weight: bold;
color: #000;
text-transform: uppercase
}
h3 {
font-family: Georgia, "Times New Roman", Times, serif;
font-size: 16px;
font-weight: normal;
color: #000;
font-style: italic
}
h4 {
font-family: "Lato", "Trebuchet MS", Arial, Helvetica, sans-serif;
font-size: 17px;
font-weight: normal;
color: #000
}
.e2ma-content-block p, .e2ma-combo-block p, .e2ma-combo-content p, .e2ma-content-block li, .e2ma-combo-block li, .e2ma-combo-content li {
font-family: "Lato", "Trebuchet MS", Arial, Helvetica, sans-serif;
font-size: 14px;
color: #000
}
#template_container a, .link {
color: #333;
text-decoration: underline
}
blockquote {
padding-top: 10px;
padding-bottom: 10px;
border-top: 1px solid #000;
border-bottom: 1px solid #000;
font-style: italic
}
blockquote p {
margin-bottom: 0
}
.editable_image img {
display: block;
border: 0
}
.e2ma-unsubscribe {
padding-bottom: 10px
}
.business_rsvp {
font-family: Helvetica, Arial, sans-serif;
padding: 10px;
color: #666;
font-size: 10px
}
[style*="Lato"] {
font-family: "Lato", Arial, sans-serif !important
}
@media screen and (max-width: 480px) {
html {
-webkit-text-size-adjust: none
}
*[class].e2ma-content-block p, *[class].e2ma-combo-block p, *[class].e2ma-combo-content p, *[class].e2ma-content-block li, *[class].e2ma-combo-block li, *[class].e2ma-combo-content li {
font-size: 15px !important
}
*[class].e2ma-holder table, *[class].e2ma-holder table td {
display: table !important;
float: none !important;
width: 100% !important;
padding-left: 0 !important;
padding-right: 0 !important
}
*[class].e2ma-single-column-layout table {
float: none !important;
margin: 0 auto
}
*[class].e2ma-unsubscribe span {
font-size: 12px !important
}
*[class].business_rsvp {
font-size: 12px !important
}
*[class].social-sharing {
text-align: center !important;
padding-bottom: 10px
}
*[class].e2ma-layout-column-content img, *[class].e2ma-single-column-layout img, *[class].e2ma-layout-column-sidebar img, *[class].e2ma-layout-column-sidebar-2 img, *[class].e2ma-layout-column-sidebar-3 img {
max-width: 100%;
height: auto;
margin: 0 auto
}
*[class].footer-social img {
width: 44px !important;
height: 43px !important;
margin: 0 auto
}
*[class].share-block {
text-align: center !important;
margin: 0 auto !important
}
*[class].footer-text {
text-align: center !important
}
*[class].mobile-width {
width: 100% !important;
padding-left: 10px;
padding-right: 10px
}
*[class].mobile-width-nopad {
width: 100% !important
}
*[class].stack, *[class].e2ma-layout-column-content, *[class].e2ma-layout-column-sidebar, *[class].e2ma-layout-column-sidebar-2, *[class].e2ma-layout-column-sidebar-3 {
display: block !important;
width: 100% !important
}
*[class].hide {
display: none !important
}
*[class].center, *[class].center img {
text-align: center !important;
margin: 0 auto
}
*[class].scale img, *[class].editable_image img {
max-width: 100%;
height: auto;
margin: 0 auto
}
*[class].addpad {
padding: 10px !important
}
*[class].addpad-top {
padding-top: 10px !important
}
*[class].sanpad {
padding: 0 !important
}
*[class].sanborder {
border: none !important
}
*[class].nav212 {
width: 212 px !important;
height: auto !important;
margin: 0 auto !important
}
}
</style>
'''

# Flatten the document onto a single line. Without re.DOTALL "." does not
# match a newline, so a conditional comment spanning several lines could not
# be captured otherwise. A plain literal swap needs no regex.
text = textblock.replace("\n", " ")

# Raw string so that "\[" and "\]" reach the regex engine untouched instead of
# being treated as (invalid) string escapes.
#   <!--\[if[^\]]+\]>   opening marker, e.g. "<!--[if gte mso 9]>"
#   (.*?)               lazily captured body, stops at the FIRST end marker
#   <!\[endif\]         closing marker "<![endif]"
conditional_search = r"<!--\[if[^\]]+\]>(.*?)<!\[endif\]"

# With a single capture group, findall returns a list of the captured bodies
# (the text between the opening and closing markers, markers excluded).
it = re.findall(conditional_search, text)
for i in it:
    print(i)
    # Marker line printed after each match.
    # NOTE(review): the pasted script had lost its indentation, so placing
    # this line inside the loop is an assumption -- confirm.
    print("how now brown cow")