我尝试从这部分得到答案:
<div class="beschreibung">
<!-- Jahr -->
<strong class="main">Jahr:</strong>
2008<br/>
<!-- Jahr Ende -->
<!-- Genre -->
<strong class="main">Genre:</strong>
Action | Krimi | Drama<br/>
<!-- Genre Ende -->
<!-- Sprache -->
<strong class="main">Sprache:</strong>
Deutsch DTS-HD | Englisch DTS-HD<br/>
<!-- Sprache Ende -->
<!-- Länge -->
<strong class="main">Laufzeit:</strong>
90 Minuten<br/>
<!-- Länge Ende -->
所以我尝试了如下:
for details in soup_.find_all("div", {"class" : "beschreibung"}):
info = {details.text.rstrip(':'): details.next_sibling.strip() for s in details.find_all("strong")}
print (repr(info))
我得到了休息:
{u"\n\nJahr: \r\n2010\n\n\nGenre: \r\nThriller | Mystery\n\n\nSprache: \r\nDeutsch DTS\n\n\nLaufzeit: \r\n76 Minuten\n\n": u''}
但我怎样才能获得#34; Jahr&#34;,&#34; Sprache&#34;等...
答案 0 :(得分:2)
你的意思是这样的:
from bs4 import BeautifulSoup
content = """
<div class="beschreibung">
<!-- Jahr -->
<strong class="main">Jahr:</strong>
2008<br/>
<!-- Jahr Ende -->
<!-- Genre -->
<strong class="main">Genre:</strong>
Action | Krimi | Drama<br/>
<!-- Genre Ende -->
<!-- Sprache -->
<strong class="main">Sprache:</strong>
Deutsch DTS-HD | Englisch DTS-HD<br/>
<!-- Sprache Ende -->
<!-- Lange -->
<strong class="main">Laufzeit:</strong>
90 Minuten<br/>
<!-- Lange Ende -->
</div>
"""
soup = BeautifulSoup(content, "html.parser")
info = {}
for details in soup.find_all("div", {"class" : "beschreibung"}):
for strong in details.find_all('strong'):
info[strong.text.strip(':')] = strong.next_sibling.strip('\n')
print info
此代码将产生以下输出:
{u'Genre': u'Action | Krimi | Drama', u'Jahr': u'2008', u'Laufzeit': u'90 Minuten', u'Sprache': u'Deut│
sch DTS-HD | Englisch DTS-HD'}
答案 1 :(得分:0)
<!DOCTYPE html>
<meta charset="utf-8">
<style>
.background {
fill: none;
pointer-events: all;
}
#states {
fill: #aaa;
}
#states .active {
fill: orange;
}
#state-borders {
fill: none;
stroke: #fff;
stroke-width: 1.5px;
stroke-linejoin: round;
stroke-linecap: round;
pointer-events: none;
}
</style>
<body>
<script src="//d3js.org/d3.v3.min.js"></script>
<script src="//d3js.org/topojson.v1.min.js"></script>
<script>
var width = 960,
height = 500,
centered;
var projection = d3.geo.albersUsa()
.scale(1070)
.translate([width / 2, height / 2]);
var path = d3.geo.path()
.projection(projection);
var svg = d3.select("body").append("svg")
.attr("width", width)
.attr("height", height);
svg.append("rect")
.attr("class", "background")
.attr("width", width)
.attr("height", height)
.on("click", clicked);
var g = svg.append("g");
d3.json("/mbostock/raw/4090846/us.json", function(error, us) {
if (error) throw error;
g.append("g")
.attr("id", "states")
.selectAll("path")
.data(topojson.feature(us, us.objects.states).features)
.enter().append("path")
.attr("d", path)
.on("click", clicked);
g.append("path")
.datum(topojson.mesh(us, us.objects.states, function(a, b) { return a !== b; }))
.attr("id", "state-borders")
.attr("d", path);
});
function clicked(d) {
var x, y, k;
if (d && centered !== d) {
var centroid = path.centroid(d);
x = centroid[0];
y = centroid[1];
k = 4;
centered = d;
} else {
x = width / 2;
y = height / 2;
k = 1;
centered = null;
}
g.selectAll("path")
.classed("active", centered && function(d) { return d === centered; });
g.transition()
.duration(750)
.attr("transform", "translate(" + width / 2 + "," + height / 2 + ")scale(" + k + ")translate(" + -x + "," + -y + ")")
.style("stroke-width", 1.5 / k + "px");
}
</script>
out_put:
from bs4 import BeautifulSoup
content = """
<div class="beschreibung">
<!-- Jahr -->
<strong class="main">Jahr:</strong>
2008<br/>
<!-- Jahr Ende -->
<!-- Genre -->
<strong class="main">Genre:</strong>
Action | Krimi | Drama<br/>
<!-- Genre Ende -->
<!-- Sprache -->
<strong class="main">Sprache:</strong>
Deutsch DTS-HD | Englisch DTS-HD<br/>
<!-- Sprache Ende -->
<!-- Lange -->
<strong class="main">Laufzeit:</strong>
90 Minuten<br/>
<!-- Lange Ende -->
</div>
"""
soup = BeautifulSoup(content, "lxml")
{i.text.rstrip(':'):i.next_sibling.strip() for i in soup.find_all('strong')}