我从某个网站的表格中抓取了一个项目列表,并将其转换为JSON。我的做法是:
/**
 * Fetch the campaign list from the Elastic Email v2 API.
 *
 * Sends a single GET request and decodes the JSON response body.
 *
 * @return mixed The decoded body with JSON objects as associative arrays,
 *               or null when the body cannot be decoded as JSON.
 */
public function get_ee_api() {
    $response = Requests::get("https://api.elasticemail.com/v2/campaign/list?apikey=*", array());
    // This method must not call itself here: an unconditional self-call
    // never terminates and the return below would be unreachable.
    return json_decode($response->body, true);
}
/**
 * Print the `ee_name` of every row in the `ee_campaigns` table.
 *
 * Each name is echoed followed by an HTML line break.
 */
public function get_data() {
    $campaigns = $this->db->query('SELECT * FROM ee_campaigns')->result();
    foreach ($campaigns as $campaign) {
        echo $campaign->ee_name, '<br/>';
    }
}
这给了我这样的输出:
["name", "component1", "unit value", "x", "y", "z", "component1", "unit value", "x2", "y2", "z2", "component2", "unit value", "x3", "y3", "z3", ...
我想把它转换成这样的东西:
# Download the page and parse it with the "html.parser" backend.
r = requests.get("some_url")
soup = BeautifulSoup(r.content, "html.parser")

# Build one flat list: the text of every <keyword> element first, followed by
# the whitespace-stripped text of every <td> cell.
data = [tag.text for tag in soup.find_all("keyword")]
data.extend(cell.text.strip() for cell in soup.find_all("td"))

# Dump the list as UTF-8 JSON, leaving non-ASCII characters unescaped.
with io.open('data.json', 'w', encoding='utf8') as outfile:
    json.dump(data, outfile, ensure_ascii=False)
如何像这样格式化我的JSON输出?
html文件:
{
"table": {
"id":"1",
"title": "name",
"component1": [
{
"unit":"unit value",
"x value":"x",
"y value":"y",
"z value":"z"
},
{
"unit":"unit value",
"x value":"x",
"y value":"y",
"z value":"z"
}
],
"component2":[
{
"unit":"unit value",
"x value":"x",
"y value":"y",
"z value":"z"
}
]
...
}
}
答案 0 :(得分:0)
from bs4 import BeautifulSoup, Comment
# Sample HTML used as parser input: one header row (<th>) naming the columns,
# followed by three data rows (<td>). Two rows share the name "component1".
# The closing markup is well-formed (</table>), not the malformed "</<table>".
t = """<html><table id="table">
<tr>
<th>component</th>
<th>unit</th>
<th>x value</th>
<th>y value</th>
<th>z value</th>
</tr>
<tr>
<td ><a href="#">
component1
</a> </td>
<td class="right ">unit</td>
<td class="right "><nobr>x </nobr></td>
<td class="right "><nobr>y </nobr></td>
<td class="right "><nobr>z </nobr></td>
</tr>
<tr>
<td class="alt"><a href="/#">
component1
</a> </td>
<td class="right alt">unit</td>
<td class="right alt"><nobr>x2 </nobr></td>
<td class="right alt"><nobr>y2 </nobr></td>
<td class="right alt"><nobr>z2 </nobr></td>
</tr>
<tr>
<td ><a href="#">
component2
</a> </td>
<td class="right ">g</td>
<td class="right "><nobr>x3 </nobr></td>
<td class="right "><nobr>y3 </nobr></td>
<td class="right "><nobr>z3 </nobr></td>
</tr></table></html>"""
import json

# Name the parser explicitly. Without it Beautiful Soup picks whichever parser
# happens to be installed and warns, so the parse tree can differ by machine.
bs = BeautifulSoup(t, "html.parser")

results = {}
keys = []  # column titles; filled in once the header (<th>) row is seen
for row in bs.find_all('tr'):
    # build the header
    header_cells = row.find_all('th')
    if header_cells:
        keys = [cell.text.strip() for cell in header_cells]
        continue

    # for rows other than header
    cells = row.find_all('td')
    if not cells:
        continue

    # for each row build the dictionary equivalent; zip() stops at the shorter
    # sequence, so a row with fewer cells than the header cannot raise
    # IndexError
    temp_res = {key: cell.text.strip() for key, cell in zip(keys, cells)}

    # Rows seen before any header, or in a table without a 'component'
    # column, have no group to belong to; skip them rather than reuse a stale
    # name or fail on an unbound one.
    component_name = temp_res.get('component')
    if component_name is None:
        continue

    # append the row to the list kept for its component
    results.setdefault(component_name, []).append(temp_res)

# adjusting the result in the format you requested. adding id/title.
results["id"] = "1"
results["title"] = "name"
main_result = {"table": results}

# A bare json.dumps(...) discards its return value when run as a script, so
# print the serialized document.
print(json.dumps(main_result, indent=4))
输出:
{
"table": {
"component2": [
{
"component": "component2",
"z value": "z3",
"unit": "g",
"x value": "x3",
"y value": "y3"
}
],
"id": "1",
"component1": [
{
"component": "component1",
"z value": "z",
"unit": "unit",
"x value": "x",
"y value": "y"
},
{
"component": "component1",
"z value": "z2",
"unit": "unit",
"x value": "x2",
"y value": "y2"
}
],
"title": "name"
}
}