在Python中将List转换为JSON数组

时间:2017-03-05 18:37:35

标签: python json

我从某个网站的表格中抓取了一个项目列表,想把它转换为JSON。我的方法是:

/**
 * Fetch the campaign list from the Elastic Email v2 API.
 *
 * Sends one GET request to the campaign/list endpoint and decodes the
 * JSON body of the response.
 *
 * @return mixed The decoded response with JSON objects as associative
 *               arrays (second json_decode argument is true); null when
 *               the body is not valid JSON.
 */
public function get_ee_api() {

  $response = Requests::get("https://api.elasticemail.com/v2/campaign/list?apikey=*", array());

  // No self-call here: a single request is enough, and invoking
  // get_ee_api() from inside itself would recurse without ever
  // reaching the return statement.
  return json_decode($response->body, true);

}


/**
 * Print the ee_name column of every row in the ee_campaigns table.
 *
 * Each name is written to the output followed by an HTML line break.
 * Nothing is returned.
 */
public function get_data(){

  $campaigns = $this->db->query('SELECT * FROM ee_campaigns');
  $rows = $campaigns->result();

  foreach ($rows as $campaign) {
    // One name per line in the rendered page.
    $line = $campaign->ee_name . '<br/>';
    echo $line;
  }
}

这给了我这样的输出:

["name",
    "component1",
    "unit value",
    "x",
    "y",
    "z",
    "component1",
    "unit value",
    "x2",
    "y2",
    "z2",
    "component2",
    "unit value",
    "x3",
    "y3",
    "z3",

    ... 

我想把它转换成这样的东西:

# Download the page and parse it with the stdlib-backed HTML parser.
r = requests.get("some_url")
soup = BeautifulSoup(r.content, "html.parser")

# The flat output list starts with the text of every <keyword> element ...
names = soup.find_all("keyword")
data = [name.text for name in names]

# ... followed by the whitespace-trimmed text of every table cell, in
# document order.
table = soup.find_all("td")
data.extend(item.text.strip() for item in table)

# ensure_ascii=False keeps non-ASCII characters readable in the file,
# which is why the file is opened with an explicit UTF-8 encoding.
with io.open('data.json', 'w', encoding='utf8') as outfile:
    json.dump(data, outfile, ensure_ascii=False)

如何像这样格式化我的JSON输出?

html文件:

{
    "table": {
        "id":"1",
        "title": "name",
        "component1": [
          {
            "unit":"unit value",
            "x value":"x",
            "y value":"y",
            "z value":"z"
          },
          {
            "unit":"unit value",
            "x value":"x",
            "y value":"y",
            "z value":"z"
          }
        ],
        "component2":[
          {
            "unit":"unit value",
            "x value":"x",
            "y value":"y",
            "z value":"z"
          }
        ]

        ...

    }
}

1 个答案:

答案 0 :(得分:0)

from bs4 import BeautifulSoup, Comment
import json

# Sample markup: one header row (<th>) followed by data rows (<td>).
# Several rows may share the same component name.
t = """<html><table id="table">
    <tr>
      <th>component</th>
      <th>unit</th>
      <th>x value</th>
      <th>y value</th>
      <th>z value</th>
    </tr>
    <tr>
      <td ><a href="#">
component1

</a>&nbsp;</td>
     <td class="right ">unit</td>
     <td class="right "><nobr>x&nbsp;</nobr></td>
     <td class="right "><nobr>y&nbsp;</nobr></td>
     <td class="right "><nobr>z&nbsp;</nobr></td>
    </tr>
    <tr>
      <td class="alt"><a href="/#">
component1

</a>&nbsp;</td>
      <td class="right alt">unit</td>
      <td class="right alt"><nobr>x2&nbsp;</nobr></td>
      <td class="right alt"><nobr>y2&nbsp;</nobr></td>
      <td class="right alt"><nobr>z2&nbsp;</nobr></td>
     </tr>
     <tr>
      <td ><a href="#">
component2

</a>&nbsp;</td>
      <td class="right ">g</td>
      <td class="right "><nobr>x3&nbsp;</nobr></td>
      <td class="right "><nobr>y3&nbsp;</nobr></td>
      <td class="right "><nobr>z3&nbsp;</nobr></td>
     </tr></table></html>"""

# Name the parser explicitly: without it BeautifulSoup picks whichever
# parser happens to be installed (and warns), so the resulting tree can
# differ between machines.
bs = BeautifulSoup(t, "html.parser")

# Maps component name -> list of row dictionaries for that component.
results = {}
# Column names, taken from the header row; empty until one is seen.
keys = []
for row in bs.find_all('tr'):
    # A row made of <th> cells defines the keys used for later rows.
    header_cells = row.find_all('th')
    if header_cells:
        keys = [cell.text.strip() for cell in header_cells]
        continue

    cells = row.find_all('td')
    if not cells:
        # Nothing to record. Skipping here keeps an empty <tr> from
        # re-appending the previous row's dictionary.
        continue

    # Pair each column name with its cell; zip stops at the shorter
    # side, so a row with missing cells cannot raise IndexError.
    temp_res = {key: cell.text.strip() for key, cell in zip(keys, cells)}

    component_name = temp_res.get('component')
    if component_name is None:
        # No 'component' column available, so there is no group to
        # file this row under.
        continue

    # Group rows by component, creating the list on first sight.
    results.setdefault(component_name, []).append(temp_res)

# Adjust the result to the requested format by adding id/title next to
# the component lists.
results["id"] = "1"
results["title"] = "name"

main_result = {"table": results}

# Show the document; calling json.dumps() alone discards the string.
print(json.dumps(main_result, indent=4))

输出:

{
    "table": {
        "component2": [
            {
                "component": "component2",
                "z value": "z3",
                "unit": "g",
                "x value": "x3",
                "y value": "y3"
            }
        ],
        "id": "1",
        "component1": [
            {
                "component": "component1",
                "z value": "z",
                "unit": "unit",
                "x value": "x",
                "y value": "y"
            },
            {
                "component": "component1",
                "z value": "z2",
                "unit": "unit",
                "x value": "x2",
                "y value": "y2"
            }
        ],
        "title": "name"
    }
}