使用 Cheerio 遍历 HTML 表格

时间:2018-03-06 13:39:44

标签: json node.js web-scraping cheerio

我必须遍历一个表并以这种方式创建一个包含信息的json对象:

var obj = {
    vaccine: "...",
    year: ...,
    country: "...",
    coverage: ...
} 

表格是:

<table class="ts">
    <tr>
        <td class="statheadings" colspan="100%" align="center">
            <h1 class="statistics">Coverage time series for Italy&nbsp;&nbsp;(ITA)</h1>
        </td>
    </tr>
    <tr>
        <td align="center" colspan="100%"> <font color="red">
            Last updated 06-Sep-2017 (data as of 05-Sep-2017)<br />Next overall update 2018<br /></font>
        </td>
    </tr>
    <tr>
        <td colspan="100%" >
            <hr />
        </td>
    </tr>

    <tr>
        <th colspan="4" align="left">Vacciness</th>  
        <th class="year">2016</th>  
        <th class="year">2015</th>  
        <th class="year">2014</th>  
        <th class="year">2013</th>  
        <th class="year">2012</th>  
        <th class="year">2011</th>  
        <th class="year">2010</th>  
        <th class="year">2009</th>  
        <th class="year">2008</th>  
        <th class="year">2007</th>  
        <th class="year">2006</th>  
        <th class="year">2005</th>  
        <th class="year">2004</th>  
        <th class="year">2003</th>  
        <th class="year">2002</th>  
        <th class="year">2001</th>  
        <th class="year">2000</th>  
        <th class="year">1999</th>  
        <th class="year">1998</th>  
        <th class="year">1997</th>  
        <th class="year">1996</th>  
        <th class="year">1995</th>  
        <th class="year">1994</th>  
        <th class="year">1993</th>  
        <th class="year">1992</th>  
        <th class="year">1991</th>  
        <th class="year">1990</th>  
        <th class="year">1989</th>  
        <th class="year">1988</th>  
        <th class="year">1987</th>  
        <th class="year">1986</th>  
        <th class="year">1985</th>  
        <th class="year">1984</th>  
        <th class="year">1983</th>  
        <th class="year">1982</th>
        <th class="year">1981</th>    
    </tr>

    <tr class="odd">
        <td colspan="4" align="left">
            <a href="timeseries/tscoveragedtp3.html" title="Click for full global time series for DTP3">DTP3</a>
        </td>
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">93&nbsp;</td>    
        <td class="statistics_small" colspan="1">95&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">97&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">97&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">95&nbsp;</td>    
        <td class="statistics_small" colspan="1">94&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">93&nbsp;</td>    
        <td class="statistics_small" colspan="1">93&nbsp;</td>    
        <td class="statistics_small" colspan="1">87&nbsp;</td>    
        <td class="statistics_small" colspan="1">87&nbsp;</td>    
        <td class="statistics_small" colspan="1">86&nbsp;</td>    
        <td class="statistics_small" colspan="1">85&nbsp;</td>    
        <td class="statistics_small" colspan="1">84&nbsp;</td>    
        <td class="statistics_small" colspan="1">84&nbsp;</td>    
        <td class="statistics_small" colspan="1">83&nbsp;</td>    
        <td class="statistics_small" colspan="1">82&nbsp;</td>    
        <td class="statistics_small" colspan="1">80&nbsp;</td>    
        <td class="statistics_small" colspan="1">80&nbsp;</td>    
        <td class="statistics_small" colspan="1">80&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>       

    <tr class="even">
        <td colspan="4" align="left"> 
        <a href="timeseries/tscoveragehepb3.html" title="Click for full global time series for HepB3">HepB3</a>
        </td>
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        93&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        97&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        94&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        94&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        50&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>     

    <tr class="odd">
        <td colspan="4" align="left">
            <a href="timeseries/tscoveragedtp3.html" title="Click for full global time series for DTP3">DTP3</a>
        </td>
        ...

如您所见,数据行交替使用 odd 和 even 两个 class。

我使用 Node.js、Express.js 和 Cheerio 来抓取此表格。这是我的代码:

const cheerio = require('cheerio');
const express = require('express');
var fs = require('fs');
const request = require('request');

const app = express();

// piece of link of each country
/*var countries = {
    'Albania': 'ALB',
    'Austria': 'AUT',
    'Belgium': 'BEL',
    'Bulgaria': 'BGR',
    'Croatia': 'HRV',
    'Cyprus': 'CYP',
    'Denmark': 'DNK',
    'Estonia': 'EST',
    'Finland': 'FIN',
    'France': 'FRA',
    'Germany': 'DEU',
    'Greece': 'GRC',
    'Iceland': 'ISL',
    'Ireland': 'IRL',
    'Italy': 'ITA',
    'Latvia': 'LVA',
    'Netherlands': 'NLD',
    'Norway': 'NOR',
    'Poland': 'POL',
    'Portugal': 'PRT',
    'Romania': 'ROU',
    'Slovakia': 'SVK',
    'Slovenia': 'SVN',
    'Spain': 'ESP',
    'Sweden': 'SWE',
    'Switzerland': 'CHE',
    'United Kingdom': 'GBR'
};*/
// for test
// Maps a display name to the country code appended to the request URL below.
var countries = {
    'Albania': 'ALB'
};

// create variables to create json object
// `jsons` is the array serialised to output.json; `json` is ONE object shared
// by every callback below.
// NOTE(review): because the same `json` object is overwritten in each .each()
// callback and pushed only once per response, the output holds a single record
// carrying the last year / last vaccine seen, not one record per
// (vaccine, year) pair.
var jsons = [];
var json = {vaccine: "", country: "", year: "", coverage: ""};

// One HTTP request per country.
// NOTE(review): `country` is not declared with var/let/const here, so it is
// created as an implicit global.
for(country in countries) {
    var url = 'http://apps.who.int/immunization_monitoring/globalsummary/coverages?c=' + countries[country];

    // The immediately-invoked function captures the current `country` so the
    // response callback it returns still sees the right name when it runs.
    request(url, (function(country) {
        var thisCountry = country;

        // Response handler: parses the page and writes output.json.
        return function(error, res, html) {
            if(error) {
                console.log(error);
                throw error;
            }

            // send html response to cheerio to create DOM
            // NOTE(review): `$` is assigned without a declaration (implicit global).
            $ = cheerio.load(html);

            // arrays containing all the values
            var years = [];
            var vaccines = [];
            var coverages = [];

            // NOTE(review): `i` is re-initialised to 1 for every response and
            // incremented once below; it never drives a loop.
            var i = 1;

            // scraping year values
            // The callback parameter is immediately shadowed by `var year`;
            // the value used is the trimmed header text.
            $('.ts .year').each(function(year) {
                var country = thisCountry.trim();
                var year = $(this).text().trim();
                years.push(year);
                // Overwritten on every iteration: only the last year survives.
                json.country = country;
                json.year = year;
            });
            console.log(years, years.length);

            // scraping vaccine values
            // All .odd rows are collected first, then all .even rows, so
            // `vaccines` is not in table order.
            $('.ts .odd td a').each(function(odd) {
                var vaccine = $(this).text().trim();
                vaccines.push(vaccine);
                // Overwritten on every iteration: only the last vaccine survives.
                json.vaccine = vaccine;
            });
            $('.ts .even td a').each(function(even) {
                var vaccine = $(this).text().trim();
                vaccines.push(vaccine);
                json.vaccine = vaccine;
            });
            console.log(vaccines, vaccines.length);

            // scraping coverage values (get all values)
            // Same odd-then-even ordering as `vaccines`; the cells of all rows
            // end up in one flat list with no link back to their row or year.
            $('.ts .odd .statistics_small').each(function(oddCoverage) {
                var coverage = $(this).text().trim();
                coverages.push(coverage);
            });
            $('.ts .even .statistics_small').each(function(evenCoverage) {
                var coverage = $(this).text().trim();
                coverages.push(coverage);
            });
            console.log(coverages, coverages.length);

            console.log("i", i); // 1

            // scraping coverage values (get only some values)
            // NOTE(review): with i === 1 this selects .odd elements that are the
            // first child of their parent and stores the text of the whole
            // matched element, not of a single cell. Presumably nothing matches
            // for the table shown in the question (its first row has no class),
            // which would leave json.coverage as "" - confirm.
            $('.ts .odd:nth-child(' + i + ')').each(function(oddCoverage) {
                var coverage = $(this).text().trim();
                json.coverage = coverage;
            });

            i++;
            console.log("i", i); // 2

            // The single shared object is appended once per response.
            jsons.push(json);

            // write jsons on file output.json
            // NOTE(review): the callback logs success without checking `error`.
            fs.writeFile('output.json', JSON.stringify(jsons, null, 3), function(error) {
                console.log('File output.json successfully written!');
            });
            console.log("i", i); // 2

        } // end return

    })(country)); // end request
}

代码无效。

我能够取到所有年份、所有疫苗和所有覆盖率的值,但这些值全都混在了一起,无法对应起来。

我不确定如何构建一个包含所有已排序、结构化数据的 json 文件。我的目标是得到这样的文件:

[
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": 2016,
      "coverage": 99
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": 2015,
      "coverage": 100
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": 2014,
      "coverage": 100
   },
   {
    ...
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": 1981,
      "coverage": 93
   },
   {
      "vaccine": "DTP1",
      "country": "Albania",
      "year": 2016,
      "coverage": 99
   },
   {
    ...
   },
   {
      "vaccine": "DTP1",
      "country": "Albania",
      "year": 1981,
      "coverage": _
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": 2016,
      "coverage": _
   },
   {
    ...
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": 1981,
      "coverage": _
   },
   {
      "vaccine": "BCG",
      "country": "Austria",
      "year": 2016,
      "coverage": _
   },
   {
    ...
   }
]

必须包含576 = 36*16元素。

我尝试创建一个索引 i 来遍历 td 元素,但它不起作用。现在,我的 output.json 文件是:

[
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1981",
      "coverage": ""
   }
]

谢谢!

修改

我的目标是从表中构建一个json对象。

表格:

<table class="ts">
    <tr>
        <td class="statheadings" colspan="100%" align="center">
            <h1 class="statistics">Coverage time series for Italy&nbsp;&nbsp;(ITA)</h1>
        </td>
    </tr>
    <tr>
        <td align="center" colspan="100%"> <font color="red">
            Last updated 06-Sep-2017 (data as of 05-Sep-2017)<br />Next overall update 2018<br /></font>
        </td>
    </tr>
    <tr>
        <td colspan="100%" >
            <hr />
        </td>
    </tr>

    <tr>
        <th colspan="4" align="left">Vacciness</th>  
        <th class="year">2016</th>  
        <th class="year">2015</th>  
        <th class="year">2014</th>  
        <th class="year">2013</th>  
        <th class="year">2012</th>  
        <th class="year">2011</th>  
        <th class="year">2010</th>  
        <th class="year">2009</th>  
        <th class="year">2008</th>  
        <th class="year">2007</th>  
        <th class="year">2006</th>  
        <th class="year">2005</th>  
        <th class="year">2004</th>  
        <th class="year">2003</th>  
        <th class="year">2002</th>  
        <th class="year">2001</th>  
        <th class="year">2000</th>  
        <th class="year">1999</th>  
        <th class="year">1998</th>  
        <th class="year">1997</th>  
        <th class="year">1996</th>  
        <th class="year">1995</th>  
        <th class="year">1994</th>  
        <th class="year">1993</th>  
        <th class="year">1992</th>  
        <th class="year">1991</th>  
        <th class="year">1990</th>  
        <th class="year">1989</th>  
        <th class="year">1988</th>  
        <th class="year">1987</th>  
        <th class="year">1986</th>  
        <th class="year">1985</th>  
        <th class="year">1984</th>  
        <th class="year">1983</th>  
        <th class="year">1982</th>
        <th class="year">1981</th>    
    </tr>

    <tr class="odd">
        <td colspan="4" align="left">
            <a href="timeseries/tscoveragedtp3.html" title="Click for full global time series for DTP3">DTP3</a>
        </td>
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">93&nbsp;</td>    
        <td class="statistics_small" colspan="1">95&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">97&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">97&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">95&nbsp;</td>    
        <td class="statistics_small" colspan="1">94&nbsp;</td>    
        <td class="statistics_small" colspan="1">96&nbsp;</td>    
        <td class="statistics_small" colspan="1">93&nbsp;</td>    
        <td class="statistics_small" colspan="1">93&nbsp;</td>    
        <td class="statistics_small" colspan="1">87&nbsp;</td>    
        <td class="statistics_small" colspan="1">87&nbsp;</td>    
        <td class="statistics_small" colspan="1">86&nbsp;</td>    
        <td class="statistics_small" colspan="1">85&nbsp;</td>    
        <td class="statistics_small" colspan="1">84&nbsp;</td>    
        <td class="statistics_small" colspan="1">84&nbsp;</td>    
        <td class="statistics_small" colspan="1">83&nbsp;</td>    
        <td class="statistics_small" colspan="1">82&nbsp;</td>    
        <td class="statistics_small" colspan="1">80&nbsp;</td>    
        <td class="statistics_small" colspan="1">80&nbsp;</td>    
        <td class="statistics_small" colspan="1">80&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>    
        <td class="statistics_small" colspan="1">_&nbsp;</td>       

    <tr class="even">
        <td colspan="4" align="left"> 
        <a href="timeseries/tscoveragehepb3.html" title="Click for full global time series for HepB3">HepB3</a>
        </td>
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        93&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        97&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        96&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        94&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        94&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        95&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        50&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>    
        <td class="statistics_small" colspan="1">
        _&nbsp;
        </td>     

    <tr class="odd">
        <td colspan="4" align="left">
            <a href="timeseries/tscoveragedtp3.html" title="Click for full global time series for DTP3">DTP3</a>
        </td>
        ...

我想要的json对象:

[
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": 2016,
      "coverage": 99
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": 2015,
      "coverage": 100
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": 2014,
      "coverage": 100
   },
   {
    ...
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": 1981,
      "coverage": 93
   },
   {
      "vaccine": "DTP1",
      "country": "Albania",
      "year": 2016,
      "coverage": 99
   },
   {
    ...
   },
   {
      "vaccine": "DTP1",
      "country": "Albania",
      "year": 1981,
      "coverage": _
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": 2016,
      "coverage": _
   },
   {
    ...
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": 1981,
      "coverage": _
   },
   {
      "vaccine": "BCG",
      "country": "Austria",
      "year": 2016,
      "coverage": _
   },
   {
    ...
   }
]

1 个答案:

答案 0 :(得分:7)

我简化了你的解析代码。它在我的机器上工作正常。代码的问题在于,当您将所有值推送到数组中时,您没有使用这些值来构建所需的对象。我的解析策略:像你一样抓取三个独立数组中的所有数据,然后迭代这些数组以生成所需的对象结构。

我用您的 countries 对象中的前两个国家在本地做了测试,结果是成功的。

以下是我在for(country in countries)循环中更改的代码:

// Returns the trimmed text of every element matched by `selector`,
// in the order cheerio yields the matches.
const textsOf = function(selector) {
    return $(selector).map(function() {
        return $(this).text().trim();
    }).get();
};

// Scrape the three flat lists: column headers (years), row labels (vaccines)
// and every data cell (coverages).
var years = textsOf('.ts .year');
var vaccines = textsOf('.ts .odd td a, .ts .even td a');
var coverages = textsOf('.ts .odd .statistics_small, .ts .even .statistics_small');

// Each vaccine row contributes one cell per year, so the cell for
// (row, col) sits at row * <number of years> + col in the flat list.
const cellsPerRow = years.length;
vaccines.forEach(function(vaccineName, row) {
    years.forEach(function(yearLabel, col) {
        jsons.push({
            vaccine: vaccineName,
            country: country,
            year: yearLabel,
            coverage: coverages[row * cellsPerRow + col]
        });
    });
});

// Persist everything accumulated so far (all countries processed up to now)
// to output.json, reporting either the failure or the success.
fs.writeFile('output.json', JSON.stringify(jsons, null, 3), function(error) {
    if (error) {
        console.log(error, 'occurred while trying to write',country,'data to output.json');
        return;
    }
    console.log('File output.json successfully written with',country,'data!');
});

下面是阿尔巴尼亚的第一种和最后一种疫苗(BCG 和 TT2+)对应的结果对象(由于 StackOverflow 的篇幅限制,数组中间的部分已省略):

[
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2016",
      "coverage": "99"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2015",
      "coverage": "100"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2014",
      "coverage": "100"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2013",
      "coverage": "100"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2012",
      "coverage": "96"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2011",
      "coverage": "97"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2010",
      "coverage": "100"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2009",
      "coverage": "97"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2008",
      "coverage": "100"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2007",
      "coverage": "98"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2006",
      "coverage": "97"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2005",
      "coverage": "98"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2004",
      "coverage": "97"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2003",
      "coverage": "95"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2002",
      "coverage": "94"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2001",
      "coverage": "93"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "2000",
      "coverage": "93"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1999",
      "coverage": "93"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1998",
      "coverage": "87"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1997",
      "coverage": "94"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1996",
      "coverage": "94"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1995",
      "coverage": "97"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1994",
      "coverage": "87"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1993",
      "coverage": "82"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1992",
      "coverage": "81"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1991",
      "coverage": "80"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1990",
      "coverage": "94"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1989",
      "coverage": "94"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1988",
      "coverage": "92"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1987",
      "coverage": "92"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1986",
      "coverage": "92"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1985",
      "coverage": "92"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1984",
      "coverage": "90"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1983",
      "coverage": "90"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1982",
      "coverage": "_"
   },
   {
      "vaccine": "BCG",
      "country": "Albania",
      "year": "1981",
      "coverage": "93"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2016",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2015",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2014",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2013",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2012",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2011",
      "coverage": "75"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2010",
      "coverage": "85"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2009",
      "coverage": "80"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2008",
      "coverage": "83"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2007",
      "coverage": "86"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2006",
      "coverage": "86"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2005",
      "coverage": "86"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2004",
      "coverage": "85"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2003",
      "coverage": "73"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2002",
      "coverage": "71"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2001",
      "coverage": "80"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "2000",
      "coverage": "89"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1999",
      "coverage": "77"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1998",
      "coverage": "65"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1997",
      "coverage": "99"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1996",
      "coverage": "98"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1995",
      "coverage": "97"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1994",
      "coverage": "100"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1993",
      "coverage": "96"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1992",
      "coverage": "94"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1991",
      "coverage": "78"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1990",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1989",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1988",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1987",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1986",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1985",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1984",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1983",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1982",
      "coverage": "_"
   },
   {
      "vaccine": "TT2+",
      "country": "Albania",
      "year": "1981",
      "coverage": "_"
   }
]

这段解析代码有点脆弱:如果您要抓取的网站更改了 class 名称,或者不同国家的页面结构不一致,得到的数据就可能是错误的。根据项目的范围和目的,这可能是问题,也可能不是,但值得考虑。