我想遍历用 cheerio 抓取到的数据。
示例输入数据:
11 aug 2017
Arsenal - Leicester City
12 aug 2017
Watford - Liverpool
Crystal Palace - Huddersfield Town
Everton - Stoke City
我想要创建的最终结果是json文件:
{
"fixtureDate": [
{
"homeTeam": "Arsenal",
"awayTeam": "Leicester City",
"matchDate": " 11 aug 2017 "
},
{
"homeTeam": "Watford",
"awayTeam": "Liverpool",
"matchDate": " 12 aug 2017 "
},
{
"homeTeam": "Crystal Palace",
"awayTeam": "Huddersfield Town",
"matchDate": " 12 aug 2017 "
},
{
"homeTeam": "Everton",
"awayTeam": "Stoke City",
"matchDate": " 12 aug 2017 "
},
我目前用来遍历数据并创建数组的代码:
// Merge the parallel scraped arrays into one fixture object per index.
// One entry is written for every element of json.matchDate, and each field
// is read from the same position in its own array.
var idx = 0;
while (idx < json.matchDate.length) {
    var fixture = {};
    fixture.matchDate = json.matchDate[idx];
    fixture.homeTeam = json.homeTeam[idx];
    fixture.awayTeam = json.awayTeam[idx];
    fixture.matchTime = json.matchTime[idx];
    output.fixtureDate[idx] = fixture;
    idx += 1;
}
但结果不正确,因为 matchDate 也随着 i++ 逐个取下一个日期。当前结果如下(请注意 matchDate 字段):
{
"fixtureDate": [
{
"homeTeam": "Arsenal",
"awayTeam": "Leicester City",
"matchDate": " 11 aug 2017 "
},
{
"homeTeam": "Watford",
"awayTeam": "Liverpool",
"matchDate": " 12 aug 2017 "
},
{
"homeTeam": "Crystal Palace",
"awayTeam": "Huddersfield Town",
"matchDate": " 13 aug 2017 "
},
{
"homeTeam": "Everton",
"awayTeam": "Stoke City",
"matchDate": " 14 aug 2017 "
},
如何循环数据并创建正确的数组?
到目前为止我创建的完整代码:
const cheerio = require('cheerio');
const request = require('request');
const fs = require('fs');
const url = 'FIXTURES LINK';

/**
 * Collect the text content of every element matching a selector.
 *
 * @param {Function} $ - cheerio instance loaded with the page HTML.
 * @param {string} selector - CSS selector to query.
 * @returns {string[]} One text value per matched element, in document order.
 */
function collectText($, selector) {
    const values = [];
    $(selector).each(function () {
        values.push($(this).text());
    });
    return values;
}

// Download the fixtures page, scrape it and write the result to fixtures.json.
request(url, function (error, response, html) {
    // Report failures instead of silently doing nothing.
    if (error) {
        console.error('Request failed:', error);
        return;
    }
    if (response.statusCode !== 200) {
        console.error('Unexpected HTTP status code:', response.statusCode);
        return;
    }

    const $ = cheerio.load(html);

    // Scraped values, one array per field.
    const json = {
        homeTeam: collectText($, '.fm-fixtures__list li .fm-fixture .fm-fixture__team--home .fm-fixture__team__name'),
        awayTeam: collectText($, '.fm-fixtures__list li .fm-fixture .fm-fixture__team--away .fm-fixture__team__name'),
        matchTime: collectText($, '.fm-fixtures__list li .fm-fixture .fm-fixture__status .match-status'),
        matchDate: collectText($, '.fm-fixtures__list li .fm-fixtures__date')
    };

    // Declared locally: assigning to an undeclared `output` creates an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    const output = {
        fixtureDate: []
    };

    // NOTE: the arrays are paired purely by index. In the sample markup only
    // some list items contain a date header, so matchDate can be shorter than
    // homeTeam and entries may receive the wrong (or no) date. That pairing is
    // intentionally left unchanged here.
    for (let i = 0; i < json.homeTeam.length; i++) {
        output.fixtureDate.push({
            matchDate: json.matchDate[i],
            homeTeam: json.homeTeam[i],
            awayTeam: json.awayTeam[i],
            matchTime: json.matchTime[i]
        });
    }

    // Serialise once; print to the console and persist to disk.
    const scrape = JSON.stringify(output, null, 4);
    console.log(scrape);

    fs.writeFile('fixtures.json', scrape, function (err) {
        // Only claim success when the write actually succeeded.
        if (err) {
            console.error('Could not write fixtures.json:', err);
            return;
        }
        console.log('File successfully written to folder!');
    });
}); // end request function
答案 0 :(得分:1)
由于在 HTML 中比赛并未按日期分组,因此您必须逐场抓取比赛,并检查该项是否带有日期标签。如果有,就使用该日期并把这场比赛推入数组;如果没有,则沿用上一次出现的日期作为该场比赛的日期,如下所示:
注意:您也可以在1个循环中执行此操作,而不是多个循环并创建多个数组然后组合它们。
// Build the fixture list in a single pass over the list items.
// A date label is only present on some items, so the most recent label seen
// is remembered in `lastDate` and reused for the items that have none.
const json = {
    "fixtureDate": []
};
let lastDate = "";
$('.fm-fixtures__list li').each(function () {
    const matchContainer = $(this);

    // Update the running date first, so that a label is honoured even when
    // the item it sits on is skipped below.
    const matchDateContainer = matchContainer.find(".fm-fixtures__date__label");
    if (matchDateContainer.length) {
        lastDate = matchDateContainer.text().trim();
    }

    const homeTeam = matchContainer.find(".fm-fixture__team--home .fm-fixture__team__name").text().trim();
    const awayTeam = matchContainer.find(".fm-fixture__team--away .fm-fixture__team__name").text().trim();

    // An item without any team name is not a fixture; do not emit an empty entry.
    // (Returning undefined continues the iteration; only `false` would stop it.)
    if (homeTeam === "" && awayTeam === "") {
        return;
    }

    json.fixtureDate.push({homeTeam: homeTeam, awayTeam: awayTeam, matchDate: lastDate});
});
console.log(json);
<!-- Loads jQuery 2.1.1 from the Google CDN; presumably this supplies the $ used by the script above when the snippet is run in a browser (confirm). -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<ul class="fm-fixtures__list" data-reactid="1432">
<!-- Fixture item that carries its own date header (August 11, 2017): Arsenal (home) vs Leicester City (away), status 4 : 3. -->
<li data-reactid="1433">
<a href="/livescores/2522743" data-reactid="1434">
<div class="fm-fixtures__date" data-reactid="1435"><span class="fm-fixtures__date__label" data-reactid="1436"><!-- react-text: 1437 -->August 11, 2017 <!-- /react-text --><!-- react-text: 1438 --> <!-- /react-text --></span></div>
<div class="fm-fixture" data-reactid="1439">
<div class="fm-fixture__time" data-reactid="1440"></div>
<div class="fm-fixture__team fm-fixture__team--home" data-reactid="1441">
<p class="fm-fixture__team__name" data-reactid="1442">Arsenal</p>
<div class="fm-fixture__team__logo" data-reactid="1443">
<div class="fm-image fm-image--loading" data-reactid="1444"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/9825.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="Arsenal" data-reactid="1445"></div>
</div>
</div>
<div class="fm-fixture__status" data-reactid="1446"><span class="match-status" data-reactid="1447"><!-- react-text: 1448 --> <!-- /react-text --><!-- react-text: 1449 -->4 : 3<!-- /react-text --><!-- react-text: 1450 --> <!-- /react-text --></span></div>
<div class="fm-fixture__team fm-fixture__team--away"
data-reactid="1451">
<div class="fm-fixture__team__logo" data-reactid="1452">
<div class="fm-image fm-image--loading" data-reactid="1453"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/8197.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="Leicester City" data-reactid="1454"></div>
</div>
<p class="fm-fixture__team__name" data-reactid="1455">Leicester City</p>
</div>
</div>
</a>
</li>
<!-- Fixture item that carries its own date header (August 12, 2017): Watford (home) vs Liverpool (away), status 3 : 3. -->
<li data-reactid="1456">
<a href="/livescores/2522751" data-reactid="1457">
<div class="fm-fixtures__date" data-reactid="1458"><span class="fm-fixtures__date__label" data-reactid="1459"><!-- react-text: 1460 -->August 12, 2017 <!-- /react-text --><!-- react-text: 1461 --> <!-- /react-text --></span></div>
<div class="fm-fixture" data-reactid="1462">
<div class="fm-fixture__time" data-reactid="1463"></div>
<div class="fm-fixture__team fm-fixture__team--home" data-reactid="1464">
<p class="fm-fixture__team__name" data-reactid="1465">Watford</p>
<div class="fm-fixture__team__logo" data-reactid="1466">
<div class="fm-image fm-image--loading" data-reactid="1467"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/9817.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="Watford" data-reactid="1468"></div>
</div>
</div>
<div class="fm-fixture__status" data-reactid="1469"><span class="match-status" data-reactid="1470"><!-- react-text: 1471 --> <!-- /react-text --><!-- react-text: 1472 -->3 : 3<!-- /react-text --><!-- react-text: 1473 --> <!-- /react-text --></span></div>
<div class="fm-fixture__team fm-fixture__team--away"
data-reactid="1474">
<div class="fm-fixture__team__logo" data-reactid="1475">
<div class="fm-image fm-image--loading" data-reactid="1476"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/8650.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="Liverpool" data-reactid="1477"></div>
</div>
<p class="fm-fixture__team__name" data-reactid="1478">Liverpool</p>
</div>
</div>
</a>
</li>
<!-- Fixture item with no date header of its own: Crystal Palace (home) vs Huddersfield Town (away), status 0 : 3. The scraper has to reuse the last date label seen in an earlier item. -->
<li data-reactid="1536">
<a href="/livescores/2522746" data-reactid="1537">
<div class="fm-fixture" data-reactid="1538">
<div class="fm-fixture__time" data-reactid="1539"></div>
<div class="fm-fixture__team fm-fixture__team--home" data-reactid="1540">
<p class="fm-fixture__team__name" data-reactid="1541">Crystal Palace</p>
<div class="fm-fixture__team__logo" data-reactid="1542">
<div class="fm-image fm-image--loading" data-reactid="1543"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/9826.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="Crystal Palace" data-reactid="1544"></div>
</div>
</div>
<div class="fm-fixture__status" data-reactid="1545"><span class="match-status" data-reactid="1546"><!-- react-text: 1547 --> <!-- /react-text --><!-- react-text: 1548 -->0 : 3<!-- /react-text --><!-- react-text: 1549 --> <!-- /react-text --></span></div>
<div class="fm-fixture__team fm-fixture__team--away"
data-reactid="1550">
<div class="fm-fixture__team__logo" data-reactid="1551">
<div class="fm-image fm-image--loading" data-reactid="1552"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/9796.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="Huddersfield Town" data-reactid="1553"></div>
</div>
<p class="fm-fixture__team__name" data-reactid="1554">Huddersfield Town</p>
</div>
</div>
</a>
</li>
<!-- Fixture item with no date header of its own: Everton (home) vs Stoke City (away), status 1 : 0. -->
<li data-reactid="1479">
<a href="/livescores/2522747" data-reactid="1480">
<div class="fm-fixture" data-reactid="1481">
<div class="fm-fixture__time" data-reactid="1482"></div>
<div class="fm-fixture__team fm-fixture__team--home" data-reactid="1483">
<p class="fm-fixture__team__name" data-reactid="1484">Everton</p>
<div class="fm-fixture__team__logo" data-reactid="1485">
<div class="fm-image fm-image--loading" data-reactid="1486"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/8668.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="Everton" data-reactid="1487"></div>
</div>
</div>
<div class="fm-fixture__status" data-reactid="1488"><span class="match-status" data-reactid="1489"><!-- react-text: 1490 --> <!-- /react-text --><!-- react-text: 1491 -->1 : 0<!-- /react-text --><!-- react-text: 1492 --> <!-- /react-text --></span></div>
<div class="fm-fixture__team fm-fixture__team--away"
data-reactid="1493">
<div class="fm-fixture__team__logo" data-reactid="1494">
<div class="fm-image fm-image--loading" data-reactid="1495"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/10194.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="Stoke City" data-reactid="1496"></div>
</div>
<p class="fm-fixture__team__name" data-reactid="1497">Stoke City</p>
</div>
</div>
</a>
</li>
<!-- Fixture item with no date header of its own: West Bromwich Albion (home) vs AFC Bournemouth (away), status 1 : 0. -->
<li data-reactid="1498">
<a href="/livescores/2522752" data-reactid="1499">
<div class="fm-fixture" data-reactid="1500">
<div class="fm-fixture__time" data-reactid="1501"></div>
<div class="fm-fixture__team fm-fixture__team--home" data-reactid="1502">
<p class="fm-fixture__team__name" data-reactid="1503">West Bromwich Albion</p>
<div class="fm-fixture__team__logo" data-reactid="1504">
<div class="fm-image fm-image--loading" data-reactid="1505"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/8659.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="West Bromwich Albion" data-reactid="1506"></div>
</div>
</div>
<div class="fm-fixture__status" data-reactid="1507"><span class="match-status" data-reactid="1508"><!-- react-text: 1509 --> <!-- /react-text --><!-- react-text: 1510 -->1 : 0<!-- /react-text --><!-- react-text: 1511 --> <!-- /react-text --></span></div>
<div class="fm-fixture__team fm-fixture__team--away"
data-reactid="1512">
<div class="fm-fixture__team__logo" data-reactid="1513">
<div class="fm-image fm-image--loading" data-reactid="1514"><img src="https://images.fotmob.com/image_resources/logo/teamlogo/8678.png" class="fm-image__wrapper fm-team-logo fm-team-logo--small" alt="AFC Bournemouth" data-reactid="1515"></div>
</div>
<p class="fm-fixture__team__name" data-reactid="1516">AFC Bournemouth</p>
</div>
</div>
</a>
</li>
</ul>
答案 1 :(得分:0)
这样做:
const cheerio = require('cheerio');
const request = require('request');
const fs = require('fs');
const url = "https://www.fotmob.com/leagues/47/matches/";

// Download the fixtures page, turn every list item into a fixture object and
// write the result to fixtures.json.
request(url, function (error, response, html) {
    // Report failures instead of silently doing nothing.
    if (error) {
        console.error('Request failed:', error);
        return;
    }
    if (response.statusCode !== 200) {
        console.error('Unexpected HTTP status code:', response.statusCode);
        return;
    }

    const $ = cheerio.load(html);

    // Date of the most recent item that had a date label; items without a
    // label reuse it.
    let matchDate;

    const jsonArray = $('#app-view > div.fm-wrapper > section > div.fm-content > section ul > li').map(function (i, el) {
        // Query inside the current <li> rather than re-parsing its HTML and
        // overwriting the document-level `$`.
        const item = $(el);

        const date = item.find('.fm-fixtures__date__label').text();
        if (date !== '') {
            matchDate = date;
        }

        const home = item.find('div.fm-fixture > div.fm-fixture__team.fm-fixture__team--home > p').text();
        if (home === '') {
            // Not a fixture row. cheerio's map() leaves out null/undefined results.
            return undefined;
        }

        return {
            matchDate: matchDate,
            homeTeam: home,
            awayTeam: item.find('div.fm-fixture > div.fm-fixture__team.fm-fixture__team--away > p').text(),
            matchStatus: item.find('div > div.fm-fixture__status > span').text()
        };
    }).toArray();

    const JsonData = {
        fixtureDate: jsonArray
    };

    // create a json file
    fs.writeFile('fixtures.json', JSON.stringify(JsonData, null, 4), function (err) {
        // Only claim success when the write actually succeeded.
        if (err) {
            console.error('Could not write fixtures.json:', err);
            return;
        }
        console.log('File successfully written to folder!');
    });
}); // end request function
fixtures.json 的输出为:
{
"fixtureDate": [
{
"matchDate": "August 11, 2017 ",
"homeTeam": "Arsenal",
"awayTeam": "Leicester City",
"matchStatus": " 4 : 3 "
},
{
"matchDate": "August 12, 2017 ",
"homeTeam": "Watford",
"awayTeam": "Liverpool",
"matchStatus": " 3 : 3 "
},
{
"matchDate": "August 12, 2017 ",
"homeTeam": "Crystal Palace",
"awayTeam": "Huddersfield Town",
"matchStatus": " 0 : 3 "
},
{
"matchDate": "August 12, 2017 ",
"homeTeam": "Everton",
"awayTeam": "Stoke City",
"matchStatus": " 1 : 0 "
},
{
"matchDate": "August 12, 2017 ",
"homeTeam": "West Bromwich Albion",
"awayTeam": "AFC Bournemouth",
"matchStatus": " 1 : 0 "
},
{
"matchDate": "August 12, 2017 ",
"homeTeam": "Chelsea",
"awayTeam": "Burnley",
"matchStatus": " 2 : 3 "
},
{
"matchDate": "August 12, 2017 ",
"homeTeam": "Southampton",
"awayTeam": "Swansea City",
"matchStatus": " 0 : 0 "
},
{
"matchDate": "August 12, 2017 ",
"homeTeam": "Brighton & Hove Albion",
"awayTeam": "Manchester City",
"matchStatus": " 0 : 2 "
},
{
"matchDate": "August 13, 2017 ",
"homeTeam": "Newcastle United",
"awayTeam": "Tottenham Hotspur",
"matchStatus": " 0 : 2 "
},
{
"matchDate": "August 13, 2017 ",
"homeTeam": "Manchester United",
"awayTeam": "West Ham United",
"matchStatus": " 4 : 0 "
},
{
"matchDate": "August 19, 2017 ",
"homeTeam": "Swansea City",
"awayTeam": "Manchester United",
"matchStatus": " 0 : 4 "
},
{
"matchDate": "August 19, 2017 ",
"homeTeam": "Leicester City",
"awayTeam": "Brighton & Hove Albion",
"matchStatus": " 2 : 0 "
},
{
"matchDate": "August 19, 2017 ",
"homeTeam": "Burnley",
"awayTeam": "West Bromwich Albion",
"matchStatus": " 0 : 1 "
},
{
"matchDate": "August 19, 2017 ",
"homeTeam": "Liverpool",
"awayTeam": "Crystal Palace",
"matchStatus": " 1 : 0 "
},
{
"matchDate": "August 19, 2017 ",
"homeTeam": "AFC Bournemouth",
"awayTeam": "Watford",
"matchStatus": " 0 : 2 "
},
{
"matchDate": "August 19, 2017 ",
"homeTeam": "Southampton",
"awayTeam": "West Ham United",
"matchStatus": " 3 : 2 "
},
{
"matchDate": "August 19, 2017 ",
"homeTeam": "Stoke City",
"awayTeam": "Arsenal",
"matchStatus": " 1 : 0 "
},
{
"matchDate": "August 20, 2017 ",
"homeTeam": "Huddersfield Town",
"awayTeam": "Newcastle United",
"matchStatus": " 1 : 0 "
},
{
"matchDate": "August 20, 2017 ",
"homeTeam": "Tottenham Hotspur",
"awayTeam": "Chelsea",
"matchStatus": " 1 : 2 "
},
{
"matchDate": "August 21, 2017 ",
"homeTeam": "Manchester City",
"awayTeam": "Everton",
"matchStatus": " 1 : 1 "
},
{
"matchDate": "August 26, 2017 ",
"homeTeam": "AFC Bournemouth",
"awayTeam": "Manchester City",
"matchStatus": " 1 : 2 "
},
{
"matchDate": "August 26, 2017 ",
"homeTeam": "Huddersfield Town",
"awayTeam": "Southampton",
"matchStatus": " 0 : 0 "
},
{
"matchDate": "August 26, 2017 ",
"homeTeam": "Newcastle United",
"awayTeam": "West Ham United",
"matchStatus": " 3 : 0 "
}, ...]}