这是我的node.js应用程序代码:
这是我的html.js文件代码
/**
* HTML Helpers
*/
'use strict';
var cheerio = require('cheerio');
/**
 * Callback for `request`: parses the fetched markup and logs one
 * "<business name> - <review count>" line per matched business name.
 *
 * Does nothing when `error` is truthy or the status code is not 200.
 *
 * @param {*} error - Error reported by the request, if any.
 * @param {Object} response - Response object; only `statusCode` is read.
 * @param {string} htmlBody - Markup handed to `cheerio.load` (presumably the response body).
 */
module.exports.gotHTML = function gotHTML(error, response, htmlBody) {
  // Guard clause: nothing to parse on failure or on a non-200 response.
  if (error || response.statusCode != 200) {
    return;
  }

  const $ = cheerio.load(htmlBody);

  // Trimmed text of every element matched by `selector`, in match order.
  const collectText = function (selector) {
    return $(selector)
      .toArray()
      .map(function (node) {
        return $(node).text().trim();
      });
  };

  const reviewCounts = collectText('.js-hot-new-businesses-section .rating-qualifier');
  const businessNames = collectText('.js-hot-new-businesses-section .biz-name');

  // Names and review counts are paired purely by position in their lists.
  const rows = businessNames.map(function (name, position) {
    return { businessName: name, reviewsTotal: reviewCounts[position] };
  });

  for (const row of rows) {
    console.log(row.businessName + ' - ' + row.reviewsTotal);
  }
};
这个是app.js文件代码
'use strict';
// Application entry script: requests the Yelp page at `url` and passes the
// result to the HTML helper, then exports the Express app.
const request = require('request');
// NOTE(review): `cheerio` is not referenced in the lines shown here — confirm whether this require is needed.
const cheerio = require('cheerio');
const express = require('express');
const app = express();
const htmlHelpers = require('./html');
const url = 'https://www.yelp.com/sf';
// Fetch the page; gotHTML is supplied as the completion callback.
request(url, htmlHelpers.gotHTML);
// Expose the Express app instance as this module's export.
exports = module.exports = app;
当我执行这段代码来打印每个商家的名称和评论数量时,每条结果都被打印了两次。输出如下:
Boba Guys - 24 reviews
Boba Guys - 24 reviews
Coletta Gelato - 23 reviews
Coletta Gelato - 23 reviews
An Chi - 29 reviews
An Chi - 29 reviews
顺便说一下,相同的代码在浏览器控制台中可以正确执行,并打印出预期的结果:
Boba Guys - 24 reviews
Coletta Gelato - 23 reviews
An Chi - 29 reviews
答案 0 :(得分:1)
这是因为cheerio不会像浏览器那样执行页面中的脚本。Cheerio只处理HTML。
看看你实际收到的响应:
<div class="home-container_section js-hot-new-businesses-section">
<h3 class="explore-city_title">Hot & New Businesses</h3>
<div class="arrange arrange--wrap arrange--3-units arrange--30">
<div class="arrange_unit">
<div class="card business-passport-card">
<div class="card_photo">
<div class="photo-box photo-box--interactive photo-box--background" style="background-image: url(https://s3-media1.fl.yelpcdn.com/bphoto/OA40TlIfaVARVjRdZnB0SA/l.jpg)">
<a href="/biz/coletta-gelato-san-francisco" class="photo-box_link js-analytics-click" data-analytics-label="biz-photo">
<img alt="Coletta Gelato" class="photo-box-img" height="400" src="https://s3-media1.fl.yelpcdn.com/bphoto/OA40TlIfaVARVjRdZnB0SA/l.jpg" width="600">
</a>
</div>
</div>
<div class="card_body">
<div class="card_content">
<h3 class="card_content-title--linked u-text-truncate">
<div class="business-link-hovercard js-business-link-hovercard">
<span class="business-link-hovercard_business-link js-business-link">
<a class="biz-name js-analytics-click" data-analytics-label="biz-name" href="/biz/coletta-gelato-san-francisco" data-hovercard-id="" ><span >Coletta Gelato</span></a>
</span>
<div class="business-link-hovercard_hovercard u-hidden u-break-word js-business-hovercard">
<div class="media-block media-block--12 biz-listing-medium">
<div class="media-avatar">
<div class="photo-box pb-90s">
<a href="/biz/coletta-gelato-san-francisco" class="js-analytics-click" data-analytics-label="biz-photo">
<img alt="Coletta Gelato" class="photo-box-img" height="90" src="https://s3-media1.fl.yelpcdn.com/bphoto/OA40TlIfaVARVjRdZnB0SA/90s.jpg" width="90">
</a>
</div>
</div>
<div class="media-story">
<div class="media-title clearfix">
<a class="biz-name js-analytics-click" data-analytics-label="biz-name" href="/biz/coletta-gelato-san-francisco" data-hovercard-id="" ><span >Coletta Gelato</span></a>
</div>
<div class="biz-rating biz-rating-large clearfix" itemprop="aggregateRating" itemscope itemtype="http://schema.org/AggregateRating">
<div class="i-stars i-stars--regular-5 rating-large" title="5.0 star rating">
<img class="offscreen" height="303" src="https://s3-media1.fl.yelpcdn.com/assets/srv0/yelp_design_web/41341496d9db/assets/img/stars/stars.png" width="84" alt="5.0 star rating">
</div>
<meta itemprop="ratingValue" content="5.0">
<span class="review-count rating-qualifier">
<span itemprop="reviewCount">25</span> reviews
</span>
</div>
<div class="price-category">
<span class="bullet-after">
<span class="business-attribute price-range">$$</span>
</span>
<span class="category-str-list">
Gelato,
Ice Cream & Frozen Yogurt
</span>
</div>
<small>
<span class="addr-city">San Francisco, CA</span>
</small>
</div>
</div>
</div>
</div>
</h3>
<div class="biz-rating biz-rating-large clearfix" >
<div class="i-stars i-stars--regular-5 rating-large" title="5.0 star rating">
<img class="offscreen" height="303" src="https://s3-media1.fl.yelpcdn.com/assets/srv0/yelp_design_web/41341496d9db/assets/img/stars/stars.png" width="84" alt="5.0 star rating">
</div>
<span class="review-count rating-qualifier">
25 reviews
</span>
</div>
<div class="price-category">
<span class="bullet-after">
<span class="business-attribute price-range">$$</span>
</span>
<span class="category-str-list">
Gelato,
Ice Cream & Frozen Yogurt
</span>
</div>
<p class="business-passport-card_date-opened">
<span aria-hidden="true" style="width: 18px; height: 18px;" class="icon icon--18-flame icon--size-18 icon--currentColor">
<svg class="icon_svg">
<use xlink:href="#18x18_flame" />
</svg>
</span> Opened 6 weeks ago
</p>
</div>
</div>
</div>
</div>
<div class="arrange_unit">
<div class="card business-passport-card">
<div class="card_photo">
<div class="photo-box photo-box--interactive photo-box--background" style="background-image: url(https://s3-media3.fl.yelpcdn.com/bphoto/teafIvqzpXkxVidRzpkJOQ/l.jpg)">
<a href="/biz/food-la-la-san-francisco" class="photo-box_link js-analytics-click" data-analytics-label="biz-photo">
<img alt="Food La La" class="photo-box-img" height="400" src="https://s3-media3.fl.yelpcdn.com/bphoto/teafIvqzpXkxVidRzpkJOQ/l.jpg" width="600">
</a>
</div>
</div>
<div class="card_body">
<div class="card_content">
<h3 class="card_content-title--linked u-text-truncate">
<div class="business-link-hovercard js-business-link-hovercard">
<span class="business-link-hovercard_business-link js-business-link">
<a class="biz-name js-analytics-click" data-analytics-label="biz-name" href="/biz/food-la-la-san-francisco" data-hovercard-id="" ><span >Food La La</span></a>
</span>
<div class="business-link-hovercard_hovercard u-hidden u-break-word js-business-hovercard">
<div class="media-block media-block--12 biz-listing-medium">
<div class="media-avatar">
<div class="photo-box pb-90s">
<a href="/biz/food-la-la-san-francisco" class="js-analytics-click" data-analytics-label="biz-photo">
<img alt="Food La La" class="photo-box-img" height="90" src="https://s3-media1.fl.yelpcdn.com/bphoto/teafIvqzpXkxVidRzpkJOQ/90s.jpg" width="90">
</a>
</div>
</div>
<div class="media-story">
<div class="media-title clearfix">
<a class="biz-name js-analytics-click" data-analytics-label="biz-name" href="/biz/food-la-la-san-francisco" data-hovercard-id="" ><span >Food La La</span></a>
</div>
<div class="biz-rating biz-rating-large clearfix" itemprop="aggregateRating" itemscope itemtype="http://schema.org/AggregateRating">
<div class="i-stars i-stars--regular-5 rating-large" title="5.0 star rating">
<img class="offscreen" height="303" src="https://s3-media1.fl.yelpcdn.com/assets/srv0/yelp_design_web/41341496d9db/assets/img/stars/stars.png" width="84" alt="5.0 star rating">
</div>
<meta itemprop="ratingValue" content="5.0">
<span class="review-count rating-qualifier">
<span itemprop="reviewCount">10</span> reviews
</span>
</div>
<div class="price-category">
<span class="bullet-after">
<span class="business-attribute price-range">$$</span>
</span>
<span class="category-str-list">
Cooking Classes
</span>
</div>
<small>
<span class="addr-city">San Francisco, CA</span>
</small>
</div>
</div>
</div>
</div>
</h3>
<div class="biz-rating biz-rating-large clearfix" >
<div class="i-stars i-stars--regular-5 rating-large" title="5.0 star rating">
<img class="offscreen" height="303" src="https://s3-media1.fl.yelpcdn.com/assets/srv0/yelp_design_web/41341496d9db/assets/img/stars/stars.png" width="84" alt="5.0 star rating">
</div>
<span class="review-count rating-qualifier">
10 reviews
</span>
</div>
<div class="price-category">
<span class="bullet-after">
<span class="business-attribute price-range">$$</span>
</span>
<span class="category-str-list">
Cooking Classes
</span>
</div>
<p class="business-passport-card_date-opened">
<span aria-hidden="true" style="width: 18px; height: 18px;" class="icon icon--18-flame icon--size-18 icon--currentColor">
<svg class="icon_svg">
<use xlink:href="#18x18_flame" />
</svg>
</span> Opened 4 weeks ago
</p>
</div>
</div>
</div>
</div>
<div class="arrange_unit">
<div class="card business-passport-card">
<div class="card_photo">
<div class="photo-box photo-box--interactive photo-box--background" style="background-image: url(https://s3-media1.fl.yelpcdn.com/bphoto/MkwY_oo9oz0CYITBrfqtbA/l.jpg)">
<a href="/biz/saltroot-cafe-san-francisco" class="photo-box_link js-analytics-click" data-analytics-label="biz-photo">
<img alt="Saltroot Cafe" class="photo-box-img" height="400" src="https://s3-media1.fl.yelpcdn.com/bphoto/MkwY_oo9oz0CYITBrfqtbA/l.jpg" width="600">
</a>
</div>
</div>
<div class="card_body">
<div class="card_content">
<h3 class="card_content-title--linked u-text-truncate">
<div class="business-link-hovercard js-business-link-hovercard">
<span class="business-link-hovercard_business-link js-business-link">
<a class="biz-name js-analytics-click" data-analytics-label="biz-name" href="/biz/saltroot-cafe-san-francisco" data-hovercard-id="" ><span >Saltroot Cafe</span></a>
</span>
<div class="business-link-hovercard_hovercard u-hidden u-break-word js-business-hovercard">
<div class="media-block media-block--12 biz-listing-medium">
<div class="media-avatar">
<div class="photo-box pb-90s">
<a href="/biz/saltroot-cafe-san-francisco" class="js-analytics-click" data-analytics-label="biz-photo">
<img alt="Saltroot Cafe" class="photo-box-img" height="90" src="https://s3-media2.fl.yelpcdn.com/bphoto/MkwY_oo9oz0CYITBrfqtbA/90s.jpg" width="90">
</a>
</div>
</div>
<div class="media-story">
<div class="media-title clearfix">
<a class="biz-name js-analytics-click" data-analytics-label="biz-name" href="/biz/saltroot-cafe-san-francisco" data-hovercard-id="" ><span >Saltroot Cafe</span></a>
</div>
<div class="biz-rating biz-rating-large clearfix" itemprop="aggregateRating" itemscope itemtype="http://schema.org/AggregateRating">
<div class="i-stars i-stars--regular-5 rating-large" title="5.0 star rating">
<img class="offscreen" height="303" src="https://s3-media1.fl.yelpcdn.com/assets/srv0/yelp_design_web/41341496d9db/assets/img/stars/stars.png" width="84" alt="5.0 star rating">
</div>
<meta itemprop="ratingValue" content="5.0">
<span class="review-count rating-qualifier">
<span itemprop="reviewCount">11</span> reviews
</span>
</div>
<div class="price-category">
<span class="bullet-after">
<span class="business-attribute price-range">$</span>
</span>
<span class="category-str-list">
Bakeries,
Coffee & Tea
</span>
</div>
<small>
<span class="addr-city">San Francisco, CA</span>
</small>
</div>
</div>
</div>
</div>
</h3>
<div class="biz-rating biz-rating-large clearfix" >
<div class="i-stars i-stars--regular-5 rating-large" title="5.0 star rating">
<img class="offscreen" height="303" src="https://s3-media1.fl.yelpcdn.com/assets/srv0/yelp_design_web/41341496d9db/assets/img/stars/stars.png" width="84" alt="5.0 star rating">
</div>
<span class="review-count rating-qualifier">
11 reviews
</span>
</div>
<div class="price-category">
<span class="bullet-after">
<span class="business-attribute price-range">$</span>
</span>
<span class="category-str-list">
Bakeries,
Coffee & Tea
</span>
</div>
<p class="business-passport-card_date-opened">
<span aria-hidden="true" style="width: 18px; height: 18px;" class="icon icon--18-flame icon--size-18 icon--currentColor">
<svg class="icon_svg">
<use xlink:href="#18x18_flame" />
</svg>
</span> Opened 3 weeks ago
</p>
</div>
</div>
</div>
</div>
</div>
<div class="u-space-t2 u-text-centered">
<a class="js-analytics-click explore-city_see-all" data-analytics-label="link-more" href="/search?attrs=NewBusiness" >See more hot and new businesses</a>
</div>
</div>
&#13;
在此处运行上述代码段,或在jsbin.com上打开该页面,您将看到原始HTML中您感兴趣的块是重复的。之后页面上的JS会把这些副本移走,据我所知,只在鼠标悬停时才显示它们(作为悬停卡片)。
//As you may recall, each element you are looking for appears twice in the response.
//Below are the class lists of the ancestor elements (the whole tree) for each occurrence,
//so you can see the difference and build the selectors you want.
Rating qualifiers:
"home-container_section js-hot-new-businesses-section" > "arrange arrange--wrap arrange--3-units arrange--30" > "arrange_unit" > "card business-passport-card" > "card_body" > "card_content" > "card_content-title--linked u-text-truncate" > "business-link-hovercard js-business-link-hovercard" > "business-link-hovercard_hovercard u-hidden u-break-word js-business-hovercard" > "media-block media-block--12 biz-listing-medium" > "media-story" > "biz-rating biz-rating-large clearfix" > "review-count rating-qualifier"
"home-container_section js-hot-new-businesses-section" > "arrange arrange--wrap arrange--3-units arrange--30" > "arrange_unit" > "card business-passport-card" > "card_body" > "card_content" > "biz-rating biz-rating-large clearfix" > "review-count rating-qualifier"
Biz names:
"home-container_section js-hot-new-businesses-section" > "arrange arrange--wrap arrange--3-units arrange--30" > "arrange_unit" > "card business-passport-card" > "card_body" > "card_content" > "card_content-title--linked u-text-truncate" > "business-link-hovercard js-business-link-hovercard" > "business-link-hovercard_business-link js-business-link" > "biz-name js-analytics-click"
"home-container_section js-hot-new-businesses-section" > "arrange arrange--wrap arrange--3-units arrange--30" > "arrange_unit" > "card business-passport-card" > "card_body" > "card_content" > "card_content-title--linked u-text-truncate" > "business-link-hovercard js-business-link-hovercard" > "business-link-hovercard_hovercard u-hidden u-break-word js-business-hovercard" > "media-block media-block--12 biz-listing-medium" > "media-story" > "media-title clearfix" > "biz-name js-analytics-click"
&#13;
要解决您的问题,请使用更具体的选择器,例如像下面这样(具体取决于您需要哪一组元素):
// Match only the review count under .media-story (the hovercard copy in the
// response shown above), so each business contributes a single element.
var reviewsSelectors = $('.js-hot-new-businesses-section .media-story .rating-qualifier');
// Match only the name link under .js-business-link (the non-hovercard copy),
// so each business contributes a single element.
var businessSelectors = $('.js-hot-new-businesses-section .js-business-link .biz-name');
&#13;