我正在尝试实现一个非常简单的爬虫程序,用来从网站上抓取公寓的价格和面积(平方英尺)。我使用 Python + Scrapy 来实现,但遇到了一个问题:包含所需信息的那个区块(section)在返回的响应中是空的,其中的所有内容(div、span 等)也无法通过 CSS 选择器查询到。除了该区块内的内容之外,页面上的其他内容我都能正常访问。
这是网站:https://www.251brandon.com/floorplans
这是我最初编写的爬虫(spider)(在此示例中,仅查找类名为 "fp-price" 的元素):
var express = require('express');
var graphqlHTTP = require('express-graphql');
var graphql = require('graphql');
// Maps id to User object.
// Built on a null-prototype object so that only real user ids resolve: with a
// plain `{}` literal, lookups such as fakeDatabase['constructor'] or
// fakeDatabase['toString'] return inherited Object.prototype members instead
// of undefined.
const fakeDatabase = Object.assign(Object.create(null), {
  a: { id: 'a', name: 'alice' },
  b: { id: 'b', name: 'bob' },
});
// GraphQL object type describing a User; both fields are exposed as strings.
const userFields = {
  id: { type: graphql.GraphQLString },
  // How can I change the name of this field to "firstname" while still referencing "name" in our database?
  name: { type: graphql.GraphQLString },
};
const userType = new graphql.GraphQLObjectType({
  name: 'User',
  fields: userFields,
});
// Define the Query type: the root type exposing a single `user` field.
const queryType = new graphql.GraphQLObjectType({
  name: 'Query',
  fields: {
    user: {
      type: userType,
      // `args` describes the arguments that the `user` query accepts
      args: {
        id: { type: graphql.GraphQLString },
      },
      /**
       * Looks up a user in fakeDatabase by the `id` argument.
       *
       * @param {*} _ - Parent value (unused).
       * @param {{id: (string|undefined)}} args - Query arguments.
       * @returns {?Object} The stored user record, or null when no user has that id.
       */
      resolve(_, { id }) {
        // Accept only keys stored directly on fakeDatabase. A bare
        // `fakeDatabase[id]` would also return inherited members for ids such
        // as "constructor" or "toString" when the store is a plain object.
        const isKnownUser = Object.prototype.hasOwnProperty.call(fakeDatabase, id);
        return isKnownUser ? fakeDatabase[id] : null;
      },
    },
  },
});
// Build the schema from the root query type.
const schema = new graphql.GraphQLSchema({ query: queryType });

// Mount the GraphQL middleware (with the GraphiQL option enabled) on /graphql.
const app = express();
const graphqlMiddleware = graphqlHTTP({
  schema,
  graphiql: true,
});
app.use('/graphql', graphqlMiddleware);

// Start listening on port 4000 and print the endpoint location.
app.listen(4000);
console.log('Running a GraphQL API server at localhost:4000/graphql');
返回的是一个空的 SelectorList,而不是所有带有 fp-price 类的元素。
感谢您的帮助。 :)
答案 0 :(得分:0)
您可以尝试使用 XPath 代替 CSS 选择器:
response.xpath('//*[@id="floorplan"]/text()')
还可以查看:https://doc.scrapy.org/en/latest/topics/selectors.html
如果 @Casper 说得对,该元素确实是由 JavaScript 加载的,那么您应该看看 scrapy-splash(https://github.com/scrapy-plugins/scrapy-splash),它可以先执行页面中的 JavaScript,然后再抓取渲染后的页面。祝你好运!
答案 1 :(得分:0)
@Casper 说得对,该页面的内容是由 JavaScript 生成的。如果您在浏览器中禁用 JavaScript 后再加载页面,就看不到这些内容。不过,当页面通过 JavaScript 加载内容时,所需的数据通常以 JSON 形式存在。我在网络响应中搜索了其中一个面积(sq ft)的值,发现所有数据都随页面一起加载,保存在名为 pageData 的变量中。
如果您查看页面的源代码,会发现其中定义了一个类似 JSON 的 JavaScript 对象,里面已经包含了构建页面所需的全部数据。
var pageData = {
filters: {
beds: [],
baths: 0,
priceRange: {
low: 0,
high: 9999
},
sqftRange: {
low: 0,
high: 9999
},
availableDate: "all",
amenities: []
},
hasImages: true,
amenities: {
am_0: "Built in USB Ports",
am_1: "Designer Carpeting and Two-Tone Paint",
am_2: "Dishwasher",
am_3: "Double Stainless Steel Sinks",
am_4: "Gas Range",
am_5: "Granite Countertops",
am_6: "Large Patio Or Balcony",
am_7: "Linen Closet",
am_8: "Platinum Silver Kitchen Appliances",
am_9: "Pre-Wired For Technology",
am_10: "Spacious Closets",
am_11: "Stackable Washer/Dryer",
am_12: "Wood Blinds"
},
floorplans: [
{
id: 2029996,
name: "1 Bed 1 Bath | 1B",
amenities: [],
sqft: 594,
beds: 1,
baths: 1.0,
lowPrice: 2392,
highPrice: 4208,
availableCount: 1,
availableDate: "10/8/2018",
special: false,
images: [
{
src: "/dmslivecafe/3/234323/1B.png?quality=85",
alt: "",
title: "1 Bed 1 Bath | 1B",
caption: ""
}
],