I found that my spider never runs short_critic_content(self, response), and I cannot figure out why. I had not been using allowed_domains; when I enable allowed_domains = ["movie.mtime.com"], the spider does not run either.
import re

from scrapy import Request
from scrapy.selector import Selector
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

# Adjust this import to your project's items module.
from yingping.items import YingpingItem, shortcriticItem


class YinPin(CrawlSpider):
    name = "yingping"
    #allowed_domains = ["movie.mtime.com"]
    start_urls = ['http://movie.mtime.com']
    rules = (
        #Rule(LinkExtractor(allow=())),
        Rule(LinkExtractor(allow=(r'http://movie.mtime.com/40677/',)), callback='movie_info', follow=False),
    )

    def movie_info(self, response):
        selector = Selector(response)
        #for movieinfo in movie_info:
        movie_name = selector.xpath('//*[@id="db_head"]/div[2]/div/div[1]/h1/text()').extract()
        movie_url = response.url  #movieinfo.xpath('//*[@id="db_head"]/div[2]/div/div[2]/a[3]/@href').extract()
        number = re.compile(r'\d+')
        movie_num = int(number.search(str(movie_url)).group())
        movie_release_time = selector.xpath('//*[@id="db_head"]/div[2]/div/div[1]/p[1]/a/text()').extract()
        movie_place = selector.xpath('//*[@id="db_head"]/div[2]/div/div[2]/text()').extract()[3]
        movie_type = selector.xpath('//*[@id="db_head"]/div[2]/div/div[2]/a/text()').extract()
        movie_type_l = movie_type.pop()
        movie_type = ' '.join(movie_type)
        short_content = selector.css('#tweetRegion > dd > div > h3::text').extract()  # selector.xpath('//*[@id="tweetRegion"]').css('h3::text').extract()
        short_url = str(selector.xpath('//*[@id="tweetBottomDiv"]/p[2]/a/@href').extract())
        yield Request(short_url, callback=self.short_critic_content,
                      meta={'movie_num': movie_num,
                            'short_content': short_content})
        item = YingpingItem(
            movie_num=movie_num,
            movie_name=movie_name,
            movie_release_time=movie_release_time,
            movie_place=movie_place,
            movie_type=movie_type,
        )
        yield item

    def short_critic_content(self, response):
        selector = Selector(response)
        movie_num = response.meta['movie_num']
        short_contentft = response.meta['short_content']
        short_contentsd = selector.css('#tweetRegion > dd > div > h3::text').extract()
        short_contents = short_contentft + short_contentsd
        item = shortcriticItem(
            movie_num=movie_num,
            movie_scritic=short_contents
        )
        yield item
Is the code wrong or right? What is causing the error I receive:

Unsupported URL scheme '': no handler available for that scheme
Answer 0 (score: 1)
The problem is almost certainly this line of your movie_info method:

short_url = str(selector.xpath('//*[@id="tweetBottomDiv"]/p[2]/a/@href').extract())

The extract() method of Selector returns a list, which you then convert to a string. That does not give you the URL; it gives you the string representation of a list, which starts with the [ character. That is why you get this error.
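As an illustration (a standalone sketch, not part of the spider; the href value here is a made-up placeholder):

hrefs = ['/comment.html']   # what .extract() returns: a list of strings
bad_url = str(hrefs)        # "['/comment.html']" -- no URL scheme, so Scrapy rejects it
good_url = hrefs[0]         # '/comment.html' -- the actual href text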
The correct way would be

short_url = selector.xpath('//*[@id="tweetBottomDiv"]/p[2]/a/@href').extract()[0]

or, even better, use Selector's extract_first() instead of extract().
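For context, a minimal sketch of how that part of movie_info could look with this change (it assumes the extracted href is an absolute URL; if the page uses relative links, pass it through response.urljoin first):

short_url = selector.xpath('//*[@id="tweetBottomDiv"]/p[2]/a/@href').extract_first()
if short_url:  # only follow the link if it was actually found on the page
    yield Request(short_url, callback=self.short_critic_content,
                  meta={'movie_num': movie_num, 'short_content': short_content})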