我正在尝试从微博获取用户状态,但一直遇到下面这个错误。
import re
import string
import sys
import os
import urllib
import urllib2
from bs4 import BeautifulSoup
import requests
from lxml import etree
reload(sys)
sys.setdefaultencoding('utf-8')
if(len(sys.argv)>=2):
user_id = (int)(sys.argv[1])
else:
user_id = (int)(raw_input("input user_id: "))
cookie = {"Cookie": "******my cookies"}
url = 'http://weibo.cn/u/%d?filter=1&page=1'%user_id
html = requests.get(url, cookies = cookie).content
selector = etree.HTML(html)
pageNum = (int)(selector.xpath('//input[@name="mp"]')[0].attrib['value'])
result = ""
urllist_set = set()
word_count = 1
image_count = 1
print 'spider is ready...'
for page in range(1,pageNum+1):
url = 'http://weibo.cn/u/%d?filter=1&page=%d'%(user_id,page)
lxml = requests.get(url, cookies = cookie).content
selector = etree.HTML(lxml)
content = selector.xpath('//span[@class="ctt"]')
for each in content:
text = each.xpath('string(.)')
if word_count>=4:
text = "%d :"%(word_count-3) +text+"\n\n"
else :
text = text+"\n\n"
result = result + text
word_count += 1
fo = open("/Users/apple/Desktop/%s"%user_id, "wb")
fo.write(result)
word_path=os.getcwd()+'/%d'%user_id
print 'done'
错误:
File "weibo_spider.py", line 25, in <module>
pageNum = (int)(selector.xpath('//input[@name="mp"]')[0].attrib['value'])
IndexError: list index out of range
答案 0 :(得分:4)
您假设 selector.xpath('//input[@name="mp"]') 总能找到一些东西,但很多情况下并非如此(找不到匹配元素时它会返回空列表,对空列表取 [0] 就会抛出 IndexError)。因此要养成防御性编程的习惯,参见 Defensive Programming。
尝试替换
// Data context shared by the event handlers in this form; created once with the form instance.
// NOTE(review): presumably an Entity Framework DbContext subclass — confirm against its declaration.
SampleDbEntities db = new SampleDbEntities();
/// <summary>
/// Form load handler: disables proxy creation on the context, loads the
/// Products set, and binds its local list to <c>productsBindingSource</c>.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{
    db.Configuration.ProxyCreationEnabled = false;

    // Load() must run before Local is read so the local list is populated.
    db.Products.Load();

    var localProducts = db.Products.Local.ToBindingList();
    this.productsBindingSource.DataSource = localProducts;
}
/// <summary>
/// Filter button handler: with an empty filter box the binding source shows
/// the full local product list; otherwise it shows the products whose
/// <c>Name</c> contains the filter text.
/// </summary>
private void FilterButton_Click(object sender, EventArgs e)
{
    // No filter text: bind the complete local list again.
    if (string.IsNullOrEmpty(this.FilterTextBox.Text))
    {
        this.productsBindingSource.DataSource = db.Products.Local.ToBindingList();
        return;
    }

    // Deferred query; the text box is read when the sequence is enumerated.
    var matches = db.Products.Local.ToBindingList()
        .Where(product => product.Name.Contains(this.FilterTextBox.Text));

    if (matches.Count() > 0)
    {
        this.productsBindingSource.DataSource = matches;
    }
    else
    {
        // An empty result is bound as a typed array instead of the lazy sequence.
        // NOTE(review): presumably so the binding source can still infer the item type — confirm.
        this.productsBindingSource.DataSource = matches.ToArray();
    }
}
/// <summary>
/// Save button handler: validates the form, commits the pending edit on
/// <c>productsBindingSource</c>, then saves the context's changes.
/// </summary>
private void productBindingNavigatorSaveItem_Click(object sender, EventArgs e)
{
    // The result of Validate() is intentionally not inspected, as in the original.
    this.Validate();

    // Commit the in-progress edit before saving.
    this.productsBindingSource.EndEdit();

    this.db.SaveChanges();
}
/// <summary>
/// Delete button handler: removes the current item from
/// <c>productsBindingSource</c> and, when filter text is present, also
/// removes it from the context's local product list.
/// </summary>
private void bindingNavigatorDeleteItem_Click(object sender, EventArgs e)
{
    // Nothing selected: nothing to delete.
    if (this.productsBindingSource.Current == null)
    {
        return;
    }

    // Capture the item before RemoveCurrent() changes the current position.
    var selected = (Product)this.productsBindingSource.Current;
    this.productsBindingSource.RemoveCurrent();

    // NOTE(review): presumably needed because, while filtered, the binding source
    // holds a separate sequence rather than the Local list itself — confirm.
    bool filterActive = !string.IsNullOrEmpty(this.FilterTextBox.Text);
    if (filterActive)
    {
        db.Products.Local.Remove(selected);
    }
}
使用:
selector.path