我有大约200个值,我已经完成了删除新行字符的所有内容' \ n' 尝试了所有这些
prodname = str(row['productname'])
prodname = ''.join(prodname.split('[u\'\n', 1))
prodname = ''.join(prodname.split('[u\'', 1))
prodname = ''.join(prodname.split('\']', 1))
prodname = ''.join(prodname.split('\n', 1))
prodname = prodname.replace('\n', '')
我的价值观仍然像这样......任何想法为什么???
更新
这是蜘蛛代码
class ShopBot(Spider):
name = "shopbot"
allowed_domains = ["shopbot.com.au"]
start_urls = [
"http://www.shopbot.com.au/apple-iphone-unlocked/price/australia/13721",
"http://www.shopbot.com.au/m/?m=apple-iphone-unlocked&page=2"
"http://www.shopbot.com.au/m/?m=apple-iphone-unlocked&page=3",
"http://www.shopbot.com.au/laptops/laptop,-tablet-pda/australia/115",
"http://www.shopbot.com.au/laptops/laptop,-tablet-pda/australia/115/order/0/page/2",
"http://www.shopbot.com.au/laptops/laptop,-tablet-pda/australia/115/order/0/page/3",
"http://www.shopbot.com.au/laptops/laptop,-tablet-pda/australia/115/order/0/page/4",
"http://www.shopbot.com.au/laptops/laptop,-tablet-pda/australia/115/order/0/page/5"
]
def parse(self, response):
sel = Selector(response)
prodsel = sel.xpath('//div[@id="content"]/div[@class="left"]/ul[@id="results"]/li[@class="result"]/ol')
#productnames = sel.xpath('//ul[@class="navigation"]/li/dl/dt[@class="title"]/a/text()').extract()
#price = sel.xpath('//ul[@class="navigation"]/li/dl/dd[@class="price"]/span[@class="main"]/text()').extract()
items = []
for site in prodsel:
item = ArgosItems()
item['productname'] = site.xpath('li[@class="details"]/span[@rel="external"]/text()').extract()
item['price'] = site.xpath('li[@class="action"]/div[@class="price"]/span/text()').extract()
item['siteid'] = 2
items.append(item)
return items
这就是im处理它以将其保存在db
中的地方def spider_closed(spider):
config = {
'user': 'root',
'password': 'root',
'host': '127.0.0.1',
'database': 'pricewatch',
'raise_on_warnings': True,
'buffered': True,
}
cnx = mysql.connector.connect(**config)
cursor = cnx.cursor()
today = datetime.now()
print today
count = 0
for row in results:
prodname = str(row['productname'])
prodname = ''.join(prodname.split('[u\'\n', 1))
prodname = ''.join(prodname.split('[u\'', 1))
prodname = ''.join(prodname.split('\']', 1))
prodname = ''.join(prodname.split('\n', 1))
prodname = prodname.replace('\n', '')
price = str(row['price'])
price = ''.join(price.split('$', 1))
price = ''.join(price.split('[u\'', 1))
price = ''.join(price.split('\']', 1))
price = ''.join(price.split('\n', 1))
price = price.replace('\n','')
siteid = row['siteid']
注意:我从python脚本运行scrapy,名为pricewatch.py