我从 API 中提取数据并将其插入 MySQL 表。我发现某些行的列值中带有单引号('),这会导致插入语句出错。我想把它们替换为转义形式(\'),但不知为何,我的替换代码不起作用。
import requests
import math
import pymysql
import string
from datetime import date, timedelta
# Replace single quotes with their backslash-escaped form.
print (type(calls[b][17]))
print (type(calls[b][18]))
print (calls[b][17])
print (calls[b][18])
# In a Python string literal "\'" is the same one-character string as "'",
# so replace("'", "\'") changes nothing. The backslash itself has to be
# escaped ("\\'") for the result to contain \'.
# NOTE(review): passing the values as parameters to cursor.execute() would
# avoid manual escaping altogether -- confirm against the insert code.
for col in (17, 18):
    if calls[b][col] is not None:
        calls[b][col] = calls[b][col].replace("'", "\\'")
print (calls[b][17])
print (calls[b][18])
结果是
班级类型
“Cecile Denier d'Aprigny”< + 33179976110>
薛
Denier d'Aprigny
Cecile Denier d'Aprigny
“Cecile Denier d'Aprigny”< + 33179976110>
薛
Denier d'Aprigny
Cecile Denier d'Aprigny
INSERT INTO thinking_phone_data VALUES('e912be49-bfd3-4454-8679-c53503eecfd2','+ 33179976110','+ 33170709949','2016-01-19 09:27:25.0','2016-01-19 09 :27:33.0','2016-01-19 09:31:19.0','234','226','已回答','0','0','“Cecile Denier d'Aprigny”< + 33179976110>','3 Mundi','3 Mundi','team3','cdenier @ 3mundi.com','Cecile','Denier d'Aprigny','Cecile Denier d'Aprigny','巴黎办公室', '无', '无', '无', '无', '无', '无', '无', '无')
我希望我的替换值为
“Cecile Denier d\'Aprigny”< + 33179976110>
薛
Denier d\'Aprigny
Cecile Denier d'Aprigny
答案 0 :(得分:0)
看来把问题描述清楚之后,我自己就找到了原因:只需要使用正确的转义写法即可。
解决方案是
import scrapy.selector
import urlparse
from scrapy.spiders import Spider
from scrapy.http import Request
from MediaMarkt.items import MediamarktItem
def _load_names(path):
    """Return the lines of *path* with newline, backslash and dash characters stripped from both ends."""
    # The context manager closes the file even if reading fails; the
    # original left both file handles open.
    with open(path, "r") as handle:
        # "\n\\-" is the same character set the original spelled "\n\-",
        # written without the invalid escape sequence.
        return [line.strip("\n\\-") for line in handle]


# Known TV model names, plus a dict used for fast membership tests.
models = _load_names("tvmodels.txt")
d = dict.fromkeys(models, True)

# Known vendor names, plus a dict used for fast membership tests.
vendors = _load_names("vendors.txt")
e = dict.fromkeys(vendors, True)
def complete_url(string):
    """Return *string* appended to the MediaMarkt site root."""
    site_root = "http://www.mediamarkt.de"
    return site_root + string
def encode(str):
    """Return *str* encoded as UTF-8, using the 'ignore' error handler."""
    encoded = str.encode('utf8', 'ignore')
    return encoded
class MshbeSpider(Spider):
    """Spider for the MediaMarkt.de LED/LCD TV product listing.

    Every product tile on a listing page becomes a ``MediamarktItem``; the
    "next page" link, when present, is followed with the same callback.
    """

    name = "mshdetv"
    start_urls = ['http://www.mediamarkt.de/mcs/productlist/_led-lcd-fernseher,48353,460668.html?langId=-3&searchParams=&sort=&view=&page=']

    def parse(self, response):
        """Yield one item per product on the page, then a request for the next page.

        :param response: the downloaded listing page.
        """
        for product in response.xpath('//ul[@class="products-list"]/li/div'):
            mshtv = MediamarktItem()
            # normalize-space() evaluates to exactly one string, so [0] is safe here.
            price = product.xpath('normalize-space(.//aside/div/div/div/text())').extract()[0]
            mshtv['item_3_price'] = encode(price).replace("-","")
            title = product.xpath('normalize-space(.//div/h2/a/text())').extract()[0]
            mshtv['item_2_name'] = encode(title).replace("-","").replace(" C","C").replace(" B","B").replace(" ","")
            # xpath() replaces the deprecated select() and matches the other lookups.
            mshtv['item_a_link'] = product.xpath('.//div/h2/a/@href').extract()
            mshtv['item_4_avai'] = encode(product.xpath('normalize-space(.//aside/div/div/ul/li//text())').extract()[0])
            #mshtv['item_1_cat'] = encode(item.xpath('normalize-space(//*[@id="category"]/hgroup/h1/text())').extract()[0])

            # NOTE(review): all spaces were removed from the name above, so
            # this split yields a single token -- confirm that is intended.
            for word in mshtv['item_2_name'].split(" "):
                if word in d:
                    mshtv['item_model'] = word
                if word in e:
                    mshtv['item_vendor'] = word
            yield mshtv

        next_links = response.xpath('//li[@class="pagination-next"]/a/@href').extract()
        # Without a "next" link (presumably the last page) stop paginating
        # instead of raising IndexError on an empty result list.
        if next_links:
            yield Request(complete_url(next_links[0]), callback=self.parse)