Python for循环重复

时间:2017-09-22 12:14:05

标签: python mysql

    def get_user_data(self,start_url):
        """Fetch ``start_url`` and insert one row per ``span.ct`` text node.

        The page is downloaded with the instance's headers and cookies, parsed
        as HTML, and two lists of text nodes are extracted: ``span.ctt``
        (stored under ``'content'``) and ``span.ct`` (split into date, time
        and device fields).  Rows are written through ``self.mysql.insert``.

        NOTE: the two lists are walked one after the other, not in lockstep,
        so every inserted row carries the same ``'content'`` value.
        """
        html = requests.get(url=start_url,headers=self.headers,cookies=self.cookies).content
        selector = etree.fromstring(html,etree.HTMLParser(encoding='utf-8'))
        contents = selector.xpath('//span[@class="ctt"]/text()')
        times = selector.xpath('//span[@class="ct"]/text()')
        # A single dict is shared by both loops below.
        data = {}
        # Each iteration overwrites the same key, so after this loop
        # data['content'] holds only the LAST text node from `contents`.
        for each_text in contents:

            # Strip zero-width spaces (U+200B) from the text.
            data['content'] = each_text.encode().decode('utf-8').replace('\u200b','')

        # One insert per timestamp line; 'content' is never updated in here,
        # which is why the stored content repeats across rows.
        for each_time in times:
            # Requires at least three whitespace-separated fields; the third
            # field keeps the remainder of the line.
            month_day, time, device = each_time.split(maxsplit=2)
            data['mobile_phone'] = device
            # Date and time are concatenated with no separator between them.
            data['create_time'] = month_day + time
            data['crawl_time'] = datetime.strftime(datetime.now(),'%Y-%m-%d %H:%M:%S')
            self.mysql.insert(data)

我想将数据插入数据库,但是 data['content'] 字段的值会重复(每一行插入的内容都相同),我该如何修改呢?

1 个答案:

答案 0 :(得分:1)

您应该并行遍历 contents 和 times,而不是一个接一个地遍历。尝试使用 zip:

def get_user_data(self,start_url):
    """Scrape ``start_url`` and store each content text with its own timestamp.

    The page is fetched with the instance's headers and cookies and parsed as
    HTML.  Text nodes of ``span.ctt`` and ``span.ct`` are then walked in
    lockstep, so every row passed to ``self.mysql.insert`` pairs one content
    string with the date/time/device line at the same position.  ``zip`` stops
    at the shorter list, so unmatched trailing items are ignored.
    """
    payload = requests.get(url=start_url, headers=self.headers, cookies=self.cookies).content
    tree = etree.fromstring(payload, etree.HTMLParser(encoding='utf-8'))
    texts = tree.xpath('//span[@class="ctt"]/text()')
    stamps = tree.xpath('//span[@class="ct"]/text()')

    for raw_text, raw_stamp in zip(texts, stamps):
        # Drop zero-width spaces (U+200B) from the content text.
        cleaned = raw_text.encode().decode('utf-8').replace('\u200b', '')
        # Needs at least three whitespace-separated fields; the third one
        # keeps the remainder of the line.
        month_day, clock, device = raw_stamp.split(maxsplit=2)
        # A fresh dict per pair, so rows never share state.
        row = {
            'content': cleaned,
            'mobile_phone': device,
            'create_time': month_day + clock,
            'crawl_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        }
        self.mysql.insert(row)