使用 Python 和 BeautifulSoup 向 CSV 文件写入数据时遇到的问题

时间:2013-01-08 14:04:39

标签: csv python-2.7 beautifulsoup

下面的代码-1只输出了一行,而代码-2输出了所有的行。代码-1输出的那一行对应循环中的最后一个元素(也就是代码-2输出中的最后一行)。

请仔细阅读代码-1和代码-2之间的区别,并帮助我找出导致此问题的原因。

CODE-1:

# -*- coding: cp1252 -*-
import csv
import urllib2
import sys
import urllib
import time
import mechanize
import cookielib
from bs4 import BeautifulSoup
from itertools import islice

# Read the local clock once so that the month and the year used below cannot
# disagree if the script happens to run across a month or year boundary.
_now = time.localtime()

# Calendar month (1-12). Both historical names are kept for later code.
month = _now.tm_mon
cy_q = month

# Calendar quarter: Jan-Mar -> 1, Apr-Jun -> 2, Jul-Sep -> 3, Oct-Dec -> 4.
q = (month - 1) // 3 + 1

# Fiscal-year label: months up to June keep the calendar year, later months
# use the following year. Always an int; the earlier version yielded a str in
# the first case and an int in the second.
fy = _now.tm_year if month <= 6 else _now.tm_year + 1

# Fiscal quarter, shifted two quarters from the calendar quarter:
# Jan-Mar -> 3, Apr-Jun -> 4, Jul-Sep -> 1, Oct-Dec -> 2.
fy_q = (q + 1) % 4 + 1



# The eleven listing pages (p=0 .. p=10) of the sfr.fr phone catalogue; the
# addresses differ only in the trailing "p" query parameter.
_PAGE_URL = 'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=%d'
urls = [_PAGE_URL % _page for _page in range(11)]
# For every listing page: download it, pull out device titles and prices,
# and write one CSV row per device to sfr_oemtest.csv.
# NOTE(review): the output file is opened with mode 'wb' *inside* this loop,
# so each page truncates the file written for the previous page. After the
# loop only the header and the rows of the last page remain.
for url in urls:
        # Fetch and parse the page.
        page= urllib2.urlopen(url).read()
        soup = BeautifulSoup(page)
        # Device titles and the integer part of each price.
        items = soup.findAll('h3', {"class": "title"})
        prices_int = soup.findAll('span', {"class": "price"})
        # Fixed-size buffer for the decimal parts; unused slots stay None.
        prices_dec = [None]*100
        j = 0  # index into all 'priceDecimalPart' spans
        i = 0  # next free slot in prices_dec

        # Keep only those decimal-part spans whose parent tag has no 'class'
        # attribute (the lookup raises KeyError in that case). `tag` and
        # `check` are not used; the span is re-fetched by index j instead.
        for tag in soup.findAll('span', {'class': 'priceDecimalPart'}):
                try:
                    check = soup.findAll('span', {"class": "priceDecimalPart"})[j].parent['class']
                except KeyError:
                    prices_dec[i] = soup.findAll('span', {"class": "priceDecimalPart"})[j]
                    i = i + 1
                j = j + 1

        # 'wb' creates/truncates the file on every pass through the outer
        # loop, and the header row is rewritten each time.
        with open('sfr_oemtest.csv', 'wb') as csvfile:
            spamwriter = csv.writer(csvfile, delimiter=',')
            spamwriter.writerow(["Date","Month","FY","CY","FY Quarter","CY Quarter","Day of Week","Geography","MO","OEM","Device Name","GDN",
                                "Refurbished (Y/N)","Color","Storage (GB)","Additional","Plan Name","Currency","Device Price","Plan Price",
                                "Plan Data","Plan Minutes"])        
            # zip stops at the shortest of the three sequences. If fewer
            # decimal parts were kept than there are titles/prices, price_dec
            # is None and price_dec.string below raises AttributeError.
            for item, price_int, price_dec in zip(items,prices_int,prices_dec):
                textcontent = u' '.join(item.stripped_strings)
                # Encode to UTF-8, drop "é" first (so the accent-less stems
                # such as "Reconditionn" match), then translate French terms
                # and colours to English.
                # NOTE(review): these literals are byte strings compared with
                # UTF-8 encoded text; whether the accented ones match depends
                # on the encoding the source file is actually saved in - confirm.
                name_1 =  unicode(textcontent).encode('utf8').replace("é","").replace("RECONDITIONNE","Refurbished").replace("reconditionn","Refurbished").replace("Tablette","Tablet").replace("Noir et Blanc","Black and White").replace("Remis à neuf","Refurbished").replace("Remis à Neuf","Refurbished").replace("Reconditionn","Refurbished").replace("Go","GB").replace("Bleu Nuit","Midnight Blue").replace("Noir","Black").replace("Blanc","White").replace("Bleu","Blue").replace("Rose","Pink").replace("Rouge","Red").replace("Gris","Grey").strip()
                # Position of the first space in name_1; raises ValueError
                # when the name contains no space at all.
                oem = list(name_1)
                pos = oem.index(" ")
                # Flag refurbished devices and strip the marker from the name.
                if name_1.find('Refurbished') == -1:
                    name = name_1
                    refur = "N"
                else:
                    name = name_1.replace("Refurbished","")
                    refur = "Y"
                # The OEM column is name[0:pos]; pos was computed on name_1,
                # i.e. before "Refurbished" was removed from name.
                # Device Price joins the integer and decimal parts after
                # removing "€" and turning "," into ".".
                if name_1:
                    spamwriter.writerow([time.strftime("%Y-%m-%d"),time.strftime("%B"),fy,time.strftime("%Y"),fy_q,q,
                                         time.strftime("%A") , "France", "SFR",name[0:pos],name,"",refur,"","","","24 Months",
                                         "€" ,unicode(price_int.string).encode('utf8').strip().replace("€","").replace(",",".")+
                                         unicode(price_dec.string).encode('utf8').strip().replace("€","").replace(",","."),"","",""])

CODE-2:

# Listing pages p=0 .. p=10 of the sfr.fr phone catalogue, built from the
# shared address prefix plus the page number.
_LISTING_PREFIX = 'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p='
urls = [_LISTING_PREFIX + str(_page) for _page in range(11)]
# For every listing page: download it, pull out device titles and prices,
# and append one CSV row per device to Pricing_Updated.csv.
# The file is opened with mode 'ab', so rows from all pages accumulate; the
# file is never truncated, so rows from earlier runs are kept as well.
# NOTE(review): fy, fy_q, q and the imported modules are not defined in this
# fragment - presumably the same preamble as CODE-1 is used; confirm.
for url in urls:
        # Fetch and parse the page.
        page= urllib2.urlopen(url).read()
        soup = BeautifulSoup(page)
        # Device titles and the integer part of each price.
        items = soup.findAll('h3', {"class": "title"})
        prices_int = soup.findAll('span', {"class": "price"})
        # Fixed-size buffer for the decimal parts; unused slots stay None.
        prices_dec = [None]*100
        j = 0  # index into all 'priceDecimalPart' spans
        i = 0  # next free slot in prices_dec

        # Keep only those decimal-part spans whose parent tag has no 'class'
        # attribute (the lookup raises KeyError in that case). `tag` and
        # `check` are not used; the span is re-fetched by index j instead.
        for tag in soup.findAll('span', {'class': 'priceDecimalPart'}):
                try:
                    check = soup.findAll('span', {"class": "priceDecimalPart"})[j].parent['class']
                except KeyError:
                    prices_dec[i] = soup.findAll('span', {"class": "priceDecimalPart"})[j]
                    i = i + 1
                j = j + 1

        # Append mode: each pass adds to the end of the existing file.
        with open('Pricing_Updated.csv', 'ab') as csvfile:
            spamwriter = csv.writer(csvfile, delimiter=',')
            # Header rows are disabled below, so no header line is written.
          #  spamwriter.writerow(["Date","Month","Day of Week","Geography","Mobile Operator","Device Name","Price","Monthly Price","Plan"])
          #  spamwriter.writerow(["Date","Month","FY","CY","FY Quarter","CY Quarter","Day of Week","Geography","MO","OEM","Device Name","GDN",
          #                      "Refurbished (Y/N)","Color","Storage (GB)","Additional","Plan Name","Currency","Device Price","Plan Price",
          #                      "Plan Data","Plan Minutes"])        
            # zip stops at the shortest of the three sequences. If fewer
            # decimal parts were kept than there are titles/prices, price_dec
            # is None and price_dec.string below raises AttributeError.
            for item, price_int, price_dec in zip(items,prices_int,prices_dec):
                textcontent = u' '.join(item.stripped_strings)
                # Skip titles with no text. The device name is UTF-8 encoded,
                # "é" is dropped, and French terms/colours are replaced with
                # English ones. The OEM and refurbished columns are left empty
                # here. Device Price joins the integer and decimal parts after
                # removing "€" and turning "," into ".".
                if textcontent:
                    spamwriter.writerow([time.strftime("%Y-%m-%d"),
                                         time.strftime("%B"),fy,time.strftime("%Y"),fy_q,q,
                                         time.strftime("%A") , "France", "SFR","",
                                         unicode(textcontent).encode('utf8')
                                         .replace("é","")
                                         .replace("RECONDITIONNE","Refurbished")
                                         .replace("reconditionn","Refurbished")
                                         .replace("Tablette","Tablet")
                                         .replace("Noir et Blanc","Black and White")
                                         .replace("Remis à neuf","Refurbished")
                                         .replace("Remis à Neuf","Refurbished")
                                         .replace("Reconditionn","Refurbished")
                                         .replace("Go","GB")
                                         .replace("Bleu Nuit","Midnight Blue")
                                         .replace("Noir","Black")
                                         .replace("Blanc","White")
                                         .replace("Bleu","Blue")
                                         .replace("Rose","Pink")
                                         .replace("Rouge","Red")
                                         .replace("Gris","Grey"),"","","","","","24 Months",
                                         "€" ,unicode(price_int.string).encode('utf8').strip().replace("€","").replace(",",".")+
                                         unicode(price_dec.string).encode('utf8').strip().replace("€","").replace(",","."),"","",""])

1 个答案:

答案 0 :(得分:1)

区别在于 open('Pricing_Updated.csv', 'ab') 与 open('sfr_oemtest.csv', 'wb'),具体而言,就是 ab 与 wb 的区别。

a 指的是追加(append),而 w 指的是写入(write,打开时会清空文件原有内容)。在第一个示例中,每次循环都会重新以 w 模式打开文件,从而覆盖之前写入的行,这就是您只看到最后一个预期条目的原因。在第二个示例中,新数据会追加到现有数据之后。