下面的代码 1 只输出了一行,而代码 2 输出了所有的行。代码 1 输出的那一行对应循环中的最后一个元素(也就是代码 2 输出中的最后一行)。
请仔细对比代码 1 和代码 2 之间的区别,并帮我找出导致此问题的原因。
CODE-1:
# -*- coding: cp1252 -*-
import csv
import urllib2
import sys
import urllib
import time
import mechanize
import cookielib
from bs4 import BeautifulSoup
from itertools import islice
# Calendar and fiscal period fields derived from the current local date.
# The month is read from the clock once and reused so that every derived
# value is computed from the same instant.
month = int(time.strftime("%m"))
cy_q = month  # kept under its original name; holds the month number (1-12)

# Calendar quarter: Jan-Mar -> 1, Apr-Jun -> 2, Jul-Sep -> 3, Oct-Dec -> 4.
q = (month - 1) // 3 + 1

# Fiscal year: months after June are counted in the following year.
# Always an int (it used to be a str for Jan-Jun and an int for Jul-Dec).
fy = int(time.strftime("%Y"))
if month > 6:
    fy += 1

# Fiscal quarter: Jul-Sep -> 1, Oct-Dec -> 2, Jan-Mar -> 3, Apr-Jun -> 4.
fy_q = ((month - 7) % 12) // 3 + 1
# Catalogue result pages to scrape: p=0 through p=10.
urls = ['http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=%d' % page_no
        for page_no in range(11)]

# Column titles written as the first row of the CSV file.
HEADER = ["Date", "Month", "FY", "CY", "FY Quarter", "CY Quarter", "Day of Week", "Geography", "MO", "OEM", "Device Name", "GDN",
          "Refurbished (Y/N)", "Color", "Storage (GB)", "Additional", "Plan Name", "Currency", "Device Price", "Plan Price",
          "Plan Data", "Plan Minutes"]

# Ordered (old, new) substitutions applied to the UTF-8 encoded device name.
# Order matters: a longer phrase has to be replaced before any shorter
# phrase it contains ("Noir et Blanc" before "Noir", "Bleu Nuit" before "Bleu").
REPLACEMENTS = [
    ("é", ""),
    ("RECONDITIONNE", "Refurbished"),
    ("reconditionn", "Refurbished"),
    ("Tablette", "Tablet"),
    ("Noir et Blanc", "Black and White"),
    ("Remis à neuf", "Refurbished"),
    ("Remis à Neuf", "Refurbished"),
    ("Reconditionn", "Refurbished"),
    ("Go", "GB"),
    ("Bleu Nuit", "Midnight Blue"),
    ("Noir", "Black"),
    ("Blanc", "White"),
    ("Bleu", "Blue"),
    ("Rose", "Pink"),
    ("Rouge", "Red"),
    ("Gris", "Grey"),
]


def _price_text(tag):
    """Return the tag's string as UTF-8 text with "€" removed and "," -> ".".

    Returns an empty string when ``tag`` is None (no matching price part).
    """
    if tag is None:
        return ""
    return unicode(tag.string).encode('utf8').strip().replace("€", "").replace(",", ".")


for url in urls:
    page = urllib2.urlopen(url).read()
    soup = BeautifulSoup(page)
    items = soup.findAll('h3', {"class": "title"})
    prices_int = soup.findAll('span', {"class": "price"})

    # Keep only the decimal-part spans whose parent element carries no
    # "class" attribute; the loop variable is used directly instead of
    # re-running the same findAll() query for every element.
    prices_dec = [tag for tag in soup.findAll('span', {'class': 'priceDecimalPart'})
                  if not tag.parent.has_attr('class')]
    # Pad with None so that zip() below is never shortened by this list.
    prices_dec.extend([None] * (len(items) - len(prices_dec)))

    # NOTE: mode 'wb' truncates the file, and this open() is executed once
    # per URL. Each page therefore replaces what the previous page wrote,
    # and after the loop the file holds only the header plus the rows of
    # the last page that produced any.
    with open('sfr_oemtest.csv', 'wb') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        spamwriter.writerow(HEADER)
        for item, price_int, price_dec in zip(items, prices_int, prices_dec):
            textcontent = u' '.join(item.stripped_strings)
            name_1 = unicode(textcontent).encode('utf8')
            for old, new in REPLACEMENTS:
                name_1 = name_1.replace(old, new)
            name_1 = name_1.strip()
            if not name_1:
                # Nothing to report for a title with no text.
                continue

            if name_1.find('Refurbished') == -1:
                name = name_1
                refur = "N"
            else:
                name = name_1.replace("Refurbished", "")
                refur = "Y"

            # The OEM is the text before the first space of the translated
            # name; a name without any space is used as a whole.
            pos = name_1.find(" ")
            oem = name[0:pos] if pos != -1 else name

            spamwriter.writerow([time.strftime("%Y-%m-%d"), time.strftime("%B"), fy, time.strftime("%Y"), fy_q, q,
                                 time.strftime("%A"), "France", "SFR", oem, name, "", refur, "", "", "", "24 Months",
                                 "€", _price_text(price_int) + _price_text(price_dec), "", "", ""])
CODE-2:
# Catalogue result pages to scrape: p=0 through p=10.
urls = ['http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=%d' % page_no
        for page_no in range(11)]

# Ordered (old, new) substitutions applied to the UTF-8 encoded device name.
# Order matters: a longer phrase has to be replaced before any shorter
# phrase it contains ("Noir et Blanc" before "Noir", "Bleu Nuit" before "Bleu").
REPLACEMENTS = [
    ("é", ""),
    ("RECONDITIONNE", "Refurbished"),
    ("reconditionn", "Refurbished"),
    ("Tablette", "Tablet"),
    ("Noir et Blanc", "Black and White"),
    ("Remis à neuf", "Refurbished"),
    ("Remis à Neuf", "Refurbished"),
    ("Reconditionn", "Refurbished"),
    ("Go", "GB"),
    ("Bleu Nuit", "Midnight Blue"),
    ("Noir", "Black"),
    ("Blanc", "White"),
    ("Bleu", "Blue"),
    ("Rose", "Pink"),
    ("Rouge", "Red"),
    ("Gris", "Grey"),
]


def _price_text(tag):
    """Return the tag's string as UTF-8 text with "€" removed and "," -> ".".

    Returns an empty string when ``tag`` is None (no matching price part).
    """
    if tag is None:
        return ""
    return unicode(tag.string).encode('utf8').strip().replace("€", "").replace(",", ".")


for url in urls:
    page = urllib2.urlopen(url).read()
    soup = BeautifulSoup(page)
    items = soup.findAll('h3', {"class": "title"})
    prices_int = soup.findAll('span', {"class": "price"})

    # Keep only the decimal-part spans whose parent element carries no
    # "class" attribute; the loop variable is used directly instead of
    # re-running the same findAll() query for every element.
    prices_dec = [tag for tag in soup.findAll('span', {'class': 'priceDecimalPart'})
                  if not tag.parent.has_attr('class')]
    # Pad with None so that zip() below is never shortened by this list.
    prices_dec.extend([None] * (len(items) - len(prices_dec)))

    # Mode 'ab' appends: rows from every page accumulate in the file, and
    # rows from earlier runs are kept as well. No header row is written.
    with open('Pricing_Updated.csv', 'ab') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        for item, price_int, price_dec in zip(items, prices_int, prices_dec):
            textcontent = u' '.join(item.stripped_strings)
            if not textcontent:
                # Nothing to report for a title with no text.
                continue

            name = unicode(textcontent).encode('utf8')
            for old, new in REPLACEMENTS:
                name = name.replace(old, new)

            spamwriter.writerow([time.strftime("%Y-%m-%d"),
                                 time.strftime("%B"), fy, time.strftime("%Y"), fy_q, q,
                                 time.strftime("%A"), "France", "SFR", "",
                                 name, "", "", "", "", "", "24 Months",
                                 "€", _price_text(price_int) + _price_text(price_dec), "", "", ""])
答案 0 :(得分:1)
区别在于 `open('Pricing_Updated.csv', 'ab')` 与 `open('sfr_oemtest.csv', 'wb')`,具体而言是 `ab` 与 `wb`。`a` 表示追加(append),而 `w` 表示写入(write),后者在打开文件时会先清空已有内容。在第一个示例中,每次循环都会重新打开文件并覆盖之前写入的行,这就是您最终只能看到最后一条记录的原因。在第二个示例中,新数据会被追加到已有数据之后。