我是一个相当新手寻求帮助的一些编码,似乎我比我在这里咀嚼更多。 我应该从两个网站上抓取数据,收集两个游戏产品的价格,比较价格并将其存储在sqlite3数据库中,我可以以任何方式搜索,编辑或修改它。 目前我可以毫无问题地刮掉,但是将数据插入数据库是个问题。任何帮助将受到高度赞赏。
# import libraries
from bs4 import BeautifulSoup
import sqlite3
import sys
import urllib2
import datetime
import os
import requests
#Global variables
fishpond_price = []
fishpond_title = []
gameheader_price = []
gameheader_title = []
now = datetime.datetime.now()
time = now.strftime("%H:%M")
date = now.strftime("%d-%m-%Y")
# combinelist=games1+games2
# for items in combinelist:
# c.execute("INSERT INTO user(name,store) VALUES(?,?);",[items[0],items[2]])
# c.execute("INSERT INTO user(name,store) VALUES(?,?);",[items[0],items[2]])
#Parsing Gameheader
def gameheader(games):
print "Gameheader Prices_____________________"
# scraping game data from gamehead
for b in soup2.findAll('div', attrs={'class': 'price_area'}):
#print b
#c = b.find('div',{'class':'current_price_text'})
#print c
try:
c = b.find('div', {'class':'current_price_text'})
price2 = c.text
print price2
except:
continue
#Parsing Fishpond
def fishpond(games):
conn = sqlite3.connect('callofduty.db')
print ("Updating prices from Fishpond")
#Parse
counter = 1
for a in all_games.findAll('tr'):
try:
name = a.findAll('img', attrs={'class': 'photo'})
print name[0]['title']
fishpond_title.append(title)
#Price
price = a.findAll('span', attrs={'class': 'productSpecialPrice'})
print price[0].text
fishpond_price.append(price)
#import pdb; pdb.set_trace() # breakpoint 6384638a //
#url
site = a.findAll('head', attrs = {'class': 'base href'})
print site[0].text
games.append({'name': name[0]['title'], 'price': price[0].text, 'shop': 'Fishpond', 'site': site[0].text})
except:
continue
## NAME ## img and return the attr title
## PRICE ### use searach for div attr class_ = "productOriginalPrice"
#INSERT INTO DATABASE USING FUNCTIONS
for key in fishpond_price:
conn.execute("INSERT INTO gameProduct (title, site, date, time) VALUES (?, ?, ?, ?) ", (key, "fishpond", date, time));
for key in fishpond_title:
conn.execute("INSERT INTO gamePrice (prod_id, price, date, time) VALUES (?, ?, ?, ?) ", (key, "fishpond", date, time));
print games
print ("Update Complete, {} Prices have been entered").format(counter)
return games
#Creating database
conn = sqlite3.connect('callofduty.db') #<- initial database creation/connection
conn.execute('''CREATE TABLE IF NOT EXISTS gameProduct(prodCode INTEGER primarykey,prodName Text,prodSite Text);''')
conn.execute('''CREATE TABLE IF NOT EXISTS gamePrice(itemNum INTEGER,prodDate NUMERIC, prodTime NUMERIC,prodPrice NUMERIC);''')
print 'Detected first run, creating database'
#Scraping Data for Fishpond
# Games Website Url
# Fishpond url
fishpondURL = 'http://www.fishpond.com.au/q/call+of+duty?rid=1744546722'
# query the fishpond website and return the html to the variable 'page'
page = urllib2.urlopen(fishpondURL)
# parse the fishpond html using beautiful soap and store in variable `soup`
soup = BeautifulSoup(page, 'html.parser')
# # Take out the <div> of Fishpond name and get its value
name_box = soup.find('h1', attrs={'class': 'productSearch-price-container'})
#print all_games
prices = []
for link in soup.find_all("span", class_="productSpecialPrice"):
prices.append(link.get_text())
#print prices
gamename = []
for link in soup.find_all("a", class_="blue_link fn url"):
gamename.append(link.get_text())
#print gamename
print gamename[1], prices [1]
#Save into database
fishpondgames = zip(gamename, prices)
for fish in fishpondgames:
print " : ".join(fish)
# #Site 2 Gameshead
all_games = soup.find_all("span", class_="category-products")
# Gamehead Url
gameURL = 'https://www.gamesmen.com.au/catalogsearch/result/?cat=&q=call+of+duty&dir=desc&order=relevance'
#query the gamesmen website and return the html to the variable 'page'
page2 = urllib2.urlopen(gameURL)
#parse the gameheader html using beautiful soup and store in variable 'soup'
soup2 = BeautifulSoup(page2, 'html.parser')
all_games2 = soup.find_all("a", class_="category-products")
print all_games2
for link in soup.find_all("span", class_="price-box"):
prices.append(link.get_text())
#print gamename
print gamename[1], prices [1]
#Save into database
gamesmen = zip(gamename, prices)
for game in gamesmen:
print " : ".join(gamesmen)
conn.close()