使用python将动态数据插入mysql

时间:2018-09-01 10:06:31

标签: python-3.x web-scraping beautifulsoup

编辑>>>>>

我写了一些代码,该代码返回两个输出,但出现错误。

我的代码的主要问题是什么?

from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import os
import sys
import codecs
from urllib.request import urlopen
import pymysql
import mysql.connector

# Scrape every product on the listing page(s) and insert one row per product
# into the MySQL table `list`.
my_url = "https://www.xxxxx.com/mobile_phones/?facet_is_mpg_child=0&viewType=gridView&page="

# All five placeholders must be comma-separated; the original "%s.%s" produced
# invalid SQL (error 1064). `rank` is backtick-quoted because RANK is a
# reserved word in MySQL 8.0+.
insert_sql = (
    "INSERT INTO list (productname,brand,price1,price2,`rank`) "
    "VALUES (%s,%s,%s,%s,%s)"
)

conn = pymysql.connect(host="localhost",user="root",passwd="",db="prod")
try:
    x = conn.cursor()

    for i in range(1): #electronic

        uClient = uReq(my_url + str(i))
        try:
            page_html = uClient.read()
        finally:
            # Release the HTTP connection even if reading fails.
            uClient.close()

        page_soup = soup(page_html, "html.parser")

        containers = page_soup.findAll("div" , {"class" : "sku -gallery" })

        for container in containers:

            name = container.img["alt"]

            title_container = container.findAll("span", {"class" : "brand"})
            Brand = title_container[0].text

            price = container.findAll("span",{"class" : "price"} )
            price_one = price[0].text.strip()

            # Old price is optional; default to '0' when the element is absent.
            price_old = container.findAll("span",{"class" : "price -old "})
            price_two = '0'
            if len(price_old) > 0:
                price_two = price_old[0].text.strip()

            # Rating is optional; default to 'N/A' when the element is absent.
            rank = container.findAll("span",{"class" : "rating-aggregate"})
            ranking = 'N/A'
            if len(rank) > 0:
                ranking = rank[0].text.strip()

            # Insert inside the loop so every product is stored, not only the
            # last one scraped; values are passed as parameters, never
            # formatted into the SQL string.
            x.execute(insert_sql, (name, Brand, price_one, price_two, ranking))

    # Commit once after all rows have been inserted successfully.
    conn.commit()
finally:
    # Always close the connection, including when scraping or inserting fails.
    conn.close()
  

C:\Users\xxxx\AppData\Local\Programs\Python\Python35\python.exe C:/Users/xxxx/.PyCharm2018.2/config/scratches/bd.py
Traceback (most recent call last):
  File "C:/Users/xxxx/.PyCharm2018.2/config/scratches/bd.py", line 54, in <module>
    x.execute("INSERT INTO list (productname,brand,price1,price2,rank) VALUES (%s,%s,%s,%s.%s)" , (name,Brand,price_one,price_two,ranking))
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\cursors.py", line 170, in execute
    result = self._query(query)
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\cursors.py", line 328, in _query
    conn.query(q)
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py", line 516, in query
    self._affected_rows = self._read_query_result(unbuffered=unbuffered)
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py", line 727, in _read_query_result
    result.read()
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py", line 1066, in read
    first_packet = self.connection._read_packet()
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py", line 683, in _read_packet
    packet.check_error()
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\protocol.py", line 220, in check_error
    err.raise_mysql_exception(self._data)
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\err.py", line 109, in raise_mysql_exception
    raise errorclass(errno, errval)
pymysql.err.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '.'2')' at line 1")

     

进程已结束,退出代码为 1

2 个答案:

答案 0 :(得分:0)

问题出在变量 rank 上。您应该传入的是 ranking,但不知为何漏掉了它。根据您提供的代码:

# findAll returns a result set of every matching <span>; it may be empty.
rank = container.findAll("span",{"class" : "rating-aggregate"}) # resultset
# ranking is assigned only when at least one match exists.
if len(rank) > 0:
    ranking = rank[0].text.strip() #result

因此更改为

# The last two placeholders must be separated by a comma, not a period;
# "%s.%s" is what triggers the 1064 syntax error near '.'.
x.execute("INSERT INTO list (productname,brand,price1,price2,rank) VALUES (%s,%s,%s,%s,%s)" , (name,Brand,price_one,price_two,ranking))

这样就可以了!另外给您一些建议:如果使用 if 条件,请始终为在条件语句中赋值的变量提供 else 分支或默认值。否则,当条件不成立时,该变量未被定义,后续使用时就会出错。例如:

# Variant 1: conditional expression that falls back to 'N/A' when no rating
# element was found.
rank = container.findAll("span",{"class" : "rating-aggregate"})
ranking = rank[0].text.strip() if len(rank) > 0 else 'N/A'

或者,

# Variant 2: assign the default first, then overwrite it only when a rating
# element was found.
rank = container.findAll("span",{"class" : "rating-aggregate"})
ranking = 'N/A'
if len(rank) > 0:
    ranking = rank[0].text.strip()

干杯!

答案 1 :(得分:0)

这段代码将信息存储在csv文件中,但是现在我需要将其保存到mysql中。

from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import os
import sys
import unicodecsv as csv
import codecs
from urllib.request import urlopen


# Scrape three listing pages and append one CSV line per product to mobile.csv.
my_url = "https://www.xxxx.com/mobile_phones/?facet_is_mpg_child=0&viewType=gridView&page="

filename = "mobile.csv"
headers = "name, Brand, price_one, price_two, ranking\n"

# A fresh utf-8-sig writer emits a BOM on its first write, so only use it (and
# only write the header) when the file is new or empty; otherwise append as
# plain UTF-8 to avoid a stray BOM/header in the middle of the data.
is_new_file = not os.path.exists(filename) or os.path.getsize(filename) == 0
encoding = "utf-8-sig" if is_new_file else "utf-8"

# Open the file once for the whole run; `with` closes it even on error
# (the original reopened it per page and closed only the last handle).
with codecs.open(filename, "a" , encoding) as f:
    if is_new_file:
        f.write(headers)

    for i in range(3): #electronic

        uClient = uReq(my_url + str(i))
        try:
            page_html = uClient.read()
        finally:
            # Release the HTTP connection even if reading fails.
            uClient.close()

        page_soup = soup(page_html, "html.parser")

        containers = page_soup.findAll("div" , {"class" : "sku -gallery" })

        for container in containers:

            name = container.img["alt"]

            title_container = container.findAll("span", {"class" : "brand"})
            Brand = title_container[0].text

            price = container.findAll("span",{"class" : "price"} )
            price_one = price[0].text.strip()

            # Keep the default a string: the row-building code below calls
            # .replace() on it, which an int default (0) does not support.
            price_old = container.findAll("span",{"class" : "price -old "})
            price_two = '0'
            if len(price_old) > 0:
                price_two = price_old[0].text.strip()

            # Reset per product so a missing rating neither raises NameError
            # nor silently reuses the previous product's rating.
            rank = container.findAll("span",{"class" : "rating-aggregate"})
            ranking = 'N/A'
            if len(rank) > 0:
                ranking = rank[0].text.strip()

            print("name " + name)
            print("Brand "+ Brand)
            print("price_one " + price_one)
            print("price_two {}".format(price_two))  #----> 
            print("ranking " + ranking)

            f.write(name + "," + Brand.replace(",", "|") + "," + price_one.replace(",", "") + "," + price_two.replace(",", "") + "," + ranking + "\n")