如何将大JSON文件导入MYSQL

时间:2018-06-19 15:34:01

标签: python mysql json python-3.x

我必须从给定的JSON文件(50 MB大小)读取数据并将其导入MYSQL。 我曾尝试使用Python并失败(出现错误)。我不知道如何以最有效的方式将JSON文件导入MYSQL。请帮帮我。

到目前为止,我所做的是:

import os
import json
import MySQLdb

# read JSON file which is in the next parent folder
file = 'vehicle.json'
json_data = open(file).read()
json_obj = json.loads(json_data)


#print(json_obj)

# do validation and checks before insert
def validate_string(val):
    if val is not None:
        if type(val) is int:
            # for x in val:
            #    print(x)
            return str(val).encode('utf-8')
        else:
            return val


# connect to MySQL
con = MySQLdb.connect(host="127.0.0.1", user="root", passwd="PASSWORD", db="vehicle", port=3306)
cursor = con.cursor()

cursor.execute("DROP TABLE IF EXISTS vehicle")
sql1 = """ 
CREATE TABLE vehicle (
    CarYear int(4) ,
    make varchar(15),
    model varchar(30),
    cylinders varchar(2),
    VClass varchar(30),
    drive varchar(30),
    trany  varchar(30),
    displ varchar(20),
    eng_dscr varchar(20),
    trans_dscr varchar(30),
    mpgData varchar(5),
    evMotor varchar(10),
    youSaveSpend varchar(20),
    fuleType varchar(20),
    fuleType1 varchar(30),
    barrelsA08 varchar(2),
    charge120 varchar(2),
    charge240 varchar(2),
    city08 varchar(4),
    city08U varchar(2),
    cityA08 varchar(2),
    cityA08U varchar(2),
    cityCD varchar(2),
    cityE varchar(2),
    cityUF varchar(2),
    co2 varchar(2),
    coA2 varchar(2),
    co2TailpipeAGpm varchar(2),
    co2TailpipeGpm varchar(20),
    comb08 varchar(3),
    comb08U varchar(2),
    combA08 varchar(2),
    combA08U varchar(2),
    combE varchar(2),
    combinedCD varchar(2),
    combinedUF varchar(2),
    engld varchar(8),
    feScore varchar(3),
    fuelCost08 varchar(5),
    fuelCostA08 varchar(2),
    ghgScore varchar(2),
    ghgScoreA varchar(2),
    highway08 varchar(4),
    highway08U varchar(2),
    highwayA08 varchar(2),
    highwayA08U varchar(2),
    highwayCD varchar(2),
    highwayE varchar(2),
    highwayUF varchar(2),
    hlv varchar(2),
    hpv varchar(2),
    id varchar(8),
    lv2 varchar(2),
    lv4 varchar(2),
    phevBlended varchar(10),
    pv2 varchar(5),
    pv4 varchar(5),
    CarRange varchar(5),
    rangeCity varchar(5),
    rangeCityA varchar(5),
    rangeHwy varchar(5),
    rangeHwyA varchar(5),
    UCity varchar(5),
    UCityA varchar(5),
    UHighway varchar(5),
    UHighwayA varchar(5),
    guzzler varchar(10),
    tCharger varchar(10),
    sCharger varchar(10),
    atvType varchar(10),
    fuelType2 varchar(10),
    rangeA varchar(10),
    mfrCode varchar(10),
    c240Dscr varchar(10),
    charge240b varchar(5),
    c240bDscr varchar(10),
    createdOn varchar(30),
    modifiedOn varchar(30),
    startStop varchar(10),
    phevCity varchar(5),
    phevHwy varchar(5),
    phevComb varchar(5)   
)"""

cursor.execute(sql1)

# parse json data to SQL insert
for i, item in enumerate(json_obj):
    CarYear = validate_string(item.get("year", None))
    make = validate_string(item.get("make", None))
    model = validate_string(item.get("model", None))
    cylinders = validate_string(item.get("cylinders", None))
    VClass = validate_string(item.get("VClass", None))
    drive = validate_string(item.get("drive", None))
    trany = validate_string(item.get("trany", None))
    displ = validate_string(item.get("displ", None))
    eng_dscr = validate_string(item.get("eng_dscr", None))
    trans_dscr = validate_string(item.get("trans_dscr", None))
    mpgData = validate_string(item.get("mpgData", None))
    evMotor = validate_string(item.get("evMotor", None))
    youSaveSpend = validate_string(item.get("youSaveSpend", None))
    fuleType = validate_string(item.get("fuleType", None))
    fuleType1 = validate_string(item.get("fuleType1", None))
    barrelsA08 = validate_string(item.get("barrelsA08", None))
    charge120 = validate_string(item.get("charge120", None))
    charge240 = validate_string(item.get("charge240", None))
    city08 = validate_string(item.get("city08", None))
    city08U = validate_string(item.get("city08U", None))
    cityA08 = validate_string(item.get("cityA08", None))
    cityA08U = validate_string(item.get("cityA08U", None))
    cityCD = validate_string(item.get("cityCD", None))
    cityE = validate_string(item.get("cityE", None))
    cityUF = validate_string(item.get("cityUF", None))
    co2 = validate_string(item.get("co2", None))
    coA2 = validate_string(item.get("coA2", None))
    co2TailpipeAGpm = validate_string(item.get("co2TailpipeAGpm", None))
    co2TailpipeGpm = validate_string(item.get("co2TailpipeGpm", None))
    comb08 = validate_string(item.get("comb08", None))
    comb08U = validate_string(item.get("comb08U", None))
    combA08 = validate_string(item.get("combA08", None))
    combA08U = validate_string(item.get("combA08U", None))
    combE = validate_string(item.get("combE", None))
    combinedCD = validate_string(item.get("combinedCD", None))
    combinedUF = validate_string(item.get("combinedUF", None))
    engld = validate_string(item.get("engld", None))
    feScore = validate_string(item.get("feScore", None))
    fuelCost08 = validate_string(item.get("fuelCost08", None))
    fuelCostA08 = validate_string(item.get("fuelCostA08", None))
    ghgScore = validate_string(item.get("ghgScore", None))
    ghgScoreA = validate_string(item.get("ghgScoreA", None))
    highway08 = validate_string(item.get("highway08", None))
    highway08U = validate_string(item.get("highway08U", None))
    highwayA08 = validate_string(item.get("highwayA08", None))
    highwayA08U = validate_string(item.get("highwayA08U", None))
    highwayCD = validate_string(item.get("highwayCD", None))
    highwayE = validate_string(item.get("highwayE", None))
    highwayUF = validate_string(item.get("highwayUF", None))
    hlv = validate_string(item.get("hlv", None))
    hpv = validate_string(item.get("hpv", None))
    lv2 = validate_string(item.get("lv2", None))
    lv4 = validate_string(item.get("lv4", None))
    phevBlended = validate_string(item.get("phevBlended", None))
    pv2 = validate_string(item.get("pv2", None))
    pv4 = validate_string(item.get("pv4", None))
    CarRange = validate_string(item.get("CarRange", None))
    rangeCity = validate_string(item.get("rangeCity", None))
    rangeCityA = validate_string(item.get("rangeCityA", None))
    rangeHwy = validate_string(item.get("rangeHwy", None))
    rangeHwyA = validate_string(item.get("rangeHwyA", None))
    UCity = validate_string(item.get("UCity", None))
    UCityA = validate_string(item.get("UCityA", None))
    UHighway = validate_string(item.get("UHighway", None))
    UHighwayA = validate_string(item.get("UHighwayA", None))
    guzzler = validate_string(item.get("guzzler", None))
    tCharger = validate_string(item.get("tCharger", None))
    sCharger = validate_string(item.get("sCharger", None))
    atvType = validate_string(item.get("atvType", None))
    fuelType2 = validate_string(item.get("fuelType2", None))
    rangeA = validate_string(item.get("rangeA", None))
    mfrCode = validate_string(item.get("mfrCode", None))
    c240Dscr = validate_string(item.get("c240Dscr", None))
    charge240b = validate_string(item.get("charge240b", None))
    c240bDscr = validate_string(item.get("c240bDscr", None))
    createdOn = validate_string(item.get("createdOn", None))
    modifiedOn = validate_string(item.get("modifiedOn", None))
    startStop = validate_string(item.get("startStop", None))
    phevCity = validate_string(item.get("phevCity", None))
    phevHwy = validate_string(item.get("phevHwy", None))
    phevComb = validate_string(item.get("phevComb", None))

    sql2 = """INSERT INTO vehicle (CarYear,make,model,cylinders,VClass,drive,trany,displ,eng_dscr,trans_dscr,mpgData,evMotor,
    youSaveSpend,fuleType,fuleType1,barrelsA08,charge120,charge240,city08,city08U,cityA08,cityA08U,cityCD,cityE,cityUF,co2,coA2,co2TailpipeAGpm,
    co2TailpipeGpm,comb08,comb08U,combA08,combA08U,combE,combinedCD,combinedUF,engld,feScore,fuelCost08,fuelCostA08,ghgScore,ghgScoreA,highway08,
    highway08U,highwayA08,highwayA08U,highwayCD,highwayE,highwayUF,hlv,hpv,id,lv2,lv4,phevBlended,pv2,pv4,CarRange,rangeCity,rangeCityA,rangeHwy,
    rangeHwyA,UCity,UCityA,UHighway,UHighwayA,guzzler,tCharger,sCharger,atvType,fuelType2,rangeA,mfrCode,c240Dscr,charge240b,c240bDscr,createdOn,
    modifiedOn,startStop,phevCity,phevHwy,phevComb)
    VALUES (%d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s), (CarYear,make,model,cylinders,VClass,drive,trany,displ,eng_dscr,trans_dscr,mpgData,evMotor,
youSaveSpend,fuleType,fuleType1,barrelsA08,charge120,charge240,city08,city08U,cityA08,cityA08U,cityCD,cityE,cityUF,co2,coA2,co2TailpipeAGpm,
co2TailpipeGpm,comb08,comb08U,combA08,combA08U,combE,combinedCD,combinedUF,engld,feScore,fuelCost08,fuelCostA08,ghgScore,ghgScoreA,highway08,
highway08U,highwayA08,highwayA08U,highwayCD,highwayE,highwayUF,hlv,hpv,id,lv2,lv4,phevBlended,pv2,pv4,CarRange,rangeCity,rangeCityA,rangeHwy,
rangeHwyA,UCity,UCityA,UHighway,UHighwayA,guzzler,tCharger,sCharger,atvType,fuelType2,rangeA,mfrCode,c240Dscr,charge240b,c240bDscr,createdOn,
modifiedOn,startStop,phevCity,phevHwy,phevComb)"""

    cursor.execute(sql2)
    con.commit()
    # print(CarYear)

con.close()

我收到此错误:

  

回溯(最近通话最近):文件   “ C:/Users/risha/PycharmProjects/Test/JSON_TEST.py”,第209行,在          cursor.execute(sql2)文件“ C:\ Users \ risha \ AppData \ Local \ Programs \ Python \ Python36 \ lib \ site-packages \ MySQLdb \ cursors.py”,   第250行,执行中       self.errorhandler(自我,exc,值)文件“ C:\ Users \ risha \ AppData \ Local \ Programs \ Python \ Python36 \ lib \ site-packages \ MySQLdb \ connections.py”,   第50行,在defaultErrorhandler中       引发错误值文件“ C:\ Users \ risha \ AppData \ Local \ Programs \ Python \ Python36 \ lib \ site-packages \ MySQLdb \ cursors.py”,   第247行,执行       res = self._query(查询)文件“ C:\ Users \ risha \ AppData \ Local \ Programs \ Python \ Python36 \ lib \ site-packages \ MySQLdb \ cursors.py”,   _query中的第411行       rowcount = self._do_query(q)文件“ C:\ Users \ risha \ AppData \ Local \ Programs \ Python \ Python36 \ lib \ site-packages \ MySQLdb \ cursors.py”,   _do_query中的第374行       db.query(q)文件“ C:\ Users \ risha \ AppData \ Local \ Programs \ Python \ Python36 \ lib \ site-packages \ MySQLdb \ connections.py”,   第277行,在查询中       _mysql.connection.query(自己,查询)   _mysql_exceptions.ProgrammingError:(1064,“您的SQL语法有错误;请查看与您的MySQL服务器相对应的手册   在附近使用正确语法的版本   '%d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,% s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s'   在第7行”)

     

以退出代码1完成的过程

您也可以建议其他任何方法。我也创建了相同的数据库,并且通过硬编码它工作正常,但是在这里显示了上述错误。

任何帮助都是有意义的。

0 个答案:

没有答案