从json文件创建一个sql查询

时间:2018-03-05 13:36:03

标签: python sql json create-table

我有一个包含以下信息的JSON文件:

{
  "type" : "record",
  "name" : "warranty",
  "doc" : "Schema generated by Kite",
  "fields" : [ {
    "name" : "id",
    "type" : "long",
    "doc" : "Type inferred from '1'"
  }, {
    "name" : "train_id",
    "type" : "long",
    "doc" : "Type inferred from '21691'"
  }, {
    "name" : "siemens_nr",
    "type" : "string",
    "doc" : "Type inferred from 'Loco-001'"
  }, {
    "name" : "uic_nr",
    "type" : "long",
    "doc" : "Type inferred from '193901'"
  }, {
    "name" : "Configuration",
    "type" : "string",
    "doc" : "Type inferred from 'ZP28'"
  }, {
    "name" : "Warranty_Status",
    "type" : "string",
    "doc" : "Type inferred from 'Out_of_Warranty'"
  }, {
    "name" : "Warranty_Data_Type",
    "type" : "string",
    "doc" : "Type inferred from 'Real_based_on_preliminary_acceptance_date'"
  }, {
    "name" : "of_progression",
    "type" : "long",
    "doc" : "Type inferred from '100'"
  }, {
    "name" : "Delivery_Date",
    "type" : "string",
    "doc" : "Type inferred from '18/12/2009'"
  }, {
    "name" : "Warranty_on_Delivery_Date",
    "type" : "string",
    "doc" : "Type inferred from '18/12/2013'"
  }, {
    "name" : "Customer_Status",
    "type" : "string",
    "doc" : "Type inferred from 'homologation'"
  }, {
    "name" : "Commissioning_Date",
    "type" : "string",
    "doc" : "Type inferred from '6/10/2010'"
  }, {
    "name" : "Preliminary_acceptance_date",
    "type" : "string",
    "doc" : "Type inferred from '6/01/2011'"
  }, {
    "name" : "Warranty_Start_Date",
    "type" : "string",
    "doc" : "Type inferred from '6/01/2011'"
  }, {
    "name" : "Warranty_End_Date",
    "type" : "string",
    "doc" : "Type inferred from '6/01/2013'"
  }, {
    "name" : "Effective_End_Warranty_Date",
    "type" : [ "null", "string" ],
    "doc" : "Type inferred from 'null'",
    "default" : null
  }, {
    "name" : "Level_2_in_function",
    "type" : "string",
    "doc" : "Type inferred from '17/07/2015'"
  }, {
    "name" : "Baseline",
    "type" : "string",
    "doc" : "Type inferred from '2.10.23.4'"
  }, {
    "name" : "RELN_revision",
    "type" : "string",
    "doc" : "Type inferred from '0434-26.3'"
  }, {
    "name" : "TC_report",
    "type" : "string",
    "doc" : "Type inferred from 'A480140'"
  }, {
    "name" : "Last_version_Date",
    "type" : "string",
    "doc" : "Type inferred from 'A-23/09/2015'"
  }, {
    "name" : "ETCS_ID_NID_Engine",
    "type" : [ "null", "long" ],
    "doc" : "Type inferred from '13001'",
    "default" : null
  }, {
    "name" : "Item_Type",
    "type" : "string",
    "doc" : "Type inferred from 'Item'"
  }, {
    "name" : "Path",
    "type" : "string",
    "doc" : "Type inferred from 'sites/TrWMTISnerc_Community/Lists/X4Trains'"
  } ]
}

我想根据 fields 数组中已有的信息(字段名和类型)生成一条用于建表的 SQL 语句。为此,我编写了下面这段 Python 代码:

import json
from pprint import pprint
import string

# Schema type name -> SQL column type. Types not listed here are emitted as-is.
SQL_TYPES = {"long": "float", "string": "varchar(255)"}


def build_create_table(schema):
    """Return the ``create table`` statement described by a parsed schema.

    ``schema`` is a dict with a ``"name"`` key (the table name) and a
    ``"fields"`` list whose items each have a ``"name"`` and a ``"type"``.
    When ``"type"`` is a list (e.g. ``["null", "string"]``) its last element
    is used as the column type.

    The type mapping is applied to each column type individually rather than
    by substring replacement over the finished statement, so a table or
    column whose *name* happens to contain ``long`` or ``string`` is left
    untouched.

    Raises ``KeyError`` if a required key is missing and ``IndexError`` if a
    list-valued ``"type"`` is empty.
    """
    columns = []
    for field in schema["fields"]:
        schema_type = field["type"]
        if isinstance(schema_type, list):
            schema_type = schema_type[-1]
        schema_type = str(schema_type)
        columns.append(
            str(field["name"]) + " " + SQL_TYPES.get(schema_type, schema_type)
        )
    # NOTE(review): the statement wording is kept exactly as the script has
    # always printed it; common SQL dialects spell this
    # ``CREATE TABLE IF NOT EXISTS name (...)`` - confirm against the target
    # database before executing the result.
    # Joining (instead of appending " ," and slicing off the last character)
    # keeps the parentheses balanced even when there are no fields.
    return (
        "create table " + schema["name"] + " if not exist("
        + " ,".join(columns) + " )"
    )


if __name__ == "__main__":
    with open('/data/my-data/archive/in/test_warranty_data.csv.txt.avro.txt', 'r') as f:
        data = json.load(f)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(build_create_table(data))

它会返回此结果:

create table warranty if not exist(id float ,train_id float ,siemens_nr varchar(255) ,uic_nr float ,Configuration varchar(255) ,Warranty_Status varchar(255) ,Warranty_Data_Type varchar(255) ,of_progression float ,Delivery_Date varchar(255) ,Warranty_on_Delivery_Date varchar(255) ,Customer_Status varchar(255) ,Commissioning_Date varchar(255) ,Preliminary_acceptance_date varchar(255) ,Warranty_Start_Date varchar(255) ,Warranty_End_Date varchar(255) ,Effective_End_Warranty_Date varchar(255) ,Level_2_in_function varchar(255) ,Baseline varchar(255) ,RELN_revision varchar(255) ,TC_report varchar(255) ,Last_version_Date varchar(255) ,ETCS_ID_NID_Engine float ,Item_Type varchar(255) ,Path varchar(255) )

代码运行良好,但我想改进它并使其更美观。你有什么建议吗?谢谢

1 个答案:

答案 0 :(得分:0)

您可以替换此部分:

# Builds the statement text piece by piece from `data`
# (presumably the dict parsed from the schema JSON - confirm against the caller).
fields = data["fields"]
# Statement prefix; the table name is read from the "name" key.
sentence="create table "+data["name"]+" if not exist("
for field in fields:
  # A list-valued "type" contributes its last element as the column type.
  if isinstance(field["type"],list):
    sentence += str(field["name"])+" "+ str(field["type"][-1])+" ,"
  else:
    sentence += str(field["name"])+" "+ str(field["type"])+" ,"
# Drop the final character (the trailing comma) and close the parenthesis.
sentence=sentence[0:-1]+")"

改成更符合 Python 风格(pythonic)的写法:

# One "name type" entry per field, comma-separated, dropped into the template.
# A list-valued "type" contributes its last element; any other value is used
# directly.
sentence = "create table {name} if not exist({fields})".format(
    name=data["name"],
    fields=", ".join(
        col["name"] + " " + (
            col["type"][-1] if isinstance(col["type"], list) else col["type"]
        )
        for col in data["fields"]
    ),
)