我正在尝试解析某个API返回的JSON。目前,我可以读取响应中的每个条目,把数据解析到变量中,再用这些变量拼出向表中插入数据的SQL语句。以下是一段示例JSON:
{
"data": [
{
"id": 64731,
"label": "Label Text goes here",
"locations": [
{
"id": "55925",
"label": "San Miguel (La Dorada)",
"self": "http://url.com/api/locations/55925"
}
],
"other_location": "Other location text goes here",
"subject": "Subject Text goes here",
"url": "http://www.url.com"
}
]
}
我的Python脚本会读取JSON中的每个条目,并把每个条目作为一行导入到一张以id、label、location、other_location、subject和url为字段的表中。
但是,有些条目包含多个位置。我想做的是:为每个位置各重复一次该条目,只改变位置信息。例如,对于下面这样的数据:
{
"data": [
{
"id": 64731,
"label": "Label Text goes here",
"locations": [
{
"id": "55925",
"label": "San Miguel (La Dorada)",
"self": "http://url.com/api/locations/55925"
},
{
"id": "55926",
"label": "Istanbul",
"self": "http://url.com/api/locations/55926"
}
],
"other_location": "Other location text goes here",
"subject": "Subject Text goes here",
"url": "http://www.url.com"
}
]
}
我希望它在表中生成两行:两行的其余数据相同,只有位置不同。我应该如何修改下面这个已经可以运行的脚本,把包含多个位置的条目拆分成多行?(注意:有些变量来自位置字段所指向的嵌套JSON,但这应该不会影响我想做的事情。)
def insert_into_table(sql_query):
    """Run *sql_query* through ``cl.sql`` and print whatever it returns.

    Any exception raised by the call is reported on stdout instead of being
    propagated; the function always returns ``None``.

    Args:
        sql_query: Complete SQL statement to execute.
    """
    try:
        # ``cl`` is not defined in this block; it is expected to exist at
        # module level.  Single-argument print() behaves the same on
        # Python 2 and Python 3.
        print(cl.sql(sql_query))
    except Exception as e:
        # The original ``print ("...", e)`` is a Python 2 print statement
        # applied to a tuple, so it emitted a tuple literal.  %r keeps the
        # exception type visible and cannot itself fail on non-ASCII
        # message text.
        print("some error occurred: %r" % (e,))
# Names used in the "cannot be opened" messages, nearest ancestor first.
_ANCESTOR_NAMES = ("Parent", "Grandparent", "GreatGrandparent")

_INSERT_PREFIX = "INSERT INTO table_name (lat_lon, id, label, location_id, location_label, country, geoid, geo_pcode, geo_iso_code, geo_admin_level, other_location, subject, assessment_url) VALUES ("


def _fetch_json(url):
    """Open *url* and return ``(status_code, parsed_json_or_None)``.

    The body is read and parsed only when the status code is 200.  The
    response is always closed before returning.
    """
    response = urllib2.urlopen(url)
    try:
        code = response.getcode()
        if code != 200:
            return code, None
        return code, json.loads(response.read())
    finally:
        response.close()


def _resolve_country(location, location_label):
    """Return ``(country, location_label)`` for one location record.

    Args:
        location: First element of the location endpoint's ``"data"`` list.
        location_label: Label of the location as given in the entry.

    Returns:
        A tuple ``(country, location_label)``.  When the location itself has
        admin level "0" it *is* the country, so its label moves to
        ``country`` and ``location_label`` becomes "null".  For admin level
        "1" up to three ancestors are followed through ``parent[0].self``
        until one with admin level "0" is found.  ``country`` is "" when no
        country could be determined and "null" when the third ancestor is
        still not level "0".
    """
    level = location["admin_level"]
    if level == "0":
        return location_label, "null"
    if level != "1":
        print("Primary Admin Level was not level 0 or 1")
        return "", location_label

    node = location
    for name in _ANCESTOR_NAMES:
        code, payload = _fetch_json(node["parent"][0]["self"])
        if code != 200:
            print(name + " location url does not exist or cannot be opened. Code: " + str(code))
            return "", location_label
        node = payload["data"][0]
        if node["admin_level"] == "0":
            return node["label"], location_label
        # Only an ancestor that is itself level "1" is followed further;
        # the last ancestor falls through to the "null" result below.
        if node["admin_level"] != "1" and name != _ANCESTOR_NAMES[-1]:
            return "", location_label
    # leaving as null for testing purposes if dive into 5th level
    return "null", location_label


def _sql_text(value):
    """Return *value* as text safe to place between single quotes in SQL."""
    text = value if isinstance(value, (str, type(u""))) else str(value)
    # Doubling the quote is the standard SQL escape; without it a label such
    # as "Cote d'Ivoire" terminates the literal early.
    return text.replace("'", "''")


def _build_insert_query(lon, lat, values):
    """Build the INSERT statement for one row.

    Args:
        lon: Longitude of the location.
        lat: Latitude of the location.
        values: The twelve column values that follow ``lat_lon``, in column
            order.

    Raises:
        ValueError: If *lon* or *lat* cannot be converted to a float.
    """
    point = "'SRID=4326; POINT (%f %f)'" % (float(str(lon)), float(str(lat)))
    quoted = ", ".join("'%s'" % _sql_text(v) for v in values)
    return _INSERT_PREFIX + point + ", " + quoted + ")"


def main():
    """Import every entry returned by the API as one table row.

    For each entry the first element of ``"locations"`` is looked up, its
    country is resolved through the location's parents, and an INSERT
    statement is built and passed to ``insert_into_table``.  Entries whose
    location cannot be fetched, or whose coordinates are not numeric, are
    reported and skipped.
    """
    # define a variable to hold the source URL
    urlData = "http://www.url.com/api"
    # Open the URL and load the response body into a dictionary
    code, api_url = _fetch_json(urlData)
    if code != 200:
        print("Cannot open API url. Code: " + str(code))
        return

    for i in api_url["data"]:
        entry_id = i["id"]
        label = i["label"]
        # variables for nested locations JSON
        location_api = i["locations"][0]["self"]
        location_id = i["locations"][0]["id"]
        location_label = i["locations"][0]["label"]

        code, load_locations = _fetch_json(location_api)
        if code != 200:
            # Skip the entry: without the location record there are no geo
            # values, and reusing those of the previous entry would insert
            # wrong data.
            print("Cannot open locations url. Code: " + str(code))
            continue

        geo = load_locations["data"][0]
        country, location_label = _resolve_country(geo, location_label)

        # One value per column after lat_lon, in column order.
        values = (
            entry_id,
            label,
            location_id,
            location_label,
            country,
            geo["id"],
            geo["pcode"],
            geo["iso3"],
            geo["admin_level"],
            i["other_location"],
            i["subject"],
            i["url"],
        )
        try:
            sql_query = _build_insert_query(
                geo["geolocation"]["lon"], geo["geolocation"]["lat"], values)
        except ValueError as e:
            # No usable statement exists, so nothing is sent to the table.
            print("some error occurred: %r" % (e,))
            continue

        try:
            print(str(sql_query))
        except ValueError:
            # Under Python 2, str() of a unicode query containing non-ASCII
            # text raises UnicodeEncodeError (a ValueError).  The echo is
            # diagnostic only, so fall back to repr and still insert.
            print(repr(sql_query))

        # This is where you call insert_into_table()
        insert_into_table(sql_query)
任何帮助将不胜感激。
答案 0 :(得分:0)
您必须遍历 i["locations"] 列表。
把这段代码替换掉:
#variables for nested locations JSON
location_api = i["locations"][0]["self"]
location_id = i["locations"][0]["id"]
location_label = i["locations"][0]["label"]
替换为以下内容:
#variables for nested locations JSON
for loc in i["locations"]:
location_api = loc["self"]
location_id = loc["id"]
location_label = loc["label"]
<...rest of your code...>