我对python / json /&编码..试图学习如何获得以下格式的json到sql表。我使用python pandas,它将json节点转换为字典。请有人帮我理解如何做到这一点。
同样的json:
{
"Volumes": [
{
"AvailabilityZone": "us-east-1a",
"Attachments": [
{
"AttachTime": "2013-12-18T22:35:00.000Z",
"InstanceId": "i-1234567890abcdef0",
"VolumeId": "vol-049df61146c4d7901",
"State": "attached",
"DeleteOnTermination": true,
"Device": "/dev/sda1"
}
],
"Tags": [
{
"Value": "DBJanitor-Private",
"Key": "Name"
},
{
"Value": "DBJanitor",
"Key": "Owner"
},
{
"Value": "Database",
"Key": "Product"
},
{
"Value": "DB Janitor",
"Key": "Portfolio"
},
{
"Value": "DB Service",
"Key": "Service"
}
],
"VolumeType": "standard",
"VolumeId": "vol-049df61146c4d7901",
"State": "in-use",
"SnapshotId": "snap-1234567890abcdef0",
"CreateTime": "2013-12-18T22:35:00.084Z",
"Size": 8
},
{
"AvailabilityZone": "us-east-1a",
"Attachments": [],
"VolumeType": "io1",
"VolumeId": "vol-1234567890abcdef0",
"State": "available",
"Iops": 1000,
"SnapshotId": null,
"CreateTime": "2014-02-27T00:02:41.791Z",
"Size": 100
}
]
}
直到现在..这就是我在尝试...在python:
asg_list_json_Tags=asg_list_json["AutoScalingGroups"]
Tags=pandas.DataFrame(asg_list_json_Tags)
n = []
for i in Tags.columns:
n.append(i)
print n
engine = create_engine("mysql+mysqldb://user:"+'pwd'+"@mysqlserver/dbname")
Tags.to_sql(name='TableName', con=engine, if_exists='append', index=True)
任何帮助都是非常有用的..谢谢!
答案 0 :(得分:5)
我会这样做:
fn = r'D:\temp\.data\40450591.json'
with open(fn) as f:
data = json.load(f)
# some of your records seem NOT to have `Tags` key, hence `KeyError: 'Tags'`
# let's fix it
for r in data['Volumes']:
if 'Tags' not in r:
r['Tags'] = []
v = pd.DataFrame(data['Volumes']).drop(['Attachments', 'Tags'],1)
a = pd.io.json.json_normalize(data['Volumes'], 'Attachments', ['VolumeId'], meta_prefix='parent_')
t = pd.io.json.json_normalize(data['Volumes'], 'Tags', ['VolumeId'], meta_prefix='parent_')
v.to_sql('volume', engine)
a.to_sql('attachment', engine)
t.to_sql('tag', engine)
输出:
In [179]: v
Out[179]:
AvailabilityZone CreateTime Iops Size SnapshotId State VolumeType
VolumeId
vol-049df61146c4d7901 us-east-1a 2013-12-18T22:35:00.084Z NaN 8 snap-1234567890abcdef0 in-use standard
vol-1234567890abcdef0 us-east-1a 2014-02-27T00:02:41.791Z 1000.0 100 None available io1
In [180]: a
Out[180]:
AttachTime DeleteOnTermination Device InstanceId State VolumeId parent_VolumeId
0 2013-12-18T22:35:00.000Z True /dev/sda1 i-1234567890abcdef0 attached vol-049df61146c4d7901 vol-049df61146c4d7901
1 2013-12-18T22:35:11.000Z True /dev/sda1 i-1234567890abcdef1 attached vol-049df61146c4d7111 vol-049df61146c4d7901
In [217]: t
Out[217]:
Key Value parent_VolumeId
0 Name DBJanitor-Private vol-049df61146c4d7901
1 Owner DBJanitor vol-049df61146c4d7901
2 Product Database vol-049df61146c4d7901
3 Portfolio DB Janitor vol-049df61146c4d7901
4 Service DB Service vol-049df61146c4d7901
测试JSON文件:
{
"Volumes": [
{
"AvailabilityZone": "us-east-1a",
"Attachments": [
{
"AttachTime": "2013-12-18T22:35:00.000Z",
"InstanceId": "i-1234567890abcdef0",
"VolumeId": "vol-049df61146c4d7901",
"State": "attached",
"DeleteOnTermination": true,
"Device": "/dev/sda1"
},
{
"AttachTime": "2013-12-18T22:35:11.000Z",
"InstanceId": "i-1234567890abcdef1",
"VolumeId": "vol-049df61146c4d7111",
"State": "attached",
"DeleteOnTermination": true,
"Device": "/dev/sda1"
}
],
"Tags": [
{
"Value": "DBJanitor-Private",
"Key": "Name"
},
{
"Value": "DBJanitor",
"Key": "Owner"
},
{
"Value": "Database",
"Key": "Product"
},
{
"Value": "DB Janitor",
"Key": "Portfolio"
},
{
"Value": "DB Service",
"Key": "Service"
}
],
"VolumeType": "standard",
"VolumeId": "vol-049df61146c4d7901",
"State": "in-use",
"SnapshotId": "snap-1234567890abcdef0",
"CreateTime": "2013-12-18T22:35:00.084Z",
"Size": 8
},
{
"AvailabilityZone": "us-east-1a",
"Attachments": [],
"VolumeType": "io1",
"VolumeId": "vol-1234567890abcdef0",
"State": "available",
"Iops": 1000,
"SnapshotId": null,
"CreateTime": "2014-02-27T00:02:41.791Z",
"Size": 100
}
]
}
答案 1 :(得分:0)
此示例的模拟:https://github.com/zolekode/json-to-tables/blob/master/example.py
使用以下脚本:
status
HTML输出:
import json
from extent_table import ExtentTable
from table_maker import TableMaker
Volumes = [
{
"AvailabilityZone": "us-east-1a",
"Attachments": [
{
"AttachTime": "2013-12-18T22:35:00.000Z",
"InstanceId": "i-1234567890abcdef0",
"VolumeId": "vol-049df61146c4d7901",
"State": "attached",
"DeleteOnTermination": "true",
"Device": "/dev/sda1"
}
],
"Tags": [
{
"Value": "DBJanitor-Private",
"Key": "Name"
},
{
"Value": "DBJanitor",
"Key": "Owner"
},
{
"Value": "Database",
"Key": "Product"
},
{
"Value": "DB Janitor",
"Key": "Portfolio"
},
{
"Value": "DB Service",
"Key": "Service"
}
],
"VolumeType": "standard",
"VolumeId": "vol-049df61146c4d7901",
"State": "in-use",
"SnapshotId": "snap-1234567890abcdef0",
"CreateTime": "2013-12-18T22:35:00.084Z",
"Size": 8
},
{
"AvailabilityZone": "us-east-1a",
"Attachments": [],
"VolumeType": "io1",
"VolumeId": "vol-1234567890abcdef0",
"State": "available",
"Iops": 1000,
"SnapshotId": "null",
"CreateTime": "2014-02-27T00:02:41.791Z",
"Size": 100
}
]
volumes = json.dumps(Volumes)
volumes = json.loads(volumes)
extent_table = ExtentTable()
table_maker = TableMaker(extent_table)
table_maker.convert_json_objects_to_tables(volumes, "volumes")
table_maker.show_tables(8)
table_maker.save_tables("./", export_as="html") # you can also pass in export_as="sql" or "csv". In the case of sql, there is a parameter to pass the engine.