这里我们有一个带有嵌套内容的大型json文件。我们想将它转换为csv文件,以便它可以用它来进行数据建模,但是我觉得代码丢失的东西,我无法发现。我是python的新手,需要帮助。
以下是文件中的内容:
[{
"address": " -, Gulbarga-585102",
"college": "College (Architecture)",
"courses": [
{
"brief_details": "",
"college_name": "School of ArchitecturePoojya Doddappa Appa College of Engineering",
"course_branch": "B.Arch",
"course_duration": " 5-year",
"course_nature": " Full-Time",
"course_title": "",
"course_type": " B.Arch",
"no_of_seats": " 60",
"qualifications": "",
"selection_process": ""
}
],
"email": " principal@pdaengg.com",
"fax": "08472-255685",
"name": "School Of Architecturepoojya Doddappa Appa College Of Engineering",
"phone": "08472-224262 Extn. 435, 220742",
"recognition": " V.t.u. Belgaum",
"website": ""
}]
以下是我的代码
from bs4 import BeautifulSoup
from os import listdir
import os
from os.path import isfile, join
import fnmatch
import shelve
import json
import csv
def write_csv(read_file_path):
data = json.loads(open(read_file_path).read())
file_colleges = csv.writer(open(r"/home/maitreyee/SchoolCollege.com/collegesdb/colleges.csv", "w", newline=""))
list_colleges_headers = ['name', 'recognition','address','phone','fax','email','website']
file_colleges.writerow(list_colleges_headers)
list_courses.list_colleges_headers = ['course_title', 'course_type','course_duration','course_nature','qualifications','brief_details','selection_process', 'course_branch', 'no_of_seats']
for d in data:
file_colleges.writerow(
[d['name'], d['college'], d['recognition'], d['address'], d['phone'], d['fax'], d['website']])
file_course.writerow(list_courses_headers)
for course in d['courses']:
file_course.writerow(
[
(course['course_title'] if course['course_title'] is not None or course['course_title'] != '' else 'NA'),
(course['course_type'] if course['course_type'] is not None or course['course_type'] != '' else 'NA'),
(course['course_duration'] if course['course_duration'] is not None or course['course_duration'] != '' else 'NA'),
(course['course_nature'] if course['course_nature'] is not None or course['course_nature'] != '' else 'NA'),
(course['qualifications'] if course['qualifications'] is not None or course['qualifications'] != '' else 'NA'),
(course['brief_details'] if course['brief_details'] is not None or course['brief_details'] != '' else 'NA'),
(course['selection_process'] if course['selection_process'] is not None or course['selection_process'] != '' else 'NA'),
(course['course_branch'] if course['course_branch'] is not None or course['course_branch'] != '' else 'NA'),
(course['no_of_seats'] if course['no_of_seats'] is not None or course['no_of_seats'] != '' else 'NA')])
pass
#def write_file(file, colleges):
# db = shelve.open(file)
# for college in colleges:
# db[college.name] = college
# db.close()
read_file_path = r'/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json'
#colleges = read_colleges(r"/home/maitreyee/Downloads/SchoolCollege.com1/collegedb1.json")
#new_write_file(r'/home/maitreyee/Downloads/SchoolCollege.com1/')
代码返回一个空文件
以下是@ 7stud.have的代码,只修改了文件位置。
import json
import csv
def write_csv(jsonfile, outfile):
with open(jsonfile) as f:
data = json.loads(f.read())
college_dict = data[0]
college_keys = list(college_dict.keys())
college_keys.remove('courses')
college_keys.remove('college')
courses_dict = data[0]['courses'][0]
courses_keys = list(courses_dict.keys())
courses_keys.remove('brief_details')
with open(outfile, 'w', newline='') as f:
csv_writer = csv.writer(f)
headers = college_keys + courses_keys
csv_writer.writerow(headers)
row = (
[
college_dict[key] if college_dict[key] else 'NA'
for key in college_keys
]
+
[
courses_dict[key] if courses_dict[key] else 'NA'
for key in courses_keys
]
)
csv_writer.writerow(row)
jsonfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json'
outfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesout.csv'
write_csv(jsonfile, outfile)
以下是错误
maitreyee@Maitreyee:~/Downloads/SchoolCollege.com$ python json2csv4.py
Traceback (most recent call last):
File "json2csv4.py", line 41, in <module>
write_csv(jsonfile, outfile)
File "json2csv4.py", line 15, in write_csv
courses_dict = data[0]['courses'][0]
IndexError: list index out of range
答案 0 :(得分:2)
您打算在程序中调用write_csv()
功能吗?
如果您拨打write_csv()
,则会收到错误消息:
NameError:name&#39; list_courses&#39;未定义
如果你这样做:
import json
import csv
def write_csv(read_file_path):
data = json.loads(open(read_file_path).read())
file_colleges = csv.writer(open('out.txt', "w", newline=""))
list_colleges_headers = ['name', 'recognition','address','phone','fax','email','website']
file_colleges.writerow(list_colleges_headers)
infile = "json.txt"
write_csv(infile)
您将看到该文件包含输出:
$ cat out.txt
name,recognition,address,phone,fax,email,website
修改强>
如果csv文件中的列顺序不重要:
import json
import csv
def write_csv(jsonfile, outfile):
with open(jsonfile) as f:
data = json.loads(f.read())
college_dict = data[0]
college_keys = list(college_dict.keys())
college_keys.remove('courses')
college_keys.remove('college')
courses_dict = data[0]['courses'][0]
courses_keys = list(courses_dict.keys())
courses_keys.remove('brief_details')
with open(outfile, 'w', newline='') as f:
csv_writer = csv.writer(f)
headers = college_keys + courses_keys
csv_writer.writerow(headers)
row = (
[
college_dict[key] if college_dict[key] else 'NA'
for key in college_keys
]
+
[
courses_dict[key] if courses_dict[key] else 'NA'
for key in courses_keys
]
)
csv_writer.writerow(row)
jsonfile = 'data.json'
outfile = 'out.csv'
write_csv(jsonfile, outfile)