在Python中将json文件转换为csv,结果什么都没有输出?

时间:2015-11-15 10:07:19

标签: python json csv

这里我们有一个带有嵌套内容的大型json文件。我们想将它转换为csv文件,以便用它来进行数据建模,但我觉得代码中缺少了某些东西,而我自己找不出来。我是python的新手,需要帮助。

以下是文件中的内容:

[{
"address": " -, Gulbarga-585102", 
"college": "College (Architecture)", 
"courses": [
{
    "brief_details": "", 
    "college_name": "School of ArchitecturePoojya Doddappa Appa College of Engineering", 
    "course_branch": "B.Arch", 
    "course_duration": " 5-year", 
    "course_nature": " Full-Time", 
    "course_title": "", 
    "course_type": " B.Arch", 
    "no_of_seats": " 60", 
    "qualifications": "", 
    "selection_process": ""
}
], 
"email": " principal@pdaengg.com", 
"fax": "08472-255685", 
"name": "School Of Architecturepoojya Doddappa Appa College Of    Engineering", 
"phone": "08472-224262 Extn. 435, 220742", 
"recognition": " V.t.u. Belgaum", 
"website": ""
}]

以下是我的代码

from bs4 import BeautifulSoup
from os import listdir
import os
from os.path import isfile, join
import fnmatch
import shelve
import json
import csv

def write_csv(read_file_path):
    """Read a JSON list of colleges and write college/course rows as CSV.

    The college CSV is written to a hard-coded path; ``read_file_path`` is
    the JSON input.

    NOTE(review): as written this function cannot run to completion -- see
    the inline notes on the undefined names below.
    """
    # Neither file object opened here is ever closed explicitly.
    data = json.loads(open(read_file_path).read())
    file_colleges = csv.writer(open(r"/home/maitreyee/SchoolCollege.com/collegesdb/colleges.csv", "w", newline=""))
    list_colleges_headers = ['name', 'recognition','address','phone','fax','email','website']
    file_colleges.writerow(list_colleges_headers)
    # NOTE(review): `list_courses` is not defined anywhere in the visible
    # code, so this attribute assignment raises NameError; the intended name
    # was presumably `list_courses_headers`, which is read further down.
    list_courses.list_colleges_headers = ['course_title', 'course_type','course_duration','course_nature','qualifications','brief_details','selection_process', 'course_branch', 'no_of_seats']

    for d in data:
        # NOTE(review): the row does not line up with the header written
        # above -- it emits d['college'] in second position and never emits
        # d['email'].
        file_colleges.writerow(
            [d['name'], d['college'], d['recognition'], d['address'], d['phone'], d['fax'], d['website']])
        # NOTE(review): `file_course` and `list_courses_headers` are never
        # assigned in the visible code. The header row is also written once
        # per college because this call sits inside the loop.
        file_course.writerow(list_courses_headers)
        for course in d['courses']:
            # NOTE(review): each condition `x is not None or x != ''` is true
            # for every value (None != '' is True), so 'NA' is never chosen
            # and empty strings pass through unchanged.
            file_course.writerow(
                [
                 (course['course_title'] if course['course_title'] is not None or course['course_title'] != '' else 'NA'),
                 (course['course_type'] if course['course_type'] is not None or course['course_type'] != '' else 'NA'),
                 (course['course_duration'] if course['course_duration'] is not None or course['course_duration'] != '' else 'NA'),
                 (course['course_nature'] if course['course_nature'] is not None or course['course_nature'] != '' else 'NA'),
                 (course['qualifications'] if course['qualifications'] is not None or course['qualifications'] != '' else 'NA'),
                 (course['brief_details'] if course['brief_details'] is not None or course['brief_details'] != '' else 'NA'),
                 (course['selection_process'] if course['selection_process'] is not None or course['selection_process'] != '' else 'NA'),
                 (course['course_branch'] if course['course_branch'] is not None or course['course_branch'] != '' else 'NA'),
                 (course['no_of_seats'] if course['no_of_seats'] is not None or course['no_of_seats'] != '' else 'NA')])
        pass

# NOTE(review): nothing in the visible part of this script calls
# write_csv(); the only live statement below is the path assignment.
#def write_file(file, colleges):
#   db = shelve.open(file)
#   for college in colleges:
#       db[college.name] = college
#   db.close()
read_file_path = r'/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json'
#colleges = read_colleges(r"/home/maitreyee/Downloads/SchoolCollege.com1/collegedb1.json")
#new_write_file(r'/home/maitreyee/Downloads/SchoolCollege.com1/')

代码生成的是一个空文件

以下是@7stud的代码,我只修改了文件位置。

import json
import csv

def write_csv(jsonfile, outfile):
    """Write a header and one data row built from the first JSON record.

    Only ``data[0]`` and its first course are used; every other college and
    course in ``jsonfile`` is ignored. Falsy values (such as empty strings)
    are written as ``'NA'``.

    Raises:
        IndexError: if the JSON list is empty or the first college's
            ``courses`` list is empty.
        ValueError: from ``list.remove`` if ``'courses'``, ``'college'`` or
            ``'brief_details'`` is missing from the respective dict.
    """

    with open(jsonfile) as f:
        data = json.loads(f.read())

    # Only the first college in the file is considered.
    college_dict = data[0]

    # Column names are the college's keys minus the nested/unwanted ones.
    college_keys = list(college_dict.keys())
    college_keys.remove('courses')
    college_keys.remove('college')

    # Takes the first course only; an empty 'courses' list raises IndexError.
    courses_dict = data[0]['courses'][0]
    courses_keys = list(courses_dict.keys())
    courses_keys.remove('brief_details')

    with open(outfile, 'w', newline='') as f:
        csv_writer = csv.writer(f)
        headers = college_keys + courses_keys
        csv_writer.writerow(headers)

        # One combined row: college columns followed by course columns.
        row = (
            [
                college_dict[key] if college_dict[key] else 'NA'
                for key in college_keys
            ] 
            + 
            [
                courses_dict[key] if courses_dict[key] else 'NA'
                for key in courses_keys
            ]
        )

        csv_writer.writerow(row)

# Input JSON and output CSV locations, followed by the actual conversion call.
jsonfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json'
outfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesout.csv'

write_csv(jsonfile, outfile)

以下是错误

 maitreyee@Maitreyee:~/Downloads/SchoolCollege.com$ python json2csv4.py
Traceback (most recent call last):
  File "json2csv4.py", line 41, in <module>
    write_csv(jsonfile, outfile)
  File "json2csv4.py", line 15, in write_csv
    courses_dict = data[0]['courses'][0]
IndexError: list index out of range

1 个答案:

答案 0 :(得分:2)

  1. 您打算在程序中调用write_csv()函数吗?

  2. 如果您调用write_csv(),则会收到错误消息:

    NameError: name 'list_courses' is not defined

    如果你这样做:

    import json
    import csv
    
    def write_csv(read_file_path):
        """Write only the college header row to ``out.txt``.

        The JSON file at ``read_file_path`` is parsed first, so a missing or
        malformed input still raises before any output is created, but its
        contents are not used. Both files are opened with ``with`` so they
        are closed (and the output flushed) deterministically.

        Raises:
            OSError: if either file cannot be opened.
            json.JSONDecodeError: if the input is not valid JSON.
        """
        with open(read_file_path) as json_file:
            json.load(json_file)

        list_colleges_headers = ['name', 'recognition', 'address', 'phone', 'fax', 'email', 'website']
        with open('out.txt', "w", newline="") as csv_file:
            csv.writer(csv_file).writerow(list_colleges_headers)
    
    
    # Run the conversion on the sample JSON saved as json.txt.
    infile = "json.txt"
    write_csv(infile)
    

    您将看到该文件包含输出:

    $ cat out.txt
    name,recognition,address,phone,fax,email,website
    

    修改

    如果csv文件中的列顺序不重要:

    import json
    import csv
    
    def write_csv(jsonfile, outfile):
        """Flatten a JSON list of colleges into a CSV, one row per course.

        Column order follows the key order of the first college and of the
        first course found anywhere in the file. The nested ``courses`` list,
        the ``college`` key and the course ``brief_details`` key are left out.
        Missing or falsy values (such as empty strings) are written as
        ``'NA'``.

        A college with no courses still produces one row, with ``'NA'`` in
        every course column, instead of raising ``IndexError``. An empty JSON
        list produces an empty output file.

        Args:
            jsonfile: path of the JSON input (a list of college dicts).
            outfile: path of the CSV file to create or overwrite.
        """
        with open(jsonfile) as f:
            data = json.load(f)

        if not data:
            # No record to derive columns from: leave an empty file behind.
            with open(outfile, 'w', newline=''):
                pass
            return

        college_keys = [
            key for key in data[0] if key not in ('courses', 'college')
        ]

        # The first course anywhere in the file defines the course columns,
        # so an empty 'courses' list on the first college is not fatal.
        first_course = next(
            (
                course
                for college in data
                for course in college.get('courses') or []
            ),
            {},
        )
        courses_keys = [key for key in first_course if key != 'brief_details']

        with open(outfile, 'w', newline='') as f:
            csv_writer = csv.writer(f)
            csv_writer.writerow(college_keys + courses_keys)

            for college in data:
                college_cells = [college.get(key) or 'NA' for key in college_keys]
                # `or [{}]` yields a single all-'NA' course for colleges
                # that have none, so they are not dropped from the output.
                for course in college.get('courses') or [{}]:
                    course_cells = [course.get(key) or 'NA' for key in courses_keys]
                    csv_writer.writerow(college_cells + course_cells)
    
    # Input JSON and output CSV locations, followed by the conversion call.
    jsonfile = 'data.json'
    outfile = 'out.csv'
    
    write_csv(jsonfile, outfile)