我是新手程序员,是社区的新手。我正在尝试从Excel文件读取数据并将文件内容转换为JSON格式。我知道过去在StackOverflow上存在关于此类问题的问题-在这些问题的帮助下,我能够提出解决方案,但未按预期工作。
这是excel文件的样子:
这是JSON的样子:
{"linesOfFruits": [
{
"title" : {
"en": "Apple"
},
"url" : "https://isThisApple/123123"
},
{
"title" : {
"en": "Banana"
},
"teams" : [
{
"title" : {
"en": "Red Banana"
},
"url" : "https://isThisBanana/123124"
},
{
"title" : {
"en": "Green Banana"
},
"url" : "https://isThisBanana/123125"
},
{
"title" : {
"en": "Yellow Banana"
},
"url" : "https://isThisBanana/123126"
},
]
},
{
"title" : {
"en": "Grape"
},
"teams" : [
{
"title" : {
"en": "Orange Grape"
},
"url" : "https:/isThisGrape/1234"
},
{
"title" : {
"en": "Blue Grape"
},
"url" : "https:/isThisGrape/1235"
},
{
"title" : {
"en": "Pink Grape"
},
"url" : "https:/isThisGrape/1236"
},
]
}
]}
但这是我的代码输出的内容:
{"linesOfBusiness":
[
{
"title": "Apple",
"url": "https://isThisApple/123123"
},
{
"title": "Banana",
"teams": ["Orange Grape", "Blue Grape", "Pink Grape"],
"url": "https://isThisBanana/123124"
},
{
"title": "Grape",
"teams": ["Orange Grape", "Blue Grape", "Pink Grape"],
"url": "https:/isThisGrape/1234"}
]
}
还请注意,如果teams字段为(全部),则可以在JSON中省略teams数组
这是我到目前为止所拥有的:
import json
import sys
import xlrd
def convertToJson(excelFile, sheetName, jsonFile):
workbook = xlrd.open_workbook(excelFile)
worksheet = workbook.sheet_by_name(sheetName)
data = []
keys = ['title', 'teams', 'url']
teams = []
for row_number in range(worksheet.nrows):
if row_number == 0:
continue
row_data = {}
empty = False
team = {}
list = worksheet.row(0)
for col_number, cell in enumerate(worksheet.row(row_number)):
if col_number == 0 and cell.value == '':
empty = True
elif col_number == 0:
empty = False
teams.clear()
if col_number == 1 and ( empty is True or cell.value != '(All)'):
team[keys[0]] = worksheet.row(row_number)
teams.append(cell.value)
print(teams)
elif col_number == 1 and cell.value == '(All)':
continue
# skip the number of fruits since it does not have to be in JSON file
if (col_number == 3):
continue
if col_number == 1:
if teams.__len__() > 0:
row_data[keys[col_number]] = teams
elif col_number == 0 and cell.value == '':
continue
else:
row_data[keys[col_number]] = cell.value
if 'title' in row_data:
data.append(row_data)
with open(jsonFile, 'w') as json_file:
json_file.write(json.dumps({
'linesOfBusiness': data
}))
任何帮助将不胜感激。已经被卡住了一段时间了:)
答案 0 :(得分:0)
我提出了另一种解决方案:
import requests
import xlrd
class ExcelParser:
@staticmethod
def parse_cell(cell, datemode):
# TODO Extend to all Excel date types
if cell.ctype == 0:
return ""
elif cell.ctype == 1:
return str(cell.value)
elif cell.ctype == 3:
datetime = xlrd.xldate.xldate_as_datetime(cell.value, datemode)
return datetime
else:
return cell.value
@staticmethod
def from_url(url):
response = requests.get(url, timeout=30)
if response.status_code != 200:
raise Exception("Excel couldn't be downloaded. Http status: {status}".format(status=response.status_code))
xls_raw_data = xlrd.open_workbook(file_contents=response.content)
# Use xlrd.open_workbook(filename=<path>) to load content from file
sheet = xls_raw_data.sheet_by_index(0)
title, header, *rows = [[ExcelParser.parse_cell(cell, xls_raw_data.datemode) for cell in row] for row in sheet.get_rows()]
json_obj = [dict(zip(header, row)) for row in rows]
return json_obj
if __name__ == '__main__':
url = ""
data = ExcelParser.from_url(url)