import ast
import csv
import json
import os
import re
import sys
subdir = "./json_files/"  # previously '/home/varun/Desktop/pyfile'


def _csv_cell(value):
    """Return *value* in a form that can be stored in a single CSV cell.

    Nested containers are serialised as JSON text rather than as their
    Python ``repr``; the ``repr`` is what produces ``u'...'`` prefixes on
    Python 2.
    """
    if isinstance(value, (dict, list)):
        return json.dumps(value, sort_keys=True)
    if sys.version_info[0] < 3 and isinstance(value, type(u'')):
        # The Python 2 csv module cannot write non-ASCII unicode objects.
        return value.encode('utf-8')
    return value


def jsontocsv(src_dir=None, out_path='test.csv'):
    """Merge every JSON file found in a directory into one CSV file.

    Each JSON file must contain a top-level object. One CSV row is written
    per file; values are placed under the column that matches their key,
    keys without a column are dropped and missing keys become empty cells.

    Args:
        src_dir: Directory holding the JSON files. Defaults to the
            module-level ``subdir``.
        out_path: CSV file to create (overwritten if it exists).
    """
    fieldnames = ['name', 'private', 'version', 'dependencies', 'scripts',
                  'devDependencies']
    if src_dir is None:
        src_dir = subdir

    # The csv module wants newline='' on Python 3 and binary mode on Python 2.
    if sys.version_info[0] >= 3:
        mode, open_kwargs = 'w', {'newline': ''}
    else:
        mode, open_kwargs = 'wb', {}

    with open(out_path, mode, **open_kwargs) as outfile:
        # A single DictWriter keeps every value under its own header column;
        # writing parsed_json.values() would emit them in arbitrary order.
        writer = csv.DictWriter(outfile, fieldnames=fieldnames,
                                restval='', extrasaction='ignore')
        writer.writeheader()
        for file_name in sorted(os.listdir(src_dir)):
            file_path = os.path.join(src_dir, file_name)
            if not os.path.isfile(file_path):
                continue
            # Binary mode lets json detect the encoding itself.
            with open(file_path, 'rb') as json_file:
                parsed_json = json.load(json_file)
            writer.writerow(dict(
                (key, _csv_cell(value)) for key, value in parsed_json.items()
            ))
def _strip_unicode_prefixes(cell):
    """Re-serialise a Python-repr cell such as ``{u'a': u'b'}`` as JSON.

    Cells that do not look like a container literal, or that cannot be
    parsed or serialised, are returned unchanged.
    """
    if not cell or cell[0] not in '{[':
        return cell
    try:
        # literal_eval only accepts literals, so it is safe on file content.
        return json.dumps(ast.literal_eval(cell), sort_keys=True)
    except (ValueError, SyntaxError, TypeError):
        return cell


def cleanUnicode(src_path='test.csv', dst_path='data.csv'):
    """Copy a CSV file, removing ``u'...'`` prefixes from nested values.

    Every data row of ``src_path`` is written to ``dst_path``. Cells that
    hold the ``repr`` of a dict or list are rewritten as JSON text; all
    other cells are copied verbatim, so ordinary letters "u" are preserved.

    Args:
        src_path: CSV file to read.
        dst_path: CSV file to create (overwritten if it exists).
    """
    fieldnames = ['name', 'private', 'version', 'dependencies', 'scripts',
                  'devDependencies']

    # The csv module wants newline='' on Python 3 and binary mode on Python 2.
    if sys.version_info[0] >= 3:
        read_mode, write_mode, open_kwargs = 'r', 'w', {'newline': ''}
    else:
        read_mode, write_mode, open_kwargs = 'rb', 'wb', {}

    with open(src_path, read_mode, **open_kwargs) as csvfile:
        # DictReader already consumes the header line, so no row is skipped.
        rows = list(csv.DictReader(csvfile, delimiter=','))

    with open(dst_path, write_mode, **open_kwargs) as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames,
                                restval='', extrasaction='ignore')
        writer.writeheader()
        for row in rows:
            writer.writerow(dict(
                (name, _strip_unicode_prefixes(row.get(name) or ''))
                for name in fieldnames
            ))
if __name__ == '__main__':
    # Run the two pipeline stages in order: build the merged CSV first,
    # then post-process it.
    for step in (jsontocsv, cleanUnicode):
        step()
    print("Scripts finished running all json files parsed to csv")
我正在读取多个 JSON 文件,并把其中的数据写入同一个 CSV 文件,但每个嵌套值前面都带有 “u” 前缀。如何去掉这些前缀,只保留我想要的数据?
示例输入:
{
"version": "0.1.0",
"devDependencies": {
"react-scripts": "0.6.1"
},
"dependencies": {
"crossfilter": "^1.3.12",
"d3": "^4.2.6",
"d3-scale": "^1.0.3",
"dc": "^2.0.0-beta.32",
"immutable": "^3.8.1",
"jszip": "^3.1.2",
"react": "^15.3.2",
"react-addons-transition-group": "^15.3.2",
"react-dom": "^15.3.2",
"shifty": "^1.5.2",
"wolfy87-eventemitter": "^5.1.0"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test --env=jsdom",
"eject": "react-scripts eject"
}
}
输出:
version,dependencies,scripts,devDependencies
0.1.0,"{u'wolfy87-eventemitter': u'^5.1.0', u'shifty': u'^1.5.2', u'react-addons-transition-group': u'^15.3.2', u'react-dom': u'^15.3.2', u'dc': u'^2.0.0-beta.32', u'ccbooleananalysis': u'^1.0.0', u'react': u'^15.3.2', u'jszip': u'^3.1.2', u'crossfilter': u'^1.3.12', u'ccnetviz': u'^1.0.8', u'immutable': u'^3.8.1', u'd3': u'^4.2.6', u'd3-scale': u'^1.0.3'}","{u'test': u'react-scripts test --env=jsdom', u'start': u'react-scripts start', u'build': u'react-scripts build', u'eject': u'react-scripts eject'}",{u'react-scripts': u'0.6.1'}
希望把所有的 “u” 前缀都去掉。
答案 0 :(得分:0)
我不确定您为什么要将字典作为字符串写入CSV文件,但无论如何......
下面是一种得到不带 `u` Unicode 前缀的字符串的方法。我们处理加载 JSON 数据后得到的字典,把所有键和值字符串编码为 UTF-8;值为字典时则递归处理。
这种方法对纯 ASCII 数据可以直接使用。不过,任何超出 7 位 ASCII 范围的字符都会被写成 `\x` 转义序列,但这并不是真正的问题:读取 CSV 文件时,您很可能想把这些字符串转换回正确的字典,这时可以使用 `ast.literal_eval`,它能够正确处理 `\x` 转义序列。
为了验证这段代码能正确处理 Unicode,我在测试数据中添加了一个额外的条目:“devDependencies”字典现在包含一个新键“unicode-test”,其值为“™©”。在代码的最后一部分,我把 CSV 数据读回来,将“devDependencies”字符串转换回字典,并打印该字典的 'unicode-test' 字段,以验证它被还原成了正确的 Unicode 字符串。
顺便说一下,我现在主要使用 Python 3.6,而我手头最新的 Python 2 版本是 2.6.6。它的 `csv` 模块没有 `DictWriter.writeheader` 方法,因此我用另一种方式来写入标题行。
import json
import csv
import ast
# Name of the CSV file that the demo below writes and then reads back.
csvname = 'test.csv'
# Sample JSON document used as test input. The "unicode-test" entry holds
# non-ASCII characters so the round trip through the CSV file can be checked.
src = '''\
{
"version": "0.1.0",
"devDependencies": {
"unicode-test": "™©",
"react-scripts": "0.6.1"
},
"dependencies": {
"crossfilter": "^1.3.12",
"d3": "^4.2.6",
"d3-scale": "^1.0.3",
"dc": "^2.0.0-beta.32",
"immutable": "^3.8.1",
"jszip": "^3.1.2",
"react": "^15.3.2",
"react-addons-transition-group": "^15.3.2",
"react-dom": "^15.3.2",
"shifty": "^1.5.2",
"wolfy87-eventemitter": "^5.1.0"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test --env=jsdom",
"eject": "react-scripts eject"
}
}
'''
# Parse the JSON text into a Python dict.
data = json.loads(src)
encoding = 'utf8'


def encode_dict(d):
    """Return a copy of *d* with every text key and value encoded to bytes.

    Text strings are encoded with the module-level ``encoding``. Nested
    dicts and lists are processed recursively. Values that are not text
    (booleans, numbers, ``None``), all of which are legal in JSON, are
    passed through unchanged instead of raising ``AttributeError``.

    On Python 2 the result contains plain ``str`` objects, whose ``repr``
    has no ``u`` prefix. On Python 3 the result contains ``bytes``.

    Args:
        d: Dictionary as produced by ``json.loads``.

    Returns:
        A new dictionary; *d* itself is not modified.
    """
    text_type = type(u'')

    def encode_value(value):
        if isinstance(value, dict):
            return encode_dict(value)
        if isinstance(value, (list, tuple)):
            return [encode_value(item) for item in value]
        if isinstance(value, text_type):
            return value.encode(encoding)
        return value

    # dict(generator) rather than a dict comprehension keeps Python 2.6
    # compatibility.
    return dict((encode_value(k), encode_value(v)) for k, v in d.items())
# Encode the parsed JSON and show the result.
clean_data = encode_dict(data)
print(clean_data)
print('- ' * 20)

fieldnames = ['version', 'dependencies', 'scripts', 'devDependencies']
# DictWriter.writeheader() is not used, so the header is written as an
# ordinary row that maps each column name to itself.
header_row = dict(zip(fieldnames, fieldnames))
with open(csvname, 'wb') as outfile:
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writerows([header_row, clean_data])

# Verify: read the file back, turn the stored "devDependencies" string into
# a dict again and print its 'unicode-test' entry.
with open(csvname, 'rb') as infile:
    for row in csv.DictReader(infile):
        print(row)
        dev_deps = ast.literal_eval(row['devDependencies'])
        print(dev_deps['unicode-test'])
**输出**
{'devDependencies': {'unicode-test': '\xe2\x84\xa2\xc2\xa9', 'react-scripts': '0.6.1'}, 'version': '0.1.0', 'dependencies': {'wolfy87-eventemitter': '^5.1.0', 'react-addons-transition-group': '^15.3.2', 'react-dom': '^15.3.2', 'd3-scale': '^1.0.3', 'dc': '^2.0.0-beta.32', 'jszip': '^3.1.2', 'react': '^15.3.2', 'crossfilter': '^1.3.12', 'shifty': '^1.5.2', 'd3': '^4.2.6', 'immutable': '^3.8.1'}, 'scripts': {'test': 'react-scripts test --env=jsdom', 'start': 'react-scripts start', 'build': 'react-scripts build', 'eject': 'react-scripts eject'}}
- - - - - - - - - - - - - - - - - - - -
{'devDependencies': "{'unicode-test': '\\xe2\\x84\\xa2\\xc2\\xa9', 'react-scripts': '0.6.1'}", 'version': '0.1.0', 'dependencies': "{'wolfy87-eventemitter': '^5.1.0', 'react-addons-transition-group': '^15.3.2', 'react-dom': '^15.3.2', 'd3-scale': '^1.0.3', 'dc': '^2.0.0-beta.32', 'jszip': '^3.1.2', 'react': '^15.3.2', 'crossfilter': '^1.3.12', 'shifty': '^1.5.2', 'd3': '^4.2.6', 'immutable': '^3.8.1'}", 'scripts': "{'test': 'react-scripts test --env=jsdom', 'start': 'react-scripts start', 'build': 'react-scripts build', 'eject': 'react-scripts eject'}"}
™©
test.csv的内容
version,dependencies,scripts,devDependencies
0.1.0,"{'wolfy87-eventemitter': '^5.1.0', 'react-addons-transition-group': '^15.3.2', 'react-dom': '^15.3.2', 'd3-scale': '^1.0.3', 'dc': '^2.0.0-beta.32', 'jszip': '^3.1.2', 'react': '^15.3.2', 'crossfilter': '^1.3.12', 'shifty': '^1.5.2', 'd3': '^4.2.6', 'immutable': '^3.8.1'}","{'test': 'react-scripts test --env=jsdom', 'start': 'react-scripts start', 'build': 'react-scripts build', 'eject': 'react-scripts eject'}","{'unicode-test': '\xe2\x84\xa2\xc2\xa9', 'react-scripts': '0.6.1'}"