我是Python初学者,正在尝试编写几行代码,把JSON转换为CSV,之后再转换回JSON。有成千上万个文件(大小为300 MB)需要转换和处理。当前程序只使用1个CPU,无法利用服务器的16个CPU,因此希望得到关于如何调整程序以使用多进程(multiprocessing)的建议。下面是我在Python 3.7下的代码。
import json
import csv
import os
def convert_file(json_name):
    """Convert one line-delimited JSON file in the current directory to CSV.

    The output file is named after the part of ``json_name`` before its
    first dot, with ``.csv`` appended. The header row is taken from the keys
    of the first line that parses successfully; every parsed line then
    contributes one row of values. Floats are kept as their original text
    (``parse_float=str``) so no precision is lost in the round trip.

    Lines that are not valid JSON are reported on stdout and skipped, rather
    than re-writing the previous record or failing with a NameError.

    This is a top-level function taking a plain string, so it can also be
    handed to ``multiprocessing.Pool.map`` to convert files in parallel.

    Args:
        json_name: Name of the input file, relative to the current directory.

    Returns:
        The name of the CSV file that was written.
    """
    csv_name = json_name.split('.')[0] + ".csv"
    with open(csv_name, 'w', newline='') as output_file, \
            open(json_name) as file_handle:
        writer = csv.writer(output_file)
        header_written = False
        for line_number, line in enumerate(file_handle, start=1):
            try:
                data = json.loads(line, parse_float=str)
            except ValueError:  # json.JSONDecodeError is a ValueError
                print("Can't load line {}".format(line_number))
                continue
            if not header_written:
                # Header comes from the first *valid* record, not line 1.
                writer.writerow(data.keys())
                header_written = True
            writer.writerow(data.values())
    return csv_name


def main():
    """Convert every input file found in the staging directory."""
    os.chdir('/stagingData/Scripts/test')
    for json_name in os.listdir(os.getcwd()):
        # Sub-directories cannot be opened as files.
        if not os.path.isfile(json_name):
            continue
        # Outputs land in this same directory; never feed a CSV from a
        # previous run back in (it would be truncated while being read).
        if json_name.endswith('.csv'):
            continue
        convert_file(json_name)


if __name__ == "__main__":
    main()
希望得到有关多进程处理逻辑的建议。
答案 0 :(得分:1)
由于文件很多,文档中最简单的多进程(multiprocessing)示例应该就适用。 https://docs.python.org/3.4/library/multiprocessing.html?highlight=process
import XYZ from './xyz';
import {Edit} from './xyz';
import { pencilEditButton } from './Images';
// Root stack navigator: declares a single "Home" route whose header shows a
// pressable pencil image on the right-hand side.
// NOTE(review): createStackNavigator, createAppContainer, View,
// TouchableHighlight, Image and styles are not imported or defined in the
// visible source — confirm they are in scope.
const App = createStackNavigator(
{
Home: {
// NOTE(review): `My App` is not a valid JavaScript expression (two identifiers
// separated by a space) — presumably a component reference was intended; confirm.
screen: My App,
navigationOptions: ({ navigation }) => ({
title: 'myApp',
// NOTE(review): the press handler below calls EditMix(), which is neither
// imported nor defined in the visible source (only `Edit` is imported from
// './xyz') — confirm which handler is intended.
headerRight: (
<View>
<TouchableHighlight
onPress={() => EditMix()}
underlayColor="gray">
<View>
<Image source={pencilEditButton} style={styles.navigationButtonImage} />
</View>
</TouchableHighlight>
</View>
),
}),
},
}
);
// The app container wrapping the navigator is the module's default export.
export default createAppContainer(App);
您还可以尝试将os.listdir()
export default class XYZ extends React.Component {
constructor(props) {
super(props);
this.state = {
editMode: false,
};
};
render() {
return (
<View style={styles.container}>
{ this.state.editMode === true ?
<TouchableHighlight
onPress={this._onXPressed}
underlayColor="white">
<View style={[styles.flowRight, styles.controlButton]}>
<Text style={styles.buttonText}>{'Edit Mode'}</Text>
</View>
</TouchableHighlight>
:
<TouchableHighlight
onPress={this._onYPressed}
underlayColor="white">
<View style={[styles.flowRight, styles.controlButton]}>
<Text style={styles.buttonText}>{'Non Edit Mode'}</Text>
</View>
</TouchableHighlight>
}
</View>
);
}
}
/**
 * Exported handler: assigns `true` to `editMode` and shows an alert.
 */
export function Edit() {
// NOTE(review): `editMode` is not declared in the visible source, so this does
// not touch any component's `this.state.editMode`. In an ES module (always
// strict mode) assigning to an undeclared identifier throws a ReferenceError
// — confirm the intended target.
editMode = true;
alert('User wants to edit!');
}
替换为os.scandir(),它在开始迭代之前不必先返回所有目录条目。
答案 1 :(得分:1)
如果您要更有效地处理一个大文件,我建议以下内容:
将文件拆分为大块
为每个块创建一个处理进程
(如有必要)将处理后的块合并回一个文件
类似这样的东西:
import csv
import json
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
# Path of the large input file that the script at the bottom passes to
# chunk_file_by_line(); '/path/to/file' is presumably a placeholder to replace.
source_big_file = Path('/path/to/file')
def chunk_file_by_line(source_filepath: Path, chunk_size: int = 10_000):
    """Split a text file into chunk files of at most ``chunk_size`` lines.

    Chunk files are written next to the source file and are named
    ``<stem>.g<N><suffix>``, where ``N`` is the zero-based number of the
    first source line stored in that chunk (0, chunk_size, 2*chunk_size, ...).

    This is a generator: each chunk path is yielded only after its file has
    been completely written and closed, so a consumer (for example a worker
    process) can safely read it straight away.

    Args:
        source_filepath: File to split; read as UTF-8 text.
        chunk_size: Maximum number of lines per chunk file.

    Yields:
        The path of each finished chunk file, in source order. Nothing is
        yielded for an empty source file.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')

    chunk_filepath = None
    chunk_file = None
    try:
        with source_filepath.open('r', encoding='utf8') as big:
            for line_number, line in enumerate(big):
                if line_number % chunk_size == 0:
                    # A new chunk starts here: finish the previous one first
                    # so it is flushed to disk before anyone reads it.
                    if chunk_file is not None:
                        chunk_file.close()
                        yield chunk_filepath
                    chunk_filename = (
                        f'{source_filepath.stem}.g{line_number}'
                        f'{source_filepath.suffix}'
                    )
                    chunk_filepath = source_filepath.parent / chunk_filename
                    chunk_file = chunk_filepath.open('w', encoding='utf8')
                chunk_file.write(line)
    finally:
        # Also runs on errors or if the consumer abandons the generator;
        # closing an already-closed file is harmless.
        if chunk_file is not None:
            chunk_file.close()

    # output last one
    if chunk_filepath is not None:
        yield chunk_filepath
def json_to_csv(json_filepath: Path) -> Path:
    """Convert a line-delimited JSON file into a CSV file beside it.

    Every non-blank line of the input must hold one JSON object. The keys of
    the first object become the CSV header, and each object is written as
    one row. The output keeps the input's stem and gets a ``.csv`` suffix.

    Args:
        json_filepath: Input file, read as UTF-8 text.

    Returns:
        The path of the CSV file that was written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        ValueError: If a later object has a key that is not in the header
            (the ``csv.DictWriter`` default, ``extrasaction='raise'``).
    """
    csv_filename = f'{json_filepath.stem}.csv'
    csv_filepath = json_filepath.parent / csv_filename
    # newline='' lets the csv module control line endings itself.
    with csv_filepath.open('w', encoding='utf8', newline='') as csv_out, \
            json_filepath.open('r', encoding='utf8') as json_in:
        dwriter = None
        for json_line in json_in:
            if not json_line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            data = json.loads(json_line)
            if dwriter is None:
                # The field names are only known once the first record is read.
                dwriter = csv.DictWriter(csv_out, fieldnames=list(data))
                dwriter.writeheader()
            dwriter.writerow(data)
    return csv_filepath
# Guard the entry point: worker processes may re-import this module (for
# example with the "spawn" start method) and must not start the job again.
if __name__ == '__main__':
    with ProcessPoolExecutor() as pool:
        # Each chunk is submitted as soon as the splitter has finished
        # writing it, so conversion overlaps with splitting.
        futures = [
            pool.submit(json_to_csv, chunk_filepath)
            for chunk_filepath in chunk_file_by_line(source_big_file)
        ]
        # wait for all to finish
        for future in futures:
            csv_filepath = future.result(timeout=None)  # waits until complete
            print(f'conversion complete> csv filepath: {csv_filepath}')