我有一个包含若干列的pandas数据框,其中一列是JSON字符串。当pandas将我的数据框写入csv时,它会把JSON转换成类似"{""account_sid"": ""AC68a8baf89872000218e9245b27367e3c"", ""actor_sid"": ""WSe666c5749a1903155a6235d9525a84a7"",..}"的形式。
当我用COPY命令把该文件导入redshift时,由于这些成对的双引号而导致失败。为什么不能保留诸如{"account_sid": "AC68a8baf89872000218e9245b27367e3c", "actor_sid": "WSe666c5749a1903155a6235d9525a84a7",..}
之类的纯文本?
这是我将字典序列化为json并将df写入csv的方式:
def _normalize_task_and_reservation_event(event):
    """Flatten one task/reservation event into a single-level record.

    The caller's ``event`` dict is not modified: the ISO-formatted date is
    written to a shallow copy, so the same event can safely be normalized
    more than once.

    Args:
        event: Dict with the keys ``event_data`` (a dict), ``event_date``
            (an object exposing ``isoformat()``), ``sid`` and ``event_type``.
            It must be JSON-serializable once ``event_date`` has been
            replaced by its ISO string.

    Returns:
        A flat dict holding the event identifiers, the full event as a JSON
        string (``event_json``), and selected task/worker attributes.

    Raises:
        AttributeError: If ``event_data`` or ``event_date`` is missing.
        ValueError: If ``task_attributes`` or ``worker_attributes`` is not
            valid JSON (``json.JSONDecodeError``).
    """
    event_data = event.get('event_data')

    # ';' is swapped for ':' before parsing, exactly as the original did.
    # NOTE(review): presumably the upstream payload can contain ';' where
    # ':' is meant - confirm against the data source.
    raw_task_attrs = event_data.get('task_attributes', '{ }')
    task_attrs = json.loads(raw_task_attrs.replace(';', ':'))
    worker_attrs = json.loads(event_data.get('worker_attributes', '{ }'))

    # Shallow copy: replacing the date in place on the caller's dict would
    # make a second call fail, because a str has no isoformat().
    serializable_event = dict(event)
    event_date = event.get('event_date').isoformat()
    serializable_event['event_date'] = event_date

    return {
        'event_date': event_date,
        # ';' is stripped from the JSON text; presumably because the CSV
        # is written with ';' as its field separator - verify.
        'event_json': json.dumps(serializable_event).replace(';', ':'),
        'event_sid': event.get('sid'),
        'event_type': event.get('event_type'),
        'task_sid': event_data.get('task_sid'),
        'workflow_name': event_data.get('workflow_name'),
        'workspace_name': event_data.get('workspace_name'),
        'task_queue_name': event_data.get('task_queue_name'),
        'called': _extract_phone(task_attrs.get('called', '')),
        'caller': _extract_phone(task_attrs.get('caller', '')),
        'direction': task_attrs.get('direction', ''),
        'agent_email': worker_attrs.get('email', ''),
        'agent_name': worker_attrs.get('full_name', ''),
        'zendesk_ticket_id': task_attrs.get('zendesk_ticket_id', '0'),
        'start_call_sid': task_attrs.get('call_sid', ''),
        'worker_sid': event_data.get('worker_sid', ''),
        # Receives the ISO string, not the original date object.
        'rounded_time': _round_up_time(event_date),
    }
# Flatten every raw event into one record; each record becomes a CSV row.
data = [_normalize_task_and_reservation_event(raw_event) for raw_event in events]
df = pd.DataFrame.from_dict(data)
# Write the frame without the index column, using ';' as field separator.
# The event_json column already holds JSON text, so it is written as-is.
df.to_csv(local_file, sep=';', index=False)