原始帖子为 Go through json line by line including unknown nested arrays and objects
我正在尝试在JSON字典中的所有嵌套列表和嵌套字典中搜索特定值。字典的结构并不总是已知的。嵌套字典可以具有嵌套列表。
我要查找的键必须包含 `Date`,并且不应包含 `String`。例如:键为 `Date` 时条件成立,但键为 `DateString` 时条件不成立。
# For every row, count how many entries in all columns except the last one
# are strictly smaller than that row's col4 value, and store the count as col5.
# The trailing np.newaxis turns col4 into a column vector so the comparison
# broadcasts across each row.
df['col5'] = (df.to_numpy()[:, :-1] < df.col4.to_numpy()[:, np.newaxis]).sum(axis=1)
df
col1 col2 col3 col4 col5
0 1 3 4 2 1
1 4 6 7 7 2
2 3 6 3 3 0
def ConvertTimestamp(my_list_of_dicts: list):
    """Mark date-like values in a list of dicts with the placeholder 'found'.

    A key counts as date-like when it contains 'Date' but not 'String'
    (so 'Date' and 'DeliveryDate' match, 'DateString' does not).

    Only two levels are inspected:
      * the top-level keys of every dict, when the value is not a list;
      * the keys of dicts stored directly inside a top-level list value.
    Deeper nesting (dicts inside dicts, lists inside lists) is not visited.

    The dicts are modified in place and the same list object is returned.
    """
    def is_date_key(key):
        return 'Date' in key and 'String' not in key

    for e in my_list_of_dicts:
        # Top-level date keys whose values are not a list.  A list stored
        # under a date-like key must be kept so it can be searched below
        # instead of being overwritten.
        keys_with_date = [
            k for k, v in e.items()
            if is_date_key(k) and not isinstance(v, list)
        ]
        for k1 in keys_with_date:
            e[k1] = 'found'

        # Top-level keys whose values are a list.
        keys_with_lists = [k for k, v in e.items() if isinstance(v, list)]
        for k1 in keys_with_lists:
            for d in e[k1]:
                # Lists may also hold scalars or nested lists; only dicts
                # have keys to test.
                if not isinstance(d, dict):
                    continue
                # Re-assigning existing keys does not resize the dict, so
                # iterating while assigning is safe.
                for k2 in d:
                    if is_date_key(k2):
                        d[k2] = 'found'
    return my_list_of_dicts
test_data = [{
"PurchaseOrderID": "aaff50c2-05d5-4943-9a37-421d1b326dc3",
"PurchaseOrderNumber": "PO-0001",
"DateString": "2020-06-04T00:00:00",
"Date": "2020-06-04T02:00:00.000000",
"DeliveryDateString": "2020-06-11T00:00:00",
"DeliveryDate": "2020-06-11T02:00:00.000000",
"DeliveryAddress": "",
"AttentionTo": "",
"Telephone": "",
"DeliveryInstructions": "",
"HasErrors": false,
"IsDiscounted": true,
"Reference": "",
"Type": "PURCHASEORDER",
"CurrencyRate": 1.0,
"CurrencyCode": "EUR",
"Contact": {
"ContactID": "31dcd998-026662967",
"ContactStatus": "ACTIVE",
"Name": "Test",
"FirstName": "",
"LastName": "",
"Addresses": [],
"Phones": [],
"UpdatedDateUTC": "/Date(1591272554130+0000)/",
"ContactGroups": [],
"DefaultCurrency": "EUR",
"ContactPersons": [],
"HasValidationErrors": false
},
"BrandingThemeID": "86a1c878-7b2ed792b224",
"Status": "DELETED",
"LineAmountTypes": "Exclusive",
"SubTotal": 1000.0,
"TotalTax": 0.0,
"Total": 1000.0,
"UpdatedDateUTC": "2020-06-04T12:14:26.527000",
"HasAttachments": false }]
答案 0 :(得分:0)
由于您不知道字典的结构,因此可能在任意数量的级别下。
此外,预期的结果并没有真正显示您想要对找到的字典执行的操作,因此我只是将这些字典添加到列表中。
在这种情况下递归会有所帮助。
"js --help"
答案 1 :(得分:0)
这里是您可以遍历对象并进行更新的方法。
为了通用性,递归函数除了接收要递归遍历的对象之外,还接收两个在外部定义的可调用对象:
一个“键测试器”函数,该函数应获取一个键(字符串)并返回一个布尔值,该布尔值用于确定哪些键的值将被更新
一个“替换”函数,该函数应该获取一个值并返回新值
from datetime import date
from pprint import pprint
from copy import deepcopy
import re
def do_replacements(obj, key_tester, replacer):
    """
    Walk a nested structure of lists and dicts, updating it in place.

    For every dict entry whose key satisfies key_tester(key), the value is
    replaced by replacer(value) and is not descended into.  All other dict
    values, and every list element, are visited recursively.  Anything that
    is neither a dict nor a list is left untouched.  Returns None.
    """
    if isinstance(obj, list):
        for element in obj:
            do_replacements(element, key_tester, replacer)
        return
    if not isinstance(obj, dict):
        return
    # Only existing keys are re-assigned, so the dict never changes size
    # while it is being iterated.
    for key, value in obj.items():
        if not key_tester(key):
            do_replacements(value, key_tester, replacer)
            continue
        obj[key] = replacer(value)
def fix_time(ts):
    """
    Convert a '/Date(<epoch-milliseconds>[+-hhmm])/' value to an ISO-like
    string; return anything else unchanged.

    Parameters:
        ts: the value to inspect.  Non-string values, and strings without
            parentheses or without a leading integer inside them, are
            returned as-is.

    Returns:
        The timestamp formatted as '%Y-%m-%dT%H:%M:%S.%f' in UTC, or the
        original value when it does not fit the pattern.
    """
    # Imported locally so the function does not depend on which datetime
    # names the surrounding module happens to import.
    from datetime import datetime, timedelta, timezone

    # Values under date-like keys are not guaranteed to be strings.
    if not isinstance(ts, str):
        return ts

    pattern = r'\(|\)'
    if not re.search(pattern, ts):
        return ts

    out_format = '%Y-%m-%dT%H:%M:%S.%f'
    inner = re.split(pattern, ts)[1]
    # Take only the leading integer: a trailing offset such as '+0000' or
    # '-0500' is ignored, and a value with no offset at all keeps every
    # digit (slicing at str.find('+') would drop the last one on a miss).
    millis = re.match(r'-?\d+', inner)
    if millis is None:
        return ts

    # Integer arithmetic from the epoch keeps the time of day and avoids
    # float rounding; the epoch count is interpreted as UTC.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    moment = epoch + timedelta(milliseconds=int(millis.group()))
    return moment.strftime(out_format)
# Sample purchase-order record; the nested Contact carries a
# '/Date(...)/'-style timestamp.
test_data = [
    {
        'PurchaseOrderID': 'aaff50c2-05d5-4943-9a37-421d1b326dc3',
        'PurchaseOrderNumber': 'PO-0001',
        'DateString': '2020-06-04T00:00:00',
        'Date': '2020-06-04T02:00:00.000000',
        'DeliveryDateString': '2020-06-11T00:00:00',
        'DeliveryDate': '2020-06-11T02:00:00.000000',
        'DeliveryAddress': '',
        'AttentionTo': '',
        'Telephone': '',
        'DeliveryInstructions': '',
        'HasErrors': False,
        'IsDiscounted': True,
        'Reference': '',
        'Type': 'PURCHASEORDER',
        'CurrencyRate': 1.0,
        'CurrencyCode': 'EUR',
        'Contact': {
            'ContactID': '31dcd998-026662967',
            'ContactStatus': 'ACTIVE',
            'Name': 'Test',
            'FirstName': '',
            'LastName': '',
            'Addresses': [],
            'Phones': [],
            'UpdatedDateUTC': '/Date(1591272554130+0000)/',
            'ContactGroups': [],
            'DefaultCurrency': 'EUR',
            'ContactPersons': [],
            'HasValidationErrors': False,
        },
        'BrandingThemeID': '86a1c878-7b2ed792b224',
        'Status': 'DELETED',
        'LineAmountTypes': 'Exclusive',
        'SubTotal': 1000.0,
        'TotalTax': 0.0,
        'Total': 1000.0,
        'UpdatedDateUTC': '2020-06-04T12:14:26.527000',
        'HasAttachments': False,
    },
]


def func(k):
    # Key tester: keys containing "Date" but not "String".
    return "Date" in k and "String" not in k


# Work on a deep copy so test_data itself stays untouched.
output = deepcopy(test_data)
do_replacements(output, func, fix_time)
pprint(output)
输出如下:
[{'AttentionTo': '',
'BrandingThemeID': '86a1c878-7b2ed792b224',
'Contact': {'Addresses': [],
'ContactGroups': [],
'ContactID': '31dcd998-026662967',
'ContactPersons': [],
'ContactStatus': 'ACTIVE',
'DefaultCurrency': 'EUR',
'FirstName': '',
'HasValidationErrors': False,
'LastName': '',
'Name': 'Test',
'Phones': [],
'UpdatedDateUTC': '2020-06-04T00:00:00.000000'},
'CurrencyCode': 'EUR',
'CurrencyRate': 1.0,
'Date': '2020-06-04T02:00:00.000000',
'DateString': '2020-06-04T00:00:00',
'DeliveryAddress': '',
'DeliveryDate': '2020-06-11T02:00:00.000000',
'DeliveryDateString': '2020-06-11T00:00:00',
'DeliveryInstructions': '',
'HasAttachments': False,
'HasErrors': False,
'IsDiscounted': True,
'LineAmountTypes': 'Exclusive',
'PurchaseOrderID': 'aaff50c2-05d5-4943-9a37-421d1b326dc3',
'PurchaseOrderNumber': 'PO-0001',
'Reference': '',
'Status': 'DELETED',
'SubTotal': 1000.0,
'Telephone': '',
'Total': 1000.0,
'TotalTax': 0.0,
'Type': 'PURCHASEORDER',
'UpdatedDateUTC': '2020-06-04T12:14:26.527000'}]
(注意:此处显示的输出是经 pprint 美化打印的 Python 对象,而不是 JSON,尽管两者看起来很相似。)