我正在尝试从带有嵌套json的列中提取特定值。
这就是我尝试过的:
import pandas as pd
import json
pd.io.json.json_normalize(df.response.apply(json.loads))
但是,这将返回以下错误消息:
JSONDecodeError:从第1行第29996列开始的未终止字符串 (字符29995)
为了澄清一下,理想情况下,我想提取已分配给特定键的值(我想我尝试用我的代码展平json)。
以下是我在列中拥有的数据示例(我们称其为响应:
'{"at":"cop","b1":{"B2":"200000000","B3":"ABCDE ART","B4":"21000","B5":"CURRENT","B6":"1234567890"},"B7":"Yes","verified":true,"B8":"2010-11-02T11:10:18.471Z","B9":true,"B10":false,"B11":true,"B12":false,"B13":true,"accountAcceptsDebits":true,"B14":true,"B15":{"B16":"2010-11-02T11:10:18","response":{"B17":"ok","pagination":{},"body":{"response":{"result_status":"complete","registration_number_match":"Y","company_name_match":"N","account_type_match":"Y","account_open":"Y","account_length_match":"Y","account_found":"Y","account_accepts_debits":"Y","account_accepts_credit":"Y"},"request":{"B20":"1111-1111-1111-1111-1111","B21":"1111111111","B22":"ABCDE","B23":"11111","B24":"CUR","B25":"111111111"}},"attachments":[]},"B26":"111111-11111-1111-1111-1111111"},"B27":"11111-1111-1111-1111-111111","Thisthat":{"version":"1.1.0","timestamp":"2010-11-02T11:11:17"}}'
当我在给出的答案中运行代码时,似乎又导致错误的一行示例:
{"B1":{"B2":"requires_action","B3":[{"B4":"STATUS CODES -- 3.3","B5":"Code - 38 - AR De-Process"}],"B6":"checks","B7":{"workingMemory":{"B8":1111111111,"B9-NAME-VERIFIED":true,"B10":"F","B11":true,"B12":true,"B13":true,"B14":true,"B15":{"B16":"5123","B17":"ANYPERSON","B18":"Man","B19":"FID","B20":"NDOF","B21":"212121","B22":"","B23":"N","B24":"N","B25":"N","B26":"Y","B27":"Member","B28":"111111","B29":"","B30":"","B31":"","B32":"YES","B33":"11111","B34":"N","B35":"","B36":{"string":["","","",""]},"B37":{"string":["","","",""]},"B38":"","B39":"","B40":"11111111111"},"B41":true},"B42":[{"rule":{"B43":"B44 -- 3.1.1"},"B45":{"B46":true,"B47":"F","B48":111111111,"B49":true},"result":{"B50":"true","overridden":false}},{"rule":{"name":"B51 -- 3.3.2"},"workingMemoryUpdates":{"B52":true},"result":{"exitValue":"true","overridden":false}},{"rule":{"name":"sdsfd -- 3.3.3"},"workingMemoryUpdates":{},"result":{"rtert":"true","message":"No principal member defaults/judgements found","overridden":false}},{"rule":{"name":"etwrtret -- 3.2.1"},"B60":{"dfdsffds":true},"result":{"B61":"true","message":" Anon # Anon report: Thsi: Names: Whatssomethi thig ","B62":false}},{"rule":{"name":"B63 -- 3.1.4"},"workingMemoryUpdates":{"B64":true},"result":{"exitValue":"true","message":"ID Number is valid","overridden":false}},{"rule":{"name":"wrtrtrtr-- 3.3.1"},"workingMemoryUpdates":{"retertetr":true,"B67":{"B68":"5123","B69":"ASAK","B70":"SDSDS","B71":"DSDS","B72":"SDSD","B73":"122222","comment":"","B73":"N","B74":"N","B75":"N","B76":"Y","position":"Member","B77":"111111","B78":"","B79":"","B80":"","deeds":"YES","infoDate":"12121","B81":"N","srere":"","B82":{"string":["","","",""]},"B83":{"string":["","","",""]},"B89":"","B90":"","idnumber":"8304040035089"}},"result":{"exitValue":"true","message":"name: This Man Woman perons ","overridden":false}}],"pausedRules":[],"referredRules":[{"rule":{"name":"BUSINESS STATUS CODES -- 3.3.15"},"workingMemoryUpdates":{},"result":{"exitValue":"referred","message":"dfdfdfsd - df - dfsd","requirements":[{"key":"business-status-code","name":"ABDSD -sdsds Process","type":"boolean"}],"overridden":false}}]},"requestId":"1111-1111-1111-111-111-11111"}}
答案 0 :(得分:0)
这有效:
df = pd.DataFrame({'response': ['{"at":"cop","b1":{"B2":"200000000","B3":"ABCDE ART","B4":"21000","B5":"CURRENT","B6":"1234567890"},"B7":"Yes","verified":true,"B8":"2010-11-02T11:10:18.471Z","B9":true,"B10":false,"B11":true,"B12":false,"B13":true,"accountAcceptsDebits":true,"B14":true,"B15":{"B16":"2010-11-02T11:10:18","response":{"B17":"ok","pagination":{},"body":{"response":{"result_status":"complete","registration_number_match":"Y","company_name_match":"N","account_type_match":"Y","account_open":"Y","account_length_match":"Y","account_found":"Y","account_accepts_debits":"Y","account_accepts_credit":"Y"},"request":{"B20":"1111-1111-1111-1111-1111","B21":"1111111111","B22":"ABCDE","B23":"11111","B24":"CUR","B25":"111111111"}},"attachments":[]},"B26":"111111-11111-1111-1111-1111111"},"B27":"11111-1111-1111-1111-111111","Thisthat":{"version":"1.1.0","timestamp":"2010-11-02T11:11:17"}}']})
df
# response
#0 {"at":"cop","b1":{"B2":"200000000","B3":"ABCDE...
pd.io.json.json_normalize(df.response.apply(json.loads)).B7
#0 Yes
#Name: B7, dtype: object
假设您有一些无法用json
解析的行,您可以看到它们正在使用哪些行:
for i, row in df.response.iteritems():
try:
pd.io.json.json_normalize(json.loads(row))
except:
print(i)