我有以下列表:
structure(list(Station = structure(1:6, .Label = c("BL0102S",
"BL0102V", "BL01R", "BL01S", "BL0405-2R", "BL0405-2S", "BL0405R",
"BL0405S", "BL1112S", "BL1112V", "BL11R", "BL11S", "BL1314-2R",
"BL1314-2S", "BL1516S", "BL1516V", "D0405S", "D0405V", "DF0405S",
"DF0405V", "DF04S", "DF04V", "DF0708S", "DF0708V", "DF07R", "DF07S",
"DF0809R", "DF0809S", "DF12R", "DF12S", "DF14S", "DF14V", "FM06R",
"FM06S", "FM0708S", "FM0708V", "FM0910R", "FM0910S", "FM1415R",
"FM1415S", "TB0405S", "TB0405V", "TB12S", "TB12V", "WMW0102R",
"WMW0102S", "WMW0203S", "WMW0203V", "WMW02S", "WMW02V"), class = "factor"),
Geom = structure(c(2L, 1L, 1L, 2L, 1L, 2L), .Label = c("L",
"S"), class = "factor"), Slope_degree = c(24L, 5L, 0L, 23L,
5L, 35L), MDD = c(5, 5, 4.7, 4.7, 4.3, 5.3), CC = c(84L,
83L, 82L, 80L, 78L, 74L), CT = c(61L, 15L, 7L, 33L, 22L,
30L), Corrected_10m = c(10.9, 10, 10, 10.9, 10, 12.2), Av_litter_depth = c(2.89,
4, 7, 4.22, 3.83, 3.58), Content = c(20.34, 23.26, 23.23,
23.74, 17.5, 20.24), Slope_radian = c(0.41887902, 0.087266463,
0, 0.401425728, 0.087266463, 0.610865238), AB_a = c(0L, 0L,
0L, 0L, 1L, 0L), AB_sa = c(9L, 2L, 0L, 1L, 5L, 0L), AS_a = c(0L,
0L, 0L, 0L, 3L, 0L), AS_sa = c(2L, 1L, 0L, 0L, 2L, 0L), Days_deployed = c(15L,
15L, 14L, 14L, 14L, 14L), Count = c(23L, 29L, 9L, 20L, 85L,
43L), Tottime = c(295, 318, 66, 199, 1386, 745), Path_dist = c(659.4047198,
1021.11078342, 516.51545352, 997.8758996, 988.18342935, 957.66932416
), Count_rate = c(9.2, 11.6, 4.10334346504559, 9.11854103343465,
42.358803986711, 17.3854447439353), Time_use = c(118, 127.2,
30.0911854103343, 90.7294832826748, 690.697674418605, 301.212938005391
), `Log(Time)` = c(4.77068462446567, 4.84576065090602, 3.40423228535731,
4.50788236805538, 6.53770220909723, 5.70781744986838), `Log(Visit)` = c(2.21920348405499,
2.45100509811232, 1.4118021206671, 2.21030981688487, 3.7461762858377,
2.85563334718238), Location = c(1, 1, 2, 2, 3, 3)), row.names = c(NA,
-6L), groups = structure(list(Station = structure(1:6, .Label = c("BL0102S",
"BL0102V", "BL01R", "BL01S", "BL0405-2R", "BL0405-2S", "BL0405R",
"BL0405S", "BL1112S", "BL1112V", "BL11R", "BL11S", "BL1314-2R",
"BL1314-2S", "BL1516S", "BL1516V", "D0405S", "D0405V", "DF0405S",
"DF0405V", "DF04S", "DF04V", "DF0708S", "DF0708V", "DF07R", "DF07S",
"DF0809R", "DF0809S", "DF12R", "DF12S", "DF14S", "DF14V", "FM06R",
"FM06S", "FM0708S", "FM0708V", "FM0910R", "FM0910S", "FM1415R",
"FM1415S", "TB0405S", "TB0405V", "TB12S", "TB12V", "WMW0102R",
"WMW0102S", "WMW0203S", "WMW0203V", "WMW02S", "WMW02V"), class = "factor"),
Geom = structure(c(2L, 1L, 1L, 2L, 1L, 2L), .Label = c("L",
"S"), class = "factor"), Slope_degree = c(24L, 5L, 0L, 23L,
5L, 35L), MDD = c(5, 5, 4.7, 4.7, 4.3, 5.3), CC = c(84L,
83L, 82L, 80L, 78L, 74L), CT = c(61L, 15L, 7L, 33L, 22L,
30L), Corrected_10m = c(10.9, 10, 10, 10.9, 10, 12.2), Av_litter_depth = c(2.89,
4, 7, 4.22, 3.83, 3.58), Content = c(20.34, 23.26, 23.23,
23.74, 17.5, 20.24), Slope_radian = c(0.41887902, 0.087266463,
0, 0.401425728, 0.087266463, 0.610865238), AB_a = c(0L, 0L,
0L, 0L, 1L, 0L), AB_sa = c(9L, 2L, 0L, 1L, 5L, 0L), AS_a = c(0L,
0L, 0L, 0L, 3L, 0L), AS_sa = c(2L, 1L, 0L, 0L, 2L, 0L), .rows = list(
1L, 2L, 3L, 4L, 5L, 6L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), na.action = structure(c(`31` = 31L,
`32` = 32L, `47` = 47L, `48` = 48L), class = "omit"))
我有下面这个列表,想用它创建一个字典列表:
# Sample input: one inner list per record, each made of [key, value] pairs.
# Within a record the keys 'pd', 'pd_de' and 'pd_amnt' repeat once per period.
my_list = [
    [
        ['pd', 1],
        ['pd_de', None],
        ['pd_amnt', '$10.00'],
    ],
    [
        ['pd', 1],
        ['pd_de', '5/1/19 '],
        ['pd_amnt', '$100.00 '],
        ['pd', 2],
        ['pd_de', '5/1/20 '],
        ['pd_amnt', '$200.00 '],
    ],
    [
        ['pd', 1],
        ['pd_de', None],
        ['pd_amnt', None],
    ],
    [
        ['pd', 1],
        ['pd_de', '5/1/19 '],
        ['pd_amnt', '$300.00 '],
        ['pd', 2],
        ['pd_de', '5/1/20 '],
        ['pd_amnt', '$600.00 '],
        ['pd', 3],
        ['pd_de', '6/1/18'],
        ['pd_amnt', '$450.00'],
    ],
]
我用下面的代码创建字典列表:
# Build one dict per record of `my_list` from its [key, value] pairs.
list_dict = []
for i in my_list:
    temp_dict = {}
    for j in i:
        # NOTE: a repeated key (e.g. a second 'pd') overwrites the earlier
        # value, so only the last period of each record survives.
        temp_dict[j[0]] = j[1]
    list_dict.append(temp_dict)
但我得到的是下面这个我不想要的输出:
[{'pd': 1, 'pd_de': None, 'pd_amnt': '$10.00'},
{'pd': 2, 'pd_de': '5/1/20 ', 'pd_amnt': '$200.00 '},
{'pd': 1, 'pd_de': None, 'pd_amnt': None},
{'pd': 3, 'pd_de': '6/1/18', 'pd_amnt': '$450.00'}]
从上面可以看到,当内部列表的长度为3时,结果是正确的;如果长度超过3,就得不到正确的结果。
我也不确定在为字典创建键时,如何给键加上后缀(即“pd_1”)。我需要的输出如下:
[{'pd_1': 1, 'pd_de_1': None, 'pd_amnt_1': '$10.00'},
{'pd_1': 1, 'pd_de_1': '5/1/19', 'pd_amnt_1': '$100.00', 'pd_2': 2, 'pd_de_2': '5/1/20 ', 'pd_amnt_2': '$200.00 '},
{'pd_1': 1, 'pd_de_1': None, 'pd_amnt_1': None},
{'pd_1': 1, 'pd_de_1': '5/1/19', 'pd_amnt_1': '$300.00','pd_2': 2, 'pd_de_2': '5/1/20', 'pd_amnt_2': '$600.00','pd_3': 3, 'pd_de_3': '6/1/18', 'pd_amnt_3': '$450.00'}]
。
如何获得所需的输出?
(注意:不确定标题的名称,我说的是列表的长度,在这里可能是错误的,因为我不熟悉pythonic术语)
答案 0 :(得分:2)
保留项目顺序:
import pandas as pd
from collections import OrderedDict
# my_list = ...
# Build one ordered dict per record, suffixing every key with the record's
# current period number, then tabulate the dicts with pandas.
res = []
for l1 in my_list:
    d = OrderedDict()
    for l2 in l1:
        if l2[0] == 'pd':
            # A 'pd' pair starts a new period; its value becomes the suffix
            # for itself and for the pairs that follow it.
            # NOTE: assumes every record starts with a 'pd' pair; otherwise
            # `sfx` is unset (or left over from the previous record).
            sfx = l2[1]
        # Strip surrounding whitespace from string values; keep others as-is.
        d[f'{l2[0]}_{sfx}'] = l2[1].strip() if isinstance(l2[1], str) else l2[1]
    res.append(d)

df = pd.DataFrame(res)
print(df)
输出:
pd_1 pd_de_1 pd_amnt_1 pd_2 pd_de_2 pd_amnt_2 pd_3 pd_de_3 pd_amnt_3
0 1 None $10.00 NaN NaN NaN NaN NaN NaN
1 1 5/1/19 $100.00 2.0 5/1/20 $200.00 NaN NaN NaN
2 1 None None NaN NaN NaN NaN NaN NaN
3 1 5/1/19 $300.00 2.0 5/1/20 $600.00 3.0 6/1/18 $450.00
答案 1 :(得分:1)
您可以使用一个额外的变量(counter)来查找字典中尚不存在的键“索引”:
# Build one dict per record, suffixing each key with the first index not
# already used for that key within the record.
result = []
for sub_list in my_list:
    temp = {}
    for key, value in sub_list:
        counter = 1
        # Probe key_1, key_2, ... until an unused name is found.
        while f"{key}_{counter}" in temp:
            counter += 1
        temp[f"{key}_{counter}"] = value
    result.append(temp)
更有效的解决方案是将计数器存储到dict中,并在使用键后对其进行递增:
# Build one dict per record, suffixing each key with its occurrence number.
# The per-key counts are kept in `counters`, so no probing loop is needed.
result = []
for sub_list in my_list:
    counters = {}  # occurrences of each key seen so far in this record
    temp = {}
    for key, value in sub_list:
        if key in counters:
            counters[key] += 1
        else:
            counters[key] = 1
        temp[f"{key}_{counters[key]}"] = value
    result.append(temp)
使用 collections.defaultdict 可以将它写得短一些:
from collections import defaultdict
# Build one dict per record, suffixing each key with its occurrence number.
result = []
for sub_list in my_list:
    # Missing keys start at 0, so the first occurrence becomes 1.
    counters = defaultdict(int)
    temp = {}
    for key, value in sub_list:
        counters[key] += 1
        temp[f"{key}_{counters[key]}"] = value
    result.append(temp)
答案 2 :(得分:1)
使用 defaultdict,在每次用到某个键时递增它的计数,然后将带计数后缀的键添加到您的结果字典中。
list_dict = []
from collections import defaultdict

# Append one dict per record to `list_dict`, suffixing each key with the
# number of times that key has been seen so far in the record.
for i in my_list:
    temp_dict = {}
    incr = defaultdict(int)  # missing keys start at 0
    for j in i:
        incr[j[0]] += 1
        temp_dict[j[0] + '_' + str(incr[j[0]])] = j[1]
    list_dict.append(temp_dict)
输出:
[{'pd_1': 1, 'pd_de_1': None, 'pd_amnt_1': '$10.00'},
{'pd_1': 1,
'pd_de_1': '5/1/19 ',
'pd_amnt_1': '$100.00 ',
'pd_2': 2,
'pd_de_2': '5/1/20 ',
'pd_amnt_2': '$200.00 '},
{'pd_1': 1, 'pd_de_1': None, 'pd_amnt_1': None},
{'pd_1': 1,
'pd_de_1': '5/1/19 ',
'pd_amnt_1': '$300.00 ',
'pd_2': 2,
'pd_de_2': '5/1/20 ',
'pd_amnt_2': '$600.00 ',
'pd_3': 3,
'pd_de_3': '6/1/18',
'pd_amnt_3': '$450.00'}]
答案 3 :(得分:-1)
得到这个结果的原因是:当您给字典中已存在的键赋值时,它会覆盖该键之前的数据。例如,您有这个字典 x = {"a": 1, "b": 2, "c": 3},如果您执行 x["d"] = 4,则它将变为 {"a": 1, "b": 2, "c": 3, "d": 4};但是如果您执行 x["a"] = 3,则它将变为 {"a": 3, "b": 2, "c": 3, "d": 4}。
为您提供的解决方案是将每个项目添加到字典中,并在标签后添加一个数字以表示它是哪个标签。