Question

我有以下列表：

    structure(list(Station = structure(1:6, .Label = c("BL0102S", 
"BL0102V", "BL01R", "BL01S", "BL0405-2R", "BL0405-2S", "BL0405R", 
"BL0405S", "BL1112S", "BL1112V", "BL11R", "BL11S", "BL1314-2R", 
"BL1314-2S", "BL1516S", "BL1516V", "D0405S", "D0405V", "DF0405S", 
"DF0405V", "DF04S", "DF04V", "DF0708S", "DF0708V", "DF07R", "DF07S", 
"DF0809R", "DF0809S", "DF12R", "DF12S", "DF14S", "DF14V", "FM06R", 
"FM06S", "FM0708S", "FM0708V", "FM0910R", "FM0910S", "FM1415R", 
"FM1415S", "TB0405S", "TB0405V", "TB12S", "TB12V", "WMW0102R", 
"WMW0102S", "WMW0203S", "WMW0203V", "WMW02S", "WMW02V"), class = "factor"), 
    Geom = structure(c(2L, 1L, 1L, 2L, 1L, 2L), .Label = c("L", 
    "S"), class = "factor"), Slope_degree = c(24L, 5L, 0L, 23L, 
    5L, 35L), MDD = c(5, 5, 4.7, 4.7, 4.3, 5.3), CC = c(84L, 
    83L, 82L, 80L, 78L, 74L), CT = c(61L, 15L, 7L, 33L, 22L, 
    30L), Corrected_10m = c(10.9, 10, 10, 10.9, 10, 12.2), Av_litter_depth = c(2.89, 
    4, 7, 4.22, 3.83, 3.58), Content = c(20.34, 23.26, 23.23, 
    23.74, 17.5, 20.24), Slope_radian = c(0.41887902, 0.087266463, 
    0, 0.401425728, 0.087266463, 0.610865238), AB_a = c(0L, 0L, 
    0L, 0L, 1L, 0L), AB_sa = c(9L, 2L, 0L, 1L, 5L, 0L), AS_a = c(0L, 
    0L, 0L, 0L, 3L, 0L), AS_sa = c(2L, 1L, 0L, 0L, 2L, 0L), Days_deployed = c(15L, 
    15L, 14L, 14L, 14L, 14L), Count = c(23L, 29L, 9L, 20L, 85L, 
    43L), Tottime = c(295, 318, 66, 199, 1386, 745), Path_dist = c(659.4047198, 
    1021.11078342, 516.51545352, 997.8758996, 988.18342935, 957.66932416
    ), Count_rate = c(9.2, 11.6, 4.10334346504559, 9.11854103343465, 
    42.358803986711, 17.3854447439353), Time_use = c(118, 127.2, 
    30.0911854103343, 90.7294832826748, 690.697674418605, 301.212938005391
    ), `Log(Time)` = c(4.77068462446567, 4.84576065090602, 3.40423228535731, 
    4.50788236805538, 6.53770220909723, 5.70781744986838), `Log(Visit)` = c(2.21920348405499, 
    2.45100509811232, 1.4118021206671, 2.21030981688487, 3.7461762858377, 
    2.85563334718238), Location = c(1, 1, 2, 2, 3, 3)), row.names = c(NA, 
-6L), groups = structure(list(Station = structure(1:6, .Label = c("BL0102S", 
"BL0102V", "BL01R", "BL01S", "BL0405-2R", "BL0405-2S", "BL0405R", 
"BL0405S", "BL1112S", "BL1112V", "BL11R", "BL11S", "BL1314-2R", 
"BL1314-2S", "BL1516S", "BL1516V", "D0405S", "D0405V", "DF0405S", 
"DF0405V", "DF04S", "DF04V", "DF0708S", "DF0708V", "DF07R", "DF07S", 
"DF0809R", "DF0809S", "DF12R", "DF12S", "DF14S", "DF14V", "FM06R", 
"FM06S", "FM0708S", "FM0708V", "FM0910R", "FM0910S", "FM1415R", 
"FM1415S", "TB0405S", "TB0405V", "TB12S", "TB12V", "WMW0102R", 
"WMW0102S", "WMW0203S", "WMW0203V", "WMW02S", "WMW02V"), class = "factor"), 
    Geom = structure(c(2L, 1L, 1L, 2L, 1L, 2L), .Label = c("L", 
    "S"), class = "factor"), Slope_degree = c(24L, 5L, 0L, 23L, 
    5L, 35L), MDD = c(5, 5, 4.7, 4.7, 4.3, 5.3), CC = c(84L, 
    83L, 82L, 80L, 78L, 74L), CT = c(61L, 15L, 7L, 33L, 22L, 
    30L), Corrected_10m = c(10.9, 10, 10, 10.9, 10, 12.2), Av_litter_depth = c(2.89, 
    4, 7, 4.22, 3.83, 3.58), Content = c(20.34, 23.26, 23.23, 
    23.74, 17.5, 20.24), Slope_radian = c(0.41887902, 0.087266463, 
    0, 0.401425728, 0.087266463, 0.610865238), AB_a = c(0L, 0L, 
    0L, 0L, 1L, 0L), AB_sa = c(9L, 2L, 0L, 1L, 5L, 0L), AS_a = c(0L, 
    0L, 0L, 0L, 3L, 0L), AS_sa = c(2L, 1L, 0L, 0L, 2L, 0L), .rows = list(
        1L, 2L, 3L, 4L, 5L, 6L)), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), na.action = structure(c(`31` = 31L, 
`32` = 32L, `47` = 47L, `48` = 48L), class = "omit"))

我想用下面这个列表创建一个字典列表：

# Sample input: a list of groups, where each group is a flat run of
# [key, value] pairs. The 'pd' / 'pd_de' / 'pd_amnt' triple may repeat
# several times inside one group.
my_list = [
    [
        ['pd', 1],
        ['pd_de', None],
        ['pd_amnt', '$10.00'],
    ],
    [
        ['pd', 1],
        ['pd_de', '5/1/19 '],
        ['pd_amnt', '$100.00 '],
        ['pd', 2],
        ['pd_de', '5/1/20 '],
        ['pd_amnt', '$200.00 '],
    ],
    [
        ['pd', 1],
        ['pd_de', None],
        ['pd_amnt', None],
    ],
    [
        ['pd', 1],
        ['pd_de', '5/1/19 '],
        ['pd_amnt', '$300.00 '],
        ['pd', 2],
        ['pd_de', '5/1/20 '],
        ['pd_amnt', '$600.00 '],
        ['pd', 3],
        ['pd_de', '6/1/18'],
        ['pd_amnt', '$450.00'],
    ],
]

我用下面的代码创建字典列表，但得到了我不想要的输出：

# Turn every group of [key, value] pairs into a single dict.
# When a key occurs more than once inside a group, only its last value
# survives, because a later entry with the same key replaces the earlier one.
list_dict = [
    {pair[0]: pair[1] for pair in group}
    for group in my_list
]

实际得到的输出如下（这不是我想要的）：

[{'pd': 1, 'pd_de': None, 'pd_amnt': '$10.00'},
 {'pd': 2, 'pd_de': '5/1/20 ', 'pd_amnt': '$200.00 '},
 {'pd': 1, 'pd_de': None, 'pd_amnt': None},
 {'pd': 3, 'pd_de': '6/1/18', 'pd_amnt': '$450.00'}]

从上面可以看到，当内部列表的长度为 3 时结果是正确的；如果长度超过 3，就得不到正确的结果。

我也不确定在为字典创建键时，如何在键名中加入 _1、_2 这样的编号（例如“pd_1”）。我期望的输出是：[{'pd_1': 1, 'pd_de_1': None, 'pd_amnt_1': '$10.00'}, {'pd_1': 1, 'pd_de_1': '5/1/19', 'pd_amnt_1': '$100.00', 'pd_2': 2, 'pd_de_2': '5/1/20 ', 'pd_amnt_2': '$200.00 '}, {'pd_1': 1, 'pd_de_1': None, 'pd_amnt_1': None}, {'pd_1': 1, 'pd_de_1': '5/1/19', 'pd_amnt_1': '$300.00','pd_2': 2, 'pd_de_2': '5/1/20', 'pd_amnt_2': '$600.00','pd_3': 3, 'pd_de_3': '6/1/18', 'pd_amnt_3': '$450.00'}]。

如何获得所需的输出？

（注意：不确定标题的名称，我说的是列表的长度，在这里可能是错误的，因为我不熟悉pythonic术语）

Answer 1

保留项目顺序：

import pandas as pd
from collections import OrderedDict

# my_list = ...

# Build one insertion-ordered mapping per group and load them into a
# DataFrame. Every key is suffixed with the value of the most recent 'pd'
# entry, so repeated keys inside a group stay distinct.
# NOTE: the suffix is only assigned when a 'pd' entry is seen, so each group
# is expected to begin with one.
res = []
for group in my_list:
    record = OrderedDict()
    for entry in group:
        name, value = entry[0], entry[1]
        if name == 'pd':
            sfx = value
        # Only string values carry stray surrounding whitespace to trim.
        if isinstance(value, str):
            value = value.strip()
        record[f'{name}_{sfx}'] = value
    res.append(record)

df = pd.DataFrame(res)
print(df)

输出：

   pd_1 pd_de_1 pd_amnt_1  pd_2 pd_de_2 pd_amnt_2  pd_3 pd_de_3 pd_amnt_3
0     1    None    $10.00   NaN     NaN       NaN   NaN     NaN       NaN
1     1  5/1/19   $100.00   2.0  5/1/20   $200.00   NaN     NaN       NaN
2     1    None      None   NaN     NaN       NaN   NaN     NaN       NaN
3     1  5/1/19   $300.00   2.0  5/1/20   $600.00   3.0  6/1/18   $450.00

Answer 2

您可以使用一个额外的变量（counter）来查找字典中尚未使用的键序号（index）：

# For each group, give every key the smallest numeric suffix that is not
# already taken in that group's dict.
result = []
for sub_list in my_list:
    numbered = {}
    for key, value in sub_list:
        counter = 1
        candidate = f"{key}_{counter}"
        # Probe key_1, key_2, ... until an unused name is found.
        while candidate in numbered:
            counter += 1
            candidate = f"{key}_{counter}"
        numbered[candidate] = value
    result.append(numbered)

更有效的解决方案是将计数器存储到dict中，并在使用键后对其进行递增：

# Same numbering as the probing version, but the per-key occurrence count is
# remembered in a dict so no search for a free suffix is needed.
result = []
for sub_list in my_list:
    counters = {}
    numbered = {}
    for key, value in sub_list:
        # A key not seen before starts at 0 and becomes occurrence 1.
        occurrence = counters.get(key, 0) + 1
        counters[key] = occurrence
        numbered[f"{key}_{occurrence}"] = value
    result.append(numbered)

使用collections.defaultdict可以将它写得短一些：

from collections import defaultdict

# Number repeated keys per group; defaultdict(int) supplies the initial 0
# for a key the first time it is incremented.
result = []
for sub_list in my_list:
    counters = defaultdict(int)
    numbered = {}
    for key, value in sub_list:
        counters[key] += 1
        occurrence = counters[key]
        numbered[f"{key}_{occurrence}"] = value
    result.append(numbered)

Answer 3

我发现了一种非常酷的方法。
您可以使用 defaultdict：每次遇到某个键时递增它的计数，然后把带编号的键添加到结果字典中。

from collections import defaultdict

# One dict per group; each key gets "_<n>" appended, where n counts how many
# times that key has appeared so far within the group.
list_dict = []

for group in my_list:
    seen = defaultdict(int)
    numbered = {}
    for pair in group:
        name = pair[0]
        seen[name] += 1
        numbered[name + '_' + str(seen[name])] = pair[1]
    list_dict.append(numbered)

输出：

[{'pd_1': 1, 'pd_de_1': None, 'pd_amnt_1': '$10.00'},
 {'pd_1': 1,
  'pd_de_1': '5/1/19 ',
  'pd_amnt_1': '$100.00 ',
  'pd_2': 2,
  'pd_de_2': '5/1/20 ',
  'pd_amnt_2': '$200.00 '},
 {'pd_1': 1, 'pd_de_1': None, 'pd_amnt_1': None},
 {'pd_1': 1,
  'pd_de_1': '5/1/19 ',
  'pd_amnt_1': '$300.00 ',
  'pd_2': 2,
  'pd_de_2': '5/1/20 ',
  'pd_amnt_2': '$600.00 ',
  'pd_3': 3,
  'pd_de_3': '6/1/18',
  'pd_amnt_3': '$450.00'}]

Answer 4

得到这个结果的原因是：当您给字典中已存在的键赋值时，它会覆盖该键之前的数据。例如，您有这个字典 x = {"a":1, "b":2, "c":3}，如果您执行 x["d"] = 4，则它将变为 {"a":1, "b":2, "c":3, "d":4}；但是如果您接着执行 x["a"] = 3，则它将变为 {"a":3, "b":2, "c":3, "d":4}。
为您提供的解决方案是将每个项目添加到字典中，并在标签后添加一个数字以表示它是哪个标签。

["a":3, "b":2, "c":3, "d":4]

根据列表的长度为字典创建键

4 个答案: