Question

此问题与其他问题不同：在那些问题中，列名都不是存放在字典键对应的值里。请在标记为重复之前先查看下面给出的示例。

我有这样的df：

df: col1 col2 col3
    100  200  [{'attribute': 'Pattern', 'value': 'Printed'},...

仔细查看第3列的样子：

[{'attribute': 'Pattern', 'value': 'Printed'},
 {'attribute': 'Topwear style', 'value': 'T shirt'},
 {'attribute': 'Bottomwear Length', 'value': 'Short'},
 {'attribute': 'Colour Palette', 'value': 'Bright colours'},
 {'attribute': 'Bottomwear style', 'value': 'Baggy'},
 {'attribute': 'Topwear length', 'value': 'Waist'},
 {'attribute': 'Sleeve style', 'value': 'Sleeveless'},
 {'attribute': 'Type of pattern', 'value': 'Graphic print'},
 {'attribute': 'Neck', 'value': 'Round'},
 {'attribute': 'Level of embellishment', 'value': 'No'}]

每个 attribute 的值是列名，对应的 value 则是该列的值。

输出将如下所示：

df: col1   col2    Pattern       Topwear Style       Bottomwear Length ....
    100    200     Printed       T shirt             Shorts

有多行数据，这些新属性和值会在各行中重复出现。我该如何在 pandas 中做到这一点？我搜索过类似的问题，但没有找到任何有用的内容。

Answer 1

# Pull the col3 cells into a plain list. Each cell is indexed as a single
# {'attribute': ..., 'value': ...} dict here — TODO confirm this matches your data.
records = df['col3'].tolist()
# One column per attribute; each value is wrapped in a list so the new frame has one row.
single_row = {rec['attribute']: [rec['value']] for rec in records}
attrs_df = pd.DataFrame(single_row)
del df['col3']
# A right join keeps the index labels of attrs_df.
joined = df.join(attrs_df, how='right')
print(joined)

输出

   col1  col2  Pattern Topwear style Bottomwear Length  Colour Palette  \
0   100   200  Printed       T shirt             Short  Bright colours  
...

用于测试的数据框。

# Attribute/value records that make up the col3 column of the test frame.
attribute_rows = [
    {'attribute': 'Pattern', 'value': 'Printed'},
    {'attribute': 'Topwear style', 'value': 'T shirt'},
    {'attribute': 'Bottomwear Length', 'value': 'Short'},
    {'attribute': 'Colour Palette', 'value': 'Bright colours'},
    {'attribute': 'Bottomwear style', 'value': 'Baggy'},
    {'attribute': 'Topwear length', 'value': 'Waist'},
    {'attribute': 'Sleeve style', 'value': 'Sleeveless'},
    {'attribute': 'Type of pattern', 'value': 'Graphic print'},
    {'attribute': 'Neck', 'value': 'Round'},
    {'attribute': 'Level of embellishment', 'value': 'No'},
]
# col1 and col2 are scalars, so pandas repeats them for every entry of col3.
data = {'col1': 100, 'col2': 200, 'col3': attribute_rows}

df = pd.DataFrame(data)

Answer 2

尝试：

-- Round-trip demo: store the maximum INT, BIGINT and DECIMAL(20) values,
-- convert each to VARBINARY(MAX), convert the binary back to the numeric
-- type, and display every stage side by side.
-- NOTE(review): this T-SQL appears unrelated to the pandas question around it — confirm it belongs here.
DECLARE 
    -- Source values.
    @a_INT INT,
    @a_BIGINT BIGINT,
    @a_decimal_20 DECIMAL(20),
    -- Binary representations of the source values.
    @bvarbin_INT VARBINARY(MAX),
    @bvarbin_BIGINT VARBINARY(MAX),
    @bvarbin_decimal_20 VARBINARY(MAX),
    -- Values converted back from binary. Despite the UIntNN names these are
    -- declared as INT, BIGINT and DECIMAL(20) respectively.
    @cresult_UInt16 INT,
    @cresult_UInt32 BIGINT,
    @cresult_UInt64 DECIMAL(20)

-- Step 1: load the largest value each type can hold.
SELECT
    @a_INT = 2147483647,--INT MAX VALUE
    @a_BIGINT = 9223372036854775807, -- BIGINT MAX VALUE
    @a_decimal_20 = 99999999999999999999 -- DECIMAL(20) MAX VALUE

-- Step 2: numeric -> VARBINARY(MAX).
SELECT
    @bvarbin_INT = CONVERT(VARBINARY(MAX),@a_INT),
    @bvarbin_BIGINT = CONVERT(VARBINARY(MAX),@a_BIGINT),
    @bvarbin_decimal_20 = CONVERT(VARBINARY(MAX),@a_decimal_20)

-- Step 3: VARBINARY(MAX) -> the same numeric type it came from.
SELECT
    @cresult_UInt16 = CONVERT(INT,@bvarbin_INT),
    @cresult_UInt32 = CONVERT(BIGINT,@bvarbin_BIGINT),
    @cresult_UInt64 = CONVERT(DECIMAL(20),@bvarbin_decimal_20)

-- Step 4: return originals, binary forms and round-tripped values as one row,
-- each column aliased with its variable name.
SELECT 
    @a_INT a_INT,
    @a_BIGINT a_BIGINT,
    @a_decimal_20 a_decimal_20,
    @bvarbin_INT bvarbin_INT,
    @bvarbin_BIGINT bvarbin_BIGINT,
    @bvarbin_decimal_20 bvarbin_decimal_20,
    @cresult_UInt16 cresult_UInt16,
    @cresult_UInt32 cresult_UInt32,
    @cresult_UInt64 cresult_UInt64;

设置：

# Remove col3 from df; each cell is expected to hold a list of
# {'attribute': ..., 'value': ...} dicts — TODO confirm for your data.
attr_lists = df.pop('col3')
# Turn every cell into a one-row frame whose columns are the attribute names.
row_frames = []
for cell in attr_lists:
    row_frames.append(pd.DataFrame(cell).set_index('attribute').T)
# Stack the rows and renumber them 0..n-1 before joining on the index
# (this lines up with df only if df uses the default integer index).
wide = pd.concat(row_frames).reset_index(drop=True)
df = df.join(wide)

输出：

Answer 3

您可以将嵌套的列表推导式与字典推导式结合使用，生成一个字典列表，再把它传给 DataFrame 构造函数：

优点是性能更好，不过代码拆解起来稍微复杂一些。

# Sample cell content: a list of attribute/value dicts, reused for both rows.
d = [
    {'attribute': 'Pattern', 'value': 'Printed'},
    {'attribute': 'Topwear style', 'value': 'T shirt'},
    {'attribute': 'Bottomwear Length', 'value': 'Short'},
    {'attribute': 'Colour Palette', 'value': 'Bright colours'},
]

# Two-row frame whose col3 cells each hold the full list above.
columns = {
    'col1': [100, 20],
    'col2': [200, 10],
    'col3': [d, d],
}
df = pd.DataFrame(columns)
print(df)

   col1  col2                                               col3
0   100   200  [{'attribute': 'Pattern', 'value': 'Printed'},...
1    20    10  [{'attribute': 'Pattern', 'value': 'Printed'},...

# Remove col3 from df and build one {attribute: value} dict per row.
# Each attribute/value pair is read exactly once per dict; a further loop
# over y.items() would only repeat the same assignment for every key.
# `if y` skips empty dicts instead of raising KeyError on them.
a = [{y['attribute']: y['value'] for y in x if y} for x in df.pop('col3')]

# The list of dicts becomes a frame with one column per attribute, joined
# back onto the remaining columns by index.
df = df.join(pd.DataFrame(a))
print(df)
   col1  col2  Pattern Topwear style Bottomwear Length  Colour Palette
0   100   200  Printed       T shirt             Short  Bright colours
1    20    10  Printed       T shirt             Short  Bright colours

将包含字典列表的列转换为多列，其中列名和值都存放在字典键对应的值中

3 个答案: