在 pandas 中,我有一个类似下面的字典:
-- Purpose: walk every user base table of one schema, column by column, and
-- collect the rows whose column value equals @SearchStr or is NULL.
-- The final result set lists each distinct [schema].[table] together with the
-- fully qualified [schema].[table].[column] that produced at least one hit.
-- Dialect: T-SQL (SQL Server).
DECLARE @SearchStr nvarchar(100);
SET @SearchStr = '';

-- A previous run that aborted midway leaves #Results behind in the session;
-- drop it so the script can be re-run without "object already exists".
IF OBJECT_ID('tempdb..#Results') IS NOT NULL
    DROP TABLE #Results;

CREATE TABLE #Results (ColumnName nvarchar(370), ColumnValue nvarchar(3630));

SET NOCOUNT ON;

DECLARE @TableName  nvarchar(256),
        @ColumnName nvarchar(128),
        @FullName   nvarchar(400),
        @Sql        nvarchar(max);

SET @TableName = '';

WHILE @TableName IS NOT NULL
BEGIN
    -- Restart the column cursor for the next table.
    SET @ColumnName = '';

    -- Next table after the current one in bracket-quoted name order;
    -- becomes NULL when no table is left, which ends the outer loop.
    SET @TableName =
    (
        SELECT MIN(QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME))
        FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_SCHEMA = 'SAMPLE SCHEMA HERE'
          AND TABLE_TYPE = 'BASE TABLE'
          AND QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME) > @TableName
          AND OBJECTPROPERTY(
                  OBJECT_ID(QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME)),
                  'IsMSShipped'
              ) = 0
    );

    WHILE (@TableName IS NOT NULL) AND (@ColumnName IS NOT NULL)
    BEGIN
        -- Next searchable column of the current table; NULL when exhausted.
        SET @ColumnName =
        (
            SELECT MIN(QUOTENAME(COLUMN_NAME))
            FROM INFORMATION_SCHEMA.COLUMNS
            WHERE TABLE_SCHEMA = PARSENAME(@TableName, 2)
              AND TABLE_NAME = PARSENAME(@TableName, 1)
              AND DATA_TYPE IN ('char', 'varchar', 'nchar', 'nvarchar', 'int', 'decimal')
              AND QUOTENAME(COLUMN_NAME) > @ColumnName
        );

        IF @ColumnName IS NOT NULL
        BEGIN
            SET @FullName = @TableName + '.' + @ColumnName;

            -- Only the bracket-quoted identifiers are concatenated; the label
            -- and the search value travel as sp_executesql parameters, so
            -- quotes inside them can neither break nor alter the statement.
            -- The column is cast to text before comparing so that int/decimal
            -- columns do not raise a conversion error on non-numeric search
            -- text. NOTE(review): numeric columns are therefore matched on
            -- their text form (an int 0 no longer equals the empty string).
            SET @Sql =
                  N'SELECT @FullName, LEFT(CAST(' + @ColumnName + N' AS nvarchar(max)), 3630)'
                + N' FROM ' + @TableName + N' WITH (NOLOCK)'
                + N' WHERE CAST(' + @ColumnName + N' AS nvarchar(max)) = @SearchStr'
                + N' OR ' + @ColumnName + N' IS NULL';

            INSERT INTO #Results (ColumnName, ColumnValue)
            EXEC sys.sp_executesql
                @Sql,
                N'@FullName nvarchar(400), @SearchStr nvarchar(100)',
                @FullName = @FullName,
                @SearchStr = @SearchStr;
        END
    END
END

-- ColumnName is stored as [schema].[table].[column]; everything before the
-- second dot is reported as tableName.
SELECT DISTINCT
    LEFT(ColumnName, CHARINDEX('.', ColumnName, CHARINDEX('.', ColumnName) + 1) - 1) AS tableName,
    ColumnName
FROM #Results
ORDER BY tableName;

DROP TABLE #Results;
我想把它转换成一个 DataFrame,从中可以看出每种动物类型可能吃什么,结果大致如下图所示:
在尝试生成这样的表时,我觉得自己的做法不太对,因为需要写相当多行代码。所以我的问题是:有没有一个现成的函数可以把这个字典映射成 DataFrame,得到上面那样的表?
答案 0 :(得分:4)
最简单的方式
使用 pd.Series.str.get_dummies
# Animal group -> list of diet labels that apply to it.
dct = {
    'Anemones & allies': ['Carnivore'],
    'Ants, bees & wasps': ['Omnivore', 'Herbivore', 'Nectar', 'Insects', 'Parasite'],
    'Beetles & bugs': ['Herbivore', 'Carnivore', 'Nectar', 'Insects'],
    'Birds': ['Carnivore'],
    'Fishes': ['Carnivore', 'Plankton or Particles'],
}

# Collapse each label list into a single '|'-delimited string, then let
# get_dummies split on that delimiter into one 0/1 indicator column per label.
joined_labels = pd.Series(dct).str.join('|')
joined_labels.str.get_dummies(sep='|')
Carnivore Herbivore Insects Nectar Omnivore Parasite Plankton or Particles
Anemones & allies 1 0 0 0 0 0 0
Ants, bees & wasps 0 1 1 1 1 1 0
Beetles & bugs 1 1 1 1 0 0 0
Birds 1 0 0 0 0 0 0
Fishes 1 0 0 0 0 0 1
更复杂
但可能推荐
from sklearn.preprocessing import MultiLabelBinarizer

# Animal group -> list of diet labels that apply to it.
dct = {
    'Anemones & allies': ['Carnivore'],
    'Ants, bees & wasps': ['Omnivore', 'Herbivore', 'Nectar', 'Insects', 'Parasite'],
    'Beetles & bugs': ['Herbivore', 'Carnivore', 'Nectar', 'Insects'],
    'Birds': ['Carnivore'],
    'Fishes': ['Carnivore', 'Plankton or Particles'],
}

# One Series entry per animal group; each value is that group's label list.
s = pd.Series(dct)

mlb = MultiLabelBinarizer()
# d is the 0/1 indicator matrix (one row per entry of s);
# c holds the label for each column of d.
d = mlb.fit_transform(s)
c = mlb.classes_

# Reattach the animal groups as the row index and the labels as column names.
pd.DataFrame(d, index=s.index, columns=c)
Carnivore Herbivore Insects Nectar Omnivore Parasite Plankton or Particles
Anemones & allies 1 0 0 0 0 0 0
Ants, bees & wasps 0 1 1 1 1 1 0
Beetles & bugs 1 1 1 1 0 0 0
Birds 1 0 0 0 0 0 0
Fishes 1 0 0 0 0 0 1
答案 1 :(得分:1)
先用 dict(d)创建 DataFrame,然后使用 get_dummies
# Build a frame with one column per key of d (each list becomes a Series, aligned
# by position), stack it into a Series indexed by (position, key), one-hot encode
# the labels, then total the indicators per key (index level 1).
# groupby(level=1, sort=False).sum() replaces DataFrame.sum(level=1), which was
# removed in pandas 2.0; sort=False keeps the keys in first-seen order.
# dtype=int keeps the indicators as 0/1 integers on pandas versions that default to bool.
pd.get_dummies(
    pd.DataFrame({k: pd.Series(v) for k, v in d.items()}).stack(),
    dtype=int,
).groupby(level=1, sort=False).sum()
Out[130]:
Carnivore Herbivore Insects Nectar Omnivore Parasite \
Anemones & allies 1 0 0 0 0 0
Ants, bees & wasps 0 1 1 1 1 1
Beetles & bugs 1 1 1 1 0 0
Birds 1 0 0 0 0 0
Fishes 1 0 0 0 0 0
Plankton or Particles
Anemones & allies 0
Ants, bees & wasps 0
Beetles & bugs 0
Birds 0
Fishes 1