获取数据框列中元素出现的所有不同类型的模式

时间:2019-04-03 06:42:14

标签: python-3.x pandas duplicates

我有一个仅包含一列的数据集。我需要找出该列中数据排列所呈现的所有模式。

我尝试使用 pd.duplicated() 和 .map 函数进行操作,但都不能给出正确的结果。

例如:

数据集:

Column A [1,2,3,1,2,3,4,1,2,3,1,2,3,4]

所需结果应为 [1,2,3],[1,2],[3,1,2] 等

解决方案可以使用 Python 中的任何软件包来实现。

1 个答案:

答案 0 :(得分:0)

老式的双重循环虽然不够优雅,但确实可行:

我把每个切片先转换为元组再加入(元组是可哈希的),这样就可以用集合(set)去除重复项。

# Sample values of the single column being analysed.
ls = [1,2,3,1,2,3,4,1,2,3,1,2,3,4]

# Collect every distinct contiguous run ("pattern") found in ls.
# i is the inclusive start index and j the exclusive end index, so j starts at
# i + 1 to skip the empty slice and runs to len(ls) to include the last element.
# Each slice is converted to a tuple because lists are unhashable and cannot be
# set members; the set comprehension removes duplicates without first building
# an intermediate list.
patterns = {
    tuple(ls[i:j])
    for i in range(len(ls))
    for j in range(i + 1, len(ls) + 1)
}

# Bare expression so a REPL / notebook still displays the result.
patterns

输出

{(1,),
 (1, 2),
 (1, 2, 3),
 (1, 2, 3, 1),
 (1, 2, 3, 1, 2),
 (1, 2, 3, 1, 2, 3),
 (1, 2, 3, 1, 2, 3, 4),
 (1, 2, 3, 1, 2, 3, 4, 1),
 (1, 2, 3, 1, 2, 3, 4, 1, 2),
 (1, 2, 3, 1, 2, 3, 4, 1, 2, 3),
 (1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 1),
 (1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 1, 2),
 (1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 1, 2, 3),
 (1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 4),
 (1, 2, 3, 4),
 (1, 2, 3, 4, 1),
 (1, 2, 3, 4, 1, 2),
 (1, 2, 3, 4, 1, 2, 3),
 (1, 2, 3, 4, 1, 2, 3, 1),
 (1, 2, 3, 4, 1, 2, 3, 1, 2),
 (1, 2, 3, 4, 1, 2, 3, 1, 2, 3),
 (1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 4),
 (2,),
 (2, 3),
 (2, 3, 1),
 (2, 3, 1, 2),
 (2, 3, 1, 2, 3),
 (2, 3, 1, 2, 3, 4),
 (2, 3, 1, 2, 3, 4, 1),
 (2, 3, 1, 2, 3, 4, 1, 2),
 (2, 3, 1, 2, 3, 4, 1, 2, 3),
 (2, 3, 1, 2, 3, 4, 1, 2, 3, 1),
 (2, 3, 1, 2, 3, 4, 1, 2, 3, 1, 2),
 (2, 3, 1, 2, 3, 4, 1, 2, 3, 1, 2, 3),
 (2, 3, 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 4),
 (2, 3, 4),
 (2, 3, 4, 1),
 (2, 3, 4, 1, 2),
 (2, 3, 4, 1, 2, 3),
 (2, 3, 4, 1, 2, 3, 1),
 (2, 3, 4, 1, 2, 3, 1, 2),
 (2, 3, 4, 1, 2, 3, 1, 2, 3),
 (2, 3, 4, 1, 2, 3, 1, 2, 3, 4),
 (3,),
 (3, 1),
 (3, 1, 2),
 (3, 1, 2, 3),
 (3, 1, 2, 3, 4),
 (3, 1, 2, 3, 4, 1),
 (3, 1, 2, 3, 4, 1, 2),
 (3, 1, 2, 3, 4, 1, 2, 3),
 (3, 1, 2, 3, 4, 1, 2, 3, 1),
 (3, 1, 2, 3, 4, 1, 2, 3, 1, 2),
 (3, 1, 2, 3, 4, 1, 2, 3, 1, 2, 3),
 (3, 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 4),
 (3, 4),
 (3, 4, 1),
 (3, 4, 1, 2),
 (3, 4, 1, 2, 3),
 (3, 4, 1, 2, 3, 1),
 (3, 4, 1, 2, 3, 1, 2),
 (3, 4, 1, 2, 3, 1, 2, 3),
 (3, 4, 1, 2, 3, 1, 2, 3, 4),
 (4,),
 (4, 1),
 (4, 1, 2),
 (4, 1, 2, 3),
 (4, 1, 2, 3, 1),
 (4, 1, 2, 3, 1, 2),
 (4, 1, 2, 3, 1, 2, 3),
 (4, 1, 2, 3, 1, 2, 3, 4)}