将两个DataFrames元素与不同的DataFrame进行比较

时间:2016-06-08 08:02:50

标签: python pandas

我有一个包含多个样本的DataFrame

df1 = [[        0     1     2    3    4     5     6     7     8     9     10    11  \
0     NaN   NaN   NaN  NaN  NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   
1    13.4   5.2   7.7 -2.1  1.6  -4.1  -0.5   8.2  15.9  12.9  11.8   9.3   
2    -3.1  -0.6  -5.1 -0.5 -4.1   0.5  -3.6  -5.6  -9.7  -3.6  -4.7  -5.7   
3   -10.3  -1.0  -9.8  0.5 -3.6   1.0  -1.5  -1.6  -5.1  -4.6 -13.3 -10.7   
4     0.0  -5.6  -4.1  1.5  3.0  -1.0   2.6   6.7  12.3   6.6  -0.5   1.0   
5     6.2   0.5   2.6  1.1  1.6   0.5   2.0   0.0  -0.5   0.5   7.7   5.6   
6    -1.6   5.1   6.1 -1.1 -2.1   0.0  -1.5  -9.2 -13.9  -7.1   1.5  -0.5   
7    -6.1  -4.1  -1.5 -1.5 -0.5  -0.5  -0.5  -2.6   2.6  -2.6  -6.6  -3.1   
8    -0.5  -4.1  -6.7  1.5  0.0   0.5   1.0   8.2   8.7   4.1  -3.1   0.6   
9     5.1   7.2   4.6  0.6  1.0   0.0   0.5  -0.5  -7.7  -0.5   5.6   0.5   
10    0.5   2.6   5.7 -2.1  2.1  -1.0   0.0  -8.2 -10.2  -6.2   3.6   0.0   
11   -5.1  -7.2  -4.6  1.0 -1.0   2.0  -2.0   8.7  14.3  10.3   2.1   8.2   
12    0.5  -2.6  -1.6  3.1 -0.5   3.1   4.1  10.8  15.9   9.7   2.5   7.7   
13    2.6   2.1   0.0  0.0 -0.6  -0.5   8.7  -8.7 -15.9 -13.3  -3.0  -9.3   
14   -2.1   4.6   1.0 -2.6  1.1   0.0   0.0  -6.2 -10.8  -9.7  -1.1  -4.1   
15    4.6   5.1   6.2 -0.5  7.7   3.1  -3.6   8.2  19.0  11.7   7.2  12.9   
16    1.6  -6.1  -2.6  0.5  5.1   2.0   1.0   0.0   5.7   5.7   2.1   1.0   
17  -11.8 -10.8 -10.7 -1.5 -6.2  -1.0  -3.1 -10.7 -23.1 -11.3  -6.7 -12.8   
18   -6.2  -0.5  -0.5  0.0 -7.7  -3.6  -7.7   0.0  -3.1   0.0  -4.1  -0.6   
19    9.8   3.6   4.1  1.5 -2.0  -4.6  -1.0  11.2  25.1  14.9   1.5   8.8   
20    7.7   3.0   2.0  0.0  3.6   2.5   3.1   1.6   3.1   2.0   6.2   2.5   
21   -1.1   3.1   3.1 -0.5  7.2   7.2   2.0 -11.3 -26.7 -12.8   3.1  -2.5   
22    0.5  -1.0   0.0  0.5  3.0   0.5  -2.0  -0.5  -2.0  -1.0  -3.1   1.0   
23    1.1  -2.1  -2.6 -1.5 -0.5  -0.5  -2.6   9.2  23.1   6.6  -1.0   1.0   
24   -2.1   3.6   2.1 -1.0  1.1   3.6   1.6  -3.6  -9.3  -5.1   2.0  -2.0   
25    1.6   4.6   5.1  2.0 -4.7  -2.6   1.0  -2.0 -11.8  -2.0   2.1   3.0   
26    0.5  -1.5  -2.6  1.1 -7.7  -9.2   0.5   6.1  15.4   5.6  -2.1   0.0   
27   -6.2 -11.3 -11.8 -0.6 -1.5   2.1   5.1  -3.6  -2.0  -4.6  -6.7 -11.2   
28   -1.0  -4.1  -1.0  0.6  1.0   8.7   7.7  -4.1 -10.3  -2.1   0.0  -1.1   
29    8.2  10.3  10.3  0.0 -0.5   0.5   3.6   3.1   6.2   4.7   6.2  10.3   
..    ...   ...   ...  ...  ...   ...   ...   ...   ...   ...   ...   ...   
98   -1.0  -4.1  -1.0  1.1  0.5  -3.1  -6.7   1.5  10.3   2.5  -1.0  -6.1   
99    5.6   9.8   5.1 -1.6  1.6  -1.0   4.7  11.8  18.4  22.6  11.8  13.3   
100  -1.0   2.0   1.1  0.5 -1.1   1.5  11.2  -5.6  -4.1   8.2   4.1   9.3   
101  -4.6  -9.2  -5.7  2.6 -1.0   1.6  -0.5 -10.8 -14.3 -16.4  -8.2 -14.4   
102   1.5   0.0   2.6 -1.5  0.5  -0.5 -10.7   4.6  -2.1 -13.3  -0.5  -3.1   
103  -2.5  -2.6   1.5 -1.6 -4.1  -2.6  -5.2   2.6   2.6  -2.6  -3.1   2.6   
104  -7.7  -6.7  -7.2  1.6 -6.1  -4.1   1.1  -5.6  -2.1  -2.1 -10.3 -12.8   
105   2.6   7.2  -1.0  0.0  2.0  -9.2   0.0   4.1   1.6   4.1  -1.0  -3.1   
106   5.6   7.2   7.2 -2.6  5.6  -1.1  -2.1   4.6   1.5   7.2   4.1   9.7   
107  -0.5  -5.6   1.0  3.1  1.6   8.8   1.0  -4.6  -1.5  -7.7  -2.6  -6.6   
108  -4.6  -3.6  -8.7  1.5 -2.6   0.0   2.6   0.5   5.1  -4.6  -2.5  -7.7   
109  -3.6   1.5  -5.6 -5.6 -7.7 -10.8  -7.7   2.5  -0.5   5.1   0.5   4.1   
110   0.0   2.1   4.1  0.0 -1.5  -1.5  -8.7  -7.7 -11.3  -8.2   0.5   0.5   
111   7.2   6.6   9.7  4.6  9.7  10.2   8.7  -4.1  -7.2  -7.7   3.6   2.5   
112   5.1   2.6   2.1 -1.6  5.2   5.1   5.6   1.6   3.6   7.7   1.0   8.8   
113  -7.7  -5.1   2.5 -2.5 -8.2  -8.7 -11.8   5.6  16.9  11.3  -1.0  -1.1   
114  -7.2   1.5  10.8  2.0 -6.2  -8.2  -1.0   4.6   9.8  10.3   6.1   2.6   
115  -2.6  -1.5  -2.1  0.5  5.1   5.7  12.8  -9.2 -22.1 -10.3   1.1   2.6   
116  -2.0  -8.2  -9.7 -0.5  3.1   9.2   3.1  -9.2 -16.9 -24.6 -11.8 -11.8   
117   0.5   1.5   5.6  0.0 -5.1  -1.5  -3.6  13.3  24.1   6.2  -1.6  -3.1   
118   1.5   7.2   6.7 -0.5  1.0  -2.1   2.1   8.7  10.3  18.4   9.3  10.8   
119   8.3   6.1   3.1  1.0  8.2   4.1  -0.5 -16.4 -22.6  -6.6   1.5   2.0   
120   9.2   5.7   5.6  1.6  2.6   1.0 -10.3  -5.6   2.6  -2.1   2.1   0.0   
121  -2.1  -4.1  -1.5 -2.6 -2.1  -3.6  -8.2  13.3  22.5   9.7   7.1   4.1   
122  -5.6  -2.6  -2.6 -2.0 -1.0   1.1   6.2   4.6  -2.5  -0.5  -1.0  -0.5   
123   4.1   7.2   6.7  2.5 -1.5   2.0  11.2  -7.2 -14.4  -6.1  -1.0  -1.0   
124   3.1   1.5   4.6 -1.0  0.0  -1.0  -2.5  -3.5  -2.0   0.0   1.5   1.0   
125  -2.1  -2.0  -3.6 -2.1  1.5   1.0  -6.7  -1.1   1.5   0.0  -5.1  -5.1   
126   1.6   3.0  -0.5  1.1  0.0  -1.5   1.0   1.6   4.6   0.5  -4.6   0.0   
127  -2.1  -1.0  -2.6  1.0  0.5  -6.2   4.1   7.1  12.3   3.6   2.0   3.6   

       12    13  
0     NaN   NaN  
1    12.8  14.9  
2    -3.0  -7.7  
3    -5.7 -10.8  
4     2.6   1.5  
5     4.1   4.1  
6    -2.1   1.1  
7    -7.7  -2.6  
8     1.6  -3.6  
9     4.1   0.5  
10   -5.7   3.1  
11    2.1   2.6  
12    8.2   3.6  
13   -2.1  -1.6  
14   -2.5  -2.0  
15    6.1   5.6  
16    0.5   0.0  
17   -5.1 -11.3  
18   -2.0  -6.1  
19    1.5   8.7  
20    2.1   7.2  
21   -0.6  -0.5  
22    2.1   3.0  
23    2.0   2.6  
24   -5.1  -3.6  
25    2.1   2.6  
26    2.5   3.6  
27  -13.8 -12.3  
28   -3.6  -4.1  
29   11.3  11.2  
..    ...   ...  
98    6.2   1.0  
99   12.3   9.7  
100  -2.1   2.1  
101  -7.2  -6.2  
102   3.6  -1.5  
103  -4.1  -3.1  
104 -12.8  -7.2  
105   3.6   0.6  
106  13.8   4.6  
107  -2.5  -3.1  
108 -11.3  -7.2  
109  -2.6  -3.1  
110   4.1   1.1  
111   7.2   4.6  
112   4.1   4.1  
113  -4.1  -2.1  
114   3.6   3.1  
115   0.5  -4.1  
116 -11.3 -12.3  
117   0.0   3.1  
118   9.3  11.8  
119   0.5   3.6  
120  -1.1   1.5  
121   2.6  -0.5  
122  -2.6  -2.6  
123  -1.5   4.1  
124   2.1   1.0  
125  -1.1  -1.0  
126   0.5   5.7  
127   0.6  -2.6  

[128 rows x 14 columns x 60 samples]]

这样的样本我大约有60个,另外我还有一个大小为(1x14)的DataFrame df2。

我想要做的是,检查df2中该行的值是大于、等于还是小于df1中相应行的值,从而为该行中的每个元素得到1、0或-1。

结果应该类似这样:

0 1 0 0 0 0 -1 0 -0....
-1 0 1 1 0 -1 0 0 ....
.
.

有人能帮我解决这个问题吗?

2 个答案:

答案 0 :(得分:2)

好的,我认为以下内容应该有效:

In [208]:
# load some data
t="""1    13.4   5.2   7.7 -2.1  1.6  -4.1  -0.5   8.2  15.9  12.9  11.8   9.3   
2    -3.1  -0.6  -5.1 -0.5 -4.1   0.5  -3.6  -5.6  -9.7  -3.6  -4.7  -5.7   
3   -10.3  -1.0  -9.8  0.5 -3.6   1.0  -1.5  -1.6  -5.1  -4.6 -13.3 -10.7   
4     0.0  -5.6  -4.1  1.5  3.0  -1.0   2.6   6.7  12.3   6.6  -0.5   1.0   
5     6.2   0.5   2.6  1.1  1.6   0.5   2.0   0.0  -0.5   0.5   7.7   5.6   
6    -1.6   5.1   6.1 -1.1 -2.1   0.0  -1.5  -9.2 -13.9  -7.1   1.5  -0.5   
7    -6.1  -4.1  -1.5 -1.5 -0.5  -0.5  -0.5  -2.6   2.6  -2.6  -6.6  -3.1   
8    -0.5  -4.1  -6.7  1.5  0.0   0.5   1.0   8.2   8.7   4.1  -3.1   0.6   
9     5.1   7.2   4.6  0.6  1.0   0.0   0.5  -0.5  -7.7  -0.5   5.6   0.5"""
df = pd.read_csv(io.StringIO(t), delim_whitespace=True, header = None, index_col=[0])
df.reset_index(inplace=True, drop=True)
df

Out[208]:
     1    2    3    4    5    6    7    8     9     10    11    12
0  13.4  5.2  7.7 -2.1  1.6 -4.1 -0.5  8.2  15.9  12.9  11.8   9.3
1  -3.1 -0.6 -5.1 -0.5 -4.1  0.5 -3.6 -5.6  -9.7  -3.6  -4.7  -5.7
2 -10.3 -1.0 -9.8  0.5 -3.6  1.0 -1.5 -1.6  -5.1  -4.6 -13.3 -10.7
3   0.0 -5.6 -4.1  1.5  3.0 -1.0  2.6  6.7  12.3   6.6  -0.5   1.0
4   6.2  0.5  2.6  1.1  1.6  0.5  2.0  0.0  -0.5   0.5   7.7   5.6
5  -1.6  5.1  6.1 -1.1 -2.1  0.0 -1.5 -9.2 -13.9  -7.1   1.5  -0.5
6  -6.1 -4.1 -1.5 -1.5 -0.5 -0.5 -0.5 -2.6   2.6  -2.6  -6.6  -3.1
7  -0.5 -4.1 -6.7  1.5  0.0  0.5  1.0  8.2   8.7   4.1  -3.1   0.6
8   5.1  7.2  4.6  0.6  1.0  0.0  0.5 -0.5  -7.7  -0.5   5.6   0.5

现在使用嵌套的 np.where 对 df 做条件掩码:用 gt 和 lt 分别设置 1 和 -1;如果两个条件都不满足(即两者相等),则设置为 0:

In [213]:
df1 = pd.DataFrame(np.arange(12)).astype(float)
df = pd.DataFrame(np.where(df.gt(df1.squeeze(), axis=0), 1, np.where(df.lt(df1.squeeze(), axis=0), -1, 0)))
df

Out[213]:
    0   1   2   3   4   5   6   7   8   9   10  11
0    1   1   1  -1   1  -1  -1   1   1   1   1   1
1   -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
2   -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
3   -1  -1  -1  -1   0  -1  -1   1   1   1  -1  -1
4    1  -1  -1  -1  -1  -1  -1  -1  -1  -1   1   1
5   -1   1   1  -1  -1  -1  -1  -1  -1  -1  -1  -1
6   -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
7   -1  -1  -1  -1  -1  -1  -1   1   1  -1  -1  -1
8   -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
9    0   0   0   0   0   0   0   0   0   0   0   0
10   0   0   0   0   0   0   0   0   0   0   0   0
11   0   0   0   0   0   0   0   0   0   0   0   0

只要索引和列标签匹配,上述内容就可以正常工作。

答案 1 :(得分:2)

设置

import pandas as pd
import numpy as np

np.random.seed([3,1415])

df1 = pd.DataFrame(np.random.choice(range(10), (20, 10)))
df2 = pd.Series(np.random.choice(range(10), (10,)))

尝试:

1 * (df1 > df2) - (df1 < df2)

    0  1  2  3  4  5  6  7  8  9
0  -1  1  1 -1 -1  0 -1 -1  1 -1
1  -1  1 -1 -1  0  0 -1 -1 -1 -1
2  -1  1  1 -1 -1 -1 -1 -1 -1  1
3   0  1  1 -1 -1 -1 -1 -1  0 -1
4  -1  1  1 -1 -1 -1  1 -1 -1 -1
5  -1  1  1 -1 -1 -1  1 -1 -1 -1
6  -1  1  1 -1  0  1 -1 -1 -1 -1
7  -1  1  1  0 -1 -1  1 -1 -1 -1
8  -1  1  1 -1 -1 -1 -1 -1 -1  1
9  -1  1  1 -1 -1 -1  0 -1  1  1
10 -1  1  1 -1 -1 -1 -1 -1 -1  1
11 -1  1  1 -1 -1 -1  1 -1  1  0
12 -1  1  0 -1 -1 -1 -1  0  1 -1
13  1  1  1 -1 -1  1 -1 -1 -1 -1
14 -1  1 -1 -1 -1  0  1 -1  1 -1
15 -1  1  1 -1 -1  1  0 -1 -1 -1
16 -1  0  1  1 -1 -1  1 -1  1  0
17 -1  1  1 -1 -1  1 -1 -1 -1  1
18 -1  1  1 -1 -1 -1 -1 -1  1  1
19 -1  1  1 -1 -1 -1 -1  0 -1 -1