我最初发布的问题比较简略,因为我以为它很容易理解;但从你们的评论来看是我错了,所以我重新编辑了这个问题:
下面是代码。我想不使用循环来实现,这应该用 pandas 来完成吗?
import pandas as pd
# Input series: a value column and two 0/1 signal columns.
myval = [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]
s1 = [0, 0, 1, 1, 0, 0, 1, 1, 0, 1]
s2 = [0, 0, 1, 0, 1, 0, 1, 0, 1, 1]
# Only index 0 is a real seed value; every later slot is overwritten by the
# loop below.
posin = [10, 0, 0, 0, 0, 0, 0, 0, 0, 0]
posout = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
sig = ['-']
d = {'myval': myval, 's1': s1, 's2': s2}
d = pd.DataFrame(d)
# Normally the DataFrame would already hold all six columns, but the part
# below could not be made to work inside the DataFrame (that is the problem).
# The real DataFrame has 5000+ rows and this has to run for 100+ sets of
# values, so this row-by-row loop is too slow.
#
# Each row depends on the previous row's posin/posout, so the rows are
# filled in strictly in order.
for i in range(1, len(myval)):
    if s1[i] == 1 and s2[i] == 1 and posin[i - 1] != 0:
        # Both signals set while posin is non-zero: move it into posout.
        posin[i] = 0
        posout[i] = posin[i - 1] / myval[i]
        sig.append('a')
    elif s1[i] == 0 and s2[i] == 1 and posin[i - 1] == 0:
        # Only s2 set while posin is zero: move posout back into posin.
        posin[i] = posout[i - 1] * myval[i]
        posout[i] = 0
        sig.append('v')
    else:
        # No transition: carry the previous row forward unchanged.
        posin[i] = posin[i - 1]
        posout[i] = posout[i - 1]
        sig.append('-')
d2 = pd.DataFrame({'posin': posin, 'posout': posout, 'sig': sig})
d = d.join(d2)
# the result wanted:
print(d)
myval s1 s2 posin posout sig
0 0.0 0 0 10.000000 0.000000 -
1 1.1 0 0 10.000000 0.000000 -
2 2.2 1 1 0.000000 4.545455 a
3 3.3 1 0 0.000000 4.545455 -
4 4.4 0 1 20.000000 0.000000 v
5 5.5 0 0 20.000000 0.000000 -
6 6.6 1 1 0.000000 3.030303 a
7 7.7 1 0 0.000000 3.030303 -
8 8.8 0 1 26.666667 0.000000 v
9 9.9 1 1 0.000000 2.693603 a
有人能帮忙吗?
谢谢!!
答案 0 :(得分:0)
我原本以为下面的写法可以工作(如评论中所述),但(出乎意料的是?)这样使用 np.where 会引发 ValueError: shape mismatch: objects cannot be broadcast to a single shape
(用一维条件从二维对象中进行选择):
# Attempted single-expression selection with nested np.where:
#   - where s1 & s2 is truthy, take the first frame,
#   - otherwise where s1 is truthy, take the second frame,
#   - otherwise take the third frame.
# NOTE: the accompanying text reports that this call raises
# "ValueError: shape mismatch: objects cannot be broadcast to a single shape"
# (a 1-D condition selecting between 2-D frames), so it is shown only as the
# approach that does not work.
np.where(df.s1 & df.s2,
pd.DataFrame({"bin": 0, "bout": df.bin.diff() / df.myval}),
np.where(df.s1,
pd.DataFrame({"bin": df.bout.diff() * df.myval, "bout": 0}),
pd.DataFrame({"bin": df.bin.diff(), "bout": df.bout.diff()})))
作为 np.where 的替代方案,我会分步骤构建结果:
# Row-to-row changes of the two position columns, computed once and reused.
bin_step = df.bin.diff()
bout_step = df.bout.diff()

# Stage 1: start every row from the same default values.
res = pd.DataFrame({"bin": 0, "bout": bin_step / df.myval})

# Stage 2: overwrite the rows where s1 is 1 and s2 is 0.
s1_only = (df.s1 == 1) & (df.s2 == 0)
s1_only_values = pd.DataFrame({"bin": bout_step * df.myval, "bout": 0})
res.update(s1_only_values.loc[s1_only])

# Stage 3: overwrite the rows where both s1 and s2 are 0.
neither = (df.s1 == 0) & (df.s2 == 0)
neither_values = pd.DataFrame({"bin": bin_step, "bout": bout_step})
res.update(neither_values.loc[neither])
然后可以把它赋值给 df 中的两列:
# Write the staged result back into df's "bin" and "bout" columns.
df[["bin", "bout"]] = res
答案 1 :(得分:0)
下面的代码参照了 Andy Hayden 的回答:
import pandas as pd
# Attempt to replace the row-by-row loop with staged DataFrame.update calls.
# The table printed after this snippet is the output it actually produced.
myval = [0.0,1.1, 2.2, 3.3, 4.4, 5.5,6.6, 7.7, 8.8,9.9]
s1 = [0,0,1,1,0,0,1,1,0,1]
s2 = [0,0,1,0,1,0,1,0,1,1]
posin = [10,0,0,0,0,0,0,0,0,0]
posout = [0,0,0,0,0,0,0,0,0,0]
# NOTE(review): this list is not referenced again in this snippet.
sig = ['-']
d = {'myval' : myval, 's1' : s1, 's2' : s2,'posin' : posin , 'posout' : posout }
d = pd.DataFrame(d)
# Default values for every row before any update is applied.
# NOTE(review): diff() yields the change from the previous row, whereas the
# loop in the question reads the previous row's value itself (posin[i-1]) --
# confirm this substitution is intended.
res = pd.DataFrame({"posin": 10, 'sig' : '-', "posout": d.posin.diff() / d.myval})
# Overwrite rows where s1 == 1, s2 == 1 and posin differs from the previous row.
res.update(pd.DataFrame({"posin": 0,
'sig' : 'a',
"posout":d.posin.diff() / d.myval }
).loc[(d.s1 == 1) & (d.s2 == 1) & (d.posin.diff() != 0) ])
# NOTE(review): `&` binds tighter than `==`, so the trailing `== 0` below is
# applied to the whole combined `&` expression, not to d.posin.diff() alone.
# The intended mask was probably
# (d.s1 == 0) & (d.s2 == 1) & (d.posin.diff() == 0) -- confirm. This may be
# why every row in the printed output is marked 'v'.
res.update(pd.DataFrame({"posin": d.posout.diff() * d.myval,
'sig' : 'v',
"posout": 0}
).loc[(d.s1 == 0) & (d.s2 == 1) & (d.posin.diff()) == 0])
# Copy the three computed columns back into d.
d[["posin", "posout", 'sig']] = res
# Python 2 print statement.
print d
myval posin posout s1 s2 sig
0 0.0 10 0 0 0 v
1 1.1 0 0 0 0 v
2 2.2 0 0 1 1 v
3 3.3 0 0 1 0 v
4 4.4 0 0 0 1 v
5 5.5 0 0 0 0 v
6 6.6 0 0 1 1 v
7 7.7 0 0 1 0 v
8 8.8 0 0 0 1 v
9 9.9 0 0 1 1 v