根据pandas中另一列的状态创建一个新列

时间:2017-07-23 21:51:37

标签: python pandas

假设我想创建一个新列,用来计算自上一次State为1以来经过的天数。例如,现有数据是下面的前三列,第四列是我想要的结果。

from random import random

import numpy as np
from numpy.random import choice

import matplotlib.pylab as plt
import matplotlib.colors as mcolors
from matplotlib import cm

import matplotlib.animation as animation



def hazard(p):
    """Run one random trial that succeeds with probability ``p``.

    A uniform sample is taken from ``random()`` first, then ``p`` is
    checked; an ``AssertionError`` is raised when ``p`` is outside [0, 1].

    Returns:
        True when the sample is less than or equal to ``p``, else False.
    """
    draw = random()
    assert 0 <= p <= 1
    return draw <= p


def chg(case):
    """Cap ``case`` at 1: values above 1 become ``1.``, others pass through."""
    return 1. if case > 1. else case


def spreadfire(forest):
    """Advance the fire by one step, igniting cells around each burning cell.

    A cell whose value is ``2.`` is treated as burning. For every burning
    cell in a snapshot taken on entry, a jump of 0, 1 or 40 cells is drawn
    independently for each axis (probabilities 0.4999 / 0.4999 / 0.0002).
    The rows and columns visited are the ranges around the cell with a
    spacing of ``jump + 1``, clipped to the grid; each range holds at most
    three positions. Every visited cell is set to ``2.`` with probability
    equal to its snapshot value capped at 1 (``chg`` followed by ``hazard``).

    Args:
        forest: 2-D NumPy array of cell values; it is modified in place.

    Returns:
        The same ``forest`` array that was passed in.
    """
    n, m = forest.shape
    # Ignition is decided from a frozen copy, so cells set alight during this
    # call do not themselves spread fire until the next call.
    snapshot = np.copy(forest)

    jumps = [0, 1, 40]
    jump_probs = [0.4999, 0.4999, 0.0002]

    # np.argwhere lists the burning cells in row-major order, i.e. the same
    # order a nested row/column scan would visit them, so the sequence of
    # random draws is unchanged.
    for i, j in np.argwhere(snapshot == 2.):
        jump_x = choice(jumps, p=jump_probs)
        jump_y = choice(jumps, p=jump_probs)

        # range() works on both Python 2 and 3 (xrange is Python 2 only).
        rows = range(max(0, i - 1 - jump_y), min(n, i + 2 + jump_y), jump_y + 1)
        cols = range(max(0, j - 1 - jump_x), min(m, j + 2 + jump_x), jump_x + 1)

        for y in rows:
            for x in cols:
                # NOTE(review): any snapshot value above 1 (a burning 2., or
                # the 3. written by forestfire) is capped to probability 1
                # and therefore always becomes 2. here - confirm intended.
                if hazard(chg(snapshot[y, x])):
                    forest[y, x] = 2.

    return forest


def forestfire(forest):
    """Run the fire simulation to completion and return it as an animation.

    ``spreadfire`` is applied repeatedly until no cell equals ``2.``.
    After every step a copy of the grid, with its ``2.`` cells replaced by
    ``3.``, is queued. From step ``firefront`` onward the oldest queued copy
    is removed and every cell that is ``3.`` in it is set to ``3.`` in the
    live grid. One image is drawn per step. The step counter is printed
    before every step except the first.

    Args:
        forest: 2-D NumPy array of cell values; it is modified in place.

    Returns:
        A ``matplotlib.animation.ArtistAnimation`` with one frame per step.
    """
    fig, _ = plt.subplots()

    movie, hashes = [], []

    # Colormap: 175 evenly spaced colours over the displayed range
    # [0, 3.5], i.e. 50 per unit of cell value - a green gradient, 25
    # entries of the darkest green, then solid red, then solid gray.
    greens = cm.Greens(np.linspace(0, 1, num=50))
    greensfill = cm.Greens(np.ones(25))
    red = [(1, 0, 0, 1)] * len(greens)
    gray = [(.5, .5, .5, 1)] * len(greens)

    colors = np.vstack((greens, greensfill, red, gray))
    mycmap = mcolors.LinearSegmentedColormap.from_list('my_colormap', colors)

    # Number of steps before the oldest queued copy is applied to the grid.
    firefront = 5
    k = 0

    while True:
        forest = spreadfire(forest)

        # Queue a copy of this step in which burning cells are marked 3.
        c = np.copy(forest)
        c[c == 2.] = 3.
        hashes.append(c)

        if k >= firefront:
            # The oldest copy is consumed exactly once, so pop it here;
            # pop(0) also avoids list.remove's element comparison on arrays.
            forest[hashes.pop(0) == 3.] = 3.

        im = plt.imshow(forest, animated=True, cmap=mycmap,
                        interpolation="none", origin='lower',
                        vmin=0, vmax=3.5)
        movie.append([im])

        if np.count_nonzero(forest == 2.) == 0:
            break

        k += 1
        # print(k) is valid on both Python 2 and 3; `print k` is not.
        print(k)

    return animation.ArtistAnimation(fig, movie, blit=True, repeat_delay=100)

不使用for循环,用pandas的方法该怎么做?

2 个答案:

答案 0 :(得分:3)

您也可以先按State分组,对State == 1的行用同组内Days的差值(diff)来填充;然后,对于State == 0的行,其NaN值可以用对应的Days列的值来填充。

# Rows with State == 1: difference in Days from the previous row of the same
# State group (0 for the first row of the group, which has no predecessor).
df.loc[df.State == 1, 'Since_Days'] = df.groupby('State')['Days'].diff().fillna(0)
# Rows not filled above keep their own Days value. The result is assigned
# back instead of calling fillna(inplace=True) on the column selection, which
# is a chained in-place modification (deprecated, and a no-op under pandas
# Copy-on-Write).
df['Since_Days'] = df['Since_Days'].fillna(df['Days'])
print(df)

结果:

   Index  State  Days  Since_Days
0      1      1     0         0.0
1      2      0    20        20.0
2      3      0    40        40.0
3      4      1    55        55.0
4      5      1    60         5.0
5      6      1    70        10.0

答案 1 :(得分:2)

要减去的值可以这样构造:

# Keep Days only on rows where State == 1 (NaN elsewhere), carry the last
# kept value forward, then shift down one row so each row sees the value
# from the rows before it.
ser = df['Days'].where(df['State'].eq(1)).ffill().shift()

如果从原始的Days列中减去该值,就会得到:

# Subtract the reference value from Days. fill_value=0 substitutes 0 where
# ser is NaN (the first row, left empty by shift), so that row keeps its
# plain Days value instead of becoming NaN.
df['Days'].sub(ser, fill_value=0).astype('int')
Out: 
0     0
1    20
2    40
3    55
4     5
5    10
Name: Days, dtype: int64