尝试使用函数和apply()填充数据帧缺失值

时间:2017-07-02 18:58:31

标签: python pandas

我正在尝试根据 Country 列的取值,用各国对应的不同数值来填充数据集中 AverageTemperature 列的缺失值:

def avgtemp(cols):
    """Return one row's temperature, or a per-country fallback if it is missing.

    Written to be passed to ``DataFrame.apply(..., axis=1)`` over the columns
    ``['AverageTemperature', 'Country']`` (in that order), so ``cols`` is a
    single row of that selection.

    Intended result: the row's own temperature when it is present, otherwise
    a hard-coded constant chosen by country, with 8.9931 for any country that
    is not listed.

    NOTE: as written, the function raises ``TypeError`` on the first row
    because of the ``state.isnull(...)`` call below.
    """
    # Integer keys pick the two selected columns by position within the row.
    # NOTE(review): recent pandas versions deprecate positional integer keys
    # on a label-indexed Series; cols.iloc[0] or cols['AverageTemperature']
    # avoid that - confirm against the pandas version in use.
    AverageTemperature = cols[0]
    Country = cols[1]

    # BUG: `state` is the DataFrame, and DataFrame.isnull() accepts no value
    # argument, hence "isnull() takes 1 positional argument but 2 were given".
    # The scalar missing-value check is pd.isnull(AverageTemperature).
    if state.isnull(AverageTemperature):

        if Country == "Brazil":
            return 23.770601

        elif Country == "Russia":
            return 1.808555

        # NOTE(review): "United State" may be a typo for "United States" -
        # verify against the values actually present in the Country column.
        elif Country == "United State":
            return 10.701555

        elif Country == "Canada":
            return -1.321079

        elif Country == "India":
            return 22.534576

        elif Country == "China":
            return 11.206830

        elif Country == "Australia":
            return 17.664222

        # Fallback for every country not listed above.
        else:
            return 8.9931

    # Temperature is present: keep it unchanged.
    else:
        return AverageTemperature
# Call avgtemp once per row (axis=1) and overwrite the column with the results.
state['AverageTemperature'] = state[['AverageTemperature','Country']].apply(avgtemp,axis=1)

但我一直收到下面这个错误:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
 in ()
----> 1 state['AverageTemperature'] = state[['AverageTemperature','Country']].apply(avgtemp,axis=1)

C:\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds)
   4059                     if reduce is None:
   4060                         reduce = True
-> 4061                     return self._apply_standard(f, axis, reduce=reduce)
   4062             else:
   4063                 return self._apply_broadcast(f, axis)

C:\Anaconda3\lib\site-packages\pandas\core\frame.py in _apply_standard(self, func, axis, ignore_failures, reduce)
   4155             try:
   4156                 for i, v in enumerate(series_gen):
-> 4157                     results[i] = func(v)
   4158                     keys.append(v.name)
   4159             except Exception as e:

 in avgtemp(cols)
      3     Country = cols[1]
      4 
----> 5     if state.isnull(AverageTemperature):
      6 
      7         if Country == "Brazil":

TypeError: ('isnull() takes 1 positional argument but 2 were given', 'occurred at index 0')

1 个答案:

答案 0 :(得分:1)

您可以使用 loc 配合 isnull 定位缺失值,再使用 map 按国家进行替换:

# Small demo frame: three rows with a missing temperature and one row that
# already has a value and must be left untouched.
state = pd.DataFrame(
    {
        'Country': ['Brazil', 'Russia', 'Slovakia', 'Russia'],
        'AverageTemperature': [np.nan, np.nan, np.nan, 23],
    }
)
print(state)
   AverageTemperature   Country
0                 NaN    Brazil
1                 NaN    Russia
2                 NaN  Slovakia
3                23.0    Russia

# Per-country fallback temperatures (the constants from the question's
# avgtemp function). Countries not listed here get the default further down.
d = {
    "Brazil": 23.770601,
    "Russia": 1.808555,
    # NOTE(review): key spelled exactly as in the question; the data may
    # actually contain "United States" - confirm before relying on it.
    "United State": 10.701555,
    "Canada": -1.321079,
    "India": 22.534576,
    "China": 11.206830,
    "Australia": 17.664222,
}

# Boolean mask of the rows whose temperature is missing.
mask = state['AverageTemperature'].isnull()
# Fill only those rows; a country that is not a key of d maps to NaN, so the
# row stays missing after this step.
state.loc[mask, 'AverageTemperature'] = state.loc[mask, 'Country'].map(d)
# Every NaN still left belongs to an unlisted country: use the default value.
state['AverageTemperature'] = state['AverageTemperature'].fillna(8.9931)
print(state)
   AverageTemperature   Country
0           23.770601    Brazil
1            1.808555    Russia
2            8.993100  Slovakia
3           23.000000    Russia