我有以下代码并且它给了我非常奇怪的错误,我的目标是用不同的标签来回填数据的缺失值。错误发生在此行df_filled[is_filled]
如果我更改df_filled=df.asfreq(freq='D').fillna(method='bfill', limit=1).dropna(how='all').drop_duplicates(keep='last')
一切正常,但使用freq = 2D,df_filled [is_filled]没有布尔形式。
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import random
##Generate the Data
np.random.seed(11)
date_today = datetime.now()
ndays = 15
df = pd.DataFrame({'date': [date_today + timedelta(days=(abs(np.random.randn(1))*2)[0]*x) for x in range(ndays)],
'test': pd.Series(np.random.randn(ndays)), 'test2':pd.Series(np.random.randn(ndays))})
df1=pd.DataFrame({'date': [date_today + timedelta(hours=x) for x in range(ndays)],
'test': pd.Series(np.random.randn(ndays)), 'test2':pd.Series(np.random.randn(ndays))})
df2=pd.DataFrame({'date': [date_today + timedelta(days=x)-timedelta(seconds=100*x) for x in range(ndays)],
'test': pd.Series(np.random.randn(ndays)), 'test2':pd.Series(np.random.randn(ndays))})
df=df.append(df1)
df=df.append(df2)
df = df.set_index('date').sort_index()
df = df.mask(np.random.random(df.shape) < .7)
df=df.reset_index()
df['test']=df['test'].astype(str)
df['test2']=df['test2'].astype(str)
df.replace('nan', np.nan, inplace = True)
##
df.set_index(df['date'].dt.date, inplace = True)
df = df[~df.index.duplicated(keep='first')]
df_filled=df.asfreq(freq='2D').fillna(method='bfill', limit=2).dropna(how='all').drop_duplicates(keep='last')
df_filled.set_index(df_filled['date'],inplace=True)
df_filled=df_filled.drop('date',1)
df.set_index(df['date'],inplace=True)
df=df.drop('date',1)
is_filled = (df.isnull() & df_filled.notnull()) | df.notnull()
df_filled[is_filled] ## error happens here
df_filled[is_filled]=df_filled[is_filled].applymap(lambda x: '_2D' if pd.notnull(x) else np.nan)
输出:
ValueError: Must pass DataFrame with boolean values only
我提前感谢您的帮助。
答案 0 :(得分:2)
如果您print(is_filled = (df.isnull() & df_filled.notnull()) | df.notnull())
,那么您会发现True
和NaN
混合在一起。因此,解决方案是将NaN
值替换为False
:
代码的底部代码:
df=df.drop('date',1)
is_filled = (df.isnull() & df_filled.notnull()) | df.notnull()
is_filled = is_filled.fillna(False) # Fix here
df_filled[is_filled]=df_filled[is_filled].applymap(lambda x: '_2D' if pd.notnull(x) else np.nan)