我有一个数据框,如下所示。其中 period 1 为 1998-01-01 至 1998-02-01,period 2 为 1998-02-02 至 1998-05-02。
共有两个时间段。我想将其转换为另一个数据框,如图2所示。第二个数据框的每个条目都是相应时间段内各ID的价格(Price)平均值。
根据一位朋友的建议,我写了如下代码:
import pandas as pd
# Sample data: every column, including Price, is built from strings.
df=pd.DataFrame({"ID": ["1002", "2001", "1002", "2003", "1002"],
"Time": ["1998-01-02", "1998-02-03", "1998-04-05", "1998-02-03", "1998-01-20"],
"Price": ["34", "45", "23", "30","60"]})
print(df)
# Parse the Time strings into datetimes so they can be tested against the interval.
df.Time=pd.to_datetime(df.Time)
# Period 2 as an interval that includes both end dates.
period2=pd.Interval(pd.Timestamp('1998-02-02'), pd.Timestamp('1998-05-02'), closed='both')
# NOTE: the converted Series is not assigned back, so df['Price'] still holds
# the original strings when pivot_table runs below.
df['Price'].apply(pd.to_numeric)
# Label every row 'period1' first, then overwrite the rows whose Time lies in period2.
df['New']='period1'
df.loc[df.Time.apply(lambda x : x in period2),'New']='period2'
# Mean Price per ID (rows) and period label (columns).
df.pivot_table(index='ID',columns='New',values='Price',aggfunc='mean')
但是我收到以下错误消息,这可能是什么原因?
---------------------------------------------------------------------------
DataError Traceback (most recent call last)
<ipython-input-62-1d40b807c09c> in <module>()
----> 1 df.pivot_table(index='ID',columns='New',values='Price',aggfunc='mean')
~\Anaconda3\lib\site-packages\pandas\core\frame.py in pivot_table(self, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name)
5298 aggfunc=aggfunc, fill_value=fill_value,
5299 margins=margins, dropna=dropna,
-> 5300 margins_name=margins_name)
5301
5302 def stack(self, level=-1, dropna=True):
~\Anaconda3\lib\site-packages\pandas\core\reshape\pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name)
81
82 grouped = data.groupby(keys, observed=dropna)
---> 83 agged = grouped.agg(aggfunc)
84
85 table = agged
~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in aggregate(self, arg, *args, **kwargs)
4656 axis=''))
4657 def aggregate(self, arg, *args, **kwargs):
-> 4658 return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
4659
4660 agg = aggregate
~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in aggregate(self, arg, *args, **kwargs)
4087
4088 _level = kwargs.pop('_level', None)
-> 4089 result, how = self._aggregate(arg, _level=_level, *args, **kwargs)
4090 if how is None:
4091 return result
~\Anaconda3\lib\site-packages\pandas\core\base.py in _aggregate(self, arg, *args, **kwargs)
346 if isinstance(arg, compat.string_types):
347 return self._try_aggregate_string_function(arg, *args,
--> 348 **kwargs), None
349
350 if isinstance(arg, dict):
~\Anaconda3\lib\site-packages\pandas\core\base.py in _try_aggregate_string_function(self, arg, *args, **kwargs)
302 if f is not None:
303 if callable(f):
--> 304 return f(*args, **kwargs)
305
306 # people may try to aggregate on a non-callable attribute
~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in mean(self, *args, **kwargs)
1304 nv.validate_groupby_func('mean', args, kwargs, ['numeric_only'])
1305 try:
-> 1306 return self._cython_agg_general('mean', **kwargs)
1307 except GroupByError:
1308 raise
~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
3972 min_count=-1):
3973 new_items, new_blocks = self._cython_agg_blocks(
-> 3974 how, alt=alt, numeric_only=numeric_only, min_count=min_count)
3975 return self._wrap_agged_blocks(new_items, new_blocks)
3976
~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in _cython_agg_blocks(self, how, alt, numeric_only, min_count)
4044
4045 if len(new_blocks) == 0:
-> 4046 raise DataError('No numeric types to aggregate')
4047
4048 # reset the locs in the blocks to correspond to our
DataError: No numeric types to aggregate
答案 0 :(得分:0)
问题在于您忘记把转换后的结果赋值回原来的列:
# Store the converted values back into the column; apply() returns a new Series.
df['Price'] = df['Price'].apply(pd.to_numeric)
更好的是:
# Cast the Price column to int and store the result back into the column.
df['Price'] = df['Price'].astype(int)
或者:
# Convert the whole Price Series with one pd.to_numeric call and store it back.
df['Price'] = pd.to_numeric(df['Price'])
此外,可以使用 numpy.where 配合由 Series.between 创建的布尔掩码来简化代码:
import numpy as np  # np.where is used below

# Parse the Time strings into datetimes so the range test compares dates.
df.Time=pd.to_datetime(df.Time)
# Convert Price to integers and assign the result back so 'mean' has numeric data.
df['Price']= df['Price'].astype(int)
# Rows whose Time falls within the given dates are labelled 'period2';
# all remaining rows are labelled 'period1'.
df['New'] = np.where(df['Time'].between('1998-02-02','1998-05-02'), 'period2', 'period1')
# Mean Price per ID (rows) and period label (columns).
df1 = df.pivot_table(index='ID',columns='New',values='Price',aggfunc='mean')
print (df1)
New period1 period2
ID
1002 47.0 23.0
2001 NaN 45.0
2003 NaN 30.0