def lag1(x):
return x[(len(x)-1)]
x=pd.Series([12,3,4,5,6])
lag1(x)
Out[65]: 6
dat.shape
Out[70]: (247619, 33)
d2=dat.groupby('PATID_CD').agg(lag1)
Traceback (most recent call last):
File "<ipython-input-71-f514757a3da8>", line 1, in <module>
d2=dat.groupby('PATID_CD').agg(lag1)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 4658, in aggregate
return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 4109, in aggregate
result = self._aggregate_generic(arg, *args, **kwargs)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 4133, in _aggregate_generic
return self._aggregate_item_by_item(func, *args, **kwargs)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 4162, in _aggregate_item_by_item
colg.aggregate(func, *args, **kwargs), data)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 3497, in aggregate
result = self._aggregate_named(func_or_funcs, *args, **kwargs)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 3627, in _aggregate_named
output = func(group, *args, **kwargs)
File "<ipython-input-64-be977293b7b9>", line 2, in lag1
return x[(len(x)-1)]
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\series.py", line 766, in __getitem__
result = self.index.get_value(self, key)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 3103, in get_value
tz=getattr(series.dtype, 'tz', None))
File "pandas\_libs\index.pyx", line 106, in pandas._libs.index.IndexEngine.get_value
File "pandas\_libs\index.pyx", line 114, in pandas._libs.index.IndexEngine.get_value
File "pandas\_libs\index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 958, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 964, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 23
我不知道为什么我的函数不能正常工作,它给了我一个密钥错误,提示该名称不存在。这有点令人困惑。我是在做正确的方法,还是有其他解决方案?
dat.groupby('PATID_CD').agg('mean')
Out[73]:
MONTH_LOOKBACK_NR CCYYMM_CD ... ENG_SPOKEN EVENT_FL
PATID_CD ...
584 12.0 201556.500000 ... 1.0 0.0
4277 12.0 201556.500000 ... 1.0 0.0
我也尝试过:
dat.groupby('PATID_CD').agg(lambda x : x.iloc[-1,:])
这是一个好方法,但是我不能将此函数放入可以与其他函数进行计算的列表中:
def lag1(x):
return x.iloc[-1,:]
d2=dat.groupby(dat['PATID_CD']).agg({'mean','max','min','std','skew', lambda x:len(x),kurtosis,lag1})
Traceback (most recent call last):
File "<ipython-input-86-ac95a8297b5c>", line 1, in <module>
d2=dat.groupby(dat['PATID_CD']).agg({'mean','max','min','std','skew', lambda x:len(x),kurtosis,lag1})
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 4658, in aggregate
return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 4089, in aggregate
result, how = self._aggregate(arg, _level=_level, *args, **kwargs)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\base.py", line 551, in _aggregate
_axis=_axis), None
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\base.py", line 596, in _aggregate_multiple_funcs
results.append(colg.aggregate(arg))
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 3485, in aggregate
(_level or 0) + 1)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 3558, in _aggregate_multiple_funcs
results[name] = obj.aggregate(func)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 3497, in aggregate
result = self._aggregate_named(func_or_funcs, *args, **kwargs)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 3627, in _aggregate_named
output = func(group, *args, **kwargs)
File "<ipython-input-85-6bbffa1ca952>", line 2, in lag1
return x.iloc[-1,:]
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 1472, in __getitem__
return self._getitem_tuple(key)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 2013, in _getitem_tuple
self._has_valid_tuple(tup)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 220, in _has_valid_tuple
raise IndexingError('Too many indexers')
IndexingError:索引器过多 与此相同:
x=pd.Series([12,3,4,5,6])
lag1(x)
Traceback (most recent call last):
File "<ipython-input-85-6bbffa1ca952>", line 5, in <module>
lag1(x)
File "<ipython-input-85-6bbffa1ca952>", line 2, in lag1
return x.iloc[-1,:]
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 1472, in __getitem__
return self._getitem_tuple(key)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 2013, in _getitem_tuple
self._has_valid_tuple(tup)
File "D:\Users\shan xu\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 220, in _has_valid_tuple
raise IndexingError('Too many indexers')
IndexingError: Too many indexers
答案 0 :(得分:0)
您还没有解释您要完成的工作,而且代码中也不清楚。我在这里的代表不足,无法将其添加为评论,因此请考虑:
1)您的第一个示例是在隐式使用class Location(models.Model):
""" Model representing a Location (attached to Hashtag objects through a M2M relationship) """
name = models.CharField(max_length=14000)
latitude = models.CharField(max_length=30)
longitude = models.CharField(max_length=30)
country = models.CharField(max_length=50)
tweet_date = models.DateTimeField()
class Hashtag(models.Model):
""" Model representing a specific Hashtag serch by user """
search_text = models.CharField(max_length=140, primary_key=True)
location = models.ManyToManyField(Location, blank=True)
histogram = models.ImageField(upload_to='img', blank=True)
def __str__(self):
return self.search_text
def display_locations(self):
""" Function to create a dict by frequency of the locations associated with search_text """
country_list = list(self.location.values_list('country', flat=True).all())
for country in country_list:
location_freq = {i:country_list.count(i) for i in set(country_list)}
return location_freq
@property
def get_histogram(self):
""" Function to create a histogram of locations associated with search_text """
location_freq = self.display_locations()
plt.bar(list(location_freq.keys()), location_freq.values(), color='g')
file_location = 'mapping_twitter/static/mapping_twitter/images/histogram.png'
plt.show()
plt.savefig(file_location)
f = open(file_location)
self.histogram.save('histogram.png', File(f))
的{{1}}上执行的,因此是<img src="{{ hashtag.histogram.url }}" alt="Histogram" />
。您的第二个示例似乎是一个Series
为索引的DataFrame。如果RangeIndex
是例如x[(5-1)] == 6
,仅仅因为您将错误的参数类型(PATID_CD
)传递给了熊猫索引表达式PATID_CD.dtype
docs而得到object
异常。 / p>
2)如果您只想提取组的最后一行,请通过类似这样的方式编写组
KeyError