`xarray.Dataset.groupby()` 中可能存在的 bug?

时间:2016-08-08 14:00:32

标签: python python-xarray

我在Mac OS X El Capitan 10.11.6上使用Xarray版本0.8.0,Python 3.5.1。

以下代码按预期工作。

id_data_array = xarray.DataArray([280, 306, 280], coords={"index": range(3)})
random = numpy.random.rand(3)
score_data_array = xarray.DataArray(random, coords={"index": range(3)})
score_dataset = xarray.Dataset({"id": id_data_array, "score": score_data_array})
print(score_dataset)
print("======")
print(score_dataset.groupby("id").count())

输出:

<xarray.Dataset>
Dimensions:  (index: 3)
Coordinates:
  * index    (index) int64 0 1 2
Data variables:
    id       (index) int64 280 306 280
    score    (index) float64 0.8358 0.7536 0.9495
======
<xarray.Dataset>
Dimensions:  (id: 2)
Coordinates:
  * id       (id) int64 280 306
Data variables:
    score    (id) int64 2 1
In [ ]:

但是,如果我只做一处小改动,让id_data_array的元素互不相同,就会出现错误。

代码:

id_data_array = xarray.DataArray([280, 306, 120], coords={"index": range(3)})
random = numpy.random.rand(3)
score_data_array = xarray.DataArray(random, coords={"index": range(3)})
score_dataset = xarray.Dataset({"id": id_data_array, "score": score_data_array})
print(score_dataset)
print("======")
print(score_dataset.groupby("id").count())

输出:

<xarray.Dataset>
Dimensions:  (index: 3)
Coordinates:
  * index    (index) int64 0 1 2
Data variables:
    id       (index) int64 280 306 120
    score    (index) float64 0.1353 0.0437 0.1687
======
---------------------------------------------------------------------------
InvalidIndexError                         Traceback (most recent call last)
<ipython-input-92-cc412270ba2e> in <module>()
      5 print(score_dataset)
      6 print("======")
----> 7 print(score_dataset.groupby("id").count())

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/common.py in wrapped_func(self, dim, keep_attrs, **kwargs)
     44                 return self.reduce(func, dim, keep_attrs,
     45                                    numeric_only=numeric_only, allow_lazy=True,
---> 46                                    **kwargs)
     47         return wrapped_func
     48 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/groupby.py in reduce(self, func, dim, keep_attrs, **kwargs)
    605         def reduce_dataset(ds):
    606             return ds.reduce(func, dim, keep_attrs, **kwargs)
--> 607         return self.apply(reduce_dataset)
    608 
    609     def assign(self, **kwargs):

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/groupby.py in apply(self, func, **kwargs)
    562         kwargs.pop('shortcut', None)  # ignore shortcut if set (for now)
    563         applied = (func(ds, **kwargs) for ds in self._iter_grouped())
--> 564         combined = self._concat(applied)
    565         result = self._maybe_restore_empty_groups(combined)
    566         return result

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/groupby.py in _concat(self, applied)
    570         concat_dim, positions = self._infer_concat_args(applied_example)
    571 
--> 572         combined = concat(applied, concat_dim)
    573         reordered = _maybe_reorder(combined, concat_dim, positions)
    574         return reordered

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/combine.py in concat(objs, dim, data_vars, coords, compat, positions, indexers, mode, concat_over)
    114         raise TypeError('can only concatenate xarray Dataset and DataArray '
    115                         'objects, got %s' % type(first_obj))
--> 116     return f(objs, dim, data_vars, coords, compat, positions)
    117 
    118 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/combine.py in _dataset_concat(datasets, dim, data_vars, coords, compat, positions)
    276     if coord is not None:
    277         # add concat dimension last to ensure that its in the final Dataset
--> 278         result[coord.name] = coord
    279 
    280     return result

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/dataset.py in __setitem__(self, key, value)
    536             raise NotImplementedError('cannot yet use a dictionary as a key '
    537                                       'to set Dataset values')
--> 538         self.update({key: value})
    539 
    540     def __delitem__(self, key):

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/dataset.py in update(self, other, inplace)
   1434             dataset.
   1435         """
-> 1436         variables, coord_names, dims = dataset_update_method(self, other)
   1437 
   1438         return self._replace_vars_and_dims(variables, coord_names, dims,

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/merge.py in dataset_update_method(dataset, other)
    490     priority_arg = 1
    491     indexes = dataset.indexes
--> 492     return merge_core(objs, priority_arg=priority_arg, indexes=indexes)

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/merge.py in merge_core(objs, compat, join, priority_arg, explicit_coords, indexes)
    371 
    372     coerced = coerce_pandas_values(objs)
--> 373     aligned = deep_align(coerced, join=join, copy=False, indexes=indexes)
    374     expanded = expand_variable_dicts(aligned)
    375 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/alignment.py in deep_align(list_of_variable_maps, join, copy, indexes)
    146             out.append(variables)
    147 
--> 148     aligned = partial_align(*targets, join=join, copy=copy, indexes=indexes)
    149 
    150     for key, aligned_obj in zip(keys, aligned):

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/alignment.py in partial_align(*objects, **kwargs)
    109         valid_indexers = dict((k, v) for k, v in joined_indexes.items()
    110                               if k in obj.dims)
--> 111         result.append(obj.reindex(copy=copy, **valid_indexers))
    112     return tuple(result)
    113 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/dataset.py in reindex(self, indexers, method, tolerance, copy, **kw_indexers)
   1216 
   1217         variables = alignment.reindex_variables(
-> 1218             self.variables, self.indexes, indexers, method, tolerance, copy=copy)
   1219         return self._replace_vars_and_dims(variables)
   1220 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/xarray/core/alignment.py in reindex_variables(variables, indexes, indexers, method, tolerance, copy)
    218             target = utils.safe_cast_to_index(indexers[name])
    219             indexer = index.get_indexer(target, method=method,
--> 220                                         **get_indexer_kwargs)
    221 
    222             to_shape[name] = len(target)

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pandas/indexes/base.py in get_indexer(self, target, method, limit, tolerance)
   2080 
   2081         if not self.is_unique:
-> 2082             raise InvalidIndexError('Reindexing only valid with uniquely'
   2083                                     ' valued Index objects')
   2084 

InvalidIndexError: Reindexing only valid with uniquely valued Index objects

在我看来,这像是一个bug,因为如果这是预期行为,那就太奇怪了。用来分组的DataArray的所有元素互不相同的情况,理应也被支持吧?

更新

我现在已经卸载并重新安装了Xarray。新安装的Xarray版本为0.8.1,上述代码似乎可以正常工作。所以这很可能确实是Xarray 0.8.0中的一个bug。

0 个答案:

没有答案