我有一个包含 65 个点的散点图。我想把这些点分到 36 个列表中,每个列表对应一个矩形区域,也就是把绘图所在的空间划分为 36 个区域。有没有一种 "pythonic" 的方法可以做到这一点,而不必编写 36 个条件语句?
例如,为简化起见,下面显示的x和y坐标包含20个点。有没有简单的方法可以将它们分成10个大小相等的区域?
# Sample coordinates: 20 points, x[i] pairs with y[i].
x = [
    484248.77, 481335.51, 473814.14, 488522.14,
    481703.17, 479105.54, 480700.85, 482816.02,
    484579.26, 483984.83, 483278.12, 473877.12,
    484711.57, 481574.8, 484374.02, 483920.51,
    484318.97, 482229.34, 481458.91, 487751.09,
]
y = [
    7421919.17, 7417638.85, 7426640.34, 7420657.74,
    7423742.49, 7422636.23, 7422958.38, 7422550.7,
    7421886.44, 7421707.53, 7415756.43, 7424344.33,
    7422787.38, 7418556.75, 7420368.91, 7421946.9,
    7419293.06, 7424612.41, 7427565.78, 7405473.74,
]
答案 0 :(得分:2)
您可以将 np.digitize 与这个问答(this Q&A)中的方法结合使用。您必须自己决定如何划分区间(bin),即 10x1、5x2、2x5 或 1x10。
import numpy as np
from scipy import sparse
def sort_to_bins_sparse(idx, data, mx=-1):
    """Group the elements of ``data`` by their bin index.

    A CSR matrix is built with exactly one stored entry per row: row ``k``
    holds ``data[k]`` in column ``idx[k]``.  Converting it to CSC lays the
    stored entries out column by column, so the CSC ``data`` / ``indices``
    arrays come out grouped by bin, and ``indptr`` marks where each bin
    starts and ends.

    Parameters
    ----------
    idx : sequence of int
        Bin index of every element of ``data`` (same length as ``data``).
    data : sequence
        Values to be grouped.
    mx : int, optional
        Number of bins.  The default ``-1`` means "infer it" as
        ``idx.max() + 1``.

    Returns
    -------
    tuple of (list of ndarray, list of ndarray)
        ``(values, positions)``, each a list with ``mx`` entries.
        ``values[j]`` holds the elements of ``data`` assigned to bin ``j``
        and ``positions[j]`` holds their positions in the input.
    """
    idx = np.asarray(idx)
    if mx == -1:
        mx = idx.max() + 1
    aux = sparse.csr_matrix(
        (data, idx, np.arange(len(idx) + 1)), (len(idx), mx)
    ).tocsc()
    # indptr has mx + 1 entries; only the interior ones are cut points.
    # Including the final entry (== number of stored elements) would make
    # np.split append a spurious, always-empty extra group.
    cuts = aux.indptr[1:-1]
    return np.split(aux.data, cuts), np.split(aux.indices, cuts)
def bin(data, bincounts):
    """Group points by the cell of a regular grid they fall into.

    NOTE: the name shadows the builtin ``bin``; it is kept unchanged so
    existing callers continue to work.

    Parameters
    ----------
    data : array_like
        One row per coordinate axis, one column per point, e.g. ``(x, y)``.
    bincounts : sequence of int
        Number of equal-width bins along each axis, in the same order as
        the rows of ``data``.

    Returns
    -------
    list of ndarray
        The groups produced by ``sort_to_bins_sparse`` for the flattened
        (row-major) cell indices; each array has one row per axis and one
        column per point in that cell.
    """
    data = np.asanyarray(data)
    # Per axis: lower bin edges run from min to (but excluding) max, so
    # digitize yields 1..b and subtracting 1 gives a 0-based bin number.
    idx = [
        np.digitize(d, np.linspace(d.min(), d.max(), b, endpoint=False)) - 1
        for d, b in zip(data, bincounts)
    ]
    flat = np.ravel_multi_index(tuple(idx), bincounts)
    # Pass the total number of grid cells explicitly.  Letting the helper
    # infer it from flat.max() would drop trailing empty cells and make the
    # length of the result depend on the data.
    n_cells = int(np.prod(bincounts))
    _, members = sort_to_bins_sparse(flat, data[0], n_cells)
    return [data[:, i] for i in members]
# Sample coordinates: 20 points, x[i] pairs with y[i].
x = [
    484248.77, 481335.51, 473814.14, 488522.14,
    481703.17, 479105.54, 480700.85, 482816.02,
    484579.26, 483984.83, 483278.12, 473877.12,
    484711.57, 481574.8, 484374.02, 483920.51,
    484318.97, 482229.34, 481458.91, 487751.09,
]
y = [
    7421919.17, 7417638.85, 7426640.34, 7420657.74,
    7423742.49, 7422636.23, 7422958.38, 7422550.7,
    7421886.44, 7421707.53, 7415756.43, 7424344.33,
    7422787.38, 7418556.75, 7420368.91, 7421946.9,
    7419293.06, 7424612.41, 7427565.78, 7405473.74,
]

# Use 5 bins along x and 2 bins along y, then print the grouped points.
print(bin((x, y), (5, 2)))
输出:
[array([], shape=(2, 0), dtype=float64), array([[ 473814.14, 473877.12],
[7426640.34, 7424344.33]]), array([], shape=(2, 0), dtype=float64), array([[ 479105.54],
[7422636.23]]), array([], shape=(2, 0), dtype=float64), array([[ 481335.51, 481703.17, 480700.85, 481574.8 , 482229.34,
481458.91],
[7417638.85, 7423742.49, 7422958.38, 7418556.75, 7424612.41,
7427565.78]]), array([[ 483278.12],
[7415756.43]]), array([[ 484248.77, 482816.02, 484579.26, 483984.83, 484711.57,
484374.02, 483920.51, 484318.97],
[7421919.17, 7422550.7 , 7421886.44, 7421707.53, 7422787.38,
7420368.91, 7421946.9 , 7419293.06]]), array([[ 487751.09],
[7405473.74]]), array([[ 488522.14],
[7420657.74]]), array([], shape=(2, 0), dtype=float64)]