在matplotlib中获取带有hexbin的bin坐标

时间:2012-10-18 09:11:17

标签: python matplotlib histogram

我使用matplotlib的hexbin方法来计算数据的二维直方图。但我想得到各个六边形中心的坐标,以便对结果做进一步处理。

我在结果上使用get_array()方法获取了值,但我无法弄清楚如何获取bin坐标。

我尝试根据箱数和数据范围来计算这些坐标,但我不知道每个方向上确切的箱数。 gridsize=(10,2)按理说应该可以解决问题,但它似乎不起作用。

有什么想法吗?

3 个答案:

答案 0 :(得分:18)

我认为这很有效。

from __future__ import division
import numpy as np
import math
import matplotlib.pyplot as plt

def generate_data(n):
    """Draw ``n`` correlated 2-D samples.

    Samples come from ``np.random.multivariate_normal`` with zero mean and
    the fixed covariance ``[[0.4, 9], [9, 10]]``.

    Parameters
    ----------
    n : number
        Sample count; converted with ``int()`` so floats such as ``1e4``
        are accepted.

    Returns
    -------
    The array produced by ``multivariate_normal``, one row per sample.
    """
    sample_count = int(n)
    covariance = [[0.4, 9], [9, 10]]
    return np.random.multivariate_normal(
        mean=(0, 0), cov=covariance, size=sample_count)

if __name__ =='__main__':
    # Demo: hexbin a correlated point cloud and mark each non-empty bin
    # with a black dot at the position reported by get_offsets().

    color_map = plt.cm.Spectral_r
    n = 1e4
    points = generate_data(n)

    # The same bounds are used for the hexbin extent and the axis limits.
    xbnds = np.array([-20.0,20.0])
    ybnds = np.array([-20.0,20.0])
    extent = [xbnds[0],xbnds[1],ybnds[0],ybnds[1]]

    fig=plt.figure(figsize=(10,9))
    ax = fig.add_subplot(111)
    x, y = points.T
    # Set gridsize just to make them visually large
    image = plt.hexbin(x,y,cmap=color_map,gridsize=20,extent=extent,mincnt=1,bins='log')
    # NOTE(review): the original comment here claimed that mincnt=1 adds 1
    # to each count -- verify against the matplotlib hexbin documentation;
    # any +1 offset may instead come from bins='log' in older releases.
    counts = image.get_array()
    verts = image.get_offsets()
    # Pair each offset with its count directly instead of indexing through
    # xrange(), which does not exist on Python 3.
    for (binx, biny), count in zip(verts, counts):
        if count:
            plt.plot(binx,biny,'k.',zorder=100)
    ax.set_xlim(xbnds)
    ax.set_ylim(ybnds)
    plt.grid(True)
    cb = plt.colorbar(image,spacing='uniform',extend='max')
    plt.show()

enter image description here

答案 1 :(得分:2)

我很想确认Hooked使用get_offsets()的代码是否有效,但我尝试了上述代码的多种变体来获取中心位置,结果正如Dave所说,get_offsets()始终是空的。我找到的解决方法是改用非空的 image.get_paths()。我的代码通过对顶点取平均值来求中心,这意味着运行时间会稍长一些,但确实有效。

get_paths()返回一组嵌套的x,y坐标,可以对其进行循环,然后取平均值,从而得到每个六边形的中心位置。

我的代码如下:

# `image` is the collection returned by plt.hexbin(...) in the snippet above.
counts=image.get_array() #counts in each hexagon, works great
verts=image.get_offsets() #reported empty by the author; not used below
b=image.get_paths()   #this does work, gives Path([[]][]) which can be plotted

# Iterate over the paths directly: this avoids xrange() (absent on Python 3)
# and avoids reusing the name `x`, which would overwrite a data array called
# `x` at script level.
for path in b:
    # Average the first six vertices of the path to estimate its center.
    xav=np.mean(path.vertices[0:6,0]) #center in x (RA)
    yav=np.mean(path.vertices[0:6,1]) #center in y (DEC)
    plt.plot(xav,yav,'k.',zorder=100)

答案 2 :(得分:0)

我也遇到了同样的问题。我认为需要开发的是一个带有HexagonalGrid对象的框架,这样就可以把它应用于许多不同的数据集(如果能支持N维就更好了)。这是可以做到的,令我惊讶的是Scipy和Numpy都没有提供这样的功能(除了binify之外,似乎也没有其他类似的工具)。

话虽如此,我猜您是想使用hexbin来比较多个分箱后的数据集。这需要某种共同的基准。我通过以下方式使用matplotlib的hexbin实现了这一点:

import numpy as np
import matplotlib.pyplot as plt

def get_data (mean,cov,n=1e3):
    """
    Build reproducible fake 2-D data.

    The global numpy RNG is re-seeded with 101 on every call, so identical
    arguments always produce identical samples.

    mean, cov -- forwarded to np.random.multivariate_normal
    n         -- number of samples; converted with int()

    Returns the two coordinate arrays as a tuple (x, y).
    """
    np.random.seed(101)
    samples = np.random.multivariate_normal(mean=mean, cov=cov, size=int(n))
    # Unpacking the transpose yields one array per coordinate.
    xs, ys = np.transpose(samples)
    return xs, ys

def get_centers (hexbin_output):
    """
    Compute one (x, y) center per path of a hexbin result.

    The half-width and half-height are measured once on the first path and
    reused for every other path; the original author reports this is about
    40% faster than recomputing min/max for each path.

    hexbin_output -- object whose get_paths() returns a sequence of paths,
                     each exposing a ``vertices`` array of (x, y) rows.

    Returns an array of shape (number_of_paths, 2); an empty (0, 2) array
    when there are no paths.

    NOTE(review): this relies on get_paths() yielding one path per cell and
    on the vertex ordering assumed below -- confirm for the matplotlib
    version in use.
    """
    paths = hexbin_output.get_paths()
    if len(paths) == 0:
        # Nothing was binned: return an empty result instead of failing on
        # paths[0].
        return np.empty((0, 2))

    # Vertex indices assumed to hold xmin, xmax, ymin and ymax of a path.
    i_xmin, i_xmax, i_ymin, i_ymax = 3, 0, 5, 2

    # Drop the last vertex (per the original author it is an extra [0,0]
    # appended to the end of the path).
    first = paths[0].vertices[:-1]
    half_width_x = abs(first[i_xmax, 0] - first[i_xmin, 0])/2.0
    half_width_y = abs(first[i_ymax, 1] - first[i_ymin, 1])/2.0

    # Center = lower-left extreme of each path plus the shared half extents.
    centers = [(path.vertices[i_xmin, 0] + half_width_x,
                path.vertices[i_ymin, 1] + half_width_y)
               for path in paths]
    return np.asarray(centers)


# important parts ==>

class Hexagonal2DGrid (object):
    """
    Plain container for the hexbin settings that must stay fixed across
    datasets: gridsize, extent and bins.
    """
    def __init__ (self,gridsize,extent,bins=None):
        # Stored as-is; no validation or conversion is performed.
        self.gridsize, self.extent, self.bins = gridsize, extent, bins

def hexbin (x,y,hexgrid):
    """
    Hexagonally bin 2-D data on a fixed grid.

    x, y    -- coordinate arrays passed straight to plt.hexbin
    hexgrid -- object providing gridsize, extent and bins attributes

    A new figure with one subplot is created on every call. Returns a tuple
    (counts, collection): a copy of the array from get_array() and the
    object returned by plt.hexbin.
    """
    figure = plt.figure()
    figure.add_subplot(111)

    # Keeping gridsize, extent and bins identical between calls is what
    # makes the count arrays of different datasets line up. mincnt=0 is
    # used so that a value comes back for every cell of the grid, not only
    # those with count>mincnt.
    collection = plt.hexbin(
        x, y,
        gridsize=hexgrid.gridsize,
        extent=hexgrid.extent,
        bins=hexgrid.bins,
        mincnt=0,
    )
    # If the figure itself is not wanted, plt.close(figure.number) could be
    # called at this point.

    return collection.get_array().copy(), collection

# Example ===>
if __name__ == "__main__":
    # One grid definition shared by both datasets so their bins match.
    hexgrid = Hexagonal2DGrid(gridsize=(21,5), extent=[-70,70,-20,20])

    # NOTE(review): neither covariance matrix below is symmetric -- confirm
    # these values are intended.
    x_data,y_data = get_data(mean=(0,0), cov=[[-40,95],[90,10]])
    x_model,y_model = get_data(mean=(0,10), cov=[[100,30],[3,30]])

    counts_data, hexbin_data = hexbin(x_data,y_data,hexgrid)
    counts_model, hexbin_model = hexbin(x_model,y_model,hexgrid)

    # Centers are computed from the data binning only; with a shared grid
    # they are expected to be the same for the model.
    centers = get_centers(hexbin_data)

    # Restrict the comparison to cells where the data count is non-zero,
    # because that count is used as the divisor below.
    nonzero = counts_data != 0
    observed = counts_data[nonzero]
    expected = counts_model[nonzero]

    # Chi-square-style statistic, using the data counts as the variance.
    chi2 = np.sum((observed-expected)**2/observed)
    print(" chi2={}".format(chi2))