极坐标立体投影中的Python点密度图

时间:2015-07-16 12:44:09

标签: python matplotlib scipy scikit-learn matplotlib-basemap

我有一个磁化方向的点云,其方位角(偏角)在0°和360°之间,倾角在0°和90°之间。我在极坐标方位等距投影中显示这些点(使用matplotlib的Basemap工具包)。这意味着90°倾角将直接指向绘图的中心,偏角沿顺时针方向增加。

我的问题是,我还想在这些点云周围绘制等值线,用来表示点/方向密度最高的位置。最简单的方法是什么?最好还能标出包围我50%数据的那条等值线。如果我没有弄错,这对应于中位数。

到目前为止,我已经尝试过gaussian_kde和sklearn的异常检测(参考链接1、2),但结果并不像预期的那样。

有什么想法吗?

编辑#1:
首先是gaussian_kde

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from mpl_toolkits.basemap import Basemap

# South-polar azimuthal equidistant Basemap with labelled parallels/meridians.
m = Basemap(projection='spaeqd', boundinglat=0, lon_0=180, resolution='l', round=True)
m.drawparallels(np.arange(-80., 1., 10.), labels=[False, True, True, False])
m.drawmeridians(np.arange(-180., 181., 30.), labels=[True, False, False, True])

# data
# NOTE(review): m1 and m2 are not defined in this snippet; presumably they are
# the declination and inclination arrays of the measured directions - confirm.
x, y = m(m1, -m2)  # m2 is negated because the data are plotted in the southern hemisphere

# set up the grid for evaluation of the KDE
lon_axis = np.arange(0, 360.1, 1)
lat_axis = np.arange(-90, 1, 1)
lat_grid, lon_grid = np.meshgrid(lat_axis, lon_axis)

# A Basemap instance is called as m(lon, lat). The grid therefore has to be
# passed longitude first; passing the latitude grid first projects the
# evaluation nodes to the wrong map positions.
X, Y = m(lon_grid, lat_grid)

# set up the gaussian KDE and evaluate it
# (pretty much the same as the example in the scipy.stats docs)
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([x, y])
kernel = stats.gaussian_kde(values)
Z = np.reshape(kernel(positions).T, X.shape)

# plot original points and the estimated probability density
ax = plt.gca()
ax.scatter(x, y, c='Crimson')
TOT = ax.contour(X, Y, Z, cmap=plt.cm.Reds)
plt.show()

然后sklearn:

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from mpl_toolkits.basemap import Basemap
from sklearn import svm
from sklearn.covariance import EllipticEnvelope

# South-polar azimuthal equidistant Basemap with labelled parallels/meridians.
m = Basemap(projection='spaeqd', boundinglat=0, lon_0=180, resolution='l', round=True)
m.drawparallels(np.arange(-80., 1., 10.), labels=[False, True, True, False])
m.drawmeridians(np.arange(-180., 181., 30.), labels=[True, False, False, True])

# data
# NOTE(review): m1 and m2 are not defined in this snippet; presumably they are
# the declination and inclination arrays of the measured directions - confirm.
x, y = m(m1, -m2)  # m2 is negated because the data are plotted in the southern hemisphere

# Similar to the examples in the sklearn docs
outliers_fraction = 0.5
oneclass_svm = svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
                               kernel="rbf", gamma=0.1, verbose=True)

# set up the evaluation grid; R holds latitudes, T holds longitudes,
# and the Basemap instance is called as m(lon, lat)
yi = np.arange(0, 360.1, 1)
xi = np.arange(-90, 1, 1)
R, T = np.meshgrid(xi, yi)
xx, yy = m(T, R)

# standardize data as suggested by docs; keep the statistics so that the
# evaluation grid can be transformed with exactly the same shift and scale
x_mean, x_scale = x.mean(), x.std()
y_mean, y_scale = y.mean(), y.std()
x_std = (x - x_mean) / x_scale
y_std = (y - y_mean) / y_scale
values = np.vstack([x_std, y_std])

# fit data and calculate the threshold - it is taken at the
# outliers_fraction percentile of the training scores
oneclass_svm.fit(values.T)
y_pred = oneclass_svm.decision_function(values.T).ravel()
threshold = stats.scoreatpercentile(y_pred, 100 * outliers_fraction)
y_pred = y_pred > threshold

# Target vector for evaluation. It must be standardized with the mean and
# standard deviation of the *data*, not of the grid itself; using the grid's
# own statistics shifts the decision surface relative to the point cloud.
TV = np.c_[(xx.ravel() - x_mean) / x_scale,
           (yy.ravel() - y_mean) / y_scale]

# evaluate the decision function on the grid
Z = oneclass_svm.decision_function(TV)
Z = Z.reshape(xx.shape)

# plotting - very similar to the example in the docs
ax = plt.gca()
ax.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
            cmap=plt.cm.Blues_r)
ax.contour(xx, yy, Z, levels=[threshold],
           linewidths=2, colors='red')
ax.contourf(xx, yy, Z, levels=[threshold, Z.max()],
            colors='orange')
ax.scatter(x, y, s=30, marker='s', c='RoyalBlue', label='Mr')
plt.show()

EllipticEnvelope可以工作,但这不是我想要的。

1 个答案:

答案 0 :(得分:0)

好的,我想我可能找到了一个解决方案。但它并不适用于所有情况:当数据呈多峰分布时,我认为它会失效。

尽管如此,这是我的过程:

概率密度函数(PDF)本质上相当于一个连续的直方图。所以我使用np.percentile来计算两个向量的下四分位数(25%)和上四分位数(75%)。然后我在这些分位数位置上查找PDF的值,该值对应的应该就是我想要的等值线。

当然,这也应该适用于极地立体(或任何其他)投影。

以下是用两个服从伽玛分布的数据集绘制交会图的示例代码:

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.interpolate import LinearNDInterpolator, RegularGridInterpolator

# generate some data
# Sample counts must be integers: NumPy rejects floats such as 1e4 for
# `size` and for the `num` argument of linspace.
n_samples = 10000
n_grid = 100
x = np.random.gamma(10, 0.8, n_samples)
y = np.random.gamma(4, 0.3, n_samples)

# set up the data and grid for the 2D PDF
values = np.vstack([x, y])
pdf_x = np.linspace(x.min(), x.max(), n_grid)
pdf_y = np.linspace(y.min(), y.max(), n_grid)
X, Y = np.meshgrid(pdf_x, pdf_y)

kernel = stats.gaussian_kde(values)

# evaluate the PDF at every grid location
positions = np.vstack([X.ravel(), Y.ravel()])
Z = np.reshape(kernel(positions).T, X.shape)

# upper and lower quartiles of x and y data
xql = np.percentile(x, 25)
xqu = np.percentile(x, 75)
yql = np.percentile(y, 25)
yqu = np.percentile(y, 75)

# set up the interpolator - RegularGridInterpolator could be used as well
# and should be faster
Interp = LinearNDInterpolator((X.flatten(), Y.flatten()), Z.flatten())

# 1D example to illustrate the idea
plt.figure()
kernel2 = stats.gaussian_kde(x)
# `density=True` replaces the `normed=True` keyword removed from matplotlib
plt.hist(x, 30, density=True)
plt.plot(pdf_x, kernel2(pdf_x), 'r--', linewidth=2)

# plot vertical lines at the upper and lower quartiles
plt.vlines(xql, 0, 0.2, color='red')
plt.vlines(xqu, 0, 0.2, color='red')

# Scatterplot / Crossplot with PDF and 25 and 75% isolines
plt.figure()
plt.scatter(x, y)
# Look up the PDF value at the (lower, lower) and (upper, upper) quartile
# points and use those values as isoline levels.
# NOTE(review): this is the author's heuristic; the fraction of data actually
# enclosed by each isoline is not verified here.
# contour() requires strictly increasing levels, so sort them first.
levels = np.sort(np.ravel([Interp(xql, yql), Interp(xqu, yqu)]))
plt.contour(X, Y, Z, levels=levels, colors='orange')

plt.show()

为完整起见,下面简要展示它在极地立体投影中的效果:

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.interpolate import LinearNDInterpolator
from mpl_toolkits.basemap import Basemap

# set up the coordinate projection
m = Basemap(projection='spaeqd', boundinglat=0, lon_0=180,
            resolution='l', round=True, suppress_ticks=True)
parallelGrid = np.arange(-80., 1., 10.)
meridianGrid = np.arange(-180.0, 180.1, 30)
m.drawparallels(parallelGrid, labels=[False, False, False, False])
m.drawmeridians(meridianGrid, labels=[False, False, False, False],
                labelstyle='+/-', fmt='%i')

# Found this on stackoverflow - labels it exactly how I want it:
# 'N' at the top and degree labels every 30 degrees around the rim,
# positioned in axes coordinates.
ax = plt.gca()
ax.text(0.5, 1.025, 'N', transform=ax.transAxes,
        horizontalalignment='center', verticalalignment='bottom', size=25)
for para in np.arange(30, 360, 30):
    # separate names so the data arrays x / y below are not shadowed
    label_x = (1.1 * 0.5 * np.sin(np.deg2rad(para))) + 0.5
    label_y = (1.1 * 0.5 * np.cos(np.deg2rad(para))) + 0.5
    ax.text(label_x, label_y, u'%i\N{DEGREE SIGN}' % para, transform=ax.transAxes,
            horizontalalignment='center', verticalalignment='center')

# generate some data
x = np.random.randint(180, 225, size=15)
y = np.random.randint(30, 40, size=15)

# into projection (second argument negated, as for the evaluation grid below)
x, y = m(x, -y)
values = np.vstack([x, y])

pdf_x = np.arange(0, 361, 1)
pdf_y = np.arange(0, 91, 1)

# evaluation grid, transformed into the projection the same way as the data
X, Y = np.meshgrid(pdf_x, pdf_y)
X, Y = m(X, -Y)

# kernel density estimate in projected coordinates, evaluated on the grid
kernel = stats.gaussian_kde(values)
positions = np.vstack([X.ravel(), Y.ravel()])
Z = np.reshape(kernel(positions).T, X.shape)

# lower and upper quartiles of the projected coordinates
xql = np.percentile(x, 25)
xqu = np.percentile(x, 75)
yql = np.percentile(y, 25)
yqu = np.percentile(y, 75)

Interp = LinearNDInterpolator((X.flatten(), Y.flatten()), Z.flatten())

ax = plt.gca()
ax.scatter(x, y)

# PDF values at the two quartile points are used as isoline levels.
# contour() requires strictly increasing levels, so sort them first;
# with random data their order is otherwise arbitrary.
levels = np.sort(np.ravel([Interp(xql, yql), Interp(xqu, yqu)]))
ax.contour(X, Y, Z, levels=levels, colors='red')

plt.show()