我有一些x,y
数据,我使用scipy.stats.gaussian_kde函数获得高斯核密度估算器(KDE)。我可以绘制这个图,以显示MWE下面显示的轮廓密度曲线。
这是MWE和结果图。
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Data.
x = [1.81,1.715,1.78,1.613,1.629,1.714,1.62,1.738,1.495,1.669,1.57,1.877,1.385,2.129,2.016,1.606,1.444,2.103,1.397,1.854,1.327,1.377,1.798,1.684,2.186,2.079,1.32,1.452,2.272,1.313,1.762,2.308,2.285,2.328,2.288,2.345,2.237,2.078,2.057,1.505,2.595,2.176,2.501,0.942,2.424,2.49,2.65,1.303,2.43,2.241,0.897,1.731,2.464,1.638,0.867,2.392,3.248,2.608,2.733,0.745,2.715,3.078,2.571,0.771,1.071,2.574,3.343,2.835,2.629,3.421,0.642,2.571,2.698,0.595,2.912,0.563,2.832,2.636,3.149,2.522,0.836,0.894,0.447,1.304,1.132,2.488,3.363,2.961,1.317,2.387,0.036,2.199,0.356,3.036,2.103,2.894,-0.097,0.069,2.688,-0.083,0.653,3.247,3.045,3.197,2.963,2.473,2.571,3.333,3.009,1.281,3.257,3.116,2.673,2.901,2.903,2.634,-0.291,-0.29,0.212]
y = [0.924,0.915,0.914,0.91,0.909,0.905,0.905,0.893,0.886,0.881,0.873,0.873,0.844,0.838,0.83,0.817,0.811,0.809,0.807,0.803,0.802,0.792,0.777,0.774,0.774,0.77,0.748,0.746,0.742,0.734,0.729,0.726,0.722,0.677,0.676,0.672,0.635,0.62,0.62,0.608,0.605,0.587,0.586,0.578,0.571,0.569,0.549,0.544,0.535,0.53,0.529,0.513,0.499,0.497,0.496,0.496,0.49,0.486,0.482,0.476,0.474,0.473,0.471,0.47,0.459,0.444,0.438,0.435,0.428,0.419,0.411,0.4,0.396,0.384,0.378,0.368,0.362,0.362,0.361,0.357,0.347,0.346,0.344,0.33,0.322,0.319,0.318,0.305,0.296,0.296,0.289,0.288,0.288,0.288,0.287,0.286,0.283,0.283,0.278,0.274,0.264,0.259,0.248,0.244,0.241,0.239,0.238,0.237,0.23,0.222,0.221,0.218,0.214,0.212,0.207,0.205,0.196,0.19,0.182]
xmin, xmax = min(x), max(x)
ymin, ymax = min(y), max(y)
# Generate KDE.
x1, y1 = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([x1.ravel(), y1.ravel()])
values = np.vstack([x, y])
kernel = stats.gaussian_kde(values)
kde = np.reshape(kernel(positions).T, x1.shape)
# Make plot.
plt.figure()
CS = plt.contour(x1,y1,kde)
plt.clabel(CS, inline=1, fontsize=10, zorder=6)
plt.show()
我对此输出的形状感兴趣。我特别需要的是获得每个密度曲线的两个最远点的x,y
坐标的方法。例如,对于右下角0.7
红色曲线,坐标将围绕:(x1,y1)=(2.7,0.42)
和(x2,y2)=(2.9,0.29)
(标记为黑色圆圈的点)。
由于存在密度值相等的曲线(即:两条红色曲线,其值为0.7
,两条橙色曲线带有0.6
等),因此更加复杂。需要一种方法来区分这些或忽略给定值的某些曲线。
我不确定如何解决这个问题,任何帮助或方向都会非常感激。
答案 0 :(得分:5)
以下是我们的解决方案:
检查处理部分......
from __future__ import division
import scipy as sp
from scipy import stats
import pylab as pl
x = [1.81,1.715,1.78,1.613,1.629,1.714,1.62,1.738,1.495,1.669,1.57,1.877,1.385,2.129, \
2.016,1.606,1.444,2.103,1.397,1.854,1.327,1.377,1.798,1.684,2.186,2.079,1.32, \
1.452,2.272,1.313,1.762,2.308,2.285,2.328,2.288,2.345,2.237,2.078,2.057,1.505, \
2.595,2.176,2.501,0.942,2.424,2.49,2.65,1.303,2.43,2.241,0.897,1.731,2.464,1.638, \
0.867,2.392,3.248,2.608,2.733,0.745,2.715,3.078,2.571,0.771,1.071,2.574,3.343, \
2.835,2.629,3.421,0.642,2.571,2.698,0.595,2.912,0.563,2.832,2.636,3.149,2.522, \
0.836,0.894,0.447,1.304,1.132,2.488,3.363,2.961,1.317,2.387,0.036,2.199,0.356, \
3.036,2.103,2.894,-0.097,0.069,2.688,-0.083,0.653,3.247,3.045,3.197,2.963,2.473, \
2.571,3.333,3.009,1.281,3.257,3.116,2.673,2.901,2.903,2.634,-0.291,-0.29,0.212]
y = [0.924,0.915,0.914,0.91,0.909,0.905,0.905,0.893,0.886,0.881,0.873,0.873,0.844, \
0.838,0.83,0.817,0.811,0.809,0.807,0.803,0.802,0.792,0.777,0.774,0.774,0.77,0.748, \
0.746,0.742,0.734,0.729,0.726,0.722,0.677,0.676,0.672,0.635,0.62,0.62,0.608,0.605, \
0.587,0.586,0.578,0.571,0.569,0.549,0.544,0.535,0.53,0.529,0.513,0.499,0.497, \
0.496,0.496,0.49,0.486,0.482,0.476,0.474,0.473,0.471,0.47,0.459,0.444,0.438,0.435, \
0.428,0.419,0.411,0.4,0.396,0.384,0.378,0.368,0.362,0.362,0.361,0.357,0.347,0.346, \
0.344,0.33,0.322,0.319,0.318,0.305,0.296,0.296,0.289,0.288,0.288,0.288,0.287, \
0.286,0.283,0.283,0.278,0.274,0.264,0.259,0.248,0.244,0.241,0.239,0.238,0.237, \
0.23,0.222,0.221,0.218,0.214,0.212,0.207,0.205,0.196,0.19,0.182]
xmin, xmax = min(x), max(x)
ymin, ymax = min(y), max(y)
# Generate KDE
x1, y1 = sp.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = sp.vstack([x1.ravel(), y1.ravel()])
values = sp.vstack([x, y])
kernel = stats.gaussian_kde(values)
kde = sp.reshape(kernel(positions).T, x1.shape)
# plotting
CS = pl.contour(x1,y1,kde)
# ----------------------------------- our solution ------------------------------------
# processing the distances
for i,clc in enumerate(CS.collections):
for j,pth in enumerate(clc.get_paths()):
cts = pth.vertices
d = sp.spatial.distance.cdist(cts,cts)
x,y = cts[list(sp.unravel_index(sp.argmax(d),d.shape))].T
pl.plot(x,y,':o')
print 'Contour Level %d, Part %d'%(i,j)
# ----------------------------------- our solution ------------------------------------
pl.clabel(CS, inline=1, fontsize=10, zorder=6)
pl.axis('image') # don't forget using this to fix aspect ratio to 1,1
pl.show()