我查看了 scikit-learn(sklearn)的文档,其中的示例同时使用了多种算法进行异常检测,而我只研究椭圆包络(Elliptic Envelope)的 Python 代码。
答案 0 :(得分:0)
scikit-learn 文档提供了一些关于如何使用它的示例和说明。请参考文档中的这个示例(原文此处为指向该示例的链接 "here"),我已将该示例改编为只使用椭圆包络(Elliptic Envelope)。
你应该能够采用这个例子并将其应用到其他地方。
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.font_manager
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
# Seed NumPy's global generator so the synthetic data drawn below with
# np.random.* is reproducible from run to run.
np.random.seed(42)
# Separate seeded generator. Nothing in the visible script reads it; it is
# kept so that any code referring to the name `rng` keeps working.
rng = np.random.RandomState(42)

# Example settings
n_samples = 200
outliers_fraction = 0.25
clusters_separation = [0, 1, 2]

# Settings for evaluation
# 100 x 100 grid over [-7, 7] x [-7, 7] on which the decision function is
# evaluated for the contour plots.
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
# Derive the inlier count from the outlier count instead of truncating two
# float products independently: int(0.29 * 200) is 57, not 58, so the two
# truncated counts could otherwise sum to less than n_samples.
n_outliers = int(outliers_fraction * n_samples)
n_inliers = n_samples - n_outliers
# Ground-truth labels: +1 for inliers (first n_inliers samples), -1 for the
# outliers appended at the end. Slicing from n_inliers rather than with
# [-n_outliers:] avoids flagging every sample when n_outliers is 0.
ground_truth = np.ones(n_samples, dtype=int)
ground_truth[n_inliers:] = -1
# Fit an Elliptic Envelope and draw one figure per cluster separation.
for i, offset in enumerate(clusters_separation):
    # Data generation: two Gaussian blobs (scale 0.3) shifted by -offset and
    # +offset. The second blob takes the remainder of the inliers so that an
    # odd n_inliers does not silently drop one sample and misalign X with
    # ground_truth.
    n_first_blob = n_inliers // 2
    X1 = 0.3 * np.random.randn(n_first_blob, 2) - offset
    X2 = 0.3 * np.random.randn(n_inliers - n_first_blob, 2) + offset
    X = np.r_[X1, X2]
    # Add outliers, drawn uniformly from [-6, 6) in both dimensions
    X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]

    # Model
    clf = EllipticEnvelope(contamination=outliers_fraction)

    # Fit the model
    plt.figure(figsize=(9, 7))
    clf.fit(X)
    scores_pred = clf.decision_function(X)
    y_pred = clf.predict(X)
    # Score below which the lowest `outliers_fraction` share of the training
    # scores fall; used as the level of the plotted decision boundary.
    threshold = stats.scoreatpercentile(scores_pred, 100 * outliers_fraction)
    n_errors = (y_pred != ground_truth).sum()

    # plot the levels lines and the points
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Region scoring below the threshold (shaded blue)
    plt.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                 cmap=plt.cm.Blues_r)
    # Decision boundary at the threshold
    a = plt.contour(xx, yy, Z, levels=[threshold],
                    linewidths=2, colors='red')
    # Region scoring above the threshold (orange)
    plt.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                 colors='orange')
    # The first n_inliers rows of X are the true inliers, the rest the
    # true outliers.
    b = plt.scatter(X[:n_inliers, 0], X[:n_inliers, 1], c='white',
                    s=20, edgecolor='k')
    c = plt.scatter(X[n_inliers:, 0], X[n_inliers:, 1], c='black',
                    s=20, edgecolor='k')
    plt.axis('tight')
    # ContourSet.collections was deprecated in Matplotlib 3.8 and later
    # removed; legend_elements() returns the legend handles for the contour
    # levels and works on both old and new releases.
    boundary_handle = a.legend_elements()[0][0]
    plt.legend(
        [boundary_handle, b, c],
        ['learned decision function', 'true inliers', 'true outliers'],
        prop=matplotlib.font_manager.FontProperties(size=10),
        loc='lower right')
    plt.xlabel("%d. %s (errors: %d)" % (i + 1, 'Elliptic Envelope', n_errors))
    plt.xlim((-7, 7))
    plt.ylim((-7, 7))
    # Title the current figure; each iteration creates its own figure.
    plt.suptitle("Outlier detection via Elliptic Envelope")

# Display all figures once every separation has been plotted.
plt.show()