如何在 Python 中只使用椭圆包络(Elliptic Envelope)来检测异常值?

时间:2018-04-24 16:07:51

标签: python

我查看了 scikit-learn 的文档,但其中的示例同时使用了多种算法进行异常检测,而我只需要使用椭圆包络(Elliptic Envelope)的 Python 代码。

1 个答案:

答案 0 :(得分:0)

scikit-learn 文档提供了一些有关如何使用它的示例和说明。可以参考此示例(here);我已将该示例改编为只使用椭圆包络(EllipticEnvelope)的版本。

你应该能够采用这个例子并将其应用到其他地方。

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.font_manager


from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

# Seed NumPy's global generator so the np.random.* draws are reproducible.
np.random.seed(42)
rng = np.random.RandomState(42)

# Example settings: total sample count, share of outliers, and the
# offsets used to pull the two inlier blobs apart.
n_samples = 200
outliers_fraction = 0.25
clusters_separation = [0, 1, 2]

# Settings for evaluation: a 100 x 100 grid over [-7, 7] x [-7, 7].
_grid_axis = np.linspace(-7, 7, 100)
xx, yy = np.meshgrid(_grid_axis, _grid_axis)

n_inliers = int((1. - outliers_fraction) * n_samples)
n_outliers = int(outliers_fraction * n_samples)

# Labels: +1 for every sample, then -1 for the trailing n_outliers entries.
ground_truth = np.full(n_samples, 1, dtype=int)
ground_truth[n_samples - n_outliers:] = -1


# For each cluster separation: build a synthetic data set, fit an
# EllipticEnvelope on it, and draw the learned decision boundary in its own
# figure together with the true inliers and outliers.
for i, offset in enumerate(clusters_separation):
    # Data generation: two normally distributed inlier blobs, shifted by
    # -offset and +offset. The second blob takes the remainder so an odd
    # n_inliers still yields n_inliers rows and X stays aligned with
    # ground_truth.
    n_first_blob = n_inliers // 2
    X1 = 0.3 * np.random.randn(n_first_blob, 2) - offset
    X2 = 0.3 * np.random.randn(n_inliers - n_first_blob, 2) + offset
    X = np.r_[X1, X2]

    # Add outliers (uniform noise) as the last n_outliers rows, matching the
    # positions labelled -1 in ground_truth.
    X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]

    # Model
    clf = EllipticEnvelope(contamination=outliers_fraction)

    # Fit the model, then score and classify the training points
    clf.fit(X)
    scores_pred = clf.decision_function(X)
    y_pred = clf.predict(X)
    # Score below which the lowest `outliers_fraction` share of samples falls
    threshold = stats.scoreatpercentile(scores_pred, 100 * outliers_fraction)
    n_errors = (y_pred != ground_truth).sum()

    # Evaluate the decision function on the grid for the contour plots
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Plot the level lines and the points (one figure per separation)
    plt.figure(figsize=(9, 7))
    plt.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                 cmap=plt.cm.Blues_r)
    boundary = plt.contour(xx, yy, Z, levels=[threshold],
                           linewidths=2, colors='red')
    plt.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                 colors='orange')
    inlier_pts = plt.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1],
                             c='white', s=20, edgecolor='k')
    outlier_pts = plt.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1],
                              c='black', s=20, edgecolor='k')
    plt.axis('tight')

    # ContourSet.collections is not available in recent Matplotlib releases;
    # legend_elements() returns the legend handles for the contour instead.
    boundary_handles, _ = boundary.legend_elements()
    plt.legend(
        [boundary_handles[0], inlier_pts, outlier_pts],
        ['learned decision function', 'true inliers', 'true outliers'],
        prop=matplotlib.font_manager.FontProperties(size=10),
        loc='lower right')
    plt.xlabel("%d. %s (errors: %d)" % (i + 1, 'Elliptic Envelope', n_errors))
    plt.xlim((-7, 7))
    plt.ylim((-7, 7))
    plt.suptitle("Outlier detection via Elliptic Envelope")

plt.show()

Figure 1

Figure 2

Figure 3