从sklearn PCA获得特征值和向量

时间:2015-08-09 23:51:27

标签: python scipy scikit-learn pca

如何获得PCA应用的特征值和特征向量?

/*
 * Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Oracle or the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package converter;

/**
 * A named unit of length paired with a numeric conversion factor.
 *
 * @see <a href="http://docs.oracle.com/javafx/2/swing/port-to-javafx.htm">docs.oracle.com/javafx/2/swing/port-to-javafx.htm</a>
 */
public class Unit {

    /** Human-readable name of the unit. */
    String description;
    /** Conversion factor; {@link #toString()} labels it as meters per unit. */
    double multiplier;

    /**
     * Creates a unit from its name and conversion factor.
     *
     * @param description name of the unit
     * @param multiplier  conversion factor stored for the unit
     */
    Unit(String description, double multiplier) {
        this.multiplier = multiplier;
        this.description = description;
    }

    /**
     * Formats the unit as {@code Meters/<description> = <multiplier>}.
     *
     * @return the formatted text
     */
    @Override
    public String toString() {
        return "Meters/" + description + " = " + multiplier;
    }
}

/*
 * Copyright (c) 2012, 2013 Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 */
package converter;

/**
 * @see http://docs.oracle.com/javafx/2/swing/port-to-javafx.htm
 */
import java.text.NumberFormat;
import javafx.beans.InvalidationListener;
import javafx.beans.Observable;
import javafx.beans.property.DoubleProperty;
import javafx.collections.ObservableList;
import javafx.scene.control.*;
import javafx.scene.layout.HBox;
import javafx.scene.layout.VBox;
import javafx.util.StringConverter;

/**
 * A non-collapsible titled pane holding a text field, a slider and a combo box
 * of {@link Unit}s. The text field shows the shared {@code meters} value divided
 * by the selected unit's multiplier; text typed into it is multiplied by that
 * multiplier and written back to {@code meters}.
 *
 * @see <a href="http://docs.oracle.com/javafx/2/swing/port-to-javafx.htm">docs.oracle.com/javafx/2/swing/port-to-javafx.htm</a>
 */
public class ConversionPanel extends TitledPane {

    /** Upper bound of the slider range. */
    private static final int MAX = 10000;
    /** Maximum number of fraction digits used when formatting the text field. */
    private static final int DIGITS = 3;

    private final TextField textField = new TextField();
    private final Slider slider = new Slider(0, MAX, 0);
    private final ComboBox<Unit> comboBox;
    private final NumberFormat numberFormat = NumberFormat.getNumberInstance();
    /** Shared length value; assigned once in the constructor. */
    private DoubleProperty meters;

    {
        numberFormat.setMaximumFractionDigits(DIGITS);
    }

    /**
     * Refreshes the text field from {@code meters}. Skipped while the text field
     * has focus, so the user's typing is not overwritten, and while no unit is
     * selected, because there is no multiplier to divide by.
     */
    private final InvalidationListener fromMeters = (Observable o) -> {
        if (textField.isFocused() || !hasSelectedUnit()) {
            return;
        }
        textField.setText(numberFormat.format(meters.get() / getMultiplier()));
    };

    /**
     * Writes the text field's number back to {@code meters}. Only active while
     * the text field has focus, so programmatic text updates made by
     * {@code fromMeters} do not feed back into the value.
     */
    private final InvalidationListener toMeters = (Observable o) -> {
        if (!textField.isFocused() || !hasSelectedUnit()) {
            return;
        }
        String text = textField.getText();
        if (text == null) {
            return;
        }
        try {
            Number n = numberFormat.parse(text);
            meters.set(n.doubleValue() * getMultiplier());
        } catch (java.text.ParseException ignored) {
            // The text is not a parsable number (for example empty or half-typed);
            // keep the previous value until the input becomes valid.
        }
    };

    /**
     * Builds the panel and wires it to the shared value.
     *
     * @param title  text shown as the pane title
     * @param units  units offered in the combo box; the first one is selected
     * @param meters shared value, bound bidirectionally to the slider
     */
    public ConversionPanel(String title, ObservableList<Unit> units, DoubleProperty meters) {
        setText(title);
        setCollapsible(false);
        comboBox = new ComboBox<>(units);
        comboBox.getSelectionModel().select(0);
        comboBox.setConverter(new StringConverter<Unit>() {

            @Override
            public String toString(Unit t) {
                // The combo box may ask for the text of "no selection".
                return t == null ? null : t.description;
            }

            @Override
            public Unit fromString(String string) {
                throw new UnsupportedOperationException("Not supported yet.");
            }
        });
        setContent(new HBox(new VBox(textField, slider), comboBox));

        this.meters = meters;
        meters.addListener(fromMeters);
        comboBox.valueProperty().addListener(fromMeters);
        textField.textProperty().addListener(toMeters);
        slider.valueProperty().bindBidirectional(meters);
        // Show the initial value immediately instead of waiting for the first change.
        fromMeters.invalidated(null);
    }

    /**
     * Returns the multiplier for the currently selected unit of measurement.
     *
     * @return the selected unit's multiplier
     * @throws NullPointerException if no unit is currently selected
     */
    public double getMultiplier() {
        return comboBox.getValue().multiplier;
    }

    /** Tells whether the combo box currently has a unit selected. */
    private boolean hasSelectedUnit() {
        return comboBox.getValue() != null;
    }
}

/*
 * Copyright (c) 2012, 2013 Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 */
package converter;

import javafx.application.Application;
import javafx.beans.property.DoubleProperty;
import javafx.beans.property.SimpleDoubleProperty;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
import javafx.scene.Scene;
import javafx.scene.layout.VBox;
import javafx.stage.Stage;

/**
 * JavaFX application that shows two {@code ConversionPanel}s, one with metric
 * units and one with U.S. units, both backed by the same {@code meters} property.
 *
 * @see <a href="https://stackoverflow.com/a/31909942/230513">stackoverflow.com/a/31909942/230513</a>
 * @see <a href="http://docs.oracle.com/javafx/2/swing/port-to-javafx.htm">docs.oracle.com/javafx/2/swing/port-to-javafx.htm</a>
 */
public class Converter extends Application {

    /**
     * Launches the application.
     *
     * @param args command-line arguments, passed through to {@code launch}
     */
    public static void main(String[] args) {
        launch(args);
    }

    private final ObservableList<Unit> metricDistances;
    private final ObservableList<Unit> usaDistances;
    /** Value shared by both panels; starts at 1. */
    private final DoubleProperty meters = new SimpleDoubleProperty(1);

    /**
     * Creates the unit lists. The panels that display them are built in
     * {@link #start(Stage)}.
     */
    public Converter() {
        // Metric units, each given as its length in meters.
        metricDistances = FXCollections.observableArrayList(
            new Unit("Centimeters", 0.01),
            new Unit("Meters", 1.0),
            new Unit("Kilometers", 1000.0));

        // U.S. units, each given as its exact length in meters
        // (1 mile = 1760 yards = 1609.344 m).
        usaDistances = FXCollections.observableArrayList(
            new Unit("Inches", 0.0254),
            new Unit("Feet", 0.3048),
            new Unit("Yards", 0.9144),
            new Unit("Miles", 1609.344));
    }

    /**
     * Stacks the two conversion panels vertically in a scene and shows the stage.
     *
     * @param stage the stage on which the scene is set
     */
    @Override
    public void start(Stage stage) {
        stage.setScene(new Scene(new VBox(
            new ConversionPanel("Metric System", metricDistances, meters),
            new ConversionPanel("U.S. System", usaDistances, meters))));
        stage.show();
    }
}

我在文档中找不到它。

1.我“无法”理解这里的不同结果。

修改

from sklearn.decomposition import PCA
clf=PCA(0.98,whiten=True)      # conserve 98% of the variance
# Fit on the training data, then apply the same fitted model to the test data.
X_train=clf.fit_transform(X_train)
X_test=clf.transform(X_test)
  2. 我希望获得所有特征值和特征向量,而不仅仅是满足收敛条件的简化集。

3 个答案:

答案 0 :(得分:44)

您的实施

您正在计算相关矩阵的特征向量,即规范化变量的协方差矩阵。
data/=np.std(data, axis=0)不是经典PCA的一部分,我们只对变量做中心化(减去均值)。 因此,sklearn PCA 不会事先对数据进行缩放。

除此之外,你的思路是正确的——如果我们忽略你提供的代码无法运行这一事实 ;)。 你只是把行/列布局弄混了。老实说,我认为从X = data.T开始,之后只使用X会更容易。我在帖子的末尾添加了“修复后”的代码。

获取特征值

您已经注意到可以使用clf.components_来获取特征向量。

所以你已经有了主成分。它们是协方差矩阵 $X^T X$ 的特征向量。

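从中检索特征值的一种方法是将此矩阵应用于每个主成分,并将结果投影到该主成分上。 设 $v_1$ 为第一主成分,$\lambda_1$ 为相应的特征值。我们有:
$$X^T X v_1 = \lambda_1 v_1$$ 因此: $$\lambda_1 = v_1^T X^T X v_1$$ 因为 $\langle v_1, v_1 \rangle = 1$,其中 $\langle x, y \rangle$ 表示向量 $x$ 和 $y$ 的标量积。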

回到Python,你可以这样做:

n_samples = X.shape[0]
# We center the data and compute the sample covariance matrix.
X -= np.mean(X, axis=0)
# Normalize by n_samples - 1, the default normalization of np.cov, so the
# printed values are on the same scale as pca.explained_variance_ in current
# scikit-learn releases.
cov_matrix = np.dot(X.T, X) / (n_samples - 1)
for eigenvector in pca.components_:
    # Project the image of the component back onto the component itself;
    # this is the eigenvalue when the component has unit length.
    print(np.dot(eigenvector.T, np.dot(cov_matrix, eigenvector)))

这样你就得到了与各特征向量对应的特征值。 不过,在我的测试中,这种方法对最后几个特征值并不奏效,我认为这与我在数值稳定性方面经验不足有关。

这并不是获取特征值的最佳方法,但了解它们从何而来是有益的。
特征值表示特征向量方向的方差。因此,您可以通过pca.explained_variance_属性获取它们:

# The eigenvalues are the variances along the principal components.
eigenvalues = pca.explained_variance_

这是一个可重复的示例,用于打印每种方法得到的特征值:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.datasets import make_classification


# Fix the seed so that every run prints the same numbers.
X, y = make_classification(n_samples=1000, random_state=0)
n_samples = X.shape[0]

pca = PCA()
X_transformed = pca.fit_transform(X)

# We center the data and compute the sample covariance matrix.
# Dividing by n_samples - 1 matches np.cov's default and the normalization
# behind pca.explained_variance_ in current scikit-learn releases.
X_centered = X - np.mean(X, axis=0)
cov_matrix = np.dot(X_centered.T, X_centered) / (n_samples - 1)
eigenvalues = pca.explained_variance_
for eigenvalue, eigenvector in zip(eigenvalues, pca.components_):
    # First line: eigenvalue recovered from the covariance matrix.
    print(np.dot(eigenvector.T, np.dot(cov_matrix, eigenvector)))
    # Second line: eigenvalue reported by scikit-learn.
    print(eigenvalue)

原始代码,已修复

如果你运行它,你会看到值是一致的。它们并不完全相同,因为numpy和scikit-learn在这里没有使用相同的算法。 最主要的问题是你使用了相关矩阵而不是协方差矩阵,如上所述。另外,你从numpy得到的特征向量是转置过的,这让情况变得非常混乱。

import numpy as np
from scipy.stats.mstats import zscore
from sklearn.decomposition import PCA

def pca_code(data):
    """Print the PCA of ``data`` computed by hand and by scikit-learn.

    The hand-written part centers the data, eigen-decomposes its covariance
    matrix with ``np.linalg.eigh``, sorts the eigenpairs by decreasing
    eigenvalue and keeps the leading components needed to reach 98% of the
    total variance. The second part fits ``PCA(0.98)`` on the same centered
    data. For each approach the eigenvalues and the second component are
    printed so they can be compared.

    Args:
        data: 2-D array with one observation per row (``rowvar=False`` is
            passed to ``np.cov``). The caller's array is left untouched.

    Returns:
        None. Results are printed.

    Raises:
        IndexError: if fewer than two components are retained, because the
            component at index 1 is printed.
    """
    #raw_implementation
    var_per=.98
    # Center a copy rather than using ``-=``: the caller's array is not
    # modified and integer input is promoted to float instead of failing.
    data = data - np.mean(data, axis=0)
    # No division by np.std(data, axis=0): that would decompose the
    # correlation matrix instead of the covariance matrix.
    cov_mat=np.cov(data, rowvar=False)
    evals, evecs = np.linalg.eigh(cov_mat)
    # eigh returns eigenvalues in ascending order; reorder to descending.
    idx = np.argsort(evals)[::-1]
    evecs = evecs[:,idx]
    evals = evals[idx]
    variance_retained=np.cumsum(evals)/np.sum(evals)
    # argmax on a boolean array gives the first position that reaches var_per.
    index=np.argmax(variance_retained>=var_per)
    evecs = evecs[:,:index+1]
    # Projection of the centered data onto the retained components
    # (computed for illustration; not printed).
    reduced_data=np.dot(evecs.T, data.T).T
    print("evals", evals)
    print("_"*30)
    # Eigenvectors are the columns of evecs, hence the transpose.
    print(evecs.T[1, :])
    print("_"*30)
    #using the scikit-learn package
    clf=PCA(var_per)
    X_train=data
    X_train=clf.fit_transform(X_train)
    print(clf.explained_variance_)
    print("_"*30)
    print(clf.components_[1,:])
    print("__"*30)

希望这有帮助,请随时要求澄清。

答案 1 :(得分:0)

我使用了sklearn PCA功能。返回参数“ components_”是特征向量,“ explained_variance_”是特征值。下面是我的测试代码。

from sklearn.decomposition import PCA
import numpy as np


def main():
    """Fit a PCA on ten 2-D points and print the data, components and variances."""
    samples = np.array([
        [2.5, 2.4], [0.5, 0.7], [2.2, 2.9], [1.9, 2.2], [3.1, 3.0],
        [2.3, 2.7], [2, 1.6], [1, 1.1], [1.5, 1.6], [1.1, 0.9],
    ])
    print(samples)

    model = PCA()
    model.fit(samples)

    # Eigenvectors first, then the matching eigenvalues.
    for fitted_attribute in (model.components_, model.explained_variance_):
        print(fitted_attribute)


if __name__ == "__main__":
    main()

答案 2 :(得分:0)

当您说“特征值”时,您是指PCA的“奇异值”吗?仅当应用PCA的矩阵为方阵时,才可以使用特征值。

如果尝试使用“特征值”来确定PCA所需的正确尺寸,则实际上应使用奇异值。您可以只使用pca.singular_values_来获取奇异值。