如何对列表使用 sklearn 的 "LinearRegression"?

时间:2018-03-13 07:07:20

标签: python pandas scikit-learn

我有一个这样的列表:

# Each row holds seven numeric strings. Per the accompanying description, the
# fifth and sixth values are the X data and the seventh value is the Y data.
list1 = [
    ['2012', '1', '3', '1', '832.0', '261.0', '100.00'],
    ['2012', '1', '5', '1', '507.0', '193.0', '92.50'],
    ['2012', '2', '3', '1', '412.0', '200.0', '95.00'],
    ['2012', '2', '5', '1', '560.0', '335.0', '90.00'],
    ['2012', '3', '3', '1', '584.0', '205.0', '100.00'],
    ['2012', '3', '5', '1', '595.0', '162.0', '92.50'],
    ['2012', '4', '3', '1', '504.0', '227.0', '100.00'],
    ['2012', '4', '5', '1', '591.0', '264.0', '92.50'],
    ['2012', '5', '3', '1', '489.0', '234.0', '100.00'],
    ['2012', '5', '5', '1', '561.0', '292.0', '95.00'],
    ['2012', '6', '3', '1', '622.0', '221.0', '100.00'],
    ['2012', '6', '5', '1', '478.0', '204.0', '92.50'],
    ['2012', '7', '3', '1', '974.0', '290.0', '100.00'],
    ['2012', '7', '5', '1', '553.0', '269.0', '95.00'],
    ['2012', '8', '3', '1', '473.0', '158.0', '100.00'],
    ['2012', '8', '5', '1', '526.0', '174.0', '92.50'],
    ['2012', '9', '3', '1', '701.0', '189.0', '95.00'],
    ['2012', '9', '5', '1', '502.0', '179.0', '97.50'],
    ['2012', '10', '3', '1', '470.0', '184.0', '100.00'],
    ['2012', '10', '5', '1', '579.0', '218.0', '92.50'],
]

每行的第七列是 Y 数据,每行的第五列和第六列是 X 数据。我想用这个列表计算截距("intercept")和系数("coef")。如果数据在 csv 文件中,我知道该怎么做,例如:

import pandas as pd
from sklearn import linear_model

# Load the CSV; the 'point' column is the target, every other column is a feature.
wine = pd.read_csv(r"D:/1.csv", sep=',')
y = wine['point']
X = wine.drop(columns='point')

# fit() returns the estimator itself, so construction and fitting are chained.
clf = linear_model.LinearRegression().fit(X, y)

# Report the intercept followed by the first two coefficients.
intercept = clf.intercept_
first_coef, second_coef = clf.coef_[0], clf.coef_[1]
print(intercept, first_coef, second_coef)

我的 csv:(原帖此处为一张图片,此处未能保留)

但现在,我需要直接用列表来计算截距("intercept")和系数("coef")。该怎么做?

3 个答案:

答案 0 :(得分:2)

不需要太大的改动,只需将 list1 以 float 类型加载到 DataFrame 中,然后切片并拟合:

import pandas as pd
from sklearn.linear_model import LinearRegression

# Build the frame from the list of string rows, casting every cell to float.
df = pd.DataFrame(list1, dtype=float)

# Features are columns 4 and 5 (the fifth and sixth values of each row).
# The previously used slice 5:7 also selected column 6, i.e. the target
# itself, which is why the fit as originally posted reported an intercept
# of [0.] and coefficients of [[0. 1.]].
X = df.iloc[:, 4:6]

# Select the target (last column) as a 1-D Series so that intercept_ is a
# scalar and coef_ is 1-D, matching the print format used in the question.
y = df.iloc[:, -1]

clf = LinearRegression()
clf.fit(X, y)

# Intercept followed by the coefficient of each of the two features.
print(clf.intercept_, clf.coef_[0], clf.coef_[1])

答案 1 :(得分:1)

您可以将列表转换为numpy数组,然后将其提供给模型。

import numpy as np
import pandas as pd
from sklearn import linear_model

# Each row holds seven numeric strings. Per the accompanying description, the
# fifth and sixth values are the X data and the seventh value is the Y data.
list1 = [
    ['2012', '1', '3', '1', '832.0', '261.0', '100.00'],
    ['2012', '1', '5', '1', '507.0', '193.0', '92.50'],
    ['2012', '2', '3', '1', '412.0', '200.0', '95.00'],
    ['2012', '2', '5', '1', '560.0', '335.0', '90.00'],
    ['2012', '3', '3', '1', '584.0', '205.0', '100.00'],
    ['2012', '3', '5', '1', '595.0', '162.0', '92.50'],
    ['2012', '4', '3', '1', '504.0', '227.0', '100.00'],
    ['2012', '4', '5', '1', '591.0', '264.0', '92.50'],
    ['2012', '5', '3', '1', '489.0', '234.0', '100.00'],
    ['2012', '5', '5', '1', '561.0', '292.0', '95.00'],
    ['2012', '6', '3', '1', '622.0', '221.0', '100.00'],
    ['2012', '6', '5', '1', '478.0', '204.0', '92.50'],
    ['2012', '7', '3', '1', '974.0', '290.0', '100.00'],
    ['2012', '7', '5', '1', '553.0', '269.0', '95.00'],
    ['2012', '8', '3', '1', '473.0', '158.0', '100.00'],
    ['2012', '8', '5', '1', '526.0', '174.0', '92.50'],
    ['2012', '9', '3', '1', '701.0', '189.0', '95.00'],
    ['2012', '9', '5', '1', '502.0', '179.0', '97.50'],
    ['2012', '10', '3', '1', '470.0', '184.0', '100.00'],
    ['2012', '10', '5', '1', '579.0', '218.0', '92.50'],
]

# The rows hold numeric *strings*; without an explicit dtype np.asarray would
# build a string array, which is not valid numeric input for the regression.
# Converting to float up front makes both X and y numeric.
wine = np.asarray(list1, dtype=float)
clf = linear_model.LinearRegression()
X = wine[:, 4:6]  # fifth and sixth values of each row: the features
y = wine[:, -1]   # last value of each row: the target
clf.fit(X, y)

# Intercept followed by the coefficient of each of the two features.
print(clf.intercept_, clf.coef_[0], clf.coef_[1])

# Output as originally posted, i.e. before the explicit float conversion was
# added -- NOTE(review): re-run to confirm the values for the corrected code:
# 105.668662639 0.0 0.18516127419

答案 2 :(得分:0)

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Each row holds seven numeric strings. Per the accompanying description, the
# fifth and sixth values are the X data and the seventh value is the Y data.
list1 = [
    ['2012', '1', '3', '1', '832.0', '261.0', '100.00'],
    ['2012', '1', '5', '1', '507.0', '193.0', '92.50'],
    ['2012', '2', '3', '1', '412.0', '200.0', '95.00'],
    ['2012', '2', '5', '1', '560.0', '335.0', '90.00'],
    ['2012', '3', '3', '1', '584.0', '205.0', '100.00'],
    ['2012', '3', '5', '1', '595.0', '162.0', '92.50'],
    ['2012', '4', '3', '1', '504.0', '227.0', '100.00'],
    ['2012', '4', '5', '1', '591.0', '264.0', '92.50'],
    ['2012', '5', '3', '1', '489.0', '234.0', '100.00'],
    ['2012', '5', '5', '1', '561.0', '292.0', '95.00'],
    ['2012', '6', '3', '1', '622.0', '221.0', '100.00'],
    ['2012', '6', '5', '1', '478.0', '204.0', '92.50'],
    ['2012', '7', '3', '1', '974.0', '290.0', '100.00'],
    ['2012', '7', '5', '1', '553.0', '269.0', '95.00'],
    ['2012', '8', '3', '1', '473.0', '158.0', '100.00'],
    ['2012', '8', '5', '1', '526.0', '174.0', '92.50'],
    ['2012', '9', '3', '1', '701.0', '189.0', '95.00'],
    ['2012', '9', '5', '1', '502.0', '179.0', '97.50'],
    ['2012', '10', '3', '1', '470.0', '184.0', '100.00'],
    ['2012', '10', '5', '1', '579.0', '218.0', '92.50'],
]

def drawabline(xy):
    """Fit a straight line to two-column data, print it and plot it.

    ``xy`` is indexed as a 2-D array: column 0 supplies the x values and
    column 1 the y values. The slope and intercept of the degree-1
    ``np.polyfit`` fit are printed, then a new figure is shown with the
    points as a scatter plot and the fitted line drawn in blue.
    """
    xs = xy[:, 0]
    ys = xy[:, 1]

    # A degree-1 polyfit returns its coefficients highest power first.
    slope, intercept = np.polyfit(xs, ys, 1)
    print(slope, intercept)
    # Alternative noted by the original author (left disabled):
    #   slope, intercept, r_value, p_value, stderr = stats.linregress(xy)
    #   print(slope, intercept)

    # Evaluate the fitted line at every observed x.
    fitted = slope * xs + intercept

    plt.figure()
    plt.scatter(xs, ys)
    plt.plot(xs, fitted, 'b')
    plt.show()

# Convert the string rows to a float array. The builtin float is used because
# np.float was merely an alias for it and is no longer available in current
# NumPy releases (accessing it raises AttributeError).
data = np.asarray(list1, dtype=float)

# Column 4 as x against column 6 as y. Indexing with a list of columns
# already returns a new array, so no extra np.array(...) copy is needed.
data1 = data[:, [4, 6]]
print( 'data1=', data1 )
drawabline(data1)

# Column 5 as x against column 6 as y.
data2 = data[:, [5, 6]]
print( 'data2=', data2 )
drawabline(data2)