我有一个这样的列表(list):
# Sample rows; every field is stored as a string.
# Per the question text, the 5th and 6th columns (1-based) are the X data and
# the 7th column is the Y data. The meaning of columns 1-4 is not stated.
list1 = [['2012', '1', '3', '1', '832.0', '261.0', '100.00'],
['2012', '1', '5', '1', '507.0', '193.0', '92.50'],
['2012', '2', '3', '1', '412.0', '200.0', '95.00'],
['2012', '2', '5', '1', '560.0', '335.0', '90.00'],
['2012', '3', '3', '1', '584.0', '205.0', '100.00'],
['2012', '3', '5', '1', '595.0', '162.0', '92.50'],
['2012', '4', '3', '1', '504.0', '227.0', '100.00'],
['2012', '4', '5', '1', '591.0', '264.0', '92.50'],
['2012', '5', '3', '1', '489.0', '234.0', '100.00'],
['2012', '5', '5', '1', '561.0', '292.0', '95.00'],
['2012', '6', '3', '1', '622.0', '221.0', '100.00'],
['2012', '6', '5', '1', '478.0', '204.0', '92.50'],
['2012', '7', '3', '1', '974.0', '290.0', '100.00'],
['2012', '7', '5', '1', '553.0', '269.0', '95.00'],
['2012', '8', '3', '1', '473.0', '158.0', '100.00'],
['2012', '8', '5', '1', '526.0', '174.0', '92.50'],
['2012', '9', '3', '1', '701.0', '189.0', '95.00'],
['2012', '9', '5', '1', '502.0', '179.0', '97.50'],
['2012', '10', '3', '1', '470.0', '184.0', '100.00'],
['2012', '10', '5', '1', '579.0', '218.0', '92.50']]
每行的第七列是 Y 数据,第五列和第六列是 X 数据。我想用这个列表计算截距(intercept)和系数(coef)。如果数据在 csv 文件中,我知道该怎么做,例如:
import pandas as pd
from sklearn import linear_model
# Load the CSV; the 'point' column is the target and every other column is
# used as a feature.
wine = pd.read_csv(r"D:/1.csv", sep=',')
y = wine['point']
X = wine.drop('point', axis=1)

# Ordinary least-squares fit of y on X.
clf = linear_model.LinearRegression()
clf.fit(X, y)

# Report the intercept followed by the first two coefficients.
coefficients = clf.coef_
print(clf.intercept_, coefficients[0], coefficients[1])
但现在,我需要直接用这个列表来计算截距和系数。该怎么做?
答案 0 :(得分:2)
改动不大,只需将 list1 以 float 类型加载到 DataFrame 中,然后切片(slice)并拟合(fit):
import pandas as pd
from sklearn.linear_model import LinearRegression

# list1 stores every field as a string, so load the rows as floats.
df = pd.DataFrame(list1, dtype=float)

# Features are the 5th and 6th columns (0-based positions 4 and 5); the
# target is the 7th (last) column. Slicing 5:7 instead would put the target
# itself among the features and yield the degenerate fit
# intercept_ == [0.], coef_ == [[0. 1.]].
X = df.iloc[:, 4:6]
y = df.iloc[:, [-1]]  # list selector keeps y 2-D (a one-column DataFrame)

clf = LinearRegression()
clf.fit(X, y)

# Because y is 2-D, intercept_ has shape (1,) and coef_ has shape (1, 2).
print(clf.intercept_)
print(clf.coef_)
答案 1 :(得分:1)
您可以将列表转换为numpy数组,然后将其提供给模型。
import numpy as np
import pandas as pd
from sklearn import linear_model
# Sample rows; every field is stored as a string. The script below reads
# column indices 4 and 5 as the features and the last column as the target.
list1 = [['2012', '1', '3', '1', '832.0', '261.0', '100.00'],
['2012', '1', '5', '1', '507.0', '193.0', '92.50'],
['2012', '2', '3', '1', '412.0', '200.0', '95.00'],
['2012', '2', '5', '1', '560.0', '335.0', '90.00'],
['2012', '3', '3', '1', '584.0', '205.0', '100.00'],
['2012', '3', '5', '1', '595.0', '162.0', '92.50'],
['2012', '4', '3', '1', '504.0', '227.0', '100.00'],
['2012', '4', '5', '1', '591.0', '264.0', '92.50'],
['2012', '5', '3', '1', '489.0', '234.0', '100.00'],
['2012', '5', '5', '1', '561.0', '292.0', '95.00'],
['2012', '6', '3', '1', '622.0', '221.0', '100.00'],
['2012', '6', '5', '1', '478.0', '204.0', '92.50'],
['2012', '7', '3', '1', '974.0', '290.0', '100.00'],
['2012', '7', '5', '1', '553.0', '269.0', '95.00'],
['2012', '8', '3', '1', '473.0', '158.0', '100.00'],
['2012', '8', '5', '1', '526.0', '174.0', '92.50'],
['2012', '9', '3', '1', '701.0', '189.0', '95.00'],
['2012', '9', '5', '1', '502.0', '179.0', '97.50'],
['2012', '10', '3', '1', '470.0', '184.0', '100.00'],
['2012', '10', '5', '1', '579.0', '218.0', '92.50']]
# Convert to a float array up front: list1 stores every number as a string,
# so a plain np.asarray(list1) would produce a string-typed array.
wine = np.asarray(list1, dtype=float)

clf = linear_model.LinearRegression()
X = wine[:, 4:6]  # 5th and 6th columns: the two features
y = wine[:, -1]   # last (7th) column: the target
clf.fit(X, y)

# Print the intercept followed by the coefficient of each of the two features.
print(clf.intercept_, clf.coef_[0], clf.coef_[1])
105.668662639 0.0 0.18516127419
答案 2 :(得分:0)
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Sample rows; every field is stored as a string. The code below converts
# them to floats and uses column indices 4, 5 and 6.
list1 = [['2012', '1', '3', '1', '832.0', '261.0', '100.00'],
['2012', '1', '5', '1', '507.0', '193.0', '92.50'],
['2012', '2', '3', '1', '412.0', '200.0', '95.00'],
['2012', '2', '5', '1', '560.0', '335.0', '90.00'],
['2012', '3', '3', '1', '584.0', '205.0', '100.00'],
['2012', '3', '5', '1', '595.0', '162.0', '92.50'],
['2012', '4', '3', '1', '504.0', '227.0', '100.00'],
['2012', '4', '5', '1', '591.0', '264.0', '92.50'],
['2012', '5', '3', '1', '489.0', '234.0', '100.00'],
['2012', '5', '5', '1', '561.0', '292.0', '95.00'],
['2012', '6', '3', '1', '622.0', '221.0', '100.00'],
['2012', '6', '5', '1', '478.0', '204.0', '92.50'],
['2012', '7', '3', '1', '974.0', '290.0', '100.00'],
['2012', '7', '5', '1', '553.0', '269.0', '95.00'],
['2012', '8', '3', '1', '473.0', '158.0', '100.00'],
['2012', '8', '5', '1', '526.0', '174.0', '92.50'],
['2012', '9', '3', '1', '701.0', '189.0', '95.00'],
['2012', '9', '5', '1', '502.0', '179.0', '97.50'],
['2012', '10', '3', '1', '470.0', '184.0', '100.00'],
['2012', '10', '5', '1', '579.0', '218.0', '92.50']]
def drawabline(xy):
    """Fit a straight line to a two-column array, print it, and plot it.

    Column 0 of ``xy`` is used as x and column 1 as y. The fitted slope and
    intercept are printed, then the points are drawn as a scatter plot with
    the fitted line on top using the 'b' format string.
    """
    xs = xy[:, 0]
    ys = xy[:, 1]

    # A degree-1 polyfit returns the coefficients highest power first.
    slope, intercept = np.polyfit(xs, ys, 1)
    print(slope, intercept)
    # Alternative: stats.linregress(xs, ys) also returns the slope and
    # intercept as its first two results.

    # Evaluate the fitted line at every observed x.
    fitted = slope * xs + intercept

    plt.figure()
    plt.scatter(xs, ys)
    plt.plot(xs, fitted, 'b')
    plt.show()
# Parse every field to float. Use the builtin float: the np.float alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
data = np.asarray(list1, dtype=float)

# Fit and plot the last column (index 6) against each X column separately:
# first against column index 4, then against column index 5.
# Indexing with a list of column indices already returns a new array, so no
# extra np.array(...) copy is needed.
data1 = data[:, [4, 6]]
print('data1=', data1)
drawabline(data1)

data2 = data[:, [5, 6]]
print('data2=', data2)
drawabline(data2)