我在python中有以下代码,在其中尝试实现线性回归。我尝试通过具有功能computeCost来计算成本,作为第一步。这是向量化的实现,因此我尝试通过定义矩阵X,y和theta来实现成本函数的公式。
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
def loadData():
dataset = pd.read_csv('datasets/Folds5x2_pp.csv')
X = dataset[['AT','V','AP','RH']]
y = dataset['PE']
#print(X)
scaler = MinMaxScaler()
scaler.fit(X)
X_nor = scaler.transform(X)
ones = np.ones([X.shape[0],1])
X_nor = np.hstack((ones,X_nor))
#print(X_nor)
y_nor = (y - min(y)) / (max(y) - min(y))
return X_nor, y_nor
def computeCost(X,y,theta):
tobesummed = np.power(((X @ theta.T)-y),2)
return np.sum(tobesummed)/(2 * len(X))
if __name__ == "__main__":
X,y = loadData()
theta = np.ones([1,5])
alpha = 0.01
iters = 1000
computeCost(X,y,theta)
当我运行computeCost时,出现以下错误,该数据必须是一维的。我不确定1维是什么意思。
Exception Traceback (most recent call last)
<ipython-input-64-7bd696e0edc3> in <module>
32 iters = 1000
33
---> 34 computeCost(X,y,theta)
35
36
<ipython-input-64-7bd696e0edc3> in computeCost(X, y, theta)
21
22 def computeCost(X,y,theta):
---> 23 tobesummed = np.power(((X @ theta.T)-y),2)
24 return np.sum(tobesummed)/(2 * len(X))
25
~\Anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(left, right)
1583 result = safe_na_op(lvalues, rvalues)
1584 return construct_result(left, result,
-> 1585 index=left.index, name=res_name, dtype=None)
1586
1587 wrapper.__name__ = op_name
~\Anaconda3\lib\site-packages\pandas\core\ops.py in _construct_result(left, result, index, name,
dtype)
1472 not be enough; we still need to override the name attribute.
1473 """
-> 1474 out = left._constructor(result, index=index, dtype=dtype)
1475
1476 out.name = name
~\Anaconda3\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name,
copy, fastpath)
260 else:
261 data = sanitize_array(data, index, dtype, copy,
--> 262 raise_cast_failure=True)
263
264 data = SingleBlockManager(data, index, fastpath=True)
~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in sanitize_array(data,
index, dtype, copy, raise_cast_failure)
656 elif subarr.ndim > 1:
657 if isinstance(data, np.ndarray):
--> 658 raise Exception('Data must be 1-dimensional')
659 else:
660 subarr = com.asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional