ValueError:x和y必须具有相同的第一个维度,但具有不同的形状

时间:2017-10-12 10:51:59

标签: python machine-learning scikit-learn

import urllib.request  
from math import sqrt, fabs, exp
import matplotlib.pyplot as plot
from sklearn.linear_model import enet_path
from sklearn.metrics import roc_auc_score, roc_curve
import numpy

target_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'
data = urllib.request.urlopen(target_url)

xList = []
for line in data:
    #split on comma
    row = line.strip().split(",".encode(encoding='utf-8'))  
    xList.append(row)

xNum = []
labels = []

for row in xList:
    lastCol = row.pop()
    if lastCol == b'M':
        labels.append(1.0)
    else:
        labels.append(0.0)
    attrRow = [float(elt) for elt in row]
    xNum.append(attrRow)

nrow = len(xNum)
ncol = len(xNum[1])

alpha = 1.0

xMeans = []
xSD = []
for i in range(ncol):
    col = [xNum[j][i] for j in range(nrow)]
    mean = sum(col)/nrow
    xMeans.append(mean)
    colDiff = [(xNum[j][i] - mean) for j in range(nrow)]
    sumSq = sum([colDiff[i] * colDiff[i] for i in range(nrow)])
    stdDev = sqrt(sumSq/nrow)
    xSD.append(stdDev)

xNormalized = []
for i in range(nrow):
    rowNormalized = [(xNum[i][j] - xMeans[j])/xSD[j] for j in range(ncol)]
    xNormalized.append(rowNormalized)

meanLabel = sum(labels)/nrow
sdLabel = sqrt(sum([(labels[i] - meanLabel) * (labels[i] - meanLabel) for i in range (nrow)])/nrow)



labelNormalized = [(labels[i] - meanLabel)/sdLabel for i in range(nrow)]

nxval = 10
for ixval in range(nxval):
    idxTest = [a for a in range (nrow) if a%nxval == ixval]
    idxTrain = [a for a in range(nrow) if a%nxval != ixval]

    xTrain = numpy.array([xNormalized[r] for r in idxTrain])
    xTest = numpy.array([xNormalized[r] for r in idxTest])
    labelTrain = numpy.array([labelNormalized[r] for r in idxTrain])
    labelTest = numpy.array([labelNormalized[r] for r in idxTest])
    alphas, coefs, _ = enet_path(xTrain, labelTrain, l1_ratio = 0.8, fit_intercept=False, return_models=False)


    if ixval == 0:
        pred = numpy.dot(xTest, coefs)
        yOut = labelTest

    else:
        yTemp = numpy.array(yOut)
        yOut = numpy.concatenate((yTemp, labelTest), axis = 0)


    predTemp = numpy.array(pred)
    pred = numpy.concatenate((predTemp, numpy.dot(xTest, coefs)), axis = 0)


misClassRate = []
_,nPred = pred.shape
for iPred in range(1, nPred):
    predList = list(pred[:, iPred])
    errCnt = 0.0
    for irow in range(nrow):
        if (predList[irow] < 0.0) and (yOut[irow] >= 0.0):
            errCnt += 1.0
        elif (predList[irow] >= 0.0) and (yOut[irow] < 0.0):
            errCnt += 1.0

    misClassRate.append(errCnt/nrow)

    minError = min(misClassRate)
    idxMin = misClassRate.index(minError)
    plotAlphas = numpy.array(alphas[1:len(alphas)])
    misClassRate_np = numpy.array(misClassRate)

    plot.figure()
    plot.plot(plotAlphas, misClassRate_np, label='Misclassification Error Across Folds', linewidth=2)
    plot.axvline(plotAlphas[idxMin], linestyle='--', label='CV Estimate of Best alpha')
    plot.legend()
    plot.semilogx()
    ax = plot.gca()
    ax.invert_xaxis()
    plot.xlabel('alpha')
    plot.ylabel('Misclassification Error')
    plot.axis('tight')
    plot.show()

当我执行上面的代码时,它返回:ValueError:x和y必须具有相同的第一个维度,但是具有形状(99,)和(1,)。

似乎问题是由于x和y的长度不等。

然后我检查了plotAlphasmisClassRate_np,它们显示的长度相同。此外,它们都已更改为阵列但仍无法解决问题。无法弄清楚发生了什么。

0 个答案:

没有答案