重塑两个数组以进行回归时遇到问题

时间:2019-07-02 17:31:13

标签: python arrays numpy

我不清楚是数组的哪个维度出了问题,导致回归模型无法正常运行。

这是到目前为止我尝试过的代码。

# Seven feature columns and the regression target, both taken from df_new.
feature_columns = [
    "Orig. X", "Orig. Y", "Orig Z", "x (Inches)", "y (Inches)",
    "z (Inches)", "Volume (orig. units)",
]
x = df_new[feature_columns]
y = df_new["runtime (min)"]

# Convert to NumPy arrays. The original passed the undefined name `X` here,
# which raises NameError before anything else runs.
x, y = np.array(x), np.array(y)
print('Fitting model...')
model = build_model()

# The original called make_regression() at this point, which replaced the
# real data with synthetic samples; that call is removed so the model is
# trained on the DataFrame columns selected above.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
print(x.shape)
print(y.shape)

# Fit on the training split only, so the score below is computed on rows the
# model has not seen.
# NOTE(review): the traceback in this post is raised by the pipeline's final
# estimator on the FeatureUnion output, so these fixes alone may not remove
# that ValueError -- confirm the custom transformers return 2-D arrays.
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print('Score:', score)

我该如何处理 x 和 y,才能让回归模型顺利运行?x 和 y 都有 100 行;x 有 7 列,而 y 当然只有 1 列。

编辑:下面的build_model()代码

def build_model():
    """Assemble and return the (unfitted) stacked regression pipeline.

    Three branches run side by side inside a ``FeatureUnion``:

    * ``ridge``: scaling, then polynomial features, then ``RidgeTransformer``
    * ``rand_forest``: ``RandomForestTransformer``
    * ``knn``: ``KNeighborsTransformer``

    The union's output is fed to a final ``LinearRegression``.

    NOTE(review): the ``*Transformer`` classes are defined elsewhere;
    presumably each wraps a regressor and exposes its predictions through
    ``transform`` -- verify that they return 2-D ``(n_samples, 1)`` arrays.
    """
    # Ridge branch: preprocessing steps followed by the ridge transformer.
    ridge_branch = Pipeline(steps=[
        ('scaler', StandardScaler()),
        ('poly_feats', PolynomialFeatures()),
        ('ridge', RidgeTransformer()),
    ])

    # All branches whose outputs are combined by the union.
    branches = [
        ('ridge', ridge_branch),
        ('rand_forest', RandomForestTransformer()),
        ('knn', KNeighborsTransformer()),
    ]
    stacked_outputs = FeatureUnion(transformer_list=branches, n_jobs=2)

    # Final estimator is trained on the combined branch outputs.
    return Pipeline(steps=[
        ('pred_union', stacked_outputs),
        ('lin_regr', LinearRegression()),
    ])

编辑:以下是错误回溯(traceback)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-127-775526fc779a> in <module>
     25 print(x.shape)
     26 print(y.shape)
---> 27 model.fit(x,y)
     28 
     29 score = model.score(X_test, y_test)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
    265         Xt, fit_params = self._fit(X, y, **fit_params)
    266         if self._final_estimator is not None:
--> 267             self._final_estimator.fit(Xt, y, **fit_params)
    268         return self
    269 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\linear_model\base.py in fit(self, X, y, sample_weight)
    456         n_jobs_ = self.n_jobs
    457         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
--> 458                          y_numeric=True, multi_output=True)
    459 
    460         if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
    754                     ensure_min_features=ensure_min_features,
    755                     warn_on_dtype=warn_on_dtype,
--> 756                     estimator=estimator)
    757     if multi_output:
    758         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    550                     "Reshape your data either using array.reshape(-1, 1) if "
    551                     "your data has a single feature or array.reshape(1, -1) "
--> 552                     "if it contains a single sample.".format(array))
    553 
    554         # in the future np.flexible dtypes will be handled like object dtypes

ValueError: Expected 2D array, got 1D array instead:
array=[  28.51328986   15.04637032   93.37898607  -98.63740483  105.3785245
  -46.01294375 -110.93503958 -144.79637899  -59.35923549   62.36778776
  -88.85236254   93.0756314   101.64029368  218.01391173  101.26137798
   54.72891455 -133.71211185 -125.99472368   29.94269368   71.59822427
  212.59784734  -33.55787108  184.89287736 -232.48041924  135.26734855
  -12.12651132 -172.7038076   -97.8749558   314.46956576  146.3874269
  102.80820968   76.93644443   16.96718666  134.04031743 -102.1777084
 -253.68761765  180.2381539    85.20069473 -140.62502367   47.03879138
  156.93511131   67.7676657    -9.04138323  123.93189227  174.12355414
  -56.19912667   61.63232531    2.04173984   34.86750124   -5.96328448
 -126.81900755 -174.96317239  111.01260932  -22.89424944   15.59595122
  280.37346561 -101.39356531 -176.98811589   88.49720305  -39.27647122
  -25.98321465  -33.27364379  -60.42319159  -48.1694774   -32.0730914
  138.79010141  -11.27634536  -74.92271316  -86.59070448  118.17802672
   64.50120432  -28.88942322  177.00001615  -75.84108743  -58.35393161
 -144.03754366  274.49491635  116.30453855 -123.67954762  -30.89047884
  -23.5174034    -1.00726339   -1.88196999  166.40349424 -137.95350454
   71.25835091  -64.09838143  -31.63507257  133.10292084  -67.9354037
   63.85237459  142.25572131 -108.63072303   -5.7313783   -50.98668871
   59.25002692  -13.43424531  -17.82269722  -45.83104936 -148.90728362
    1.50193106   17.87438824   81.92662239  -83.19388204   86.83775258
  -46.85608104  -84.17690659  -94.8195309   -42.46441727   48.86628343
  -86.59104641   98.88486117   65.52270744  190.27987169   86.45217583
   31.46603065 -116.71038463 -117.22039948   55.95260753   60.34723681
  136.14328873  -66.13663513  170.37393394 -234.74000246  142.4215913
  -15.46199011 -143.03607653 -106.87052936  239.01861784  141.20861316
   42.36405844   70.85384876    2.41030437  125.38478863  -49.55470155
 -175.47492103  146.60430159   83.0396612  -117.86866863   22.03673312
  111.04275639   68.28721289  -30.41667672   92.39030512  156.57403451
  -39.37562231   79.81213791   -7.6069786    39.83725999   12.14887626
  -68.94858082 -142.67522236  115.35903901  -29.70325292   27.43727563
  197.58564756  -93.72749402 -129.75046815   46.87218775  -28.13066948
  -32.2957201   -32.28477183  -68.89313453  -45.34605254   12.35023878
   99.43077668   -4.66376816  -81.9798013   -52.13944475   89.84264889
   25.88417774  -63.57461905  192.73937579  -43.27281399  -36.70804327
 -140.76773381  242.04620261  115.63767405 -101.22487977  -62.77992477
  -44.18638662    2.16920856  -24.62620964  162.50686903 -127.07490443
   31.10421595  -57.55304524  -74.66626193  131.84190948  -73.94748475
   59.1205745   146.03675545 -109.33328323  -27.49279807   19.15473012
   62.80408203   33.36949411   -1.9273022   -51.49779114 -111.2861183
   34.06674269   18.05670255   46.43729984 -140.21078294   64.59153281
   11.74490721  -57.39114509 -114.55901385  -13.53130554  -13.82323384
   -0.49549397   53.19906766   90.57809032  151.70561796  122.69049054
   72.98809521 -154.14476693  -74.05733629   32.41234415   50.97790246
  116.93388344  -63.94235318  133.64933024 -139.94909392  109.72026197
   11.56169254 -138.87962453  -59.10262835  174.53934447  156.64912468
   65.03610007   76.4964515    22.04764677  115.84366991  -46.86259747
 -161.7681963   158.8401824     6.38833818 -105.66046837   30.75080677
  109.00053199   49.33338858  -13.05038717  111.00662647  122.23128993
  -40.06503916   71.0503438    -8.47825212   -3.03806969  -11.35250807
  -90.35772487  -86.08357992  131.31669463   -7.10243146   65.56581852
  177.63529127  -20.92617293 -161.7681963    55.26073005   30.13803328
   -8.53039856   -2.46567779  -73.25040534  -21.31245277  -34.32898768
   67.07418455   -5.87635605  -70.95576416  -78.20779048  111.04994216
   -4.35860823    6.44163718  141.88283904  -55.03298105  -59.06454078
  -56.6655874   152.89861136   88.89354733  -32.13338662   10.56350227
  -36.24515621   15.19991518   27.09160819  122.94643958  -99.11494519
   89.08121089  -50.14558081  -22.37651747  141.88283904  -39.79718785
   55.29960662  125.56984308  -74.99872018  -20.25735477  -42.91528289
   51.09138403   -8.37011402    6.79676896  -49.05931304 -117.99910408].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

0 个答案:

没有答案