我正在研究来自 kaggle https://www.kaggle.com/hellbuoy/car-price-prediction 的汽车价格预测。
问题是,当我运行 model.fit()
时,将数据拆分为训练、测试和验证集后,
它一直给我这个ValueError
。
我使用 pd.get_dummies()
对字符串数据进行分类。我发现了一个问题,即数据的较低值没有一些位于数据上部的值。所以我使用 pd.sample
来改变数据的顺序,我必须不断尝试直到形状匹配为止。这是什么原因?我该怎么做才能不使用 pd.sample
?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras import layers,models
from tensorflow.keras.utils import to_categorical
cars_data=pd.read_csv('/content/CarPrice_Assignment.csv')
train_data=cars_data.iloc[:103]
train_data=train_data.drop('price',axis=1)
train_data=train_data.drop('CarName',axis=1)
train_data=train_data.drop('car_ID',axis=1)
categorical = train_data.select_dtypes("object").columns
dummy = pd.get_dummies(train_data[categorical],drop_first=True)
train_data.drop(categorical,axis = 1,inplace=True)
train_data = pd.concat([dummy,train_data],axis = 1)
print(train_data.shape)
train_targets=cars_data.price.iloc[:103]
train_targets=np.asarray(train_targets)
test_data=cars_data.iloc[103:166]
test_data=test_data.drop('price',axis=1)
test_data=test_data.drop('CarName',axis=1)
test_data=test_data.drop('car_ID',axis=1)
categorical1 = test_data.select_dtypes("object").columns
dummy1 = pd.get_dummies(test_data[categorical1],drop_first=True)
test_data.drop(categorical1,axis = 1,inplace=True)
test_data = pd.concat([dummy1,test_data],axis = 1)
test_targets=cars_data.price.iloc[103:166]
test_targets=np.asarray(test_targets)
val_data=cars_data.iloc[166:]
val_data=val_data.drop('price',axis=1)
val_data=val_data.drop('CarName',axis=1)
val_data=val_data.drop('car_ID',axis=1)
categorical2=val_data.select_dtypes('object').columns
dummy2=pd.get_dummies(val_data[categorical2],drop_first=True)
val_data.drop(val_data[categorical2],axis=1,inplace=True)
val_data=pd.concat([dummy2,val_data],axis=1)
val_targets=cars_data.price.iloc[166:]
val_targets=np.asarray(val_targets)
print(val_data.shape)
model=models.Sequential()
model.add(layers.Dense(256,activation='relu',input_shape=(40,)))
model.add(layers.Dense(128,activation='relu'))
model.add(layers.Dense(32,activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop',loss='mse',metrics=['mae'])
model.fit(train_data,train_targets,epochs=400,batch_size=1,validation_data=(val_data,val_targets))