Question

我正在尝试使用training_label.CSV和两个文件夹（该模型的（测试/培训）文件夹）来识别语音声谱图中的重音。

我正在运行模型，但是遇到错误：

ValueError: Error when checking input: expected input_2 to have shape (128, 173, 1) but got array with shape (128, 173, 3)

我不确定要进行哪些更改才能使其正常工作？

# Importing the Keras libraries and packages

import pandas as pd
import numpy as np
from skimage.io import imread
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
%matplotlib inline

import keras
from keras.models import Sequential
from keras import optimizers
from keras.utils import to_categorical
from keras.layers import Input, Dense, Flatten, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, concatenate
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.preprocessing import image

# Initialising the CNN
classifier = Sequential()

# Step 1 - Convolution
classifier.add(Conv2D(32, (3, 3), input_shape = (128, 173, 1), activation = 'relu'))

# Step 2 - Pooling
classifier.add(MaxPooling2D(pool_size = (2, 2)))

# Adding a second convolutional layer
classifier.add(Conv2D(32, (3, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

# Step 3 - Flattening
classifier.add(Flatten())

# Step 4 - Full connection
classifier.add(Dense(units = 128, activation = 'relu'))
classifier.add(Dense(units = 1, activation = 'sigmoid'))

# Compiling the CNN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])


#Read Data
train_df = pd.read_csv('dataset/training_set_labels.csv', index_col=0)
train_df['Canada_file'] = train_df.index.map(lambda id: f'dataset/training/{id}.png')
train_df['India_file'] = train_df.index.map(lambda id: f'dataset/training/{id}.png')
train_df['England_file'] = train_df.index.map(lambda id: f'dataset/training/{id}.png')
print(train_df.head())


#Read Spectrograms
def read_spectograms(file_paths, img_rows, img_cols, as_gray, channels):
  """
  Reads the spectogram files from disk and normalizes the pixel values
    @params:
      file_paths - Array of file paths to read from
      img_rows - The image height.
      img_cols - The image width.
      as_grey - Read the image as Greyscale or RGB.
      channels - Number of channels.
    @returns:
      The created and compiled model (Model)        
  """
  images = []

  for file_path in file_paths:
    images.append(imread(file_path, as_grey = as_gray))

  images = np.asarray(images, dtype=np.float32)

  # normalize
  images = images / np.max(images)

  # reshape to match Keras expectaions
  images = images.reshape(images.shape[0], img_rows, img_cols, channels)

  return images

#Parameters
as_gray = True
in_channel = 4

if as_gray:
  in_channel = 1

img_rows, img_cols = 128, 173
num_classes = 3 # number of accents

batch_size = 32
epochs = 1
input_shape = (img_rows, img_cols, in_channel)
input_img = Input(shape = input_shape)

#Accent Files
x_train_Canada = read_spectograms(train_df.Canada_file.values, img_rows, img_cols, as_gray, in_channel)
x_train_India = read_spectograms(train_df.India_file.values, img_rows, img_cols, as_gray, in_channel)
x_train_England = read_spectograms(train_df.England_file.values, img_rows, img_cols, as_gray, in_channel)

#Labels
labels = train_df.Accent.values

# convert class vectors to binary class matrices One Hot Encoding
labels = keras.utils.to_categorical(labels, num_classes)


#Show Data
Accent = [
    'Canada', 
    'India', 
    'England']

# pick a random index from the list
rn_Accent = np.random.choice(train_df.Accent.values)
rn_label = train_df.Accent.values[rn_Accent]
rn_Canada = x_train_Canada[rn_Accent]
rn_India = x_train_India[rn_Accent]
rn_England = x_train_England[rn_Accent]

plt.figure()
plt.axis('off')

plt.suptitle(f"{Accent[rn_label]} (Label: {rn_label})", fontsize="x-large")

plt.subplot(121)
curr_img = None
if as_gray:
  curr_img = np.reshape(rn_Canada, (img_rows, img_cols))
else:
  curr_img = np.reshape(rn_Canada, (img_rows, img_cols, in_channel))

plt.imshow(curr_img, cmap='gray')
plt.title("Canada")
plt.xticks([])
plt.yticks([])

plt.subplot(122)
if as_gray:
  curr_img = np.reshape(rn_India, (img_rows, img_cols))
else:
  curr_img = np.reshape(rn_India, (img_rows, img_cols, in_channel))

plt.imshow(curr_img, cmap='gray')
plt.title("India")
plt.xticks([])
plt.yticks([])

plt.subplot(123)
if as_gray:
  curr_img = np.reshape(rn_England, (img_rows, img_cols))
else:
  curr_img = np.reshape(rn_England, (img_rows, img_cols, in_channel))

plt.imshow(curr_img, cmap='gray')
plt.title("England")
plt.xticks([])
plt.yticks([])

plt.show()



#Split in Train and Test Batches
x_train_comp = np.stack((x_train_Canada, x_train_India, x_train_England), axis=4)

x_train, x_test, y_train, y_test = train_test_split(x_train_comp, labels, test_size = 0.3, random_state=666)


# take them apart
x_train_Canada = x_train[:,:,:,0]
x_test_Canada = x_test[:,:,:,0]

x_train_India = x_train[:,:,:,0]
x_test_India = x_test[:,:,:,0]

x_train_England = x_train[:,:,:,0]
x_test_England = x_test[:,:,:,0]

#Create the Model
def create_convolution_layers(input_img):
  model = Conv2D(32, (3, 3), padding='same', input_shape=input_shape)(input_img)
  model = LeakyReLU(alpha=0.1)(model)
  model = MaxPooling2D((2, 2),padding='same')(model)
  model = Dropout(0.25)(model)

  model = Conv2D(64, (3, 3), padding='same')(model)
  model = LeakyReLU(alpha=0.1)(model)
  model = MaxPooling2D(pool_size=(2, 2),padding='same')(model)
  model = Dropout(0.25)(model)

  model = Conv2D(128, (3, 3), padding='same')(model)
  model = LeakyReLU(alpha=0.1)(model)
  model = MaxPooling2D(pool_size=(2, 2),padding='same')(model)
  model = Dropout(0.4)(model)

  return model

Canada_input = Input(shape=input_shape)
Canada_model = create_convolution_layers(Canada_input)

India_input = Input(shape=input_shape)
India_model = create_convolution_layers(India_input)

England_input = Input(shape=input_shape)
England_model = create_convolution_layers(England_input)

conv = concatenate([Canada_model, India_model, England_model])

conv = Flatten()(conv)

dense = Dense(512)(conv)
dense = LeakyReLU(alpha=0.1)(dense)
dense = Dropout(0.5)(dense)

output = Dense(num_classes, activation='softmax')(dense)

model = Model(inputs=[Canada_input, India_input, England_input], outputs=[output])

opt = optimizers.Adam()

model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

print(model.summary())

#Train the Model
best_weights_file="weights.best.hdf5"
checkpoint = ModelCheckpoint(best_weights_file, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
#lr_reducer = ReduceLROnPlateau(verbose=1)

callbacks = [checkpoint]

model.fit([x_train_Canada, x_train_India, x_train_England], y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=callbacks,
          verbose=1,
          validation_data=([x_test_Canada, x_test_India, x_test_England], y_test),
          shuffle=True)


#Basic Evaluation
# load weights
#model.load_weights(best_weights_file)
#final_loss, final_acc = model.evaluate([x_test_accent, x_test_accent], y_test, verbose=1)
#print("Final loss: {0:.6f}, final accuracy: {1:.6f}".format(final_loss, final_acc))

#Predict
predict_df = pd.read_csv('dataset/submission_format.csv', index_col=0)
predict_df['Canada_file'] = predict_df.index.map(lambda id: f'dataset/testing/{id}.png')
x_test_accent = read_spectograms(predict_df.Canada_file.values, img_rows, img_cols, as_gray, in_channel)



#Get the predictions for the test data
predicted_classes = model.predict([x_test_Canada, x_test_India, x_test_England])
predict_df.Accent = np.argmax(predicted_classes,axis=1)
#predict_df = predict_df.drop(['Accent_file''], axis=1)
predict_df.to_csv('submission.csv')
files.download('submission.csv')

我不确定为什么会出现此错误。我对Python非常陌生，这是我正在做的Microsoft课程的一部分。这是link to the course

我想要达到的最终结果是使用模型的Testing文件夹中的数据生成Submission_Format.csv，其中模型已经预测了每个频谱图.png文件的每个重音。

Answer 1

您只需要将输入形状更改为（128，173，3），因为您正在处理包含3个不同颜色通道的图像

ValueError：检查输入时出错：预期input_2具有形状（128，173，1）但具有形状（128，173，3）的数组

1 个答案: