'Keras - Multiclass classification and transfer learning, bad validation and test accuracy
I am building a multiclass classification model that would be able to recognize 4 different insects. I am using Resnet50 (weights = imagenet).
The dataset is small, average 100 photos per class (more than 400 in total)
Depends on model, I usually get val_accuracy more than 90% (epochs 200) and test accuracy around 80-85% but when I print confusion matrix or plot actual and predicted labels for given photos, results are terrible (usually around 25%).
I have tried different models (resnet18, resnet50v2, Xception) I was freezing model layers, tried different data augumentation parameters, different model parameters(such as: Dropout(0.5, 0.2), kernel_regularizer='l2' because I read that helps reducing overfitting).
I think problem is while generating images but I don't know what else to change there, I tried val_generator with shuffle=False/True, train_generator seed=1/off but final results are similar.
I am adding images of confusion matrix, accuracy and plotted photos.
I am using jupyter notebook. Thank you!
directory_train = "keras_splited/train"
directory_test = "keras_splited/test"
directory_val = "keras_splited/val"
BATCH_SIZE = 32
IMG_SIZE = 224
def make_DataImageGenerator(validation_split=None):
image_generator = ImageDataGenerator(
rescale=(1.0/255),
rotation_range=40,
zoom_range=0.1,
horizontal_flip=True,
vertical_flip=True,
validation_split=validation_split
)
return image_generator
train_img_generator = make_DataImageGenerator(validation_split=None)
val_img_generator = make_DataImageGenerator(validation_split=None)
test_img_generator = make_DataImageGenerator(validation_split=None)
def get_generator(img_generator, directory, train_valid=None, seed=None, shuffle=True):
train_generator = img_generator.flow_from_directory(
directory,
batch_size=BATCH_SIZE,
target_size=(IMG_SIZE, IMG_SIZE),
subset=train_valid,
seed=seed,
shuffle=shuffle
)
return train_generator
train_generator = get_generator(train_img_generator, directory_train)
val_generator = get_generator(val_img_generator, directory_val)
test_generator = get_generator(test_img_generator, directory_test)
target_labels = next(os.walk(directory_train))[1]
target_labels.sort()
num_classes = len(target_labels)
model_feature_extraction = tf.keras.applications.ResNet50(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
x = model_feature_extraction.output
x = layers.GlobalAveragePooling2D()(x)
x = Dense(1024, activation="relu")(x)
myModelOut = Dense(4, activation="softmax")(x)
model = Model(inputs=model_feature_extraction.input, outputs=myModelOut)
optimizer = "adam"
loss = "categorical_crossentropy"
def freeze_pretrained_weights(model):
#model.layers[0].trainable=False #wanted to freeze the model but didn't work good
model.compile(
optimizer=optimizer,
loss=loss,
metrics=["accuracy"]
)
return model
frozen_new_model = freeze_pretrained_weights(model)
my_callbacks = [
tf.keras.callbacks.ModelCheckpoint("testno/best_model/", save_best_only=True, monitor="accuracy", save_weights_only=False, mode="max"),
tf.keras.callbacks.ReduceLROnPlateau(monitor="loss", factor=0.2, patience=25, min_lr=0.001)
]
def train_model(model, train_gen, valid_gen, epochs):
train_steps_per_epoch = train_gen.n // train_gen.batch_size
history = model.fit(
train_gen,
steps_per_epoch=train_steps_per_epoch,
epochs=epochs,
callbacks=my_callbacks,
validation_data=valid_gen,
)
return history
history_frozen_model = train_model(frozen_new_model, train_generator, val_generator, epochs=150)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.plot(history_frozen_model.history['accuracy'])
plt.plot(history_frozen_model.history['val_accuracy'])
plt.title('Accuracy vs. epochs')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='lower right')
plt.subplot(122)
plt.plot(history_frozen_model.history['loss'])
plt.plot(history_frozen_model.history['val_loss'])
plt.title('Loss vs. epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()
test_steps = test_generator.n // test_generator.batch_size
test_generator.reset()
new_model_test_loss, new_model_test_acc = frozen_new_model.evaluate(test_generator)
print('\nTest dataset')
print(f"Loss: {new_model_test_loss}")
print(f"Accuracy: {new_model_test_acc}")
pred = frozen_new_model.predict(test_generator, steps=test_steps, verbose=1)
batch = next(test_generator)
batch_images = np.array(batch[0])
batch_labels = np.array(batch[1])
target_labels = np.asarray(target_labels)
print(target_labels)
plt.figure(figsize=(15,15))
for n, i in enumerate(np.arange(6)):
actual = target_labels[np.argmax(batch_labels[i])]
predicted = target_labels[np.argmax(pred[i])]
confidence = round(100*(np.max(pred[i])),2)
ax = plt.subplot(3,3,n+1)
plt.imshow(batch_images[i])
plt.title(f"Actual: {actual},\n Predicted: {predicted},\n Confidence: {confidence}")
plt.axis('off')
from sklearn.metrics import ConfusionMatrixDisplay
y_true_lista = []
y_pred_lista = []
for i, img in enumerate(batch_labels):
y_true = np.argmax(batch_labels[i]).reshape(-1)
for i in y_true:
y_true_lista.append(i)
y_pred = np.argmax(pred[i]).reshape(-1)
for i in y_pred:
y_pred_lista.append(i)
print("y_true: ", y_true_lista)
print("y_pred: ", y_pred_lista)
matrix = confusion_matrix(y_true, y_pred)
#print(matrix.shape)
labels = target_labels
cm = confusion_matrix(y_true_lista, y_pred_lista)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues, xticks_rotation = 'vertical')
plt.show()
I don't know what to change to get right results when plotting and on the matrix
Can someone point me to the right direction? What did I do wrong with this model or is it something wrong with plotting?
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|



