'Keras - Multiclass classification and transfer learning, bad validation and test accuracy

I am building a multiclass classification model that would be able to recognize 4 different insects. I am using Resnet50 (weights = imagenet).

The dataset is small, average 100 photos per class (more than 400 in total)

Depends on model, I usually get val_accuracy more than 90% (epochs 200) and test accuracy around 80-85% but when I print confusion matrix or plot actual and predicted labels for given photos, results are terrible (usually around 25%).

I have tried different models (resnet18, resnet50v2, Xception) I was freezing model layers, tried different data augumentation parameters, different model parameters(such as: Dropout(0.5, 0.2), kernel_regularizer='l2' because I read that helps reducing overfitting).

I think problem is while generating images but I don't know what else to change there, I tried val_generator with shuffle=False/True, train_generator seed=1/off but final results are similar.

I am adding images of confusion matrix, accuracy and plotted photos.

I am using jupyter notebook. Thank you!

directory_train = "keras_splited/train"
directory_test = "keras_splited/test"
directory_val = "keras_splited/val"
    
BATCH_SIZE = 32
IMG_SIZE = 224

def make_DataImageGenerator(validation_split=None):
    image_generator = ImageDataGenerator(
        rescale=(1.0/255),
        rotation_range=40,
        zoom_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        validation_split=validation_split
        )

    return image_generator

train_img_generator = make_DataImageGenerator(validation_split=None)
val_img_generator = make_DataImageGenerator(validation_split=None)
test_img_generator = make_DataImageGenerator(validation_split=None)

def get_generator(img_generator, directory, train_valid=None, seed=None, shuffle=True):
    train_generator = img_generator.flow_from_directory(
        directory,
        batch_size=BATCH_SIZE,
        target_size=(IMG_SIZE, IMG_SIZE),
        subset=train_valid,
        seed=seed,
        shuffle=shuffle
    )

    return train_generator

train_generator = get_generator(train_img_generator, directory_train)

val_generator = get_generator(val_img_generator, directory_val)

test_generator = get_generator(test_img_generator, directory_test)

target_labels = next(os.walk(directory_train))[1]
target_labels.sort()

num_classes = len(target_labels)

model_feature_extraction = tf.keras.applications.ResNet50(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))

x = model_feature_extraction.output
x = layers.GlobalAveragePooling2D()(x)
x = Dense(1024, activation="relu")(x)
myModelOut = Dense(4, activation="softmax")(x)

model = Model(inputs=model_feature_extraction.input, outputs=myModelOut)

optimizer = "adam"
loss = "categorical_crossentropy"

def freeze_pretrained_weights(model):
    #model.layers[0].trainable=False #wanted to freeze the model but didn't work good

    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=["accuracy"]
    )

    return model

frozen_new_model = freeze_pretrained_weights(model)

my_callbacks = [
    tf.keras.callbacks.ModelCheckpoint("testno/best_model/", save_best_only=True, monitor="accuracy", save_weights_only=False, mode="max"),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="loss", factor=0.2, patience=25, min_lr=0.001)
    ]

def train_model(model, train_gen, valid_gen, epochs):
    train_steps_per_epoch = train_gen.n // train_gen.batch_size

    history = model.fit(
        train_gen,
        steps_per_epoch=train_steps_per_epoch,
        epochs=epochs,
        callbacks=my_callbacks,
        validation_data=valid_gen,
    )

    return history

history_frozen_model = train_model(frozen_new_model, train_generator, val_generator, epochs=150)

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.plot(history_frozen_model.history['accuracy'])
plt.plot(history_frozen_model.history['val_accuracy'])
plt.title('Accuracy vs. epochs')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='lower right')

plt.subplot(122)
plt.plot(history_frozen_model.history['loss'])
plt.plot(history_frozen_model.history['val_loss'])
plt.title('Loss vs. epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()

test_steps = test_generator.n // test_generator.batch_size
test_generator.reset()

new_model_test_loss, new_model_test_acc = frozen_new_model.evaluate(test_generator)
print('\nTest dataset')
print(f"Loss: {new_model_test_loss}")
print(f"Accuracy: {new_model_test_acc}")

pred = frozen_new_model.predict(test_generator, steps=test_steps, verbose=1)

batch = next(test_generator)
batch_images = np.array(batch[0])
batch_labels = np.array(batch[1])

target_labels = np.asarray(target_labels)
print(target_labels)

plt.figure(figsize=(15,15))
for n, i in enumerate(np.arange(6)):
    actual = target_labels[np.argmax(batch_labels[i])]
    predicted = target_labels[np.argmax(pred[i])]
    confidence = round(100*(np.max(pred[i])),2)

    ax = plt.subplot(3,3,n+1)
    plt.imshow(batch_images[i])
    plt.title(f"Actual: {actual},\n Predicted: {predicted},\n Confidence: {confidence}")
    plt.axis('off')


from sklearn.metrics import ConfusionMatrixDisplay
y_true_lista = []
y_pred_lista = []
for i, img in enumerate(batch_labels):
    y_true = np.argmax(batch_labels[i]).reshape(-1)
    for i in y_true:
        y_true_lista.append(i)

    y_pred = np.argmax(pred[i]).reshape(-1)
    for i in y_pred:
        y_pred_lista.append(i)

print("y_true: ", y_true_lista)
print("y_pred: ", y_pred_lista)
matrix = confusion_matrix(y_true, y_pred)
#print(matrix.shape)

labels = target_labels

cm = confusion_matrix(y_true_lista, y_pred_lista)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)

disp.plot(cmap=plt.cm.Blues, xticks_rotation = 'vertical')

plt.show()

accuracy and loss vs. epochs

confusion matrix that I usually get, sometimes looks different but it's wrong all the time

difference between actual and predicted labels and their confidence

I don't know what to change to get right results when plotting and on the matrix

Can someone point me to the right direction? What did I do wrong with this model or is it something wrong with plotting?



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source