I’m trying to build a classifier for artwork using a bag-of-visual-words model. However, I keep getting the same prediction for every artwork I feed into it: it seems to always predict the most frequent class in the training dataset that I gave it. I’m using the training images from the iMet Collection 2019 - FGVC6 competition on Kaggle, together with a custom CSV file that lists each picture’s name and its class. This CSV file is heavily unbalanced, as two classes (out of nearly a thousand) represent 75% of the dataset.
Here’s the bulk of my code:
from cv2 import cv2 as cv
import numpy as np
import sys
sys.path.extend(['../../'])
from src import utils
def _load_gray(img_path):
    """Return the image at *img_path* as grayscale, or None if it cannot be read."""
    img = cv.imread(img_path)
    if img is None:
        # cv.imread reports a missing/unreadable file by returning None
        # instead of raising, so it has to be checked before cvtColor.
        print("WARNING: could not read image: " + str(img_path), file=sys.stderr)
        return None
    return cv.cvtColor(img, cv.COLOR_BGR2GRAY)


if __name__ == '__main__':
    # Pipeline: SIFT descriptors -> k-means visual vocabulary -> one
    # bag-of-words histogram per image -> linear C-SVC -> accuracy report.
    DICTIONARY_SIZE = 150
    TRAIN_SIZE = 300
    TEST_SIZE = 100
    SVM_MAX_ITER = 3000
    SVM_EPSILON = 1e-6

    DETECTOR = cv.SIFT_create()
    MATCHER = cv.FlannBasedMatcher()
    EXTRACTOR = cv.BOWImgDescriptorExtractor(DETECTOR, MATCHER)
    TRAINER = cv.BOWKMeansTrainer(DICTIONARY_SIZE)

    SVM = cv.ml.SVM_create()
    SVM.setType(cv.ml.SVM_C_SVC)
    SVM.setKernel(cv.ml.SVM_LINEAR)
    SVM.setTermCriteria((cv.TERM_CRITERIA_MAX_ITER, SVM_MAX_ITER, SVM_EPSILON))
    # NOTE(review): the training CSV is described as heavily imbalanced. A
    # linear C-SVC with default parameters is likely to collapse onto the
    # majority class in that situation; consider balanced sampling,
    # SVM.setClassWeights(...) or SVM.trainAuto(...) -- TODO confirm which
    # fits this dataset. Deliberately not changed here.

    print("Generating Training and Test Sets...")
    train_set, test_set = utils.getTrainingAndTestSets('multiclass.csv', TRAIN_SIZE, TEST_SIZE)

    print("Generating Dictionary...")
    descriptor_rows = 0
    for train_entry in train_set:
        img = _load_gray(train_entry[0])
        if img is None:
            continue
        _, descriptors = DETECTOR.detectAndCompute(img, None)
        if descriptors is not None:
            TRAINER.add(descriptors)
            descriptor_rows += len(descriptors)

    # k-means cannot produce more clusters than it has samples.
    if descriptor_rows < DICTIONARY_SIZE:
        sys.exit("Not enough SIFT descriptors (" + str(descriptor_rows) + ") to build a dictionary of size " + str(DICTIONARY_SIZE) + ".")

    print("Clustering...")
    EXTRACTOR.setVocabulary(TRAINER.cluster())

    print("Preparing Training Data...")
    train_desc = []
    train_labels = []
    for train_entry in train_set:
        img_label = int(train_entry[1])
        img = _load_gray(train_entry[0])
        if img is None:
            continue
        descriptor = EXTRACTOR.compute(img, DETECTOR.detect(img))
        if descriptor is not None:
            # compute() yields a 1 x DICTIONARY_SIZE row; extend() appends
            # that single row so descriptors and labels stay aligned.
            train_desc.extend(descriptor)
            train_labels.append(img_label)

    if not train_desc:
        sys.exit("No training descriptors could be computed; nothing to train on.")

    print("Training...")
    # Be explicit about dtypes: float32 samples and int32 class labels.
    SVM.train(
        np.array(train_desc, dtype=np.float32),
        cv.ml.ROW_SAMPLE,
        np.array(train_labels, dtype=np.int32),
    )

    correct_predictions = 0
    samples_tested = len(test_set)
    print("Testing...")
    for test_entry in test_set:
        img_path = test_entry[0]
        real_attribute_id = int(test_entry[1])

        img = _load_gray(img_path)
        descriptor = None
        if img is not None:
            descriptor = EXTRACTOR.compute(img, DETECTOR.detect(img))
        if descriptor is None:
            # Unreadable image or no keypoints: the sample cannot be scored,
            # so leave it out of the accuracy denominator.
            print("WARNING: skipping test sample without descriptor: " + str(img_path), file=sys.stderr)
            samples_tested -= 1
            continue

        try:
            _, prediction = SVM.predict(descriptor)
        except cv.error as err:
            # Only OpenCV prediction failures are tolerated, and they are
            # reported instead of being swallowed silently.
            print("WARNING: prediction failed for " + str(img_path) + ": " + str(err), file=sys.stderr)
            samples_tested -= 1
            continue

        predicted_attribute_id = int(prediction[0][0])
        if predicted_attribute_id == real_attribute_id:
            print("CORRECT PREDICTION! :)")
            correct_predictions += 1
        else:
            print("INCORRECT PREDICTION... :(")
        print("Predicted Label: " + utils.getLabelFromAttributeID(predicted_attribute_id) + "(" + str(predicted_attribute_id) + ")")
        print("Real Label: " + utils.getLabelFromAttributeID(real_attribute_id) + "(" + str(real_attribute_id) + ")")

    if samples_tested > 0:
        correct_percentage = (correct_predictions / samples_tested) * 100
        print("Test Results: " + "{:.2f}".format(correct_percentage) + "% Correct Predictions.")
    else:
        # Avoid ZeroDivisionError when every test sample had to be skipped.
        print("Test Results: no test samples could be evaluated.")
Any help would be greatly appreciated.