I’m trying to do a classifier for artwork using a bag of visual words model. However, I keep getting the same prediction for every artwork I feed into it. It seems like it keeps predicting the most repeated class in the training dataset that I gave it. I’m using the training images present in iMet Collection 2019 - FGVC6 | Kaggle and using a custom csv file with each picture’s name and it’s class. This CSV file is heavly unbalanced as two classes (out of nearly a thousand) represent 75% of the dataset.
Here’s a bulk of my code:
from cv2 import cv2 as cv
import numpy as np
import sys
sys.path.extend(['../../'])
from src import utils
if __name__ == '__main__':
DICTIONARY_SIZE = 150
TRAIN_SIZE = 300
TEST_SIZE = 100
SVM_MAX_ITER = 3000
SVM_EPSILON = 1e-6
DETECTOR = cv.SIFT_create()
MATCHER = cv.FlannBasedMatcher()
EXTRACTOR = cv.BOWImgDescriptorExtractor(DETECTOR, MATCHER)
TRAINER = cv.BOWKMeansTrainer(DICTIONARY_SIZE)
SVM = cv.ml.SVM_create()
SVM.setType(cv.ml.SVM_C_SVC)
SVM.setKernel(cv.ml.SVM_LINEAR)
SVM.setTermCriteria((cv.TERM_CRITERIA_MAX_ITER, SVM_MAX_ITER, SVM_EPSILON))
print("Generating Training and Test Sets...")
train_set, test_set = utils.getTrainingAndTestSets('multiclass.csv', TRAIN_SIZE, TEST_SIZE)
print("Generating Dictionary...")
for train_entry in train_set:
img_path = train_entry[0]
img = cv.imread(img_path)
img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
keypoint, descriptors = DETECTOR.detectAndCompute(img, None)
if descriptors is not None:
TRAINER.add(descriptors)
print("Clustering...")
EXTRACTOR.setVocabulary(TRAINER.cluster())
print("Preparing Training Data...")
train_desc = []
train_labels = []
for train_entry in train_set:
img_path = train_entry[0]
img_label = int(train_entry[1])
img = cv.imread(img_path)
img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
descriptor = EXTRACTOR.compute(img, DETECTOR.detect(img))
if descriptor is not None:
train_desc.extend(descriptor)
train_labels.append(img_label)
print("Training...")
SVM.train(np.array(train_desc), cv.ml.ROW_SAMPLE, np.array(train_labels))
correct_predictions = 0
samples_tested = len(test_set)
print("Testing...")
for test_entry in test_set:
img_path = test_entry[0]
real_attribute_id = int(test_entry[1])
img = cv.imread(img_path)
img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
descriptor = EXTRACTOR.compute(img, DETECTOR.detect(img))
try:
_, prediction = SVM.predict(descriptor)
predicted_attribute_id = int(prediction[0][0])
if predicted_attribute_id == real_attribute_id:
print("CORRECT PREDICTION! :)")
correct_predictions += 1
else:
print("INCORRECT PREDICTION... :(")
print("Predicted Label: " + utils.getLabelFromAttributeID(predicted_attribute_id) + "(" + str(predicted_attribute_id) + ")")
print("Real Label: " + utils.getLabelFromAttributeID(real_attribute_id) + "(" + str(real_attribute_id) + ")")
except Exception:
samples_tested -= 1
correct_percentage = (correct_predictions / samples_tested) * 100
print("Test Results: " + "{:.2f}".format(correct_percentage) + "% Correct Predictions.")
Help would greatly be appreciated.