Training a model to detect color instead of shape/features? AKA "Banana ripeness detector"

Hello there! Beginner here!

So, long story short, I’m writing a program that detects bananas in a picture and tells you how ripe they are. How do I want to do that? Well, I want to train a model to recognize color and then make a prediction. I have a folder called “banana_models” with 3 sub-folders, “unripe bananas”, "ripe bananas" and "overripe bananas", each containing multiple images.

Then I have a .csv file with one line per training image, of the form `<path to image>;<label>` (the number after the semicolon is the label: 1 = unripe, 2 = ripe, 3 = overripe).

If you think about it, it’s like a face recognition algorithm that you train on images (FisherFaceRecognizer, LBPHFaceRecognizer, etc.), except that I’m not looking for facial features (obviously, bananas have no face) but for colors.

I understand that for such a purpose I should use `Ptr<SVM>` (Support Vector Machines). However, I have no idea how to train it so that it takes into account only the color of the training images, and then how to make a prediction for the image I give it.

Here is the code. Is SVM the correct approach? If yes, how do I properly train the model to take into account the color? If not, what would be the correct approach?

#include <stdio.h>

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ml.hpp"

using namespace std;
using namespace cv;
using namespace ml;

void detectAndDisplayBanana(Mat frame);
/** Global variables */
String banana_cascade_name = “resources//banana_detect_cascade.xml”;
CascadeClassifier banana_cascade;
string window_name = “Capture - Banana Ripe Detection”;

// CSV containing banana sets
static void read_csv(const string filename, vector& images, vector& labels, char separator = ‘;’)
{
ifstream file(filename, ios::in);
if (file)
{
string line, path, classLabel;
while (getline(file, line))
{
stringstream liness(line);
getline(liness, path, separator);
getline(liness, classLabel);
if (!path.empty() && !classLabel.empty())
{
images.push_back(imread(path, ImreadModes::IMREAD_COLOR));
labels.push_back(atoi(classLabel.c_str()));
}
}
}
}
int im_width;
int im_height;
Ptr model;

int main(int argc, const char** argv)
{
VideoCapture capture;
Mat frame;
//-- 1. Load the cascades
if (!banana_cascade.load(banana_cascade_name)) { printf(“–(!)Error loading\n”); return -1; };
string fileName = string(“resources//csv.ext”);

//-- 2. train the Fisher model
vector images;
vector labels;
read_csv(fileName, images, labels);

im_width = images[0].cols;
im_height = images[0].rows;
model = SVM::create();

//// … HOW TO TRAIN ???

//-- 3. Read the image

while (true)
{
frame = imread(“banana_example.jpg”);
//-- 3. Apply the classifier to the frame
if (!frame.empty())
{
detectAndDisplayBanana(frame);
}
else
{
printf(“No banana image!”); break;
}
int c = waitKey(10);
if ((char)c == ‘c’) { break; }
}

return 0;
}

void detectAndDisplayBanana(Mat frame)
{
// This functions detect bananas in a picture

string ripeStage;
std::vector bananas;
Mat frame_gray;
cvtColor(frame, frame_gray, COLOR_BGR2GRAY);
equalizeHist(frame_gray, frame_gray);
//-- Detect bananas

banana_cascade.detectMultiScale(frame_gray, bananas, 1.1, 2, 0 | 1, Size(30, 30));
for (size_t i = 0; i < bananas.size(); i++)
{
Rect banana_i = bananas[i];
Mat banana = frame_gray(banana_i);
Mat banana_resized;
cv:resize(banana, banana_resized, Size(im_width, im_height), 1.0, 1.0, InterpolationFlags::INTER_CUBIC);

  double confidence = 20.00;
  int ripeLabel = model->predict(banana_resized);

  switch (ripeLabel)
  {
  case 1:
  	ripeStage = "Unripe";
  	break;
  case 2:
  	ripeStage = "Ripe";
  	break;
  case 3:
  	ripeStage = "Overripe";
  	break;
  default:
  	ripeStage = "Unknown";
  }



  
  
  string box_text = format("%s %f", ripeStage.c_str());
  cout << box_text << endl;
  int pos_x = std::max(banana_i.x - 7, 0);
  int pos_y = std::max(banana_i.y - 7, 0);
  putText(frame, box_text, Point(pos_x, pos_y), HersheyFonts::FONT_HERSHEY_PLAIN, 1.0, CV_RGB(0, 255, 0), 2.0);

}
//-- Show what you got
imshow(window_name, frame);
}