Error: Number of input channels should be multiple of 3 but got 1 in function 'cv::dnn::ConvolutionLayerImpl::getMemoryShapes'

Hi!
I recently trained a model in Keras and wanted to run it in OpenCV, so I used the readNetFromTensorflow function. I make model predictions with the blobFromImage() and .forward() methods, yet I keep getting this error:

`error: OpenCV(4.5.3) Error: Number of input channels should be multiple of 3 but got 1 in function 'cv::dnn::ConvolutionLayerImpl::getMemoryShapes'`

Here is the code used to train the model:

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
model = tf.keras.models.Sequential([
    
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    
    tf.keras.layers.Dense(29, activation='softmax')
])

model.summary()
model.compile(loss = 'categorical_crossentropy',
              optimizer = 'adam',
              metrics = ['accuracy'])

class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if logs.get('accuracy', 0) > 0.95:  # default of 0 guards against a missing key
            print("\nReached >95% accuracy so cancelling training!")
            self.model.stop_training = True
        
callbacks = myCallback()
train_datagen = ImageDataGenerator(
      rescale=1./255,
      rotation_range=40,
      width_shift_range=0.2, # Shifting image width by 20%
      height_shift_range=0.2,# Shifting image height by 20%
      shear_range=0.2,       # Shearing across X-axis by 20%
      zoom_range=0.2,        # Image zooming by 20%
      horizontal_flip=True,
      fill_mode='nearest')

train_generator = train_datagen.flow_from_directory(
    "/content/drive/MyDrive/train_asl",
    target_size = (150, 150),
    class_mode = 'categorical',
    batch_size = 20)
validation_datagen = ImageDataGenerator(rescale=1./255)

validation_generator = validation_datagen.flow_from_directory(
    "/content/drive/MyDrive/test_asl",
    target_size = (150, 150),
    class_mode = 'categorical',
    batch_size = 20
)
import numpy as np
history = model.fit(  # fit_generator is deprecated; fit() accepts generators directly
      train_generator,
      steps_per_epoch = int(np.ceil(870/20)),   # 870 images / batch size of 20
      epochs = 80,
      validation_data = validation_generator,
      validation_steps = int(np.ceil(870/20)),  # 870 images / batch size of 20
      callbacks = [callbacks],
      verbose = 2)
model.save('mymodel6', save_format='tf')  # SavedModel format; 'pb' is not a valid save_format ('tf' or 'h5')

Here is the code used to run the OpenCV Image Recognition Program:

import numpy as np
import cv2
import keras
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

model = keras.models.load_model(r"C:\Users\mymodel3.pb")  # raw string: "\U" is otherwise an invalid escape

background = None
accumulated_weight = 0.5

ROI_top = 100
ROI_bottom = 300
ROI_right = 150
ROI_left = 350



def cal_accum_avg(frame, accumulated_weight):

    global background
    
    if background is None:
        background = frame.copy().astype("float")
        return None

    cv2.accumulateWeighted(frame, background, accumulated_weight)



def segment_hand(frame, threshold=25):
    global background
    
    diff = cv2.absdiff(background.astype("uint8"), frame)

    
    _ , thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
    
    #Fetching contours in the frame (These contours can be of hand or any other object in foreground) ...
    contours, hierarchy = cv2.findContours(thresholded.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 4.x returns two values

    # If length of contours list = 0, means we didn't get any contours...
    if len(contours) == 0:
        return None
    else:
        # The largest external contour should be the hand 
        hand_segment_max_cont = max(contours, key=cv2.contourArea)
        
        # Returning the hand segment(max contour) and the thresholded image of hand...
        return (thresholded, hand_segment_max_cont)

cam = cv2.VideoCapture(0)
num_frames = 0
while True:
    ret, frame = cam.read()

    # flipping the frame to prevent a mirrored image of the captured frame...
    frame = cv2.flip(frame, 1)

    frame_copy = frame.copy()

    # ROI from the frame
    roi = frame[ROI_top:ROI_bottom, ROI_right:ROI_left]

    gray_frame = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    gray_frame = cv2.GaussianBlur(gray_frame, (9, 9), 0)


    if num_frames < 70:
        
        cal_accum_avg(gray_frame, accumulated_weight)
        
        cv2.putText(frame_copy, "FETCHING BACKGROUND...PLEASE WAIT", (80, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,0,255), 2)
    
    else: 
        # segmenting the hand region
        hand = segment_hand(gray_frame)
        

        # Checking if we are able to detect the hand...
        if hand is not None:
            
            thresholded, hand_segment = hand

            # Drawing contours around hand segment
            cv2.drawContours(frame_copy, [hand_segment + (ROI_right, ROI_top)], -1, (255, 0, 0),1)
            
            cv2.imshow("Thesholded Hand Image", thresholded)
            
            thresholded = cv2.resize(thresholded, (64, 64))
            thresholded = cv2.cvtColor(thresholded, cv2.COLOR_GRAY2RGB)
            thresholded = np.reshape(thresholded, (1,thresholded.shape[0],thresholded.shape[1],3))

            
            model.setInput(cv2.dnn.blobFromImage(thresholded,size=(300,300),swapRB=True,crop=False))
            predict = model.forward()
            cv2.putText(frame_copy, word_dict[np.argmax(predict)], (170, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
            
    # Draw ROI on frame_copy
    cv2.rectangle(frame_copy, (ROI_left, ROI_top), (ROI_right, ROI_bottom), (255,128,0), 3)

    # incrementing the number of frames for tracking
    num_frames += 1

    # Display the frame with segmented hand
    cv2.putText(frame_copy, "DataFlair hand sign recognition_ _ _", (10, 20), cv2.FONT_ITALIC, 0.5, (51,255,51), 1)
    cv2.imshow("Sign Detection", frame_copy)


    # Close windows with Esc
    k = cv2.waitKey(1) & 0xFF

    if k == 27:
        break

# Release the camera and destroy all the windows
cam.release()
cv2.destroyAllWindows()

i guess your model was trained on rgb images
(and now you feed it a thresholded 1-channel image)
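
you can see the mismatch directly from the blob shapes (a quick sketch):

import cv2, numpy as np

gray = np.zeros((150, 150), np.uint8)     # 1-channel, like your thresholded image
bgr  = np.zeros((150, 150, 3), np.uint8)  # 3-channel, like your training data

print(cv2.dnn.blobFromImage(gray).shape)  # (1, 1, 150, 150) -> triggers the channel error
print(cv2.dnn.blobFromImage(bgr).shape)   # (1, 3, 150, 150) -> what your first conv expects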

can you show one of your train images ?
(if those are tightly cropped, you probably should crop the (bgr !) frame to the hand rect, and feed that into your nn)

I’m not quite sure what you mean… but this is one of my training images
[attached training image: B0002_test]

i’d suggest you throw out the whole thresholding / findContours part and use the bgr frame as is (maybe cropped with your fixed(?) ROI_xxx coords)

(having different train / inference image processing is a bad idea anyway)
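
something like this (a sketch only, reusing your ROI coords; the .pb filename is a placeholder):

import cv2

ROI_top, ROI_bottom, ROI_right, ROI_left = 100, 300, 150, 350  # your fixed roi coords

net = cv2.dnn.readNetFromTensorflow("frozen_graph.pb")  # put your .pb path here
cam = cv2.VideoCapture(0)
ret, frame = cam.read()

roi = frame[ROI_top:ROI_bottom, ROI_right:ROI_left]  # plain bgr crop, no thresholding

# 1/255 to match your ImageDataGenerator rescale, 150x150 to match training,
# swapRB=True because opencv reads bgr but the model was trained on rgb
blob = cv2.dnn.blobFromImage(roi, scalefactor=1.0/255, size=(150, 150), swapRB=True, crop=False)
net.setInput(blob)
out = net.forward()  # (1, 29) class scores
print(out.shape, out.argmax())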

btw, proof that it wants 3 channels !
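
you can check that yourself by looking at the first conv kernel of the loaded net (a sketch; layer access via getLayerId/getLayer):

import cv2

net = cv2.dnn.readNet("frozen_graph.pb")   # put your .pb path here
for name in net.getLayerNames():
    layer = net.getLayer(net.getLayerId(name))
    if layer.blobs:                        # first layer that carries weights
        print(name, layer.blobs[0].shape)  # e.g. (32, 3, 3, 3) -> 3 input channels
        break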

and please, close your issue here (it’s not a library bug, but an error on your side !)

Hi,
I’m new to OpenCV, so I’m not quite sure what you mean by BGR frames. I removed all the extra findContours parts and am still getting the same error. I also changed the size numbers in the blobFromImage() function - no change.
Any suggestions?

My new code is below:

from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2


ap = argparse.ArgumentParser()
ap.add_argument("-c", "--confidence", type=float, default=0.8,
    help="minimum probability to filter weak detections")
args = vars(ap.parse_args())


CLASSES = ["a", "b", "c", "d", "del", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "nothing", "o", "p", "q", "r", "s", "space", "t", "u", "v", "w", "x", "y", "z"]


# Path to the frozen model graph used by cv2.dnn
weightsPath = "/home/mhhs/Downloads/frozen_graph.pb"


COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

cvNet = cv2.dnn.readNetFromTensorflow(weightsPath)

cap = cv2.VideoCapture(0)  

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    h = frame.shape[0]
    w = frame.shape[1]
    img = np.array(frame)
    cvNet.setInput(cv2.dnn.blobFromImage(img, size=(150, 150), swapRB=True, crop=False))
    detections = cvNet.forward()


    for i in np.arange(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the prediction
        confidence = detections[0, 0, i, 2]

        if confidence > args["confidence"]:
            idx = int(detections[0, 0, i, 1])
            print(idx)
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            label = "{}: {:.2f}%".format(CLASSES[idx],
                confidence * 100)
            cv2.rectangle(img, (startX, startY), (endX, endY),
                COLORS[idx], 2)
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(img, label, (startX, y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

            #print(label)


    out_img = cv2.resize(img, (640, 480))
    cv2.imshow('img', out_img)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

i’m having a hard time believing it (simply works for me …)

import cv2, numpy as np
print(cv2.__version__)

n = cv2.dnn.readNet("mdl.pb") # link taken from gh issue
print(n.empty())

im = cv2.imread("img.png") # your image above ! 
b = cv2.dnn.blobFromImage(im, size=(150, 150), swapRB=True, crop=False)
   
n.setInput(b)
o = n.forward()
print(o.shape)
print(np.argmax(o))

############
4.5.5-dev
False
(1, 29)
8

I’m a little confused as to why this is happening… could it be that I’m using a video stream instead of a single image?
I probably should have mentioned: I am now getting an error at the np.arange(0, detections.shape[2]) line, which says “IndexError: tuple index out of range”.

yea, the output shape is (1,29), so there is no detections.shape[2]

you blindly copypasted that from an object detection code ?
your network does classification only, and all you need is a

gesture_id = np.argmax(network_output)

(throw away all that useless junk below cvNet.forward())
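
your whole loop then boils down to something like this (a sketch, reusing your CLASSES list and .pb path):

import cv2
import numpy as np

CLASSES = ["a", "b", "c", "d", "del", "e", "f", "g", "h", "i", "j", "k", "l", "m",
           "n", "nothing", "o", "p", "q", "r", "s", "space", "t", "u", "v", "w",
           "x", "y", "z"]

cvNet = cv2.dnn.readNetFromTensorflow("/home/mhhs/Downloads/frozen_graph.pb")
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    cvNet.setInput(cv2.dnn.blobFromImage(frame, size=(150, 150), swapRB=True, crop=False))
    out = cvNet.forward()        # (1, 29) class scores, no boxes

    gesture_id = np.argmax(out)  # classification: just pick the best class
    label = "{}: {:.2f}".format(CLASSES[gesture_id], float(out[0, gesture_id]))
    cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    cv2.imshow('img', frame)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()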