Assertion failed opencv when resizing a mat

Ashutosh_Soni · May 3, 2021, 7:33am

I’m trying to convert my python file to java for detecting seatbelt detection. so, I get the image. I detect if there is a person in the image using YOLO file. if there has been successfully a person detected, we try to crop it resize it and use a tensorflow model to detect if I have successfully detected a seatbelt or not. The problem comes with resizing of the image. The mat of the resized image fails everytime. imgRoi seems to be not resizing the image by the parameters specified.

def detect_person(image, confThreshold):
  img = np.copy(image)
  blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
  net.setInput(blob)
  outputs = net.forward(names)
  outputs = np.vstack(outputs)
  H, W = image.shape[:2]
  boxes = []
  confidences = []
  classIDs = []
  for output in outputs:
        scores = output[5:]
        classID = np.argmax(scores)
        confidence = scores[classID]
        if confidence > confThreshold:
            x, y, w, h = output[:4] * np.array([W, H, W, H])
            p0 = int(x - w//2), int(y - h//2)
            p1 = int(x + w//2), int(y + h//2)
            boxes.append([*p0, int(w), int(h)])
            confidences.append(float(confidence))
            classIDs.append(classID)
  indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, confThreshold-0.1)
  
  bboxes = []
  confs = []
  if len(indices) > 0:
     for i in indices.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            # track only people in image 
            if classIDs[i] == 0:
               x0 = max(0,x)
               y0 = max(0,y)
               x1 = min(x0+w,img.shape[1])
               y1 = min(y0+h, img.shape[0])
            
               bboxes.append([x0,y0,x1,y1])
               confs.append(round(confidences[i],2))
              
  return bboxes,confs

working fine above code in python no image issue.

I’ve converted the to the above code for android as follows:

 val frame = Mat()

    val originalMat = Mat()
    Utils.bitmapToMat(img, originalMat)

    Utils.bitmapToMat(img, frame)
    Imgproc.cvtColor(frame, frame, Imgproc.COLOR_RGBA2RGB)

    val blob = Dnn.blobFromImage(
        frame,
        0.00392,
        Size(416.0, 416.0),
        Scalar(0.0, 0.0, 0.0),/*swapRB*/
        true, /*crop*/
        false
    )


    maskModel?.setInput(blob)


    val outPutNames = getOutputNames(maskModel!!)
    val result: List<Mat> = java.util.ArrayList(outPutNames!!.size)

    maskModel!!.forward(result, outPutNames)

    val confThreshold = 0.2f


    val clsIds: MutableList<Int> = java.util.ArrayList()
    val confs: MutableList<Float> = java.util.ArrayList()
    val rects: MutableList<Rect> = java.util.ArrayList()




    for (i in result.indices) {
        val level = result[i]
        for (j in 0 until level.rows()) {
            val row = level.row(j)
            val scores = row.colRange(5, level.cols())
            val mm = Core.minMaxLoc(scores)
            val confidence = mm.maxVal.toFloat()
            val classIdPoint = mm.maxLoc
            if (confidence > confThreshold) {
                val centerX = (row[0, 0][0] * frame.cols()).toInt()
                val centerY = (row[0, 1][0] * frame.rows()).toInt()
                val width = (row[0, 2][0] * frame.cols()).toInt()
                val height = (row[0, 3][0] * frame.rows()).toInt()
                val left = centerX - width / 2
                val top = centerY - height / 2
                clsIds.add(classIdPoint.x.toInt())
                confs.add(confidence)
                rects.add(Rect(left, top, width, height))
            }
        }
    }
    val ArrayLength = confs.size

    if (ArrayLength >= 1) {
        // Apply non-maximum suppression procedure.
        val nmsThresh = 0.4f
        val confidences = MatOfFloat(Converters.vector_float_to_Mat(confs))
        val boxesArray = rects.toTypedArray()
        val boxes = MatOfRect(*boxesArray)
        val indices = MatOfInt()
        Dnn.NMSBoxes(
            boxes,
            confidences,
            confThreshold,
            (confThreshold - 0.1).toFloat(),
            indices
        )


        // Draw result boxes:
        val ind = indices.toArray()
        for (i in ind.indices) {
            val idx = ind[i]
            val box = boxesArray[idx]
            val idGuy = clsIds[idx]
            val conf = confs[idx]
            val intConf = (conf * 100).toInt()




            Imgproc.putText(
                frame,
                labels[idGuy].toString() + " " + intConf + "%",
                box.tl(),
                Core.FONT_HERSHEY_SIMPLEX,
                2.0,
                Scalar(255.0, 255.0, 0.0),
                2
            )

            println(
                "frame " + "${labels[idGuy]} "
            )

            imageReceivedEvent.detectedItems?.add(labels[idGuy])
            Imgproc.rectangle(frame, box.tl(), box.br(), Scalar(255.0, 0.0, 0.0), 2)

            if (idGuy == 0) {
                BWLog.i("TRACKING PEOPLE!!", "SUCCESS")

                imageReceivedEvent.PersonEnum = BWConstants.PERSON_ENUM.PERSON_DETECTED
                imageReceivedEvent.confidence = intConf.toDouble()



                if (true ) {
                    try {

                        val x = max(0, box.x)
                        val y = max(0, box.y)

                        val rows = originalMat.rows()
                        val cols = originalMat.cols()

                        val x1 = min(x + box.width, cols)
                        val y1 = min(y + box.height, rows)

                
                        val imgRoi = Mat(
                            originalMat,
                            Rect(x, y, x1, y1)
                        )
                        val bitmap = preProcessPerson(imgRoi)
                        detectedPersonBoundingBoxes.add(
                            BoundingBox(
                                bitmap,
                                x, y, x1, y1
                            )
                        )
                    } catch (e: Exception) {
                        e.printStackTrace()
                    }


            } else {
             

                detectedPersonBoundingBoxes.add(
                    BoundingBox(
                        img,
                        box.x,
                        box.y,
                        box.x + box.width, box.y + box.height
                    )
                )

            }
        } else {


    }
}


}

if (detectedPersonBoundingBoxes.isEmpty()) {
//                no person found.
//            
  
} else {
recreateMaskDetectorTFLite(
    maskModelFloat,
    maskDevice,
    tfLiteNumThreads
)
//            doTensorFlowDetection(detectedPersonBoundingBoxes[0])
}

detectedPersonBoundingBoxes.forEach { boundBox ->
    recreateMaskDetectorTFLite(
        maskModelFloat,
        maskDevice,
        tfLiteNumThreads
    )
    doTensorFlowDetection(boundBox)
}

2021-05-03 12:14:45.534 10932-11085/com.buttonwillow.app E/cv::error(): OpenCV(3.4.6) Error: Assertion failed (0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows) in cv::Mat::Mat(const cv::Mat&, const cv::Range&, const cv::Range&), file /build/3_4_pack-android/opencv/modules/core/src/matrix.cpp, line 424
    2021-05-03 12:14:45.534 10932-11085/com.buttonwillow.app E/org.opencv.core.Mat: Mat::n_1Mat__JIIII() caught cv::Exception: OpenCV(3.4.6) /build/3_4_pack-android/opencv/modules/core/src/matrix.cpp:424: error: (-215:Assertion failed) 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows in function 'cv::Mat::Mat(const cv::Mat&, const cv::Range&, const cv::Range&)'

berak · May 3, 2021, 7:40am

can you try to find out, in which line of your Kotlin code it fails ?

Ashutosh_Soni · May 3, 2021, 7:43am

This is the place where my code throws the exception. Thanks
@berak

berak · May 3, 2021, 7:58am

opencv’s Rect is [left, top, W, H] so it should be:

      val imgRoi = Mat(
                            originalMat,
                            Rect(x, y, x1-x, y1-y)
                        )

here (you already have it correct in several other places )

Ashutosh_Soni · May 3, 2021, 8:01am

thanks, I’ll try this and let you know.

Ashutosh_Soni · May 3, 2021, 8:46am

while doing a tensorflow detection it throws this error.
j ava.lang.IllegalArgumentException: Cannot copy from a TensorFlowLite tensor (logits) with shape [12, 2] to a Java object with shape [1, 2].

**MaskDetector:tflite**
abstract class MaskDetector : TensorFlowAndroid {Preformatted text
        var flattenedImage = FloatArray(IN_IMAGE_H * IN_IMAGE_W * DIM_PIXEL_SIZE)
        var intValues =
            IntArray(IN_IMAGE_H * IN_IMAGE_W)
        var predictions =
            Array(1) { FloatArray(2) }

        protected fun flattenImage(bitmap: Bitmap) {
            bitmap.getPixels(
                intValues,
                0,
            bitmap.width,
            0,
            0,
            bitmap.width,
            bitmap.height
        )
        var index = 0
        for (i in 0 until IN_IMAGE_H) {
            for (j in 0 until IN_IMAGE_W) {
                val pixelValue = intValues[i * IN_IMAGE_W + j]
                flattenedImage[index++] = (pixelValue shr 0 and 0xFF) / 255.0f
            }
        }
    }

    companion object {
        protected const val threshold = 0.5f
        val IN_IMAGE_W: Int = com.buttonwillow.app.configuration.AppConfig.MASK_RESIZE_WIDTH
        val IN_IMAGE_H: Int = com.buttonwillow.app.configuration.AppConfig.MASK_RESIZE_HEIGHT
        val DIM_PIXEL_SIZE = 1
        val THRESHOLD = 0.15f
    }
}



abstract class MaskDetectorTFLite(
    context: Context,
    device: TensorFlowAndroid.Device?,
    numThreads: Int
) : MaskDetector() {
    private val tfliteOptions = Interpreter.Options()
    private var tfliteModel: ByteArray?
    protected var tflite: Interpreter?
    private var gpuDelegate: GpuDelegate? = null

    // ------------------------------------- Public Methods ----------------------------------------
    fun recognizeImage(bitmap: Bitmap?) {
        bitmap?.let { flattenImage(it) }
        runInference()
    }

    open fun getGrayOutput(): Int {
        val temp: MutableList<Float> = ArrayList()
        for (f in predictions[0]) {
            temp.add(f)
        }
//        BWLog.d("Mask output: " , Arrays.toString(temp.toTypedArray())) // Display gray output values


        // System.out.println("Mask output: " + Arrays.toString(temp.toArray())); // Display gray output values
        return temp.indexOf(Collections.max(temp))
    }

    fun close() {
        if (tflite != null) {
            tflite!!.close()
            tflite = null
        }
        if (gpuDelegate != null) {
            gpuDelegate!!.close()
            gpuDelegate = null
        }
        tfliteModel = null
    }

    //
    // ------------------------------------ Private Methods ----------------------------------------
    @Throws(IOException::class)
    private fun loadModelFile(context: Context): ByteArray {


//        todo change this when you're using a different model name
        val fileName = BWConstants.TENSOR_SEATBELT_FILE
        val streams = context.openFileInput(fileName)

//        val assetFile = context.assets.open(fileName)
        return streams.readBytes()
//        return File(context.filesDir, fileName)

//        val fileDescriptor = context.assets.openFd(modelPath!!)
        val inputStream = streams
//            FileInputStream(fileDescriptor.fileDescriptor)
        val fileChannel = inputStream.channel
        val startOffset = 0L
        val declaredLength = inputStream.channel.size()


//        return fileChannel.map(
//            FileChannel.MapMode.READ_ONLY,
//            startOffset,
//            declaredLength
//        )
    }

    // ----------------------------------- Abstract Methods ----------------------------------------
    abstract val modelPath: String?

    abstract fun runInference()

    companion object {
        @Throws(IOException::class)
        fun create(
            context: Context?,
            model: TensorFlowAndroid.Model?,
            device: TensorFlowAndroid.Device?,
            numThreads: Int
        ): MaskDetectorTFLite {
            return MaskDetectorTFLiteFloat(context, device, numThreads)
        }
    }

    // -------------------------------------- Constructor ------------------------------------------
    init {
        tfliteModel = loadModelFile(context)



        when (device) {
            TensorFlowAndroid.Device.NNAPI, TensorFlowAndroid.Device.GPU, TensorFlowAndroid.Device.CPU -> println(
                "Using CPU"
            )
        }
        val buffer = ByteBuffer.allocateDirect(tfliteModel!!.size).order(ByteOrder.nativeOrder())
        buffer.put(tfliteModel)

        tfliteOptions.setNumThreads(numThreads)
        tflite = Interpreter(buffer, tfliteOptions)
    }
}

MaskDetectorTFLite:

abstract class MaskDetectorTFLite(
    context: Context,
    device: TensorFlowAndroid.Device?,
    numThreads: Int
) : MaskDetector() {
    private val tfliteOptions = Interpreter.Options()
    private var tfliteModel: ByteArray?
    protected var tflite: Interpreter?
    private var gpuDelegate: GpuDelegate? = null

    // ------------------------------------- Public Methods ----------------------------------------
    fun recognizeImage(bitmap: Bitmap?) {
        bitmap?.let { flattenImage(it) }
        runInference()
    }

    open fun getGrayOutput(): Int {
        val temp: MutableList<Float> = ArrayList()
        for (f in predictions[0]) {
            temp.add(f)
        }
//        BWLog.d("Mask output: " , Arrays.toString(temp.toTypedArray())) // Display gray output values


        // System.out.println("Mask output: " + Arrays.toString(temp.toArray())); // Display gray output values
        return temp.indexOf(Collections.max(temp))
    }

    fun close() {
        if (tflite != null) {
            tflite!!.close()
            tflite = null
        }
        if (gpuDelegate != null) {
            gpuDelegate!!.close()
            gpuDelegate = null
        }
        tfliteModel = null
    }

    //
    // ------------------------------------ Private Methods ----------------------------------------
    @Throws(IOException::class)
    private fun loadModelFile(context: Context): ByteArray {


//        todo change this when you're using a different model name
        val fileName = BWConstants.TENSOR_SEATBELT_FILE
        val streams = context.openFileInput(fileName)

//        val assetFile = context.assets.open(fileName)
        return streams.readBytes()
//        return File(context.filesDir, fileName)

//        val fileDescriptor = context.assets.openFd(modelPath!!)
        val inputStream = streams
//            FileInputStream(fileDescriptor.fileDescriptor)
        val fileChannel = inputStream.channel
        val startOffset = 0L
        val declaredLength = inputStream.channel.size()


//        return fileChannel.map(
//            FileChannel.MapMode.READ_ONLY,
//            startOffset,
//            declaredLength
//        )
    }

    // ----------------------------------- Abstract Methods ----------------------------------------
    abstract val modelPath: String?

    abstract fun runInference()

    companion object {
        @Throws(IOException::class)
        fun create(
            context: Context?,
            model: TensorFlowAndroid.Model?,
            device: TensorFlowAndroid.Device?,
            numThreads: Int
        ): MaskDetectorTFLite {
            return MaskDetectorTFLiteFloat(context, device, numThreads)
        }
    }

    // -------------------------------------- Constructor ------------------------------------------
    init {
        tfliteModel = loadModelFile(context)



        when (device) {
            TensorFlowAndroid.Device.NNAPI, TensorFlowAndroid.Device.GPU, TensorFlowAndroid.Device.CPU -> println(
                "Using CPU"
            )
        }
        val buffer = ByteBuffer.allocateDirect(tfliteModel!!.size).order(ByteOrder.nativeOrder())
        buffer.put(tfliteModel)

        tfliteOptions.setNumThreads(numThreads)
        tflite = Interpreter(buffer, tfliteOptions)
    }
}

MaskDetectorTFLiteFloat

class MaskDetectorTFLiteFloat(
    context: Context?,
    device: TensorFlowAndroid.Device?,
    numThreads: Int
) : MaskDetectorTFLite(context!!, device, numThreads) {

//    todo this has to be changed to your tensorflow model name
override val modelPath: String
    get() = BWConstants.TENSOR_SEATBELT_FILE

override fun runInference() {
    tflite!!.run(reshapedImage, predictions)
}

// ------------------------------------ Private Methods ----------------------------------------
private val reshapedImage: Array<Array<Array<FloatArray>>>
    get() {
        val temp =
            Array(
                1
            ) {
                Array(
                    IN_IMAGE_H
                ) {
                    Array(
                        IN_IMAGE_W
                    ) { FloatArray(DIM_PIXEL_SIZE) }
                }
            }
        var index = 0
        for (i in 0 until IN_IMAGE_H) {
            for (j in 0 until IN_IMAGE_W) {
                for (bgr in 0 until DIM_PIXEL_SIZE) {
                    temp[0][i][j][bgr] = flattenedImage[index++]
                }
            }
        }
        return temp
    }

}

I create my mask detector tflite like this. from the above file.

    @SuppressLint("DefaultLocale")
    private fun recreateMaskDetectorTFLite(
        model: TensorFlowAndroid.Model,
        device: TensorFlowAndroid.Device,
        numThreads: Int,
    ) {
        if (maskDetectorTFLite != null) {
            BWLog.d("recreateTFLite Mask", "Closing Mask Detector TFLite")
            maskDetectorTFLite?.close()
            maskDetectorTFLite = null
            allMaskResults = ArrayList()
        }
        try {
            maskDetectorTFLite =
                MaskDetectorTFLite.create(requireActivity(), model, device, numThreads)
        } catch (e: Exception) {
            e.printStackTrace()
       
        }
    }

and whenever I do this it fails over here.

fun doTensorFlowDetection(detectedFace: BoundingBox) {

try {
    maskDetectorTFLite!!.recognizeImage(detectedFace.bitmap)
    maskGrayResults = maskDetectorTFLite!!.getGrayOutput()
} catch (e: java.lang.IllegalArgumentException) {
    e.printStackTrace()
    maskGrayResults = 2
}



if (maskGrayResults == 0) {
    imageReceivedEvent.seatBelt = BWConstants.SEATBELT_ENUM.SEAT_BELT_OFF
    BWLog.d(TAG, "SEATBELT OFF")
} else if (maskGrayResults == 2) {
    BWLog.d(TAG, "SEATBELT UNKNOWN")
    imageReceivedEvent.seatBelt = BWConstants.SEATBELT_ENUM.SEAT_BELT_NOT_DETECTED
} else {
    imageReceivedEvent.seatBelt = BWConstants.SEATBELT_ENUM.SEAT_BELT_ON
    BWLog.d(TAG, "SEATBELT ON")
}

}

exception happens over here…
** maskDetectorTFLite!!.recognizeImage(detectedFace.bitmap)**

let me know if there is anything else you might require. thanks.

Ashutosh_Soni · May 3, 2021, 8:49am

2021-05-03 13:57:11.214 17946-18080/W/System.err: java.lang.IllegalArgumentException: Cannot copy from a TensorFlowLite tensor (logits) with shape [12, 2] to a Java object with shape [1, 2].
2021-05-03 13:57:11.215 17946-18080/ W/System.err:     at org.tensorflow.lite.Tensor.throwIfDstShapeIsIncompatible(Tensor.java:461)
2021-05-03 13:57:11.215 17946-18080/com.buttonwillow.app W/System.err:     at org.tensorflow.lite.Tensor.copyTo(Tensor.java:252)
2021-05-03 13:57:11.215 17946-18080/ W/System.err:     at org.tensorflow.lite.NativeInterpreterWrapper.run(NativeInterpreterWrapper.java:170)
2021-05-03 13:57:11.215 17946-18080/ W/System.err:     at org.tensorflow.lite.Interpreter.runForMultipleInputsOutputs(Interpreter.java:343)
2021-05-03 13:57:11.216 17946-18080/ W/System.err:     at org.tensorflow.lite.Interpreter.run(Interpreter.java:304)
2021-05-03 13:57:11.216 17946-18080/ W/System.err:     at com.buttonwillow.app.views.tensorflow.tfLite.MaskDetectorTFLiteFloat.runInference

a little extra logtrace. to identify the problem @berak

berak · May 3, 2021, 9:29am

sorry, but this is opencv, we cannot help you with TFLite on android

berak · May 3, 2021, 9:57am

however:

your prediction dims are [1,2], error complains, it expects [12,2], if i read this correctly

(i think, you have to do some research about the TF model used here, what does it do, exactly ? what’s in the predictions ?)

Ashutosh_Soni · May 4, 2021, 2:08am

It will predict if seatbelt has been detected or not. 2 outputs. 0 if no seatbelt detected. 1 if seatbelt has been detected.

Topic		Replies	Views
Kotlin: error on converting mat to bitmap Android/Java	9	1314	May 31, 2021
Inferencing ONNX model on a RGB image in Android-Java Android/Java dnn , java	7	1091	August 10, 2023
Problems with Dnn TextDetectors (TextDetectionModel_DB & TextDetectionModel_EAST) on Android Android/Java dnn , android , text	5	1065	December 7, 2022
Camera2 byteArray to BGR uint8 mat conversion Android/Java android	10	2351	August 16, 2022
How to store 4D mat of dimension [12242241CV_32FC1] to Bitmap Image Android/Java dnn , mat	2	298	June 20, 2023

Assertion failed opencv when resizing a mat

Related topics