Using CAP_MSMF and CAP_DSHOW at the same time with two cameras causes "can't grab frame. Error: -2147483638"

My understanding is that if I set cv2.CAP_PROP_CONVERT_RGB to 0, I should receive each frame as a 1D uint8 array, independent of the backend.

However, some further tests I ran with the two cameras do not match this understanding.

Thermal cam with CAP_MSMF:

import cv2
import numpy as np

# Repro script: open device 0 through the Media Foundation (CAP_MSMF) backend,
# send two commands via the zoom property, disable RGB conversion, print the
# FOURCC the backend reports and try to read a single frame.
# devices: 0 = thermal cam, 1 = webcam
#video = cv2.VideoCapture(0, cv2.CAP_DSHOW)
video = cv2.VideoCapture(0, cv2.CAP_MSMF)
#video.set(cv2.CAP_PROP_FOURCC, 0x32595559)

# request raw data from camera
# NOTE(review): 0x8004 is presumably a vendor-specific command tunnelled
# through the zoom property -- confirm against the camera's documentation.
# ret_val is reassigned by every set() call below and never inspected.
ret_val = video.set(cv2.CAP_PROP_ZOOM, 0x8004)

# activate shutter at camera
# (32768 == 0x8000; sent through the same zoom property as the command above)
ret_val = video.set(cv2.CAP_PROP_ZOOM, 32768)

# request RAW data from CV API
ret_val = video.set(cv2.CAP_PROP_CONVERT_RGB, 0)

print ("Video FOURCC")
# get() result is converted to int and masked to its low 32 bits so that hex()
# prints it as an unsigned 32-bit value.
print (hex(int(video.get(cv2.CAP_PROP_FOURCC)) & 0xffffffff))

# isOpened() is only checked here, after all set()/get() calls above have
# already been issued on the capture object.
if video.isOpened(): # try to get the first frame
    rval, frame = video.read()
else:
    # Only rval is assigned on this path; frame stays undefined.
    rval = False
Video FOURCC
0x32595559

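0x32595559 == "2YUY" when read from the most significant byte; a FOURCC stores its first character in the lowest byte, so this is "YUY2".
Returns 100352 bytes with shape (1, 100352) (100352 = 256 × 196 × 2, i.e. 2 bytes per pixel).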

Thermal cam with CAP_DSHOW:

import cv2
import numpy as np

# Repro script: open device 0 through the DirectShow (CAP_DSHOW) backend,
# send two commands via the zoom property, disable RGB conversion, print the
# FOURCC the backend reports and try to read a single frame.
# devices: 0 = thermal cam, 1 = webcam
video = cv2.VideoCapture(0, cv2.CAP_DSHOW)
#video = cv2.VideoCapture(0, cv2.CAP_MSMF)
#video.set(cv2.CAP_PROP_FOURCC, 0x32595559)

# request raw data from camera
# NOTE(review): 0x8004 is presumably a vendor-specific command tunnelled
# through the zoom property -- confirm against the camera's documentation.
# ret_val is reassigned by every set() call below and never inspected.
ret_val = video.set(cv2.CAP_PROP_ZOOM, 0x8004)

# activate shutter at camera
# (32768 == 0x8000; sent through the same zoom property as the command above)
ret_val = video.set(cv2.CAP_PROP_ZOOM, 32768)

# request RAW data from CV API
ret_val = video.set(cv2.CAP_PROP_CONVERT_RGB, 0)

print ("Video FOURCC")
# get() result is converted to int and masked to its low 32 bits so that hex()
# prints it as an unsigned 32-bit value.
print (hex(int(video.get(cv2.CAP_PROP_FOURCC)) & 0xffffffff))

# isOpened() is only checked here, after all set()/get() calls above have
# already been issued on the capture object.
if video.isOpened(): # try to get the first frame
    rval, frame = video.read()
else:
    # Only rval is assigned on this path; frame stays undefined.
    rval = False
Video FOURCC
0xe436eb7d

0xe436eb7d == "ä6ë}", which is not a readable FOURCC code.

Returns 150528 bytes with shape (196, 256, 3) (150528 = 196 × 256 × 3, i.e. 3 bytes per pixel).

Webcam with CAP_MSMF:

import cv2
import numpy as np

# Repro script: open device 1 through the Media Foundation (CAP_MSMF) backend,
# send one command via the zoom property, print the FOURCC the backend
# reports and try to read a single frame.
# devices: 0 = thermal cam, 1 = webcam
#video = cv2.VideoCapture(1, cv2.CAP_DSHOW)
video = cv2.VideoCapture(1, cv2.CAP_MSMF)
#video.set(cv2.CAP_PROP_FOURCC, 0x32595559)

# request raw data from camera
# NOTE(review): 0x8004 is still written to the zoom property here although
# device 1 is the webcam -- confirm whether this call is intended for it.
# ret_val is never inspected afterwards.
ret_val = video.set(cv2.CAP_PROP_ZOOM, 0x8004)

# activate shutter at camera
#ret_val = video.set(cv2.CAP_PROP_ZOOM, 32768)

# request RAW data from CV API
# NOTE(review): this call is commented out in this run, so
# CAP_PROP_CONVERT_RGB is never set to 0 here; presumably the backend's
# default conversion setting stays in effect -- confirm.
#ret_val = video.set(cv2.CAP_PROP_CONVERT_RGB, 0)

print ("Video FOURCC")
# get() result is converted to int and masked to its low 32 bits so that hex()
# prints it as an unsigned 32-bit value.
print (hex(int(video.get(cv2.CAP_PROP_FOURCC)) & 0xffffffff))

# isOpened() is only checked here, after the set()/get() calls above have
# already been issued on the capture object.
if video.isOpened(): # try to get the first frame
    rval, frame = video.read()
else:
    # Only rval is assigned on this path; frame stays undefined.
    rval = False
Video FOURCC
0x16

0x16 is not a readable ASCII code.
Returns 921600 bytes as shape:(480, 640, 3).

Webcam with CAP_DSHOW:

import cv2
import numpy as np

# Repro script: open device 1 through the DirectShow (CAP_DSHOW) backend,
# send one command via the zoom property, print the FOURCC the backend
# reports and try to read a single frame.
# devices: 0 = thermal cam, 1 = webcam
video = cv2.VideoCapture(1, cv2.CAP_DSHOW)
#video = cv2.VideoCapture(1, cv2.CAP_MSMF)
#video.set(cv2.CAP_PROP_FOURCC, 0x32595559)

# request raw data from camera
# NOTE(review): 0x8004 is still written to the zoom property here although
# device 1 is the webcam -- confirm whether this call is intended for it.
# ret_val is never inspected afterwards.
ret_val = video.set(cv2.CAP_PROP_ZOOM, 0x8004)

# activate shutter at camera
#ret_val = video.set(cv2.CAP_PROP_ZOOM, 32768)

# request RAW data from CV API
# NOTE(review): this call is commented out in this run, so
# CAP_PROP_CONVERT_RGB is never set to 0 here; presumably the backend's
# default conversion setting stays in effect -- confirm.
#ret_val = video.set(cv2.CAP_PROP_CONVERT_RGB, 0)

print ("Video FOURCC")
# get() result is converted to int and masked to its low 32 bits so that hex()
# prints it as an unsigned 32-bit value.
print (hex(int(video.get(cv2.CAP_PROP_FOURCC)) & 0xffffffff))

# isOpened() is only checked here, after the set()/get() calls above have
# already been issued on the capture object.
if video.isOpened(): # try to get the first frame
    rval, frame = video.read()
else:
    # Only rval is assigned on this path; frame stays undefined.
    rval = False
Video FOURCC
0x32595559 == "2YUY"

Returns 921600 bytes as shape:(480, 640, 3).

Is this how it is supposed to behave?