Changing float* to Mat in functions

I am having difficulty. I am using a float* to handle output from a compute shader. When I try to replace that float* with a Mat of type CV_32FC4, I get random noise. It has to be something simple, but I can’t seem to figure it out.

Here is the code BEFORE the conversion:

Here is the code AFTER the conversion:

The relevant code is in main.h and main.cpp. If I haven’t provided enough details, then please correct me.

Here is the code that (sort of) works. Note that I use a vector instead of a Mat.

	int num_tiles_per_dimension = 1;

	std::vector<cv::Mat> array_of_input_mats = splitImage(input_mat, num_tiles_per_dimension, num_tiles_per_dimension);
	std::vector<cv::Mat> array_of_output_mats;

	for (size_t i = 0; i < array_of_input_mats.size(); i++)
	{
		string s = "_input_" + to_string(i) + ".png";
		imwrite(s.c_str(), array_of_input_mats[i]);

		vector<float> output_pixels(4 * array_of_input_mats[i].rows * array_of_input_mats[i].cols);
	
		//Mat output_pixels(array_of_input_mats[i].rows, array_of_input_mats[i].cols, CV_32FC4);


		gpu_compute(
			compute_shader_program,
			reinterpret_cast<unsigned char*>(output_pixels.data()),
			array_of_input_mats[i],
			input_light_mat_with_dynamic_lights,
			input_light_blocking_mat);

		Mat uc_output_small(array_of_input_mats[i].rows, array_of_input_mats[i].cols, CV_8UC4);

		for (size_t x = 0; x < (4 * uc_output_small.rows * uc_output_small.cols); x += 4)
		{
			uc_output_small.data[x + 0] = static_cast<unsigned char>(output_pixels[x + 0] * 255.0);
			uc_output_small.data[x + 1] = static_cast<unsigned char>(output_pixels[x + 1] * 255.0);
			uc_output_small.data[x + 2] = static_cast<unsigned char>(output_pixels[x + 2] * 255.0);
			uc_output_small.data[x + 3] = 255;
		}

		array_of_output_mats.push_back(uc_output_small);

		// These images show that something's not working right where num_tiles_per_dimension is >= 2
		// there are duplicate output images
		s = "_output_" + to_string(i) + ".png";
		imwrite(s.c_str(), array_of_output_mats[i]);
	}

Here is the Mat code, which returns noise:

	int num_tiles_per_dimension = 1;

	std::vector<cv::Mat> array_of_input_mats = splitImage(input_mat, num_tiles_per_dimension, num_tiles_per_dimension);
	std::vector<cv::Mat> array_of_output_mats;

	for (size_t i = 0; i < array_of_input_mats.size(); i++)
	{
		string s = "_input_" + to_string(i) + ".png";
		imwrite(s.c_str(), array_of_input_mats[i]);

		//vector<float> output_pixels(4 * array_of_input_mats[i].rows * array_of_input_mats[i].cols);
	
		Mat output_pixels(array_of_input_mats[i].rows, array_of_input_mats[i].cols, CV_32FC4);


		gpu_compute(
			compute_shader_program,
			reinterpret_cast<unsigned char*>(output_pixels.data),
			array_of_input_mats[i],
			input_light_mat_with_dynamic_lights,
			input_light_blocking_mat);

		Mat uc_output_small(array_of_input_mats[i].rows, array_of_input_mats[i].cols, CV_8UC4);

		for (size_t x = 0; x < (4 * uc_output_small.rows * uc_output_small.cols); x += 4)
		{
			uc_output_small.data[x + 0] = static_cast<unsigned char>(output_pixels.data[x + 0] * 255.0);
			uc_output_small.data[x + 1] = static_cast<unsigned char>(output_pixels.data[x + 1] * 255.0);
			uc_output_small.data[x + 2] = static_cast<unsigned char>(output_pixels.data[x + 2] * 255.0);
			uc_output_small.data[x + 3] = 255;
		}

		array_of_output_mats.push_back(uc_output_small);

		// These images show that something's not working right where num_tiles_per_dimension is >= 2
		// there are duplicate output images
		s = "_output_" + to_string(i) + ".png";
		imwrite(s.c_str(), array_of_output_mats[i]);
	}