cv::cuda::Stream synchronization on gaussian filter

Hello guys :slight_smile:
I have a question about synchronizing cv::cuda::Stream objects. The current version of my code works correctly, but the uncommented version has a problem with synchronization.
Do you know where I made a mistake in the stream synchronization?
My goal is to run the Gaussian filters asynchronously, because that improves performance.

// GPU buffers for every stage of the pipeline, plus the host-side outputs.
cv::cuda::GpuMat testMat1GPU, testMat2GPU, testMat1GrayGPU, testMat2GrayGPU, testMat1GausianGPU, testMat2GausianGPU, differenceGPU, resultGPU;
cv::Mat resultDownload;
std::vector<std::vector<cv::Point>> contoursGPU;
// Streams and events for the asynchronous variant (currently disabled).
// cv::cuda::Stream stream1, stream2;
// cv::cuda::Event event1, event2;
// A single Gaussian filter instance; both apply() calls in the loop use it.
auto gaussFilter = cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, kernelSizeGaussian, 0.0);
// Upload both inputs and convert them to grayscale once, outside the timed loop.
testMat1GPU.upload(testMat1);
testMat2GPU.upload(testMat2);
cv::cuda::cvtColor(testMat1GPU, testMat1GrayGPU, cv::COLOR_BGR2GRAY, 0);
cv::cuda::cvtColor(testMat2GPU, testMat2GrayGPU, cv::COLOR_BGR2GRAY, 0);
// Only the loop below is timed.
Stopwatch swGPU;
swGPU.start();

for (size_t i = 0; i < ITERATIONS; ++i)
{
  // synchronization still doesn't work
  // Blur both grayscale images; the stream arguments are commented out here.
  gaussFilter->apply(testMat1GrayGPU, testMat1GausianGPU /*, stream1*/);
  gaussFilter->apply(testMat2GrayGPU, testMat2GausianGPU /*, stream2*/);
  // Synchronization attempts tried together with the stream variant above:
  // event1.record(stream1);
  // event2.record(stream2);
  // cv::cuda::Stream::Null().waitEvent(event1);
  // cv::cuda::Stream::Null().waitEvent(event2);
  // event1.waitForCompletion();
  // event2.waitForCompletion();
  // stream1.waitForCompletion();
  // stream2.waitForCompletion();
  // Difference of the two blurred images, binarized with threshold 20.
  cv::cuda::absdiff(testMat1GausianGPU, testMat2GausianGPU, differenceGPU);
  cv::cuda::threshold(differenceGPU, resultGPU, 20.0, 255.0, cv::THRESH_BINARY);
  // Contour extraction runs on the host, so the binary mask is downloaded first.
  resultGPU.download(resultDownload);
  cv::findContours(resultDownload, contoursGPU, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
}
swGPU.stop();

There is an internal _buf which belongs to the filter, so two streams that use the same filter instance share that buffer.

As a result, you need to use a different filter for each stream, e.g.:

// One filter instance per stream, so each stream has its own internal buffer.
auto gaussFilterS1 = cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, kernelSizeGaussian, 0.0);
auto gaussFilterS2 = cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, kernelSizeGaussian, 0.0);

This needs to be documented.