I need to pass a cv::cuda::GpuMat as an argument to a CUDA kernel function. I have read that a cv::cuda::GpuMat can be passed to a kernel as a cv::cuda::PtrStepSz, but I am not able to find documentation or an implementation example for this. What is the proper syntax for passing a cv::cuda::GpuMat? How do I access the cv::cuda::PtrStepSz inside the kernel?
If any are available, kindly share resources that would help me better understand how to use cv::cuda::PtrStepSz.
Nearly all of the internal CUDA code is passed a GpuMat, which is implicitly converted to a PtrStep or PtrStepSz because of the templated conversion operators declared on GpuMat:
template <typename _Tp> operator PtrStepSz<_Tp>() const;
//! NOTE(review): presumably stores this matrix into m, converting to `type`
//! unless it is left at the default of -1 — definition not visible here; confirm
CV_WRAP void assignTo(GpuMat& m, int type = -1) const;
//! returns pointer to y-th row (row 0 when y is omitted)
uchar* ptr(int y = 0);
const uchar* ptr(int y = 0) const;
//! template version of the above method
template<typename _Tp> _Tp* ptr(int y = 0);
template<typename _Tp> const _Tp* ptr(int y = 0) const;
//! non-explicit conversion operators: these are what allow a GpuMat to be
//! passed wherever a PtrStepSz<_Tp> or PtrStep<_Tp> is expected
//! (for example, as a kernel argument)
template <typename _Tp> operator PtrStepSz<_Tp>() const;
template <typename _Tp> operator PtrStep<_Tp>() const;
//! returns a new GpuMat header for the specified row
CV_WRAP GpuMat row(int y) const;
//! returns a new GpuMat header for the specified column
CV_WRAP GpuMat col(int x) const;
//! returns a new GpuMat header for the specified row span
CV_WRAP GpuMat rowRange(int startrow, int endrow) const;
See resize for an example. The kernel takes a PtrStepSz<T> for dst:
template <class Ptr2D, typename T> __global__ void resize(Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx)
src_reg = src(y2_read, x1);
out = out + src_reg * ((x2 - src_x) * (src_y - y1));
src_reg = src(y2_read, x2_read);
out = out + src_reg * ((src_x - x1) * (src_y - y1));
dst(dst_y, dst_x) = saturate_cast<T>(out);
}
}
template <class Ptr2D, typename T> __global__ void resize(Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx)
{
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
dst(dst_y, dst_x) = src(src_y, src_x);
but it is originally called with a GpuMat:
{
dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
}
else
{
fx = static_cast<double>(dsize.width) / src.cols;
fy = static_cast<double>(dsize.height) / src.rows;
}
_dst.create(dsize, src.type());
GpuMat dst = _dst.getGpuMat();
if (dsize == src.size())
{
src.copyTo(dst, stream);
return;
}
const func_t func = funcs[src.depth()][src.channels() - 1];
if (!func)