diff --git a/dnn/include/megdnn/oprs/imgproc.h b/dnn/include/megdnn/oprs/imgproc.h
index 0f1c13347c849351297c5f6f1803981cbe0cbc66..9f951753b999274e9c37e080407880399adf779d 100644
--- a/dnn/include/megdnn/oprs/imgproc.h
+++ b/dnn/include/megdnn/oprs/imgproc.h
@@ -105,15 +105,32 @@ class WarpPerspectiveBackwardData: public WarpPerspectiveBase {
      * \param[out] grad the backpropagated gradient wrt. src
      * \param[out] workspace temporary workspace to perform backward
      */
+    void exec(_megdnn_tensor_in mat,
+              _megdnn_tensor_in diff,
+              _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) {
+        exec(mat, {}, diff, grad, workspace);
+    }
+
     virtual void exec(_megdnn_tensor_in mat,
+                      _megdnn_tensor_in mat_idx,
                       _megdnn_tensor_in diff,
                       _megdnn_tensor_out grad,
                       _megdnn_workspace workspace) = 0;
+
+    size_t get_workspace_in_bytes(const TensorLayout &mat,
+                                  const TensorLayout &diff,
+                                  const TensorLayout &grad) {
+        return get_workspace_in_bytes(mat, {}, diff, grad);
+    }
+
     virtual size_t get_workspace_in_bytes(const TensorLayout &mat,
+                                          const TensorLayout &mat_idx,
                                           const TensorLayout &diff,
                                           const TensorLayout &grad) = 0;
 
   protected:
     void check_exec(const TensorLayout &mat,
+                    const TensorLayout &mat_idx,
                     const TensorLayout &diff,
                     const TensorLayout &grad,
                     size_t workspace_in_bytes);
@@ -129,18 +146,37 @@ class WarpPerspectiveBackwardMat: public WarpPerspectiveBase {
      * \param[out] grad the backpropagated gradient wrt. mat
      * \param[out] workspace temporary workspace to perform backward
      */
+    void exec(_megdnn_tensor_in src,
+              _megdnn_tensor_in mat,
+              _megdnn_tensor_in diff,
+              _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) {
+        exec(src, mat, {}, diff, grad, workspace);
+    }
+
     virtual void exec(_megdnn_tensor_in src,
                       _megdnn_tensor_in mat,
+                      _megdnn_tensor_in mat_idx,
                       _megdnn_tensor_in diff,
                       _megdnn_tensor_out grad,
                       _megdnn_workspace workspace) = 0;
+
+    size_t get_workspace_in_bytes(const TensorLayout &src,
+                                  const TensorLayout &mat,
+                                  const TensorLayout &diff,
+                                  const TensorLayout &grad) {
+        return get_workspace_in_bytes(src, mat, {}, diff, grad);
+    }
+
     virtual size_t get_workspace_in_bytes(const TensorLayout &src,
                                           const TensorLayout &mat,
+                                          const TensorLayout &mat_idx,
                                           const TensorLayout &diff,
                                           const TensorLayout &grad) = 0;
 
   protected:
     void check_exec(const TensorLayout &src,
                     const TensorLayout &mat,
+                    const TensorLayout &mat_idx,
                     const TensorLayout &diff,
                     const TensorLayout &grad,
                     size_t workspace_in_bytes);
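Note: the non-virtual exec/get_workspace_in_bytes overloads added above exist purely for source compatibility: they forward to the new five-argument virtuals, passing an empty tensor/layout in the mat_idx slot. A minimal sketch of the same pattern (hypothetical types, not the megdnn declarations):

    // Forwarding-overload pattern: legacy call sites keep compiling,
    // backends override only the new signature.
    struct Tensor {};
    struct OprBase {
        void exec(Tensor mat, Tensor diff, Tensor grad) {
            exec(mat, Tensor{}, diff, grad);  // empty tensor == "no mat_idx"
        }
        virtual void exec(Tensor mat, Tensor mat_idx, Tensor diff,
                          Tensor grad) = 0;
        virtual ~OprBase() = default;
    };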
diff --git a/dnn/src/common/warp_perspective.cpp b/dnn/src/common/warp_perspective.cpp
index 4de3171b27fc124e5e4039fff0c1bc5faef814cd..a0d558cb0925e68cad57c6754e9196b43ab712ab 100644
--- a/dnn/src/common/warp_perspective.cpp
+++ b/dnn/src/common/warp_perspective.cpp
@@ -255,29 +255,31 @@ void WarpPerspectiveForward::check_exec_allow_nhwc_mat_idx(
 }
 
 void WarpPerspectiveBackwardData::check_exec(const TensorLayout& mat,
+                                             const TensorLayout& mat_idx,
                                              const TensorLayout& diff,
                                              const TensorLayout& grad,
                                              size_t workspace_in_bytes) {
-    check_layout_fwd(grad, mat, diff);
+    check_layout_fwd(grad, mat, mat_idx, diff);
     megdnn_assert(grad.dtype == dtype::Float32() MEGDNN_INC_FLOAT16(
                           || grad.dtype == dtype::BFloat16()),
                   "Backward WarpPerspective only supports Float32/BFloat16.");
-    auto required_workspace_in_bytes = get_workspace_in_bytes(mat, diff, grad);
+    auto required_workspace_in_bytes =
+            get_workspace_in_bytes(mat, mat_idx, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
 }
 
 void WarpPerspectiveBackwardMat::check_exec(const TensorLayout& src,
                                             const TensorLayout& mat,
+                                            const TensorLayout& mat_idx,
                                             const TensorLayout& diff,
                                             const TensorLayout& grad,
                                             size_t workspace_in_bytes) {
-    check_layout_fwd(src, mat, diff);
+    check_layout_fwd(src, mat, mat_idx, diff);
     megdnn_assert_eq_layout(mat, grad);
     megdnn_assert(grad.dtype == dtype::Float32() MEGDNN_INC_FLOAT16(
                           || grad.dtype == dtype::BFloat16()),
                   "Backward WarpPerspective only supports Float32/BFloat16.");
     auto required_workspace_in_bytes =
-            get_workspace_in_bytes(src, mat, diff, grad);
+            get_workspace_in_bytes(src, mat, mat_idx, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
 }
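Note: once mat_idx is bound, the batch dimensions decouple: grad keeps the source batch size N_SRC while mat and diff use the matrix batch N_MAT = mat_idx.shape[0]. A hedged sketch of the invariants check_layout_fwd is expected to enforce (simplified stand-in, not the real helper):

    #include <cassert>
    #include <cstddef>

    struct Shape4 { std::size_t n, c, h, w; };

    // grad: (N_SRC, C, IH, IW); mat: (N_MAT, 3, 3); diff: (N_MAT, C, OH, OW)
    void check_backward_data_shapes(Shape4 grad, std::size_t n_mat,
                                    std::size_t mat_idx_len, Shape4 diff,
                                    bool has_mat_idx) {
        if (has_mat_idx) {
            assert(mat_idx_len == n_mat);  // one 3x3 matrix per index entry
            assert(diff.n == n_mat);       // diff batches follow the matrices
            // grad.n stays N_SRC; every mat_idx value must be < grad.n
        } else {
            assert(diff.n == grad.n && n_mat == grad.n);
        }
        assert(diff.c == grad.c);
    }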
diff --git a/dnn/src/cuda/warp_perspective/backward_data.cpp b/dnn/src/cuda/warp_perspective/backward_data.cpp
index 272364a1909250e8c65a3802d7c8fe54a5c67ed3..b49f10f8f78327faf798e2224d55621ba6e1bbe8 100644
--- a/dnn/src/cuda/warp_perspective/backward_data.cpp
+++ b/dnn/src/cuda/warp_perspective/backward_data.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 
 #include "src/cuda/warp_perspective/opr_impl.h"
@@ -18,8 +19,8 @@ namespace megdnn {
 namespace cuda {
 
 WorkspaceBundle WarpPerspectiveBackwardDataImpl::get_workspace_bundle(
-        void* ptr, const TensorLayout& mat, const TensorLayout& diff,
-        const TensorLayout& grad) const {
+        void* ptr, const TensorLayout& mat, const TensorLayout& mat_idx,
+        const TensorLayout& diff, const TensorLayout& grad) const {
     SmallVector<size_t> sizes;
     TensorLayout fmat = mat;
     TensorLayout fdiff = diff;
     TensorLayout fgrad = grad;
@@ -33,20 +34,24 @@ WorkspaceBundle WarpPerspectiveBackwardDataImpl::get_workspace_bundle(
     get_workspace(fmat);
     get_workspace(fdiff);
     get_workspace(fgrad);
-    sizes.push_back(get_float32_workspace_in_bytes(fmat, fdiff, fgrad));
+    sizes.push_back(
+            get_float32_workspace_in_bytes(fmat, mat_idx, fdiff, fgrad));
     return {ptr, std::move(sizes)};
 }
 
 void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
+                                           _megdnn_tensor_in mat_idx,
                                            _megdnn_tensor_in sdiff,
                                            _megdnn_tensor_out sgrad,
                                            _megdnn_workspace sworkspace) {
-    check_exec(smat.layout, sdiff.layout, sgrad.layout, sworkspace.size);
+    check_exec(smat.layout, mat_idx.layout, sdiff.layout, sgrad.layout,
+               sworkspace.size);
     TensorND mat = smat;
     TensorND diff = sdiff;
     TensorND grad = sgrad;
-    auto bundle = get_workspace_bundle(sworkspace.raw_ptr, smat.layout,
-                                       sdiff.layout, sgrad.layout);
+    auto bundle =
+            get_workspace_bundle(sworkspace.raw_ptr, smat.layout,
+                                 mat_idx.layout, sdiff.layout, sgrad.layout);
     auto ctypecvt = CompTypeCvter<dtype::BFloat16, dtype::Float32>(
             concrete_handle(this->handle()), &bundle);
     if (sgrad.layout.dtype.enumv() == DTypeTrait<dtype::BFloat16>::enumv) {
@@ -60,6 +65,15 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
         auto N = grad.layout.shape[0], C = grad.layout.shape[1],
              IH = grad.layout.shape[2], IW = grad.layout.shape[3],
              OH = diff.layout.shape[2], OW = diff.layout.shape[3];
+        int* midx_ptr = nullptr;
+        if (mat_idx.raw_ptr) {
+            megdnn_assert(mat_idx.layout.ndim == 1);
+            N = mat_idx.layout.shape[0];
+            midx_ptr = mat_idx.ptr<int>();
+        } else {
+            megdnn_assert(mat_idx.layout.ndim == 0);
+        }
+
         auto bval = param().border_val;
         auto bmode = warp_perspective::get_bmode(param().bmode);
 
@@ -67,10 +81,11 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
         size_t batch_x_channel_size = N * C;
         size_t max_batch_x_channel = max_batch_x_channel_size();
         if (batch_x_channel_size <= max_batch_x_channel) {
             warp_perspective::backward_data_proxy(
-                    mat.ptr<dt_float32>(), diff.ptr<dt_float32>(),
+                    mat.ptr<dt_float32>(), midx_ptr, diff.ptr<dt_float32>(),
                     grad.ptr<dt_float32>(),
-                    reinterpret_cast<float*>(workspace.raw_ptr), N, C, IH, IW,
-                    OH, OW, bval, bmode, stream);
+                    reinterpret_cast<float*>(workspace.raw_ptr), N,
+                    grad.layout.shape[0], C, IH, IW, OH, OW, bval, bmode,
+                    stream);
         } else {
             dt_float32* mat_ptr = mat.ptr<dt_float32>();
             dt_float32* diff_ptr = diff.ptr<dt_float32>();
@@ -80,10 +95,10 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
                 size_t curr_batch_size = N > max_batch_size ? max_batch_size : N;
                 warp_perspective::backward_data_proxy(
-                        mat_ptr, diff_ptr, grad_ptr,
+                        mat_ptr, midx_ptr, diff_ptr, grad_ptr,
                         reinterpret_cast<float*>(workspace.raw_ptr),
-                        curr_batch_size, C, IH, IW, OH, OW, bval, bmode,
-                        stream);
+                        curr_batch_size, grad.layout.shape[0], C, IH, IW, OH,
+                        OW, bval, bmode, stream);
 
                 if (N <= max_batch_size) {
                     break;
@@ -91,7 +106,11 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
                     N -= max_batch_size;
                     mat_ptr += curr_batch_size * mat.layout.stride[0];
                     diff_ptr += curr_batch_size * diff.layout.stride[0];
-                    grad_ptr += curr_batch_size * grad.layout.stride[0];
+                    if (midx_ptr == nullptr) {
+                        grad_ptr += curr_batch_size * grad.layout.stride[0];
+                    } else {
+                        midx_ptr += curr_batch_size;
+                    }
                 }
             }
         }
@@ -102,8 +121,8 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
 }
 
 size_t WarpPerspectiveBackwardDataImpl::get_float32_workspace_in_bytes(
-        const TensorLayout& /* mat */, const TensorLayout& diff,
-        const TensorLayout& grad) const {
+        const TensorLayout& /* mat */, const TensorLayout& mat_idx,
+        const TensorLayout& diff, const TensorLayout& grad) const {
     auto N = grad.shape[0], C = grad.shape[1], IH = grad.shape[2],
          IW = grad.shape[3];
     auto OH = diff.shape[2], OW = diff.shape[3];
@@ -112,6 +131,9 @@ size_t WarpPerspectiveBackwardDataImpl::get_float32_workspace_in_bytes(
     size_t max_batch_size = N;
     size_t max_batch_x_channel = max_batch_x_channel_size();
     if (N * C > max_batch_x_channel) {
+        /* when the batch is too large, the workspace holds only part of grad;
+           indexing it through mat_idx would then go out of range */
+        megdnn_assert(mat_idx.ndim == 0, "batch size is too large; backward with mat_idx is unsupported in this case");
         max_batch_size = max_batch_x_channel / C;
     }
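Note: the float32 workspace holds `factor` zero-filled replicas of grad, so its size is driven by the grad batch; when N * C exceeds max_batch_x_channel the batch is processed in slices, which the assert above forbids for mat_idx because index entries address the full source batch. Sizing sketch (factor = 4, as in backward_data.cu):

    #include <cstddef>

    constexpr int factor = 4;  // replica count used to avoid atomic adds

    std::size_t backward_data_workspace_bytes(std::size_t n, std::size_t c,
                                              std::size_t ih, std::size_t iw) {
        // with mat_idx, n must be the full N_SRC so indices stay in range
        return n * c * ih * iw * factor * sizeof(float);
    }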
diff --git a/dnn/src/cuda/warp_perspective/backward_data.cu b/dnn/src/cuda/warp_perspective/backward_data.cu
index 7c95d20bb7714183dadb53408b801d31dd684166..f225dc653221c93b28b318c55f41d01c23d1f188 100644
--- a/dnn/src/cuda/warp_perspective/backward_data.cu
+++ b/dnn/src/cuda/warp_perspective/backward_data.cu
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 
 #include "src/cuda/warp_perspective/common.h"
@@ -20,16 +21,21 @@ namespace warp_perspective {
 
 const int factor = 4;
 
 template <class Getter>
-__global__ void warp_perspective_bwd_data_kernel(const float *hidden,
-        const float *mat, float *dst,
-        int N, int C, int IH, int IW, int OH, int OW)
-{
+__global__ void warp_perspective_bwd_data_kernel(const float* hidden,
+                                                 const float* mat,
+                                                 const int* midx, float* dst,
+                                                 int N, int C, int IH, int IW,
+                                                 int OH, int OW) {
     Getter getter;
     int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
     hidden += n * C*OH*OW;
-    dst += n * C*factor*IH*IW;
+    if (midx) {
+        dst += midx[n] * C * factor * IH * IW;
+    } else {
+        dst += n * C * factor * IH * IW;
+    }
     mat += n * 3*3;
     if (ow < OW && oh < OH) {
         float denominator = mat[6]*ow + mat[7]*oh + mat[8];
@@ -72,15 +78,19 @@ __global__ void add_up_kernel(const float *src, float *dst,
 }
 
 template <int factor>
-__global__ void warp_perspective_bwd_data_constant_kernel(const float *hidden,
-        const float *mat, float *dst,
-        int N, int C, int IH, int IW, int OH, int OW)
-{
+__global__ void warp_perspective_bwd_data_constant_kernel(
+        const float* hidden, const float* mat, const int* midx, float* dst,
+        int N, int C, int IH, int IW, int OH, int OW) {
+    int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
-    hidden += blockIdx.z * C*OH*OW;
-    dst += blockIdx.z * C*factor*IH*IW;
-    mat += blockIdx.z * 3*3;
+    hidden += n * C * OH * OW;
+    if (midx) {
+        dst += midx[n] * C * factor * IH * IW;
+    } else {
+        dst += n * C * factor * IH * IW;
+    }
+    mat += n * 3 * 3;
     if (ow < OW && oh < OH) {
         float denominator = mat[6]*ow + mat[7]*oh + mat[8];
         float iw = (mat[0]*ow + mat[1]*oh + mat[2]) / denominator;
@@ -119,30 +129,35 @@ __global__ void warp_perspective_bwd_data_constant_kernel(const float *hidden,
     }
 }
 
-size_t get_backward_data_workspace_in_bytes(
-        int N, int C, int IH, int IW, int /* OH */, int /* OW */,
-        BorderMode /* bmode */)
-{
+size_t get_backward_data_workspace_in_bytes(int N, int C, int IH, int IW,
+                                            int /* OH */, int /* OW */,
+                                            BorderMode /* bmode */) {
     return N*C*IH*IW*factor * sizeof(float);
 }
 
-void backward_data_proxy(const float *mat, const float *diff,
-        float *grad, float *workspace,
-        int N, int C, int IH, int IW, int OH, int OW, float bval,
-        BorderMode mode, cudaStream_t stream)
-{
-
+void backward_data_proxy(const float* mat, const int* midx, const float* diff,
+                         float* grad, float* workspace, int N, int N_SRC, int C,
+                         int IH, int IW, int OH, int OW, float bval,
+                         BorderMode mode, cudaStream_t stream) {
     (void)bval;
     (void)grad;
     const int BY = 16, BX = 32;
     {
         dim3 threads(BX, BY);
         dim3 blocks((OW+BX-1)/BX, (OH+BY-1)/BY, N);
-        cuda_check(cudaMemsetAsync(workspace, 0, sizeof(float) * factor*N*C*IH*IW,
+        if (midx) {
+            cuda_check(cudaMemsetAsync(
+                    workspace, 0, sizeof(float) * factor * N_SRC * C * IH * IW,
                     stream));
-#define DISPATCH(Getter) \
-        warp_perspective_bwd_data_kernel<Getter><<<blocks, threads, 0, stream>>>(diff, mat, workspace, N, C, IH, IW, OH, OW);
+        } else {
+            cuda_check(cudaMemsetAsync(workspace, 0,
+                                       sizeof(float) * factor * N * C * IH * IW,
+                                       stream));
+        }
+#define DISPATCH(Getter)                                                      \
+    warp_perspective_bwd_data_kernel<Getter>                                  \
+            <<<blocks, threads, 0, stream>>>(diff, mat, midx, workspace, N,   \
+                                             C, IH, IW, OH, OW);
         switch (mode) {
             case BORDER_REPLICATE:
                 DISPATCH(ReplicateGetter);
@@ -158,8 +173,9 @@ void backward_data_proxy(const float *mat, const float *diff,
                 break;
             case BORDER_CONSTANT:
                 warp_perspective_bwd_data_constant_kernel<factor>
-                        <<<blocks, threads, 0, stream>>>
-                        (diff, mat, workspace, N, C, IH, IW, OH, OW);
+                        <<<blocks, threads, 0, stream>>>(diff, mat, midx,
+                                                         workspace, N, C, IH,
+                                                         IW, OH, OW);
                 break;
             default:
                 break;
@@ -169,9 +185,15 @@ void backward_data_proxy(const float *mat, const float *diff,
     {
         int THREADS = 512;
         dim3 threads(THREADS);
-        dim3 blocks((IH*IW+THREADS-1)/THREADS, N*C);
-        add_up_kernel<factor><<<blocks, threads, 0, stream>>>(workspace, grad,
-                IH*IW);
+        if (midx) {
+            dim3 blocks((IH * IW + THREADS - 1) / THREADS, N_SRC * C);
+            add_up_kernel<factor>
+                    <<<blocks, threads, 0, stream>>>(workspace, grad, IH * IW);
+        } else {
+            dim3 blocks((IH * IW + THREADS - 1) / THREADS, N * C);
+            add_up_kernel<factor>
+                    <<<blocks, threads, 0, stream>>>(workspace, grad, IH * IW);
+        }
     }
     after_kernel_launch();
 }
@@ -181,4 +203,3 @@ void backward_data_proxy(const float *mat, const float *diff,
 } // namespace megdnn
 
 // vim: syntax=cpp.doxygen
-
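Note: the kernels above avoid atomic adds by scattering each contribution into one of `factor` interleaved partial-sum replicas, which add_up_kernel then folds into grad. A CPU model of that reduce step, under the replica-per-plane layout assumed here:

    #include <cstddef>
    #include <vector>

    // ws holds planes * factor partial planes; result is the summed grad.
    std::vector<float> add_up(const std::vector<float>& ws, std::size_t planes,
                              std::size_t plane_elems, std::size_t factor) {
        std::vector<float> grad(planes * plane_elems, 0.f);
        for (std::size_t p = 0; p < planes; ++p)
            for (std::size_t f = 0; f < factor; ++f)
                for (std::size_t i = 0; i < plane_elems; ++i)
                    grad[p * plane_elems + i] +=
                            ws[(p * factor + f) * plane_elems + i];
        return grad;
    }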
diff --git a/dnn/src/cuda/warp_perspective/backward_mat.cpp b/dnn/src/cuda/warp_perspective/backward_mat.cpp
index 48964c2cbe29a859ef9f3b406e441ee598fe64d5..686858028cfb1bb3e6d9338e0b0b042a4cdfd9f8 100644
--- a/dnn/src/cuda/warp_perspective/backward_mat.cpp
+++ b/dnn/src/cuda/warp_perspective/backward_mat.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 
 #include "src/cuda/warp_perspective/opr_impl.h"
@@ -40,15 +41,17 @@ WorkspaceBundle WarpPerspectiveBackwardMatImpl::get_workspace_bundle(
 
 void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
                                           _megdnn_tensor_in smat,
+                                          _megdnn_tensor_in smat_idx,
                                           _megdnn_tensor_in sdiff,
                                           _megdnn_tensor_out sgrad,
                                           _megdnn_workspace sworkspace) {
-    check_exec(ssrc.layout, smat.layout, sdiff.layout, sgrad.layout,
-               sworkspace.size);
+    check_exec(ssrc.layout, smat.layout, smat_idx.layout, sdiff.layout,
+               sgrad.layout, sworkspace.size);
     TensorND src = ssrc;
     TensorND mat = smat;
     TensorND diff = sdiff;
     TensorND grad = sgrad;
+    TensorND mat_idx = smat_idx;
     auto bundle = get_workspace_bundle(sworkspace.raw_ptr, ssrc.layout,
                                        smat.layout, sdiff.layout, sgrad.layout);
     auto ctypecvt = CompTypeCvter<dtype::BFloat16, dtype::Float32>(
@@ -64,6 +67,15 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
         auto N = src.layout.shape[0], C = src.layout.shape[1],
              IH = src.layout.shape[2], IW = src.layout.shape[3],
             OH = diff.layout.shape[2], OW = diff.layout.shape[3];
+        int* midx_ptr = nullptr;
+        if (mat_idx.raw_ptr) {
+            megdnn_assert(mat_idx.layout.ndim == 1);
+            N = mat_idx.layout.shape[0];
+            midx_ptr = mat_idx.ptr<int>();
+        } else {
+            megdnn_assert(mat_idx.layout.ndim == 0);
+        }
+
         auto bval = param().border_val;
         auto bmode = warp_perspective::get_bmode(param().bmode);
 
@@ -71,7 +83,7 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
         size_t max_batch_x_channel = max_batch_x_channel_size();
         if (batch_x_channel_size <= max_batch_x_channel) {
             warp_perspective::backward_mat_proxy(
-                    src.ptr<dt_float32>(), mat.ptr<dt_float32>(),
+                    src.ptr<dt_float32>(), mat.ptr<dt_float32>(), midx_ptr,
                     diff.ptr<dt_float32>(), grad.ptr<dt_float32>(), N, C, IH,
                     IW, OH, OW, bval, bmode, stream);
         } else {
@@ -84,14 +96,19 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
                 size_t curr_batch_size = N > max_batch_size ? max_batch_size : N;
                 warp_perspective::backward_mat_proxy(
-                        src_ptr, mat_ptr, diff_ptr, grad_ptr, curr_batch_size,
-                        C, IH, IW, OH, OW, bval, bmode, stream);
+                        src_ptr, mat_ptr, midx_ptr, diff_ptr, grad_ptr,
+                        curr_batch_size, C, IH, IW, OH, OW, bval, bmode,
+                        stream);
 
                 if (N <= max_batch_size) {
                     break;
                 } else {
                     N -= max_batch_size;
-                    src_ptr += curr_batch_size * src.layout.stride[0];
+                    if (midx_ptr == nullptr) {
+                        src_ptr += curr_batch_size * src.layout.stride[0];
+                    } else {
+                        midx_ptr += curr_batch_size;
+                    }
                     mat_ptr += curr_batch_size * mat.layout.stride[0];
                     diff_ptr += curr_batch_size * diff.layout.stride[0];
                     grad_ptr += curr_batch_size * grad.layout.stride[0];
@@ -109,4 +126,3 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
 } // namespace megdnn
 
 // vim: syntax=cpp.doxygen
-
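Note: in the sliced-batch loop above, which cursor advances depends on mat_idx: without it src_ptr walks the source batch in lockstep with mat/diff/grad; with it src_ptr stays at the base and only the index window moves. The bookkeeping, extracted as a sketch:

    #include <cstddef>

    void advance_cursors(const float*& src_ptr, const int*& midx_ptr,
                         std::size_t curr_batch, std::size_t src_batch_stride) {
        if (midx_ptr == nullptr) {
            src_ptr += curr_batch * src_batch_stride;  // direct batch walk
        } else {
            midx_ptr += curr_batch;  // indirect: sources are fetched via midx
        }
    }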
diff --git a/dnn/src/cuda/warp_perspective/backward_mat.cu b/dnn/src/cuda/warp_perspective/backward_mat.cu
index 9d072d434dcb46abf3b6e7c67ab58d7f36a4be94..147b0bded4b7a3c09decad1a1ca721a42f290397 100644
--- a/dnn/src/cuda/warp_perspective/backward_mat.cu
+++ b/dnn/src/cuda/warp_perspective/backward_mat.cu
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 
 #include "src/cuda/warp_perspective/common.h"
@@ -20,17 +21,21 @@ namespace cuda {
 namespace warp_perspective {
 
 template <class Getter>
-__global__ void warp_perspective_bwd_mat_kernel(const float *hidden,
-        const float *in, const float *mat, float *grad,
-        int N, int C, int IH, int IW, int OH, int OW)
-{
+__global__ void warp_perspective_bwd_mat_kernel(
+        const float* hidden, const float* in, const float* mat, const int* midx,
+        float* grad, int N, int C, int IH, int IW, int OH, int OW) {
     Getter getter;
+    int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
     hidden += blockIdx.z * C*OH*OW;
-    in += blockIdx.z * C*IH*IW;
-    mat += blockIdx.z * 3*3;
-    grad += blockIdx.z * 3*3;
+    if (midx) {
+        in += midx[n] * C * IH * IW;
+    } else {
+        in += n * C * IH * IW;
+    }
+    mat += n * 3*3;
+    grad += n * 3*3;
     float grad_local[3*3];
     memset(grad_local, 0, sizeof(grad_local));
     if (ow < OW && oh < OH) {
@@ -83,9 +88,8 @@ __global__ void warp_perspective_bwd_mat_kernel(const float *hidden,
             dh[8] = 1.0f * ddenominatorh;
 #pragma unroll
             for (int i = 0; i < 9; ++i) {
-                grad_local[i] +=
-                    hidden[oh*OW+ow] * dalpha * dh[i] +
-                    hidden[oh*OW+ow] * dbeta * dw[i];
+                grad_local[i] += hidden[oh * OW + ow] * dalpha * dh[i] +
+                                 hidden[oh * OW + ow] * dbeta * dw[i];
             }
             hidden += OH*OW;
             in += IH*IW;
@@ -125,17 +129,21 @@ __global__ void warp_perspective_bwd_mat_kernel(const float *hidden,
     }
 }
 
-__global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
-        const float *in, const float *mat, float *grad,
-        int N, int C, int IH, int IW, int OH, int OW, float bval)
-{
+__global__ void warp_perspective_bwd_mat_constant_kernel(
+        const float* hidden, const float* in, const float* mat, const int* midx,
+        float* grad, int N, int C, int IH, int IW, int OH, int OW, float bval) {
+    int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
-    hidden += blockIdx.z * C*OH*OW;
-    in += blockIdx.z * C*IH*IW;
-    mat += blockIdx.z * 3*3;
-    grad += blockIdx.z * 3*3;
-    float grad_local[3*3];
+    hidden += blockIdx.z * C * OH * OW;
+    if (midx) {
+        in += midx[n] * C * IH * IW;
+    } else {
+        in += n * C * IH * IW;
+    }
+    mat += n * 3 * 3;
+    grad += n * 3 * 3;
+    float grad_local[3 * 3];
     memset(grad_local, 0, sizeof(grad_local));
     if (ow < OW && oh < OH) {
         float numeratorw = mat[0]*ow + mat[1]*oh + mat[2];
@@ -199,10 +207,10 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
             dh[8] = 1.0f * ddenominatorh;
 #pragma unroll
             for (int i = 0; i < 9; ++i) {
-                float delta =
-                    hidden[oh*OW+ow] * dalpha * dh[i] +
-                    hidden[oh*OW+ow] * dbeta * dw[i];
-                if (isfinite(delta)) grad_local[i] += delta;
+                float delta = hidden[oh * OW + ow] * dalpha * dh[i] +
+                              hidden[oh * OW + ow] * dbeta * dw[i];
+                if (isfinite(delta))
+                    grad_local[i] += delta;
             }
             hidden += OH*OW;
             in += IH*IW;
@@ -227,8 +235,9 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
     for (int k = 16; k >= 1; k >>= 1) {
         if (tidx < k) {
 #pragma unroll
-            for (int i = 0; i < 9; ++i)
-                grad_shared[tidy][tidx][i] += grad_shared[tidy][tidx+k][i];
+            for (int i = 0; i < 9; ++i)
+                grad_shared[tidy][tidx][i] +=
+                        grad_shared[tidy][tidx + k][i];
         }
         cub::WARP_SYNC(0xffffffff);
     }
@@ -240,18 +249,17 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
     }
 }
 
-void backward_mat_proxy(const float *src, const float *mat,
-        const float *diff, float *grad,
-        int N, int C, int IH, int IW, int OH, int OW, float bval,
-        BorderMode mode, cudaStream_t stream)
-{
+void backward_mat_proxy(const float* src, const float* mat, const int* midx,
+                        const float* diff, float* grad, int N, int C, int IH,
+                        int IW, int OH, int OW, float bval, BorderMode mode,
+                        cudaStream_t stream) {
     const int BY = 16, BX = 32;
     dim3 threads(BX, BY);
     dim3 blocks((OW+BX-1)/BX, (OH+BY-1)/BY, N);
     cuda_check(cudaMemsetAsync(grad, 0, sizeof(float) * N*3*3, stream));
-#define DISPATCH(Getter) \
+#define DISPATCH(Getter)                                                     \
     warp_perspective_bwd_mat_kernel<Getter><<<blocks, threads, 0, stream>>>( \
-            diff, src, mat, grad, N, C, IH, IW, OH, OW);
+            diff, src, mat, midx, grad, N, C, IH, IW, OH, OW);
     switch (mode) {
         case BORDER_REPLICATE:
             DISPATCH(ReplicateGetter);
@@ -266,8 +274,9 @@ void backward_mat_proxy(const float *src, const float *mat,
             DISPATCH(WrapGetter);
             break;
         case BORDER_CONSTANT:
-            warp_perspective_bwd_mat_constant_kernel<<<blocks, threads, 0, stream>>>(
-                    diff, src, mat, grad, N, C, IH, IW, OH, OW, bval);
+            warp_perspective_bwd_mat_constant_kernel<<<blocks, threads, 0,
+                                                       stream>>>(
+                    diff, src, mat, midx, grad, N, C, IH, IW, OH, OW, bval);
             break;
         default:
             break;
@@ -281,4 +290,3 @@ void backward_mat_proxy(const float *src, const float *mat,
 } // namespace megdnn
 
 // vim: syntax=cpp.doxygen
-
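Note: both .cu files use the same indirection for the per-batch base pointer: entry n reads (or writes) image midx[n] when an index tensor is bound and image n otherwise. The contract, in one helper:

    #include <cstddef>

    // midx may be null; when non-null, each entry must lie in [0, N_SRC).
    inline const float* batch_base(const float* base, const int* midx, int n,
                                   std::size_t image_elems) {
        return base + (midx ? midx[n] : n) * image_elems;
    }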
diff --git a/dnn/src/cuda/warp_perspective/common.h b/dnn/src/cuda/warp_perspective/common.h
index deea5a0916f6573aae1618477b18c3ec26fe6919..1687238c490976c9ced87296194ba37133ab47f8 100644
--- a/dnn/src/cuda/warp_perspective/common.h
+++ b/dnn/src/cuda/warp_perspective/common.h
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #pragma once
 #include <cuda_runtime_api.h>
@@ -19,40 +20,34 @@ namespace warp_perspective {
 
 // all these kernels use bilinear interpolation
 
-template <typename ctype>
-void forward_proxy(
-        bool is_nhwc,
-        const ctype *src, const float *mat, const int *mat_idx,
-        ctype *dst, int N_SRC, int N_MAT,
-        int C, int IH, int IW, int OH, int OW, ctype bval,
-        BorderMode bmode,
-        megcore::AsyncErrorInfo* error_info, void* error_tracker,
-        cudaStream_t stream);
+template <typename ctype>
+void forward_proxy(bool is_nhwc, const ctype* src, const float* mat,
+                   const int* mat_idx, ctype* dst, int N_SRC, int N_MAT, int C,
+                   int IH, int IW, int OH, int OW, ctype bval, BorderMode bmode,
+                   megcore::AsyncErrorInfo* error_info, void* error_tracker,
+                   cudaStream_t stream);
 
 template <typename ctype>
-void forward_proxy_nchw4(
-        const ctype *src, const float *mat, const int *mat_idx,
-        ctype *dst, int N_SRC, int N_MAT,
-        int C, int IH, int IW, int OH, int OW, ctype bval,
-        BorderMode bmode,
-        megcore::AsyncErrorInfo* error_info, void* error_tracker,
-        cudaStream_t stream);
-
-void backward_data_proxy(const float *mat, const float *diff, float *grad,
-        float *workspace,
-        int N, int C, int IH, int IW, int OH, int OW, float bval,
-        BorderMode bmode, cudaStream_t stream);
-size_t get_backward_data_workspace_in_bytes(
-        int N, int C, int IH, int IW, int OH, int OW,
-        BorderMode bmode);
-
-void backward_mat_proxy(
-        const float *src, const float *mat, const float *diff, float *grad,
-        int N, int C, int IH, int IW, int OH, int OW, float bval,
-        BorderMode bmode, cudaStream_t stream);
-
-} // namespace warp_perspective
-} // namespace cuda
-} // namespace megdnn
+void forward_proxy_nchw4(const ctype* src, const float* mat, const int* mat_idx,
+                         ctype* dst, int N_SRC, int N_MAT, int C, int IH,
+                         int IW, int OH, int OW, ctype bval, BorderMode bmode,
+                         megcore::AsyncErrorInfo* error_info,
+                         void* error_tracker, cudaStream_t stream);
+
+void backward_data_proxy(const float* mat, const int* midx, const float* diff,
+                         float* grad, float* workspace, int N, int N_SRC, int C,
+                         int IH, int IW, int OH, int OW, float bval,
+                         BorderMode bmode, cudaStream_t stream);
+size_t get_backward_data_workspace_in_bytes(int N, int C, int IH, int IW,
+                                            int OH, int OW, BorderMode bmode);
+
+void backward_mat_proxy(const float* src, const float* mat, const int* midx,
+                        const float* diff, float* grad, int N, int C, int IH,
+                        int IW, int OH, int OW, float bval, BorderMode bmode,
+                        cudaStream_t stream);
+
+}  // namespace warp_perspective
+}  // namespace cuda
+}  // namespace megdnn
 
 // vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/warp_perspective/opr_impl.h b/dnn/src/cuda/warp_perspective/opr_impl.h
index e4b36747ca0ff28fa27d80cfecd730788a18f8b0..35da891b8374bf8a031710299ce3a09130e70bda 100644
--- a/dnn/src/cuda/warp_perspective/opr_impl.h
+++ b/dnn/src/cuda/warp_perspective/opr_impl.h
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #pragma once
 #include "megdnn/oprs.h"
@@ -48,20 +49,24 @@ class WarpPerspectiveBackwardDataImpl final
         : public WarpPerspectiveBackwardData {
 public:
     using WarpPerspectiveBackwardData::WarpPerspectiveBackwardData;
-    void exec(_megdnn_tensor_in mat, _megdnn_tensor_in diff,
-              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
+    void exec(_megdnn_tensor_in mat, _megdnn_tensor_in mat_idx,
+              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) override;
     size_t get_workspace_in_bytes(const TensorLayout& mat,
+                                  const TensorLayout& mat_idx,
                                   const TensorLayout& diff,
                                   const TensorLayout& grad) override {
-        return get_workspace_bundle(nullptr, mat, diff, grad)
+        return get_workspace_bundle(nullptr, mat, mat_idx, diff, grad)
                 .total_size_in_bytes();
     }
 
 private:
     WorkspaceBundle get_workspace_bundle(void* ptr, const TensorLayout& mat,
+                                         const TensorLayout& mat_idx,
                                          const TensorLayout& diff,
                                          const TensorLayout& grad) const;
     size_t get_float32_workspace_in_bytes(const TensorLayout& mat,
+                                          const TensorLayout& mat_idx,
                                           const TensorLayout& diff,
                                           const TensorLayout& grad) const;
 };
@@ -70,10 +75,11 @@ class WarpPerspectiveBackwardMatImpl final
         : public WarpPerspectiveBackwardMat {
 public:
     using WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat;
     void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
-              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
-              _megdnn_workspace workspace) override;
+              _megdnn_tensor_in mat_idx, _megdnn_tensor_in diff,
+              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
     size_t get_workspace_in_bytes(const TensorLayout& src,
                                   const TensorLayout& mat,
+                                  const TensorLayout& /* mat_idx */,
                                   const TensorLayout& diff,
                                   const TensorLayout& grad) override {
         return get_workspace_bundle(nullptr, src, mat, diff, grad)
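Note: BackwardMatImpl deliberately ignores mat_idx when sizing its workspace (only the dtype-conversion buffers depend on the layouts), hence the commented-out parameter name. Hypothetical caller-side flow against the new five-layout signature:

    #include "megdnn/oprs.h"

    // Sketch only: query the workspace, then run exec with a bound mat_idx.
    size_t query_ws(megdnn::WarpPerspectiveBackwardMat* opr,
                    const megdnn::TensorLayout& src,
                    const megdnn::TensorLayout& mat,
                    const megdnn::TensorLayout& mat_idx,
                    const megdnn::TensorLayout& diff,
                    const megdnn::TensorLayout& grad) {
        return opr->get_workspace_in_bytes(src, mat, mat_idx, diff, grad);
    }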
diff --git a/dnn/src/naive/warp_perspective/opr_impl.cpp b/dnn/src/naive/warp_perspective/opr_impl.cpp
index 8958c180dc50ab2e6e573c056ba9f309ee220b68..ca46d19127f93a92a9d5463fd0141144cca1a8f4 100644
--- a/dnn/src/naive/warp_perspective/opr_impl.cpp
+++ b/dnn/src/naive/warp_perspective/opr_impl.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "src/naive/warp_perspective/opr_impl.h"
 #include "src/naive/warp_perspective/warp_perspective_cv.h"
@@ -358,18 +359,29 @@ void WarpPerspectiveForwardImpl::exec(_megdnn_tensor_in src,
 }
 
 template <typename ctype, typename mtype>
-void WarpPerspectiveBackwardDataImpl::kern_naive(const KernParam<ctype, mtype>& kern_param) {
-    const int N = kern_param.n, C = kern_param.c,
-              IH = kern_param.ih, IW = kern_param.iw;
+void WarpPerspectiveBackwardDataImpl::kern_naive(
+        const KernParam<ctype, mtype>& kern_param) {
+    const int N = kern_param.n_mat, C = kern_param.c, IH = kern_param.ih,
+              IW = kern_param.iw;
     const int OH = kern_param.oh, OW = kern_param.ow;
 
     const ctype* hptr_ = kern_param.hptr;
     const mtype* mptr_ = kern_param.mptr;
     ctype* sptr_ = kern_param.sptr;
+    int* midx_ptr = kern_param.midx_ptr;
     auto hptr = hptr_;
     auto mptr = mptr_;
     auto sptr = sptr_;
-    std::memset(sptr, 0, sizeof(ctype) * N * C * IH * IW);
+    if (midx_ptr) {
+        std::memset(sptr, 0, sizeof(ctype) * kern_param.n_src * C * IH * IW);
+    } else {
+        std::memset(sptr, 0, sizeof(ctype) * N * C * IH * IW);
+    }
     rep(n, N) {
+        if (midx_ptr) {
+            sptr = sptr_ + midx_ptr[n] * C * IH * IW;
+        } else {
+            sptr = sptr_ + n * C * IH * IW;
+        }
         rep(oh, OH) rep(ow, OW) {
             float numeratorw = mptr[0] * ow + mptr[1] * oh + mptr[2];
             float numeratorh = mptr[3] * ow + mptr[4] * oh + mptr[5];
@@ -404,27 +416,30 @@ void WarpPerspectiveBackwardDataImpl::kern_naive(
                 }
             }
         }
-        sptr += C * IH * IW;
         hptr += C * OH * OW;
         mptr += 3 * 3;
     }
 }
 
 void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in mat,
+                                           _megdnn_tensor_in mat_idx,
                                            _megdnn_tensor_in diff,
                                            _megdnn_tensor_out grad,
                                            _megdnn_workspace workspace) {
-    check_exec(mat.layout, diff.layout, grad.layout, workspace.size);
+    check_exec(mat.layout, mat_idx.layout, diff.layout, grad.layout,
+               workspace.size);
     megdnn_assert(param().format == param::WarpPerspective::Format::NCHW,
                   "invalid warp_perspective format");
#define DISPATCH_ST_MT(dt, ct)                                               \
    if (diff.layout.dtype.enumv() == DTypeTrait<dt>::enumv) {                \
        if (mat.layout.dtype.enumv() == DTypeTrait<dtype::Float32>::enumv) { \
-            auto kparam = KernParam<ct, float>::from_tensors(mat, diff, grad); \
+            auto kparam = KernParam<ct, float>::from_tensors(mat, mat_idx,   \
+                                                             diff, grad);    \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                \
             return;                                                          \
         } else {                                                             \
-            auto kparam = KernParam<ct, ct>::from_tensors(mat, diff, grad);  \
+            auto kparam =                                                    \
+                    KernParam<ct, ct>::from_tensors(mat, mat_idx, diff, grad); \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                \
             return;                                                          \
         }                                                                    \
@@ -441,7 +456,7 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in mat,
 
 template <typename ctype, typename mtype>
 void WarpPerspectiveBackwardMatImpl::kern_naive(
         const KernParam<ctype, mtype>& kern_param) {
-    const int N = kern_param.n, C = kern_param.c, IH = kern_param.ih,
+    const int N = kern_param.n_mat, C = kern_param.c, IH = kern_param.ih,
               IW = kern_param.iw;
     const int OH = kern_param.oh, OW = kern_param.ow;
 
@@ -449,9 +464,15 @@ void WarpPerspectiveBackwardMatImpl::kern_naive(
     auto sptr = kern_param.sptr;
     auto mptr = kern_param.mptr;
     auto res = kern_param.res;
+    auto midx_ptr = kern_param.midx_ptr;
     auto border_val = kern_param.border_val;
     std::memset(res, 0, sizeof(float) * N * 3 * 3);
     rep(n, N) {
+        if (midx_ptr) {
+            sptr = kern_param.sptr + midx_ptr[n] * C * IH * IW;
+        } else {
+            sptr = kern_param.sptr + n * C * IH * IW;
+        }
         rep(oh, OH) rep(ow, OW) {
             float numeratorw = mptr[0] * ow + mptr[1] * oh + mptr[2];
             float numeratorh = mptr[3] * ow + mptr[4] * oh + mptr[5];
@@ -537,7 +558,6 @@ void WarpPerspectiveBackwardMatImpl::kern_naive(
             }
         }
         hptr += C * OH * OW;
-        sptr += C * IH * IW;
         mptr += 3 * 3;
         res += 3 * 3;
     }
@@ -545,21 +565,22 @@ void WarpPerspectiveBackwardMatImpl::kern_naive(
 
 void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in src,
                                           _megdnn_tensor_in mat,
+                                          _megdnn_tensor_in mat_idx,
                                           _megdnn_tensor_in diff,
                                           _megdnn_tensor_out grad,
                                           _megdnn_workspace workspace) {
-    check_exec(src.layout, mat.layout, diff.layout, grad.layout,
+    check_exec(src.layout, mat.layout, mat_idx.layout, diff.layout, grad.layout,
                workspace.size);
#define DISPATCH_ST_MT(dt, ct)                                               \
    if (src.layout.dtype.enumv() == DTypeTrait<dt>::enumv) {                 \
        if (mat.layout.dtype.enumv() == DTypeTrait<dtype::Float32>::enumv) { \
             auto kparam = KernParam<ct, float>::from_tensors(               \
-                    param().border_val, src, mat, diff, grad);               \
+                    param().border_val, src, mat, mat_idx, diff, grad);      \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                \
             return;                                                          \
         } else {                                                             \
             auto kparam = KernParam<ct, ct>::from_tensors(                   \
-                    param().border_val, src, mat, diff, grad);               \
+                    param().border_val, src, mat, mat_idx, diff, grad);      \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                \
             return;                                                          \
         }                                                                    \
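Note: the naive backward-data path zeroes the whole N_SRC-sized grad buffer up front and re-bases sptr per matrix, so several matrices may accumulate into the same source image. Condensed CPU model of that accumulation order (scatter body elided):

    #include <cstddef>
    #include <cstring>

    void backward_data_model(float* grad, std::size_t n_src,
                             std::size_t image_elems, const int* midx,
                             std::size_t n_mat) {
        std::memset(grad, 0, sizeof(float) * n_src * image_elems);
        for (std::size_t n = 0; n < n_mat; ++n) {
            float* dst = grad + (midx ? midx[n] : n) * image_elems;
            (void)dst;  // bilinear scatter of diff[n] into dst goes here
        }
    }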
diff --git a/dnn/src/naive/warp_perspective/opr_impl.h b/dnn/src/naive/warp_perspective/opr_impl.h
index d2a5dc7f8f2dd1ad528e0833e0700947071f2cc2..94e3b396cbbf63d585a7b8301e3ba211a93f1964 100644
--- a/dnn/src/naive/warp_perspective/opr_impl.h
+++ b/dnn/src/naive/warp_perspective/opr_impl.h
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #pragma once
 #include "megdnn/oprs.h"
@@ -15,144 +16,158 @@ namespace megdnn {
 namespace naive {
 
-class WarpPerspectiveForwardImpl: public WarpPerspectiveForward {
-    protected:
-        using Format = Param::Format;
-        template <typename ctype, typename mtype>
-        struct KernParam {
-            Format format;
-            BorderMode bmode;
-            float border_val;
-            size_t n_src, n_mat, c, ih, iw, oh, ow;
-            ctype *sptr, *dptr;
-            mtype *mptr;
-            int *midx_ptr;  //!< can be null
-            Workspace workspace;
-
-            static KernParam from_tensors(
-                    Format format, BorderMode bmode, float border_val,
-                    _megdnn_tensor_in src, _megdnn_tensor_in mat,
-                    _megdnn_tensor_in mat_idx, _megdnn_tensor_out dst,
-                    _megdnn_workspace workspace) {
-                KernParam ret;
-                ret.format = format;
-                ret.bmode = bmode;
-                ret.border_val = border_val;
-                ret.n_src = src.layout.shape[0];
-                if (mat_idx.raw_ptr) {
-                    megdnn_assert(mat_idx.layout.ndim == 1);
-                    ret.n_mat = mat_idx.layout.shape[0];
-                    ret.midx_ptr = mat_idx.ptr<int>();
-                } else {
-                    megdnn_assert(mat_idx.layout.ndim == 0);
-                    ret.n_mat = ret.n_src;
-                    ret.midx_ptr = nullptr;
-                }
-                if (format == Format::NCHW) {
-                    ret.c = src.layout.shape[1];
-                    ret.ih = src.layout.shape[2];
-                    ret.iw = src.layout.shape[3];
-                    ret.oh = dst.layout.shape[2];
-                    ret.ow = dst.layout.shape[3];
-                } else if (format == Format::NHWC) {
-                    ret.c = src.layout.shape[3];
-                    ret.ih = src.layout.shape[1];
-                    ret.iw = src.layout.shape[2];
-                    ret.oh = dst.layout.shape[1];
-                    ret.ow = dst.layout.shape[2];
-                } else if (format == Format::NCHW4) {
-                    ret.c = src.layout.shape[1] * 4;
-                    ret.ih = src.layout.shape[2];
-                    ret.iw = src.layout.shape[3];
-                    ret.oh = dst.layout.shape[2];
-                    ret.ow = dst.layout.shape[3];
-                } else {
-                    megdnn_assert(format == Format::NHWCD4);
-                    ret.c = src.layout.shape[2] * 4;
-                    ret.ih = src.layout.shape[1];
-                    ret.iw = src.layout.shape[3];
-                    ret.oh = dst.layout.shape[1];
-                    ret.ow = dst.layout.shape[3];
-                }
-                if (src.layout.dtype.enumv() == DTypeEnum::Float32 ||
-                    MEGDNN_FLOAT16_SELECT(
-                            (src.layout.dtype.enumv() == DTypeEnum::Float16 ||
-                             src.layout.dtype.enumv() == DTypeEnum::BFloat16),
-                            false) ||
-                    src.layout.dtype.enumv() == DTypeEnum::Int8 ||
-                    src.layout.dtype.enumv() == DTypeEnum::Uint8 ||
-                    src.layout.dtype.enumv() == DTypeEnum::QuantizedS8 ||
-                    src.layout.dtype.enumv() == DTypeEnum::Quantized8Asymm) {
-                    ret.sptr = src.compatible_ptr<ctype>();
-                    ret.mptr = mat.ptr<mtype>();
-                    ret.dptr = dst.compatible_ptr<ctype>();
-                } else if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) {
-                    ret.sptr = src.compatible_ptr<ctype>();
-                    ret.mptr = mat.ptr<mtype>();
-                    ret.dptr = dst.compatible_ptr<ctype>();
-                } else {
-                    ret.sptr = nullptr;
-                    ret.mptr = nullptr;
-                    ret.dptr = nullptr;
-                }
-                ret.workspace = workspace;
-                return ret;
-            }
-        };
-
-        // ctype: C type of input data type.
-        // mtype: C type of transformation matrix data type.
-        template <typename ctype, typename mtype>
-        void kern_naive(const KernParam<ctype, mtype>& kern_param,
-                        size_t task_id);
-
-    public:
-        using WarpPerspectiveForward::WarpPerspectiveForward;
-        void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
-                  _megdnn_tensor_in mat_idx, _megdnn_tensor_out dst,
-                  _megdnn_workspace workspace) override;
-        size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
-                                      const TensorLayout&,
-                                      const TensorLayout&) override {
-            return 0;
-        }
-
-    private:
-        template <typename ctype, typename mtype>
-        void kern_naive_nhwcd4(const KernParam<ctype, mtype>& kern_param,
-                               size_t task_id);
+class WarpPerspectiveForwardImpl : public WarpPerspectiveForward {
+protected:
+    using Format = Param::Format;
+    template <typename ctype, typename mtype>
+    struct KernParam {
+        Format format;
+        BorderMode bmode;
+        float border_val;
+        size_t n_src, n_mat, c, ih, iw, oh, ow;
+        ctype *sptr, *dptr;
+        mtype* mptr;
+        int* midx_ptr;  //!< can be null
+        Workspace workspace;
+
+        static KernParam from_tensors(Format format, BorderMode bmode,
+                                      float border_val, _megdnn_tensor_in src,
+                                      _megdnn_tensor_in mat,
+                                      _megdnn_tensor_in mat_idx,
+                                      _megdnn_tensor_out dst,
+                                      _megdnn_workspace workspace) {
+            KernParam ret;
+            ret.format = format;
+            ret.bmode = bmode;
+            ret.border_val = border_val;
+            ret.n_src = src.layout.shape[0];
+            if (mat_idx.raw_ptr) {
+                megdnn_assert(mat_idx.layout.ndim == 1);
+                ret.n_mat = mat_idx.layout.shape[0];
+                ret.midx_ptr = mat_idx.ptr<int>();
+            } else {
+                megdnn_assert(mat_idx.layout.ndim == 0);
+                ret.n_mat = ret.n_src;
+                ret.midx_ptr = nullptr;
+            }
+            if (format == Format::NCHW) {
+                ret.c = src.layout.shape[1];
+                ret.ih = src.layout.shape[2];
+                ret.iw = src.layout.shape[3];
+                ret.oh = dst.layout.shape[2];
+                ret.ow = dst.layout.shape[3];
+            } else if (format == Format::NHWC) {
+                ret.c = src.layout.shape[3];
+                ret.ih = src.layout.shape[1];
+                ret.iw = src.layout.shape[2];
+                ret.oh = dst.layout.shape[1];
+                ret.ow = dst.layout.shape[2];
+            } else if (format == Format::NCHW4) {
+                ret.c = src.layout.shape[1] * 4;
+                ret.ih = src.layout.shape[2];
+                ret.iw = src.layout.shape[3];
+                ret.oh = dst.layout.shape[2];
+                ret.ow = dst.layout.shape[3];
+            } else {
+                megdnn_assert(format == Format::NHWCD4);
+                ret.c = src.layout.shape[2] * 4;
+                ret.ih = src.layout.shape[1];
+                ret.iw = src.layout.shape[3];
+                ret.oh = dst.layout.shape[1];
+                ret.ow = dst.layout.shape[3];
+            }
+            if (src.layout.dtype.enumv() == DTypeEnum::Float32 ||
+                MEGDNN_FLOAT16_SELECT(
+                        (src.layout.dtype.enumv() == DTypeEnum::Float16 ||
+                         src.layout.dtype.enumv() == DTypeEnum::BFloat16),
+                        false) ||
+                src.layout.dtype.enumv() == DTypeEnum::Int8 ||
+                src.layout.dtype.enumv() == DTypeEnum::Uint8 ||
+                src.layout.dtype.enumv() == DTypeEnum::QuantizedS8 ||
+                src.layout.dtype.enumv() == DTypeEnum::Quantized8Asymm) {
+                ret.sptr = src.compatible_ptr<ctype>();
+                ret.mptr = mat.ptr<mtype>();
+                ret.dptr = dst.compatible_ptr<ctype>();
+            } else if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) {
+                ret.sptr = src.compatible_ptr<ctype>();
+                ret.mptr = mat.ptr<mtype>();
+                ret.dptr = dst.compatible_ptr<ctype>();
+            } else {
+                ret.sptr = nullptr;
+                ret.mptr = nullptr;
+                ret.dptr = nullptr;
+            }
+            ret.workspace = workspace;
+            return ret;
+        }
+    };
+
+    // ctype: C type of input data type.
+    // mtype: C type of transformation matrix data type.
+    template <typename ctype, typename mtype>
+    void kern_naive(const KernParam<ctype, mtype>& kern_param, size_t task_id);
+
+public:
+    using WarpPerspectiveForward::WarpPerspectiveForward;
+    void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
+              _megdnn_tensor_in mat_idx, _megdnn_tensor_out dst,
+              _megdnn_workspace workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
+                                  const TensorLayout&,
+                                  const TensorLayout&) override {
+        return 0;
+    }
+
+private:
+    template <typename ctype, typename mtype>
+    void kern_naive_nhwcd4(const KernParam<ctype, mtype>& kern_param,
+                           size_t task_id);
 };
 
 class WarpPerspectiveBackwardDataImpl : public WarpPerspectiveBackwardData {
 protected:
     template <typename ctype, typename mtype>
     struct KernParam {
-        size_t n, c, ih, iw, oh, ow;
+        size_t n_src, n_mat, c, ih, iw, oh, ow;
         ctype *sptr, *hptr;
         mtype* mptr;
+        int* midx_ptr;  //!< can be null
 
         static KernParam from_tensors(_megdnn_tensor_in mat,
+                                      _megdnn_tensor_in mat_idx,
                                       _megdnn_tensor_in diff,
                                       _megdnn_tensor_out grad) {
             KernParam ret;
-            ret.n = grad.layout.shape[0], ret.c = grad.layout.shape[1],
+            ret.n_src = grad.layout.shape[0], ret.c = grad.layout.shape[1];
             ret.ih = grad.layout.shape[2], ret.iw = grad.layout.shape[3];
             ret.oh = diff.layout.shape[2], ret.ow = diff.layout.shape[3];
             ret.hptr = diff.ptr<ctype>();
             ret.mptr = mat.ptr<mtype>();
             ret.sptr = grad.ptr<ctype>();
+            if (mat_idx.raw_ptr) {
+                megdnn_assert(mat_idx.layout.ndim == 1);
+                ret.n_mat = mat_idx.layout.shape[0];
+                ret.midx_ptr = mat_idx.ptr<int>();
+            } else {
+                megdnn_assert(mat_idx.layout.ndim == 0);
+                ret.n_mat = ret.n_src;
+                ret.midx_ptr = nullptr;
+            }
             return ret;
         }
     };
 
 public:
     using WarpPerspectiveBackwardData::WarpPerspectiveBackwardData;
-    void exec(_megdnn_tensor_in mat, _megdnn_tensor_in diff,
-              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
+    void exec(_megdnn_tensor_in mat, _megdnn_tensor_in mat_idx,
+              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) override;
     size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
+                                  const TensorLayout&,
                                   const TensorLayout&) override {
         return 0;
     }
+
 private:
     template <typename ctype, typename mtype>
     void kern_naive(const KernParam<ctype, mtype>& kern_param);
@@ -162,23 +177,35 @@ class WarpPerspectiveBackwardMatImpl : public WarpPerspectiveBackwardMat {
 protected:
     template <typename ctype, typename mtype>
     struct KernParam {
-        size_t n, c, ih, iw, oh, ow;
+        size_t n_src, n_mat, c, ih, iw, oh, ow;
         ctype *sptr, *hptr;
-        mtype* mptr, *res;
+        mtype *mptr, *res;
+        int* midx_ptr;  //!< can be null
         float border_val;
+
         static KernParam from_tensors(float border_val_,
                                       _megdnn_tensor_in src,
                                       _megdnn_tensor_in mat,
+                                      _megdnn_tensor_in mat_idx,
                                       _megdnn_tensor_in diff,
                                       _megdnn_tensor_out grad) {
             KernParam ret;
             ret.border_val = border_val_;
-            ret.n = src.layout.shape[0], ret.c = src.layout.shape[1],
+            ret.n_src = src.layout.shape[0], ret.c = src.layout.shape[1];
             ret.ih = src.layout.shape[2], ret.iw = src.layout.shape[3];
             ret.oh = diff.layout.shape[2], ret.ow = diff.layout.shape[3];
             ret.hptr = diff.ptr<ctype>();
             ret.mptr = mat.ptr<mtype>();
             ret.sptr = src.ptr<ctype>();
             ret.res = grad.ptr<mtype>();
+            if (mat_idx.raw_ptr) {
+                megdnn_assert(mat_idx.layout.ndim == 1);
+                ret.n_mat = mat_idx.layout.shape[0];
+                ret.midx_ptr = mat_idx.ptr<int>();
+            } else {
+                megdnn_assert(mat_idx.layout.ndim == 0);
+                ret.n_mat = ret.n_src;
+                ret.midx_ptr = nullptr;
+            }
             return ret;
         }
     };
@@ -186,10 +213,10 @@ protected:
 public:
     using WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat;
     void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
-              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
-              _megdnn_workspace workspace) override;
+              _megdnn_tensor_in mat_idx, _megdnn_tensor_in diff,
+              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
     size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
-                                  const TensorLayout&,
+                                  const TensorLayout&, const TensorLayout&,
                                   const TensorLayout&) override {
         return 0;
     }
diff --git a/dnn/test/common/warp_perspective.cpp b/dnn/test/common/warp_perspective.cpp
index 23e45b80173f5bdf18af8465bb847a72cb4f4511..a8c6ec05258394e24843e1ad7f01228401a16d0f 100644
--- a/dnn/test/common/warp_perspective.cpp
+++ b/dnn/test/common/warp_perspective.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 
 #include "test/common/warp_perspective.h"
@@ -19,6 +20,10 @@ using namespace warp_perspective;
 
 void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspective*,
                                                TensorLayoutArray&) {}
+void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspectiveBackwardData*,
+                                               TensorLayoutArray&) {}
+void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspectiveBackwardMat*,
+                                               TensorLayoutArray&) {}
 
 void WarpPerspectiveMatIdxProxy::exec(WarpPerspective* opr,
                                       const TensorNDArray& tensors) {
@@ -31,6 +36,30 @@ void WarpPerspectiveMatIdxProxy::exec(WarpPerspective* opr,
     opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], W.workspace());
 }
 
+void WarpPerspectiveMatIdxProxy::exec(WarpPerspectiveBackwardData* opr,
+                                      const TensorNDArray& tensors) {
+    if (!W.valid()) {
+        W = WorkspaceWrapper(opr->handle(), 0);
+    }
+    megdnn_assert(tensors.size() == 4);
+    W.update(opr->get_workspace_in_bytes(tensors[0].layout, tensors[1].layout,
+                                         tensors[2].layout, tensors[3].layout));
+    opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], W.workspace());
+}
+
+void WarpPerspectiveMatIdxProxy::exec(WarpPerspectiveBackwardMat* opr,
+                                      const TensorNDArray& tensors) {
+    if (!W.valid()) {
+        W = WorkspaceWrapper(opr->handle(), 0);
+    }
+    megdnn_assert(tensors.size() == 5);
+    W.update(opr->get_workspace_in_bytes(tensors[0].layout, tensors[1].layout,
+                                         tensors[2].layout, tensors[3].layout,
+                                         tensors[4].layout));
+    opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], tensors[4],
+              W.workspace());
+}
+
 std::vector<TestArg> warp_perspective::get_cv_args() {
     std::vector<TestArg> args;
@@ -101,10 +130,10 @@ void warp_perspective::run_mat_idx_test(Handle* handle) {
 
     // test NHWC
     param.format = WarpPerspective::Param::Format::NHWC;
-    checker.set_param(param)
-        .set_rng(2, &mat_idx_rng)
-        .set_epsilon(1e-1)
-        .set_dtype(2, dtype::Int32());
+    checker.set_param(param)
+            .set_rng(2, &mat_idx_rng)
+            .set_epsilon(1e-1)
+            .set_dtype(2, dtype::Int32());
     checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 11, 12, 3}});
 }
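Note: the proxy overloads mirror the operator signatures, so the Checker tensor order is {mat, mat_idx, diff, grad} for backward data and {src, mat, mat_idx, diff, grad} for backward mat, with mat_idx an Int32 vector of source indices. Example shape set matching the tests below (N_SRC = 5 sources, 2 matrices):

    #include <cstddef>

    // backward data: {2,3,3} mats + {2} indices + {2,C,OH,OW} diff
    //                -> {5,C,IH,IW} grad
    void example_backward_data_shapes() {
        const std::size_t N_SRC = 5, N_MAT = 2, C = 12;
        std::size_t mat[3]     = {N_MAT, 3, 3};
        std::size_t mat_idx[1] = {N_MAT};  // values drawn from [0, N_SRC)
        std::size_t diff[4]    = {N_MAT, C, 11, 12};
        std::size_t grad[4]    = {N_SRC, C, 10, 11};
        (void)mat; (void)mat_idx; (void)diff; (void)grad;
    }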
diff --git a/dnn/test/common/warp_perspective.h b/dnn/test/common/warp_perspective.h
index 68931763386e63b5184a40f8439b172f248f87be..fd1635362d8cb00488a6281e4de54217a823cb12 100644
--- a/dnn/test/common/warp_perspective.h
+++ b/dnn/test/common/warp_perspective.h
@@ -22,7 +22,11 @@ namespace test {
 struct WarpPerspectiveMatIdxProxy {
     WorkspaceWrapper W;
     static void deduce_layout(WarpPerspective*, TensorLayoutArray&);
+    static void deduce_layout(WarpPerspectiveBackwardData*, TensorLayoutArray&);
+    static void deduce_layout(WarpPerspectiveBackwardMat*, TensorLayoutArray&);
     void exec(WarpPerspective* opr, const TensorNDArray& tensors);
+    void exec(WarpPerspectiveBackwardData* opr, const TensorNDArray& tensors);
+    void exec(WarpPerspectiveBackwardMat* opr, const TensorNDArray& tensors);
 };
 
 class WarpPerspectiveMatRNG final : public IIDRNG {
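Note: WorkspaceWrapper W is default-constructed invalid, created on first exec, and resized per case via W.update(...), so one buffer is reused across all test shapes. The lazy-init-then-grow pattern, reduced to a sketch (hypothetical stand-in type):

    #include <cstddef>
    #include <vector>

    struct LazyWorkspace {
        std::vector<unsigned char> buf;
        bool valid() const { return !buf.empty(); }
        void update(std::size_t bytes) {  // grow-only resize per run
            if (bytes > buf.size()) buf.resize(bytes);
        }
    };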
param.border_val = 0.3f; param.bmode = bmode; @@ -235,27 +231,24 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) param.format = Param::Format::NHWC; checker.set_param(param); checker.set_epsilon(0.15).set_max_avg_error(4e-2); - size_t n = (INT_MAX) / (512 * 512 * 3); - checker.execs( - {{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}}); + size_t n = (INT_MAX) / (512 * 512 * 3); + checker.execs( + {{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}}); } } - -TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) -{ +TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) { using Param = WarpPerspective::Param; Checker checker(handle_cuda()); WarpPerspectiveMatRNG rng; checker.set_rng(1, &rng); checker.set_dtype(0, dtype::Float16()) - .set_dtype(1, dtype::Float32()) - .set_dtype(2, dtype::Float16()); - for (auto bmode: {WarpPerspective::BorderMode::WRAP, - WarpPerspective::BorderMode::REFLECT, - WarpPerspective::BorderMode::REPLICATE, - WarpPerspective::BorderMode::CONSTANT}) - { + .set_dtype(1, dtype::Float32()) + .set_dtype(2, dtype::Float16()); + for (auto bmode : {WarpPerspective::BorderMode::WRAP, + WarpPerspective::BorderMode::REFLECT, + WarpPerspective::BorderMode::REPLICATE, + WarpPerspective::BorderMode::CONSTANT}) { WarpPerspective::Param param; param.border_val = 0.3f; param.bmode = bmode; @@ -278,8 +271,7 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) // nan case NanMatRNG rng_nan; UniformFloatRNG rng_zero(0, 0); - for (auto rng: std::vector{&rng_nan, &rng_zero}) - { + for (auto rng : std::vector{&rng_nan, &rng_zero}) { param::WarpPerspective param; param.bmode = param::WarpPerspective::BorderMode::CONSTANT; param.imode = param::WarpPerspective::InterpolationMode::LINEAR; @@ -287,13 +279,12 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) param.border_val = 1.737; checker.set_param(param); // no invalid mem access is enough; no need to check value - checker.set_expect_exec_fail([](){}); + checker.set_expect_exec_fail([]() {}); checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}}); } } -TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) -{ +TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) { using Param = WarpPerspective::Param; WarpPerspective::Param param; Checker checker(handle_cuda()); @@ -348,31 +339,29 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) { warp_perspective::run_int8_test(handle_cuda()); } -TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) -{ +TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) { Checker checker(handle_cuda()); WarpPerspectiveMatRNG rng; checker.set_rng(0, &rng); for (int i = 0; i < 1; ++i) { - for (auto bmode: {WarpPerspective::BorderMode::WRAP, - WarpPerspective::BorderMode::REFLECT, - WarpPerspective::BorderMode::REPLICATE, - WarpPerspective::BorderMode::CONSTANT}) - { + for (auto bmode : {WarpPerspective::BorderMode::WRAP, + WarpPerspective::BorderMode::REFLECT, + WarpPerspective::BorderMode::REPLICATE, + WarpPerspective::BorderMode::CONSTANT}) { WarpPerspective::Param param; param.border_val = 0.3f; param.bmode = bmode; param.imode = param::WarpPerspective::InterpolationMode::LINEAR; checker.set_param(param); checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}}); - checker.execs({{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}}); + checker.execs( + {{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}}); } } // nan case NanMatRNG rng_nan; UniformFloatRNG rng_zero(0, 0); - for (auto rng: std::vector{&rng_nan, &rng_zero}) - { + for (auto rng : std::vector{&rng_nan, &rng_zero}) { param::WarpPerspective param; param.bmode = 
param::WarpPerspective::BorderMode::CONSTANT; param.imode = param::WarpPerspective::InterpolationMode::LINEAR; @@ -380,39 +369,54 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) param.border_val = 1.737; checker.set_param(param); // no invalid mem access is enough; no need to check value - checker.set_expect_exec_fail([](){}); + checker.set_expect_exec_fail([]() {}); checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}}); } + + { + Checker + checker(handle_cuda()); + constexpr int N_SRC = 5; + UniformIntRNG mat_idx_rng{0, N_SRC - 1}; + checker.set_rng(0, &rng); + checker.set_dtype(1, dtype::Int32()); + checker.set_rng(1, &mat_idx_rng); + param::WarpPerspective param; + param.bmode = param::WarpPerspective::BorderMode::REFLECT; + param.imode = param::WarpPerspective::InterpolationMode::LINEAR; + checker.set_param(param); + checker.set_epsilon(1 + 1e-3); + checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}}); + checker.execs( + {{123, 3, 3}, {123}, {123, 56, 16, 15}, {N_SRC, 56, 17, 13}}); + } } -TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) -{ +TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) { Checker checker(handle_cuda()); WarpPerspectiveMatRNG rng; checker.set_rng(1, &rng); for (int i = 0; i < 1; ++i) { - for (auto bmode: {WarpPerspective::BorderMode::WRAP, - WarpPerspective::BorderMode::REFLECT, - WarpPerspective::BorderMode::REPLICATE, - WarpPerspective::BorderMode::CONSTANT}) - { + for (auto bmode : {WarpPerspective::BorderMode::WRAP, + WarpPerspective::BorderMode::REFLECT, + WarpPerspective::BorderMode::REPLICATE, + WarpPerspective::BorderMode::CONSTANT}) { WarpPerspective::Param param; param.border_val = 0.3f; param.imode = param::WarpPerspective::InterpolationMode::LINEAR; param.bmode = bmode; checker.set_param(param); checker.set_epsilon(1e-2); - checker.execs({ - {1000, 3, 11, 12}, {1000, 3, 3}, - {1000, 3, 10, 11}, {1000, 3, 3} - }); + checker.execs({{1000, 3, 11, 12}, + {1000, 3, 3}, + {1000, 3, 10, 11}, + {1000, 3, 3}}); } } // nan case NanMatRNG rng_nan; UniformFloatRNG rng_zero(0, 0); - for (auto rng: std::vector{&rng_nan, &rng_zero}) - { + for (auto rng : std::vector{&rng_nan, &rng_zero}) { param::WarpPerspective param; param.bmode = param::WarpPerspective::BorderMode::CONSTANT; param.imode = param::WarpPerspective::InterpolationMode::LINEAR; @@ -420,26 +424,50 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) param.border_val = 1.737; checker.set_param(param); // no invalid mem access is enough; no need to check value - checker.set_expect_exec_fail([](){}); - checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, - {1000, 2, 12, 13}, {1000, 3, 3}}); + checker.set_expect_exec_fail([]() {}); + checker.exec({{1000, 2, 10, 11}, + {1000, 3, 3}, + {1000, 2, 12, 13}, + {1000, 3, 3}}); + } + { + Checker checker( + handle_cuda()); + constexpr int N_SRC = 5; + UniformIntRNG mat_idx_rng{0, N_SRC - 1}; + checker.set_rng(1, &rng); + checker.set_dtype(2, dtype::Int32()); + checker.set_rng(2, &mat_idx_rng); + param::WarpPerspective param; + param.bmode = param::WarpPerspective::BorderMode::REFLECT; + param.imode = param::WarpPerspective::InterpolationMode::LINEAR; + checker.set_param(param); + checker.set_epsilon(1 + 1e-3); + checker.execs({{N_SRC, 12, 10, 11}, + {2, 3, 3}, + {2}, + {2, 12, 11, 12}, + {2, 3, 3}}); + checker.execs({{N_SRC, 56, 17, 13}, + {123, 3, 3}, + {123}, + {123, 56, 16, 15}, + {123, 3, 3}}); } } -TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) -{ +TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) { using Param = WarpPerspective::Param; Checker 
checker(handle_cuda()); WarpPerspectiveMatRNG rng; checker.set_rng(1, &rng); checker.set_dtype(0, dtype::BFloat16()) - .set_dtype(1, dtype::Float32()) - .set_dtype(2, dtype::BFloat16()); - for (auto bmode: {WarpPerspective::BorderMode::WRAP, - WarpPerspective::BorderMode::REFLECT, - WarpPerspective::BorderMode::REPLICATE, - WarpPerspective::BorderMode::CONSTANT}) - { + .set_dtype(1, dtype::Float32()) + .set_dtype(2, dtype::BFloat16()); + for (auto bmode : {WarpPerspective::BorderMode::WRAP, + WarpPerspective::BorderMode::REFLECT, + WarpPerspective::BorderMode::REPLICATE, + WarpPerspective::BorderMode::CONSTANT}) { WarpPerspective::Param param; param.border_val = 0.3f; param.bmode = bmode; @@ -457,21 +485,19 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) } } -TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) -{ +TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) { Checker<WarpPerspectiveBackwardData> checker(handle_cuda()); WarpPerspectiveMatRNG rng; checker.set_rng(0, &rng) - .set_epsilon(1e-1) - .set_dtype(0, dtype::Float32()) - .set_dtype(1, dtype::BFloat16()) - .set_dtype(2, dtype::BFloat16()); + .set_epsilon(1e-1) + .set_dtype(0, dtype::Float32()) + .set_dtype(1, dtype::BFloat16()) + .set_dtype(2, dtype::BFloat16()); for (int i = 0; i < 1; ++i) { - for (auto bmode: {WarpPerspective::BorderMode::WRAP, - WarpPerspective::BorderMode::REFLECT, - WarpPerspective::BorderMode::REPLICATE, - WarpPerspective::BorderMode::CONSTANT}) - { + for (auto bmode : {WarpPerspective::BorderMode::WRAP, + WarpPerspective::BorderMode::REFLECT, + WarpPerspective::BorderMode::REPLICATE, + WarpPerspective::BorderMode::CONSTANT}) { WarpPerspective::Param param; param.border_val = 0.3f; param.bmode = bmode; @@ -482,31 +508,29 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) } } -TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) -{ +TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) { Checker<WarpPerspectiveBackwardMat> checker(handle_cuda()); WarpPerspectiveMatRNG rng; checker.set_rng(1, &rng) - .set_epsilon(1e-2) - .set_dtype(0, dtype::BFloat16()) - .set_dtype(1, dtype::Float32()) - .set_dtype(2, dtype::BFloat16()) - .set_dtype(3, dtype::Float32()); + .set_epsilon(1e-2) + .set_dtype(0, dtype::BFloat16()) + .set_dtype(1, dtype::Float32()) + .set_dtype(2, dtype::BFloat16()) + .set_dtype(3, dtype::Float32()); for (int i = 0; i < 1; ++i) { - for (auto bmode: {WarpPerspective::BorderMode::WRAP, - WarpPerspective::BorderMode::REFLECT, - WarpPerspective::BorderMode::REPLICATE, - WarpPerspective::BorderMode::CONSTANT}) - { + for (auto bmode : {WarpPerspective::BorderMode::WRAP, + WarpPerspective::BorderMode::REFLECT, + WarpPerspective::BorderMode::REPLICATE, + WarpPerspective::BorderMode::CONSTANT}) { WarpPerspective::Param param; param.border_val = 0.3f; param.imode = param::WarpPerspective::InterpolationMode::LINEAR; param.bmode = bmode; checker.set_param(param); - checker.execs({ - {1000, 3, 11, 12}, {1000, 3, 3}, - {1000, 3, 10, 11}, {1000, 3, 3} - }); + checker.execs({{1000, 3, 11, 12}, + {1000, 3, 3}, + {1000, 3, 10, 11}, + {1000, 3, 3}}); } } } @@ -549,14 +573,14 @@ TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) { benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f)); benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f)); run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}}); - run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1,25, 256, 256, 4}}); + run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}}); run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}}); run({TensorShape{1, 25, 512, 512, 4}, 
{1, 3, 3}, {1, 25, 256, 256, 4}}); } #endif -} // namespace test -} // namespace megdnn +} // namespace test +} // namespace megdnn // vim: syntax=cpp.doxygen diff --git a/src/opr/impl/imgproc.cpp b/src/opr/impl/imgproc.cpp index 0166e67fcb0ea57c6907ee62e3ce89274d895117..33fb1e23ac942af1b710f32ad3f76167e09e9676 100644 --- a/src/opr/impl/imgproc.cpp +++ b/src/opr/impl/imgproc.cpp @@ -6,18 +6,18 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. */ -#include "./internal/megdnn_opr_wrapper.inl" #include "megbrain/opr/imgproc.h" -#include "megbrain/opr/utility.h" +#include "./internal/megdnn_opr_wrapper.inl" #include "megbrain/graph/grad_impl.h" +#include "megbrain/opr/utility.h" using namespace mgb; using namespace opr; - /* ======================= WarpPerspectiveForward ======================= */ MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveForward); @@ -54,8 +54,7 @@ void WarpPerspectiveForward::add_input_layout_constraint() { } void WarpPerspectiveForward::outshape_by_symvar_do_get_output_shape( - TensorShape &dest, const ShapeInferInfo &shpinfo) { - + TensorShape& dest, const ShapeInferInfo& shpinfo) { TensorShape oshp2d; cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); auto imgshp = shpinfo.shape_inp_shp.at(0), @@ -112,8 +111,8 @@ void WarpPerspectiveForward::scn_do_execute() { } size_t WarpPerspectiveForward::get_workspace_size_bytes( - const TensorShapeArray &input_shapes, - const TensorShapeArray &output_shapes) const { + const TensorShapeArray& input_shapes, + const TensorShapeArray& output_shapes) const { if (input().size() == 3) { return intl::_MegDNNOprMethInvoker<2, 1>::get_workspace_in_bytes( megdnn_opr(), this, input_shapes, output_shapes); @@ -129,19 +128,34 @@ void WarpPerspectiveForward::record_execute_deps(ExecDependencyArray& deps) { #ifdef MGB_ENABLE_GRAD MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) { - mgb_assert(opr.input().size() == 3, - "backward with mat_idx is currently unsupported"); + if (opr.input().size() == 4) { + if (wrt_idx == 0) { + // wrt data + SymbolVar grad = WarpPerspectiveBackwardData::make( + opr.input(1), opr.input(2), out_grad[0], opr.input(0), + opr.param()); + return grad.node(); + } else if (wrt_idx == 1) { + // wrt mat + SymbolVar grad = WarpPerspectiveBackwardMat::make( + opr.input(0), opr.input(1), opr.input(2), out_grad[0], + opr.param()); + return grad.node(); + } else { + return InvalidGrad::make(opr, wrt_idx); + } + } + + mgb_assert(opr.input().size() == 3); if (wrt_idx == 0) { // wrt data SymbolVar grad = WarpPerspectiveBackwardData::make( - opr.input(1), out_grad[0], opr.input(0), - opr.param()); + opr.input(1), out_grad[0], opr.input(0), opr.param()); return grad.node(); - } else if (wrt_idx == 1){ + } else if (wrt_idx == 1) { // wrt mat SymbolVar grad = WarpPerspectiveBackwardMat::make( - opr.input(0), opr.input(1), out_grad[0], - opr.param()); + opr.input(0), opr.input(1), out_grad[0], opr.param()); return grad.node(); } else return InvalidGrad::make(opr, wrt_idx); @@ -151,14 +165,116 @@ MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) { /* ====================== WarpPerspectiveBackwardData ====================== */ MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardData); -MEGDNN_OPR_INIT3(WarpPerspectiveBackwardData, "warp_perspective_bwd_data", - 2, false); + 
+WarpPerspectiveBackwardData::WarpPerspectiveBackwardData( + VarNode* mat, VarNode* out_diff, VarNode* in_for_shape, + const Param& param, const OperatorNodeConfig& config) + : Super(OperatorNodeBaseCtorParam{mat->owner_graph(), + config, + "warp_perspective_bwd_data", + {mat}}, + 2, false) { + init_megdnn_opr(*this, param); + add_input({mat, out_diff, in_for_shape}); + intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardData>::apply(*this); +} + +WarpPerspectiveBackwardData::WarpPerspectiveBackwardData( + VarNode* mat, VarNode* mat_idx, VarNode* out_diff, + VarNode* in_for_shape, const Param& param, + const OperatorNodeConfig& config) + : Super(OperatorNodeBaseCtorParam{mat->owner_graph(), + config, + "warp_perspective_bwd_data", + {mat, mat_idx}}, + 3, false) { + init_megdnn_opr(*this, param); + add_input({mat, mat_idx, out_diff, in_for_shape}); + intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardData>::apply(*this); +} + +SymbolVar WarpPerspectiveBackwardData::make(SymbolVar i0, SymbolVar i1, + SymbolVar i2, const Param& param, + const OperatorNodeConfig& config) { + intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardData>::apply( + param, {&i0, &i1, &i2}); + return i0.insert_single_output_opr<WarpPerspectiveBackwardData>( + i0.node(), i1.node(), i2.node(), param, config); +} + +SymbolVar WarpPerspectiveBackwardData::make(SymbolVar i0, SymbolVar i1, + SymbolVar i2, SymbolVar i3, + const Param& param, + const OperatorNodeConfig& config) { + intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardData>::apply( + param, {&i0, &i1, &i2, &i3}); + return i0.insert_single_output_opr<WarpPerspectiveBackwardData>( + i0.node(), i1.node(), i2.node(), i3.node(), param, config); +} + +void WarpPerspectiveBackwardData::scn_do_execute() { + if (input().size() == 3) { + megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), + input(1)->dev_tensor().as_megdnn(), + output(0)->dev_tensor().as_megdnn(), + intl::get_megdnn_workspace_from_var(output(1))); + } else { + mgb_assert(input().size() == 4); + megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), + input(1)->dev_tensor().as_megdnn(), + input(2)->dev_tensor().as_megdnn(), + output(0)->dev_tensor().as_megdnn(), + intl::get_megdnn_workspace_from_var(output(1))); + } +} /* ====================== WarpPerspectiveBackwardMat ====================== */ MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardMat); -MEGDNN_OPR_INIT3(WarpPerspectiveBackwardMat, "warp_perspective_bwd_mat", - 1, true); + +WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat( + VarNode* src, VarNode* mat, VarNode* mat_idx, VarNode* out_diff, + const Param& param, const OperatorNodeConfig& config) + : Super(OperatorNodeBaseCtorParam{src->owner_graph(), + config, + "warp_perspective_bwd_mat", + {src, mat, mat_idx}}, + 1, true) { + init_megdnn_opr(*this, param); + if (mat_idx) { + add_input({src, mat, mat_idx, out_diff}); + } else { + add_input({src, mat, out_diff}); + } + intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardMat>::apply(*this); +} + +void WarpPerspectiveBackwardMat::scn_do_execute() { + if (input().size() == 3) { + megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), + input(1)->dev_tensor().as_megdnn(), + input(2)->dev_tensor().as_megdnn(), + output(0)->dev_tensor().as_megdnn(), + intl::get_megdnn_workspace_from_var(output(1))); + } else { + mgb_assert(input().size() == 4); + megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), + input(1)->dev_tensor().as_megdnn(), + input(2)->dev_tensor().as_megdnn(), + input(3)->dev_tensor().as_megdnn(), + output(0)->dev_tensor().as_megdnn(), + intl::get_megdnn_workspace_from_var(output(1))); + } +} + +SymbolVar WarpPerspectiveBackwardMat::make( + SymbolVar i0, SymbolVar i1, SymbolVar i2, SymbolVar i3, + const Param& 
param, const OperatorNodeConfig& config) { + intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardMat>::apply( + param, {&i0, &i1, &i2, &i3}); + return i0.insert_single_output_opr<WarpPerspectiveBackwardMat>( + i0.node(), i1.node(), i2.node(), i3.node(), param, config); +} /* ====================== Cv operator ====================== */ @@ -188,8 +304,7 @@ void ResizeForward::add_input_layout_constraint() { } void ResizeForward::outshape_by_symvar_do_get_output_shape( - TensorShape &dest, const ShapeInferInfo &shpinfo) { - + TensorShape& dest, const ShapeInferInfo& shpinfo) { TensorShape oshp2d; cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); auto imgshp = shpinfo.shape_inp_shp.at(0); @@ -232,7 +347,7 @@ size_t ResizeForward::get_workspace_size_bytes( megdnn_opr(), this, input_shapes, output_shapes); } -void ResizeForward::record_execute_deps(ExecDependencyArray &deps) { +void ResizeForward::record_execute_deps(ExecDependencyArray& deps) { record_megdnn_opr(deps); } @@ -268,19 +383,17 @@ void WarpAffineForward::add_input_layout_constraint() { } void WarpAffineForward::outshape_by_symvar_do_get_output_shape( - TensorShape &dest, const ShapeInferInfo &shpinfo) { - + TensorShape& dest, const ShapeInferInfo& shpinfo) { TensorShape oshp2d; cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); auto imgshp = shpinfo.shape_inp_shp.at(0), matshp = shpinfo.shape_inp_shp.at(1); - mgb_assert( - (imgshp.ndim == 4 || imgshp.ndim == 5) && matshp.ndim == 3 && oshp2d.ndim == 2 && - matshp.shape[0] == imgshp.shape[0] && - matshp.shape[1] == 2 && matshp.shape[2] == 3, - "shape mismatch for WarpAffineForward: img=%s mat=%s out2d=%s", - imgshp.to_string().c_str(), matshp.to_string().c_str(), - oshp2d.to_string().c_str()); + mgb_assert((imgshp.ndim == 4 || imgshp.ndim == 5) && matshp.ndim == 3 && + oshp2d.ndim == 2 && matshp.shape[0] == imgshp.shape[0] && + matshp.shape[1] == 2 && matshp.shape[2] == 3, + "shape mismatch for WarpAffineForward: img=%s mat=%s out2d=%s", + imgshp.to_string().c_str(), matshp.to_string().c_str(), + oshp2d.to_string().c_str()); size_t height_idx = 0; if (param().format == Param::Format::NCHW) { @@ -305,18 +418,19 @@ void WarpAffineForward::init_output_static_infer_desc() { } void WarpAffineForward::scn_do_execute() { - intl::MegDNNOprMethInvoker<megdnn::WarpAffine>:: - exec(megdnn_opr(), this); + intl::MegDNNOprMethInvoker<megdnn::WarpAffine>::exec(megdnn_opr(), this); } size_t WarpAffineForward::get_workspace_size_bytes( - const TensorShapeArray &input_shapes, - const TensorShapeArray &output_shapes) const { - return intl::MegDNNOprMethInvoker<megdnn::WarpAffine>:: - get_workspace_in_bytes(megdnn_opr(), this, input_shapes, output_shapes); + const TensorShapeArray& input_shapes, + const TensorShapeArray& output_shapes) const { + return intl::MegDNNOprMethInvoker< + megdnn::WarpAffine>::get_workspace_in_bytes(megdnn_opr(), this, + input_shapes, + output_shapes); } -void WarpAffineForward::record_execute_deps(ExecDependencyArray &deps) { +void WarpAffineForward::record_execute_deps(ExecDependencyArray& deps) { record_megdnn_opr(deps); } @@ -325,7 +439,7 @@ void WarpAffineForward::record_execute_deps(ExecDependencyArray &deps) { MGB_DYN_TYPE_OBJ_FINAL_IMPL(RemapForward); MEGDNN_OPR_INIT2(RemapForward, "remap") -void RemapForward::init_output_dtype(){ +void RemapForward::init_output_dtype() { output(0)->dtype(input(0)->dtype()); } diff --git a/src/opr/impl/imgproc.sereg.h b/src/opr/impl/imgproc.sereg.h index 0b7e3bd64d697a1dcac4249cc44d974d006b47cd..949a97403b08af055ce2ac08305cd8b193d9b216 100644 --- a/src/opr/impl/imgproc.sereg.h +++ 
b/src/opr/impl/imgproc.sereg.h @@ -37,13 +37,59 @@ namespace serialization { } } }; + + template<> + struct OprMaker<opr::WarpPerspectiveBackwardData, 0> { + using Opr = opr::WarpPerspectiveBackwardData; + using Param = Opr::Param; + static cg::OperatorNodeBase* make(const Param& param, + const cg::VarNodeArray& inputs, + ComputingGraph& graph, + const OperatorNodeConfig& config) { + MGB_MARK_USED_VAR(graph); + if (inputs.size() == 3) { + return Opr::make(inputs[0], inputs[1], inputs[2], param, config) + .node() + ->owner_opr(); + } else { + mgb_assert(inputs.size() == 4); + return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3], + param, config) + .node() + ->owner_opr(); + } + } + }; + + template<> + struct OprMaker<opr::WarpPerspectiveBackwardMat, 0> { + using Opr = opr::WarpPerspectiveBackwardMat; + using Param = Opr::Param; + static cg::OperatorNodeBase* make(const Param& param, + const cg::VarNodeArray& inputs, + ComputingGraph& graph, + const OperatorNodeConfig& config) { + MGB_MARK_USED_VAR(graph); + if (inputs.size() == 3) { + return Opr::make(inputs[0], inputs[1], inputs[2], param, config) + .node() + ->owner_opr(); + } else { + mgb_assert(inputs.size() == 4); + return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3], + param, config) + .node() + ->owner_opr(); + } + } + }; } // namespace serialization namespace opr { MGB_SEREG_OPR(WarpPerspective, 0); - MGB_SEREG_OPR(WarpPerspectiveBackwardData, 3); - MGB_SEREG_OPR(WarpPerspectiveBackwardMat, 3); + MGB_SEREG_OPR(WarpPerspectiveBackwardData, 0); + MGB_SEREG_OPR(WarpPerspectiveBackwardMat, 0); MGB_SEREG_OPR(Rotate, 1); MGB_SEREG_OPR(CvtColor, 1); diff --git a/src/opr/include/megbrain/opr/imgproc.h b/src/opr/include/megbrain/opr/imgproc.h index 18c4a03e5ad5868498c1d8faeb7471dcfe67831d..611d7bc1d590d4541f78a78250bb8c59e8a03bb1 100644 --- a/src/opr/include/megbrain/opr/imgproc.h +++ b/src/opr/include/megbrain/opr/imgproc.h @@ -6,7 +6,8 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
*/ #pragma once @@ -33,77 +34,93 @@ namespace opr { * Impl note: this operator might have 3 or 4 inputs depending on whether * \p mat_idx is given */ -MGB_DEFINE_OPR_CLASS(WarpPerspectiveForward, +MGB_DEFINE_OPR_CLASS( + WarpPerspectiveForward, intl::WorkspaceSizeInfer< - intl::OutshapeBySymvarSCNOpr>>) // { - public: - WarpPerspectiveForward( - VarNode *in_tensor, VarNode *mat, VarNode *mat_idx, - VarNode *out_shape, - const Param ¶m, - const OperatorNodeConfig &config); - - static SymbolVar make(SymbolVar in_tensor, - SymbolVar mat, SymbolVar mat_idx, SymbolVar out_shape, - const Param ¶m = {}, - const OperatorNodeConfig &config = {}); - - static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, - SymbolVar out_shape, - const Param ¶m = {}, - const OperatorNodeConfig &config = {}) { - return make(in_tensor, mat, SymbolVar{}, out_shape, param, config); - } - - static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, - const TensorShape &out_shape, - const Param ¶m = {}, - const OperatorNodeConfig &config = {}) - { - return make(in_tensor, mat, - cg::var_from_tensor_shape( - in_tensor, out_shape), param, config); - } - - private: - void init_output_dtype() override; - void add_input_layout_constraint() override; - void init_output_static_infer_desc() override; - void outshape_by_symvar_do_get_output_shape( - TensorShape &dest, const ShapeInferInfo &shpinfo) override; - - void scn_do_execute() override; - size_t get_workspace_size_bytes( - const TensorShapeArray &input_shapes, - const TensorShapeArray &output_shapes) const override; - - void record_execute_deps(ExecDependencyArray& deps) override; -}; + intl::OutshapeBySymvarSCNOpr>>) // { +public: +WarpPerspectiveForward(VarNode* in_tensor, VarNode* mat, VarNode* mat_idx, + VarNode* out_shape, const Param& param, + const OperatorNodeConfig& config); + +static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar mat_idx, + SymbolVar out_shape, const Param& param = {}, + const OperatorNodeConfig& config = {}); + +static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar out_shape, + const Param& param = {}, + const OperatorNodeConfig& config = {}) { + return make(in_tensor, mat, SymbolVar{}, out_shape, param, config); +} + +static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, + const TensorShape& out_shape, const Param& param = {}, + const OperatorNodeConfig& config = {}) { + return make(in_tensor, mat, cg::var_from_tensor_shape(in_tensor, out_shape), + param, config); +} + +private: +void init_output_dtype() override; +void add_input_layout_constraint() override; +void init_output_static_infer_desc() override; +void outshape_by_symvar_do_get_output_shape( + TensorShape& dest, const ShapeInferInfo& shpinfo) override; + +void scn_do_execute() override; +size_t get_workspace_size_bytes( + const TensorShapeArray& input_shapes, + const TensorShapeArray& output_shapes) const override; + +void record_execute_deps(ExecDependencyArray& deps) override; +}; // namespace opr using WarpPerspective = WarpPerspectiveForward; -MGB_DEFINE_OPR_CLASS(WarpPerspectiveBackwardData, - intl::MegDNNOprWrapperBwd) // { - public: - WarpPerspectiveBackwardData(VarNode *mat, VarNode *out_diff, - VarNode *in_for_shape, const Param ¶m, - const OperatorNodeConfig &config); - - static SymbolVar make(SymbolVar mat, SymbolVar out_diff, - SymbolVar in_for_shape, const Param ¶m = {}, - const OperatorNodeConfig &config = {}); -}; - -MGB_DEFINE_OPR_CLASS(WarpPerspectiveBackwardMat, - intl::MegDNNOprWrapperBwd) // { - public: - WarpPerspectiveBackwardMat( 
- VarNode *src, VarNode *mat, VarNode *out_diff, - const Param ¶m, const OperatorNodeConfig &config); - static SymbolVar make( - SymbolVar src, SymbolVar mat, SymbolVar out_diff, - const Param ¶m = {}, const OperatorNodeConfig &config = {}); -}; +MGB_DEFINE_OPR_CLASS( + WarpPerspectiveBackwardData, + intl::MegDNNOprWrapperBwd) // { +public: +WarpPerspectiveBackwardData(VarNode* mat, VarNode* out_diff, + VarNode* in_for_shape, const Param& param, + const OperatorNodeConfig& config); + +WarpPerspectiveBackwardData(VarNode* mat, VarNode* mat_idx, VarNode* out_diff, + VarNode* in_for_shape, const Param& param, + const OperatorNodeConfig& config); + +static SymbolVar make(SymbolVar mat, SymbolVar out_diff, SymbolVar in_for_shape, + const Param& param = {}, + const OperatorNodeConfig& config = {}); + +static SymbolVar make(SymbolVar mat, SymbolVar mat_idx, SymbolVar out_diff, + SymbolVar in_for_shape, const Param& param = {}, + const OperatorNodeConfig& config = {}); + +void scn_do_execute() override; +}; // namespace mgb + +MGB_DEFINE_OPR_CLASS( + WarpPerspectiveBackwardMat, + intl::MegDNNOprWrapperBwd) // { +public: +WarpPerspectiveBackwardMat(VarNode* src, VarNode* mat, VarNode* mat_idx, + VarNode* out_diff, const Param& param, + const OperatorNodeConfig& config); + +static SymbolVar make(SymbolVar src, SymbolVar mat, SymbolVar out_diff, + const Param& param = {}, + const OperatorNodeConfig& config = {}) { + return make(src, mat, {}, out_diff, param, config); +} + +static SymbolVar make(SymbolVar src, SymbolVar mat, SymbolVar mat_idx, + SymbolVar out_diff, const Param& param = {}, + const OperatorNodeConfig& config = {}); + +void scn_do_execute() override; +} +; /* ============================= shape infer ============================== */ //! param: src, dst @@ -116,68 +133,67 @@ using CvtColor = CvtColorForward; using GaussianBlur = GaussianBlurForward; /* ============================= user set shape =========================== */ -MGB_DEFINE_OPR_CLASS(ResizeForward, - intl::WorkspaceSizeInfer< - intl::OutshapeBySymvarSCNOpr>>) // { - public: - ResizeForward( - VarNode *in_tensor, VarNode *out_shape, const Param ¶m, - const OperatorNodeConfig &config); - - static SymbolVar make(SymbolVar in_tensor, SymbolVar out_shape, - const Param ¶m = {}, - const OperatorNodeConfig &config = {}); - - static SymbolVar make(SymbolVar in_tensor, const TensorShape &out_shape, - const Param ¶m = {}, - const OperatorNodeConfig &config = {}) - { - return make(in_tensor, - cg::var_from_tensor_shape( - in_tensor, out_shape), param, config); - } - - private: - void init_output_dtype() override; - void add_input_layout_constraint() override; - void init_output_static_infer_desc() override; - void outshape_by_symvar_do_get_output_shape( - TensorShape &dest, const ShapeInferInfo &shpinfo) override; - - void scn_do_execute() override; - size_t get_workspace_size_bytes( - const TensorShapeArray &input_shapes, - const TensorShapeArray &output_shapes) const override; - void record_execute_deps(ExecDependencyArray &deps) override; -}; +MGB_DEFINE_OPR_CLASS( + ResizeForward, + intl::WorkspaceSizeInfer>>) // { +public: +ResizeForward(VarNode* in_tensor, VarNode* out_shape, const Param& param, + const OperatorNodeConfig& config); + +static SymbolVar make(SymbolVar in_tensor, SymbolVar out_shape, + const Param& param = {}, + const OperatorNodeConfig& config = {}); + +static SymbolVar make(SymbolVar in_tensor, const TensorShape& out_shape, + const Param& param = {}, + const OperatorNodeConfig& config = {}) { + return 
make(in_tensor, cg::var_from_tensor_shape(in_tensor, out_shape), + param, config); +} + +private: +void init_output_dtype() override; +void add_input_layout_constraint() override; +void init_output_static_infer_desc() override; +void outshape_by_symvar_do_get_output_shape( + TensorShape& dest, const ShapeInferInfo& shpinfo) override; + +void scn_do_execute() override; +size_t get_workspace_size_bytes( + const TensorShapeArray& input_shapes, + const TensorShapeArray& output_shapes) const override; +void record_execute_deps(ExecDependencyArray& deps) override; +} +; using Resize = ResizeForward; MGB_DEFINE_OPR_CLASS(ResizeBackward, - intl::MegDNNOprWrapperBwd) // { - public: - ResizeBackward(VarNode *out_diff, - VarNode *in_for_shape, const Param ¶m, - const OperatorNodeConfig &config); + intl::MegDNNOprWrapperBwd) // { +public: +ResizeBackward(VarNode* out_diff, VarNode* in_for_shape, const Param& param, + const OperatorNodeConfig& config); - static SymbolVar make(SymbolVar out_diff, - SymbolVar in_for_shape, const Param ¶m = {}, - const OperatorNodeConfig &config = {}); -}; +static SymbolVar make(SymbolVar out_diff, SymbolVar in_for_shape, + const Param& param = {}, + const OperatorNodeConfig& config = {}); +} +; MGB_DEFINE_OPR_CLASS(RemapForward, - intl::MegDNNOprWrapperFwd) // { - public: - RemapForward( - VarNode *in_tensor, VarNode* map, - const Param ¶m, const OperatorNodeConfig &config); - - static SymbolVar make(SymbolVar in_tensor, SymbolVar map, const Param ¶m = {}, - const OperatorNodeConfig &config = {}); - - private: - void init_output_dtype() override; -}; + intl::MegDNNOprWrapperFwd) // { +public: +RemapForward(VarNode* in_tensor, VarNode* map, const Param& param, + const OperatorNodeConfig& config); + +static SymbolVar make(SymbolVar in_tensor, SymbolVar map, + const Param& param = {}, + const OperatorNodeConfig& config = {}); + +private: +void init_output_dtype() override; +} +; using Remap = RemapForward; /*! @@ -191,47 +207,42 @@ using Remap = RemapForward; * Input mat shape: batch, 2, 2; note that the mat is used to translate output * coordinate onto input coordinate, so it is not inversed. 
*/ -MGB_DEFINE_OPR_CLASS(WarpAffineForward, - intl::WorkspaceSizeInfer< - intl::OutshapeBySymvarSCNOpr>>) // { - public: - WarpAffineForward( - VarNode *in_tensor, VarNode *mat, VarNode *out_shape, - const Param &param, - const OperatorNodeConfig &config); - - static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, - SymbolVar out_shape, - const Param &param = {}, - const OperatorNodeConfig &config = {}); - - static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, - const TensorShape &out_shape, - const Param &param = {}, - const OperatorNodeConfig &config = {}) - { - return make(in_tensor, mat, - cg::var_from_tensor_shape( - in_tensor, out_shape), param, config); - } - - private: - void init_output_dtype() override; - void add_input_layout_constraint() override; - void init_output_static_infer_desc() override; - void outshape_by_symvar_do_get_output_shape( - TensorShape &dest, const ShapeInferInfo &shpinfo) override; - - void scn_do_execute() override; - size_t get_workspace_size_bytes( - const TensorShapeArray &input_shapes, - const TensorShapeArray &output_shapes) const override; - void record_execute_deps(ExecDependencyArray &deps) override; -}; +MGB_DEFINE_OPR_CLASS( + WarpAffineForward, + intl::WorkspaceSizeInfer>>) // { +public: +WarpAffineForward(VarNode* in_tensor, VarNode* mat, VarNode* out_shape, + const Param& param, const OperatorNodeConfig& config); + +static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar out_shape, + const Param& param = {}, + const OperatorNodeConfig& config = {}); + +static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, + const TensorShape& out_shape, const Param& param = {}, + const OperatorNodeConfig& config = {}) { + return make(in_tensor, mat, cg::var_from_tensor_shape(in_tensor, out_shape), + param, config); +} + +private: +void init_output_dtype() override; +void add_input_layout_constraint() override; +void init_output_static_infer_desc() override; +void outshape_by_symvar_do_get_output_shape( + TensorShape& dest, const ShapeInferInfo& shpinfo) override; + +void scn_do_execute() override; +size_t get_workspace_size_bytes( + const TensorShapeArray& input_shapes, + const TensorShapeArray& output_shapes) const override; +void record_execute_deps(ExecDependencyArray& deps) override; +} +; using WarpAffine = WarpAffineForward; -} // opr -} // mgb +} // opr +} // mgb // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/opr/test/imgproc.cpp b/src/opr/test/imgproc.cpp index 5a12ce69973cdd567eb21db636ba109e6e43ad12..68d9e83537758a0ed828920d258afa6a0bebbd8b 100644 --- a/src/opr/test/imgproc.cpp +++ b/src/opr/test/imgproc.cpp @@ -216,7 +216,10 @@ TEST(TestOprImgproc, WarpPerspectiveWithMatIdx) { .set_input_generator(1, gen_mat) .set_input_generator(2, gen_mat_idx) .set_input_dtype(2, dtype::Int32{}) + /*! it's hard to make the grad check succeed, since + the CUDA implementation accumulates (sums) the gradient */ .disable_grad_check() + .set_input_allow_grad(2, false) .run({TensorShape{N_SRC, C, 4, 5}, {N_MAT, 3, 3}, {N_MAT}}) .run({TensorShape{N_SRC, C, 6, 5}, {N_MAT, 3, 3}, {N_MAT}}) .run({TensorShape{N_SRC, C, 22, 19}, {N_MAT, 3, 3}, {N_MAT}});
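For orientation, the sketch below (not part of the patch) shows how the new mat_idx overloads compose at the graph level, mirroring the wrt_idx == 0 / wrt_idx == 1 branches added to MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) above. It assumes only the make() signatures introduced in this diff; the function name and variable names are illustrative.

// Illustrative sketch only -- assumes the make() overloads added in this
// patch; names here are hypothetical.
#include "megbrain/opr/imgproc.h"

using namespace mgb;

SymbolVarArray warp_perspective_grads_sketch(
        SymbolVar src,       // (N_SRC, C, IH, IW) source images
        SymbolVar mat,       // (N_MAT, 3, 3) perspective matrices
        SymbolVar mat_idx,   // (N_MAT,) Int32, maps each mat to a src batch
        SymbolVar out_shape, // symbolic (OH, OW)
        SymbolVar out_diff,  // (N_MAT, C, OH, OW) gradient wrt. the output
        const opr::WarpPerspective::Param& param) {
    // Forward: output batch i is sampled from source image src[mat_idx[i]].
    auto dst = opr::WarpPerspectiveForward::make(src, mat, mat_idx, out_shape,
                                                 param);
    MGB_MARK_USED_VAR(dst);

    // Gradient wrt. src: outputs that share a source image have their
    // gradients accumulated into it (the "grad sum" noted in the test above),
    // so the result has shape (N_SRC, C, IH, IW) even though N_MAT may differ.
    auto src_grad = opr::WarpPerspectiveBackwardData::make(mat, mat_idx,
                                                           out_diff, src,
                                                           param);

    // Gradient wrt. mat: one 3x3 gradient per matrix, shape (N_MAT, 3, 3)
    // regardless of N_SRC.
    auto mat_grad = opr::WarpPerspectiveBackwardMat::make(src, mat, mat_idx,
                                                          out_diff, param);
    return {src_grad, mat_grad};
}

The argument order matches the serialization dispatch in imgproc.sereg.h: with four inputs, BackwardData takes (mat, mat_idx, out_diff, in_for_shape) and BackwardMat takes (src, mat, mat_idx, out_diff).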