/**
 * \file dnn/src/cuda/correlation/opr_impl.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "src/cuda/correlation/opr_impl.h"
#include "src/cuda/correlation/correlation_cuda.cuh"
#include "src/cuda/utils.h"

namespace megdnn {
namespace cuda {

//! Forward correlation. Validates layouts/workspace via check_exec, then
//! dispatches to the CUDA proxy kernel for the input's floating dtype on the
//! handle's stream. One CUDA thread is launched per output (dst) element.
//! Layouts are NCHW; t* names describe the top (output) shape, b* the bottom
//! (input) shape, following the original Caffe correlation naming.
void CorrelationForwardImpl::exec(
        _megdnn_tensor_in data1, _megdnn_tensor_in data2, _megdnn_tensor_out dst,
        _megdnn_workspace workspace) {
    check_exec(data1.layout, data2.layout, dst.layout, workspace.size);
    auto p = param();
    auto stream = cuda_stream(handle());
    int nthreads = dst.layout.total_nr_elems();
    int stride1 = p.stride1;
    int stride2 = p.stride2;
    int kernel_size = p.kernel_size;
    int max_displacement = p.max_displacement;
    int pad_size = p.pad_size;
    bool is_multiply = p.is_multiply;

    int tchannels = dst.layout[1];
    int theight = dst.layout[2], twidth = dst.layout[3];
    int bchannels = data1.layout[1];
    int bheight = data1.layout[2], bwidth = data1.layout[3];

    using namespace ::megdnn::cuda::correlation;

// instantiate and launch the proxy for each supported floating dtype
#define cb(DType)                                                             \
    if (data1.layout.dtype == DType()) {                                      \
        using T = typename DTypeTrait<DType>::ctype;                          \
        forward_proxy<T>(nthreads, data1.ptr<T>(), data2.ptr<T>(),            \
                         dst.ptr<T>(), bchannels, bheight, bwidth, tchannels, \
                         theight, twidth, kernel_size, max_displacement,      \
                         stride1, stride2, pad_size, is_multiply, stream);    \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
}

//! Gradient w.r.t. data1. Geometry mirrors the forward pass: t* comes from
//! diff (the output gradient), b* from data1. One CUDA thread per grad1
//! element.
void CorrelationBackwardData1Impl::exec(
        _megdnn_tensor_in diff, _megdnn_tensor_in data1, _megdnn_tensor_in data2,
        _megdnn_tensor_out grad1, _megdnn_workspace workspace) {
    check_exec(diff.layout, data1.layout, data2.layout, grad1.layout,
               workspace.size);
    // cache param() once, consistent with the forward and BackwardData2 paths
    auto p = param();
    auto stream = cuda_stream(handle());
    int nthreads = grad1.layout.total_nr_elems();
    int stride1 = p.stride1;
    int stride2 = p.stride2;
    int kernel_size = p.kernel_size;
    int max_displacement = p.max_displacement;
    int pad_size = p.pad_size;
    bool is_multiply = p.is_multiply;

    int tchannels = diff.layout[1];
    int theight = diff.layout[2], twidth = diff.layout[3];
    int bchannels = data1.layout[1];
    int bheight = data1.layout[2], bwidth = data1.layout[3];

    using namespace ::megdnn::cuda::correlation;

#define cb(DType)                                                            \
    if (diff.layout.dtype == DType()) {                                      \
        using T = typename DTypeTrait<DType>::ctype;                         \
        backward_proxy_data1<T>(nthreads, diff.ptr<T>(), data1.ptr<T>(),     \
                                data2.ptr<T>(), grad1.ptr<T>(), bchannels,   \
                                bheight, bwidth, tchannels, theight, twidth, \
                                kernel_size, max_displacement, stride1,      \
                                stride2, pad_size, is_multiply, stream);     \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
}

//! Gradient w.r.t. data2; same structure as BackwardData1, writing grad2.
void CorrelationBackwardData2Impl::exec(
        _megdnn_tensor_in diff, _megdnn_tensor_in data1, _megdnn_tensor_in data2,
        _megdnn_tensor_out grad2, _megdnn_workspace workspace) {
    check_exec(diff.layout, data1.layout, data2.layout, grad2.layout,
               workspace.size);
    auto p = param();
    auto stream = cuda_stream(handle());
    int nthreads = grad2.layout.total_nr_elems();
    int stride1 = p.stride1;
    int stride2 = p.stride2;
    int kernel_size = p.kernel_size;
    int max_displacement = p.max_displacement;
    int pad_size = p.pad_size;
    bool is_multiply = p.is_multiply;

    int tchannels = diff.layout[1];
    int theight = diff.layout[2], twidth = diff.layout[3];
    int bchannels = data1.layout[1];
    int bheight = data1.layout[2], bwidth = data1.layout[3];

    using namespace ::megdnn::cuda::correlation;

#define cb(DType)                                                            \
    if (diff.layout.dtype == DType()) {                                      \
        using T = typename DTypeTrait<DType>::ctype;                         \
        backward_proxy_data2<T>(nthreads, diff.ptr<T>(), data1.ptr<T>(),     \
                                data2.ptr<T>(), grad2.ptr<T>(), bchannels,   \
                                bheight, bwidth, tchannels, theight, twidth, \
                                kernel_size, max_displacement, stride1,      \
                                stride2, pad_size, is_multiply, stream);     \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
}

}  // namespace cuda
}  // namespace megdnn

// vim: syntax=cpp.doxygen