Commit eab6afab authored by: Megvii Engine Team

feat(mgb): add padding opr for megbrain

GitOrigin-RevId: 490e0c5d5add99d54d9c9e3095147016b3c191c7
Parent 2224a252
......@@ -1353,6 +1353,53 @@ public:
protected:
void check_exec(const TensorLayout& dst, size_t workspace_in_bytes);
};
/*!
 * \brief standard padding operator
 * The input and output tensors must have the same dtype, and the output
 * shape must be greater than or equal to the input shape in every
 * dimension; the extra space is filled with \c padding_val, which defaults
 * to 0.
 */
class PaddingBase: public OperatorBase {
DEF_OPR_PARAM(Padding);
DEF_OPR_IMPL(PaddingBase, OperatorBase, 1, 1);
public:
using Mode = Param::PaddingMode;
protected:
SmallVector<size_t> get_offsets();
void check_exec(const TensorLayout& src, const TensorLayout& dst);
};
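/*
 * A minimal usage sketch (illustrative assumption, not part of this commit):
 * with front_offset_dim0 = 1 and back_offset_dim0 = 2 in CONSTANT mode, a
 * 1-D src of shape {5} deduces a dst of shape {5 + 1 + 2} = {8}, and the
 * three extra slots are filled with padding_val.
 */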
class PaddingForward: public PaddingBase {
DEF_OPR_IMPL(PaddingForward, PaddingBase, 1, 1);
public:
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0;
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace) {
return exec(src, dst);
}
virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0;
void deduce_layout(const TensorLayout &src, TensorLayout &dst);
protected:
void forward_check_exec(const TensorLayout& src, const TensorLayout& dst);
};
using Padding = PaddingForward;
class PaddingBackward: public PaddingBase {
DEF_OPR_IMPL(PaddingBackward, PaddingBase, 1, 1);
public:
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0;
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace) {
return exec(src, dst);
}
virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0;
protected:
void backward_check_exec(const TensorLayout& src, const TensorLayout& dst);
};
} // namespace megdnn
#include "megdnn/internal/opr_header_epilogue.h"
......
......@@ -1171,3 +1171,29 @@ Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later o
add_fields('int32', 'qmax', '2147483647')
)
pdef('Fill').add_fields('float32', 'value', '0')
PADDING_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'),
Doc('REFLECT', 'fedcba|abcdefgh|hgfedcb'),
Doc('CONSTANT', 'iiiiii|abcdefgh|iiiiiii')]
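# The docs above illustrate each mode on the 1-D source "abcdefgh": the middle
# segment is the source and the flanks are the padded values (REFLECT mirrors
# without repeating the border element; 'i' stands for the constant
# padding_val).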
(pdef('Padding').
add_fields('uint32', Doc('front_offset_dim0', 'front offset in dim 0'), 0).
add_fields('uint32', Doc('front_offset_dim1', 'front offset in dim 1'), 0).
add_fields('uint32', Doc('front_offset_dim2', 'front offset in dim 2'), 0).
add_fields('uint32', Doc('front_offset_dim3', 'front offset in dim 3'), 0).
add_fields('uint32', Doc('front_offset_dim4', 'front offset in dim 4'), 0).
add_fields('uint32', Doc('front_offset_dim5', 'front offset in dim 5'), 0).
add_fields('uint32', Doc('front_offset_dim6', 'front offset in dim 6'), 0).
add_fields('uint32', Doc('back_offset_dim0', 'back offset in dim0'), 0).
add_fields('uint32', Doc('back_offset_dim1', 'back offset in dim1'), 0).
add_fields('uint32', Doc('back_offset_dim2', 'back offset in dim2'), 0).
add_fields('uint32', Doc('back_offset_dim3', 'back offset in dim3'), 0).
add_fields('uint32', Doc('back_offset_dim4', 'back offset in dim4'), 0).
add_fields('uint32', Doc('back_offset_dim5', 'back offset in dim5'), 0).
add_fields('uint32', Doc('back_offset_dim6', 'back offset in dim6'), 0).
add_fields('float32', Doc('padding_val', 'value filled into the padded area in CONSTANT mode'), 0).
add_enum('PaddingMode', *PADDING_MODES,
name_field='padding_mode', default=2,
member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES]
)
)
......@@ -217,7 +217,9 @@ private:
cb(CheckHasInf) \
cb(LSQForward) \
cb(LSQBackward) \
cb(Fill)
cb(Fill) \
cb(PaddingForward) \
cb(PaddingBackward)
/*!
* \brief specialize HandleImpl::create_operator for a single opr type;
......
......@@ -27,6 +27,8 @@ struct OprTrait {};
static const bool can_deduce_layout = CanDeduceLayout; \
}
DEF(Padding, 2, false, true);
DEF(PaddingBackward, 2, false, false);
DEF(ConvolutionForward, 3, true, true);
DEF(Convolution3DForward, 3, true, true);
DEF(ConvolutionBackwardData, 3, true, false);
......
/**
* \file dnn/src/common/padding.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megdnn/oprs.h"
#include "megdnn/oprs/general.h"
#include "megdnn/thin/small_vector.h"
#include "src/common/opr_param_defs_enumv.cuh"
#include "src/common/utils.h"
namespace megdnn {
using padding_param = megdnn::param_enumv::Padding;
void PaddingForward::forward_check_exec(const TensorLayout& src,
const TensorLayout& dst) {
check_exec(src, dst);
megdnn_assert(src.dtype.enumv() != DTypeEnum::Bool &&
src.dtype.enumv() != DTypeEnum::IntB1 &&
src.dtype.enumv() != DTypeEnum::IntB2 &&
src.dtype.enumv() != DTypeEnum::IntB4,
"unsupported %s dtype for forward padding opr",
src.dtype.name());
}
void PaddingForward::deduce_layout(const TensorLayout& src, TensorLayout& dst) {
    SmallVector<size_t> offsets(get_offsets());
    megdnn_assert(src.ndim >= 1 && src.ndim <= MEGDNN_MAX_NDIM,
                  "invalid tensor ndim %zu", src.ndim);
    // every output dim equals the input dim plus its front and back offsets
    TensorShape dst_shape;
    dst_shape.ndim = src.ndim;
    for (size_t i = 0; i < src.ndim; ++i) {
        dst_shape.shape[i] = src.shape[i] + offsets[i * 2] + offsets[i * 2 + 1];
    }
    dst = TensorLayout(dst_shape, src.dtype);
}
void PaddingBackward::backward_check_exec(const TensorLayout& src,
const TensorLayout& dst) {
check_exec(dst, src);
megdnn_assert(src.dtype.enumv() ==
DTypeEnum::Float32 DNN_INC_FLOAT16(
|| src.dtype.enumv() == DTypeEnum::Float16 ||
src.dtype.enumv() == DTypeEnum::BFloat16),
"unsupported %s dtype for forward padding opr",
src.dtype.name());
}
SmallVector<size_t> PaddingBase::get_offsets() {
SmallVector<size_t> offsets = {
param().front_offset_dim0, param().back_offset_dim0,
param().front_offset_dim1, param().back_offset_dim1,
param().front_offset_dim2, param().back_offset_dim2,
param().front_offset_dim3, param().back_offset_dim3,
param().front_offset_dim4, param().back_offset_dim4,
param().front_offset_dim5, param().back_offset_dim5,
param().front_offset_dim6, param().back_offset_dim6};
return offsets;
}
void PaddingBase::check_exec(const TensorLayout& src, const TensorLayout& dst) {
SmallVector<size_t> offsets(get_offsets());
// make sure the src and dst tensors are not empty
megdnn_assert(src.ndim != 0 && dst.ndim != 0);
// make sure src and dst have the same dtype
megdnn_assert_eq_dtype(src, dst);
// make sure src and dst have the same ndim
megdnn_assert(src.ndim == dst.ndim, "the src.ndim = %zu the dst.ndim = %zu",
src.ndim, dst.ndim);
// make sure every dst dim equals the src dim plus its front and back offsets
for (size_t i = 0; i < src.ndim; ++i) {
megdnn_assert(dst.shape[i] ==
src.shape[i] + offsets[i * 2] + offsets[i * 2 + 1]);
}
// check the padding mode is valid
megdnn_assert(static_cast<uint32_t>(param().padding_mode) ==
padding_param::PaddingMode::REFLECT ||
static_cast<uint32_t>(param().padding_mode) ==
padding_param::PaddingMode::REPLICATE ||
static_cast<uint32_t>(param().padding_mode) ==
padding_param::PaddingMode::CONSTANT,
"unsupported padding mode");
// additional check for reflect padding: each offset must be smaller than the
// corresponding source dim, otherwise the mirrored index would fall outside
// the source tensor
if (static_cast<uint32_t>(param().padding_mode) ==
padding_param::PaddingMode::REFLECT) {
for (size_t i = 0; i < src.ndim; ++i) {
megdnn_assert(offsets[i * 2] < src.shape[i] &&
dst.shape[i] - offsets[i * 2] - src.shape[i] <
src.shape[i]);
}
}
}
} // namespace megdnn
......@@ -12,6 +12,7 @@
#include "src/common/handle_impl.h"
#include "src/cuda/padding/opr_impl.h"
#include "src/cuda/adaptive_pooling/opr_impl.h"
#include "src/cuda/add_update/opr_impl.h"
#include "src/cuda/argmxx/opr_impl.h"
......
/**
* \file dnn/src/cuda/padding/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/cuda/padding/opr_impl.h"
#include "src/common/utils.h"
#include "src/cuda/handle.h"
#include "src/cuda/padding/padding.cuh"
#include "src/cuda/utils.h"
namespace megdnn {
namespace cuda {
void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
forward_check_exec(src.layout, dst.layout);
SmallVector<size_t> offsets(get_offsets());
// SmallVector cannot be passed into the .cu kernel proxy, so copy the offsets into a plain array
size_t param_offsets[MEGDNN_MAX_NDIM * 2] = {
offsets[0], offsets[1], offsets[2], offsets[3], offsets[4],
offsets[5], offsets[6], offsets[7], offsets[8], offsets[9],
offsets[10], offsets[11], offsets[12], offsets[13]};
auto stream = cuda_stream(this->handle());
#define cb(DType) \
if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \
using ctype = typename DTypeTrait<DType>::ctype; \
padding::padding_forward_proxy<ctype>(src, dst, param_offsets, \
uint32_t(param().padding_mode), \
param().padding_val, stream); \
}
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
}
void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
backward_check_exec(src.layout, dst.layout);
SmallVector<size_t> offsets(get_offsets());
// SmallVector cannot be passed into the .cu kernel proxy, so copy the offsets into a plain array
size_t param_offsets[MEGDNN_MAX_NDIM * 2] = {
offsets[0], offsets[1], offsets[2], offsets[3], offsets[4],
offsets[5], offsets[6], offsets[7], offsets[8], offsets[9],
offsets[10], offsets[11], offsets[12], offsets[13]};
auto stream = cuda_stream(this->handle());
#define cb(DType) \
if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \
using ctype = typename DTypeTrait<DType>::ctype; \
padding::padding_backward_proxy<ctype>(src, dst, param_offsets, \
uint32_t(param().padding_mode), \
stream); \
}
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
}
size_t PaddingForwardImpl::get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst) {
return 0;
}
size_t PaddingBackwardImpl::get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst) {
return 0;
}
} // namespace cuda
} // namespace megdnn
\ No newline at end of file
/**
* \file dnn/src/cuda/padding/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"
namespace megdnn {
namespace cuda {
class PaddingForwardImpl : public PaddingForward {
using PaddingForward::PaddingForward;
public:
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst) override;
};
class PaddingBackwardImpl : public PaddingBackward {
using PaddingBackward::PaddingBackward;
public:
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst) override;
};
} // namespace cuda
} // namespace megdnn
\ No newline at end of file
/**
* \file dnn/src/cuda/padding/padding.cu
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include <algorithm>
#include <cstring>
#include <iostream>
#include "megdnn/basic_types.h"
#include "padding.cuh"
#include "src/cuda/int_fastdiv.cuh"
#include "src/cuda/query_blocksize.cuh"
namespace megdnn {
namespace cuda {
namespace padding {
struct ShapeParams {
size_t src_shape[MEGDNN_MAX_NDIM];
size_t dst_shape[MEGDNN_MAX_NDIM];
Uint32Fastdiv src_stride[MEGDNN_MAX_NDIM];
Uint32Fastdiv dst_stride[MEGDNN_MAX_NDIM];
size_t offsets[MEGDNN_MAX_NDIM * 2];
};
template <typename T>
__global__ void paddingConst_kernel(const size_t ndim,
const size_t total_out_nr,
const T* const src, T* const dst,
ShapeParams params,
const float_t padding_val) {
KERN_FOR(out_index, total_out_nr) {
bool in_src_valid_area = true;
size_t in_index = 0;
size_t out_index_tmp = out_index;
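        // peel the dst coordinate off dimension by dimension (outermost
        // first) and map it back into src; stop early once the coordinate
        // leaves the unpadded region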
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
Uint32Fastdiv dst_stride = params.dst_stride[dim], src_stride = params.src_stride[dim];
size_t src_shape = params.src_shape[dim];
size_t offset = params.offsets[dim*2];
size_t dim_index = out_index_tmp / dst_stride;
in_src_valid_area &= (dim_index >= offset && dim_index < offset+src_shape);
if(!in_src_valid_area) break;
out_index_tmp -= dim_index * dst_stride.divisor();
in_index += (dim_index - offset)*src_stride.divisor();
}
dst[out_index] = in_src_valid_area ? src[in_index] : padding_val;
}
}
template <typename T>
__global__ void paddingReplicate_kernel(const size_t ndim,
const size_t total_out_nr,
const T* const src, T* const dst,
ShapeParams params, const float_t) {
KERN_FOR(out_index, total_out_nr) {
size_t in_index = 0;
size_t out_index_tmp = out_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
size_t dim_index = out_index_tmp / params.dst_stride[dim];
out_index_tmp -= dim_index * params.dst_stride[dim].divisor();
dim_index = (size_t)llmin(
(long long)params.src_shape[dim] - 1,
llmax((long long)dim_index -
(long long)params.offsets[dim * 2],
(long long)0));
in_index += dim_index * params.src_stride[dim].divisor();
}
dst[out_index] = src[in_index];
}
}
template <typename T>
__global__ void paddingReflect_kernel(const size_t ndim,
const size_t total_out_nr,
const T* const src, T* const dst,
ShapeParams params, const float_t) {
KERN_FOR(out_index, total_out_nr) {
size_t in_index = 0;
size_t out_index_tmp = out_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
long long dim_index = out_index_tmp / params.dst_stride[dim];
out_index_tmp -= dim_index * params.dst_stride[dim].divisor();
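            // fold the coordinate back into [0, src_shape - 1], mirroring
            // around both borders without repeating the border element;
            // check_exec on the host guarantees the fold stays in range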
dim_index -= (long long)params.offsets[dim * 2];
dim_index = llmax(dim_index, -dim_index);
dim_index = llmin(dim_index, 2 * (long long)params.src_shape[dim] -
dim_index - 2);
in_index += size_t(dim_index) *
(size_t)params.src_stride[dim].divisor();
}
dst[out_index] = src[in_index];
}
}
template <typename T>
__global__ void paddingConstBackward_kernel(const size_t ndim,
const size_t total_in_nr,
const T* const src, T* const dst,
ShapeParams params) {
KERN_FOR(in_index, total_in_nr) {
bool in_dst_valid_area = true;
size_t out_index = 0;
size_t in_index_tmp = in_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
size_t dim_index = in_index_tmp / params.src_stride[dim];
in_index_tmp -= dim_index * params.src_stride[dim].divisor();
in_dst_valid_area &= (dim_index >= params.offsets[dim * 2] &&
dim_index < params.offsets[dim * 2] +
params.dst_shape[dim]);
out_index += (dim_index - params.offsets[dim * 2]) *
params.dst_stride[dim].divisor();
}
if (in_dst_valid_area) {
dst[out_index] = src[in_index];
}
}
}
template <typename T>
__global__ void paddingReplicateBackward_kernel(const size_t ndim,
const size_t total_in_nr,
const T* const src,
T* const dst,
ShapeParams params) {
KERN_FOR(in_index, total_in_nr) {
size_t out_index = 0;
size_t in_index_tmp = in_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
size_t dim_index = in_index_tmp / params.src_stride[dim];
in_index_tmp -= dim_index * params.src_stride[dim].divisor();
dim_index = (size_t)llmin(
(long long)params.dst_shape[dim] - 1,
llmax((long long)dim_index -
(long long)params.offsets[dim * 2],
(long long)0));
out_index += dim_index * params.dst_stride[dim].divisor();
}
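        // several positions in the padded gradient can map to the same input
        // element, so accumulate atomically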
atomic_add(&dst[out_index], src[in_index]);
}
}
template <typename T>
__global__ void paddingReflectBackward_kernel(const size_t ndim,
const size_t total_in_nr,
const T* const src, T* const dst,
ShapeParams params) {
KERN_FOR(in_index, total_in_nr) {
size_t out_index = 0;
size_t in_index_tmp = in_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
long long dim_index = in_index_tmp / params.src_stride[dim];
in_index_tmp -= dim_index * params.src_stride[dim].divisor();
dim_index -= (long long)params.offsets[dim * 2];
dim_index = llmax(dim_index, -dim_index);
dim_index = llmin(dim_index, 2 * (long long)params.dst_shape[dim] -
dim_index - 2);
out_index += size_t(dim_index) *
(size_t)params.dst_stride[dim].divisor();
}
atomic_add(&dst[out_index], src[in_index]);
}
}
template <typename T>
void padding_forward_proxy(const TensorND& src, const TensorND& dst,
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
const float_t padding_val, cudaStream_t stream) {
ShapeParams params;
for (size_t i = 0; i < src.layout.ndim; ++i) {
params.src_shape[i] = src.layout.shape[i];
params.dst_shape[i] = dst.layout.shape[i];
params.src_stride[i] = src.layout.stride[i];
params.dst_stride[i] = dst.layout.stride[i];
params.offsets[i * 2] = offsets[i * 2];
params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
}
void (*fwd_kern)(const size_t, const size_t, const T* const, T* const,
ShapeParams, const float_t);
switch (mode) {
case param_enumv::Padding::PaddingMode::CONSTANT:
fwd_kern = paddingConst_kernel<T>;
break;
case param_enumv::Padding::PaddingMode::REPLICATE:
fwd_kern = paddingReplicate_kernel<T>;
break;
case param_enumv::Padding::PaddingMode::REFLECT:
fwd_kern = paddingReflect_kernel<T>;
break;
default:
megdnn_assert(false, "invalid padding mode");
}
size_t total_nr = dst.layout.total_nr_elems();
uint32_t nr_threads = query_blocksize_for_kernel(fwd_kern);
dim3 threads(nr_threads);
dim3 blocks(DIVUP(total_nr, nr_threads));
fwd_kern<<<blocks, threads, 0, stream>>>(src.layout.ndim, total_nr,
src.ptr<T>(), dst.ptr<T>(), params,
padding_val);
after_kernel_launch();
}
template <typename T>
void padding_backward_proxy(const TensorND& src, const TensorND& dst,
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
cudaStream_t stream) {
ShapeParams params;
for (size_t i = 0; i < src.layout.ndim; ++i) {
params.src_shape[i] = src.layout.shape[i];
params.dst_shape[i] = dst.layout.shape[i];
params.src_stride[i] = src.layout.stride[i];
params.dst_stride[i] = dst.layout.stride[i];
params.offsets[i * 2] = offsets[i * 2];
params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
}
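    // zero the gradient buffer first: the REPLICATE/REFLECT backward kernels
    // accumulate into it with atomic_add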
cudaMemset(dst.raw_ptr, 0, dst.layout.access_bytes());
void (*bwd_kern)(const size_t, const size_t, const T* const, T* const,
ShapeParams);
switch (mode) {
case param_enumv::Padding::PaddingMode::CONSTANT:
bwd_kern = paddingConstBackward_kernel<T>;
break;
case param_enumv::Padding::PaddingMode::REPLICATE:
bwd_kern = paddingReplicateBackward_kernel<T>;
break;
case param_enumv::Padding::PaddingMode::REFLECT:
bwd_kern = paddingReflectBackward_kernel<T>;
break;
default:
megdnn_assert(false, "invalid padding mode");
}
size_t total_nr = src.layout.total_nr_elems();
uint32_t nr_threads = query_blocksize_for_kernel(bwd_kern);
dim3 threads(nr_threads);
dim3 blocks(DIVUP(total_nr, nr_threads));
bwd_kern<<<blocks, threads, 0, stream>>>(
src.layout.ndim, total_nr, src.ptr<T>(), dst.ptr<T>(), params);
after_kernel_launch();
}
#define INST(T) \
template void padding_forward_proxy<T>( \
const TensorND& src, const TensorND& dst, \
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, \
const float_t padding_val, cudaStream_t stream);
#define cb(DType) INST(typename DTypeTrait<DType>::ctype)
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
#undef INST
#define INST(T) \
template void padding_backward_proxy<T>( \
const TensorND& src, const TensorND& dst, \
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, \
cudaStream_t stream);
#define cb(DType) INST(typename DTypeTrait<DType>::ctype)
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
#undef INST
} // namespace padding
} // namespace cuda
} // namespace megdnn
\ No newline at end of file
/**
* \file dnn/src/cuda/padding/padding.cuh
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "cuda_runtime.h"
#include "megdnn/basic_types.h"
#include "src/common/opr_param_defs_enumv.cuh"
#include "src/cuda/utils.cuh"
namespace megdnn {
namespace cuda {
namespace padding {
template <typename T>
void padding_forward_proxy(const TensorND& src, const TensorND& dst,
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
const float_t padding_val, cudaStream_t stream);
template <typename T>
void padding_backward_proxy(const TensorND& src, const TensorND& dst,
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
cudaStream_t stream);
} // namespace padding
} // namespace cuda
} // namespace megdnn
// vim: syntax=cuda.doxygen
\ No newline at end of file
......@@ -78,7 +78,7 @@
#if MEGDNN_THREADS_512
#define NR_THREADS 512
#define NR_THREADS_X 32
#define NR_THREADS_Y 16
#define NR_THREADS_Y 16
#else
#define NR_THREADS 1024
#define NR_THREADS_X 32
......
......@@ -16,7 +16,7 @@
#include <numeric>
namespace megdnn {
namespace {
using namespace megdnn;
......
......@@ -14,6 +14,7 @@
#include "src/common/handle_impl.h"
#include "src/naive/padding/opr_impl.h"
#include "src/naive/adaptive_pooling/opr_impl.h"
#include "src/naive/add_update/opr_impl.h"
#include "src/naive/argmxx/opr_impl.h"
......
/**
* \file dnn/src/naive/padding/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/naive/padding/opr_impl.h"
#include <math.h>
#include <stdio.h>
#include "src/common/utils.h"
#include "src/naive/handle.h"
namespace megdnn {
namespace naive {
struct ShapeParams {
size_t src_shape[MEGDNN_MAX_NDIM];
size_t dst_shape[MEGDNN_MAX_NDIM];
ptrdiff_t src_stride[MEGDNN_MAX_NDIM];
ptrdiff_t dst_stride[MEGDNN_MAX_NDIM];
size_t offsets[MEGDNN_MAX_NDIM * 2];
};
template <typename T>
void exec_const_internal(const size_t ndim, const size_t total_out_nr,
const T* const src, T* const dst, ShapeParams params,
const T padding_val) MEGDNN_NOEXCEPT {
rep(out_index, total_out_nr) {
bool in_src_valid_area = true;
size_t in_index = 0;
size_t out_index_tmp = out_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
size_t dim_index = out_index_tmp / params.dst_stride[dim];
out_index_tmp -= dim_index * params.dst_stride[dim];
in_src_valid_area &= (dim_index >= params.offsets[dim * 2] &&
dim_index < params.offsets[dim * 2] +
params.src_shape[dim]);
in_index += (dim_index - params.offsets[dim * 2]) *
params.src_stride[dim];
}
if (in_src_valid_area) {
dst[out_index] = src[in_index];
} else {
dst[out_index] = padding_val;
}
}
}
template <typename T>
void exec_replicate_internal(const size_t ndim, const size_t total_out_nr,
const T* const src, T* const dst,
ShapeParams params) MEGDNN_NOEXCEPT {
rep(out_index, total_out_nr) {
size_t in_index = 0;
size_t out_index_tmp = out_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
size_t dim_index = out_index_tmp / params.dst_stride[dim];
out_index_tmp -= dim_index * params.dst_stride[dim];
dim_index = (size_t)std::min(
(long long)params.src_shape[dim] - 1,
std::max((long long)dim_index -
(long long)params.offsets[dim * 2],
(long long)0));
in_index += dim_index * params.src_stride[dim];
}
dst[out_index] = src[in_index];
}
}
template <typename T>
void exec_reflect_internal(const size_t ndim, const size_t total_out_nr,
const T* const src, T* const dst,
ShapeParams params) MEGDNN_NOEXCEPT {
rep(out_index, total_out_nr) {
size_t in_index = 0;
size_t out_index_tmp = out_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
long long dim_index = out_index_tmp / params.dst_stride[dim];
out_index_tmp -= dim_index * params.dst_stride[dim];
dim_index -= (long long)params.offsets[dim * 2];
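            // same fold as the CUDA kernel: mirror the coordinate back into
            // [0, src_shape - 1] around both borders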
dim_index = std::max(dim_index, -dim_index);
dim_index =
std::min(dim_index, 2 * (long long)params.src_shape[dim] -
dim_index - 2);
in_index += size_t(dim_index) * (size_t)params.src_stride[dim];
}
dst[out_index] = src[in_index];
}
}
template <typename T>
void backward_exec_const_internal(const size_t ndim, const size_t total_in_nr,
const T* const src, T* const dst,
ShapeParams params) MEGDNN_NOEXCEPT {
rep(in_index, total_in_nr) {
bool in_dst_valid_area = true;
size_t out_index = 0;
size_t in_index_tmp = in_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
size_t dim_index = in_index_tmp / params.src_stride[dim];
in_index_tmp -= dim_index * params.src_stride[dim];
in_dst_valid_area &= (dim_index >= params.offsets[dim * 2] &&
dim_index < params.offsets[dim * 2] +
params.dst_shape[dim]);
out_index += (dim_index - params.offsets[dim * 2]) *
params.dst_stride[dim];
}
if (in_dst_valid_area) {
dst[out_index] = src[in_index];
}
}
}
template <typename T>
void backward_exec_replicate_internal(const size_t ndim,
const size_t total_in_nr,
const T* const src, T* const dst,
ShapeParams params) MEGDNN_NOEXCEPT {
rep(in_index, total_in_nr) {
size_t out_index = 0;
size_t in_index_tmp = in_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
size_t dim_index = in_index_tmp / params.src_stride[dim];
in_index_tmp -= dim_index * params.src_stride[dim];
dim_index = (size_t)std::min(
(long long)params.dst_shape[dim] - 1,
std::max((long long)dim_index -
(long long)params.offsets[dim * 2],
(long long)0));
out_index += dim_index * params.dst_stride[dim];
}
dst[out_index] += src[in_index];
}
}
template <typename T>
void backward_exec_reflect_internal(const size_t ndim, const size_t total_in_nr,
const T* const src, T* const dst,
ShapeParams params) MEGDNN_NOEXCEPT {
rep(in_index, total_in_nr) {
size_t out_index = 0;
size_t in_index_tmp = in_index;
for (size_t dim = 0; dim <= ndim - 1; ++dim) {
long long dim_index = in_index_tmp / params.src_stride[dim];
in_index_tmp -= dim_index * params.src_stride[dim];
dim_index -= (long long)params.offsets[dim * 2];
dim_index = std::max(dim_index, -dim_index);
dim_index =
std::min(dim_index, 2 * (long long)params.dst_shape[dim] -
dim_index - 2);
out_index += size_t(dim_index) * (size_t)params.dst_stride[dim];
}
dst[out_index] += src[in_index];
}
}
void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
forward_check_exec(src.layout, dst.layout);
SmallVector<size_t> offsets(get_offsets());
ShapeParams params;
for (size_t i = 0; i < src.layout.ndim; ++i) {
params.src_shape[i] = src.layout.shape[i];
params.dst_shape[i] = dst.layout.shape[i];
params.src_stride[i] = src.layout.stride[i];
params.dst_stride[i] = dst.layout.stride[i];
params.offsets[i * 2] = offsets[i * 2];
params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
}
size_t n = dst.layout.total_nr_elems();
switch (param().padding_mode) {
case param::Padding::PaddingMode::CONSTANT:
#define cb(DType) \
if (src.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
MEGDNN_DISPATCH_CPU_KERN_OPR(exec_const_internal<T>( \
src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params, \
T(param().padding_val))); \
return; \
}
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
break;
case param::Padding::PaddingMode::REPLICATE:
#define cb(DType) \
if (src.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
MEGDNN_DISPATCH_CPU_KERN_OPR(exec_replicate_internal<T>( \
src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
return; \
}
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
break;
case param::Padding::PaddingMode::REFLECT:
#define cb(DType) \
if (src.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
MEGDNN_DISPATCH_CPU_KERN_OPR(exec_reflect_internal<T>( \
src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
return; \
}
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
break;
default:
megdnn_assert(false, "unsupported padding mode!");
}
}
void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
backward_check_exec(src.layout, dst.layout);
SmallVector<size_t> offsets(get_offsets());
ShapeParams params;
for (size_t i = 0; i < src.layout.ndim; ++i) {
params.src_shape[i] = src.layout.shape[i];
params.dst_shape[i] = dst.layout.shape[i];
params.src_stride[i] = src.layout.stride[i];
params.dst_stride[i] = dst.layout.stride[i];
params.offsets[i * 2] = offsets[i * 2];
params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
}
size_t n = src.layout.total_nr_elems();
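    // zero the gradient buffer first: the REPLICATE/REFLECT backward paths
    // accumulate into it with +=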
memset(dst.raw_ptr, 0, dst.layout.access_bytes());
switch (param().padding_mode) {
case param::Padding::PaddingMode::CONSTANT:
#define cb(DType) \
if (src.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_const_internal<T>( \
src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
return; \
}
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
break;
case param::Padding::PaddingMode::REPLICATE:
#define cb(DType) \
if (src.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_replicate_internal<T>( \
src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
return; \
}
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
break;
case param::Padding::PaddingMode::REFLECT:
#define cb(DType) \
if (src.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_reflect_internal<T>( \
src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
return; \
}
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
break;
default:
megdnn_assert(false, "unsupported padding mode!");
}
}
size_t PaddingForwardImpl::get_workspace_in_bytes(
const TensorLayout& /* src */, const TensorLayout& /* dst */) {
return 0;
}
size_t PaddingBackwardImpl::get_workspace_in_bytes(
const TensorLayout& /* src */, const TensorLayout& /* dst */) {
return 0;
}
} // namespace naive
} // namespace megdnn
\ No newline at end of file
/**
* \file dnn/src/naive/padding/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/oprs.h"
namespace megdnn {
namespace naive {
class PaddingForwardImpl: public PaddingForward {
using PaddingForward::PaddingForward;
public:
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) override;
};
class PaddingBackwardImpl: public PaddingBackward {
using PaddingBackward::PaddingBackward;
public:
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) override;
};
}  // namespace naive
}  // namespace megdnn
\ No newline at end of file
/**
* \file dnn/test/common/padding.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include <cstddef>
#include <iostream>
#include "megdnn/basic_types.h"
#include "megdnn/opr_param_defs.h"
namespace megdnn {
namespace test {
namespace padding {
struct TestArg {
param::Padding param;
TensorShape src;
TensorShape dst;
TestArg(param::Padding _param, TensorShape _src, TensorShape _dst)
: param(_param), src(_src), dst(_dst) {}
};
inline std::vector<TestArg> get_args() {
size_t src_shape_dim0 = 5;
size_t src_shape_dim1 = 5;
size_t src_shape_dim2 = 5;
size_t src_shape_dim3 = 5;
size_t src_shape_dim4 = 5;
size_t src_shape_dim5 = 5;
size_t src_shape_dim6 = 5;
size_t dst_shape_dim0 = 8;
size_t dst_shape_dim1 = 8;
size_t dst_shape_dim2 = 8;
size_t dst_shape_dim3 = 8;
size_t dst_shape_dim4 = 8;
size_t dst_shape_dim5 = 8;
size_t dst_shape_dim6 = 8;
std::vector<TestArg> args;
param::Padding cur_param;
cur_param.front_offset_dim0 = 0;
cur_param.front_offset_dim1 = 0;
cur_param.front_offset_dim2 = 0;
cur_param.front_offset_dim3 = 0;
cur_param.front_offset_dim4 = 0;
cur_param.front_offset_dim5 = 0;
cur_param.front_offset_dim6 = 0;
cur_param.back_offset_dim0 = 0;
cur_param.back_offset_dim1 = 0;
cur_param.back_offset_dim2 = 0;
cur_param.back_offset_dim3 = 0;
cur_param.back_offset_dim4 = 0;
cur_param.back_offset_dim5 = 0;
cur_param.back_offset_dim6 = 0;
cur_param.padding_val = 2;
cur_param.front_offset_dim0 = 1;
cur_param.back_offset_dim0 = 2;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});
cur_param.front_offset_dim1 = 2;
cur_param.back_offset_dim1 = 1;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});
cur_param.front_offset_dim2 = 1;
cur_param.back_offset_dim2 = 2;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});
cur_param.front_offset_dim3 = 0;
cur_param.back_offset_dim3 = 3;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});
cur_param.front_offset_dim4 = 3;
cur_param.back_offset_dim4 = 0;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});
cur_param.front_offset_dim5 = 1;
cur_param.back_offset_dim5 = 2;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});
cur_param.front_offset_dim6 = 0;
cur_param.back_offset_dim6 = 3;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});
return args;
}
inline std::vector<TestArg> get_args_backward() {
size_t src_shape_dim0 = 8;
size_t src_shape_dim1 = 8;
size_t src_shape_dim2 = 8;
size_t src_shape_dim3 = 8;
size_t src_shape_dim4 = 8;
size_t src_shape_dim5 = 8;
size_t src_shape_dim6 = 8;
size_t dst_shape_dim0 = 5;
size_t dst_shape_dim1 = 5;
size_t dst_shape_dim2 = 5;
size_t dst_shape_dim3 = 5;
size_t dst_shape_dim4 = 5;
size_t dst_shape_dim5 = 5;
size_t dst_shape_dim6 = 5;
std::vector<TestArg> args;
param::Padding cur_param;
cur_param.front_offset_dim0 = 0;
cur_param.front_offset_dim1 = 0;
cur_param.front_offset_dim2 = 0;
cur_param.front_offset_dim3 = 0;
cur_param.front_offset_dim4 = 0;
cur_param.front_offset_dim5 = 0;
cur_param.front_offset_dim6 = 0;
cur_param.back_offset_dim0 = 0;
cur_param.back_offset_dim1 = 0;
cur_param.back_offset_dim2 = 0;
cur_param.back_offset_dim3 = 0;
cur_param.back_offset_dim4 = 0;
cur_param.back_offset_dim5 = 0;
cur_param.back_offset_dim6 = 0;
cur_param.padding_val = 2;
cur_param.front_offset_dim0 = 1;
cur_param.back_offset_dim0 = 2;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});
cur_param.front_offset_dim1 = 2;
cur_param.back_offset_dim1 = 1;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});
cur_param.front_offset_dim2 = 1;
cur_param.back_offset_dim2 = 2;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});
cur_param.front_offset_dim3 = 0;
cur_param.back_offset_dim3 = 3;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});
cur_param.front_offset_dim4 = 3;
cur_param.back_offset_dim4 = 0;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});
cur_param.front_offset_dim5 = 1;
cur_param.back_offset_dim5 = 2;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});
cur_param.front_offset_dim6 = 0;
cur_param.back_offset_dim6 = 3;
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});
return args;
}
} // namespace padding
} // namespace test
} // namespace megdnn
\ No newline at end of file
/**
* \file dnn/test/cuda/padding.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "test/cuda/benchmark.h"
#include "test/common/padding.h"
#include "megdnn/oprs.h"
#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/cuda/fixture.h"
using namespace megdnn;
using namespace test;
TEST_F(CUDA, PADDING) {
std::vector<padding::TestArg> args = padding::get_args();
Checker<Padding> checker(handle_cuda());
UniformIntNonZeroRNG rng(1, 9);
for (auto&& arg : args) {
checker.set_param(arg.param)
.set_rng(0, &rng)
.set_dtype(0, dtype::Int8())
.set_dtype(1, dtype::Int8())
.execs({arg.src, arg.dst});
}
}
TEST_F(CUDA, PADDING_BACKWARD) {
std::vector<padding::TestArg> args = padding::get_args_backward();
Checker<PaddingBackward> checker(handle_cuda());
UniformFloatRNG rng(1, 9);
for (auto&& arg : args) {
checker.set_param(arg.param)
.set_rng(0, &rng)
.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.execs({arg.src, arg.dst});
}
}
TEST_F(CUDA, PADDING_REFLECT) {
Checker<Padding> checker(handle_cuda(), false);
param::Padding param;
param.padding_val = 10;
param.padding_mode = param::Padding::PaddingMode::REFLECT;
param.front_offset_dim0 = 2;
param.front_offset_dim1 = 0;
param.front_offset_dim2 = 0;
param.front_offset_dim3 = 0;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 3;
param.back_offset_dim1 = 0;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
checker.set_param(param).exect(
Testcase{TensorValue({5}, dtype::Int8(), {1, 2, 3, 4, 5}), {}},
Testcase{{},
TensorValue({10}, dtype::Int8(),
{3, 2, 1, 2, 3, 4, 5, 4, 3, 2})});
}
TEST_F(CUDA, PADDING_REFLECT2) {
Checker<Padding> checker(handle_cuda(), false);
param::Padding param;
param.padding_val = 10;
param.padding_mode = param::Padding::PaddingMode::REFLECT;
param.front_offset_dim0 = 1;
param.front_offset_dim1 = 2;
param.front_offset_dim2 = 0;
param.front_offset_dim3 = 0;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 1;
param.back_offset_dim1 = 2;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
checker.set_param(param).exect(
Testcase{TensorValue({3, 3}, dtype::Int8(),
{3, 5, 1, 3, 6, 1, 4, 7, 9}),
{}},
Testcase{{},
TensorValue({5, 7}, dtype::Int8(),
{1, 6, 3, 6, 1, 6, 3, 1, 5, 3, 5, 1,
5, 3, 1, 6, 3, 6, 1, 6, 3, 9, 7, 4,
7, 9, 7, 4, 1, 6, 3, 6, 1, 6, 3})});
}
TEST_F(CUDA, PADDING_REPLICATE) {
Checker<Padding> checker(handle_cuda(), false);
param::Padding param;
param.padding_val = 10;
param.padding_mode = param::Padding::PaddingMode::REPLICATE;
param.front_offset_dim0 = 1;
param.front_offset_dim1 = 0;
param.front_offset_dim2 = 0;
param.front_offset_dim3 = 0;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 2;
param.back_offset_dim1 = 0;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
checker.set_param(param).exect(
Testcase{TensorValue({9}, dtype::Int8(),
{1, 2, 3, 4, 5, 6, 7, 8, 9}),
{}},
Testcase{{},
TensorValue({12}, dtype::Int8(),
{1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9})});
}
TEST_F(CUDA, PADDING_REPLICATE2) {
Checker<Padding> checker(handle_cuda(), false);
param::Padding param;
param.padding_val = 10;
param.padding_mode = param::Padding::PaddingMode::REPLICATE;
param.front_offset_dim0 = 2;
param.front_offset_dim1 = 1;
param.front_offset_dim2 = 0;
param.front_offset_dim3 = 0;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 0;
param.back_offset_dim1 = 3;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
checker.set_param(param).exect(
Testcase{TensorValue({3, 3}, dtype::Int8(),
{1, 2, 3, 4, 5, 6, 7, 8, 9}),
{}},
Testcase{{},
TensorValue({5, 7}, dtype::Int8(),
{1, 1, 2, 3, 3, 3, 3, 1, 1, 2, 3, 3,
3, 3, 1, 1, 2, 3, 3, 3, 3, 4, 4, 5,
6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9})});
}
// #if MEGDNN_WITH_BENCHMARK
TEST_F(CUDA, BENCHMARK_PADDING_CONSTANT) {
using Param = Padding::Param;
auto run = [&](const TensorShapeArray& shapes, Param param) {
CUBenchmarker<PaddingForward> benchmarker(handle_cuda());
benchmarker.set_param(param);
benchmarker.set_dtype(0, dtype::Int8())
.set_dtype(1, dtype::Int8());
for (auto&& shape : shapes) {
double memaccess =
double(TensorLayout(shape, dtype::Int8())
.span()
.dist_byte()) *
2e-6;
auto time_ms = benchmarker.execs({shape, {}});
printf("execute %s, time %.4f ms, %.4f GB/s\n",
shape.to_string().c_str(), time_ms, memaccess / time_ms);
}
};
printf("mode -> constant; dtype -> int8\n");
{
TensorShapeArray shapes = {
{16, 3, 736, 1280},
};
Param param;
param.padding_mode = param::Padding::PaddingMode::CONSTANT;
param.front_offset_dim1 = 1;
run(shapes, param);
}
printf("mode -> replicate; dtype -> int8\n");
{
TensorShapeArray shapes = {
{16, 3, 736, 1280},
};
Param param;
param.padding_mode = param::Padding::PaddingMode::REPLICATE;
param.front_offset_dim1 = 1;
run(shapes, param);
}
printf("mode -> reflect; dtype -> int8\n");
{
TensorShapeArray shapes = {
{16, 3, 736, 1280},
};
Param param;
param.padding_mode = param::Padding::PaddingMode::REFLECT;
param.front_offset_dim1 = 1;
run(shapes, param);
}
}
// #endif
\ No newline at end of file
/**
* \file dnn/test/naive/padding.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "test/common/padding.h"
#include "megdnn/dtype.h"
#include "megdnn/oprs.h"
#include "test/common/checker.h"
#include "test/naive/fixture.h"
namespace megdnn {
namespace test {
TEST_F(NAIVE, PADDING) {
std::vector<padding::TestArg> args = padding::get_args();
Checker<Padding> checker(handle());
for(auto&& arg : args){
checker.set_param(arg.param)
.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.execs({arg.src, arg.dst});
}
}
TEST_F(NAIVE, PADDING_CONSTANT) {
Checker<Padding> checker(handle(), false);
param::Padding param;
param.padding_val = 10;
param.padding_mode = param::Padding::PaddingMode::CONSTANT;
param.front_offset_dim0 = 2;
param.front_offset_dim1 = 1;
param.front_offset_dim2 = 0;
param.front_offset_dim3 = 0;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 2;
param.back_offset_dim1 = 3;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({1, 1}, dtype::Float32(), {1}), {}},
            Testcase{{},
                     TensorValue({5, 5}, dtype::Float32(),
                                 {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                                  1,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                                  10, 10, 10})});
}
TEST_F(NAIVE, PADDING_REFLECT) {
Checker<Padding> checker(handle(), false);
param::Padding param;
param.padding_val = 10;
param.padding_mode = param::Padding::PaddingMode::REFLECT;
param.front_offset_dim0 = 2;
param.front_offset_dim1 = 0;
param.front_offset_dim2 = 0;
param.front_offset_dim3 = 0;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 3;
param.back_offset_dim1 = 0;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
checker.set_param(param).exect(
Testcase{TensorValue({5}, dtype::Float32(), {1,2,3,4,5}), {}},
Testcase{{},TensorValue({10}, dtype::Float32(), {3,2,1,2,3,4,5,4,3,2})}
);
}
TEST_F(NAIVE, PADDING_REPLICATE) {
Checker<Padding> checker(handle(), false);
param::Padding param;
param.padding_val = 10;
param.padding_mode = param::Padding::PaddingMode::REPLICATE;
param.front_offset_dim0 = 1;
param.front_offset_dim1 = 0;
param.front_offset_dim2 = 0;
param.front_offset_dim3 = 0;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 2;
param.back_offset_dim1 = 0;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
checker.set_param(param).exect(
Testcase{TensorValue({9}, dtype::Float32(), {1,2,3,4,5,6,7,8,9}), {}},
Testcase{{},TensorValue({12}, dtype::Float32(), {1,1,2,3,4,5,6,7,8,9,9,9})}
);
}
TEST_F(NAIVE, PADDING_REPLICATE2) {
Checker<Padding> checker(handle(), false);
param::Padding param;
param.padding_val = 10;
param.padding_mode = param::Padding::PaddingMode::REPLICATE;
param.front_offset_dim0 = 2;
param.front_offset_dim1 = 1;
param.front_offset_dim2 = 0;
param.front_offset_dim3 = 0;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 0;
param.back_offset_dim1 = 3;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
checker.set_param(param).exect(
Testcase{TensorValue({3,3}, dtype::Float32(), {1,2,3,4,5,6,7,8,9}), {}},
Testcase{{},TensorValue({5,7}, dtype::Float32(), {1,1,2,3,3,3,3,1,1,2,3,3,3,3,1,1,2,3,3,3,3,4,4,5,6,6,6,6,7,7,8,9,9,9,9})}
);
}
}  // namespace test
}  // namespace megdnn
\ No newline at end of file
......@@ -67,4 +67,9 @@ decl_opr('NvOf',
inputs=['src'], params='NvOf',
desc='opr Implements NVIDIA Optical Flow SDK.')
decl_opr('Padding',
inputs=['src'],
params='Padding',
desc='tensor padding opr.')
# vim: ft=python
......@@ -68,7 +68,7 @@ namespace opr {
//! current cumsum version
using CumsumV1 = opr::Cumsum;
MGB_SEREG_OPR(CumsumV1, 1);
#if MGB_CUDA
MGB_SEREG_OPR(NvOf, 1);
#endif
......
......@@ -1608,4 +1608,30 @@ void RelayoutFormat::init_output_format() {
// f}}}
//
/* f{{{ ======================= PaddingForward ======================= */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingForward);
MEGDNN_OPR_INIT1(PaddingForward, "padding")
#if MGB_ENABLE_GRAD
MGB_IMPL_OPR_GRAD(PaddingForward) {
mgb_assert(opr.input().size() == 1);
if (wrt_idx == 0) {
SymbolVar grad = PaddingBackward::make(out_grad[0], opr.input(0), opr.param());
return grad.node();
} else {
return InvalidGrad::make(opr, wrt_idx);
}
}
#endif
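// Note: the gradient of PaddingForward is expressed via PaddingBackward,
// with the forward input supplying the un-padded target shape; for CONSTANT
// mode this amounts to cropping out_grad back to the source layout, while
// REFLECT / REPLICATE additionally accumulate the gradients of mirrored /
// clamped output positions into their source elements.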
// f}}}
/* f{{{ ======================= PaddingBackward ======================= */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingBackward);
MEGDNN_OPR_INIT2(PaddingBackward, "padding_backward", 1, false);
// f}}}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -24,6 +24,35 @@ MGB_SEREG_MODIFY_SUBTENSOR_OPR(IncrSubtensor);
namespace mgb {
namespace serialization {
template<>
struct OprMaker<opr::Padding, 1> {
using Opr = opr::Padding;
using Param = Opr::Param;
static cg::OperatorNodeBase* make(
const Param& param, const cg::VarNodeArray& inputs,
ComputingGraph& graph, const OperatorNodeConfig& config) {
MGB_MARK_USED_VAR(graph);
if (inputs.size() == 1) {
return Opr::make(inputs[0], param, config).node()->owner_opr();
} else {
return nullptr;
}
}
};
template<>
struct OprMaker<opr::PaddingBackward, 2> {
using Opr = opr::PaddingBackward;
using Param = Opr::Param;
static cg::OperatorNodeBase* make(
const Param& param, const cg::VarNodeArray& inputs,
ComputingGraph& graph, const OperatorNodeConfig& config) {
MGB_MARK_USED_VAR(graph);
if (inputs.size() == 2) {
return Opr::make(inputs[0], inputs[1], param, config).node()->owner_opr();
} else {
return nullptr;
}
}
};
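// These OprMaker specializations let the deserializer rebuild Padding /
// PaddingBackward from a dumped graph; the arity check (1 input for forward,
// 2 for backward) returns nullptr on mismatch so loading fails cleanly.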
template<>
struct OprMaker<opr::Concat, 0>: public OprMakerVariadic<opr::Concat>{};
......@@ -185,6 +214,10 @@ namespace opr {
using RelayoutFormatV1 = opr::RelayoutFormat;
MGB_SEREG_OPR(RelayoutFormatV1, 1);
MGB_SEREG_OPR(Padding, 1);
MGB_SEREG_OPR(PaddingBackward, 2);
} // namespace opr
} // namespace mgb
......
......@@ -635,6 +635,26 @@ MGB_DEFINE_OPR_CLASS(RelayoutFormat,
const OperatorNodeConfig &config = {});
void init_output_format() override final;
};
/*!
* \brief pad the src tensor into a larger dst tensor, as configured by the
* per-dimension offsets and padding mode in the param
*/
MGB_DEFINE_OPR_CLASS(PaddingForward, intl::MegDNNOprWrapperFwd<megdnn::PaddingForward>) // {
public:
PaddingForward(VarNode* src, const Param& param, const OperatorNodeConfig& config);
static SymbolVar make(SymbolVar src, const Param& param = {}, const OperatorNodeConfig &config = {});
};
using Padding = PaddingForward;
/*!
* \brief backward of PaddingForward: maps the gradient of the padded
* output back to the shape of the forward input
*/
MGB_DEFINE_OPR_CLASS(PaddingBackward, intl::MegDNNOprWrapperBwd<megdnn::PaddingBackward>) // {
public:
PaddingBackward(VarNode* src, VarNode* in_for_shape, const Param& param, const OperatorNodeConfig& config);
static SymbolVar make(SymbolVar src, SymbolVar in_for_shape, const Param &param = {}, const OperatorNodeConfig &config = {});
};
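/*!
 * A minimal usage sketch (x is assumed to be an existing SymbolVar; the
 * values are illustrative):
 *
 *   opr::Padding::Param param;
 *   param.front_offset_dim0 = 1;
 *   param.back_offset_dim0 = 2;
 *   param.padding_mode = opr::Padding::Param::PaddingMode::CONSTANT;
 *   param.padding_val = 0.f;
 *   auto y = opr::Padding::make(x, param);  // dim 0 of y = dim 0 of x + 3
 */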
} // namespace opr
} // namespace mgb
......
#include "./legacy_checker.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/serialization/serializer.h"
#include "megbrain/test/autocheck.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"
#include "megdnn/dtype.h"
#include "megdnn/oprs/base.h"
#include <gmock/gmock.h>
#include <cmath>
#include <memory>
#include <random>
using namespace std;
using namespace mgb;
namespace {
TEST(TestOprDNN, PaddingForwardSerialization) {
using namespace serialization;
auto fname = output_file("PaddingForwardTest");
auto dump = [&]() {
opr::Padding::Param param;
param.padding_mode = megdnn::param::Padding::PaddingMode(0); // 0 == REPLICATE
param.front_offset_dim0 = 3;
param.front_offset_dim1 = 3;
param.front_offset_dim2 = 3;
param.front_offset_dim3 = 3;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 0;
param.back_offset_dim1 = 0;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
param.padding_val = 0;
auto cn = CompNode::load("xpu");
auto graph = ComputingGraph::make();
HostTensorND inp_host{cn, {32, 4, 24, 24}, dtype::Float32()};
auto inp = opr::ImmutableTensor::make(*graph, inp_host);
auto opr = opr::PaddingForward::make(inp, param, {});
auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
auto rst = dumper->dump({opr});
ASSERT_EQ(rst.outputs.size(), 1u);
};
auto load = [&]() {
auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
auto rst = loader->load();
ASSERT_EQ(rst.output_var_list.size(), 1u);
};
dump();
load();
}
} // namespace
\ No newline at end of file
......@@ -10,6 +10,7 @@
*/
#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"
#include "megbrain/test/autocheck.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/tensor_gen.h"
......@@ -2162,4 +2163,47 @@ TEST(TestParamPack, Split) {
test_param_pack_split<3>({{2, 9}, {123}, {5, 3}});
}
TEST(TestTensorManip, Padding_random) {
opr::Padding::Param param;
param.padding_mode = megdnn::param::Padding::PaddingMode(0); // 0 == REPLICATE
param.front_offset_dim0 = 3;
param.front_offset_dim1 = 3;
param.front_offset_dim2 = 3;
param.front_offset_dim3 = 3;
param.front_offset_dim4 = 0;
param.front_offset_dim5 = 0;
param.front_offset_dim6 = 0;
param.back_offset_dim0 = 0;
param.back_offset_dim1 = 0;
param.back_offset_dim2 = 0;
param.back_offset_dim3 = 0;
param.back_offset_dim4 = 0;
param.back_offset_dim5 = 0;
param.back_offset_dim6 = 0;
param.padding_val = 0;
using Checker = AutoOprChecker<1, 1>;
auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
return {opr::Padding::make(inputs[0], param)};
};
auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
auto opr = megdnn_naive_handle()->create_operator<megdnn::Padding>();
TensorShape out_shp{
inp[0]->as_megdnn().layout.shape[0] + param.front_offset_dim0 + param.back_offset_dim0,
inp[0]->as_megdnn().layout.shape[1] + param.front_offset_dim1 + param.back_offset_dim1,
inp[0]->as_megdnn().layout.shape[2] + param.front_offset_dim2 + param.back_offset_dim2,
inp[0]->as_megdnn().layout.shape[3] + param.front_offset_dim3 + param.back_offset_dim3};
opr->param() = param;
dest[0].resize(out_shp);
opr->exec(inp[0]->as_megdnn(), dest[0].as_megdnn(), {});
};
Checker::RunOptions opt;
opt.numdiff_max_err = 2e-3;
Checker(make_graph, fwd, CompNode::load("xpu0"))
.run({TensorShape{5, 5, 5, 5}}, opt)
.run({TensorShape{4, 3, 4, 5}}, opt)
.run({TensorShape{5, 4, 4, 5}}, opt);
}
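// The AutoOprChecker run above compares the graph-built Padding (and its
// autodiff gradient) against the naive megdnn kernel invoked in fwd();
// numdiff_max_err bounds the numeric-vs-symbolic gradient mismatch.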
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -113,6 +113,7 @@ union OperatorParam {
param.PermutationRNG = 79,
param.BetaRNG = 80,
param.SlidingWindowTranspose = 81,
param.Padding = 82,
}
table Operator {
......