提交 f1c57df7 编写于 作者: L liutuo

fix deconv bias add: apply the bias after the deconvolution kernel writes its output (previously it was added before, so the kernel overwrote it)

上级 8a45eb3f
......@@ -34,19 +34,24 @@
namespace mace {
namespace kernels {
// Identifies which framework a model was converted from. Deconv semantics
// differ between the two: for TENSORFLOW the op receives an explicit
// output-shape tensor as its 3rd input, while for CAFFE the output size is
// derived from paddings and the 3rd input (if any) is the bias.
// Plain enum (not enum class) on purpose: callers reference both bare
// `CAFFE` and qualified `FrameworkType::TENSORFLOW`.
enum FrameworkType {
  TENSORFLOW = 0,
  CAFFE = 1,
};
struct Deconv2dFunctorBase : OpKernel {
  // Base-class constructor: stores the deconvolution configuration shared by
  // the CPU and GPU functors. Pure member-initializer list, no logic.
  //
  // Args:
  //   context:         kernel execution context, forwarded to OpKernel.
  //   strides:         [stride_h, stride_w].
  //   padding_type:    padding mode (e.g. SAME/VALID), used when explicit
  //                    paddings are not given.
  //   paddings:        explicit padding values (may be empty).
  //   output_shape:    statically-known output shape; may be empty, in which
  //                    case the shape comes from the output_shape input tensor
  //                    at run time (TENSORFLOW models).
  //   model_type:      source framework (TENSORFLOW or CAFFE); selects how the
  //                    output shape and paddings are resolved in operator().
  //   activation:      fused activation applied to the output.
  //   relux_max_limit: presumably the upper clamp for a bounded-ReLU
  //                    activation — TODO confirm against DoActivation.
  Deconv2dFunctorBase(OpKernelContext *context,
                      const std::vector<int> &strides,
                      const Padding &padding_type,
                      const std::vector<int> &paddings,
                      const std::vector<index_t> &output_shape,
                      const FrameworkType model_type,
                      const ActivationType activation,
                      const float relux_max_limit)
      : OpKernel(context),
        strides_(strides),
        padding_type_(padding_type),
        paddings_(paddings),
        output_shape_(output_shape),
        model_type_(model_type),
        activation_(activation),
        relux_max_limit_(relux_max_limit) {}
......@@ -156,7 +161,7 @@ struct Deconv2dFunctorBase : OpKernel {
std::vector<int> strides_; // [stride_h, stride_w]
const Padding padding_type_;
std::vector<int> paddings_;
std::vector<index_t> output_shape_;
const FrameworkType model_type_;
const ActivationType activation_;
const float relux_max_limit_;
};
......@@ -171,14 +176,14 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
const std::vector<int> &strides,
const Padding &padding_type,
const std::vector<int> &paddings,
const std::vector<index_t> &output_shape,
const FrameworkType model_type,
const ActivationType activation,
const float relux_max_limit)
: Deconv2dFunctorBase(context,
strides,
padding_type,
paddings,
output_shape,
model_type,
activation,
relux_max_limit) {}
......@@ -277,19 +282,16 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
std::vector<int> paddings(2);
std::vector<int> out_paddings(2);
std::vector<index_t> output_shape(4);
if (paddings_.empty()) { // tensorflow
if (model_type_ == FrameworkType::TENSORFLOW) { // tensorflow
paddings = std::vector<int>(2, 0);
if (output_shape_.size() == 4) {
output_shape = output_shape_;
} else {
MACE_CHECK_NOTNULL(output_shape_tensor);
MACE_CHECK(output_shape_tensor->size() == 4);
Tensor::MappingGuard output_shape_mapper(output_shape_tensor);
auto output_shape_data =
output_shape_tensor->data<int32_t>();
output_shape =
std::vector<index_t>(output_shape_data, output_shape_data + 4);
}
MACE_CHECK_NOTNULL(output_shape_tensor);
MACE_CHECK(output_shape_tensor->size() == 4);
Tensor::MappingGuard output_shape_mapper(output_shape_tensor);
auto output_shape_data =
output_shape_tensor->data<int32_t>();
output_shape =
std::vector<index_t>(output_shape_data, output_shape_data + 4);
const index_t t = output_shape[1];
output_shape[1] = output_shape[3];
output_shape[3] = output_shape[2];
......@@ -437,21 +439,6 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
padded_out_h == output_shape[2] && padded_out_w == output_shape[3];
float *out_data = no_pad ? output_data : padded_out_data;
if (bias_data != nullptr) {
const index_t batch = output_shape[0];
const index_t channels = output_shape[1];
const index_t img_size = output_shape[2] * output_shape[3];
#pragma omp parallel for collapse(3)
for (index_t b = 0; b < batch; ++b) {
for (index_t c = 0; c < channels; ++c) {
for (index_t i = 0; i < img_size; ++i) {
output_data[(b * channels + c) * img_size + i] +=
bias_data[c];
}
}
}
}
deconv_func(input_data,
filter_data,
in_shape,
......@@ -466,7 +453,20 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
output_data);
}
  // Add the per-channel bias AFTER the deconvolution kernel has produced its
  // output — this is the fix of this commit: the kernel writes (not
  // accumulates into) output_data, so a bias added beforehand was lost.
  // Output layout here is NCHW (shape was permuted above), so bias_data[c]
  // is broadcast over the H*W plane of channel c.
  if (bias_data != nullptr) {
    const index_t batch = output_shape[0];
    const index_t channels = output_shape[1];
    const index_t img_size = output_shape[2] * output_shape[3];
    // Perfectly nested loops, as required by collapse(3).
#pragma omp parallel for collapse(3)
    for (index_t b = 0; b < batch; ++b) {
      for (index_t c = 0; c < channels; ++c) {
        for (index_t i = 0; i < img_size; ++i) {
          output_data[(b * channels + c) * img_size + i] +=
              bias_data[c];
        }
      }
    }
  }
DoActivation<float>(output_data,
output_data,
......@@ -501,7 +501,7 @@ struct Deconv2dFunctor<DeviceType::GPU, T> : Deconv2dFunctorBase {
const std::vector<int> &strides,
const Padding &padding_type,
const std::vector<int> &paddings,
const std::vector<index_t> &output_shape,
const FrameworkType model_type,
const ActivationType activation,
const float relux_max_limit);
......
......@@ -24,14 +24,14 @@ Deconv2dFunctor<DeviceType::GPU, T>::Deconv2dFunctor(
const std::vector<int> &strides,
const Padding &padding_type,
const std::vector<int> &paddings,
const std::vector<index_t> &output_shape,
const FrameworkType model_type,
const ActivationType activation,
const float relux_max_limit)
: Deconv2dFunctorBase(context,
strides,
padding_type,
paddings,
output_shape,
model_type,
activation,
relux_max_limit) {
if (context->device()->opencl_runtime()->UseImageMemory()) {
......@@ -55,19 +55,15 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()(
std::vector<int> paddings(2);
std::vector<int> out_paddings(2);
std::vector<index_t> output_shape(4);
if (paddings_.empty()) {
if (model_type_ == FrameworkType::TENSORFLOW) {
paddings = std::vector<int>(2, 0);
if (output_shape_.size() != 4) {
MACE_CHECK_NOTNULL(output_shape_tensor);
MACE_CHECK(output_shape_tensor->size() == 4);
Tensor::MappingGuard output_shape_mapper(output_shape_tensor);
auto output_shape_data =
output_shape_tensor->data<int32_t>();
output_shape =
std::vector<index_t>(output_shape_data, output_shape_data + 4);
} else {
output_shape = output_shape_;
}
MACE_CHECK_NOTNULL(output_shape_tensor);
MACE_CHECK(output_shape_tensor->size() == 4);
Tensor::MappingGuard output_shape_mapper(output_shape_tensor);
auto output_shape_data =
output_shape_tensor->data<int32_t>();
output_shape =
std::vector<index_t>(output_shape_data, output_shape_data + 4);
CalcDeconvPaddingAndInputSize(input->shape().data(),
filter->shape().data(),
strides_.data(),
......
......@@ -34,28 +34,39 @@ class Deconv2dOp : public Operator<D, T> {
static_cast<Padding>(OperatorBase::GetOptionalArg<int>(
"padding", static_cast<int>(SAME))),
OperatorBase::GetRepeatedArgs<int>("padding_values"),
OperatorBase::GetRepeatedArgs<index_t>("output_shape"),
static_cast<kernels::FrameworkType>(
OperatorBase::GetOptionalArg<int>("framework_type", 0)),
kernels::StringToActivationType(
OperatorBase::GetOptionalArg<std::string>("activation",
"NOOP")),
OperatorBase::GetOptionalArg<float>("max_limit", 0.0f)) {}
MaceStatus Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT);
const Tensor *filter = this->Input(FILTER);
const Tensor *output_shape =
this->InputSize() >= 3 ? this->Input(OUTPUT_SHAPE) : nullptr;
const Tensor *bias = this->InputSize() >= 4 ? this->Input(BIAS) : nullptr;
Tensor *output = this->Output(OUTPUT);
MACE_CHECK(this->InputSize() >= 2, "deconv needs >= 2 inputs.");
const Tensor *input = this->Input(0);
const Tensor *filter = this->Input(1);
const kernels::FrameworkType model_type =
static_cast<kernels::FrameworkType>(
OperatorBase::GetOptionalArg<int>("framework_type", 0));
if (model_type == kernels::CAFFE) {
const Tensor *bias = this->InputSize() >= 3 ? this->Input(2) : nullptr;
Tensor *output = this->Output(OUTPUT);
return functor_(input, filter, bias, output_shape, output, future);
return functor_(input, filter, bias, nullptr, output, future);
} else {
const Tensor *output_shape =
this->InputSize() >= 3 ? this->Input(2) : nullptr;
const Tensor *bias = this->InputSize() >= 4 ? this->Input(3) : nullptr;
Tensor *output = this->Output(OUTPUT);
return functor_(input, filter, bias, output_shape, output, future);
}
}
private:
kernels::Deconv2dFunctor<D, T> functor_;
protected:
MACE_OP_INPUT_TAGS(INPUT, FILTER, OUTPUT_SHAPE, BIAS);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
......
......@@ -49,28 +49,35 @@ static void Deconv2d(int iters,
net.AddRandomInput<D, float>("Filter",
{output_channels, channels, kernel_h,
kernel_w});
net.AddRandomInput<D, float>("Bias", {output_channels});
net.AddInputFromArray<D, int32_t>("OutputShape", {4},
{batch, out_h, out_w, output_channels});
if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputImage")
.Input("FilterImage")
.Input("OutputShape")
.Input("BiasImage")
.Output("Output")
.AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
.AddIntsArg("output_shape", {batch, out_h, out_w, output_channels})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
} else {
OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("Input")
.Input("Filter")
.Input("OutputShape")
.Input("Bias")
.Output("Output")
.AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
.AddIntsArg("output_shape", {batch, out_h, out_w, output_channels})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
}
......
此差异已折叠。
......@@ -70,6 +70,11 @@ class EltwiseType(Enum):
EQUAL = 10
class FrameworkType(Enum):
    # Source framework of the converted model. Mirrors the C++
    # kernels::FrameworkType enum and is written into the op graph as the
    # "framework_type" arg; Deconv2D input layout and output-shape handling
    # depend on it.
    TENSORFLOW = 0
    CAFFE = 1
MaceSupportedOps = [
'Activation',
'AddN',
......@@ -176,6 +181,7 @@ class MaceKeyword(object):
mace_seperate_buffer_str = 'seperate_buffer'
mace_scalar_input_index_str = 'scalar_input_index'
mace_opencl_mem_type = "opencl_mem_type"
mace_framework_type_str = "framework_type"
class TransformerRule(Enum):
......
......@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool import shape_inference
from mace.python.tools.converter_tool.base_converter import PoolingType
from mace.python.tools.converter_tool.base_converter import ActivationType
from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import FrameworkType
from mace.python.tools.converter_tool.base_converter import DataFormat
from mace.python.tools.converter_tool.base_converter import FilterFormat
from mace.python.tools.converter_tool.base_converter import MaceOp
......@@ -351,6 +352,10 @@ class CaffeConverter(base_converter.ConverterInterface):
data_type_arg.name = 'T'
data_type_arg.i = self._option.data_type
framework_type_arg = op.arg.add()
framework_type_arg.name = MaceKeyword.mace_framework_type_str
framework_type_arg.i = FrameworkType.CAFFE.value
ConverterUtil.add_data_format_arg(op, DataFormat.NCHW)
return op
......
......@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool.base_converter import PoolingType
from mace.python.tools.converter_tool.base_converter import PaddingMode
from mace.python.tools.converter_tool.base_converter import ActivationType
from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import FrameworkType
from mace.python.tools.converter_tool.base_converter import DataFormat
from mace.python.tools.converter_tool.base_converter import FilterFormat
from mace.python.tools.converter_tool.base_converter import MaceOp
......@@ -372,6 +373,10 @@ class TensorflowConverter(base_converter.ConverterInterface):
except ValueError:
data_type_arg.i = self._option.data_type
framework_type_arg = op.arg.add()
framework_type_arg.name = MaceKeyword.mace_framework_type_str
framework_type_arg.i = FrameworkType.TENSORFLOW.value
ConverterUtil.add_data_format_arg(op, DataFormat.NHWC)
return op
......@@ -414,13 +419,13 @@ class TensorflowConverter(base_converter.ConverterInterface):
"deconv should have (>=) 3 inputs.")
output_shape_arg = op.arg.add()
output_shape_arg.name = MaceKeyword.mace_output_shape_str
if tf_op.inputs[0].op.type == TFOpType.Const.name:
output_shape_value = \
tf_op.inputs[0].eval().astype(np.int32).flat
output_shape_arg.ints.extend(output_shape_value)
else:
output_shape_value = {}
output_shape_arg.ints.extend(output_shape_value)
# if tf_op.inputs[0].op.type == TFOpType.Const.name:
# output_shape_value = \
# tf_op.inputs[0].eval().astype(np.int32).flat
# output_shape_arg.ints.extend(output_shape_value)
# else:
# output_shape_value = {}
# output_shape_arg.ints.extend(output_shape_value)
del op.input[:]
op.input.extend([tf_op.inputs[2].name,
tf_op.inputs[1].name,
......
......@@ -26,6 +26,7 @@ from mace.python.tools.converter_tool.base_converter import ConverterUtil
from mace.python.tools.converter_tool.base_converter import DataFormat
from mace.python.tools.converter_tool.base_converter import DeviceType
from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import FrameworkType
from mace.python.tools.converter_tool.base_converter import FilterFormat
from mace.python.tools.converter_tool.base_converter import MaceKeyword
from mace.python.tools.converter_tool.base_converter import MaceOp
......@@ -810,12 +811,22 @@ class Transformer(base_converter.ConverterInterface):
net = self._model
for op in net.op:
if (((op.type == MaceOp.Conv2D.name
or op.type == MaceOp.Deconv2D.name
or op.type == MaceOp.DepthwiseConv2d.name
or op.type == MaceOp.FullyConnected.name)
and len(op.input) == 2)
or (op.type == MaceOp.WinogradInverseTransform.name
and len(op.input) == 1)) \
and len(op.input) == 1)
or (op.type == MaceOp.Deconv2D.name
and ((ConverterUtil.get_arg(
op,
MaceKeyword.mace_framework_type_str).i ==
FrameworkType.CAFFE.value
and len(op.input) == 2)
or (ConverterUtil.get_arg(
op,
MaceKeyword.mace_framework_type_str).i
== FrameworkType.TENSORFLOW.value
and len(op.input) == 3)))) \
and len(self._consumers.get(op.output[0], [])) == 1:
consumer_op = self._consumers[op.output[0]][0]
if consumer_op.type == MaceOp.BiasAdd.name:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册