提交 d63e484f 编写于 作者: L liutuo

deconv support variable output shape

上级 c2b06975
...@@ -231,6 +231,7 @@ struct Deconv2dFunctor : Deconv2dFunctorBase { ...@@ -231,6 +231,7 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
MaceStatus operator()(const Tensor *input, // NCHW MaceStatus operator()(const Tensor *input, // NCHW
const Tensor *filter, // OIHW const Tensor *filter, // OIHW
const Tensor *bias, const Tensor *bias,
const Tensor *output_shape_tensor,
Tensor *output, Tensor *output,
StatsFuture *future) { StatsFuture *future) {
MACE_UNUSED(future); MACE_UNUSED(future);
...@@ -240,10 +241,20 @@ struct Deconv2dFunctor : Deconv2dFunctorBase { ...@@ -240,10 +241,20 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
if (!from_caffe_) { // tensorflow if (!from_caffe_) { // tensorflow
std::vector<index_t> output_shape(4); std::vector<index_t> output_shape(4);
// Resolve the requested output shape into the local `output_shape`.
// When the `output_shape_` member holds four entries, they are copied with
// the index order [0, 3, 1, 2].
if (output_shape_.size() == 4) {
output_shape[0] = output_shape_[0]; output_shape[0] = output_shape_[0];
output_shape[1] = output_shape_[3]; output_shape[1] = output_shape_[3];
output_shape[2] = output_shape_[1]; output_shape[2] = output_shape_[1];
output_shape[3] = output_shape_[2]; output_shape[3] = output_shape_[2];
} else {
// Otherwise the shape comes from `output_shape_tensor`, which is checked
// for null and for holding exactly four elements before it is mapped.
MACE_CHECK_NOTNULL(output_shape_tensor);
MACE_CHECK(output_shape_tensor->size() == 4);
Tensor::MappingGuard output_shape_mapper(output_shape_tensor);
auto output_shape_data =
output_shape_tensor->data<int32_t>();
// NOTE(review): the four int32 values are copied in their stored order,
// whereas the branch above permutes the member's entries to [0, 3, 1, 2].
// Confirm the tensor is already in the layout the code below expects.
output_shape =
std::vector<index_t>(output_shape_data, output_shape_data + 4);
}
paddings_.clear(); paddings_.clear();
paddings_ = std::vector<int>(2, 0); paddings_ = std::vector<int>(2, 0);
CalcDeconvPaddingAndInputSize( CalcDeconvPaddingAndInputSize(
...@@ -326,6 +337,7 @@ struct Deconv2dFunctor<DeviceType::GPU, T> : Deconv2dFunctorBase { ...@@ -326,6 +337,7 @@ struct Deconv2dFunctor<DeviceType::GPU, T> : Deconv2dFunctorBase {
MaceStatus operator()(const Tensor *input, MaceStatus operator()(const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *bias, const Tensor *bias,
const Tensor *output_shape_tensor,
Tensor *output, Tensor *output,
StatsFuture *future); StatsFuture *future);
......
...@@ -167,6 +167,7 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()( ...@@ -167,6 +167,7 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *bias, const Tensor *bias,
const Tensor *output_shape_tensor,
Tensor *output, Tensor *output,
StatsFuture *future) { StatsFuture *future) {
MACE_CHECK_NOTNULL(input); MACE_CHECK_NOTNULL(input);
...@@ -174,6 +175,15 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()( ...@@ -174,6 +175,15 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()(
MACE_CHECK_NOTNULL(output); MACE_CHECK_NOTNULL(output);
if (!from_caffe_) { if (!from_caffe_) {
// When the `output_shape_` member does not hold four entries, fill it from
// the first four int32 values of `output_shape_tensor` (checked for null and
// for holding exactly four elements).
// NOTE(review): this assigns to the member itself, so once it has four
// entries the `size() != 4` test is false on any later call through the same
// functor object and the tensor is not read again. Confirm this is intended
// if the requested output shape can differ between runs.
if (output_shape_.size() != 4) {
MACE_CHECK_NOTNULL(output_shape_tensor);
MACE_CHECK(output_shape_tensor->size() == 4);
Tensor::MappingGuard output_shape_mapper(output_shape_tensor);
auto output_shape_data =
output_shape_tensor->data<int32_t>();
output_shape_ =
std::vector<index_t>(output_shape_data, output_shape_data + 4);
}
paddings_.clear(); paddings_.clear();
paddings_ = std::vector<int>(2, 0); paddings_ = std::vector<int>(2, 0);
CalcDeconvPaddingAndInputSize(input->shape().data(), filter->shape().data(), CalcDeconvPaddingAndInputSize(input->shape().data(), filter->shape().data(),
......
...@@ -40,17 +40,19 @@ class Deconv2dOp : public ConvPool2dOpBase<D, T> { ...@@ -40,17 +40,19 @@ class Deconv2dOp : public ConvPool2dOpBase<D, T> {
// Collects the op's inputs and output and forwards them to `functor_`.
// In the updated code, OUTPUT_SHAPE is fetched when the op has at least three
// inputs and BIAS when it has at least four; either is nullptr otherwise.
// NOTE(review): the previous code read BIAS as the third input. With this
// change an op that has exactly three inputs gets a null bias, and its third
// input is passed as the output-shape tensor. Confirm that every producer of
// Deconv2D ops was updated to the new input order.
MaceStatus Run(StatsFuture *future) override { MaceStatus Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT); const Tensor *input = this->Input(INPUT);
const Tensor *filter = this->Input(FILTER); const Tensor *filter = this->Input(FILTER);
const Tensor *bias = this->InputSize() >= 3 ? this->Input(BIAS) : nullptr; const Tensor *output_shape =
this->InputSize() >= 3 ? this->Input(OUTPUT_SHAPE) : nullptr;
const Tensor *bias = this->InputSize() >= 4 ? this->Input(BIAS) : nullptr;
Tensor *output = this->Output(OUTPUT); Tensor *output = this->Output(OUTPUT);
return functor_(input, filter, bias, output, future); return functor_(input, filter, bias, output_shape, output, future);
} }
private: private:
kernels::Deconv2dFunctor<D, T> functor_; kernels::Deconv2dFunctor<D, T> functor_;
protected: protected:
MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); MACE_OP_INPUT_TAGS(INPUT, FILTER, OUTPUT_SHAPE, BIAS);
MACE_OP_OUTPUT_TAGS(OUTPUT); MACE_OP_OUTPUT_TAGS(OUTPUT);
}; };
......
...@@ -23,6 +23,19 @@ void Register_Shape(OperatorRegistryBase *op_registry) { ...@@ -23,6 +23,19 @@ void Register_Shape(OperatorRegistryBase *op_registry) {
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
ShapeOp<DeviceType::CPU, float>); ShapeOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Shape")
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
ShapeOp<DeviceType::GPU, float>);
MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Shape")
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
ShapeOp<DeviceType::GPU, half>);
#endif
} }
} // namespace ops } // namespace ops
......
...@@ -151,6 +151,7 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -151,6 +151,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.Div.name: self.convert_elementwise, TFOpType.Div.name: self.convert_elementwise,
TFOpType.Min.name: self.convert_elementwise, TFOpType.Min.name: self.convert_elementwise,
TFOpType.Max.name: self.convert_elementwise, TFOpType.Max.name: self.convert_elementwise,
TFOpType.Maximum.name: self.convert_elementwise,
TFOpType.Neg.name: self.convert_elementwise, TFOpType.Neg.name: self.convert_elementwise,
TFOpType.Abs.name: self.convert_elementwise, TFOpType.Abs.name: self.convert_elementwise,
TFOpType.Pow.name: self.convert_elementwise, TFOpType.Pow.name: self.convert_elementwise,
...@@ -372,19 +373,21 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -372,19 +373,21 @@ class TensorflowConverter(base_converter.ConverterInterface):
dilation_val = [1, 1] dilation_val = [1, 1]
dilation_arg.ints.extend(dilation_val) dilation_arg.ints.extend(dilation_val)
else: else:
mace_check(len(tf_op.inputs) >= 3,
"deconv should have (>=) 3 inputs.")
output_shape_arg = op.arg.add() output_shape_arg = op.arg.add()
output_shape_arg.name = MaceKeyword.mace_output_shape_str output_shape_arg.name = MaceKeyword.mace_output_shape_str
if len(tf_op.inputs) >= 3: if tf_op.inputs[0].op.type == TFOpType.Const.name:
del op.input[1:] output_shape_value = \
output_shape_value =\
tf_op.inputs[0].eval().astype(np.int32).flat tf_op.inputs[0].eval().astype(np.int32).flat
output_shape_arg.ints.extend(output_shape_value) output_shape_arg.ints.extend(output_shape_value)
self._skip_tensor.add(tf_op.inputs[0].name)
del op.input[0]
op.input.extend([tf_op.inputs[2].name, tf_op.inputs[1].name])
else: else:
output_shape_value = tf_op.get_attr(tf_strides_str) output_shape_value = {}
output_shape_arg.ints.extend(output_shape_value) output_shape_arg.ints.extend(output_shape_value)
del op.input[:]
op.input.extend([tf_op.inputs[2].name,
tf_op.inputs[1].name,
tf_op.inputs[0].name])
def convert_elementwise(self, tf_op): def convert_elementwise(self, tf_op):
op = self.convert_general_op(tf_op) op = self.convert_general_op(tf_op)
......
...@@ -980,8 +980,10 @@ class Transformer(base_converter.ConverterInterface): ...@@ -980,8 +980,10 @@ class Transformer(base_converter.ConverterInterface):
if op.type == MaceOp.Conv2D.name \ if op.type == MaceOp.Conv2D.name \
or op.type == MaceOp.Deconv2D.name: or op.type == MaceOp.Deconv2D.name:
self.buffer_to_image(op, 1, OpenCLBufferType.CONV2D_FILTER) self.buffer_to_image(op, 1, OpenCLBufferType.CONV2D_FILTER)
if len(op.input) >= 3: if len(op.input) >= 3 and op.type == MaceOp.Conv2D.name:
self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT) self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT)
elif len(op.input) >= 4 and op.type == MaceOp.Deconv2D.name:
self.buffer_to_image(op, 3, OpenCLBufferType.ARGUMENT)
elif op.type == MaceOp.DepthwiseConv2d.name: elif op.type == MaceOp.DepthwiseConv2d.name:
self.buffer_to_image(op, 1, OpenCLBufferType.DW_CONV2D_FILTER) self.buffer_to_image(op, 1, OpenCLBufferType.DW_CONV2D_FILTER)
if len(op.input) >= 3: if len(op.input) >= 3:
......
...@@ -24,6 +24,7 @@ class MemoryOptimizer(object): ...@@ -24,6 +24,7 @@ class MemoryOptimizer(object):
self.op_mem = {} # op_name->mem_id self.op_mem = {} # op_name->mem_id
self.mem_block = {} # mem_id->[size] or mem_id->[x, y] self.mem_block = {} # mem_id->[size] or mem_id->[x, y]
self.total_mem_count = 0 self.total_mem_count = 0
self.total_cpu_mem_count = 0
self.input_ref_counter = {} self.input_ref_counter = {}
self.mem_ref_counter = {} self.mem_ref_counter = {}
...@@ -184,6 +185,15 @@ class GPUMemoryOptimizer(MemoryOptimizer): ...@@ -184,6 +185,15 @@ class GPUMemoryOptimizer(MemoryOptimizer):
for arg in op.arg: for arg in op.arg:
if arg.name == 'mode' and arg.i == 0: if arg.name == 'mode' and arg.i == 0:
return False return False
elif op.type == 'Shape':
for i in range(len(op.output)):
mem_id = self.total_cpu_mem_count
self.total_cpu_mem_count += 1
op_mem_block = self.get_op_mem_block(
op.type,
op.output_shape[i].dims)
self.mem_block[mem_id] = op_mem_block
return False
return op.type != 'ImageToBuffer' return op.type != 'ImageToBuffer'
def get_op_mem_block(self, op_type, output_shape): def get_op_mem_block(self, op_type, output_shape):
...@@ -191,13 +201,18 @@ class GPUMemoryOptimizer(MemoryOptimizer): ...@@ -191,13 +201,18 @@ class GPUMemoryOptimizer(MemoryOptimizer):
if op_type == 'WinogradTransform' or op_type == 'MatMul': if op_type == 'WinogradTransform' or op_type == 'MatMul':
mem_block[0] = output_shape[2] mem_block[0] = output_shape[2]
mem_block[1] = output_shape[0] * int((output_shape[1] + 3) / 4) mem_block[1] = output_shape[0] * int((output_shape[1] + 3) / 4)
elif op_type == 'Shape':
mem_block[0] = output_shape[0]
mem_block[1] = 1
else: else:
if len(output_shape) == 2: # only support fc/softmax if len(output_shape) == 2: # only support fc/softmax
mem_block[0] = int((output_shape[1] + 3) / 4) mem_block[0] = int((output_shape[1] + 3) / 4)
mem_block[1] = output_shape[0] mem_block[1] = output_shape[0]
else: elif len(output_shape) == 4:
mem_block[0] = output_shape[2] * int((output_shape[3] + 3) / 4) mem_block[0] = output_shape[2] * int((output_shape[3] + 3) / 4)
mem_block[1] = output_shape[0] * output_shape[1] mem_block[1] = output_shape[0] * output_shape[1]
else:
raise Exception('output shape dim size is not 2 or 4.')
return mem_block return mem_block
def mem_size(self, memory_block): def mem_size(self, memory_block):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册