提交 d83cab22 编写于 作者: L luxuhui 提交者: 叶剑武

support reshape op on OpenCL

N/A
Signed-off-by: NLuxuhui <luxuhui@xiaomi.com>
上级 684d27fc
......@@ -31,11 +31,19 @@
namespace mace {
// Reports whether `op_type` is one of the op types listed as memory-reuse
// ops for the given memory type.
// NOTE(review): this span looks like a flattened diff hunk. The one-argument
// signature and the unconditional `return` below appear to be the pre-change
// lines, followed by their two-argument replacements; confirm against the
// repository before building.
bool MemoryOptimizer::IsMemoryReuseOp(const std::string &op_type) {
bool MemoryOptimizer::IsMemoryReuseOp(const std::string &op_type,
const MemoryType mem_type) {
// Op types consulted for every memory type other than GPU_IMAGE.
static const std::unordered_set<std::string> kReuseOp = {
"Reshape", "Identity", "Squeeze", "ExpandDims"
};
return kReuseOp.count(op_type) == 1;
// Op types consulted for GPU_IMAGE; unlike kReuseOp, "Reshape" is not listed.
static const std::unordered_set<std::string> kGpuImageReuseOp = {
"Identity", "Squeeze", "ExpandDims"
};
if (mem_type == MemoryType::GPU_IMAGE) {
return kGpuImageReuseOp.count(op_type) == 1;
} else {
return kReuseOp.count(op_type) == 1;
}
}
void MemoryOptimizer::UpdateTensorRef(const std::string &tensor_name) {
......@@ -142,7 +150,7 @@ void MemoryOptimizer::Optimize(
}
MemoryBlock op_mem_block = CreateMemoryBlock(op_def, i, dt, mem_type);
MemoryBlock best_mem_block;
if (IsMemoryReuseOp(op_def->type())) {
if (IsMemoryReuseOp(op_def->type(), mem_type)) {
if (tensor_mem_map_.count(op_def->input(0)) == 1) {
best_mem_id = tensor_mem_map_.at(op_def->input(0)).mem_id;
}
......
......@@ -90,7 +90,8 @@ class MemoryOptimizer {
};
public:
static bool IsMemoryReuseOp(const std::string &op_type);
static bool IsMemoryReuseOp(const std::string &op_type,
const MemoryType mem_type);
void UpdateTensorRef(const std::string &tensor_name);
void UpdateTensorRef(const OperatorDef *op_def);
void Optimize(
......
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/opencl/buffer/reshape.h"
#include <vector>
#include "mace/core/operator.h"
namespace mace {
namespace ops {
namespace opencl {
namespace buffer {
// Buffer-memory reshape: the output tensor is made to reuse the input
// tensor's buffer and is then given `new_shape`. `context` is not used.
// Always returns MACE_SUCCESS.
MaceStatus ReshapeKernel::Compute(OpContext *context,
                                  const Tensor *input,
                                  const std::vector<index_t> &new_shape,
                                  Tensor *output) {
  MACE_UNUSED(context);

  const Tensor &source = *input;
  Tensor &target = *output;

  // The buffer must be attached before the new shape is applied.
  target.ReuseTensorBuffer(source);
  target.Reshape(new_shape);

  return MaceStatus::MACE_SUCCESS;
}
} // namespace buffer
} // namespace opencl
} // namespace ops
} // namespace mace
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_BUFFER_RESHAPE_H_
#define MACE_OPS_OPENCL_BUFFER_RESHAPE_H_
#include "mace/ops/opencl/reshape.h"
#include <vector>
#include "mace/ops/opencl/helper.h"
namespace mace {
namespace ops {
namespace opencl {
namespace buffer {
// Reshape kernel declared under opencl::buffer; implements the
// OpenCLReshapeKernel interface.
class ReshapeKernel : public OpenCLReshapeKernel {
public:
// Takes no arguments and sets up no state.
ReshapeKernel() {}
// Overrides OpenCLReshapeKernel::Compute. Parameters: the op context, the
// input tensor, the requested shape, and the output tensor. Returns a
// MaceStatus. The definition is not part of this header.
MaceStatus Compute(OpContext *context,
const Tensor *input,
const std::vector<index_t> &new_shape,
Tensor *output) override;
};
} // namespace buffer
} // namespace opencl
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_BUFFER_RESHAPE_H_
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/opencl/image/reshape.h"
#include <vector>
#include <memory>
#include "mace/ops/opencl/image/buffer_to_image.h"
#include "mace/ops/opencl/image/image_to_buffer.h"
#include "mace/utils/memory.h"
namespace mace {
namespace ops {
namespace opencl {
namespace image {
// Creates the image-to-buffer and buffer-to-image transform kernels, then the
// DT_FLOAT intermediate tensor backed by the allocator of the context's device.
ReshapeKernel::ReshapeKernel(OpConstructContext *context) {
  i2bkernel_ = make_unique<opencl::image::ImageToBuffer>();
  b2ikernel_ = make_unique<opencl::image::BufferToImage>();

  auto *device_allocator = context->device()->allocator();
  inter_buffer_ = make_unique<Tensor>(device_allocator, DT_FLOAT);
  MACE_CHECK(inter_buffer_ != nullptr);
}
// Image-memory reshape done in three steps through the intermediate tensor:
//   1. transform `input` into inter_buffer_ with the image-to-buffer kernel,
//   2. resize inter_buffer_ to `new_shape`,
//   3. transform inter_buffer_ into `output` with the buffer-to-image kernel.
// The first failing step's status is returned; otherwise MACE_SUCCESS.
MaceStatus ReshapeKernel::Compute(OpContext *context,
                                  const Tensor *input,
                                  const std::vector<index_t> &new_shape,
                                  Tensor *output) {
  Tensor *staging = inter_buffer_.get();

  MACE_RETURN_IF_ERROR(i2bkernel_->Compute(
      context, input, OpenCLBufferType::IN_OUT_CHANNEL, 0, staging));

  MACE_RETURN_IF_ERROR(inter_buffer_->Resize(new_shape));

  MACE_RETURN_IF_ERROR(b2ikernel_->Compute(
      context, staging, OpenCLBufferType::IN_OUT_CHANNEL, 0, output));

  return MaceStatus::MACE_SUCCESS;
}
} // namespace image
} // namespace opencl
} // namespace ops
} // namespace mace
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_IMAGE_RESHAPE_H_
#define MACE_OPS_OPENCL_IMAGE_RESHAPE_H_
#include "mace/ops/opencl/reshape.h"
#include <vector>
#include <memory>
#include "mace/core/operator.h"
#include "mace/ops/opencl/helper.h"
#include "mace/ops/opencl/buffer_transform_kernel.h"
namespace mace {
namespace ops {
namespace opencl {
namespace image {
// Reshape kernel declared under opencl::image; implements the
// OpenCLReshapeKernel interface. It owns two buffer-transform kernels and an
// intermediate tensor.
class ReshapeKernel : public OpenCLReshapeKernel {
public:
// Constructed from the op construction context; the definition is not part
// of this header.
explicit ReshapeKernel(OpConstructContext *context);
// Overrides OpenCLReshapeKernel::Compute. Parameters: the op context, the
// input tensor, the requested shape, and the output tensor. Returns a
// MaceStatus.
MaceStatus Compute(OpContext *context,
const Tensor *input,
const std::vector<index_t> &new_shape,
Tensor *output) override;
private:
// Intermediate tensor owned by this kernel.
std::unique_ptr<Tensor> inter_buffer_;
// Transform kernel; presumably image-to-buffer, judging by the name.
std::unique_ptr<ops::OpenCLBufferTransformKernel> i2bkernel_;
// Transform kernel; presumably buffer-to-image, judging by the name.
std::unique_ptr<ops::OpenCLBufferTransformKernel> b2ikernel_;
};
} // namespace image
} // namespace opencl
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_IMAGE_RESHAPE_H_
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_RESHAPE_H_
#define MACE_OPS_OPENCL_RESHAPE_H_
#include <vector>
#include "mace/core/types.h"
#include "mace/public/mace.h"
#include "mace/utils/math.h"
namespace mace {
class OpContext;
class Tensor;
namespace ops {
// Abstract interface for OpenCL reshape kernels.
class OpenCLReshapeKernel {
public:
// Pure virtual entry point. Parameters: the op context, the input tensor,
// the requested shape, and the output tensor. Returns a MaceStatus.
virtual MaceStatus Compute(OpContext *context,
const Tensor *input,
const std::vector<index_t> &new_shape,
Tensor *output) = 0;
// NOTE(review): presumably declares an empty virtual destructor; the macro
// is defined elsewhere, so confirm there.
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLReshapeKernel);
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_RESHAPE_H_
......@@ -17,10 +17,61 @@
#include "mace/core/operator.h"
#include "mace/utils/math.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/reshape.h"
#include "mace/ops/opencl/buffer/reshape.h"
#endif
namespace mace {
namespace ops {
template <DeviceType D, class T>
namespace {
// Resolves the requested reshape dimensions into a concrete output shape.
//
// Each of the `num_dims` entries of `shape_data` is interpreted as follows:
//   -1  the single dimension to infer from the input's element count,
//    0  copy the input's dimension at the same index,
//   >0  use the value as given.
// The resolved dimensions are written to `*out_shape`, which is resized to
// `num_dims`. Violations (null arguments, more than one -1, other negative
// values, a 0 beyond the input's rank, or an element count that is not
// divisible when inferring) are reported through MACE_CHECK. Returns
// MACE_SUCCESS otherwise.
MaceStatus GetOutputShape(const Tensor *input,
                          const int32_t *shape_data,
                          const index_t num_dims,
                          std::vector<index_t> *out_shape) {
  MACE_CHECK(input != nullptr && shape_data != nullptr &&
             out_shape != nullptr);

  std::vector<index_t> &resolved = *out_shape;
  resolved.resize(num_dims);

  int unknown_idx = -1;
  index_t product = 1;
  for (int i = 0; i < num_dims; ++i) {
    if (shape_data[i] == -1) {
      MACE_CHECK(unknown_idx == -1, "Only one input size may be -1");
      unknown_idx = i;
      // Placeholder; overwritten once the known dimensions are multiplied.
      resolved[i] = 1;
      continue;
    }

    MACE_CHECK(shape_data[i] >= 0, "Shape must be non-negative: ",
               shape_data[i]);
    index_t extent = shape_data[i];
    if (shape_data[i] == 0) {
      MACE_CHECK(i < input->dim_size(),
                 "dims:0 out of input dims' range.");
      extent = input->dim(i);
    }
    resolved[i] = extent;
    product *= extent;
  }

  // Nothing to infer when no -1 entry was given.
  if (unknown_idx == -1) {
    return MaceStatus::MACE_SUCCESS;
  }

  MACE_CHECK(product != 0)
    << "Cannot infer shape if there is zero shape size.";
  const index_t missing = input->size() / product;
  MACE_CHECK(missing * product == input->size())
    << "Input size not match reshaped tensor size";
  resolved[unknown_idx] = missing;

  return MaceStatus::MACE_SUCCESS;
}
} // namespace
template<DeviceType D, class T>
class ReshapeOp : public Operation {
public:
explicit ReshapeOp(OpConstructContext *context)
......@@ -31,46 +82,14 @@ class ReshapeOp : public Operation {
MACE_UNUSED(context);
const Tensor *input = this->Input(INPUT);
const Tensor *shape = this->Input(SHAPE);
const index_t num_dims = shape->dim_size() == 0 ? 0 : shape->dim(0);
Tensor::MappingGuard shape_guard(shape);
const int32_t *shape_data = shape->data<int32_t>();
const index_t num_dims = shape->dim_size() == 0 ? 0 : shape->dim(0);
std::vector<index_t> out_shape;
MACE_RETURN_IF_ERROR(
GetOutputShape(input, shape_data, num_dims, &out_shape));
int unknown_idx = -1;
index_t product = 1;
std::vector<index_t> out_shape(num_dims);
index_t n = 0;
for (int i = 0; i < num_dims; ++i) {
if (shape_data[i] == -1) {
MACE_CHECK(unknown_idx == -1, "Only one input size may be -1");
unknown_idx = i;
out_shape[i] = 1;
} else {
MACE_CHECK(shape_data[i] >= 0, "Shape must be non-negative: ",
shape_data[i]);
if (shape_data[i] == 0) {
MACE_CHECK(i < input->dim_size(),
"dims:0 out of input dims' range.");
n = input->dim(i);
} else {
n = shape_data[i];
}
out_shape[i] = n;
product *= n;
}
}
if (unknown_idx != -1) {
MACE_CHECK(product != 0)
<< "Cannot infer shape if there is zero shape size.";
const index_t missing = input->size() / product;
MACE_CHECK(missing * product == input->size())
<< "Input size not match reshaped tensor size";
out_shape[unknown_idx] = missing;
}
Tensor *output = this->Output(OUTPUT);
// NHWC -> NCHW
if (has_df_ && D == DeviceType::CPU
&& out_shape.size() == 4 && shape->is_weight()) {
std::vector<int> dst_dims = {0, 3, 1, 2};
......@@ -79,6 +98,7 @@ class ReshapeOp : public Operation {
out_shape = trans_shape;
}
Tensor *output = this->Output(OUTPUT);
output->ReuseTensorBuffer(*input);
output->Reshape(out_shape);
......@@ -93,11 +113,46 @@ class ReshapeOp : public Operation {
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
#ifdef MACE_ENABLE_OPENCL
// GPU specialization of ReshapeOp. The requested shape is read from the
// "dim" repeated argument at construction, and the reshape itself is
// delegated to an OpenCLReshapeKernel chosen by the op's memory type.
template<>
class ReshapeOp<GPU, float> : public Operation {
 public:
  explicit ReshapeOp(OpConstructContext *context)
      : Operation(context),
        dim_(Operation::GetRepeatedArgs<int>("dim")) {
    const bool use_image =
        context->GetOpMemoryType() == MemoryType::GPU_IMAGE;
    if (!use_image) {
      kernel_ = make_unique<opencl::buffer::ReshapeKernel>();
    } else {
      kernel_ = make_unique<opencl::image::ReshapeKernel>(context);
    }
  }

  // Resolves the output shape from dim_ and the input tensor, then hands the
  // input, resolved shape and output to the selected kernel.
  MaceStatus Run(OpContext *context) override {
    const Tensor *src = this->Input(INPUT);

    std::vector<index_t> target_shape;
    MACE_RETURN_IF_ERROR(GetOutputShape(
        src, dim_.data(), static_cast<index_t>(dim_.size()), &target_shape));

    Tensor *dst = this->Output(OUTPUT);
    return kernel_->Compute(context, src, target_shape, dst);
  }

 private:
  // Requested dimensions, as read from the "dim" argument.
  std::vector<int> dim_;
  // Kernel selected in the constructor.
  std::unique_ptr<OpenCLReshapeKernel> kernel_;
  MACE_OP_INPUT_TAGS(INPUT, SHAPE);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
#endif
// Registers the "Reshape" op with `op_registry`: ReshapeOp for CPU with
// float, ReshapeOp for CPU with int32_t, and the GPU registration through
// MACE_REGISTER_GPU_OP.
// NOTE(review): the registration macros are defined elsewhere; what
// MACE_REGISTER_GPU_OP expands to when OpenCL is disabled is not visible
// here. Confirm.
void RegisterReshape(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Reshape", ReshapeOp,
DeviceType::CPU, float);
MACE_REGISTER_OP(op_registry, "Reshape", ReshapeOp,
DeviceType::CPU, int32_t);
MACE_REGISTER_GPU_OP(op_registry, "Reshape", ReshapeOp);
}
} // namespace ops
......
......@@ -179,6 +179,7 @@ MaceTransposableDataFormatOps = [MaceOp.Activation,
MaceOp.Eltwise,
MaceOp.Pad,
MaceOp.Reduce,
MaceOp.Reshape,
MaceOp.Softmax,
MaceOp.Split,
MaceOp.Squeeze,
......@@ -300,7 +301,7 @@ class TransformerRule(Enum):
FOLD_SQRDIFF_MEAN = 33
TRANSPOSE_MATMUL_WEIGHT = 34
FOLD_EMBEDDING_LOOKUP = 35
TRANSPOSE_CAFFE_RESHAPE_AND_FLATTEN = 36
TRANSPOSE_RESHAPE_AND_FLATTEN = 36
FOLD_FC_RESHAPE = 37
TRANSFORM_CHANNEL_SHUFFLE = 38
UPDATE_DATA_FORMAT = 39
......@@ -517,7 +518,7 @@ class ConverterOption(object):
TransformerRule.TRANSFORM_GLOBAL_POOLING,
TransformerRule.TRANSFORM_LSTMCELL_ZEROSTATE,
TransformerRule.TRANSFORM_BASIC_LSTMCELL,
TransformerRule.TRANSPOSE_CAFFE_RESHAPE_AND_FLATTEN,
TransformerRule.TRANSPOSE_RESHAPE_AND_FLATTEN,
TransformerRule.FOLD_RESHAPE,
TransformerRule.TRANSFORM_MATMUL_TO_FC,
# For StoB -> conv -> BtoS -> BN pattern
......
......@@ -97,11 +97,11 @@ class Transformer(base_converter.ConverterInterface):
self.add_opencl_informations,
TransformerRule.SORT_BY_EXECUTION: self.sort_by_execution,
TransformerRule.UPDATE_DATA_FORMAT: self.update_data_format,
TransformerRule.TRANSPOSE_RESHAPE_AND_FLATTEN:
self.transform_reshape_and_flatten,
TransformerRule.TRANSPOSE_DATA_FORMAT: self.transpose_data_format,
TransformerRule.CHECK_QUANTIZE_INFO:
self.check_quantize_info,
TransformerRule.TRANSPOSE_CAFFE_RESHAPE_AND_FLATTEN:
self.transform_caffe_reshape_and_flatten,
TransformerRule.TRANSFORM_CHANNEL_SHUFFLE:
self.transform_channel_shuffle,
TransformerRule.QUANTIZE_SPECIFIC_OPS_ONLY:
......@@ -1493,6 +1493,13 @@ class Transformer(base_converter.ConverterInterface):
print("Transpose crop args: %s(%s)"
% (op.name, op.type))
self.transpose_shape(offset_arg.ints, [0, 2, 3, 1])
elif op.type == MaceOp.Reshape.name:
for arg in op.arg:
if arg.name == MaceKeyword.mace_dim_str and \
len(arg.ints) == 4 and \
src_data_format == DataFormat.NCHW and \
has_data_format:
self.transpose_shape(arg.ints, [0, 2, 3, 1])
# transpose op output shape
if src_data_format == DataFormat.NCHW and \
......@@ -2048,14 +2055,16 @@ class Transformer(base_converter.ConverterInterface):
arg.i = mace_pb2.GPU_IMAGE if self._option.cl_mem_type == "image"\
else mace_pb2.GPU_BUFFER
def transform_caffe_reshape_and_flatten(self):
def transform_reshape_and_flatten(self):
net = self._model
for op in net.op:
if op.type == MaceOp.Reshape.name and \
len(op.input) == 1:
if op.type != MaceOp.Reshape.name:
continue
dim_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_dim_str)
shape_tensor = None
if len(op.input) == 1:
print("Transform Caffe Reshape")
dims = []
dim_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_dim_str)
axis_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_axis_str)
# transform caffe reshape op
if dim_arg:
......@@ -2080,6 +2089,13 @@ class Transformer(base_converter.ConverterInterface):
mace_check(False, "Only support reshape and flatten")
shape_tensor.int32_data.extend(dims)
op.input.append(shape_tensor.name)
if len(op.input) == 2 and dim_arg is None:
if shape_tensor is None and op.input[1] in self._consts:
shape_tensor = self._consts[op.input[1]]
if shape_tensor is not None:
dim_arg = op.arg.add()
dim_arg.name = MaceKeyword.mace_dim_str
dim_arg.ints.extend(shape_tensor.int32_data)
def fold_fc_reshape(self):
net = self._model
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册