diff --git a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc index e76c20f4bc48671091429432cdd13d929f3ccf85..9d17ab44c18f406c86d1c23fcd60ca22041cb462 100644 --- a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc @@ -114,7 +114,8 @@ HexagonDSPWrapper::HexagonDSPWrapper() { if (env_log_execute_time_str.empty()) { log_execute_time_ = false; } else { - log_execute_time_ = static_cast(std::stoi(env_log_execute_time_str)); + log_execute_time_ = static_cast( + std::atoi(env_log_execute_time_str.c_str())); } } diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index b0b4e9f477d51fea46b90f5f8f75dd236958983f..d999416959f96b58d9fc4c5a288a5cadb6065910 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -74,8 +74,8 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector &image_shape, const DataType dt, void **result) const { MACE_CHECK(image_shape.size() == 2, "Image shape's size must equal 2"); - VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " - << image_shape[1]; + MACE_LATENCY_LOGGER(1, "Allocate OpenCL image: ", + image_shape[0], ", ", image_shape[1]); if (ShouldMockRuntimeFailure()) { return MaceStatus::MACE_OUT_OF_RESOURCES; @@ -109,7 +109,7 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector &image_shape, } void OpenCLAllocator::Delete(void *buffer) const { - VLOG(3) << "Free OpenCL buffer"; + MACE_LATENCY_LOGGER(1, "Free OpenCL buffer"); if (buffer != nullptr) { cl::Buffer *cl_buffer = static_cast(buffer); delete cl_buffer; @@ -117,7 +117,7 @@ void OpenCLAllocator::Delete(void *buffer) const { } void OpenCLAllocator::DeleteImage(void *buffer) const { - VLOG(3) << "Free OpenCL image"; + MACE_LATENCY_LOGGER(1, "Free OpenCL image"); if (buffer != nullptr) { cl::Image2D *cl_image = static_cast(buffer); delete cl_image; @@ -125,7 +125,7 @@ void OpenCLAllocator::DeleteImage(void *buffer) const { } void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const { - VLOG(3) << "Map OpenCL buffer"; + MACE_LATENCY_LOGGER(1, "Map OpenCL buffer"); auto cl_buffer = static_cast(buffer); auto queue = opencl_runtime_->command_queue(); // TODO(heliangliang) Non-blocking call @@ -144,7 +144,7 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const { void *OpenCLAllocator::MapImage(void *buffer, const std::vector &image_shape, std::vector *mapped_image_pitch) const { - VLOG(3) << "Map OpenCL Image"; + MACE_LATENCY_LOGGER(1, "Map OpenCL Image"); MACE_CHECK(image_shape.size() == 2) << "Just support map 2d image"; auto cl_image = static_cast(buffer); std::array origin = {{0, 0, 0}}; @@ -164,7 +164,7 @@ void *OpenCLAllocator::MapImage(void *buffer, } void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const { - VLOG(3) << "Unmap OpenCL buffer/Image"; + MACE_LATENCY_LOGGER(1, "Unmap OpenCL buffer/Image"); auto cl_buffer = static_cast(buffer); auto queue = opencl_runtime_->command_queue(); cl_int error = queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr, diff --git a/mace/ops/deconv_2d.cc b/mace/ops/deconv_2d.cc index 3779174d063d29c14e0c33bb101305d88392826a..98298e0c9b709e51c9c8bda1a260bdd6dc8ed6e5 100644 --- a/mace/ops/deconv_2d.cc +++ b/mace/ops/deconv_2d.cc @@ -170,8 +170,8 @@ class Deconv2dOp : public Deconv2dOpBase { template<> class Deconv2dOp : public Deconv2dOpBase { public: - explicit Deconv2dOp(OpConstructContext *context) - : Deconv2dOpBase(context) { + explicit Deconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context), + dim_(Operation::GetRepeatedArgs("dim")) { MemoryType mem_type = MemoryType::GPU_IMAGE; if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) { kernel_ = make_unique(); @@ -219,12 +219,16 @@ class Deconv2dOp : public Deconv2dOpBase { std::vector out_shape; if (output_shape_tensor) { - Tensor::MappingGuard out_shape_guard(output_shape_tensor); - MACE_CHECK(output_shape_tensor->size() == 4, - "output shape should be 4-dims"); - out_shape = - std::vector(output_shape_tensor->data(), - output_shape_tensor->data() + 4); + if (dim_.size() < 2) { + Tensor::MappingGuard out_shape_guard(output_shape_tensor); + MACE_CHECK(output_shape_tensor->size() == 4, + "output shape should be 4-dims"); + out_shape = + std::vector(output_shape_tensor->data(), + output_shape_tensor->data() + 4); + } else { + out_shape = dim_; + } } std::vector in_paddings; std::vector out_paddings; @@ -249,6 +253,7 @@ class Deconv2dOp : public Deconv2dOpBase { } private: + std::vector dim_; std::unique_ptr kernel_; }; #endif // MACE_ENABLE_OPENCL diff --git a/mace/ops/opencl/image/resize_nearest_neighbor.cc b/mace/ops/opencl/image/resize_nearest_neighbor.cc index afb4b151d4ed0ea6ad17030025bf82123adf5d3d..9f9dd1c8d6a29a5c4f24ba33a350134c282c9a75 100644 --- a/mace/ops/opencl/image/resize_nearest_neighbor.cc +++ b/mace/ops/opencl/image/resize_nearest_neighbor.cc @@ -25,14 +25,22 @@ MaceStatus ResizeNearestNeighborKernel::Compute( OpContext *context, const Tensor *input, const Tensor *size, + const std::vector &dims, Tensor *output) { const index_t batch = input->dim(0); const index_t in_height = input->dim(1); const index_t in_width = input->dim(2); const index_t channels = input->dim(3); - Tensor::MappingGuard size_mapper(size); - const index_t out_height = size->data()[0]; - const index_t out_width = size->data()[1]; + index_t out_height = 0; + index_t out_width = 0; + if (dims.size() < 2) { + Tensor::MappingGuard size_mapper(size); + out_height = size->data()[0]; + out_width = size->data()[1]; + } else { + out_height = dims[0]; + out_width = dims[1]; + } const index_t channel_blocks = RoundUpDiv4(channels); const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/ops/opencl/image/resize_nearest_neighbor.h b/mace/ops/opencl/image/resize_nearest_neighbor.h index 98ef37b28944521123996fbb38f6688d90a277c0..9e2cec61a822e4e86e139e6bfe299771a94794d6 100644 --- a/mace/ops/opencl/image/resize_nearest_neighbor.h +++ b/mace/ops/opencl/image/resize_nearest_neighbor.h @@ -73,6 +73,7 @@ class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel { OpContext *context, const Tensor *input, const Tensor *size, + const std::vector &dims, Tensor *output) override; private: diff --git a/mace/ops/opencl/resize_nearest_neighbor.h b/mace/ops/opencl/resize_nearest_neighbor.h index b0178827ac6190d413b179b4a98c367d1e5f9c37..c98fc955e476e25db9b7a312fec93d5d13879544 100644 --- a/mace/ops/opencl/resize_nearest_neighbor.h +++ b/mace/ops/opencl/resize_nearest_neighbor.h @@ -15,6 +15,8 @@ #ifndef MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_ #define MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_ +#include + #include "mace/core/types.h" #include "mace/public/mace.h" #include "mace/utils/math.h" @@ -31,6 +33,7 @@ class OpenCLResizeNearestNeighborKernel { OpContext *context, const Tensor *input, const Tensor *size, + const std::vector &dims, Tensor *output) = 0; MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLResizeNearestNeighborKernel); }; diff --git a/mace/ops/resize_nearest_neighbor.cc b/mace/ops/resize_nearest_neighbor.cc index 89ed473c44e43c5dd4c6415fe2badfd9f738c844..6ac6b9e7157684805a7faf5a45ce9be169ba2af3 100644 --- a/mace/ops/resize_nearest_neighbor.cc +++ b/mace/ops/resize_nearest_neighbor.cc @@ -145,7 +145,7 @@ template<> class ResizeNearestNeighborOp : public Operation { public: explicit ResizeNearestNeighborOp(OpConstructContext *context) - : Operation(context) { + : Operation(context), dim_(Operation::GetRepeatedArgs("dim")) { bool align_corners = Operation::GetOptionalArg( "align_corners", false); if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) { @@ -163,10 +163,11 @@ class ResizeNearestNeighborOp : public Operation { "input must be 4-dimensional and size must be 1-dimensional.", input->dim_size(), size->dim_size()); - return kernel_->Compute(context, input, size, output); + return kernel_->Compute(context, input, size, dim_, output); } private: + std::vector dim_; std::unique_ptr kernel_; }; #endif // MACE_ENABLE_OPENCL diff --git a/tools/layers_validate.py b/tools/layers_validate.py index 893db5e4b4188433f733d24c7cc64a881a55b91b..b6a63b85b284c14bc84487ed3e19483cac493fc8 100644 --- a/tools/layers_validate.py +++ b/tools/layers_validate.py @@ -144,8 +144,9 @@ def convert(model_file, output_dir, layers): output_info.data_format = data_format output_info.dims.extend(op.output_shape[i].dims) output_info.data_type = mace_pb2.DT_FLOAT - output_info.scale = op.quantize_info[0].scale - output_info.zero_point = op.quantize_info[0].zero_point + if is_quantize: + output_info.scale = op.quantize_info[0].scale + output_info.zero_point = op.quantize_info[0].zero_point # modify output op if is_quantize: output_name = op.output[i] diff --git a/tools/python/transform/base_converter.py b/tools/python/transform/base_converter.py index 6db141cedc91e4141984bf2cf2fddbac6524af14..524027999983fcfb544182815e2217a56fe0d6e4 100644 --- a/tools/python/transform/base_converter.py +++ b/tools/python/transform/base_converter.py @@ -324,6 +324,7 @@ class TransformerRule(Enum): FP16_MATMUL_WEIGHT = 41 FP16_GATHER_WEIGHT = 42 QUANTIZE_LARGE_WEIGHTS = 43 + TRANSPOSE_SHAPE_TENSOR_TO_PARAM = 44 class ConverterInterface(object): @@ -534,6 +535,7 @@ class ConverterOption(object): TransformerRule.TRANSFORM_LSTMCELL_ZEROSTATE, TransformerRule.TRANSFORM_BASIC_LSTMCELL, TransformerRule.TRANSPOSE_RESHAPE_AND_FLATTEN, + TransformerRule.TRANSPOSE_SHAPE_TENSOR_TO_PARAM, TransformerRule.FOLD_RESHAPE, TransformerRule.TRANSFORM_MATMUL_TO_FC, # For StoB -> conv -> BtoS -> BN pattern diff --git a/tools/python/transform/transformer.py b/tools/python/transform/transformer.py index 2b67b1c9678332e4ccaa13fb4f40ecec44f2e9a1..4b5b9811344b985dff83b0e1f59fd6139c0b26e1 100644 --- a/tools/python/transform/transformer.py +++ b/tools/python/transform/transformer.py @@ -99,6 +99,8 @@ class Transformer(base_converter.ConverterInterface): TransformerRule.UPDATE_DATA_FORMAT: self.update_data_format, TransformerRule.TRANSPOSE_RESHAPE_AND_FLATTEN: self.transform_reshape_and_flatten, + TransformerRule.TRANSPOSE_SHAPE_TENSOR_TO_PARAM: + self.transform_shape_tensor_to_param, TransformerRule.TRANSPOSE_DATA_FORMAT: self.transpose_data_format, TransformerRule.CHECK_QUANTIZE_INFO: self.check_quantize_info, @@ -2119,9 +2121,21 @@ class Transformer(base_converter.ConverterInterface): mace_check(False, "Only support reshape and flatten") shape_tensor.int32_data.extend(dims) op.input.append(shape_tensor.name) - if len(op.input) == 2 and dim_arg is None: - if shape_tensor is None and op.input[1] in self._consts: - shape_tensor = self._consts[op.input[1]] + + def transform_shape_tensor_to_param(self): + kOpTypeInputIdxMap = { + MaceOp.ResizeNearestNeighbor.name: 1, + MaceOp.Deconv2D.name: 2, + MaceOp.Reshape.name: 1, + } + net = self._model + for op in net.op: + if op.type not in kOpTypeInputIdxMap: + continue + shape_idx = kOpTypeInputIdxMap[op.type] + dim_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_dim_str) + if len(op.input) > shape_idx and dim_arg is None: + shape_tensor = self._consts[op.input[shape_idx]] if shape_tensor is not None: dim_arg = op.arg.add() dim_arg.name = MaceKeyword.mace_dim_str