diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc
index 666840e9fe31877d04b7197871775a7b3ed0f3c6..03061f2f7d7afccef9f2cf6e15615dbe640c0a86 100644
--- a/mace/benchmark/benchmark_model.cc
+++ b/mace/benchmark/benchmark_model.cc
@@ -95,6 +95,8 @@ DataFormat ParseDataFormat(const std::string &data_format_str) {
     return DataFormat::NHWC;
   } else if (data_format_str == "NCHW") {
     return DataFormat::NCHW;
+  } else if (data_format_str == "OIHW") {
+    return DataFormat::OIHW;
   } else {
     return DataFormat::DF_NONE;
   }
diff --git a/mace/core/memory_optimizer.cc b/mace/core/memory_optimizer.cc
index 0b448ac97c508cbc4786823d8318750db3e683d0..7948c816eda56037e41458fd68a1f0692c424800 100644
--- a/mace/core/memory_optimizer.cc
+++ b/mace/core/memory_optimizer.cc
@@ -61,21 +61,29 @@ void MemoryOptimizer::UpdateTensorRef(const mace::OperatorDef *op_def) {
 }
 
 MemoryBlock MemoryOptimizer::CreateMemoryBlock(
-    std::vector<int64_t> shape,
+    const OperatorDef *op_def,
+    int output_idx,
     DataType dt,
-    mace::MemoryType mem_type) {
+    MemoryType mem_type) {
+  auto shape = std::vector<int64_t>(
+      op_def->output_shape(output_idx).dims().begin(),
+      op_def->output_shape(output_idx).dims().end());
   MemoryBlock block;
 #ifdef MACE_ENABLE_OPENCL
   if (mem_type == MemoryType::GPU_IMAGE) {
+    OpenCLBufferType buffer_type = OpenCLBufferType::IN_OUT_CHANNEL;
+    if (op_def->type() == "BufferTransform") {
+      buffer_type = static_cast<OpenCLBufferType>(
+          ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
+              *op_def, "buffer_type", OpenCLBufferType::IN_OUT_CHANNEL));
+    }
     std::vector<size_t> image_shape;
     if (shape.size() == 2) {
       shape = {shape[0], 1, 1, shape[1]};
     } else {
       MACE_CHECK(shape.size() == 4) << "GPU only support 2D/4D input";
     }
-    OpenCLUtil::CalImage2DShape(shape,
-                                OpenCLBufferType::IN_OUT_CHANNEL,
-                                &image_shape);
+    OpenCLUtil::CalImage2DShape(shape, buffer_type, &image_shape);
     block.set_x(image_shape[0]);
     block.set_y(image_shape[1]);
     return block;
@@ -93,7 +101,7 @@ MemoryBlock MemoryOptimizer::CreateMemoryBlock(
 
 void MemoryOptimizer::Optimize(
     const mace::OperatorDef *op_def,
-    const std::unordered_map<std::string, MemoryType> &mem_types) {
+    const std::unordered_map<std::string, MemoryType> *mem_types) {
   MACE_LATENCY_LOGGER(2, "Optimize memory");
   if (op_def->output_size() != op_def->output_shape_size()) {
     VLOG(1) << op_def->name()
@@ -127,22 +135,15 @@ void MemoryOptimizer::Optimize(
     int best_mem_id = -1;
     MemoryType mem_type = MemoryType::CPU_BUFFER;
     if (device == DeviceType::GPU) {
-      mem_type = mem_types.at(op_def->output(i));
+      mem_type = mem_types->at(op_def->output(i));
     }
-    auto shape = std::vector<int64_t>(
-        op_def->output_shape(i).dims().begin(),
-        op_def->output_shape(i).dims().end());
-    MemoryBlock op_mem_block = CreateMemoryBlock(shape, dt, mem_type);
+    MemoryBlock op_mem_block = CreateMemoryBlock(op_def, i, dt, mem_type);
     MemoryBlock best_mem_block;
     if (IsMemoryReuseOp(op_def->type())) {
       if (tensor_mem_map_.count(op_def->input(0)) == 1) {
         best_mem_id = tensor_mem_map_.at(op_def->input(0)).mem_id;
       }
     } else {
-      auto shape = std::vector<int64_t>(
-          op_def->output_shape(i).dims().begin(),
-          op_def->output_shape(i).dims().end());
-
       int64_t op_mem_size = op_mem_block.x() * op_mem_block.y();
       int64_t best_added_mem_size = LLONG_MAX;
       int64_t best_wasted_mem_size = LLONG_MAX;
diff --git a/mace/core/memory_optimizer.h b/mace/core/memory_optimizer.h
index 2b05aa01e4a7944dc3d53c92d34735211203746e..986c5450280184990b426b18d99b886ee6f8fcac 100644
--- a/mace/core/memory_optimizer.h
+++ b/mace/core/memory_optimizer.h
@@ -92,8 +92,9 @@ class MemoryOptimizer {
   static bool IsMemoryReuseOp(const std::string &op_type);
   void UpdateTensorRef(const std::string &tensor_name);
   void UpdateTensorRef(const OperatorDef *op_def);
 
-  void Optimize(const OperatorDef *op_def,
-                const std::unordered_map<std::string, MemoryType> &mem_types);
+  void Optimize(
+      const OperatorDef *op_def,
+      const std::unordered_map<std::string, MemoryType> *mem_types = nullptr);
 
   const std::vector<MemoryBlock> &mem_blocks() const;
 
@@ -102,9 +103,11 @@ class MemoryOptimizer {
   std::string DebugInfo() const;
 
  private:
-  MemoryBlock CreateMemoryBlock(std::vector<int64_t> shape,
-                                DataType dt,
-                                MemoryType mem_type);
+  MemoryBlock CreateMemoryBlock(
+      const OperatorDef *op_def,
+      int output_idx,
+      DataType dt,
+      MemoryType mem_type);
 
  private:
   std::unordered_map<std::string, int> tensor_ref_count_;
diff --git a/mace/core/net.cc b/mace/core/net.cc
index 4bcbbb70e5378ff1dd76620e1dedcc61831dd48d..613757b7554a33fa452ccaf2606d95b939055a4e 100644
--- a/mace/core/net.cc
+++ b/mace/core/net.cc
@@ -38,12 +38,15 @@ namespace {
 struct InternalOutputInfo {
   InternalOutputInfo(const MemoryType mem_type,
                      const DataType dtype,
+                     const DataFormat data_format,
                      const std::vector<index_t> &shape,
                      int op_idx)
-      : mem_type(mem_type), dtype(dtype), shape(shape), op_idx(op_idx) {}
+      : mem_type(mem_type), dtype(dtype), data_format(data_format),
+        shape(shape), op_idx(op_idx) {}
 
   MemoryType mem_type;  // transformed memory type
   DataType dtype;
+  DataFormat data_format;
   std::vector<index_t> shape;  // tensor shape
   int op_idx;  // operation which generate the tensor
 };
@@ -132,13 +135,6 @@ SerialNet::SerialNet(const OpRegistryBase *op_registry,
                   target_device->cpu_runtime()->policy(),
                   target_device->cpu_runtime()->use_gemmlowp())) {
   MACE_LATENCY_LOGGER(1, "Constructing SerialNet");
-  // output tensor : related information
-  std::unordered_map<std::string, InternalOutputInfo> output_map;
-  // used for memory optimization
-  std::unordered_map<std::string, MemoryType> output_mem_map;
-  std::unordered_set<std::string> transformed_set;
-  // add input information
-  MemoryType target_mem_type;
   // quantize model flag
   bool is_quantize_model = IsQuantizedModel(*net_def);
   // Tensor Shape map
@@ -161,7 +157,6 @@ SerialNet::SerialNet(const OpRegistryBase *op_registry,
 
   bool has_data_format = false;
   if (target_device_->device_type() == DeviceType::CPU) {
-    target_mem_type = MemoryType::CPU_BUFFER;
    for (auto &input_info : net_def->input_info()) {
       std::vector<index_t> input_shape =
           std::vector<index_t>(input_info.dims().begin(),
@@ -178,26 +173,37 @@ SerialNet::SerialNet(const OpRegistryBase *op_registry,
         // NHWC -> NCHW
         input_shape =
             TransposeShape<index_t, index_t>(input_shape, {0, 3, 1, 2});
+        input_data_format = DataFormat::NCHW;
       }
-      output_map.emplace(input_info.name(), InternalOutputInfo(
-          target_mem_type, DataType::DT_FLOAT, input_shape, -1));
     }
   }
 
-
 #ifdef MACE_ENABLE_OPENCL
-  else {  // GPU  NOLINT[readability/braces]
+  // output tensor : related information
+  std::unordered_map<std::string, InternalOutputInfo> output_map;
+  // used for memory optimization
+  std::unordered_map<std::string, MemoryType> output_mem_map;
+  std::unordered_set<std::string> transformed_set;
+  // add input information
+  MemoryType target_mem_type;
+  // default data format of output tensor
+  DataFormat default_output_df = DataFormat::DF_NONE;
+  if (target_device_->device_type() == DeviceType::GPU) {
     target_mem_type = MemoryType::GPU_BUFFER;
     for (auto &input_info : net_def->input_info()) {
-      has_data_format = static_cast<DataFormat>(
-          input_info.data_format()) == NHWC;
+      DataFormat input_data_format = static_cast<DataFormat>(
+          input_info.data_format());
+      has_data_format = input_data_format != DataFormat::DF_NONE;
       std::vector<index_t> input_shape =
           std::vector<index_t>(input_info.dims().begin(),
                                input_info.dims().end());
       // update tensor shape map
       tensor_shape_map[input_info.name()] = input_shape;
       output_map.emplace(input_info.name(), InternalOutputInfo(
-          target_mem_type, DataType::DT_FLOAT, input_shape, -1));
+          target_mem_type, DataType::DT_FLOAT, input_data_format,
+          input_shape, -1));
     }
+    default_output_df =
+        has_data_format ? DataFormat::NHWC : DataFormat::DF_NONE;
   }
 #endif  // MACE_ENABLE_OPENCL
@@ -242,11 +248,13 @@ SerialNet::SerialNet(const OpRegistryBase *op_registry,
                 << output_info.mem_type << " to "
                 << wanted_in_mem_type << ", from Data Type "
                 << output_info.dtype << " to "
-                << wanted_in_dt;
+                << wanted_in_dt << ". with data format "
+                << output_info.data_format;
         std::string input_name = op_def->input(i);
         op_def->set_input(i, t_input_name);
         auto input_shape = output_info.shape;
         if (output_info.mem_type == MemoryType::CPU_BUFFER &&
+            output_info.data_format == DataFormat::NCHW &&
             input_shape.size() == 4) {
           // NCHW -> NHWC
           input_shape = TransposeShape<index_t, index_t>(input_shape,
                                                          {0, 2, 3, 1});
         }
         auto transform_op_def = OpenCLUtil::CreateTransformOpDef(
-            input_name, input_shape, t_input_name,
-            wanted_in_dt, wanted_in_mem_type, has_data_format);
+            input_name, input_shape, t_input_name, wanted_in_dt,
+            construct_context.GetInputOpenCLBufferType(i),
+            wanted_in_mem_type, has_data_format);
         OpConstructContext t_construct_context(ws_);
         auto transform_op = CreateOperation(
             op_registry,
@@ -295,6 +304,7 @@ SerialNet::SerialNet(const OpRegistryBase *op_registry,
               InternalOutputInfo(
                   out_mem_type,
                   dt,
+                  default_output_df,
                   op_def->output_shape().empty() ?
                   std::vector<index_t>() :
                   std::vector<index_t>(
@@ -343,6 +353,7 @@ SerialNet::SerialNet(const OpRegistryBase *op_registry,
             internal_output_info.shape,
             output_info.name(),
             output_info.data_type(),
+            OpenCLBufferType::IN_OUT_CHANNEL,
             target_mem_type,
             output_has_data_format);
         auto transform_op = CreateOperation(
@@ -366,7 +377,11 @@ SerialNet::SerialNet(const OpRegistryBase *op_registry,
   for (auto &op : operators_) {
     VLOG(2) << "Operator " << op->debug_def().name() << "<" << op->device_type()
             << ", " << op->debug_def().type() << ">";
-    mem_optimizer->Optimize(op->operator_def().get(), output_mem_map);
+#ifdef MACE_ENABLE_OPENCL
+    mem_optimizer->Optimize(op->operator_def().get(), &output_mem_map);
+#else
+    mem_optimizer->Optimize(op->operator_def().get());
+#endif  // MACE_ENABLE_OPENCL
   }
   VLOG(1) << mem_optimizer->DebugInfo();
 }
@@ -448,7 +463,7 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) {
         bool transpose_a = op->GetOptionalArg<bool>("transpose_a", false);
         kernels = op->Input(0)->shape();
         if (transpose_a) {
-          std::swap(kernels[kernels.size()-2], kernels[kernels.size()-1]);
+          std::swap(kernels[kernels.size() - 2], kernels[kernels.size() - 1]);
         }
       } else if (type.compare("FullyConnected") == 0) {
         kernels = op->Input(1)->shape();
@@ -494,16 +509,16 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) {
         Tensor::MappingGuard guard(op->Output(i));
         auto *output_data = op->Output(i)->data<float>();
         for (index_t j = 0; j < op->Output(i)->size(); ++j) {
-            int index = static_cast<int>((output_data[j] - min_v) / bin_v);
-            if (index < 0)
-              index = 0;
-            else if (index > bin_size-1)
-              index = bin_size-1;
-            bin_distribution[index]++;
+          int index = static_cast<int>((output_data[j] - min_v) / bin_v);
+          if (index < 0)
+            index = 0;
+          else if (index > bin_size - 1)
+            index = bin_size - 1;
+          bin_distribution[index]++;
         }
         LOG(INFO) << "Tensor range @@" << op->debug_def().output(i)
-                  << "@@" << min_v << "," << max_v<< "@@"
-                  << MakeString(bin_distribution);
+                  << "@@" << min_v << "," << max_v << "@@"
+                  << MakeString(bin_distribution);
       }
     }
   }
diff --git a/mace/core/operator.cc b/mace/core/operator.cc
index 319b0548d6b75794c3061862ee62599af38cdd7f..8fae1bd8a710f0fb9f6536960ae195ab6b94cba1 100644
--- a/mace/core/operator.cc
+++ b/mace/core/operator.cc
@@ -86,6 +86,27 @@ DataType OpConstructContext::GetInputDataType(size_t idx) const {
   return input_data_types_[idx];
 }
 
+#ifdef MACE_ENABLE_OPENCL
+void OpConstructContext::SetInputOpenCLBufferType(
+    size_t idx, OpenCLBufferType buffer_type) {
+  if (input_opencl_buffer_types_.empty()) {
+    // the default inputs' memory types are same as output memory type.
+    input_opencl_buffer_types_.resize(operator_def_->input_size(),
+                                      OpenCLBufferType::IN_OUT_CHANNEL);
+  }
+  MACE_CHECK(idx < input_opencl_buffer_types_.size());
+  input_opencl_buffer_types_[idx] = buffer_type;
+}
+OpenCLBufferType OpConstructContext::GetInputOpenCLBufferType(
+    size_t idx) const {
+  if (input_opencl_buffer_types_.empty()) {
+    return OpenCLBufferType::IN_OUT_CHANNEL;
+  }
+  MACE_CHECK(idx < input_opencl_buffer_types_.size());
+  return input_opencl_buffer_types_[idx];
+}
+#endif  // MACE_ENABLE_OPENCL
+
 OpInitContext::OpInitContext(Workspace *ws, Device *device)
     : ws_(ws), device_(device) {}
 
diff --git a/mace/core/operator.h b/mace/core/operator.h
index 03a0f0749954b052b9b2dae558c0fed36612f5e5..e59af9ab166a5ace99bc7cc59b17a025cc0b1645 100644
--- a/mace/core/operator.h
+++ b/mace/core/operator.h
@@ -26,6 +26,9 @@
 #include "mace/core/tensor.h"
 #include "mace/core/workspace.h"
 #include "mace/proto/mace.pb.h"
+#ifdef MACE_ENABLE_OPENCL
+#include "mace/core/runtime/opencl/opencl_util.h"
+#endif  // MACE_ENABLE_OPENCL
 
 namespace mace {
 
@@ -72,6 +75,11 @@ class OpConstructContext {
 
   DataType GetInputDataType(size_t idx) const;
 
+#ifdef MACE_ENABLE_OPENCL
+  void SetInputOpenCLBufferType(size_t idx, OpenCLBufferType buffer_type);
+  OpenCLBufferType GetInputOpenCLBufferType(size_t idx) const;
+#endif  // MACE_ENABLE_OPENCL
+
  private:
   std::shared_ptr<OperatorDef> operator_def_;
   Workspace *ws_;
@@ -81,6 +89,9 @@ class OpConstructContext {
   std::vector<MemoryType> input_mem_types_;
   std::vector<DataType> input_data_types_;
   MemoryType output_mem_type_;  // there is only one output memory type now.
+#ifdef MACE_ENABLE_OPENCL
+  std::vector<OpenCLBufferType> input_opencl_buffer_types_;
+#endif  // MACE_ENABLE_OPENCL
 };
 
 // memory_optimizer, device
diff --git a/mace/core/runtime/opencl/opencl_util.cc b/mace/core/runtime/opencl/opencl_util.cc
index ff409e571bab3d92febd5cd8918c9ca4072d40cb..764084385375d315690a1f913e92b7c96825ae63 100644
--- a/mace/core/runtime/opencl/opencl_util.cc
+++ b/mace/core/runtime/opencl/opencl_util.cc
@@ -151,6 +151,7 @@ std::shared_ptr<OperatorDef> OpenCLUtil::CreateTransformOpDef(
     const std::vector<mace::index_t> &input_shape,
     const std::string &output_name,
     const mace::DataType dt,
+    const OpenCLBufferType buffer_type,
     const mace::MemoryType mem_type,
     bool has_data_format) {
   std::unique_ptr<OperatorDef> op(new OperatorDef);
@@ -161,7 +162,7 @@ std::shared_ptr<OperatorDef> OpenCLUtil::CreateTransformOpDef(
   op->add_output(output_name);
   Argument *arg = op->add_arg();
   arg->set_name("buffer_type");
-  arg->set_i(static_cast<int>(OpenCLBufferType::IN_OUT_CHANNEL));
+  arg->set_i(static_cast<int>(buffer_type));
   arg = op->add_arg();
   arg->set_name("mem_type");
   arg->set_i(static_cast<int>(mem_type));
diff --git a/mace/core/runtime/opencl/opencl_util.h b/mace/core/runtime/opencl/opencl_util.h
index cae05f831cc5198a3e1e88a1edfc1222e1f4a15f..ea0e239ee17c6826f23a73412ebc0a71d6dd25cf 100644
--- a/mace/core/runtime/opencl/opencl_util.h
+++ b/mace/core/runtime/opencl/opencl_util.h
@@ -48,6 +48,7 @@ class OpenCLUtil {
       const std::vector<mace::index_t> &input_shape,
       const std::string &output_name,
       const mace::DataType dt,
+      const OpenCLBufferType buffer_type,
       const MemoryType mem_type,
       bool has_data_format);
 };
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index ae999b05df7b7cc1df91cf4a716ea1b48da1b7e8..9c5e1b469013ea2981b65938094f0c93f65c78f9 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -97,8 +97,6 @@ inline std::ostream &operator<<(std::ostream &os, unsigned char c) {
 }
 }  // namespace numerical_chars
 
-enum FilterDataFormat { HWOI = 100, OIHW = 101, HWIO = 102, OHWI = 103 };
-
 class Tensor {
  public:
   Tensor(Allocator *alloc, DataType type,
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index 57844248d57137240382b64752dba309393b7065..8009fda180a7d186ec9e27b0c0751cd34eeb0a11 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -68,7 +68,7 @@ const Tensor *Workspace::GetTensor(const std::string &name) const {
   if (tensor_map_.count(name)) {
     return tensor_map_.at(name).get();
   } else {
-    LOG(WARNING) << "Tensor " << name << " does not exist.";
+    VLOG(1) << "Tensor " << name << " does not exist.";
   }
   return nullptr;
 }
diff --git a/mace/examples/cli/example.cc b/mace/examples/cli/example.cc
index 8ab0b0d4a6852e37af8d2bf894e494d17792724b..d1ea38d105e40124aa5e4f29d032b1743aafe0ab 100644
--- a/mace/examples/cli/example.cc
+++ b/mace/examples/cli/example.cc
@@ -175,6 +175,8 @@ DataFormat ParseDataFormat(const std::string &data_format_str) {
     return DataFormat::NHWC;
   } else if (data_format_str == "NCHW") {
     return DataFormat::NCHW;
+  } else if (data_format_str == "OIHW") {
+    return DataFormat::OIHW;
   } else {
     return DataFormat::DF_NONE;
   }
diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc
index 2d0acb450aadf9d60b285e4fb71a748c1870fa4f..7024731e767ae4831ba87b2141c462165d6c56b4 100644
--- a/mace/libmace/mace.cc
+++ b/mace/libmace/mace.cc
@@ -291,6 +291,9 @@ MaceTensor::MaceTensor(const std::vector<int64_t> &shape,
                        std::shared_ptr<float> data,
                        const DataFormat format) {
   MACE_CHECK_NOTNULL(data.get());
+  MACE_CHECK(format == DataFormat::NHWC || format == DataFormat::NCHW
+                 || format == OIHW,
+             "MACE only support NHWC, NCHW and OIHW formats of input now.");
   impl_ = make_unique<MaceTensor::Impl>();
   impl_->shape = shape;
   impl_->data = data;
diff --git a/mace/ops/common/conv_pool_2d_util.cc b/mace/ops/common/conv_pool_2d_util.cc
index 8634cf2cb8333d03a97b131692c84d5f5249cab5..ade33c59002d3924123eede8687269de3abb2119 100644
--- a/mace/ops/common/conv_pool_2d_util.cc
+++ b/mace/ops/common/conv_pool_2d_util.cc
@@ -24,7 +24,7 @@ namespace ops {
 void CalcPaddingAndOutputSize(const index_t *input_shape,
                               const DataFormat input_format,
                               const index_t *filter_shape,
-                              const FilterDataFormat filter_format,
+                              const DataFormat filter_format,
                               const int *dilations,
                               const int *strides,
                               Padding padding,
@@ -137,7 +137,7 @@ void CalcNHWCPaddingAndOutputSize(const index_t *input_shape,  // NHWC
 void CalcOutputSize(const index_t *input_shape,
                     const DataFormat input_format,
                     const index_t *filter_shape,
-                    const FilterDataFormat filter_format,
+                    const DataFormat filter_format,
                     const int *padding_size,
                     const int *dilations,
                     const int *strides,
diff --git a/mace/ops/common/conv_pool_2d_util.h b/mace/ops/common/conv_pool_2d_util.h
index db359ee92b02a88c48555ada851047f3ebe7f2e5..e8d0d335f1e0900cf1c265817cbcd73dd63c66b3 100644
--- a/mace/ops/common/conv_pool_2d_util.h
+++ b/mace/ops/common/conv_pool_2d_util.h
@@ -35,7 +35,7 @@ namespace ops {
 void CalcPaddingAndOutputSize(const index_t *input_shape,
                               const DataFormat input_format,
                               const index_t *filter_shape,
-                              const FilterDataFormat filter_format,
+                              const DataFormat filter_format,
                               const int *dilations,
                               const int *strides,
                               Padding padding,
@@ -61,7 +61,7 @@ void CalcNHWCPaddingAndOutputSize(const index_t *input_shape,
 void CalcOutputSize(const index_t *input_shape,
                     const DataFormat input_format,
                     const index_t *filter_shape,
-                    const FilterDataFormat filter_format,
+                    const DataFormat filter_format,
                     const int *padding_size,
                     const int *dilations,
                     const int *strides,
diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc
index 22130afc966c4141e8ee18245c78a9b2cbb12afc..22b13c268de07a10ffc4dfc06fdad69c953a37f5 100644
--- a/mace/ops/depthwise_conv2d.cc
+++ b/mace/ops/depthwise_conv2d.cc
@@ -500,13 +500,19 @@ class DepthwiseConv2dOp<DeviceType::GPU, T> : public DepthwiseConv2dOpBase {
       kernel_ = make_unique<opencl::buffer::DepthwiseConv2dKernel<T>>();
     }
     context->set_output_mem_type(mem_type);
-    // Transform filter tensor to target format
-    MACE_CHECK(TransformFilter<T>(
-        context,
-        operator_def_.get(),
-        1,
-        OpenCLBufferType::DW_CONV2D_FILTER,
-        mem_type) == MaceStatus::MACE_SUCCESS);
+    Tensor *filter_tensor = context->workspace()->GetTensor(
+        operator_def_->input(1));
+    if (filter_tensor != nullptr && filter_tensor->is_weight()) {
+      // Transform filter tensor to target format
+      MACE_CHECK(TransformFilter<T>(
+          context,
+          operator_def_.get(),
+          1,
+          OpenCLBufferType::DW_CONV2D_FILTER,
+          mem_type) == MaceStatus::MACE_SUCCESS);
+    } else {
+      context->SetInputOpenCLBufferType(1, OpenCLBufferType::DW_CONV2D_FILTER);
+    }
     if (operator_def_->input_size() > 2) {
       MACE_CHECK(TransformFilter<T>(
           context, operator_def_.get(), 2, OpenCLBufferType::ARGUMENT, mem_type)
diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h
index f60fff1d39cb86e51b8c63dae1f94fc4089efb41..034f5ddafdb56a580fd938fcd03517a069fb876b 100644
--- a/mace/ops/ops_test_util.h
+++ b/mace/ops/ops_test_util.h
@@ -259,9 +259,9 @@ class OpsTestNet {
 
   template <typename T>
   void TransformFilterDataFormat(const std::string &src_name,
-                                 const FilterDataFormat src_format,
+                                 const DataFormat src_format,
                                  const std::string &dst_name,
-                                 const FilterDataFormat dst_format) {
+                                 const DataFormat dst_format) {
     Tensor *input = ws_.GetTensor(src_name);
     Tensor *output = ws_.CreateTensor(
         dst_name,
diff --git a/mace/public/mace.h b/mace/public/mace.h
index 575ca32877374badf249a3b7bcad89f2e740793e..00ac4c2cf9bcbb0b8a6f07a0c9fd1e167da2b757 100644
--- a/mace/public/mace.h
+++ b/mace/public/mace.h
@@ -34,7 +34,10 @@ class NetDef;
 
 enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 };
 
-enum DataFormat { DF_NONE = 0, NHWC = 1, NCHW = 2};
+enum DataFormat {
+  DF_NONE = 0, NHWC = 1, NCHW = 2,
+  HWOI = 100, OIHW = 101, HWIO = 102, OHWI = 103
+};
 
 enum GPUPerfHint {
   PERF_DEFAULT = 0,
diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py
index 0d309d40ade3bd50e660fdeae7ed71ee7055fb3d..0cc66b653fd9fc6b600c2fc5629fd10a775784a1 100644
--- a/mace/python/tools/converter.py
+++ b/mace/python/tools/converter.py
@@ -43,6 +43,7 @@ data_format_map = {
     'NONE': cvt.DataFormat.DF_NONE,
     'NHWC': cvt.DataFormat.NHWC,
     'NCHW': cvt.DataFormat.NCHW,
+    'OIHW': cvt.DataFormat.OIHW,
 }
 
 
diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py
index 4d2b841f58df13f839aae6a4d9391c4bc0f803a4..8d478c4797c9794aa5fa03ac27b64d0390a1bda6 100644
--- a/mace/python/tools/converter_tool/base_converter.py
+++ b/mace/python/tools/converter_tool/base_converter.py
@@ -28,9 +28,6 @@ class DataFormat(Enum):
     DF_NONE = 0
     NHWC = 1
     NCHW = 2
-
-
-class FilterFormat(Enum):
     HWIO = 100
     OIHW = 101
     HWOI = 102
@@ -571,11 +568,11 @@ class ConverterUtil(object):
         arg = ConverterUtil.get_arg(net, MaceKeyword.mace_filter_format_str)
         if arg is None:
             return None
-        elif arg.i == FilterFormat.HWIO.value:
-            return FilterFormat.HWIO
-        elif arg.i == FilterFormat.HWOI.value:
-            return FilterFormat.HWOI
-        elif arg.i == FilterFormat.OIHW.value:
-            return FilterFormat.OIHW
+        elif arg.i == DataFormat.HWIO.value:
+            return DataFormat.HWIO
+        elif arg.i == DataFormat.HWOI.value:
+            return DataFormat.HWOI
+        elif arg.i == DataFormat.OIHW.value:
+            return DataFormat.OIHW
         else:
             return None
diff --git a/mace/python/tools/converter_tool/caffe_converter.py b/mace/python/tools/converter_tool/caffe_converter.py
index 3231ea9fa58b9f6e43470250c2997f37b3ed87c3..db9e0da90c3058f3214989beb45fb43205e5882f 100644
--- a/mace/python/tools/converter_tool/caffe_converter.py
+++ b/mace/python/tools/converter_tool/caffe_converter.py
@@ -27,7 +27,6 @@ from mace.python.tools.converter_tool.base_converter import ActivationType
 from mace.python.tools.converter_tool.base_converter import EltwiseType
 from mace.python.tools.converter_tool.base_converter import FrameworkType
 from mace.python.tools.converter_tool.base_converter import DataFormat
-from mace.python.tools.converter_tool.base_converter import FilterFormat
 from mace.python.tools.converter_tool.base_converter import MaceOp
 from mace.python.tools.converter_tool.base_converter import MaceKeyword
 from mace.python.tools.converter_tool.base_converter import ConverterUtil
@@ -194,7 +193,7 @@ class CaffeConverter(base_converter.ConverterInterface):
         }
         self._option = option
         self._mace_net_def = mace_pb2.NetDef()
-        ConverterUtil.set_filter_format(self._mace_net_def, FilterFormat.OIHW)
+        ConverterUtil.set_filter_format(self._mace_net_def, DataFormat.OIHW)
         self._caffe_net = CaffeNet()
         self._caffe_layers = caffe_pb2.NetParameter()
         caffe_weights = caffe_pb2.NetParameter()
diff --git a/mace/python/tools/converter_tool/onnx_converter.py b/mace/python/tools/converter_tool/onnx_converter.py
index 6befa478318ec9f3fb5644317acf3b3f38ecb6bc..4fa41b38afacc76d41831fb080129763cc687379 100644
--- a/mace/python/tools/converter_tool/onnx_converter.py
+++ b/mace/python/tools/converter_tool/onnx_converter.py
@@ -27,7 +27,6 @@ from mace.python.tools.converter_tool.base_converter import ReduceType
 from mace.python.tools.converter_tool.base_converter import FrameworkType
 from mace.python.tools.converter_tool.base_converter import RoundMode
 from mace.python.tools.converter_tool.base_converter import DataFormat
-from mace.python.tools.converter_tool.base_converter import FilterFormat
 from mace.python.tools.converter_tool.base_converter import MaceOp
 from mace.python.tools.converter_tool.base_converter import MaceKeyword
 from mace.python.tools.converter_tool.base_converter import ConverterUtil
@@ -370,7 +369,7 @@ class OnnxConverter(base_converter.ConverterInterface):
         self._option = option
         self._mace_net_def = mace_pb2.NetDef()
         self._data_format = DataFormat.NCHW
-        ConverterUtil.set_filter_format(self._mace_net_def, FilterFormat.OIHW)
+        ConverterUtil.set_filter_format(self._mace_net_def, DataFormat.OIHW)
 
         onnx_model = onnx.load(src_model_file)
         ir_version = onnx_model.ir_version
diff --git a/mace/python/tools/converter_tool/shape_inference.py b/mace/python/tools/converter_tool/shape_inference.py
index 3e472216efa3651a663a32ee2db729497d059ff2..61d622016eb8ac7d2b1b56be8f02f3093d060306 100644
--- a/mace/python/tools/converter_tool/shape_inference.py
+++ b/mace/python/tools/converter_tool/shape_inference.py
@@ -20,7 +20,6 @@ import six
 
 from mace.python.tools.converter_tool.transformer import Transformer
 from mace.python.tools.converter_tool.base_converter import DataFormat
-from mace.python.tools.converter_tool.base_converter import FilterFormat
 from mace.python.tools.converter_tool.base_converter import MaceOp
 from mace.python.tools.converter_tool.base_converter import MaceKeyword
 from mace.python.tools.converter_tool.base_converter import ConverterUtil
@@ -129,7 +128,7 @@ class ShapeInference(object):
 
         output_shape[0] = input_shape[0]
         if ConverterUtil.data_format(op) == DataFormat.NCHW \
-                and ConverterUtil.filter_format(self._net) == FilterFormat.OIHW:  # noqa
+                and ConverterUtil.filter_format(self._net) == DataFormat.OIHW:  # noqa
             # filter format: OIHW
             if op.type == MaceOp.DepthwiseConv2d.name:
                 output_shape[1] = filter_shape[0] * filter_shape[1]
@@ -170,7 +169,7 @@ class ShapeInference(object):
                                              MaceKeyword.mace_group_str)
         output_shape[0] = input_shape[0]
         if ConverterUtil.data_format(op) == DataFormat.NCHW \
-                and ConverterUtil.filter_format(self._net) == FilterFormat.OIHW:  # noqa
+                and ConverterUtil.filter_format(self._net) == DataFormat.OIHW:  # noqa
            # filter format: IOHW
            output_shape[1] = filter_shape[1]
            if group_arg is not None and group_arg.i > 1:
diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py
index eddb8d8685972a8dbc05070f444653288446657a..4f07b5eb11b1139f24c8d8531adc2623d60bf855 100644
--- a/mace/python/tools/converter_tool/tensorflow_converter.py
+++ b/mace/python/tools/converter_tool/tensorflow_converter.py
@@ -29,7 +29,6 @@ from mace.python.tools.converter_tool.base_converter import PadType
 from mace.python.tools.converter_tool.base_converter import FrameworkType
 from mace.python.tools.converter_tool.base_converter import ReduceType
 from mace.python.tools.converter_tool.base_converter import DataFormat
-from mace.python.tools.converter_tool.base_converter import FilterFormat
 from mace.python.tools.converter_tool.base_converter import MaceOp
 from mace.python.tools.converter_tool.base_converter import MaceKeyword
 from mace.python.tools.converter_tool.base_converter import ConverterUtil
@@ -280,7 +279,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
         }
         self._option = option
         self._mace_net_def = mace_pb2.NetDef()
-        ConverterUtil.set_filter_format(self._mace_net_def, FilterFormat.HWIO)
+        ConverterUtil.set_filter_format(self._mace_net_def, DataFormat.HWIO)
 
         # import tensorflow graph
         tf_graph_def = tf.GraphDef()
@@ -347,13 +346,19 @@ class TensorflowConverter(base_converter.ConverterInterface):
         for input_node in self._option.input_nodes.values():
             if node.name == input_node.name \
                     or node.name + ':0' == input_node.name:
+                input_shape = input_node.shape
+                if input_node.data_format == DataFormat.OIHW \
+                        and len(input_shape) == 4:
+                    # OIHW -> HWIO
+                    input_shape = [input_shape[2], input_shape[3],
+                                   input_shape[1], input_shape[0]]
                 del node.attr['shape'].shape.dim[:]
                 node.attr['shape'].shape.dim.extend([
                     tensor_shape_pb2.TensorShapeProto.Dim(size=i) for i in
-                    input_node.shape
+                    input_shape
                 ])
                 self._placeholders[node.name + ':0'] = \
-                    np.zeros(shape=input_node.shape, dtype=float)
+                    np.zeros(shape=input_shape, dtype=float)
 
     @staticmethod
     def get_scope(tensor_name):
diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py
index 91ffee89be82c6c621d9b9ae599346ef9d62149d..7b6fba839893b7cb60b1af7d16a71c362e1e748e 100644
--- a/mace/python/tools/converter_tool/transformer.py
+++ b/mace/python/tools/converter_tool/transformer.py
@@ -25,7 +25,6 @@ from mace.python.tools.converter_tool.base_converter import DataFormat
 from mace.python.tools.converter_tool.base_converter import DeviceType
 from mace.python.tools.converter_tool.base_converter import EltwiseType
 from mace.python.tools.converter_tool.base_converter import FrameworkType
-from mace.python.tools.converter_tool.base_converter import FilterFormat
 from mace.python.tools.converter_tool.base_converter import MaceKeyword
 from mace.python.tools.converter_tool.base_converter import MaceOp
 from mace.python.tools.converter_tool.base_converter import PaddingMode
@@ -149,12 +148,12 @@ class Transformer(base_converter.ConverterInterface):
         filter_format_value = ConverterUtil.get_arg(self._model,
                                                     MaceKeyword.mace_filter_format_str).i  # noqa
         filter_format = None
-        if filter_format_value == FilterFormat.HWIO.value:
-            filter_format = FilterFormat.HWIO
-        elif filter_format_value == FilterFormat.OIHW.value:
-            filter_format = FilterFormat.OIHW
-        elif filter_format_value == FilterFormat.HWOI.value:
-            filter_format = FilterFormat.HWOI
+        if filter_format_value == DataFormat.HWIO.value:
+            filter_format = DataFormat.HWIO
+        elif filter_format_value == DataFormat.OIHW.value:
+            filter_format = DataFormat.OIHW
+        elif filter_format_value == DataFormat.HWOI.value:
+            filter_format = DataFormat.HWOI
         else:
             mace_check(False, "filter format %d not supported" %
                        filter_format_value)
@@ -614,14 +613,14 @@ class Transformer(base_converter.ConverterInterface):
                     offset = self._consts[consumer_op.input[2]]
                     idx = 0
                     filter_format = self.filter_format()
-                    if filter_format == FilterFormat.HWIO:
+                    if filter_format == DataFormat.HWIO:
                         for hwi in six.moves.range(filter.dims[0]
                                                    * filter.dims[1]
                                                    * filter.dims[2]):
                             for o in six.moves.range(filter.dims[3]):
                                 filter.float_data[idx] *= scale.float_data[o]
                                 idx += 1
-                    elif filter_format == FilterFormat.OIHW:
+                    elif filter_format == DataFormat.OIHW:
                         for o in six.moves.range(filter.dims[0]):
                             for hwi in six.moves.range(filter.dims[1]
                                                        * filter.dims[2]
@@ -673,7 +672,7 @@ class Transformer(base_converter.ConverterInterface):
                     idx = 0
                     filter_format = self.filter_format()
                     # in deconv op O and I channel is switched
-                    if filter_format == FilterFormat.HWIO:
+                    if filter_format == DataFormat.HWIO:
                         for hw in six.moves.range(filter.dims[0]
                                                   * filter.dims[1]):
                             for o in six.moves.range(filter.dims[2]):
@@ -681,7 +680,7 @@ class Transformer(base_converter.ConverterInterface):
                                     filter.float_data[idx] *=\
                                         scale.float_data[o]
                                     idx += 1
-                    elif filter_format == FilterFormat.OIHW:
+                    elif filter_format == DataFormat.OIHW:
                         for i in six.moves.range(filter.dims[0]):
                             for o in six.moves.range(filter.dims[1]):
                                 for hw in six.moves.range(filter.dims[2]
@@ -736,7 +735,7 @@ class Transformer(base_converter.ConverterInterface):
                     idx = 0
                     filter_format = self.filter_format()
 
-                    if filter_format == FilterFormat.HWIO:
+                    if filter_format == DataFormat.HWIO:
                         for hw in six.moves.range(filter.dims[0]
                                                   * filter.dims[1]):
                             for i in six.moves.range(filter.dims[2]):
@@ -744,7 +743,7 @@ class Transformer(base_converter.ConverterInterface):
                                     filter.float_data[idx] *= scale.float_data[
                                         i * filter.dims[3] + o]
                                     idx += 1
-                    elif filter_format == FilterFormat.OIHW:
+                    elif filter_format == DataFormat.OIHW:
                         for o in six.moves.range(filter.dims[0]):
                             for i in six.moves.range(filter.dims[1]):
                                 for hw in six.moves.range(filter.dims[2]
@@ -791,17 +790,17 @@ class Transformer(base_converter.ConverterInterface):
     @staticmethod
     def sort_filter_shape(filter_shape, filter_format):
         """Return filter shape in HWIO order"""
-        if filter_format == FilterFormat.HWIO:
+        if filter_format == DataFormat.HWIO:
             filter_height = filter_shape[0]
             filter_width = filter_shape[1]
             in_channels = filter_shape[2]
             out_channels = filter_shape[3]
-        elif filter_format == FilterFormat.OIHW:
+        elif filter_format == DataFormat.OIHW:
             filter_height = filter_shape[2]
             filter_width = filter_shape[3]
             in_channels = filter_shape[1]
             out_channels = filter_shape[0]
-        elif filter_format == FilterFormat.HWOI:
+        elif filter_format == DataFormat.HWOI:
             filter_height = filter_shape[0]
             filter_width = filter_shape[1]
             in_channels = filter_shape[3]
@@ -1006,9 +1005,9 @@ class Transformer(base_converter.ConverterInterface):
                 input_shape = list(input_op.output_shape[0].dims)
                 weight.dims[:] = [weight.dims[0]] + input_shape[1:]
                 if len(input_shape) == 2:
-                    if filter_format == FilterFormat.HWIO:
+                    if filter_format == DataFormat.HWIO:
                         weight.dims[:] = [1, 1] + weight.dims[:]
-                    elif filter_format == FilterFormat.OIHW:
+                    elif filter_format == DataFormat.OIHW:
                         weight.dims[:] = weight.dims[:] + [1, 1]
                     else:
                         mace_check("FC does not support filter format %s",
@@ -1141,9 +1140,9 @@ class Transformer(base_converter.ConverterInterface):
         if self._option.quantize and \
                 self._option.device == DeviceType.CPU.value:
             print("Transpose filters to OHWI")
-            if filter_format == FilterFormat.HWIO:
+            if filter_format == DataFormat.HWIO:
                 transpose_order = [3, 0, 1, 2]
-            elif filter_format == FilterFormat.OIHW:
+            elif filter_format == DataFormat.OIHW:
                 transpose_order = [0, 2, 3, 1]
             else:
                 mace_check("Quantize model does not support conv "
@@ -1172,20 +1171,21 @@ class Transformer(base_converter.ConverterInterface):
                     filter.dims[:] = filter_data.shape
                     transposed_deconv_filter.add(op.input[1])
 
-            self.set_filter_format(FilterFormat.OHWI)
+            self.set_filter_format(DataFormat.OHWI)
         elif self._option.quantize and \
                 self._option.device == DeviceType.HEXAGON.value:
             print("Transpose filters to HWIO/HWIM")
-            mace_check(filter_format == FilterFormat.HWIO,
+            mace_check(filter_format == DataFormat.HWIO,
                        "HEXAGON only support HWIO/HWIM filter format.")
         else:
             print("Transpose filters to OIHW/MIHW")
             # transpose filter to OIHW/MIHW for tensorflow (HWIO/HWIM)
-            if filter_format == FilterFormat.HWIO:
+            if filter_format == DataFormat.HWIO:
                 for op in net.op:
                     if (op.type == MaceOp.Conv2D.name
                             or op.type == MaceOp.Deconv2D.name
                             or op.type == MaceOp.DepthwiseConv2d.name) \
+                            and op.input[1] in self._consts \
                             and op.input[1] not in transposed_filter:
                         filter = self._consts[op.input[1]]
                         filter_data = np.array(filter.float_data).reshape(
@@ -1215,7 +1215,7 @@ class Transformer(base_converter.ConverterInterface):
                         weight.dims[:] = weight_data.shape
                         transposed_filter.add(op.input[1])
 
-            self.set_filter_format(FilterFormat.OIHW)
+            self.set_filter_format(DataFormat.OIHW)
         # deconv's filter's output channel and input channel is reversed
         for op in net.op:
             if op.type in [MaceOp.Deconv2D.name,
@@ -1296,7 +1296,7 @@ class Transformer(base_converter.ConverterInterface):
                     len(op.input) == 2 and \
                     op.input[1] in self._consts and \
                     len(op.output_shape[0].dims) == 2 and \
-                    filter_format == FilterFormat.HWIO and \
+                    filter_format == DataFormat.HWIO and \
                     op.input[0] in self._producer:
                 input_op = self._producer[op.input[0]]
                 input_shape = input_op.output_shape[0].dims
@@ -1329,7 +1329,7 @@ class Transformer(base_converter.ConverterInterface):
 
             # transform `fc1(2D) -> matmul` to `fc1(2D) -> fc1(2D)`
             if op.type == MaceOp.MatMul.name and \
-                    filter_format == FilterFormat.HWIO and \
+                    filter_format == DataFormat.HWIO and \
                     op.input[1] in self._consts:
                 producer = self._producer[op.input[0]]
                 weight = self._consts[op.input[1]]
diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc
index b6ee678600b770cd3cb2793200c51d2833a5c093..f480f7596afd82f2172e87d35c64733bc7d19820 100644
--- a/mace/tools/validation/mace_run.cc
+++ b/mace/tools/validation/mace_run.cc
@@ -108,6 +108,8 @@ DataFormat ParseDataFormat(const std::string &data_format_str) {
     return DataFormat::NHWC;
   } else if (data_format_str == "NCHW") {
     return DataFormat::NCHW;
+  } else if (data_format_str == "OIHW") {
+    return DataFormat::OIHW;
   } else {
     return DataFormat::DF_NONE;
   }
diff --git a/tools/common.py b/tools/common.py
index aa4bf9cd1cdf2780ac23b4fc229d4ebefbe409e9..b02e68a9b062237000a40fafa499a3d4f478d090 100644
--- a/tools/common.py
+++ b/tools/common.py
@@ -135,6 +135,7 @@ class DataFormat(object):
     NONE = "NONE"
    NHWC = "NHWC"
     NCHW = "NCHW"
+    OIHW = "OIHW"
 
 
 ################################
diff --git a/tools/converter.py b/tools/converter.py
index c1afcb58354a813164da8c36b214be19a501dff4..f9a963403af9ffe850ac5ec9d9ecdbac01670cab 100644
--- a/tools/converter.py
+++ b/tools/converter.py
@@ -97,6 +97,7 @@ DataFormatStrs = [
     "NONE",
     "NHWC",
     "NCHW",
+    "OIHW",
 ]
 
 
diff --git a/tools/validate.py b/tools/validate.py
index d4811ffa1e847c99d150c08fcfc3ef5f3baf2077..f970024e213cb509cbffe6f55b8f3bcd1a6a4e5c 100644
--- a/tools/validate.py
+++ b/tools/validate.py
@@ -178,6 +178,10 @@ def validate_tf_model(platform, device_type, model_file,
             if input_data_formats[i] == common.DataFormat.NCHW and\
                     len(input_shapes[i]) == 4:
                 input_value = input_value.transpose((0, 2, 3, 1))
+            elif input_data_formats[i] == common.DataFormat.OIHW and \
+                    len(input_shapes[i]) == 4:
+                # OIHW -> HWIO
+                input_value = input_value.transpose((2, 3, 1, 0))
             input_node = graph.get_tensor_by_name(
                 normalize_tf_tensor_name(input_names[i]))
             input_dict[input_node] = input_value