diff --git a/docs/user_guide/advanced_usage.rst b/docs/user_guide/advanced_usage.rst index 8395c45b783588f047e51a9a0bedcae0a5a7bd11..bea021601d3632e1f29b5d85936c281e847e2538 100644 --- a/docs/user_guide/advanced_usage.rst +++ b/docs/user_guide/advanced_usage.rst @@ -314,14 +314,18 @@ Tuning for specific SoC's GPU └── opencl └── arm64-v8a    ├── moblinet-v2_compiled_opencl_kernel.MiNote3.sdm660.bin -    └── moblinet-v2_tuned_opencl_parameter.MiNote3.sdm660.bin +    ├── moblinet-v2_compiled_opencl_kernel.MiNote3.sdm660.bin.cc +    ├── moblinet-v2_tuned_opencl_parameter.MiNote3.sdm660.bin +    └── moblinet-v2_tuned_opencl_parameter.MiNote3.sdm660.bin.cc * **mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin** stands for the OpenCL binaries used for your models, which could accelerate the initialization stage. Details please refer to `OpenCL Specification `__. + * **mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin.cc** contains C++ source code which defines OpenCL binary data as const array. * **mobilenet-v2-tuned_opencl_parameter.MI6.msm8998.bin** stands for the tuned OpenCL parameters for the SoC. + * **mobilenet-v2-tuned_opencl_parameter.MI6.msm8998.bin.cc** contains C++ source code which defines the tuned OpenCL parameter data as const array. * **4. Deployment** * Change the names of files generated above for not collision and push them to **your own device's directory**. 
diff --git a/mace/codegen/BUILD b/mace/codegen/BUILD index 8a24594c15662dcb04d1c10772000acc0488f835..2aaf28fa13109be44b915c5236f0dce2b15de526 100644 --- a/mace/codegen/BUILD +++ b/mace/codegen/BUILD @@ -38,6 +38,18 @@ cc_library( copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], ) +cc_library( + name = "generated_opencl_binary", + srcs = ["opencl/opencl_binary.cc"], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], +) + +cc_library( + name = "generated_opencl_parameter", + srcs = ["opencl/opencl_parameter.cc"], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], +) + cc_library( name = "generated_version", srcs = ["version/version.cc"], diff --git a/mace/core/device_context.cc b/mace/core/device_context.cc index 4f5ff8b10e700be29a697a7cf92a583777222f7f..67af31b58d36a89e205565796e4f916149967d90 100644 --- a/mace/core/device_context.cc +++ b/mace/core/device_context.cc @@ -39,20 +39,30 @@ std::string FindFirstExistPath(const std::vector &paths) { GPUContext::GPUContext(const std::string &storage_path, const std::vector &opencl_binary_paths, - const std::string &opencl_parameter_path) + const std::string &opencl_parameter_path, + const unsigned char *opencl_binary_ptr, + const size_t opencl_binary_size, + const unsigned char *opencl_parameter_ptr, + const size_t opencl_parameter_size) : storage_factory_(new FileStorageFactory(storage_path)), - opencl_tuner_(new Tuner(opencl_parameter_path)) { - + opencl_tuner_(new Tuner(opencl_parameter_path, + opencl_parameter_ptr, + opencl_parameter_size)) { if (!storage_path.empty()) { opencl_cache_storage_ = storage_factory_->CreateStorage(kPrecompiledProgramFileName); } - std::string precompiled_binary_path = - FindFirstExistPath(opencl_binary_paths); - if (!precompiled_binary_path.empty()) { + if (opencl_binary_ptr != nullptr) { opencl_binary_storage_.reset( - new FileStorage(precompiled_binary_path)); + new ReadOnlyByteStreamStorage(opencl_binary_ptr, opencl_binary_size)); + 
} else { + std::string precompiled_binary_path = + FindFirstExistPath(opencl_binary_paths); + if (!precompiled_binary_path.empty()) { + opencl_binary_storage_.reset( + new FileStorage(precompiled_binary_path)); + } } } diff --git a/mace/core/device_context.h b/mace/core/device_context.h index ea1e6590cfbe1afba0eb7d7df2cdfa57d168710c..5aea3d3f2af69a5f5d3858b2c88d9dcf9fbb6fd4 100644 --- a/mace/core/device_context.h +++ b/mace/core/device_context.h @@ -20,7 +20,7 @@ #include #include -#include "mace/core/file_storage.h" +#include "mace/core/kv_storage.h" #include "mace/utils/tuner.h" namespace mace { @@ -29,7 +29,11 @@ class GPUContext { public: GPUContext(const std::string &storage_path = "", const std::vector &opencl_binary_path = {}, - const std::string &opencl_parameter_path = ""); + const std::string &opencl_parameter_path = "", + const unsigned char *opencl_binary_ptr = nullptr, + const size_t opencl_binary_size = 0, + const unsigned char *opencl_parameter_ptr = nullptr, + const size_t opencl_parameter_size = 0); ~GPUContext(); std::shared_ptr opencl_binary_storage(); diff --git a/mace/core/file_storage.cc b/mace/core/kv_storage.cc similarity index 77% rename from mace/core/file_storage.cc rename to mace/core/kv_storage.cc index 6e6af1e23697bc7411c32b1b5b2ba36cee07888e..08a2d5b541785cf627c07d2c78053266280333fb 100644 --- a/mace/core/file_storage.cc +++ b/mace/core/kv_storage.cc @@ -23,11 +23,50 @@ #include #include -#include "mace/core/file_storage.h" +#include "mace/core/kv_storage.h" +#include "mace/core/macros.h" #include "mace/utils/logging.h" namespace mace { +namespace { +void ParseKVData(const unsigned char *data, + size_t data_size, + std::map> *kv_map) { + const size_t int_size = sizeof(int32_t); + + size_t parsed_offset = 0; + int64_t num_tuple = 0; + memcpy(&num_tuple, data, sizeof(num_tuple)); + data += sizeof(num_tuple); + parsed_offset += sizeof(num_tuple); + int32_t key_size = 0; + int32_t value_size = 0; + for (int i = 0; i < num_tuple; ++i) { + 
memcpy(&key_size, data, int_size); + data += int_size; + std::unique_ptr key(new char[key_size+1]); + memcpy(&key[0], data, key_size); + data += key_size; + key[key_size] = '\0'; + parsed_offset += int_size + key_size; + + memcpy(&value_size, data, int_size); + data += int_size; + std::vector value(value_size); + memcpy(value.data(), data, value_size); + data += value_size; + parsed_offset += int_size + value_size; + MACE_CHECK(parsed_offset <= data_size, + "Parsing storage data out of range: ", + parsed_offset, " > ", data_size); + + kv_map->emplace(std::string(&key[0]), value); + } +} + +} // namespace + class FileStorageFactory::Impl { public: explicit Impl(const std::string &path); @@ -103,32 +142,8 @@ int FileStorage::Load() { } return -1; } - unsigned char *file_data_ptr = file_data; - - const size_t int_size = sizeof(int32_t); - - int64_t data_size = 0; - memcpy(&data_size, file_data_ptr, sizeof(int64_t)); - file_data_ptr += sizeof(int64_t); - int32_t key_size = 0; - int32_t value_size = 0; - for (int i = 0; i < data_size; ++i) { - memcpy(&key_size, file_data_ptr, int_size); - file_data_ptr += int_size; - std::unique_ptr key(new char[key_size+1]); - memcpy(&key[0], file_data_ptr, key_size); - file_data_ptr += key_size; - key[key_size] = '\0'; - - memcpy(&value_size, file_data_ptr, int_size); - file_data_ptr += int_size; - std::vector value(value_size); - memcpy(value.data(), file_data_ptr, value_size); - file_data_ptr += value_size; - - data_.emplace(std::string(&key[0]), value); - } + ParseKVData(file_data, file_size, &data_); res = munmap(file_data, file_size); if (res != 0) { LOG(WARNING) << "munmap file " << file_path_ @@ -245,4 +260,40 @@ int FileStorage::Flush() { return 0; } + +ReadOnlyByteStreamStorage::ReadOnlyByteStreamStorage( + const unsigned char *byte_stream, size_t byte_stream_size) { + ParseKVData(byte_stream, byte_stream_size, &data_); +} + +int ReadOnlyByteStreamStorage::Load() { + return 0; +} + +bool ReadOnlyByteStreamStorage::Clear() { + 
LOG(FATAL) << "ReadOnlyByteStreamStorage should not clear data"; + return true; +} + +const std::vector* ReadOnlyByteStreamStorage::Find( + const std::string &key) { + auto iter = data_.find(key); + if (iter == data_.end()) return nullptr; + + return &(iter->second); +} + +bool ReadOnlyByteStreamStorage::Insert( + const std::string &key, + const std::vector &value) { + MACE_UNUSED(key); + MACE_UNUSED(value); + LOG(FATAL) << "ReadOnlyByteStreamStorage should not insert data"; + return true; +} + +int ReadOnlyByteStreamStorage::Flush() { + return 0; +} + }; // namespace mace diff --git a/mace/core/file_storage.h b/mace/core/kv_storage.h similarity index 77% rename from mace/core/file_storage.h rename to mace/core/kv_storage.h index 7d15449f87af39cbdf59adcd41828574a3927d1b..62296a18e40cb408679963b78bfb505ccf46119c 100644 --- a/mace/core/file_storage.h +++ b/mace/core/kv_storage.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_CORE_FILE_STORAGE_H_ -#define MACE_CORE_FILE_STORAGE_H_ +#ifndef MACE_CORE_KV_STORAGE_H_ +#define MACE_CORE_KV_STORAGE_H_ #include #include @@ -80,6 +80,25 @@ class FileStorage : public KVStorage { utils::RWMutex data_mutex_; }; + +class ReadOnlyByteStreamStorage : public KVStorage { + public: + // load data from byte stream + explicit ReadOnlyByteStreamStorage(const unsigned char *byte_stream, + size_t byte_stream_size); + + public: + int Load() override; + bool Clear() override; + bool Insert(const std::string &key, + const std::vector &value) override; + const std::vector *Find(const std::string &key) override; + int Flush() override; + + private: + std::map> data_; +}; + } // namespace mace -#endif // MACE_CORE_FILE_STORAGE_H_ +#endif // MACE_CORE_KV_STORAGE_H_ diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 904e74f6cd35d4b172808ba280ff2b53c54405ea..86a2b1166f66817a8084b9d53fd619f4f79fd95d 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -25,7 +25,7 @@ #include #include "mace/core/macros.h" -#include "mace/core/file_storage.h" +#include "mace/core/kv_storage.h" #include "mace/core/runtime/opencl/opencl_extension.h" #include "mace/utils/tuner.h" diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 1e189b8eeb5f6e347d41680cc3977643757af22f..01ef4ca3216a12361b37ad2a152c2ed09c811874 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/file_storage.h" +#include "mace/core/kv_storage.h" #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/scratch_image.h" diff --git a/mace/examples/cli/BUILD b/mace/examples/cli/BUILD index b2c2291c947edc013f806083b1160ced2ed43222..97e42b7df148e94bd11ab0d1f3cd7bc5470e3fd2 100644 --- 
a/mace/examples/cli/BUILD +++ b/mace/examples/cli/BUILD @@ -31,6 +31,8 @@ cc_binary( "//external:gflags_nothreads", "//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_libmace", + "//mace/codegen:generated_opencl_binary", + "//mace/codegen:generated_opencl_parameter", ] + if_hexagon_enabled([ "//third_party/nnlib:libhexagon", ]), @@ -59,5 +61,7 @@ cc_binary( "//external:gflags_nothreads", "//mace/codegen:generated_libmace", "//mace/codegen:generated_mace_engine_factory", + "//mace/codegen:generated_opencl_binary", + "//mace/codegen:generated_opencl_parameter", ], ) diff --git a/mace/examples/cli/example.cc b/mace/examples/cli/example.cc index 73adbb75dff9e02a84fd5c1520a2330c25054f39..cc24a94f49c29a7d702b2e7fba8d006dcdb4cfb2 100644 --- a/mace/examples/cli/example.cc +++ b/mace/examples/cli/example.cc @@ -27,6 +27,16 @@ #include "mace/codegen/engine/mace_engine_factory.h" #endif +#ifdef MACE_ENABLE_OPENCL +namespace mace { +const unsigned char *LoadOpenCLBinary(); +size_t OpenCLBinarySize(); +const unsigned char *LoadOpenCLParameter(); +size_t OpenCLParameterSize(); +} // namespace mace +#endif + + namespace mace { namespace examples { @@ -187,7 +197,9 @@ bool RunModel(const std::vector &input_names, gpu_context = GPUContextBuilder() .SetStoragePath(storage_path) .SetOpenCLBinaryPaths(opencl_binary_paths) + .SetOpenCLBinary(LoadOpenCLBinary(), OpenCLBinarySize()) .SetOpenCLParameterPath(FLAGS_opencl_parameter_file) + .SetOpenCLParameter(LoadOpenCLParameter(), OpenCLParameterSize()) .Finalize(); config.SetGPUContext(gpu_context); diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index 047cdf8e9e8d68d7254eb917bbfda3513db6015c..fad8c8ebbe1813a82b4715f1d15aa1836af87f9c 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -97,20 +97,34 @@ MaceStatus CheckGPUAvalibility(const NetDef *net_def, Device *device) { class GPUContextBuilder::Impl { public: + Impl(); void SetStoragePath(const std::string &path); void 
SetOpenCLBinaryPaths(const std::vector &paths); + void SetOpenCLBinary(const unsigned char *data, const size_t size); + void SetOpenCLParameterPath(const std::string &path); + void SetOpenCLParameter(const unsigned char *data, const size_t size); + std::shared_ptr Finalize(); public: std::string storage_path_; std::vector opencl_binary_paths_; std::string opencl_parameter_path_; + const unsigned char *opencl_binary_ptr_; + size_t opencl_binary_size_; + const unsigned char *opencl_parameter_ptr_; + size_t opencl_parameter_size_; }; +GPUContextBuilder::Impl::Impl() + : storage_path_(""), opencl_binary_paths_(0), opencl_parameter_path_(""), + opencl_binary_ptr_(nullptr), opencl_binary_size_(0), + opencl_parameter_ptr_(nullptr), opencl_parameter_size_(0) {} + void GPUContextBuilder::Impl::SetStoragePath(const std::string &path) { storage_path_ = path; } @@ -120,15 +134,31 @@ void GPUContextBuilder::Impl::SetOpenCLBinaryPaths( opencl_binary_paths_ = paths; } +void GPUContextBuilder::Impl::SetOpenCLBinary(const unsigned char *data, + const size_t size) { + opencl_binary_ptr_ = data; + opencl_binary_size_ = size; +} + void GPUContextBuilder::Impl::SetOpenCLParameterPath( const std::string &path) { opencl_parameter_path_ = path; } +void GPUContextBuilder::Impl::SetOpenCLParameter(const unsigned char *data, + const size_t size) { + opencl_parameter_ptr_ = data; + opencl_parameter_size_ = size; +} + std::shared_ptr GPUContextBuilder::Impl::Finalize() { return std::shared_ptr(new GPUContext(storage_path_, opencl_binary_paths_, - opencl_parameter_path_)); + opencl_parameter_path_, + opencl_binary_ptr_, + opencl_binary_size_, + opencl_parameter_ptr_, + opencl_parameter_size_)); } GPUContextBuilder::GPUContextBuilder() : impl_(new GPUContextBuilder::Impl) {} @@ -146,12 +176,24 @@ GPUContextBuilder &GPUContextBuilder::SetOpenCLBinaryPaths( return *this; } +GPUContextBuilder& GPUContextBuilder::SetOpenCLBinary( + const unsigned char *data, const size_t size) { + 
impl_->SetOpenCLBinary(data, size); + return *this; +} + GPUContextBuilder &GPUContextBuilder::SetOpenCLParameterPath( const std::string &path) { impl_->SetOpenCLParameterPath(path); return *this; } +GPUContextBuilder& GPUContextBuilder::SetOpenCLParameter( + const unsigned char *data, const size_t size) { + impl_->SetOpenCLParameter(data, size); + return *this; +} + std::shared_ptr GPUContextBuilder::Finalize() { return impl_->Finalize(); } diff --git a/mace/public/mace.h b/mace/public/mace.h index a7b2a13edb7b9d043bb7495bd3ccc327453a1448..775d3495ad480b8b5d1ce9e825c00492368f3e25 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -173,9 +173,9 @@ class MACE_API GPUContextBuilder { /// \param path Make sure your program have Read/Write permission of the path /// \return GPUContextBuilder &SetStoragePath(const std::string &path); - /// \brief Set paths of Generated OpenCL Compiled Kernel Binary file (not libOpenCL.so) // NOLINT(whitespace/line_length) + /// \brief Set paths of generated OpenCL compiled kernel binary file (not libOpenCL.so) // NOLINT(whitespace/line_length) /// - /// if you use gpu of specific soc, Using OpenCL binary will speed up the initialization. // NOLINT(whitespace/line_length) + /// If you use GPU of specific soc, using OpenCL binary will speed up the initialization. // NOLINT(whitespace/line_length) /// OpenCL binary is corresponding to the OpenCL Driver version, /// you should update the binary when OpenCL Driver changed. /// @@ -183,15 +183,38 @@ class MACE_API GPUContextBuilder { /// \return GPUContextBuilder &SetOpenCLBinaryPaths( const std::vector &paths); - /// \brief Set the path of Generated OpenCL parameter file + + /// \brief Set generated OpenCL compiled kernel binary with bytes array + /// + /// If you use GPU of specific soc, using OpenCL binary will speed up the initialization. 
// NOLINT(whitespace/line_length) + /// OpenCL binary is corresponding to the OpenCL Driver version, + /// you should update the binary when OpenCL Driver changed. + /// + /// \param data Byte stream of OpenCL binary file + /// \param size Size of byte stream (data) + /// \return + GPUContextBuilder &SetOpenCLBinary(const unsigned char *data, + const size_t size); + /// \brief Set the path of generated OpenCL parameter file /// - /// If you use gpu for specific soc, The parameters is the local work group + /// If you use GPU for specific soc, the parameters is the local work group /// size tuned for specific SOC, which may be faster than the /// general parameters. /// /// \param path Make sure your program have Read/Write permission of the path /// \return GPUContextBuilder &SetOpenCLParameterPath(const std::string &path); + /// \brief Set generated OpenCL parameter with bytes array + /// + /// If you use GPU for specific soc, the parameters is the local work group + /// size tuned for specific SOC, which may be faster than the + /// general parameters. + /// + /// \param data Byte stream of OpenCL parameter file + /// \param size Size of byte stream (data) + /// \return + GPUContextBuilder &SetOpenCLParameter(const unsigned char *data, + const size_t size); std::shared_ptr Finalize(); diff --git a/mace/python/tools/file_binary.cc.jinja2 b/mace/python/tools/file_binary.cc.jinja2 new file mode 100644 index 0000000000000000000000000000000000000000..4e59d47b30764d71560278fed71e38da1ddadd23 --- /dev/null +++ b/mace/python/tools/file_binary.cc.jinja2 @@ -0,0 +1,38 @@ +// Copyright 2019 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include + +namespace mace { + +const unsigned char *{{ load_func_name }}() { +{% if data_size == 0 %} + return nullptr; +{% else %} + static const unsigned char kData[{{ data_size }}] = { + {% for d in data %}{{"0x%02X, " % d }}{%endfor%} + }; + + return kData; +{% endif %} +} + +size_t {{ size_func_name }}() { + return {{ data_size }}; +} + +} // namespace mace + diff --git a/mace/python/tools/opencl_binary_codegen.py b/mace/python/tools/opencl_binary_codegen.py new file mode 100644 index 0000000000000000000000000000000000000000..0a06d0c4e301d2bf9b132599a5ce39aec57698d3 --- /dev/null +++ b/mace/python/tools/opencl_binary_codegen.py @@ -0,0 +1,77 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import jinja2 +import os +import sys + +import numpy as np + +FLAGS = None + + +def generate_opencl_code(binary_file_name, load_func_name, size_func_name, + output_path): + binary_array = [] + if os.path.exists(binary_file_name): + with open(binary_file_name, 'rb') as f: + binary_array = np.fromfile(f, dtype=np.uint8) + + env = jinja2.Environment( + loader=jinja2.FileSystemLoader(sys.path[0])) + content = env.get_template('file_binary.cc.jinja2').render( + data=binary_array, + data_size=len(binary_array), + load_func_name=load_func_name, + size_func_name=size_func_name) + + if os.path.isfile(output_path): + os.remove(output_path) + with open(output_path, "w") as w_file: + w_file.write(content) + + +def parse_args(): + """Parses command line arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--file_name", + type=str, + default="opencl_binary.bin", + help="The binary file name.") + parser.add_argument( + "--output_path", + type=str, + default="", + help="The path of generated C++ source file which contains the binary." 
+ ) + parser.add_argument( + "--load_func_name", + type=str, + default="LoadData", + help="load interface name.") + parser.add_argument( + "--size_func_name", + type=str, + default="DataSize", + help="size function name.") + return parser.parse_known_args() + + +if __name__ == '__main__': + FLAGS, unparsed = parse_args() + generate_opencl_code(FLAGS.file_name, + FLAGS.load_func_name, FLAGS.size_func_name, + FLAGS.output_path) diff --git a/mace/utils/tuner.h b/mace/utils/tuner.h index 3295ddaec12e3703c9839e19e55414fca873dcaf..5dbb84018f4165865b67b6534e834fb7c6a2247d 100644 --- a/mace/utils/tuner.h +++ b/mace/utils/tuner.h @@ -14,7 +14,11 @@ #ifndef MACE_UTILS_TUNER_H_ #define MACE_UTILS_TUNER_H_ +#include #include +#include +#include +#include #include #include @@ -39,10 +43,16 @@ inline bool IsTuning() { template class Tuner { public: - explicit Tuner(const std::string tuned_param_file_path = ""): + explicit Tuner(const std::string tuned_param_file_path = "", + const unsigned char *param_byte_stream = nullptr, + const size_t param_byte_stream_size = 0): tuned_param_file_path_(tuned_param_file_path) { path_ = getenv("MACE_RUN_PARAMETER_PATH"); - ReadRunParamters(); + if (param_byte_stream != nullptr && param_byte_stream_size != 0) { + ParseData(param_byte_stream, param_byte_stream_size); + } else { + ReadRunParamters(); + } } ~Tuner() { WriteRunParameters(); } @@ -114,32 +124,100 @@ class Tuner { } } + inline void ParseData(const unsigned char *data, size_t data_size) { + const size_t int_size = sizeof(int32_t); + const size_t param_type_size = sizeof(param_type); + + size_t parsed_offset = 0; + int64_t num_params = 0; + memcpy(&num_params, data, sizeof(num_params)); + data += sizeof(num_params); + parsed_offset += sizeof(num_params); + while (num_params--) { + int32_t key_size = 0; + memcpy(&key_size, data, int_size); + data += int_size; + std::string key(key_size, ' '); + memcpy(&key[0], data, key_size); + data += key_size; + parsed_offset += int_size + key_size; + + int32_t 
params_size = 0; + memcpy(&params_size, data, int_size); + data += int_size; + parsed_offset += int_size; + int32_t params_count = params_size / param_type_size; + std::vector params(params_count); + for (int i = 0; i < params_count; ++i) { + memcpy(&params[i], data, param_type_size); + data += param_type_size; + parsed_offset += param_type_size; + } + MACE_CHECK(parsed_offset <= data_size, + "Parsing tuned data out of range: ", + parsed_offset, " > ", data_size); + param_table_.emplace(key, params); + } + } + inline void ReadRunParamters() { if (!tuned_param_file_path_.empty()) { - std::ifstream ifs(tuned_param_file_path_, - std::ios::binary | std::ios::in); - if (ifs.is_open()) { - int64_t num_params = 0; - ifs.read(reinterpret_cast(&num_params), sizeof(num_params)); - while (num_params--) { - int32_t key_size = 0; - ifs.read(reinterpret_cast(&key_size), sizeof(key_size)); - std::string key(key_size, ' '); - ifs.read(&key[0], key_size); - - int32_t params_size = 0; - ifs.read(reinterpret_cast(&params_size), sizeof(params_size)); - int32_t params_count = params_size / sizeof(unsigned int); - std::vector params(params_count); - for (int i = 0; i < params_count; ++i) { - ifs.read(reinterpret_cast(&params[i]), - sizeof(unsigned int)); - } - param_table_.emplace(key, params); + struct stat st; + if (stat(tuned_param_file_path_.c_str(), &st) == -1) { + if (errno == ENOENT) { + VLOG(1) << "File " << tuned_param_file_path_ + << " does not exist"; + } else { + LOG(WARNING) << "Stat file " << tuned_param_file_path_ + << " failed, error code: " << strerror(errno); } - ifs.close(); - } else { - LOG(WARNING) << "Read OpenCL tuned parameters file failed."; + return; + } + int fd = open(tuned_param_file_path_.c_str(), O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) { + LOG(INFO) << "File " << tuned_param_file_path_ + << " does not exist"; + } else { + LOG(WARNING) << "open file " << tuned_param_file_path_ + << " failed, error code: " << strerror(errno); + } + return; + } + size_t file_size = 
st.st_size; + unsigned char *file_data = + static_cast(mmap(nullptr, file_size, PROT_READ, + MAP_PRIVATE, fd, 0)); + int res = 0; + if (file_data == MAP_FAILED) { + LOG(WARNING) << "mmap file " << tuned_param_file_path_ + << " failed, error code: " << strerror(errno); + + res = close(fd); + if (res != 0) { + LOG(WARNING) << "close file " << tuned_param_file_path_ + << " failed, error code: " << strerror(errno); + } + return; + } + + ParseData(file_data, file_size); + res = munmap(file_data, file_size); + if (res != 0) { + LOG(WARNING) << "munmap file " << tuned_param_file_path_ + << " failed, error code: " << strerror(errno); + res = close(fd); + if (res != 0) { + LOG(WARNING) << "close file " << tuned_param_file_path_ + << " failed, error code: " << strerror(errno); + } + return; + } + res = close(fd); + if (res != 0) { + LOG(WARNING) << "close file " << tuned_param_file_path_ + << " failed, error code: " << strerror(errno); + return; } } else { VLOG(1) << "There is no tuned parameters."; diff --git a/tools/common.py b/tools/common.py index fff51e4d080ae27f119acae1bfc80a0e7a90f526..4b58a03692ff7123ec723a7e74889ab427151d31 100644 --- a/tools/common.py +++ b/tools/common.py @@ -407,6 +407,7 @@ CODEGEN_BASE_DIR = 'mace/codegen' MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models' ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine' LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib' +OPENCL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/opencl' LIBMACE_SO_TARGET = "//mace/libmace:libmace.so" LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static" LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a" diff --git a/tools/converter.py b/tools/converter.py index fa67ea919e3421e0f3554c1dc53743c3d5c5d7b0..4f82a56df502710a469df6185a33afbd8279316b 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -853,7 +853,7 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp, def build_example(configs, target_abi, toolchain, - enable_openmp, mace_lib_type): + enable_openmp, 
mace_lib_type, cl_binary_to_code, device): library_name = configs[YAMLKeyword.library_name] hexagon_mode = get_hexagon_mode(configs) @@ -862,6 +862,20 @@ def build_example(configs, target_abi, toolchain, sh.rm("-rf", build_tmp_binary_dir) os.makedirs(build_tmp_binary_dir) + if cl_binary_to_code: + sh_commands.gen_opencl_binary_cpps( + get_opencl_binary_output_path( + library_name, target_abi, device), + get_opencl_parameter_output_path( + library_name, target_abi, device), + OPENCL_CODEGEN_DIR + '/opencl_binary.cc', + OPENCL_CODEGEN_DIR + '/opencl_parameter.cc') + else: + sh_commands.gen_opencl_binary_cpps( + "", "", + OPENCL_CODEGEN_DIR + '/opencl_binary.cc', + OPENCL_CODEGEN_DIR + '/opencl_parameter.cc') + symbol_hidden = True libmace_target = LIBMACE_STATIC_TARGET @@ -942,12 +956,15 @@ def run_mace(flags): if target_abi in dev[YAMLKeyword.target_abis]: # get toolchain toolchain = infer_toolchain(target_abi) + device = DeviceWrapper(dev) if flags.example: build_example(configs, target_abi, toolchain, not flags.disable_openmp, - flags.mace_lib_type) + flags.mace_lib_type, + flags.cl_binary_to_code, + device) else: build_mace_run(configs, target_abi, @@ -956,7 +973,6 @@ def run_mace(flags): flags.address_sanitizer, flags.mace_lib_type) # run - device = DeviceWrapper(dev) with device.lock(): device.run_specify_abi(flags, configs, target_abi) elif dev[YAMLKeyword.device_name] != SystemType.host: @@ -1229,6 +1245,10 @@ def parse_args(): type=str, default="", help="quantize stat output dir.") + run.add_argument( + "--cl_binary_to_code", + action="store_true", + help="convert OpenCL binaries to cpp.") benchmark = subparsers.add_parser( 'benchmark', parents=[all_type_parent_parser, run_bm_parent_parser], diff --git a/tools/device.py b/tools/device.py index 8c78a09f8195ca8f577d9f1ed016f065fc50aa65..f6826399dffdc700a0bcc24cd6c9824c7dc1f28f 100644 --- a/tools/device.py +++ b/tools/device.py @@ -130,7 +130,7 @@ class DeviceWrapper: dst_file = "%s/%s" % (dst_path, file_name) 
if os.path.exists(dst_file): sh.rm('-f', dst_file) - six.print_("Pull %s to %s" % (src_path, dst_path)) + six.print_("Pull %s to %s" % (src_file, dst_path)) if self.system == SystemType.android: sh_commands.adb_pull( src_file, dst_file, self.address) @@ -626,6 +626,11 @@ class DeviceWrapper: model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME, opencl_parameter_bin_path ) + sh_commands.gen_opencl_binary_cpps( + opencl_output_bin_path, + opencl_parameter_bin_path, + opencl_output_bin_path + '.cc', + opencl_parameter_bin_path + '.cc') def report_run_statistics(self, target_abi, diff --git a/tools/sh_commands.py b/tools/sh_commands.py index a824ba570ed44b76ec7c803b4645d503d7476594..e0d0edfc52365a6dbf3a8bbc93e44a16db4bc2ed 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -32,7 +32,7 @@ from common import abi_to_internal sys.path.insert(0, "mace/python/tools") try: from encrypt_opencl_codegen import encrypt_opencl_codegen - from binary_codegen import tuning_param_codegen + from opencl_binary_codegen import generate_opencl_code from generate_data import generate_input_data from validate import validate from mace_engine_factory_codegen import gen_mace_engine_factory @@ -567,6 +567,23 @@ def gen_random_input(model_output_dir, sh.cp("-f", input_file_list[i], dst_input_file) +def gen_opencl_binary_cpps(opencl_bin_file_path, + opencl_param_file_path, + opencl_bin_cpp_path, + opencl_param_cpp_path): + output_dir = os.path.dirname(opencl_bin_cpp_path) + if not os.path.exists(output_dir): + sh.mkdir("-p", output_dir) + opencl_bin_load_func_name = 'LoadOpenCLBinary' + opencl_bin_size_func_name = 'OpenCLBinarySize' + opencl_param_load_func_name = 'LoadOpenCLParameter' + opencl_param_size_func_name = 'OpenCLParameterSize' + generate_opencl_code(opencl_bin_file_path, opencl_bin_load_func_name, + opencl_bin_size_func_name, opencl_bin_cpp_path) + generate_opencl_code(opencl_param_file_path, opencl_param_load_func_name, + opencl_param_size_func_name, opencl_param_cpp_path) + 
+ def update_mace_run_binary(build_tmp_binary_dir, link_dynamic=False): if link_dynamic: mace_run_filepath = build_tmp_binary_dir + "/mace_run_dynamic"