提交 951452c2 编写于 作者: L liuqi

Feature: Support convert OpenCL binary(binary and parameter) files to code.

1. Convert OpenCL binary files to code.
2. example support OpenCL byte stream with --cl_binary_to_code flag.
上级 22e40d66
...@@ -314,14 +314,18 @@ Tuning for specific SoC's GPU ...@@ -314,14 +314,18 @@ Tuning for specific SoC's GPU
└── opencl └── opencl
└── arm64-v8a └── arm64-v8a
   ├── moblinet-v2_compiled_opencl_kernel.MiNote3.sdm660.bin    ├── moblinet-v2_compiled_opencl_kernel.MiNote3.sdm660.bin
   └── moblinet-v2_tuned_opencl_parameter.MiNote3.sdm660.bin    ├── moblinet-v2_compiled_opencl_kernel.MiNote3.sdm660.bin.cc
   ├── moblinet-v2_tuned_opencl_parameter.MiNote3.sdm660.bin
   └── moblinet-v2_tuned_opencl_parameter.MiNote3.sdm660.bin.cc
* **mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin** stands for the OpenCL binaries * **mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin** stands for the OpenCL binaries
used for your models, which could accelerate the initialization stage. used for your models, which could accelerate the initialization stage.
Details please refer to `OpenCL Specification <https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clCreateProgramWithBinary.html>`__. Details please refer to `OpenCL Specification <https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clCreateProgramWithBinary.html>`__.
* **mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin.cc** contains C++ source code which defines the OpenCL binary data as a const array.
* **mobilenet-v2-tuned_opencl_parameter.MI6.msm8998.bin** stands for the tuned OpenCL parameters * **mobilenet-v2-tuned_opencl_parameter.MI6.msm8998.bin** stands for the tuned OpenCL parameters
for the SoC. for the SoC.
* **mobilenet-v2-tuned_opencl_parameter.MI6.msm8998.bin.cc** contains C++ source code which defines the tuned OpenCL parameter data as a const array.
* **4. Deployment** * **4. Deployment**
* Change the names of files generated above for not collision and push them to **your own device's directory**. * Change the names of files generated above for not collision and push them to **your own device's directory**.
......
...@@ -38,6 +38,18 @@ cc_library( ...@@ -38,6 +38,18 @@ cc_library(
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
) )
# Compiles opencl/opencl_binary.cc into a library that binaries can link
# against. NOTE(review): the source file appears to be produced by the build
# tooling (gen_opencl_binary_cpps) rather than checked in -- confirm.
cc_library(
name = "generated_opencl_binary",
srcs = ["opencl/opencl_binary.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
)
# Compiles opencl/opencl_parameter.cc into a library that binaries can link
# against. NOTE(review): the source file appears to be produced by the build
# tooling (gen_opencl_binary_cpps) rather than checked in -- confirm.
cc_library(
name = "generated_opencl_parameter",
srcs = ["opencl/opencl_parameter.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
)
cc_library( cc_library(
name = "generated_version", name = "generated_version",
srcs = ["version/version.cc"], srcs = ["version/version.cc"],
......
...@@ -39,20 +39,30 @@ std::string FindFirstExistPath(const std::vector<std::string> &paths) { ...@@ -39,20 +39,30 @@ std::string FindFirstExistPath(const std::vector<std::string> &paths) {
GPUContext::GPUContext(const std::string &storage_path, GPUContext::GPUContext(const std::string &storage_path,
const std::vector<std::string> &opencl_binary_paths, const std::vector<std::string> &opencl_binary_paths,
const std::string &opencl_parameter_path) const std::string &opencl_parameter_path,
const unsigned char *opencl_binary_ptr,
const size_t opencl_binary_size,
const unsigned char *opencl_parameter_ptr,
const size_t opencl_parameter_size)
: storage_factory_(new FileStorageFactory(storage_path)), : storage_factory_(new FileStorageFactory(storage_path)),
opencl_tuner_(new Tuner<uint32_t>(opencl_parameter_path)) { opencl_tuner_(new Tuner<uint32_t>(opencl_parameter_path,
opencl_parameter_ptr,
opencl_parameter_size)) {
if (!storage_path.empty()) { if (!storage_path.empty()) {
opencl_cache_storage_ = opencl_cache_storage_ =
storage_factory_->CreateStorage(kPrecompiledProgramFileName); storage_factory_->CreateStorage(kPrecompiledProgramFileName);
} }
std::string precompiled_binary_path = if (opencl_binary_ptr != nullptr) {
FindFirstExistPath(opencl_binary_paths);
if (!precompiled_binary_path.empty()) {
opencl_binary_storage_.reset( opencl_binary_storage_.reset(
new FileStorage(precompiled_binary_path)); new ReadOnlyByteStreamStorage(opencl_binary_ptr, opencl_binary_size));
} else {
std::string precompiled_binary_path =
FindFirstExistPath(opencl_binary_paths);
if (!precompiled_binary_path.empty()) {
opencl_binary_storage_.reset(
new FileStorage(precompiled_binary_path));
}
} }
} }
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "mace/core/file_storage.h" #include "mace/core/kv_storage.h"
#include "mace/utils/tuner.h" #include "mace/utils/tuner.h"
namespace mace { namespace mace {
...@@ -29,7 +29,11 @@ class GPUContext { ...@@ -29,7 +29,11 @@ class GPUContext {
public: public:
GPUContext(const std::string &storage_path = "", GPUContext(const std::string &storage_path = "",
const std::vector<std::string> &opencl_binary_path = {}, const std::vector<std::string> &opencl_binary_path = {},
const std::string &opencl_parameter_path = ""); const std::string &opencl_parameter_path = "",
const unsigned char *opencl_binary_ptr = nullptr,
const size_t opencl_binary_size = 0,
const unsigned char *opencl_parameter_ptr = nullptr,
const size_t opencl_parameter_size = 0);
~GPUContext(); ~GPUContext();
std::shared_ptr<KVStorage> opencl_binary_storage(); std::shared_ptr<KVStorage> opencl_binary_storage();
......
...@@ -23,11 +23,50 @@ ...@@ -23,11 +23,50 @@
#include <memory> #include <memory>
#include <utility> #include <utility>
#include "mace/core/file_storage.h" #include "mace/core/kv_storage.h"
#include "mace/core/macros.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
namespace mace { namespace mace {
namespace {
// Deserializes a key/value blob into `kv_map`.
//
// Layout, as consumed below: an int64 tuple count followed by that many
// tuples of {int32 key size, key bytes, int32 value size, value bytes}.
// Integers are copied out with memcpy, i.e. interpreted in host byte order.
//
// Every read is validated against `data_size` BEFORE the buffer is touched,
// so a truncated or corrupt blob fails the MACE_CHECK instead of reading
// past the end of the buffer first.
//
// \param data       start of the serialized blob
// \param data_size  number of readable bytes at `data`
// \param kv_map     output map; entries are added with emplace, so a key that
//                   is already present keeps its existing value
void ParseKVData(const unsigned char *data,
                 size_t data_size,
                 std::map<std::string, std::vector<unsigned char>> *kv_map) {
  const size_t int_size = sizeof(int32_t);
  size_t parsed_offset = 0;

  // Invariant: parsed_offset <= data_size, so the subtraction cannot wrap.
  auto check_available = [&parsed_offset, data_size](size_t bytes) {
    MACE_CHECK(bytes <= data_size - parsed_offset,
               "Paring storage data out of range: ",
               parsed_offset + bytes, " > ", data_size);
  };

  MACE_CHECK(data != nullptr, "Storage data is null");

  int64_t num_tuple = 0;
  check_available(sizeof(num_tuple));
  memcpy(&num_tuple, data, sizeof(num_tuple));
  data += sizeof(num_tuple);
  parsed_offset += sizeof(num_tuple);

  // 64-bit index: the count is an int64 and must not be compared against a
  // narrower counter that could overflow.
  for (int64_t i = 0; i < num_tuple; ++i) {
    int32_t key_size = 0;
    check_available(int_size);
    memcpy(&key_size, data, int_size);
    data += int_size;
    parsed_offset += int_size;
    MACE_CHECK(key_size >= 0,
               "Invalid key size in storage data: ", key_size);

    check_available(static_cast<size_t>(key_size));
    std::string key(reinterpret_cast<const char *>(data),
                    static_cast<size_t>(key_size));
    // Keys are treated as C strings: keep only the part before the first
    // embedded NUL, if there is one.
    const size_t nul_pos = key.find('\0');
    if (nul_pos != std::string::npos) {
      key.resize(nul_pos);
    }
    data += key_size;
    parsed_offset += static_cast<size_t>(key_size);

    int32_t value_size = 0;
    check_available(int_size);
    memcpy(&value_size, data, int_size);
    data += int_size;
    parsed_offset += int_size;
    MACE_CHECK(value_size >= 0,
               "Invalid value size in storage data: ", value_size);

    check_available(static_cast<size_t>(value_size));
    std::vector<unsigned char> value(data, data + value_size);
    data += value_size;
    parsed_offset += static_cast<size_t>(value_size);

    // Move instead of copy: value blobs can be large.
    kv_map->emplace(std::move(key), std::move(value));
  }
}
} // namespace
class FileStorageFactory::Impl { class FileStorageFactory::Impl {
public: public:
explicit Impl(const std::string &path); explicit Impl(const std::string &path);
...@@ -103,32 +142,8 @@ int FileStorage::Load() { ...@@ -103,32 +142,8 @@ int FileStorage::Load() {
} }
return -1; return -1;
} }
unsigned char *file_data_ptr = file_data;
const size_t int_size = sizeof(int32_t);
int64_t data_size = 0;
memcpy(&data_size, file_data_ptr, sizeof(int64_t));
file_data_ptr += sizeof(int64_t);
int32_t key_size = 0;
int32_t value_size = 0;
for (int i = 0; i < data_size; ++i) {
memcpy(&key_size, file_data_ptr, int_size);
file_data_ptr += int_size;
std::unique_ptr<char[]> key(new char[key_size+1]);
memcpy(&key[0], file_data_ptr, key_size);
file_data_ptr += key_size;
key[key_size] = '\0';
memcpy(&value_size, file_data_ptr, int_size);
file_data_ptr += int_size;
std::vector<unsigned char> value(value_size);
memcpy(value.data(), file_data_ptr, value_size);
file_data_ptr += value_size;
data_.emplace(std::string(&key[0]), value);
}
ParseKVData(file_data, file_size, &data_);
res = munmap(file_data, file_size); res = munmap(file_data, file_size);
if (res != 0) { if (res != 0) {
LOG(WARNING) << "munmap file " << file_path_ LOG(WARNING) << "munmap file " << file_path_
...@@ -245,4 +260,40 @@ int FileStorage::Flush() { ...@@ -245,4 +260,40 @@ int FileStorage::Flush() {
return 0; return 0;
} }
// Parses the whole byte stream into the in-memory map at construction time.
ReadOnlyByteStreamStorage::ReadOnlyByteStreamStorage(
    const unsigned char *byte_stream, size_t byte_stream_size) {
  ParseKVData(byte_stream, byte_stream_size, &data_);
}

// Nothing to load: the data was already parsed by the constructor.
int ReadOnlyByteStreamStorage::Load() {
  return 0;
}

// Nothing to persist: this storage is never modified.
int ReadOnlyByteStreamStorage::Flush() {
  return 0;
}

// Returns a pointer to the value stored under `key`, or nullptr when the key
// is not present.
const std::vector<unsigned char>* ReadOnlyByteStreamStorage::Find(
    const std::string &key) {
  const auto entry = data_.find(key);
  return entry != data_.end() ? &(entry->second) : nullptr;
}

// Insertion is not supported on a read-only storage; reports a fatal error.
bool ReadOnlyByteStreamStorage::Insert(
    const std::string &key,
    const std::vector<unsigned char> &value) {
  MACE_UNUSED(key);
  MACE_UNUSED(value);
  LOG(FATAL) << "ReadOnlyByteStreamStorage should not insert data";
  return true;
}

// Clearing is not supported on a read-only storage; reports a fatal error.
bool ReadOnlyByteStreamStorage::Clear() {
  LOG(FATAL) << "ReadOnlyByteStreamStorage should not clear data";
  return true;
}
}; // namespace mace }; // namespace mace
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_CORE_FILE_STORAGE_H_ #ifndef MACE_CORE_KV_STORAGE_H_
#define MACE_CORE_FILE_STORAGE_H_ #define MACE_CORE_KV_STORAGE_H_
#include <map> #include <map>
#include <memory> #include <memory>
...@@ -80,6 +80,25 @@ class FileStorage : public KVStorage { ...@@ -80,6 +80,25 @@ class FileStorage : public KVStorage {
utils::RWMutex data_mutex_; utils::RWMutex data_mutex_;
}; };
// KVStorage implementation whose contents come from an in-memory byte stream
// and are not meant to be modified afterwards.
class ReadOnlyByteStreamStorage : public KVStorage {
 public:
  // Builds the storage from `byte_stream`, which holds `byte_stream_size`
  // bytes of serialized key/value data.
  explicit ReadOnlyByteStreamStorage(const unsigned char *byte_stream,
                                     size_t byte_stream_size);

  // KVStorage interface.
  int Load() override;
  int Flush() override;
  const std::vector<unsigned char> *Find(const std::string &key) override;
  bool Insert(const std::string &key,
              const std::vector<unsigned char> &value) override;
  bool Clear() override;

 private:
  // Key -> value bytes.
  std::map<std::string, std::vector<unsigned char>> data_;
};
} // namespace mace } // namespace mace
#endif // MACE_CORE_FILE_STORAGE_H_ #endif // MACE_CORE_KV_STORAGE_H_
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#include <utility> #include <utility>
#include "mace/core/macros.h" #include "mace/core/macros.h"
#include "mace/core/file_storage.h" #include "mace/core/kv_storage.h"
#include "mace/core/runtime/opencl/opencl_extension.h" #include "mace/core/runtime/opencl/opencl_extension.h"
#include "mace/utils/tuner.h" #include "mace/utils/tuner.h"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "mace/core/file_storage.h" #include "mace/core/kv_storage.h"
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/scratch_image.h" #include "mace/core/runtime/opencl/scratch_image.h"
......
...@@ -31,6 +31,8 @@ cc_binary( ...@@ -31,6 +31,8 @@ cc_binary(
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_libmace", "//mace/codegen:generated_libmace",
"//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter",
] + if_hexagon_enabled([ ] + if_hexagon_enabled([
"//third_party/nnlib:libhexagon", "//third_party/nnlib:libhexagon",
]), ]),
...@@ -59,5 +61,7 @@ cc_binary( ...@@ -59,5 +61,7 @@ cc_binary(
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_libmace", "//mace/codegen:generated_libmace",
"//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter",
], ],
) )
...@@ -27,6 +27,16 @@ ...@@ -27,6 +27,16 @@
#include "mace/codegen/engine/mace_engine_factory.h" #include "mace/codegen/engine/mace_engine_factory.h"
#endif #endif
#ifdef MACE_ENABLE_OPENCL
// Forward declarations of the accessors for OpenCL data compiled into the
// binary. NOTE(review): presumably defined by the generated sources linked in
// through //mace/codegen:generated_opencl_binary and
// //mace/codegen:generated_opencl_parameter -- confirm.
namespace mace {
// Pointer to / size of the embedded OpenCL compiled-kernel binary.
const unsigned char *LoadOpenCLBinary();
size_t OpenCLBinarySize();
// Pointer to / size of the embedded tuned OpenCL parameters.
const unsigned char *LoadOpenCLParameter();
size_t OpenCLParameterSize();
} // namespace mace
#endif
namespace mace { namespace mace {
namespace examples { namespace examples {
...@@ -187,7 +197,9 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -187,7 +197,9 @@ bool RunModel(const std::vector<std::string> &input_names,
gpu_context = GPUContextBuilder() gpu_context = GPUContextBuilder()
.SetStoragePath(storage_path) .SetStoragePath(storage_path)
.SetOpenCLBinaryPaths(opencl_binary_paths) .SetOpenCLBinaryPaths(opencl_binary_paths)
.SetOpenCLBinary(LoadOpenCLBinary(), OpenCLBinarySize())
.SetOpenCLParameterPath(FLAGS_opencl_parameter_file) .SetOpenCLParameterPath(FLAGS_opencl_parameter_file)
.SetOpenCLParameter(LoadOpenCLParameter(), OpenCLParameterSize())
.Finalize(); .Finalize();
config.SetGPUContext(gpu_context); config.SetGPUContext(gpu_context);
......
...@@ -97,20 +97,34 @@ MaceStatus CheckGPUAvalibility(const NetDef *net_def, Device *device) { ...@@ -97,20 +97,34 @@ MaceStatus CheckGPUAvalibility(const NetDef *net_def, Device *device) {
class GPUContextBuilder::Impl { class GPUContextBuilder::Impl {
public: public:
Impl();
void SetStoragePath(const std::string &path); void SetStoragePath(const std::string &path);
void SetOpenCLBinaryPaths(const std::vector<std::string> &paths); void SetOpenCLBinaryPaths(const std::vector<std::string> &paths);
void SetOpenCLBinary(const unsigned char *data, const size_t size);
void SetOpenCLParameterPath(const std::string &path); void SetOpenCLParameterPath(const std::string &path);
void SetOpenCLParameter(const unsigned char *data, const size_t size);
std::shared_ptr<GPUContext> Finalize(); std::shared_ptr<GPUContext> Finalize();
public: public:
std::string storage_path_; std::string storage_path_;
std::vector<std::string> opencl_binary_paths_; std::vector<std::string> opencl_binary_paths_;
std::string opencl_parameter_path_; std::string opencl_parameter_path_;
const unsigned char *opencl_binary_ptr_;
size_t opencl_binary_size_;
const unsigned char *opencl_parameter_ptr_;
size_t opencl_parameter_size_;
}; };
// Starts with every option unset: empty paths, and null/zero-sized byte
// streams.
GPUContextBuilder::Impl::Impl()
    : storage_path_(),
      opencl_binary_paths_(),
      opencl_parameter_path_(),
      opencl_binary_ptr_(nullptr),
      opencl_binary_size_(0),
      opencl_parameter_ptr_(nullptr),
      opencl_parameter_size_(0) {}
void GPUContextBuilder::Impl::SetStoragePath(const std::string &path) { void GPUContextBuilder::Impl::SetStoragePath(const std::string &path) {
storage_path_ = path; storage_path_ = path;
} }
...@@ -120,15 +134,31 @@ void GPUContextBuilder::Impl::SetOpenCLBinaryPaths( ...@@ -120,15 +134,31 @@ void GPUContextBuilder::Impl::SetOpenCLBinaryPaths(
opencl_binary_paths_ = paths; opencl_binary_paths_ = paths;
} }
// Remembers the in-memory OpenCL binary byte stream for later use.
// Only the pointer and the size are stored; the bytes are not copied here.
void GPUContextBuilder::Impl::SetOpenCLBinary(const unsigned char *data,
const size_t size) {
opencl_binary_ptr_ = data;
opencl_binary_size_ = size;
}
void GPUContextBuilder::Impl::SetOpenCLParameterPath( void GPUContextBuilder::Impl::SetOpenCLParameterPath(
const std::string &path) { const std::string &path) {
opencl_parameter_path_ = path; opencl_parameter_path_ = path;
} }
// Remembers the in-memory tuned OpenCL parameter byte stream for later use.
// Only the pointer and the size are stored; the bytes are not copied here.
void GPUContextBuilder::Impl::SetOpenCLParameter(const unsigned char *data,
const size_t size) {
opencl_parameter_ptr_ = data;
opencl_parameter_size_ = size;
}
std::shared_ptr<GPUContext> GPUContextBuilder::Impl::Finalize() { std::shared_ptr<GPUContext> GPUContextBuilder::Impl::Finalize() {
return std::shared_ptr<GPUContext>(new GPUContext(storage_path_, return std::shared_ptr<GPUContext>(new GPUContext(storage_path_,
opencl_binary_paths_, opencl_binary_paths_,
opencl_parameter_path_)); opencl_parameter_path_,
opencl_binary_ptr_,
opencl_binary_size_,
opencl_parameter_ptr_,
opencl_parameter_size_));
} }
GPUContextBuilder::GPUContextBuilder() : impl_(new GPUContextBuilder::Impl) {} GPUContextBuilder::GPUContextBuilder() : impl_(new GPUContextBuilder::Impl) {}
...@@ -146,12 +176,24 @@ GPUContextBuilder &GPUContextBuilder::SetOpenCLBinaryPaths( ...@@ -146,12 +176,24 @@ GPUContextBuilder &GPUContextBuilder::SetOpenCLBinaryPaths(
return *this; return *this;
} }
// Forwards the OpenCL binary byte stream to the implementation object and
// returns this builder so that calls can be chained.
GPUContextBuilder& GPUContextBuilder::SetOpenCLBinary(
const unsigned char *data, const size_t size) {
impl_->SetOpenCLBinary(data, size);
return *this;
}
GPUContextBuilder &GPUContextBuilder::SetOpenCLParameterPath( GPUContextBuilder &GPUContextBuilder::SetOpenCLParameterPath(
const std::string &path) { const std::string &path) {
impl_->SetOpenCLParameterPath(path); impl_->SetOpenCLParameterPath(path);
return *this; return *this;
} }
// Forwards the tuned OpenCL parameter byte stream to the implementation
// object and returns this builder so that calls can be chained.
GPUContextBuilder& GPUContextBuilder::SetOpenCLParameter(
const unsigned char *data, const size_t size) {
impl_->SetOpenCLParameter(data, size);
return *this;
}
std::shared_ptr<GPUContext> GPUContextBuilder::Finalize() { std::shared_ptr<GPUContext> GPUContextBuilder::Finalize() {
return impl_->Finalize(); return impl_->Finalize();
} }
......
...@@ -173,9 +173,9 @@ class MACE_API GPUContextBuilder { ...@@ -173,9 +173,9 @@ class MACE_API GPUContextBuilder {
/// \param path Make sure your program have Read/Write permission of the path /// \param path Make sure your program have Read/Write permission of the path
/// \return /// \return
GPUContextBuilder &SetStoragePath(const std::string &path); GPUContextBuilder &SetStoragePath(const std::string &path);
/// \brief Set paths of Generated OpenCL Compiled Kernel Binary file (not libOpenCL.so) // NOLINT(whitespace/line_length) /// \brief Set paths of generated OpenCL compiled kernel binary file (not libOpenCL.so) // NOLINT(whitespace/line_length)
/// ///
/// if you use gpu of specific soc, Using OpenCL binary will speed up the initialization. // NOLINT(whitespace/line_length) /// If you use GPU of specific soc, using OpenCL binary will speed up the initialization. // NOLINT(whitespace/line_length)
/// OpenCL binary is corresponding to the OpenCL Driver version, /// OpenCL binary is corresponding to the OpenCL Driver version,
/// you should update the binary when OpenCL Driver changed. /// you should update the binary when OpenCL Driver changed.
/// ///
...@@ -183,15 +183,38 @@ class MACE_API GPUContextBuilder { ...@@ -183,15 +183,38 @@ class MACE_API GPUContextBuilder {
/// \return /// \return
GPUContextBuilder &SetOpenCLBinaryPaths( GPUContextBuilder &SetOpenCLBinaryPaths(
const std::vector<std::string> &paths); const std::vector<std::string> &paths);
/// \brief Set the path of Generated OpenCL parameter file
/// \brief Set the generated OpenCL compiled kernel binary from a byte array
///
/// When targeting the GPU of a specific SoC, supplying a precompiled OpenCL
/// binary speeds up initialization. The binary corresponds to the OpenCL
/// driver version, so update it whenever the OpenCL driver changes.
///
/// NOTE(review): the builder appears to keep only the pointer, so the buffer
/// should stay valid at least until Finalize() is called -- confirm.
///
/// \param data Byte stream with the contents of the OpenCL binary file
/// \param size Size of the byte stream (data) in bytes
/// \return Reference to this builder, so that calls can be chained
GPUContextBuilder &SetOpenCLBinary(const unsigned char *data,
const size_t size);
/// \brief Set the path of generated OpenCL parameter file
/// ///
/// If you use gpu for specific soc, The parameters is the local work group /// If you use GPU for specific soc, the parameters is the local work group
/// size tuned for specific SOC, which may be faster than the /// size tuned for specific SOC, which may be faster than the
/// general parameters. /// general parameters.
/// ///
/// \param path Make sure your program have Read/Write permission of the path /// \param path Make sure your program have Read/Write permission of the path
/// \return /// \return
GPUContextBuilder &SetOpenCLParameterPath(const std::string &path); GPUContextBuilder &SetOpenCLParameterPath(const std::string &path);
/// \brief Set the generated OpenCL parameters from a byte array
///
/// When targeting the GPU of a specific SoC, these parameters are the local
/// work group sizes tuned for that SoC, which may be faster than the
/// general parameters.
///
/// NOTE(review): the builder appears to keep only the pointer, so the buffer
/// should stay valid at least until Finalize() is called -- confirm.
///
/// \param data Byte stream with the contents of the OpenCL parameter file
/// \param size Size of the byte stream (data) in bytes
/// \return Reference to this builder, so that calls can be chained
GPUContextBuilder &SetOpenCLParameter(const unsigned char *data,
const size_t size);
std::shared_ptr<GPUContext> Finalize(); std::shared_ptr<GPUContext> Finalize();
......
// Copyright 2019 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include <cstring>
namespace mace {
// Returns a pointer to the bytes embedded when this file was generated, or
// nullptr when it was generated with data_size == 0.
const unsigned char *{{ load_func_name }}() {
{% if data_size == 0 %}
return nullptr;
{% else %}
static const unsigned char kData[{{ data_size }}] = {
{% for d in data %}{{"0x%02X, " % d }}{%endfor%}
};
return kData;
{% endif %}
}
// Returns the number of embedded bytes (the data_size this file was
// generated with).
size_t {{ size_func_name }}() {
return {{ data_size }};
}
} // namespace mace
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import jinja2
import os
import sys
import numpy as np
FLAGS = None
def generate_opencl_code(binary_file_name, load_func_name, size_func_name,
                         output_path):
    """Render a binary file as a C++ source file.

    Args:
        binary_file_name: Path of the binary file to embed. When the path does
            not exist, the source is rendered with no data (size 0).
        load_func_name: Name given to the generated accessor that returns the
            data pointer.
        size_func_name: Name given to the generated accessor that returns the
            data size.
        output_path: Path of the C++ file to write; an existing file at this
            path is removed first.
    """
    if os.path.exists(binary_file_name):
        with open(binary_file_name, 'rb') as bin_file:
            payload = np.fromfile(bin_file, dtype=np.uint8)
    else:
        payload = []

    # The template is looked up in the first entry of sys.path.
    template_env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(sys.path[0]))
    template = template_env.get_template('file_binary.cc.jinja2')
    rendered = template.render(
        data=payload,
        data_size=len(payload),
        load_func_name=load_func_name,
        size_func_name=size_func_name)

    if os.path.isfile(output_path):
        os.remove(output_path)
    with open(output_path, "w") as out_file:
        out_file.write(rendered)
def parse_args():
    """Parses command line arguments.

    Returns:
        The (namespace, unrecognized_args) pair produced by
        ArgumentParser.parse_known_args().
    """
    # (flag, default, help text) for every string-valued option, in the order
    # in which the options are registered.
    string_options = (
        ("--file_name",
         "opencl_binary.bin",
         "The binary file name."),
        ("--output_path",
         "",
         "The path of generated C++ source file which contains the binary."),
        ("--load_func_name",
         "LoadData",
         "load interface name."),
        ("--size_func_name",
         "DataSize",
         "size function name."),
    )

    parser = argparse.ArgumentParser()
    for flag, default_value, help_text in string_options:
        parser.add_argument(flag, type=str, default=default_value,
                            help=help_text)
    return parser.parse_known_args()
if __name__ == '__main__':
    # Script entry point: render the binary named by --file_name as a C++
    # source file at --output_path. Unrecognized arguments are ignored.
    FLAGS, unparsed = parse_args()
    # generate_opencl_code takes both accessor names; the parser defines
    # --load_func_name and --size_func_name (there is no interface_name flag).
    generate_opencl_code(FLAGS.file_name,
                         FLAGS.load_func_name,
                         FLAGS.size_func_name,
                         FLAGS.output_path)
...@@ -14,7 +14,11 @@ ...@@ -14,7 +14,11 @@
#ifndef MACE_UTILS_TUNER_H_ #ifndef MACE_UTILS_TUNER_H_
#define MACE_UTILS_TUNER_H_ #define MACE_UTILS_TUNER_H_
#include <fcntl.h>
#include <stdlib.h> #include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <cstring> #include <cstring>
#include <fstream> #include <fstream>
...@@ -39,10 +43,16 @@ inline bool IsTuning() { ...@@ -39,10 +43,16 @@ inline bool IsTuning() {
template <typename param_type> template <typename param_type>
class Tuner { class Tuner {
public: public:
explicit Tuner(const std::string tuned_param_file_path = ""): explicit Tuner(const std::string tuned_param_file_path = "",
const unsigned char *param_byte_stream = nullptr,
const size_t param_byte_stream_size = 0):
tuned_param_file_path_(tuned_param_file_path) { tuned_param_file_path_(tuned_param_file_path) {
path_ = getenv("MACE_RUN_PARAMETER_PATH"); path_ = getenv("MACE_RUN_PARAMETER_PATH");
ReadRunParamters(); if (param_byte_stream != nullptr && param_byte_stream_size != 0) {
ParseData(param_byte_stream, param_byte_stream_size);
} else {
ReadRunParamters();
}
} }
~Tuner() { WriteRunParameters(); } ~Tuner() { WriteRunParameters(); }
...@@ -114,32 +124,100 @@ class Tuner { ...@@ -114,32 +124,100 @@ class Tuner {
} }
} }
inline void ParseData(const unsigned char *data, size_t data_size) {
const size_t int_size = sizeof(int32_t);
const size_t param_type_size = sizeof(param_type);
size_t parsed_offset = 0;
int64_t num_params = 0;
memcpy(&num_params, data, sizeof(num_params));
data += sizeof(num_params);
parsed_offset += sizeof(num_params);
while (num_params--) {
int32_t key_size = 0;
memcpy(&key_size, data, int_size);
data += int_size;
std::string key(key_size, ' ');
memcpy(&key[0], data, key_size);
data += key_size;
parsed_offset += int_size + key_size;
int32_t params_size = 0;
memcpy(&params_size, data, int_size);
data += int_size;
parsed_offset += int_size;
int32_t params_count = params_size / param_type_size;
std::vector<param_type> params(params_count);
for (int i = 0; i < params_count; ++i) {
memcpy(&params[i], data, param_type_size);
data += param_type_size;
parsed_offset += param_type_size;
}
MACE_CHECK(parsed_offset <= data_size,
"Parsing tuned data out of range: ",
parsed_offset, " > ", data_size);
param_table_.emplace(key, params);
}
}
inline void ReadRunParamters() { inline void ReadRunParamters() {
if (!tuned_param_file_path_.empty()) { if (!tuned_param_file_path_.empty()) {
std::ifstream ifs(tuned_param_file_path_, struct stat st;
std::ios::binary | std::ios::in); if (stat(tuned_param_file_path_.c_str(), &st) == -1) {
if (ifs.is_open()) { if (errno == ENOENT) {
int64_t num_params = 0; VLOG(1) << "File " << tuned_param_file_path_
ifs.read(reinterpret_cast<char *>(&num_params), sizeof(num_params)); << " does not exist";
while (num_params--) { } else {
int32_t key_size = 0; LOG(WARNING) << "Stat file " << tuned_param_file_path_
ifs.read(reinterpret_cast<char *>(&key_size), sizeof(key_size)); << " failed, error code: " << strerror(errno);
std::string key(key_size, ' ');
ifs.read(&key[0], key_size);
int32_t params_size = 0;
ifs.read(reinterpret_cast<char *>(&params_size), sizeof(params_size));
int32_t params_count = params_size / sizeof(unsigned int);
std::vector<unsigned int> params(params_count);
for (int i = 0; i < params_count; ++i) {
ifs.read(reinterpret_cast<char *>(&params[i]),
sizeof(unsigned int));
}
param_table_.emplace(key, params);
} }
ifs.close(); return;
} else { }
LOG(WARNING) << "Read OpenCL tuned parameters file failed."; int fd = open(tuned_param_file_path_.c_str(), O_RDONLY);
if (fd < 0) {
if (errno == ENOENT) {
LOG(INFO) << "File " << tuned_param_file_path_
<< " does not exist";
} else {
LOG(WARNING) << "open file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
}
return;
}
size_t file_size = st.st_size;
unsigned char *file_data =
static_cast<unsigned char *>(mmap(nullptr, file_size, PROT_READ,
MAP_PRIVATE, fd, 0));
int res = 0;
if (file_data == MAP_FAILED) {
LOG(WARNING) << "mmap file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
res = close(fd);
if (res != 0) {
LOG(WARNING) << "close file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
}
return;
}
ParseData(file_data, file_size);
res = munmap(file_data, file_size);
if (res != 0) {
LOG(WARNING) << "munmap file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
res = close(fd);
if (res != 0) {
LOG(WARNING) << "close file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
}
return;
}
res = close(fd);
if (res != 0) {
LOG(WARNING) << "close file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
return;
} }
} else { } else {
VLOG(1) << "There is no tuned parameters."; VLOG(1) << "There is no tuned parameters.";
......
...@@ -407,6 +407,7 @@ CODEGEN_BASE_DIR = 'mace/codegen' ...@@ -407,6 +407,7 @@ CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models' MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine' ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine'
LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib' LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib'
OPENCL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/opencl'
LIBMACE_SO_TARGET = "//mace/libmace:libmace.so" LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static" LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a" LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
......
...@@ -853,7 +853,7 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp, ...@@ -853,7 +853,7 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp,
def build_example(configs, target_abi, toolchain, def build_example(configs, target_abi, toolchain,
enable_openmp, mace_lib_type): enable_openmp, mace_lib_type, cl_binary_to_code, device):
library_name = configs[YAMLKeyword.library_name] library_name = configs[YAMLKeyword.library_name]
hexagon_mode = get_hexagon_mode(configs) hexagon_mode = get_hexagon_mode(configs)
...@@ -862,6 +862,20 @@ def build_example(configs, target_abi, toolchain, ...@@ -862,6 +862,20 @@ def build_example(configs, target_abi, toolchain,
sh.rm("-rf", build_tmp_binary_dir) sh.rm("-rf", build_tmp_binary_dir)
os.makedirs(build_tmp_binary_dir) os.makedirs(build_tmp_binary_dir)
if cl_binary_to_code:
sh_commands.gen_opencl_binary_cpps(
get_opencl_binary_output_path(
library_name, target_abi, device),
get_opencl_parameter_output_path(
library_name, target_abi, device),
OPENCL_CODEGEN_DIR + '/opencl_binary.cc',
OPENCL_CODEGEN_DIR + '/opencl_parameter.cc')
else:
sh_commands.gen_opencl_binary_cpps(
"", "",
OPENCL_CODEGEN_DIR + '/opencl_binary.cc',
OPENCL_CODEGEN_DIR + '/opencl_parameter.cc')
symbol_hidden = True symbol_hidden = True
libmace_target = LIBMACE_STATIC_TARGET libmace_target = LIBMACE_STATIC_TARGET
...@@ -942,12 +956,15 @@ def run_mace(flags): ...@@ -942,12 +956,15 @@ def run_mace(flags):
if target_abi in dev[YAMLKeyword.target_abis]: if target_abi in dev[YAMLKeyword.target_abis]:
# get toolchain # get toolchain
toolchain = infer_toolchain(target_abi) toolchain = infer_toolchain(target_abi)
device = DeviceWrapper(dev)
if flags.example: if flags.example:
build_example(configs, build_example(configs,
target_abi, target_abi,
toolchain, toolchain,
not flags.disable_openmp, not flags.disable_openmp,
flags.mace_lib_type) flags.mace_lib_type,
flags.cl_binary_to_code,
device)
else: else:
build_mace_run(configs, build_mace_run(configs,
target_abi, target_abi,
...@@ -956,7 +973,6 @@ def run_mace(flags): ...@@ -956,7 +973,6 @@ def run_mace(flags):
flags.address_sanitizer, flags.address_sanitizer,
flags.mace_lib_type) flags.mace_lib_type)
# run # run
device = DeviceWrapper(dev)
with device.lock(): with device.lock():
device.run_specify_abi(flags, configs, target_abi) device.run_specify_abi(flags, configs, target_abi)
elif dev[YAMLKeyword.device_name] != SystemType.host: elif dev[YAMLKeyword.device_name] != SystemType.host:
...@@ -1229,6 +1245,10 @@ def parse_args(): ...@@ -1229,6 +1245,10 @@ def parse_args():
type=str, type=str,
default="", default="",
help="quantize stat output dir.") help="quantize stat output dir.")
run.add_argument(
"--cl_binary_to_code",
action="store_true",
help="convert OpenCL binaries to cpp.")
benchmark = subparsers.add_parser( benchmark = subparsers.add_parser(
'benchmark', 'benchmark',
parents=[all_type_parent_parser, run_bm_parent_parser], parents=[all_type_parent_parser, run_bm_parent_parser],
......
...@@ -130,7 +130,7 @@ class DeviceWrapper: ...@@ -130,7 +130,7 @@ class DeviceWrapper:
dst_file = "%s/%s" % (dst_path, file_name) dst_file = "%s/%s" % (dst_path, file_name)
if os.path.exists(dst_file): if os.path.exists(dst_file):
sh.rm('-f', dst_file) sh.rm('-f', dst_file)
six.print_("Pull %s to %s" % (src_path, dst_path)) six.print_("Pull %s to %s" % (src_file, dst_path))
if self.system == SystemType.android: if self.system == SystemType.android:
sh_commands.adb_pull( sh_commands.adb_pull(
src_file, dst_file, self.address) src_file, dst_file, self.address)
...@@ -626,6 +626,11 @@ class DeviceWrapper: ...@@ -626,6 +626,11 @@ class DeviceWrapper:
model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME, model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME,
opencl_parameter_bin_path opencl_parameter_bin_path
) )
sh_commands.gen_opencl_binary_cpps(
opencl_output_bin_path,
opencl_parameter_bin_path,
opencl_output_bin_path + '.cc',
opencl_parameter_bin_path + '.cc')
def report_run_statistics(self, def report_run_statistics(self,
target_abi, target_abi,
......
...@@ -32,7 +32,7 @@ from common import abi_to_internal ...@@ -32,7 +32,7 @@ from common import abi_to_internal
sys.path.insert(0, "mace/python/tools") sys.path.insert(0, "mace/python/tools")
try: try:
from encrypt_opencl_codegen import encrypt_opencl_codegen from encrypt_opencl_codegen import encrypt_opencl_codegen
from binary_codegen import tuning_param_codegen from opencl_binary_codegen import generate_opencl_code
from generate_data import generate_input_data from generate_data import generate_input_data
from validate import validate from validate import validate
from mace_engine_factory_codegen import gen_mace_engine_factory from mace_engine_factory_codegen import gen_mace_engine_factory
...@@ -567,6 +567,23 @@ def gen_random_input(model_output_dir, ...@@ -567,6 +567,23 @@ def gen_random_input(model_output_dir,
sh.cp("-f", input_file_list[i], dst_input_file) sh.cp("-f", input_file_list[i], dst_input_file)
def gen_opencl_binary_cpps(opencl_bin_file_path,
                           opencl_param_file_path,
                           opencl_bin_cpp_path,
                           opencl_param_cpp_path):
    """Generate the C++ sources for the OpenCL binary and parameter files.

    The directory of `opencl_bin_cpp_path` is created when it does not exist.
    The binary file is converted first, then the parameter file.
    """
    cpp_dir = os.path.dirname(opencl_bin_cpp_path)
    if not os.path.exists(cpp_dir):
        sh.mkdir("-p", cpp_dir)

    # (input file, load function name, size function name, output C++ file)
    codegen_jobs = (
        (opencl_bin_file_path,
         'LoadOpenCLBinary', 'OpenCLBinarySize',
         opencl_bin_cpp_path),
        (opencl_param_file_path,
         'LoadOpenCLParameter', 'OpenCLParameterSize',
         opencl_param_cpp_path),
    )
    for src_path, load_name, size_name, cpp_path in codegen_jobs:
        generate_opencl_code(src_path, load_name, size_name, cpp_path)
def update_mace_run_binary(build_tmp_binary_dir, link_dynamic=False): def update_mace_run_binary(build_tmp_binary_dir, link_dynamic=False):
if link_dynamic: if link_dynamic:
mace_run_filepath = build_tmp_binary_dir + "/mace_run_dynamic" mace_run_filepath = build_tmp_binary_dir + "/mace_run_dynamic"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册