Commit 951452c2 authored by liuqi

Feature: Support converting OpenCL binary (binary and parameter) files to code.

1. Convert OpenCL binary files to code.
2. The example supports the OpenCL byte stream via the --cl_binary_to_code flag.
Parent 22e40d66
......@@ -314,14 +314,18 @@ Tuning for specific SoC's GPU
└── opencl
└── arm64-v8a
   ├── mobilenet-v2_compiled_opencl_kernel.MiNote3.sdm660.bin
   └── mobilenet-v2_tuned_opencl_parameter.MiNote3.sdm660.bin
   ├── mobilenet-v2_compiled_opencl_kernel.MiNote3.sdm660.bin.cc
   ├── mobilenet-v2_tuned_opencl_parameter.MiNote3.sdm660.bin
   └── mobilenet-v2_tuned_opencl_parameter.MiNote3.sdm660.bin.cc
* **mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin** stands for the OpenCL binaries
used for your models, which can accelerate the initialization stage.
For details, please refer to the `OpenCL Specification <https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clCreateProgramWithBinary.html>`__.
* **mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin.cc** contains C++ source code that defines the OpenCL binary data as a const array.
* **mobilenet-v2-tuned_opencl_parameter.MI6.msm8998.bin** stands for the tuned OpenCL parameters
for the SoC.
* **mobilenet-v2-tuned_opencl_parameter.MI6.msm8998.bin.cc** contains C++ source code that defines the tuned OpenCL parameter data as a const array.
* **4. Deployment**
* Rename the files generated above to avoid name collisions and push them to **your own device's directory**.
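
As a reference only (a hedged sketch, not part of the original guide), linking the generated ``.cc`` files into your target lets you feed the embedded data to MACE directly, using the load/size functions emitted by the code generator together with the new ``GPUContextBuilder`` byte-stream setters introduced by this change::

    // Declarations of the functions defined in the generated .cc files.
    namespace mace {
    const unsigned char *LoadOpenCLBinary();
    size_t OpenCLBinarySize();
    const unsigned char *LoadOpenCLParameter();
    size_t OpenCLParameterSize();
    }  // namespace mace

    // Pass the embedded byte streams to the GPU context instead of file paths.
    std::shared_ptr<mace::GPUContext> gpu_context =
        mace::GPUContextBuilder()
            .SetOpenCLBinary(mace::LoadOpenCLBinary(), mace::OpenCLBinarySize())
            .SetOpenCLParameter(mace::LoadOpenCLParameter(),
                                mace::OpenCLParameterSize())
            .Finalize();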
......
......@@ -38,6 +38,18 @@ cc_library(
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
)
cc_library(
name = "generated_opencl_binary",
srcs = ["opencl/opencl_binary.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
)
cc_library(
name = "generated_opencl_parameter",
srcs = ["opencl/opencl_parameter.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
)
cc_library(
name = "generated_version",
srcs = ["version/version.cc"],
......
......@@ -39,20 +39,30 @@ std::string FindFirstExistPath(const std::vector<std::string> &paths) {
GPUContext::GPUContext(const std::string &storage_path,
const std::vector<std::string> &opencl_binary_paths,
const std::string &opencl_parameter_path)
const std::string &opencl_parameter_path,
const unsigned char *opencl_binary_ptr,
const size_t opencl_binary_size,
const unsigned char *opencl_parameter_ptr,
const size_t opencl_parameter_size)
: storage_factory_(new FileStorageFactory(storage_path)),
opencl_tuner_(new Tuner<uint32_t>(opencl_parameter_path)) {
opencl_tuner_(new Tuner<uint32_t>(opencl_parameter_path,
opencl_parameter_ptr,
opencl_parameter_size)) {
if (!storage_path.empty()) {
opencl_cache_storage_ =
storage_factory_->CreateStorage(kPrecompiledProgramFileName);
}
std::string precompiled_binary_path =
FindFirstExistPath(opencl_binary_paths);
if (!precompiled_binary_path.empty()) {
if (opencl_binary_ptr != nullptr) {
opencl_binary_storage_.reset(
new FileStorage(precompiled_binary_path));
new ReadOnlyByteStreamStorage(opencl_binary_ptr, opencl_binary_size));
} else {
std::string precompiled_binary_path =
FindFirstExistPath(opencl_binary_paths);
if (!precompiled_binary_path.empty()) {
opencl_binary_storage_.reset(
new FileStorage(precompiled_binary_path));
}
}
}
......
......@@ -20,7 +20,7 @@
#include <string>
#include <vector>
#include "mace/core/file_storage.h"
#include "mace/core/kv_storage.h"
#include "mace/utils/tuner.h"
namespace mace {
......@@ -29,7 +29,11 @@ class GPUContext {
public:
GPUContext(const std::string &storage_path = "",
const std::vector<std::string> &opencl_binary_path = {},
const std::string &opencl_parameter_path = "");
const std::string &opencl_parameter_path = "",
const unsigned char *opencl_binary_ptr = nullptr,
const size_t opencl_binary_size = 0,
const unsigned char *opencl_parameter_ptr = nullptr,
const size_t opencl_parameter_size = 0);
~GPUContext();
std::shared_ptr<KVStorage> opencl_binary_storage();
......
......@@ -23,11 +23,50 @@
#include <memory>
#include <utility>
#include "mace/core/file_storage.h"
#include "mace/core/kv_storage.h"
#include "mace/core/macros.h"
#include "mace/utils/logging.h"
namespace mace {
namespace {
void ParseKVData(const unsigned char *data,
size_t data_size,
std::map<std::string, std::vector<unsigned char>> *kv_map) {
const size_t int_size = sizeof(int32_t);
size_t parsed_offset = 0;
int64_t num_tuple = 0;
memcpy(&num_tuple, data, sizeof(num_tuple));
data += sizeof(num_tuple);
parsed_offset += sizeof(num_tuple);
int32_t key_size = 0;
int32_t value_size = 0;
for (int i = 0; i < num_tuple; ++i) {
memcpy(&key_size, data, int_size);
data += int_size;
std::unique_ptr<char[]> key(new char[key_size+1]);
memcpy(&key[0], data, key_size);
data += key_size;
key[key_size] = '\0';
parsed_offset += int_size + key_size;
memcpy(&value_size, data, int_size);
data += int_size;
std::vector<unsigned char> value(value_size);
memcpy(value.data(), data, value_size);
data += value_size;
parsed_offset += int_size + value_size;
MACE_CHECK(parsed_offset <= data_size,
"Paring storage data out of range: ",
parsed_offset, " > ", data_size);
kv_map->emplace(std::string(&key[0]), value);
}
}
} // namespace
class FileStorageFactory::Impl {
public:
explicit Impl(const std::string &path);
......@@ -103,32 +142,8 @@ int FileStorage::Load() {
}
return -1;
}
unsigned char *file_data_ptr = file_data;
const size_t int_size = sizeof(int32_t);
int64_t data_size = 0;
memcpy(&data_size, file_data_ptr, sizeof(int64_t));
file_data_ptr += sizeof(int64_t);
int32_t key_size = 0;
int32_t value_size = 0;
for (int i = 0; i < data_size; ++i) {
memcpy(&key_size, file_data_ptr, int_size);
file_data_ptr += int_size;
std::unique_ptr<char[]> key(new char[key_size+1]);
memcpy(&key[0], file_data_ptr, key_size);
file_data_ptr += key_size;
key[key_size] = '\0';
memcpy(&value_size, file_data_ptr, int_size);
file_data_ptr += int_size;
std::vector<unsigned char> value(value_size);
memcpy(value.data(), file_data_ptr, value_size);
file_data_ptr += value_size;
data_.emplace(std::string(&key[0]), value);
}
ParseKVData(file_data, file_size, &data_);
res = munmap(file_data, file_size);
if (res != 0) {
LOG(WARNING) << "munmap file " << file_path_
......@@ -245,4 +260,40 @@ int FileStorage::Flush() {
return 0;
}
ReadOnlyByteStreamStorage::ReadOnlyByteStreamStorage(
const unsigned char *byte_stream, size_t byte_stream_size) {
ParseKVData(byte_stream, byte_stream_size, &data_);
}
int ReadOnlyByteStreamStorage::Load() {
return 0;
}
bool ReadOnlyByteStreamStorage::Clear() {
LOG(FATAL) << "ReadOnlyByteStreamStorage should not clear data";
return true;
}
const std::vector<unsigned char>* ReadOnlyByteStreamStorage::Find(
const std::string &key) {
auto iter = data_.find(key);
if (iter == data_.end()) return nullptr;
return &(iter->second);
}
bool ReadOnlyByteStreamStorage::Insert(
const std::string &key,
const std::vector<unsigned char> &value) {
MACE_UNUSED(key);
MACE_UNUSED(value);
LOG(FATAL) << "ReadOnlyByteStreamStorage should not insert data";
return true;
}
int ReadOnlyByteStreamStorage::Flush() {
return 0;
}
}; // namespace mace
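
For readers unfamiliar with the storage layout, here is a minimal sketch (an inference from ParseKVData above, not code from this change) of a serializer that produces the byte stream both FileStorage and ReadOnlyByteStreamStorage consume: an int64 tuple count followed, per tuple, by an int32 key size, the key bytes, an int32 value size, and the value bytes:

    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    // Hypothetical helper: writes a key/value map in the layout ParseKVData reads.
    std::vector<unsigned char> SerializeKVData(
        const std::map<std::string, std::vector<unsigned char>> &kv_map) {
      std::vector<unsigned char> out;
      auto append = [&out](const void *src, size_t size) {
        const unsigned char *p = static_cast<const unsigned char *>(src);
        out.insert(out.end(), p, p + size);
      };
      const int64_t num_tuple = static_cast<int64_t>(kv_map.size());
      append(&num_tuple, sizeof(num_tuple));
      for (const auto &kv : kv_map) {
        const int32_t key_size = static_cast<int32_t>(kv.first.size());
        append(&key_size, sizeof(key_size));
        append(kv.first.data(), key_size);
        const int32_t value_size = static_cast<int32_t>(kv.second.size());
        append(&value_size, sizeof(value_size));
        append(kv.second.data(), value_size);
      }
      return out;
    }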
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_FILE_STORAGE_H_
#define MACE_CORE_FILE_STORAGE_H_
#ifndef MACE_CORE_KV_STORAGE_H_
#define MACE_CORE_KV_STORAGE_H_
#include <map>
#include <memory>
......@@ -80,6 +80,25 @@ class FileStorage : public KVStorage {
utils::RWMutex data_mutex_;
};
class ReadOnlyByteStreamStorage : public KVStorage {
public:
// load data from byte stream
explicit ReadOnlyByteStreamStorage(const unsigned char *byte_stream,
size_t byte_stream_size);
public:
int Load() override;
bool Clear() override;
bool Insert(const std::string &key,
const std::vector<unsigned char> &value) override;
const std::vector<unsigned char> *Find(const std::string &key) override;
int Flush() override;
private:
std::map<std::string, std::vector<unsigned char>> data_;
};
} // namespace mace
#endif // MACE_CORE_FILE_STORAGE_H_
#endif // MACE_CORE_KV_STORAGE_H_
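
A brief, hypothetical usage sketch of the new class (blob_ptr, blob_size, and the key are placeholders; the blob must follow the serialized key/value layout described above):

    // blob_ptr / blob_size: placeholders for an embedded, serialized KV blob.
    mace::ReadOnlyByteStreamStorage storage(blob_ptr, blob_size);
    storage.Load();  // no-op; the constructor already parsed the byte stream
    const std::vector<unsigned char> *value = storage.Find("some_key");  // nullptr if absent
    // Insert() and Clear() are not supported and will LOG(FATAL).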
......@@ -25,7 +25,7 @@
#include <utility>
#include "mace/core/macros.h"
#include "mace/core/file_storage.h"
#include "mace/core/kv_storage.h"
#include "mace/core/runtime/opencl/opencl_extension.h"
#include "mace/utils/tuner.h"
......
......@@ -22,7 +22,7 @@
#include <string>
#include <vector>
#include "mace/core/file_storage.h"
#include "mace/core/kv_storage.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/scratch_image.h"
......
......@@ -31,6 +31,8 @@ cc_binary(
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_libmace",
"//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter",
] + if_hexagon_enabled([
"//third_party/nnlib:libhexagon",
]),
......@@ -59,5 +61,7 @@ cc_binary(
"//external:gflags_nothreads",
"//mace/codegen:generated_libmace",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter",
],
)
......@@ -27,6 +27,16 @@
#include "mace/codegen/engine/mace_engine_factory.h"
#endif
#ifdef MACE_ENABLE_OPENCL
namespace mace {
const unsigned char *LoadOpenCLBinary();
size_t OpenCLBinarySize();
const unsigned char *LoadOpenCLParameter();
size_t OpenCLParameterSize();
} // namespace mace
#endif
namespace mace {
namespace examples {
......@@ -187,7 +197,9 @@ bool RunModel(const std::vector<std::string> &input_names,
gpu_context = GPUContextBuilder()
.SetStoragePath(storage_path)
.SetOpenCLBinaryPaths(opencl_binary_paths)
.SetOpenCLBinary(LoadOpenCLBinary(), OpenCLBinarySize())
.SetOpenCLParameterPath(FLAGS_opencl_parameter_file)
.SetOpenCLParameter(LoadOpenCLParameter(), OpenCLParameterSize())
.Finalize();
config.SetGPUContext(gpu_context);
......
......@@ -97,20 +97,34 @@ MaceStatus CheckGPUAvalibility(const NetDef *net_def, Device *device) {
class GPUContextBuilder::Impl {
public:
Impl();
void SetStoragePath(const std::string &path);
void SetOpenCLBinaryPaths(const std::vector<std::string> &paths);
void SetOpenCLBinary(const unsigned char *data, const size_t size);
void SetOpenCLParameterPath(const std::string &path);
void SetOpenCLParameter(const unsigned char *data, const size_t size);
std::shared_ptr<GPUContext> Finalize();
public:
std::string storage_path_;
std::vector<std::string> opencl_binary_paths_;
std::string opencl_parameter_path_;
const unsigned char *opencl_binary_ptr_;
size_t opencl_binary_size_;
const unsigned char *opencl_parameter_ptr_;
size_t opencl_parameter_size_;
};
GPUContextBuilder::Impl::Impl()
: storage_path_(""), opencl_binary_paths_(0), opencl_parameter_path_(""),
opencl_binary_ptr_(nullptr), opencl_binary_size_(0),
opencl_parameter_ptr_(nullptr), opencl_parameter_size_(0) {}
void GPUContextBuilder::Impl::SetStoragePath(const std::string &path) {
storage_path_ = path;
}
......@@ -120,15 +134,31 @@ void GPUContextBuilder::Impl::SetOpenCLBinaryPaths(
opencl_binary_paths_ = paths;
}
void GPUContextBuilder::Impl::SetOpenCLBinary(const unsigned char *data,
const size_t size) {
opencl_binary_ptr_ = data;
opencl_binary_size_ = size;
}
void GPUContextBuilder::Impl::SetOpenCLParameterPath(
const std::string &path) {
opencl_parameter_path_ = path;
}
void GPUContextBuilder::Impl::SetOpenCLParameter(const unsigned char *data,
const size_t size) {
opencl_parameter_ptr_ = data;
opencl_parameter_size_ = size;
}
std::shared_ptr<GPUContext> GPUContextBuilder::Impl::Finalize() {
return std::shared_ptr<GPUContext>(new GPUContext(storage_path_,
opencl_binary_paths_,
opencl_parameter_path_));
opencl_parameter_path_,
opencl_binary_ptr_,
opencl_binary_size_,
opencl_parameter_ptr_,
opencl_parameter_size_));
}
GPUContextBuilder::GPUContextBuilder() : impl_(new GPUContextBuilder::Impl) {}
......@@ -146,12 +176,24 @@ GPUContextBuilder &GPUContextBuilder::SetOpenCLBinaryPaths(
return *this;
}
GPUContextBuilder& GPUContextBuilder::SetOpenCLBinary(
const unsigned char *data, const size_t size) {
impl_->SetOpenCLBinary(data, size);
return *this;
}
GPUContextBuilder &GPUContextBuilder::SetOpenCLParameterPath(
const std::string &path) {
impl_->SetOpenCLParameterPath(path);
return *this;
}
GPUContextBuilder& GPUContextBuilder::SetOpenCLParameter(
const unsigned char *data, const size_t size) {
impl_->SetOpenCLParameter(data, size);
return *this;
}
std::shared_ptr<GPUContext> GPUContextBuilder::Finalize() {
return impl_->Finalize();
}
......
......@@ -173,9 +173,9 @@ class MACE_API GPUContextBuilder {
/// \param path Make sure your program has Read/Write permission for the path
/// \return
GPUContextBuilder &SetStoragePath(const std::string &path);
/// \brief Set paths of Generated OpenCL Compiled Kernel Binary file (not libOpenCL.so) // NOLINT(whitespace/line_length)
/// \brief Set paths of generated OpenCL compiled kernel binary files (not libOpenCL.so) // NOLINT(whitespace/line_length)
///
/// if you use gpu of specific soc, Using OpenCL binary will speed up the initialization. // NOLINT(whitespace/line_length)
/// If you use the GPU of a specific SoC, using the OpenCL binary will speed up the initialization. // NOLINT(whitespace/line_length)
/// The OpenCL binary corresponds to the OpenCL driver version;
/// you should update the binary when the OpenCL driver changes.
///
......@@ -183,15 +183,38 @@ class MACE_API GPUContextBuilder {
/// \return
GPUContextBuilder &SetOpenCLBinaryPaths(
const std::vector<std::string> &paths);
/// \brief Set the path of Generated OpenCL parameter file
/// \brief Set the generated OpenCL compiled kernel binary as a byte array
///
/// If you use the GPU of a specific SoC, using the OpenCL binary will speed up the initialization. // NOLINT(whitespace/line_length)
/// The OpenCL binary corresponds to the OpenCL driver version;
/// you should update the binary when the OpenCL driver changes.
///
/// \param data Byte stream of OpenCL binary file
/// \param size Size of byte stream (data)
/// \return
GPUContextBuilder &SetOpenCLBinary(const unsigned char *data,
const size_t size);
/// \brief Set the path of generated OpenCL parameter file
///
/// If you use gpu for specific soc, The parameters is the local work group
/// If you use the GPU of a specific SoC, the parameters are the local work group
/// sizes tuned for that SoC, which may be faster than the
/// general parameters.
///
/// \param path Make sure your program has Read/Write permission for the path
/// \return
GPUContextBuilder &SetOpenCLParameterPath(const std::string &path);
/// \brief Set the generated OpenCL parameters as a byte array
///
/// If you use the GPU of a specific SoC, the parameters are the local work group
/// sizes tuned for that SoC, which may be faster than the
/// general parameters.
///
/// \param data Byte stream of OpenCL parameter file
/// \param size Size of byte stream (data)
/// \return
GPUContextBuilder &SetOpenCLParameter(const unsigned char *data,
const size_t size);
std::shared_ptr<GPUContext> Finalize();
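
Note, per the GPUContext constructor and Tuner changes earlier in this diff, the byte-stream setters take precedence over the corresponding file paths when both are supplied. An illustrative sketch (the path is a placeholder; the load/size functions are the generated ones):

    auto gpu_context = mace::GPUContextBuilder()
        .SetOpenCLBinaryPaths({"/path/to/compiled_opencl_kernel.bin"})  // used only if no byte stream is set
        .SetOpenCLBinary(mace::LoadOpenCLBinary(), mace::OpenCLBinarySize())
        .SetOpenCLParameter(mace::LoadOpenCLParameter(), mace::OpenCLParameterSize())
        .Finalize();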
......
// Copyright 2019 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include <cstring>
namespace mace {
const unsigned char *{{ load_func_name }}() {
{% if data_size == 0 %}
return nullptr;
{% else %}
static const unsigned char kData[{{ data_size }}] = {
{% for d in data %}{{"0x%02X, " % d }}{%endfor%}
};
return kData;
{% endif %}
}
size_t {{ size_func_name }}() {
return {{ data_size }};
}
} // namespace mace
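
For illustration, rendering this template with a hypothetical three-byte input, load_func_name=LoadOpenCLBinary, and size_func_name=OpenCLBinarySize would produce C++ roughly like the following (whitespace differs in the actual generated file):

    // This is a generated file. DO NOT EDIT!
    #include <cstring>

    namespace mace {

    const unsigned char *LoadOpenCLBinary() {
      static const unsigned char kData[3] = {
        0x4D, 0x41, 0x43,
      };
      return kData;
    }

    size_t OpenCLBinarySize() {
      return 3;
    }

    }  // namespace mace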
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import jinja2
import os
import sys
import numpy as np
FLAGS = None
def generate_opencl_code(binary_file_name, load_func_name, size_func_name,
output_path):
binary_array = []
if os.path.exists(binary_file_name):
with open(binary_file_name, 'rb') as f:
binary_array = np.fromfile(f, dtype=np.uint8)
env = jinja2.Environment(
loader=jinja2.FileSystemLoader(sys.path[0]))
content = env.get_template('file_binary.cc.jinja2').render(
data=binary_array,
data_size=len(binary_array),
load_func_name=load_func_name,
size_func_name=size_func_name)
if os.path.isfile(output_path):
os.remove(output_path)
with open(output_path, "w") as w_file:
w_file.write(content)
def parse_args():
"""Parses command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--file_name",
type=str,
default="opencl_binary.bin",
help="The binary file name.")
parser.add_argument(
"--output_path",
type=str,
default="",
help="The path of generated C++ source file which contains the binary."
)
parser.add_argument(
"--load_func_name",
type=str,
default="LoadData",
help="load interface name.")
parser.add_argument(
"--size_func_name",
type=str,
default="DataSize",
help="size function name.")
return parser.parse_known_args()
if __name__ == '__main__':
FLAGS, unparsed = parse_args()
    generate_opencl_code(FLAGS.file_name,
                         FLAGS.load_func_name,
                         FLAGS.size_func_name,
                         FLAGS.output_path)
......@@ -14,7 +14,11 @@
#ifndef MACE_UTILS_TUNER_H_
#define MACE_UTILS_TUNER_H_
#include <fcntl.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <cstring>
#include <fstream>
......@@ -39,10 +43,16 @@ inline bool IsTuning() {
template <typename param_type>
class Tuner {
public:
explicit Tuner(const std::string tuned_param_file_path = ""):
explicit Tuner(const std::string tuned_param_file_path = "",
const unsigned char *param_byte_stream = nullptr,
const size_t param_byte_stream_size = 0):
tuned_param_file_path_(tuned_param_file_path) {
path_ = getenv("MACE_RUN_PARAMETER_PATH");
ReadRunParamters();
if (param_byte_stream != nullptr && param_byte_stream_size != 0) {
ParseData(param_byte_stream, param_byte_stream_size);
} else {
ReadRunParamters();
}
}
~Tuner() { WriteRunParameters(); }
......@@ -114,32 +124,100 @@ class Tuner {
}
}
inline void ParseData(const unsigned char *data, size_t data_size) {
const size_t int_size = sizeof(int32_t);
const size_t param_type_size = sizeof(param_type);
size_t parsed_offset = 0;
int64_t num_params = 0;
memcpy(&num_params, data, sizeof(num_params));
data += sizeof(num_params);
parsed_offset += sizeof(num_params);
while (num_params--) {
int32_t key_size = 0;
memcpy(&key_size, data, int_size);
data += int_size;
std::string key(key_size, ' ');
memcpy(&key[0], data, key_size);
data += key_size;
parsed_offset += int_size + key_size;
int32_t params_size = 0;
memcpy(&params_size, data, int_size);
data += int_size;
parsed_offset += int_size;
int32_t params_count = params_size / param_type_size;
std::vector<param_type> params(params_count);
for (int i = 0; i < params_count; ++i) {
memcpy(&params[i], data, param_type_size);
data += param_type_size;
parsed_offset += param_type_size;
}
MACE_CHECK(parsed_offset <= data_size,
"Parsing tuned data out of range: ",
parsed_offset, " > ", data_size);
param_table_.emplace(key, params);
}
}
inline void ReadRunParamters() {
if (!tuned_param_file_path_.empty()) {
std::ifstream ifs(tuned_param_file_path_,
std::ios::binary | std::ios::in);
if (ifs.is_open()) {
int64_t num_params = 0;
ifs.read(reinterpret_cast<char *>(&num_params), sizeof(num_params));
while (num_params--) {
int32_t key_size = 0;
ifs.read(reinterpret_cast<char *>(&key_size), sizeof(key_size));
std::string key(key_size, ' ');
ifs.read(&key[0], key_size);
int32_t params_size = 0;
ifs.read(reinterpret_cast<char *>(&params_size), sizeof(params_size));
int32_t params_count = params_size / sizeof(unsigned int);
std::vector<unsigned int> params(params_count);
for (int i = 0; i < params_count; ++i) {
ifs.read(reinterpret_cast<char *>(&params[i]),
sizeof(unsigned int));
}
param_table_.emplace(key, params);
struct stat st;
if (stat(tuned_param_file_path_.c_str(), &st) == -1) {
if (errno == ENOENT) {
VLOG(1) << "File " << tuned_param_file_path_
<< " does not exist";
} else {
LOG(WARNING) << "Stat file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
}
ifs.close();
} else {
LOG(WARNING) << "Read OpenCL tuned parameters file failed.";
return;
}
int fd = open(tuned_param_file_path_.c_str(), O_RDONLY);
if (fd < 0) {
if (errno == ENOENT) {
LOG(INFO) << "File " << tuned_param_file_path_
<< " does not exist";
} else {
LOG(WARNING) << "open file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
}
return;
}
size_t file_size = st.st_size;
unsigned char *file_data =
static_cast<unsigned char *>(mmap(nullptr, file_size, PROT_READ,
MAP_PRIVATE, fd, 0));
int res = 0;
if (file_data == MAP_FAILED) {
LOG(WARNING) << "mmap file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
res = close(fd);
if (res != 0) {
LOG(WARNING) << "close file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
}
return;
}
ParseData(file_data, file_size);
res = munmap(file_data, file_size);
if (res != 0) {
LOG(WARNING) << "munmap file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
res = close(fd);
if (res != 0) {
LOG(WARNING) << "close file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
}
return;
}
res = close(fd);
if (res != 0) {
LOG(WARNING) << "close file " << tuned_param_file_path_
<< " failed, error code: " << strerror(errno);
return;
}
} else {
VLOG(1) << "There is no tuned parameters.";
......
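
As a small sketch of the new constructor path (illustrative; the load/size functions are the ones generated for the tuned parameters in this change), a Tuner can be built directly from an embedded parameter byte stream, which is exactly what GPUContext now does internally:

    // When a non-empty byte stream is passed, ParseData() is used and the
    // tuned-parameter file path is ignored.
    mace::Tuner<uint32_t> tuner("",  // no tuned-parameter file on disk
                                mace::LoadOpenCLParameter(),
                                mace::OpenCLParameterSize());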
......@@ -407,6 +407,7 @@ CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine'
LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib'
OPENCL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/opencl'
LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
......
......@@ -853,7 +853,7 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp,
def build_example(configs, target_abi, toolchain,
enable_openmp, mace_lib_type):
enable_openmp, mace_lib_type, cl_binary_to_code, device):
library_name = configs[YAMLKeyword.library_name]
hexagon_mode = get_hexagon_mode(configs)
......@@ -862,6 +862,20 @@ def build_example(configs, target_abi, toolchain,
sh.rm("-rf", build_tmp_binary_dir)
os.makedirs(build_tmp_binary_dir)
if cl_binary_to_code:
sh_commands.gen_opencl_binary_cpps(
get_opencl_binary_output_path(
library_name, target_abi, device),
get_opencl_parameter_output_path(
library_name, target_abi, device),
OPENCL_CODEGEN_DIR + '/opencl_binary.cc',
OPENCL_CODEGEN_DIR + '/opencl_parameter.cc')
else:
sh_commands.gen_opencl_binary_cpps(
"", "",
OPENCL_CODEGEN_DIR + '/opencl_binary.cc',
OPENCL_CODEGEN_DIR + '/opencl_parameter.cc')
symbol_hidden = True
libmace_target = LIBMACE_STATIC_TARGET
......@@ -942,12 +956,15 @@ def run_mace(flags):
if target_abi in dev[YAMLKeyword.target_abis]:
# get toolchain
toolchain = infer_toolchain(target_abi)
device = DeviceWrapper(dev)
if flags.example:
build_example(configs,
target_abi,
toolchain,
not flags.disable_openmp,
flags.mace_lib_type)
flags.mace_lib_type,
flags.cl_binary_to_code,
device)
else:
build_mace_run(configs,
target_abi,
......@@ -956,7 +973,6 @@ def run_mace(flags):
flags.address_sanitizer,
flags.mace_lib_type)
# run
device = DeviceWrapper(dev)
with device.lock():
device.run_specify_abi(flags, configs, target_abi)
elif dev[YAMLKeyword.device_name] != SystemType.host:
......@@ -1229,6 +1245,10 @@ def parse_args():
type=str,
default="",
help="quantize stat output dir.")
run.add_argument(
"--cl_binary_to_code",
action="store_true",
help="convert OpenCL binaries to cpp.")
benchmark = subparsers.add_parser(
'benchmark',
parents=[all_type_parent_parser, run_bm_parent_parser],
......
......@@ -130,7 +130,7 @@ class DeviceWrapper:
dst_file = "%s/%s" % (dst_path, file_name)
if os.path.exists(dst_file):
sh.rm('-f', dst_file)
six.print_("Pull %s to %s" % (src_path, dst_path))
six.print_("Pull %s to %s" % (src_file, dst_path))
if self.system == SystemType.android:
sh_commands.adb_pull(
src_file, dst_file, self.address)
......@@ -626,6 +626,11 @@ class DeviceWrapper:
model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME,
opencl_parameter_bin_path
)
sh_commands.gen_opencl_binary_cpps(
opencl_output_bin_path,
opencl_parameter_bin_path,
opencl_output_bin_path + '.cc',
opencl_parameter_bin_path + '.cc')
def report_run_statistics(self,
target_abi,
......
......@@ -32,7 +32,7 @@ from common import abi_to_internal
sys.path.insert(0, "mace/python/tools")
try:
from encrypt_opencl_codegen import encrypt_opencl_codegen
from binary_codegen import tuning_param_codegen
from opencl_binary_codegen import generate_opencl_code
from generate_data import generate_input_data
from validate import validate
from mace_engine_factory_codegen import gen_mace_engine_factory
......@@ -567,6 +567,23 @@ def gen_random_input(model_output_dir,
sh.cp("-f", input_file_list[i], dst_input_file)
def gen_opencl_binary_cpps(opencl_bin_file_path,
opencl_param_file_path,
opencl_bin_cpp_path,
opencl_param_cpp_path):
output_dir = os.path.dirname(opencl_bin_cpp_path)
if not os.path.exists(output_dir):
sh.mkdir("-p", output_dir)
opencl_bin_load_func_name = 'LoadOpenCLBinary'
opencl_bin_size_func_name = 'OpenCLBinarySize'
opencl_param_load_func_name = 'LoadOpenCLParameter'
opencl_param_size_func_name = 'OpenCLParameterSize'
generate_opencl_code(opencl_bin_file_path, opencl_bin_load_func_name,
opencl_bin_size_func_name, opencl_bin_cpp_path)
generate_opencl_code(opencl_param_file_path, opencl_param_load_func_name,
opencl_param_size_func_name, opencl_param_cpp_path)
def update_mace_run_binary(build_tmp_binary_dir, link_dynamic=False):
if link_dynamic:
mace_run_filepath = build_tmp_binary_dir + "/mace_run_dynamic"
......