Commit 39fb3055 authored by liuqi

Refactor opencl binary load logic: use file instead of code.

Parent d4124708
......@@ -365,6 +365,16 @@ The followings list the details.
``.pb`` file will be generated only when build_type is ``proto``.
**OpenCL compiled kernel binary file**
* ``opencl/compiled_opencl_kernel.bin``
.. note::
This file will be generated only when ``target_soc`` is specified and the runtime is ``gpu``.
.. warning::
This file depends on the OpenCL driver on the phone; you should update it whenever the OpenCL driver changes.
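At runtime this file is consumed through ``mace::SetOpenCLBinaryPaths`` before the engine is created (see the usage steps in the next section). A minimal sketch, with an illustrative on-device path::

    // Point this at wherever your app stores the compiled binary.
    std::vector<std::string> opencl_binary_paths =
        {"/data/local/tmp/mace_run/compiled_opencl_kernel.bin"};
    mace::SetOpenCLBinaryPaths(opencl_binary_paths);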
=============
5. how to use
......@@ -385,14 +395,21 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. the following li
new FileStorageFactory(file_path));
ConfigKVStorageFactory(storage_factory);
//1. Declare the device type(must be same with ``runtime`` in configuration file)
// 1. Set precompiled OpenCL binary file paths if you use the GPU of a specific SoC.
//    Note that the binary depends on the OpenCL driver of the SoC;
//    if the OpenCL driver changes, you should recompile the binary file.
if (device_type == DeviceType::GPU) {
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
// 2. Declare the device type(must be same with ``runtime`` in configuration file)
DeviceType device_type = DeviceType::GPU;
//2. Define the input and output tensor names.
// 3. Define the input and output tensor names.
std::vector<std::string> input_names = {...};
std::vector<std::string> output_names = {...};
//3. Create MaceEngine object
// 4. Create MaceEngine object
std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status;
// Create Engine from code
......@@ -415,7 +432,7 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. the following li
// do something
}
//4. Create Input and Output objects
// 5. Create Input and Output objects
std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs;
for (size_t i = 0; i < input_count; ++i) {
......@@ -440,6 +457,6 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. the following li
outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
}
//5. Run the model
// 6. Run the model
MaceStatus status = engine.Run(inputs, &outputs);
......@@ -390,6 +390,13 @@ MACE currently provides only static libraries; there are the following two usage scenarios.
new FileStorageFactory(file_path));
ConfigKVStorageFactory(storage_factory);
// 2. If you use the GPU of a specific SoC, you can set the path of the precompiled OpenCL binary file.
// * The binary depends on the OpenCL driver on the phone; if the OpenCL driver changes,
//   you need to recompile and update the binary file.
if (device_type == DeviceType::GPU) {
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
// 1. Declare the device type (must be consistent with the runtime specified at build time)
DeviceType device_type = DeviceType::GPU;
......
......@@ -188,6 +188,9 @@ DEFINE_string(input_file, "", "input file name");
DEFINE_int32(max_num_runs, 100, "number of runs max");
DEFINE_string(max_time, "10.0", "length to run max");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(opencl_binary_file,
"",
"compiled opencl binary file path");
DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(model_file, "",
......@@ -270,6 +273,11 @@ int Main(int argc, char **argv) {
new FileStorageFactory(kernel_file_path));
SetKVStorageFactory(storage_factory);
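// If the given binary file does not exist, MACE only logs a warning and
// falls back to building the OpenCL kernels from source.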
if (device_type == DeviceType::GPU) {
std::vector<std::string> opencl_binary_paths = {FLAGS_opencl_binary_file};
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
// Create Engine
std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status;
......
......@@ -14,6 +14,8 @@
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include <sys/stat.h>
#include <cstdlib>
#include <fstream>
#include <memory>
......@@ -31,9 +33,6 @@
namespace mace {
extern const std::map<std::string, std::vector<unsigned char>>
kCompiledProgramMap;
extern const std::string kCompiledProgramPlatform;
extern const std::map<std::string, std::vector<unsigned char>>
kEncryptedProgramMap;
......@@ -43,6 +42,12 @@ void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) {
OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint);
}
// Set the compiled OpenCL binary file paths; call this only once. (Not thread-safe)
void SetOpenCLBinaryPaths(const std::vector<std::string> &paths) {
OpenCLRuntime::ConfigureOpenCLBinaryPath(paths);
}
const std::string OpenCLErrorToString(cl_int error) {
switch (error) {
case CL_SUCCESS:
......@@ -237,6 +242,25 @@ GPUType ParseGPUType(const std::string &device_name) {
return GPUType::UNKNOWN;
}
}
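// Returns the first entry in |paths| that refers to an existing regular file,
// or an empty string if none does.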
std::string FindFirstExistPath(const std::vector<std::string> &paths) {
std::string result;
struct stat st;
for (const auto &path : paths) {
if (stat(path.c_str(), &st) == 0) {
if (S_ISREG(st.st_mode)) {
result = path;
break;
}
}
}
return result;
}
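// Storage key of the OpenCL platform info string, and the default file name
// of the on-device compiled-program cache.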
const char *kOpenCLPlatformInfoKey =
"mace_opencl_precompiled_platform_info_key";
const char *kPrecompiledProgramFileName =
"mace_cl_compiled_program.bin";
} // namespace
void OpenCLProfilingTimer::StartTiming() {}
......@@ -267,6 +291,8 @@ void OpenCLProfilingTimer::ClearTiming() {
GPUPerfHint OpenCLRuntime::kGPUPerfHint = GPUPerfHint::PERF_NORMAL;
GPUPriorityHint OpenCLRuntime::kGPUPriorityHint =
GPUPriorityHint::PRIORITY_DEFAULT;
std::string
OpenCLRuntime::kPrecompiledBinaryPath = ""; // NOLINT(runtime/string)
OpenCLRuntime *OpenCLRuntime::Global() {
static OpenCLRuntime runtime;
......@@ -279,9 +305,19 @@ void OpenCLRuntime::Configure(GPUPerfHint gpu_perf_hint,
OpenCLRuntime::kGPUPriorityHint = gpu_priority_hint;
}
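// Select the first existing file among |paths| as the precompiled OpenCL binary.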
void OpenCLRuntime::ConfigureOpenCLBinaryPath(
const std::vector<std::string> &paths) {
OpenCLRuntime::kPrecompiledBinaryPath = FindFirstExistPath(paths);
if (OpenCLRuntime::kPrecompiledBinaryPath.empty()) {
LOG(WARNING) << "There is no precompiled OpenCL binary file in "
<< MakeString(paths);
}
}
OpenCLRuntime::OpenCLRuntime():
storage_(nullptr), is_profiling_enabled_(false) {
precompiled_binary_storage_(nullptr),
cache_storage_(nullptr),
is_profiling_enabled_(false) {
LoadOpenCLLibrary();
std::vector<cl::Platform> all_platforms;
......@@ -369,12 +405,38 @@ OpenCLRuntime::OpenCLRuntime():
extern std::shared_ptr<KVStorageFactory> kStorageFactory;
if (kStorageFactory != nullptr) {
const std::string cl_compiled_file_name = "mace_cl_compiled_program.bin";
storage_ = kStorageFactory->CreateStorage(cl_compiled_file_name);
cache_storage_ =
kStorageFactory->CreateStorage(kPrecompiledProgramFileName);
if (cache_storage_->Load() != 0) {
LOG(FATAL) << "Load OpenCL cached compiled kernel file failed";
}
auto platform_info_array =
this->cache_storage_->Find(kOpenCLPlatformInfoKey);
if (platform_info_array != nullptr) {
cached_binary_platform_info_ =
std::string(platform_info_array->begin(),
platform_info_array->end());
}
}
if (platform_info_ != kCompiledProgramPlatform) {
if (storage_->Load() != 0) {
LOG(FATAL) << "Load opencl compiled kernel file failed";
if (cached_binary_platform_info_ != platform_info_) {
if (OpenCLRuntime::kPrecompiledBinaryPath.empty()) {
LOG(WARNING) << "There is no precompiled OpenCL binary in"
" all OpenCL binary paths";
} else {
precompiled_binary_storage_.reset(
new FileStorage(OpenCLRuntime::kPrecompiledBinaryPath));
if (precompiled_binary_storage_->Load() != 0) {
LOG(FATAL) << "Load OpenCL precompiled kernel file failed";
}
auto platform_info_array =
this->precompiled_binary_storage_->Find(kOpenCLPlatformInfoKey);
if (platform_info_array != nullptr) {
precompiled_binary_platform_info_ =
std::string(platform_info_array->begin(),
platform_info_array->end());
}
}
}
......@@ -416,16 +478,23 @@ uint32_t OpenCLRuntime::device_compute_units() const {
return device_compute_units_;
}
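// Build the program from the on-device compiled-program cache written by a previous run.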
bool OpenCLRuntime::BuildProgramFromBinary(
bool OpenCLRuntime::BuildProgramFromCache(
const std::string &built_program_key,
const std::string &build_options_str,
cl::Program *program) {
// Find from binary
if (kCompiledProgramPlatform != platform_info_) return false;
auto it_binary = kCompiledProgramMap.find(built_program_key);
if (it_binary == kCompiledProgramMap.end()) return false;
if (this->cache_storage_ == nullptr) return false;
if (cached_binary_platform_info_ != platform_info_) {
VLOG(3) << "cached OpenCL binary version is not same"
" with current version";
return false;
}
auto content = this->cache_storage_->Find(built_program_key);
if (content == nullptr) {
return false;
}
*program = cl::Program(context(), {device()}, {it_binary->second});
*program = cl::Program(context(), {device()}, {*content});
cl_int ret = program->build({device()}, build_options_str.c_str());
if (ret != CL_SUCCESS) {
if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device()) ==
......@@ -435,25 +504,27 @@ bool OpenCLRuntime::BuildProgramFromBinary(
LOG(INFO) << "Program build log: " << build_log;
}
LOG(WARNING) << "Build program "
<< built_program_key << " from Binary failed:"
<< (ret == CL_INVALID_PROGRAM ? "CL_INVALID_PROGRAM, possible "
"cause 1: the MACE library is built from SoC 1 but is "
"used on different SoC 2, possible cause 2: the MACE "
"buffer is corrupted make sure your code has no "
"out-of-range memory writing" : MakeString(ret));
<< built_program_key << " from Cache failed:"
<< MakeString(ret);
return false;
}
VLOG(3) << "Program from Binary: " << built_program_key;
VLOG(3) << "Program from Cache: " << built_program_key;
return true;
}
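// Build the program from the precompiled binary shipped with the application,
// i.e. the file selected by SetOpenCLBinaryPaths.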
bool OpenCLRuntime::BuildProgramFromCache(
bool OpenCLRuntime::BuildProgramFromPrecompiledBinary(
const std::string &built_program_key,
const std::string &build_options_str,
cl::Program *program) {
// Find from binary
if (this->storage_ == nullptr) return false;
auto content = this->storage_->Find(built_program_key);
if (this->precompiled_binary_storage_ == nullptr) return false;
if (precompiled_binary_platform_info_ != platform_info_) {
VLOG(3) << "precompiled OpenCL binary version "
<< precompiled_binary_platform_info_
<< " is not same with current version";
return false;
}
auto content = this->precompiled_binary_storage_->Find(built_program_key);
if (content == nullptr) {
return false;
}
......@@ -468,11 +539,11 @@ bool OpenCLRuntime::BuildProgramFromCache(
LOG(INFO) << "Program build log: " << build_log;
}
LOG(WARNING) << "Build program "
<< built_program_key << " from Cache failed:"
<< built_program_key << " from precompiled binary failed:"
<< MakeString(ret);
return false;
}
VLOG(3) << "Program from Cache: " << built_program_key;
VLOG(3) << "Program from precompiled binary: " << built_program_key;
return true;
}
......@@ -527,8 +598,8 @@ void OpenCLRuntime::BuildProgramFromSource(
reinterpret_cast<unsigned char const *>(program_binaries[0].get()) +
program_binary_sizes[0]);
if (this->storage_ != nullptr) {
this->storage_->Insert(built_program_key, content);
if (this->cache_storage_ != nullptr) {
this->cache_storage_->Insert(built_program_key, content);
}
VLOG(3) << "Program from source: " << built_program_key;
......@@ -543,13 +614,12 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name,
std::string build_options_str =
build_options + " -Werror -cl-mad-enable -cl-fast-relaxed-math";
// TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math
bool ret = BuildProgramFromBinary(built_program_key,
// Build flow: cache -> precompiled binary -> source
bool ret = BuildProgramFromCache(built_program_key,
build_options_str, program);
if (!ret) {
ret = BuildProgramFromCache(built_program_key,
ret = BuildProgramFromPrecompiledBinary(built_program_key,
build_options_str, program);
// Fallback to source.
if (!ret) {
BuildProgramFromSource(program_name, built_program_key,
build_options_str, program);
......@@ -581,8 +651,12 @@ cl::Kernel OpenCLRuntime::BuildKernel(
}
void OpenCLRuntime::SaveBuiltCLProgram() {
if (storage_ != nullptr) {
if (storage_->Flush() != 0) {
if (cache_storage_ != nullptr) {
// update platform info
cache_storage_->Insert(kOpenCLPlatformInfoKey,
std::vector<unsigned char>(platform_info_.begin(),
platform_info_.end()));
if (cache_storage_->Flush() != 0) {
LOG(FATAL) << "Store OPENCL compiled kernel to file failed."
" Please Make sure the storage directory exist.";
}
......
......@@ -66,6 +66,7 @@ class OpenCLRuntime {
public:
static OpenCLRuntime *Global();
static void Configure(GPUPerfHint, GPUPriorityHint);
static void ConfigureOpenCLBinaryPath(const std::vector<std::string> &paths);
cl::Context &context();
cl::Device &device();
......@@ -99,11 +100,11 @@ class OpenCLRuntime {
const std::string &binary_file_name,
const std::string &build_options,
cl::Program *program);
bool BuildProgramFromBinary(
bool BuildProgramFromCache(
const std::string &built_program_key,
const std::string &build_options_str,
cl::Program *program);
bool BuildProgramFromCache(
bool BuildProgramFromPrecompiledBinary(
const std::string &built_program_key,
const std::string &build_options_str,
cl::Program *program);
......@@ -115,7 +116,8 @@ class OpenCLRuntime {
const std::string ParseDeviceVersion(const std::string &device_version);
private:
std::unique_ptr<KVStorage> storage_;
std::unique_ptr<KVStorage> precompiled_binary_storage_;
std::unique_ptr<KVStorage> cache_storage_;
bool is_profiling_enabled_;
// All OpenCL object must be a pointer and manually deleted before unloading
// OpenCL library.
......@@ -126,6 +128,8 @@ class OpenCLRuntime {
std::mutex program_build_mutex_;
std::string platform_info_;
std::string opencl_version_;
std::string precompiled_binary_platform_info_;
std::string cached_binary_platform_info_;
bool out_of_range_check_;
uint64_t device_gloabl_mem_cache_size_;
uint32_t device_compute_units_;
......@@ -133,6 +137,7 @@ class OpenCLRuntime {
static GPUPerfHint kGPUPerfHint;
static GPUPriorityHint kGPUPriorityHint;
static std::string kPrecompiledBinaryPath;
};
} // namespace mace
......
......@@ -123,6 +123,9 @@ DEFINE_string(model_data_file,
DEFINE_string(model_file,
"",
"model file name, used when load mace model in pb");
DEFINE_string(opencl_binary_file,
"",
"compiled opencl binary file path");
DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON");
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
......@@ -151,6 +154,10 @@ bool RunModel(const std::vector<std::string> &input_names,
}
#endif // MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) {
std::vector<std::string> opencl_binary_paths = {FLAGS_opencl_binary_file};
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
// DO NOT USE tmp directory.
// Please use APP's own directory and make sure the directory exists.
// Just call once
......
......@@ -76,9 +76,16 @@ class FileStorageFactory : public KVStorageFactory {
std::unique_ptr<Impl> impl_;
};
// Set KV store factory used as OpenCL cache.
// Set KV store factory used as OpenCL cache. (Call Once)
void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory);
// Call this only once. (Not thread-safe)
// Set the paths of the compiled OpenCL binary file if you use the GPU of a specific SoC.
// Using the OpenCL binary speeds up initialization.
// The OpenCL binary corresponds to the OpenCL driver version;
// you should update the binary whenever the OpenCL driver changes.
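//
// Example (the binary path is illustrative):
//   std::vector<std::string> paths = {"/data/local/tmp/mace_run/compiled_opencl_kernel.bin"};
//   mace::SetOpenCLBinaryPaths(paths);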
void SetOpenCLBinaryPaths(const std::vector<std::string> &paths);
// Set GPU hints, currently only supports Adreno GPU.
//
// Caution: this function may hurt performance if improper parameters provided.
......
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
import struct
import numpy as np
import jinja2
# python mace/python/tools/opencl_codegen.py \
# --cl_binary_dirs=${CL_BIN_DIR} --output_path=${CL_HEADER_PATH}
FLAGS = None
def generate_cpp_source(cl_binary_dirs,
built_kernel_file_name,
platform_info_file_name):
maps = {}
platform_info = ''
binary_dirs = cl_binary_dirs.strip().split(",")
for binary_dir in binary_dirs:
binary_path = os.path.join(binary_dir, built_kernel_file_name)
if not os.path.exists(binary_path):
continue
print 'generate opencl code from', binary_path
with open(binary_path, "rb") as f:
binary_array = np.fromfile(f, dtype=np.uint8)
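# Each binary file is a flat key/value dump: a uint64 entry count, then for
# each entry an int32 key size, the key bytes, an int32 value size and the value bytes.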
idx = 0
size, = struct.unpack("Q", binary_array[idx:idx + 8])
idx += 8
for _ in xrange(size):
key_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
key, = struct.unpack(
str(key_size) + "s", binary_array[idx:idx + key_size])
idx += key_size
value_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
maps[key] = []
value = struct.unpack(
str(value_size) + "B", binary_array[idx:idx + value_size])
idx += value_size
for ele in value:
maps[key].append(hex(ele))
cl_platform_info_path = os.path.join(binary_dir,
platform_info_file_name)
with open(cl_platform_info_path, 'r') as f:
curr_platform_info = f.read()
if platform_info != "":
assert (curr_platform_info == platform_info)
platform_info = curr_platform_info
env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
return env.get_template('opencl_compiled_kernel.cc.jinja2').render(
maps=maps,
data_type='unsigned char',
variable_name='kCompiledProgramMap',
platform_info=platform_info,
)
def opencl_codegen(output_path,
cl_binary_dirs="",
built_kernel_file_name="",
platform_info_file_name=""):
cpp_cl_binary_source = generate_cpp_source(cl_binary_dirs,
built_kernel_file_name,
platform_info_file_name)
if os.path.isfile(output_path):
os.remove(output_path)
with open(output_path, "w") as w_file:
w_file.write(cpp_cl_binary_source)
def parse_args():
"""Parses command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--cl_binary_dirs",
type=str,
default="",
help="The cl binaries directories.")
parser.add_argument(
"--built_kernel_file_name",
type=str,
default="",
help="The cl binaries directories.")
parser.add_argument(
"--platform_info_file_name",
type=str,
default="",
help="The cl binaries directories.")
parser.add_argument(
"--output_path",
type=str,
default="./mace/examples/codegen/opencl/opencl_compiled_program.cc",
help="The path of generated C++ header file for cl binaries.")
return parser.parse_known_args()
if __name__ == '__main__':
FLAGS, unparsed = parse_args()
opencl_codegen(FLAGS.output_path,
FLAGS.cl_binary_dirs,
FLAGS.built_kernel_file_name,
FLAGS.platform_info_file_name)
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include <map>
#include <string>
#include <vector>
namespace mace {
extern const std::map<std::string, std::vector<{{data_type}}>> {{variable_name}} =
{
{% for key, value in maps.iteritems() %}
{
"{{key}}",
{
{%- for ele in value -%}
{{ele}},
{%- endfor -%}
}
}, // {{key}}
{% endfor %}
};
extern const std::string kCompiledProgramPlatform = {{platform_info|tojson}};
} // namespace mace
......@@ -13,6 +13,5 @@ cc_binary(
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_models",
"//mace/core",
],
)
......@@ -38,9 +38,6 @@
#include "mace/utils/logging.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/codegen/engine/mace_engine_factory.h"
namespace mace {
......@@ -100,22 +97,6 @@ DeviceType ParseDeviceType(const std::string &device_str) {
}
}
#ifdef MACE_ENABLE_OPENCL
void WriteOpenCLPlatformInfo(const std::string &output_dir) {
std::string platform_info = OpenCLRuntime::Global()->platform_info();
const std::string cl_platform_info_file_name = output_dir
+ "/mace_cl_platform_info.txt";
std::ofstream ofs(cl_platform_info_file_name);
if (ofs.is_open()) {
ofs << platform_info;
ofs.close();
} else {
LOG(WARNING) << "Write opencl platform info failed.";
}
}
#endif // MACE_ENABLE_OPENCL
struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo();
if (prev.arena != curr.arena) {
......@@ -187,6 +168,9 @@ DEFINE_string(input_file,
DEFINE_string(output_file,
"",
"output file name | output file prefix for multiple outputs");
DEFINE_string(opencl_binary_file,
"",
"compiled opencl binary file path");
DEFINE_string(model_data_file,
"",
"model data file name, used when EMBED_MODEL_DATA set to 0 or 2");
......@@ -230,6 +214,11 @@ bool RunModel(const std::string &model_name,
new FileStorageFactory(kernel_file_path));
SetKVStorageFactory(storage_factory);
if (device_type == DeviceType::GPU) {
std::vector<std::string> opencl_binary_paths = {FLAGS_opencl_binary_file};
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
std::vector<unsigned char> model_pb_data;
if (FLAGS_model_file != "") {
if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
......@@ -397,11 +386,6 @@ bool RunModel(const std::string &model_name,
printf("time %11.3f %11.3f %11.3f\n",
init_millis, warmup_millis, model_run_millis);
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) {
WriteOpenCLPlatformInfo(kernel_file_path);
}
#endif // MACE_ENABLE_OPENCL
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
......
......@@ -132,7 +132,6 @@ def main(unused_args):
# generate sources
sh_commands.gen_encrypted_opencl_source()
sh_commands.gen_compiled_opencl_source()
sh_commands.gen_mace_version()
sh_commands.gen_tuning_param_code([])
......
......@@ -108,6 +108,15 @@ class StringFormatter:
return star_line + str(message).center(line_length) + '\n' + star_line
################################
# definitions
################################
class DeviceType(object):
CPU = 'CPU'
GPU = 'GPU'
HEXAGON = 'HEXAGON'
################################
# Argument types
################################
......
......@@ -29,6 +29,7 @@ import sh_commands
from sh_commands import BuildType
from common import CaffeEnvType
from common import DeviceType
from common import mace_check
from common import MaceLogger
from common import StringFormatter
......@@ -37,13 +38,14 @@ from common import StringFormatter
# common definitions
################################
BUILD_OUTPUT_DIR = 'build'
PHONE_DATA_DIR = "/data/local/tmp/mace_run/"
PHONE_DATA_DIR = "/data/local/tmp/mace_run"
MODEL_OUTPUT_DIR_NAME = 'model'
BUILD_TMP_DIR_NAME = '_tmp'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
OUTPUT_LIBRARY_DIR_NAME = 'library'
CL_BUILT_KERNEL_FILE_NAME = "mace_cl_compiled_program.bin"
CL_PLATFORM_INFO_FILE_NAME = "mace_cl_platform_info.txt"
OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel.bin'
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
MACE_RUN_TARGET = "//mace/tools/validation:mace_run"
......@@ -176,11 +178,11 @@ def parse_device_type(runtime):
device_type = ""
if runtime == RuntimeType.dsp:
device_type = "HEXAGON"
device_type = DeviceType.HEXAGON
elif runtime == RuntimeType.gpu:
device_type = "GPU"
device_type = DeviceType.GPU
elif runtime == RuntimeType.cpu:
device_type = "CPU"
device_type = DeviceType.CPU
return device_type
......@@ -433,6 +435,13 @@ def get_build_model_dirs(library_name, model_name, target_abi, target_soc,
return model_output_base_dir, model_output_dir, mace_model_dir
def get_opencl_binary_output_path(library_name):
return '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR,
library_name,
OUTPUT_OPENCL_BINARY_DIR_NAME,
OUTPUT_OPENCL_BINARY_FILE_NAME)
################################
# build
################################
......@@ -440,17 +449,7 @@ def pull_opencl_binary_and_tuning_param(target_abi,
serialno,
model_output_dirs):
sh_commands.pull_binaries(target_abi, serialno, model_output_dirs,
CL_BUILT_KERNEL_FILE_NAME,
CL_PLATFORM_INFO_FILE_NAME)
def gen_opencl_and_tuning_code(model_output_dirs):
# generate opencl binary code
sh_commands.gen_opencl_binary_code(model_output_dirs,
CL_BUILT_KERNEL_FILE_NAME,
CL_PLATFORM_INFO_FILE_NAME)
sh_commands.gen_tuning_param_code(model_output_dirs)
CL_COMPILED_BINARY_FILE_NAME)
def print_configuration(flags, configs):
......@@ -612,7 +611,7 @@ def build_specific_lib(target_abi, target_soc, serial_num,
sh.rm("-rf", build_tmp_binary_dir)
os.makedirs(build_tmp_binary_dir)
gen_opencl_and_tuning_code([])
sh_commands.gen_tuning_param_code(model_output_dirs)
sh_commands.bazel_build(
MACE_RUN_TARGET,
abi=target_abi,
......@@ -639,7 +638,7 @@ def build_specific_lib(target_abi, target_soc, serial_num,
os.makedirs(model_output_dir)
# build for specified soc
if not address_sanitizer and tuning and target_abi != ABIType.host \
if not address_sanitizer and target_abi != ABIType.host \
and target_soc is not None and \
model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu]:
sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
......@@ -674,7 +673,8 @@ def build_specific_lib(target_abi, target_soc, serial_num,
tuning=tuning,
out_of_range_check=False,
phone_data_dir=PHONE_DATA_DIR,
build_type=build_type
build_type=build_type,
opencl_binary_file="",
)
pull_opencl_binary_and_tuning_param(target_abi, serial_num,
......@@ -683,7 +683,10 @@ def build_specific_lib(target_abi, target_soc, serial_num,
binary_changed = True
if binary_changed:
gen_opencl_and_tuning_code(model_output_dirs)
sh_commands.merge_opencl_binaries(
model_output_dirs, CL_COMPILED_BINARY_FILE_NAME,
get_opencl_binary_output_path(library_name))
sh_commands.gen_tuning_param_code(model_output_dirs)
sh_commands.bazel_build(
MACE_RUN_TARGET,
abi=target_abi,
......@@ -919,6 +922,7 @@ def run_specific_target(flags, configs, target_abi,
gpu_priority_hint=flags.gpu_priority_hint,
runtime_failure_ratio=flags.runtime_failure_ratio,
address_sanitizer=flags.address_sanitizer,
opencl_binary_file=get_opencl_binary_output_path(library_name),
)
if flags.validate:
model_file_path, weight_file_path = get_model_files_path(
......@@ -1051,7 +1055,8 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
omp_num_threads=flags.omp_num_threads,
cpu_affinity_policy=flags.cpu_affinity_policy,
gpu_perf_hint=flags.gpu_perf_hint,
gpu_priority_hint=flags.gpu_priority_hint)
gpu_priority_hint=flags.gpu_priority_hint,
opencl_binary_file=get_opencl_binary_output_path(library_name))
def benchmark_model(flags):
......
......@@ -16,9 +16,11 @@ import falcon_cli
import filelock
import glob
import logging
import numpy as np
import os
import re
import sh
import struct
import subprocess
import sys
import time
......@@ -30,7 +32,6 @@ import common
sys.path.insert(0, "mace/python/tools")
try:
from encrypt_opencl_codegen import encrypt_opencl_codegen
from opencl_codegen import opencl_codegen
from binary_codegen import tuning_param_codegen
from generate_data import generate_input_data
from validate import validate
......@@ -362,8 +363,7 @@ def gen_mace_engine_factory_source(model_tags,
def pull_binaries(abi, serialno, model_output_dirs,
cl_built_kernel_file_name,
cl_platform_info_file_name):
cl_built_kernel_file_name):
compiled_opencl_dir = "/data/local/tmp/mace_run/interior/"
mace_run_param_file = "mace_run.config"
......@@ -379,26 +379,66 @@ def pull_binaries(abi, serialno, model_output_dirs,
if abi != "host":
adb_pull(compiled_opencl_dir + cl_built_kernel_file_name,
cl_bin_dir, serialno)
adb_pull(compiled_opencl_dir + cl_platform_info_file_name,
cl_bin_dir, serialno)
adb_pull("/data/local/tmp/mace_run/%s" % mace_run_param_file,
cl_bin_dir, serialno)
def gen_opencl_binary_code(model_output_dirs,
cl_built_kernel_file_name,
cl_platform_info_file_name,
codegen_path="mace/codegen"):
opencl_codegen_file = "%s/opencl/opencl_compiled_program.cc" % codegen_path
def merge_opencl_binaries(binaries_dirs,
cl_compiled_program_file_name,
output_file_path):
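# Merge the per-model compiled OpenCL program files into a single binary,
# checking that they were all generated with the same OpenCL platform info.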
platform_info_key = 'mace_opencl_precompiled_platform_info_key'
cl_bin_dirs = []
for d in model_output_dirs:
for d in binaries_dirs:
cl_bin_dirs.append(os.path.join(d, "opencl_bin"))
cl_bin_dirs_str = ",".join(cl_bin_dirs)
opencl_codegen(opencl_codegen_file,
cl_bin_dirs_str,
cl_built_kernel_file_name,
cl_platform_info_file_name)
# create opencl binary output dir
opencl_binary_dir = os.path.dirname(output_file_path)
if os.path.exists(opencl_binary_dir):
sh.rm("-rf", opencl_binary_dir)
sh.mkdir("-p", opencl_binary_dir)
kvs = {}
for binary_dir in cl_bin_dirs:
binary_path = os.path.join(binary_dir, cl_compiled_program_file_name)
if not os.path.exists(binary_path):
continue
print 'merge opencl binary from', binary_path
with open(binary_path, "rb") as f:
binary_array = np.fromfile(f, dtype=np.uint8)
idx = 0
size, = struct.unpack("Q", binary_array[idx:idx + 8])
idx += 8
for _ in xrange(size):
key_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
key, = struct.unpack(
str(key_size) + "s", binary_array[idx:idx + key_size])
idx += key_size
value_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
if key == platform_info_key and key in kvs:
common.mace_check(
(kvs[key] == binary_array[idx:idx + value_size]).all(),
"",
"There exists more than one OpenCL version for models:"
" %s vs %s " %
(kvs[key], binary_array[idx:idx + value_size]))
else:
kvs[key] = binary_array[idx:idx + value_size]
idx += value_size
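# Write the merged entries back to a single file using the same layout.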
output_byte_array = bytearray()
data_size = len(kvs)
output_byte_array.extend(struct.pack("Q", data_size))
for key, value in kvs.iteritems():
key_size = len(key)
output_byte_array.extend(struct.pack("i", key_size))
output_byte_array.extend(struct.pack(str(key_size) + "s", key))
value_size = len(value)
output_byte_array.extend(struct.pack("i", value_size))
output_byte_array.extend(value)
np.array(output_byte_array).tofile(output_file_path)
def gen_tuning_param_code(model_output_dirs,
......@@ -426,12 +466,6 @@ def gen_mace_version(codegen_path="mace/codegen"):
"%s/version/version.cc" % codegen_path)
def gen_compiled_opencl_source(codegen_path="mace/codegen"):
opencl_codegen_file = "%s/opencl/opencl_compiled_program.cc" % codegen_path
sh.mkdir("-p", "%s/opencl" % codegen_path)
opencl_codegen(opencl_codegen_file)
def gen_model_code(model_codegen_dir,
platform,
model_file_path,
......@@ -576,6 +610,7 @@ def tuning_run(abi,
out_of_range_check,
phone_data_dir,
build_type,
opencl_binary_file,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
......@@ -641,6 +676,10 @@ def tuning_run(abi,
adb_push("%s/%s.data" % (mace_model_dir, model_tag),
phone_data_dir, serialno)
if device_type == common.DeviceType.GPU\
and os.path.exists(opencl_binary_file):
adb_push(opencl_binary_file, phone_data_dir, serialno)
adb_push("third_party/nnlib/libhexagon_controller.so",
phone_data_dir, serialno)
......@@ -689,6 +728,8 @@ def tuning_run(abi,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_phone_path,
"--opencl_binary_file=%s/%s" %
(phone_data_dir, os.path.basename(opencl_binary_file)),
])
adb_cmd = ' '.join(adb_cmd)
sh.adb(
......@@ -1005,6 +1046,7 @@ def benchmark_model(abi,
device_type,
phone_data_dir,
build_type,
opencl_binary_file,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
......@@ -1049,6 +1091,9 @@ def benchmark_model(abi,
if not embed_model_data:
adb_push("%s/%s.data" % (mace_model_dir, model_tag),
phone_data_dir, serialno)
if device_type == common.DeviceType.GPU \
and os.path.exists(opencl_binary_file):
adb_push(opencl_binary_file, phone_data_dir, serialno)
mace_model_phone_path = ""
if build_type == BuildType.proto:
mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag)
......@@ -1082,6 +1127,8 @@ def benchmark_model(abi,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_phone_path,
"--opencl_binary_file=%s/%s" %
(phone_data_dir, os.path.basename(opencl_binary_file)),
_fg=True)
print("Benchmark done!\n")
......