提交 39fb3055 编写于 作者: L liuqi

Refactor opencl binary load logic: use file instead of code.

上级 d4124708
...@@ -365,6 +365,16 @@ The followings list the details. ...@@ -365,6 +365,16 @@ The followings list the details.
``.pb`` file will be generated only when build_type is ``proto``. ``.pb`` file will be generated only when build_type is ``proto``.
**OpenCL compiled kernel binary file**
* ``opencl/compiled_opencl_kernel.bin``
.. note::
This file is generated only when ``target_soc`` is specified and the runtime is ``gpu``.
.. warning::
This file relies on the OpenCL driver on the phone; you should regenerate the file whenever the OpenCL driver changes.
============= =============
5. how to use 5. how to use
...@@ -385,14 +395,21 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. the following li ...@@ -385,14 +395,21 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. the following li
new FileStorageFactory(file_path)); new FileStorageFactory(file_path));
ConfigKVStorageFactory(storage_factory); ConfigKVStorageFactory(storage_factory);
//1. Declare the device type(must be same with ``runtime`` in configuration file) // 1. set precompiled OpenCL binary file paths if you use gpu of specified SOC,
// Note that the binary depends on the OpenCL driver of the SoC:
// if the OpenCL driver changes, you should recompile the binary file.
if (device_type == DeviceType::GPU) {
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
// 2. Declare the device type(must be same with ``runtime`` in configuration file)
DeviceType device_type = DeviceType::GPU; DeviceType device_type = DeviceType::GPU;
//2. Define the input and output tensor names. // 3. Define the input and output tensor names.
std::vector<std::string> input_names = {...}; std::vector<std::string> input_names = {...};
std::vector<std::string> output_names = {...}; std::vector<std::string> output_names = {...};
//3. Create MaceEngine object // 4. Create MaceEngine object
std::shared_ptr<mace::MaceEngine> engine; std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status; MaceStatus create_engine_status;
// Create Engine from code // Create Engine from code
...@@ -415,7 +432,7 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. the following li ...@@ -415,7 +432,7 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. the following li
// do something // do something
} }
//4. Create Input and Output objects // 5. Create Input and Output objects
std::map<std::string, mace::MaceTensor> inputs; std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs; std::map<std::string, mace::MaceTensor> outputs;
for (size_t i = 0; i < input_count; ++i) { for (size_t i = 0; i < input_count; ++i) {
...@@ -440,6 +457,6 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. the following li ...@@ -440,6 +457,6 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. the following li
outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out); outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
} }
//5. Run the model // 6. Run the model
MaceStatus status = engine.Run(inputs, &outputs); MaceStatus status = engine.Run(inputs, &outputs);
...@@ -390,6 +390,13 @@ Mace目前只提供静态库,有以下两种使用场景。 ...@@ -390,6 +390,13 @@ Mace目前只提供静态库,有以下两种使用场景。
new FileStorageFactory(file_path)); new FileStorageFactory(file_path));
ConfigKVStorageFactory(storage_factory); ConfigKVStorageFactory(storage_factory);
// 2. 如果你使用特定SOC的GPU,可以设置OpenCL预编译的二进制文件路径。
// * 该二进制文件是依赖于手机上OpenCL driver的,如果OpenCL driver改变了,
// 你需要重新编译并更新该二进制文件。
if (device_type == DeviceType::GPU) {
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
//1. 声明设备类型(必须与build时指定的runtime一致) //1. 声明设备类型(必须与build时指定的runtime一致)
DeviceType device_type = DeviceType::GPU; DeviceType device_type = DeviceType::GPU;
......
...@@ -188,6 +188,9 @@ DEFINE_string(input_file, "", "input file name"); ...@@ -188,6 +188,9 @@ DEFINE_string(input_file, "", "input file name");
DEFINE_int32(max_num_runs, 100, "number of runs max"); DEFINE_int32(max_num_runs, 100, "number of runs max");
DEFINE_string(max_time, "10.0", "length to run max"); DEFINE_string(max_time, "10.0", "length to run max");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(opencl_binary_file,
"",
"compiled opencl binary file path");
DEFINE_string(model_data_file, "", DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0"); "model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(model_file, "", DEFINE_string(model_file, "",
...@@ -270,6 +273,11 @@ int Main(int argc, char **argv) { ...@@ -270,6 +273,11 @@ int Main(int argc, char **argv) {
new FileStorageFactory(kernel_file_path)); new FileStorageFactory(kernel_file_path));
SetKVStorageFactory(storage_factory); SetKVStorageFactory(storage_factory);
if (device_type == DeviceType::GPU) {
std::vector<std::string> opencl_binary_paths = {FLAGS_opencl_binary_file};
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
// Create Engine // Create Engine
std::shared_ptr<mace::MaceEngine> engine; std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status; MaceStatus create_engine_status;
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#include <sys/stat.h>
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
#include <memory> #include <memory>
...@@ -31,9 +33,6 @@ ...@@ -31,9 +33,6 @@
namespace mace { namespace mace {
extern const std::map<std::string, std::vector<unsigned char>>
kCompiledProgramMap;
extern const std::string kCompiledProgramPlatform;
extern const std::map<std::string, std::vector<unsigned char>> extern const std::map<std::string, std::vector<unsigned char>>
kEncryptedProgramMap; kEncryptedProgramMap;
...@@ -43,6 +42,12 @@ void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) { ...@@ -43,6 +42,12 @@ void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) {
OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint); OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint);
} }
// Set the candidate paths of the precompiled OpenCL binary file.
// Call this only once. (Not thread-safe)
// The call simply forwards `paths` to
// OpenCLRuntime::ConfigureOpenCLBinaryPath.
void SetOpenCLBinaryPaths(const std::vector<std::string> &paths) {
OpenCLRuntime::ConfigureOpenCLBinaryPath(paths);
}
const std::string OpenCLErrorToString(cl_int error) { const std::string OpenCLErrorToString(cl_int error) {
switch (error) { switch (error) {
case CL_SUCCESS: case CL_SUCCESS:
...@@ -237,6 +242,25 @@ GPUType ParseGPUType(const std::string &device_name) { ...@@ -237,6 +242,25 @@ GPUType ParseGPUType(const std::string &device_name) {
return GPUType::UNKNOWN; return GPUType::UNKNOWN;
} }
} }
// Returns the first entry of `paths` that stat() reports as an existing
// regular file. Entries that cannot be stat'ed and entries that are not
// regular files (e.g. directories) are skipped. Returns an empty string when
// no entry qualifies, including when `paths` is empty.
std::string FindFirstExistPath(const std::vector<std::string> &paths) {
  // Iterate by const reference: candidates are only inspected, so there is no
  // reason to copy each string.
  for (const auto &path : paths) {
    struct stat st;
    if (stat(path.c_str(), &st) == 0 && S_ISREG(st.st_mode)) {
      return path;
    }
  }
  return std::string();
}
// Key under which the OpenCL platform info string is stored in (and looked up
// from) the compiled-program storages. The pointer itself is const so the
// constant cannot be re-seated by accident.
const char *const kOpenCLPlatformInfoKey =
    "mace_opencl_precompiled_platform_info_key";
// File name handed to the KV storage factory when creating the OpenCL
// compiled-program cache storage.
const char *const kPrecompiledProgramFileName =
    "mace_cl_compiled_program.bin";
} // namespace } // namespace
void OpenCLProfilingTimer::StartTiming() {} void OpenCLProfilingTimer::StartTiming() {}
...@@ -267,6 +291,8 @@ void OpenCLProfilingTimer::ClearTiming() { ...@@ -267,6 +291,8 @@ void OpenCLProfilingTimer::ClearTiming() {
GPUPerfHint OpenCLRuntime::kGPUPerfHint = GPUPerfHint::PERF_NORMAL; GPUPerfHint OpenCLRuntime::kGPUPerfHint = GPUPerfHint::PERF_NORMAL;
GPUPriorityHint OpenCLRuntime::kGPUPriorityHint = GPUPriorityHint OpenCLRuntime::kGPUPriorityHint =
GPUPriorityHint::PRIORITY_DEFAULT; GPUPriorityHint::PRIORITY_DEFAULT;
std::string
OpenCLRuntime::kPrecompiledBinaryPath = ""; // NOLINT(runtime/string)
OpenCLRuntime *OpenCLRuntime::Global() { OpenCLRuntime *OpenCLRuntime::Global() {
static OpenCLRuntime runtime; static OpenCLRuntime runtime;
...@@ -279,9 +305,19 @@ void OpenCLRuntime::Configure(GPUPerfHint gpu_perf_hint, ...@@ -279,9 +305,19 @@ void OpenCLRuntime::Configure(GPUPerfHint gpu_perf_hint,
OpenCLRuntime::kGPUPriorityHint = gpu_priority_hint; OpenCLRuntime::kGPUPriorityHint = gpu_priority_hint;
} }
// Chooses the precompiled OpenCL binary file: stores the result of
// FindFirstExistPath(paths) in kPrecompiledBinaryPath and logs a warning when
// that result is empty.
void OpenCLRuntime::ConfigureOpenCLBinaryPath(
    const std::vector<std::string> &paths) {
  const std::string first_existing = FindFirstExistPath(paths);
  OpenCLRuntime::kPrecompiledBinaryPath = first_existing;
  if (!first_existing.empty()) {
    return;
  }
  LOG(WARNING) << "There is no precompiled OpenCL binary file in "
               << MakeString(paths);
}
OpenCLRuntime::OpenCLRuntime(): OpenCLRuntime::OpenCLRuntime():
storage_(nullptr), is_profiling_enabled_(false) { precompiled_binary_storage_(nullptr),
cache_storage_(nullptr),
is_profiling_enabled_(false) {
LoadOpenCLLibrary(); LoadOpenCLLibrary();
std::vector<cl::Platform> all_platforms; std::vector<cl::Platform> all_platforms;
...@@ -369,12 +405,38 @@ OpenCLRuntime::OpenCLRuntime(): ...@@ -369,12 +405,38 @@ OpenCLRuntime::OpenCLRuntime():
extern std::shared_ptr<KVStorageFactory> kStorageFactory; extern std::shared_ptr<KVStorageFactory> kStorageFactory;
if (kStorageFactory != nullptr) { if (kStorageFactory != nullptr) {
const std::string cl_compiled_file_name = "mace_cl_compiled_program.bin"; cache_storage_ =
storage_ = kStorageFactory->CreateStorage(cl_compiled_file_name); kStorageFactory->CreateStorage(kPrecompiledProgramFileName);
if (platform_info_ != kCompiledProgramPlatform) { if (cache_storage_->Load() != 0) {
if (storage_->Load() != 0) { LOG(FATAL) << "Load OpenCL cached compiled kernel file failed";
LOG(FATAL) << "Load opencl compiled kernel file failed"; }
auto platform_info_array =
this->cache_storage_->Find(kOpenCLPlatformInfoKey);
if (platform_info_array != nullptr) {
cached_binary_platform_info_ =
std::string(platform_info_array->begin(),
platform_info_array->end());
}
}
if (cached_binary_platform_info_ != platform_info_) {
if (OpenCLRuntime::kPrecompiledBinaryPath.empty()) {
LOG(WARNING) << "There is no precompiled OpenCL binary in"
" all OpenCL binary paths";
} else {
precompiled_binary_storage_.reset(
new FileStorage(OpenCLRuntime::kPrecompiledBinaryPath));
if (precompiled_binary_storage_->Load() != 0) {
LOG(FATAL) << "Load OpenCL precompiled kernel file failed";
}
auto platform_info_array =
this->precompiled_binary_storage_->Find(kOpenCLPlatformInfoKey);
if (platform_info_array != nullptr) {
precompiled_binary_platform_info_ =
std::string(platform_info_array->begin(),
platform_info_array->end());
} }
} }
} }
...@@ -416,16 +478,23 @@ uint32_t OpenCLRuntime::device_compute_units() const { ...@@ -416,16 +478,23 @@ uint32_t OpenCLRuntime::device_compute_units() const {
return device_compute_units_; return device_compute_units_;
} }
bool OpenCLRuntime::BuildProgramFromBinary( bool OpenCLRuntime::BuildProgramFromCache(
const std::string &built_program_key, const std::string &built_program_key,
const std::string &build_options_str, const std::string &build_options_str,
cl::Program *program) { cl::Program *program) {
// Find from binary // Find from binary
if (kCompiledProgramPlatform != platform_info_) return false; if (this->cache_storage_ == nullptr) return false;
auto it_binary = kCompiledProgramMap.find(built_program_key); if (cached_binary_platform_info_ != platform_info_) {
if (it_binary == kCompiledProgramMap.end()) return false; VLOG(3) << "cached OpenCL binary version is not same"
" with current version";
return false;
}
auto content = this->cache_storage_->Find(built_program_key);
if (content == nullptr) {
return false;
}
*program = cl::Program(context(), {device()}, {it_binary->second}); *program = cl::Program(context(), {device()}, {*content});
cl_int ret = program->build({device()}, build_options_str.c_str()); cl_int ret = program->build({device()}, build_options_str.c_str());
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device()) == if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device()) ==
...@@ -435,25 +504,27 @@ bool OpenCLRuntime::BuildProgramFromBinary( ...@@ -435,25 +504,27 @@ bool OpenCLRuntime::BuildProgramFromBinary(
LOG(INFO) << "Program build log: " << build_log; LOG(INFO) << "Program build log: " << build_log;
} }
LOG(WARNING) << "Build program " LOG(WARNING) << "Build program "
<< built_program_key << " from Binary failed:" << built_program_key << " from Cache failed:"
<< (ret == CL_INVALID_PROGRAM ? "CL_INVALID_PROGRAM, possible " << MakeString(ret);
"cause 1: the MACE library is built from SoC 1 but is "
"used on different SoC 2, possible cause 2: the MACE "
"buffer is corrupted make sure your code has no "
"out-of-range memory writing" : MakeString(ret));
return false; return false;
} }
VLOG(3) << "Program from Binary: " << built_program_key; VLOG(3) << "Program from Cache: " << built_program_key;
return true; return true;
} }
bool OpenCLRuntime::BuildProgramFromCache( bool OpenCLRuntime::BuildProgramFromPrecompiledBinary(
const std::string &built_program_key, const std::string &built_program_key,
const std::string &build_options_str, const std::string &build_options_str,
cl::Program *program) { cl::Program *program) {
// Find from binary // Find from binary
if (this->storage_ == nullptr) return false; if (this->precompiled_binary_storage_ == nullptr) return false;
auto content = this->storage_->Find(built_program_key); if (precompiled_binary_platform_info_ != platform_info_) {
VLOG(3) << "precompiled OpenCL binary version "
<< precompiled_binary_platform_info_
<< " is not same with current version";
return false;
}
auto content = this->precompiled_binary_storage_->Find(built_program_key);
if (content == nullptr) { if (content == nullptr) {
return false; return false;
} }
...@@ -468,11 +539,11 @@ bool OpenCLRuntime::BuildProgramFromCache( ...@@ -468,11 +539,11 @@ bool OpenCLRuntime::BuildProgramFromCache(
LOG(INFO) << "Program build log: " << build_log; LOG(INFO) << "Program build log: " << build_log;
} }
LOG(WARNING) << "Build program " LOG(WARNING) << "Build program "
<< built_program_key << " from Cache failed:" << built_program_key << " from precompiled binary failed:"
<< MakeString(ret); << MakeString(ret);
return false; return false;
} }
VLOG(3) << "Program from Cache: " << built_program_key; VLOG(3) << "Program from precompiled binary: " << built_program_key;
return true; return true;
} }
...@@ -527,8 +598,8 @@ void OpenCLRuntime::BuildProgramFromSource( ...@@ -527,8 +598,8 @@ void OpenCLRuntime::BuildProgramFromSource(
reinterpret_cast<unsigned char const *>(program_binaries[0].get()) + reinterpret_cast<unsigned char const *>(program_binaries[0].get()) +
program_binary_sizes[0]); program_binary_sizes[0]);
if (this->storage_ != nullptr) { if (this->cache_storage_ != nullptr) {
this->storage_->Insert(built_program_key, content); this->cache_storage_->Insert(built_program_key, content);
} }
VLOG(3) << "Program from source: " << built_program_key; VLOG(3) << "Program from source: " << built_program_key;
...@@ -543,13 +614,12 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name, ...@@ -543,13 +614,12 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name,
std::string build_options_str = std::string build_options_str =
build_options + " -Werror -cl-mad-enable -cl-fast-relaxed-math"; build_options + " -Werror -cl-mad-enable -cl-fast-relaxed-math";
// TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math // Build flow: cache -> precompiled binary -> source
bool ret = BuildProgramFromBinary(built_program_key, bool ret = BuildProgramFromCache(built_program_key,
build_options_str, program); build_options_str, program);
if (!ret) { if (!ret) {
ret = BuildProgramFromCache(built_program_key, ret = BuildProgramFromPrecompiledBinary(built_program_key,
build_options_str, program); build_options_str, program);
// Fallback to source.
if (!ret) { if (!ret) {
BuildProgramFromSource(program_name, built_program_key, BuildProgramFromSource(program_name, built_program_key,
build_options_str, program); build_options_str, program);
...@@ -581,8 +651,12 @@ cl::Kernel OpenCLRuntime::BuildKernel( ...@@ -581,8 +651,12 @@ cl::Kernel OpenCLRuntime::BuildKernel(
} }
void OpenCLRuntime::SaveBuiltCLProgram() { void OpenCLRuntime::SaveBuiltCLProgram() {
if (storage_ != nullptr) { if (cache_storage_ != nullptr) {
if (storage_->Flush() != 0) { // update platform info
cache_storage_->Insert(kOpenCLPlatformInfoKey,
std::vector<unsigned char>(platform_info_.begin(),
platform_info_.end()));
if (cache_storage_->Flush() != 0) {
LOG(FATAL) << "Store OPENCL compiled kernel to file failed." LOG(FATAL) << "Store OPENCL compiled kernel to file failed."
" Please Make sure the storage directory exist."; " Please Make sure the storage directory exist.";
} }
......
...@@ -66,6 +66,7 @@ class OpenCLRuntime { ...@@ -66,6 +66,7 @@ class OpenCLRuntime {
public: public:
static OpenCLRuntime *Global(); static OpenCLRuntime *Global();
static void Configure(GPUPerfHint, GPUPriorityHint); static void Configure(GPUPerfHint, GPUPriorityHint);
static void ConfigureOpenCLBinaryPath(const std::vector<std::string> &paths);
cl::Context &context(); cl::Context &context();
cl::Device &device(); cl::Device &device();
...@@ -99,11 +100,11 @@ class OpenCLRuntime { ...@@ -99,11 +100,11 @@ class OpenCLRuntime {
const std::string &binary_file_name, const std::string &binary_file_name,
const std::string &build_options, const std::string &build_options,
cl::Program *program); cl::Program *program);
bool BuildProgramFromBinary( bool BuildProgramFromCache(
const std::string &built_program_key, const std::string &built_program_key,
const std::string &build_options_str, const std::string &build_options_str,
cl::Program *program); cl::Program *program);
bool BuildProgramFromCache( bool BuildProgramFromPrecompiledBinary(
const std::string &built_program_key, const std::string &built_program_key,
const std::string &build_options_str, const std::string &build_options_str,
cl::Program *program); cl::Program *program);
...@@ -115,7 +116,8 @@ class OpenCLRuntime { ...@@ -115,7 +116,8 @@ class OpenCLRuntime {
const std::string ParseDeviceVersion(const std::string &device_version); const std::string ParseDeviceVersion(const std::string &device_version);
private: private:
std::unique_ptr<KVStorage> storage_; std::unique_ptr<KVStorage> precompiled_binary_storage_;
std::unique_ptr<KVStorage> cache_storage_;
bool is_profiling_enabled_; bool is_profiling_enabled_;
// All OpenCL object must be a pointer and manually deleted before unloading // All OpenCL object must be a pointer and manually deleted before unloading
// OpenCL library. // OpenCL library.
...@@ -126,6 +128,8 @@ class OpenCLRuntime { ...@@ -126,6 +128,8 @@ class OpenCLRuntime {
std::mutex program_build_mutex_; std::mutex program_build_mutex_;
std::string platform_info_; std::string platform_info_;
std::string opencl_version_; std::string opencl_version_;
std::string precompiled_binary_platform_info_;
std::string cached_binary_platform_info_;
bool out_of_range_check_; bool out_of_range_check_;
uint64_t device_gloabl_mem_cache_size_; uint64_t device_gloabl_mem_cache_size_;
uint32_t device_compute_units_; uint32_t device_compute_units_;
...@@ -133,6 +137,7 @@ class OpenCLRuntime { ...@@ -133,6 +137,7 @@ class OpenCLRuntime {
static GPUPerfHint kGPUPerfHint; static GPUPerfHint kGPUPerfHint;
static GPUPriorityHint kGPUPriorityHint; static GPUPriorityHint kGPUPriorityHint;
static std::string kPrecompiledBinaryPath;
}; };
} // namespace mace } // namespace mace
......
...@@ -123,6 +123,9 @@ DEFINE_string(model_data_file, ...@@ -123,6 +123,9 @@ DEFINE_string(model_data_file,
DEFINE_string(model_file, DEFINE_string(model_file,
"", "",
"model file name, used when load mace model in pb"); "model file name, used when load mace model in pb");
DEFINE_string(opencl_binary_file,
"",
"compiled opencl binary file path");
DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON"); DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON");
DEFINE_int32(round, 1, "round"); DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round"); DEFINE_int32(restart_round, 1, "restart round");
...@@ -151,6 +154,10 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -151,6 +154,10 @@ bool RunModel(const std::vector<std::string> &input_names,
} }
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) {
std::vector<std::string> opencl_binary_paths = {FLAGS_opencl_binary_file};
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
// DO NOT USE tmp directory. // DO NOT USE tmp directory.
// Please use APP's own directory and make sure the directory exists. // Please use APP's own directory and make sure the directory exists.
// Just call once // Just call once
......
...@@ -76,9 +76,16 @@ class FileStorageFactory : public KVStorageFactory { ...@@ -76,9 +76,16 @@ class FileStorageFactory : public KVStorageFactory {
std::unique_ptr<Impl> impl_; std::unique_ptr<Impl> impl_;
}; };
// Set KV store factory used as OpenCL cache. // Set KV store factory used as OpenCL cache. (Call Once)
void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory); void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory);
// Set the paths of the precompiled OpenCL binary file; use this when running
// on the GPU of a specific SoC.
// Call it only once. (Not thread-safe)
// Using a precompiled OpenCL binary speeds up initialization.
// The binary corresponds to a specific OpenCL driver version, so it must be
// updated whenever the OpenCL driver changes.
void SetOpenCLBinaryPaths(const std::vector<std::string> &paths);
// Set GPU hints, currently only supports Adreno GPU. // Set GPU hints, currently only supports Adreno GPU.
// //
// Caution: this function may hurt performance if improper parameters provided. // Caution: this function may hurt performance if improper parameters provided.
......
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
import struct
import numpy as np
import jinja2
# python mace/python/tools/opencl_codegen.py \
# --cl_binary_dirs=${CL_BIN_DIR} --output_path=${CL_HEADER_PATH}
FLAGS = None
def generate_cpp_source(cl_binary_dirs,
                        built_kernel_file_name,
                        platform_info_file_name):
    """Render the C++ source that embeds compiled OpenCL kernel binaries.

    Each compiled-kernel file is parsed with native-layout ``struct`` formats:
    an unsigned 8-byte entry count ("Q"), then for every entry a 4-byte key
    size ("i"), the key bytes, a 4-byte value size ("i") and the value bytes.

    Args:
        cl_binary_dirs: Comma-separated list of directories to scan.
        built_kernel_file_name: Name of the compiled-kernel file looked up in
            each directory; directories without that file are skipped.
        platform_info_file_name: Name of the platform-info text file read from
            every directory that does contain a compiled-kernel file.

    Returns:
        The text rendered from the ``opencl_compiled_kernel.cc.jinja2``
        template, which is loaded from ``sys.path[0]``.

    Raises:
        AssertionError: If two scanned directories carry different platform
            info.
    """
    maps = {}
    platform_info = ''
    binary_dirs = cl_binary_dirs.strip().split(",")
    for binary_dir in binary_dirs:
        binary_path = os.path.join(binary_dir, built_kernel_file_name)
        if not os.path.exists(binary_path):
            continue

        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('generate opencl code from %s' % binary_path)
        with open(binary_path, "rb") as f:
            binary_array = np.fromfile(f, dtype=np.uint8)

        idx = 0
        size, = struct.unpack("Q", binary_array[idx:idx + 8])
        idx += 8
        # range() exists on both Python 2 and 3 (xrange is Python 2 only).
        for _ in range(size):
            key_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            key, = struct.unpack(
                str(key_size) + "s", binary_array[idx:idx + key_size])
            idx += key_size
            # On Python 3 the "s" format yields bytes; decode so the template
            # receives a text key. On Python 2 the key is already a str.
            if not isinstance(key, str):
                key = key.decode('utf-8')
            value_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            value = struct.unpack(
                str(value_size) + "B", binary_array[idx:idx + value_size])
            idx += value_size
            # Store every byte as a hex literal string for the template.
            maps[key] = [hex(ele) for ele in value]

        cl_platform_info_path = os.path.join(binary_dir,
                                             platform_info_file_name)
        with open(cl_platform_info_path, 'r') as f:
            curr_platform_info = f.read()
        # All scanned directories must describe the same platform.
        if platform_info != "":
            assert (curr_platform_info == platform_info)
        platform_info = curr_platform_info

    env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
    return env.get_template('opencl_compiled_kernel.cc.jinja2').render(
        maps=maps,
        data_type='unsigned char',
        variable_name='kCompiledProgramMap',
        platform_info=platform_info,
    )
def opencl_codegen(output_path,
                   cl_binary_dirs="",
                   built_kernel_file_name="",
                   platform_info_file_name=""):
    """Generate the compiled-kernel C++ source and write it to output_path.

    The source text comes from generate_cpp_source(); an existing regular file
    at output_path is removed before the new file is written.
    """
    generated_source = generate_cpp_source(
        cl_binary_dirs, built_kernel_file_name, platform_info_file_name)

    # Drop a stale output file first, then write the fresh content.
    stale_output_exists = os.path.isfile(output_path)
    if stale_output_exists:
        os.remove(output_path)

    with open(output_path, "w") as out_file:
        out_file.write(generated_source)
def parse_args():
    """Parses command line arguments.

    Returns:
        The ``(namespace, remaining_args)`` pair produced by
        ``ArgumentParser.parse_known_args()``; unrecognized arguments are
        returned in ``remaining_args`` instead of causing an error.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--cl_binary_dirs",
        type=str,
        default="",
        help="Comma-separated directories containing the cl binaries.")
    parser.add_argument(
        "--built_kernel_file_name",
        type=str,
        default="",
        help="The file name of the built cl kernel binary in each directory.")
    parser.add_argument(
        "--platform_info_file_name",
        type=str,
        default="",
        help="The file name of the cl platform info file in each directory.")
    parser.add_argument(
        "--output_path",
        type=str,
        default="./mace/examples/codegen/opencl/opencl_compiled_program.cc",
        help="The path of the generated C++ source file for cl binaries.")
    return parser.parse_known_args()
if __name__ == '__main__':
    # Script entry point: parse the flags into the module-level FLAGS and run
    # the generator. Unrecognized arguments are collected but not used here.
    FLAGS, unparsed = parse_args()
    opencl_codegen(
        output_path=FLAGS.output_path,
        cl_binary_dirs=FLAGS.cl_binary_dirs,
        built_kernel_file_name=FLAGS.built_kernel_file_name,
        platform_info_file_name=FLAGS.platform_info_file_name)
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include <map>
#include <string>
#include <vector>
namespace mace {
extern const std::map<std::string, std::vector<{{data_type}}>> {{variable_name}} =
{
{% for key, value in maps.iteritems() %}
{
"{{key}}",
{
{%- for ele in value -%}
{{ele}},
{%- endfor -%}
}
}, // {{key}}
{% endfor %}
};
extern const std::string kCompiledProgramPlatform = {{platform_info|tojson}};
} // namespace mace
...@@ -13,6 +13,5 @@ cc_binary( ...@@ -13,6 +13,5 @@ cc_binary(
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_models", "//mace/codegen:generated_models",
"//mace/core",
], ],
) )
...@@ -38,9 +38,6 @@ ...@@ -38,9 +38,6 @@
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/codegen/engine/mace_engine_factory.h" #include "mace/codegen/engine/mace_engine_factory.h"
namespace mace { namespace mace {
...@@ -100,22 +97,6 @@ DeviceType ParseDeviceType(const std::string &device_str) { ...@@ -100,22 +97,6 @@ DeviceType ParseDeviceType(const std::string &device_str) {
} }
} }
#ifdef MACE_ENABLE_OPENCL
void WriteOpenCLPlatformInfo(const std::string &output_dir) {
std::string platform_info = OpenCLRuntime::Global()->platform_info();
const std::string cl_platform_info_file_name = output_dir
+ "/mace_cl_platform_info.txt";
std::ofstream ofs(cl_platform_info_file_name);
if (ofs.is_open()) {
ofs << platform_info;
ofs.close();
} else {
LOG(WARNING) << "Write opencl platform info failed.";
}
}
#endif // MACE_ENABLE_OPENCL
struct mallinfo LogMallinfoChange(struct mallinfo prev) { struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo(); struct mallinfo curr = mallinfo();
if (prev.arena != curr.arena) { if (prev.arena != curr.arena) {
...@@ -187,6 +168,9 @@ DEFINE_string(input_file, ...@@ -187,6 +168,9 @@ DEFINE_string(input_file,
DEFINE_string(output_file, DEFINE_string(output_file,
"", "",
"output file name | output file prefix for multiple outputs"); "output file name | output file prefix for multiple outputs");
DEFINE_string(opencl_binary_file,
"",
"compiled opencl binary file path");
DEFINE_string(model_data_file, DEFINE_string(model_data_file,
"", "",
"model data file name, used when EMBED_MODEL_DATA set to 0 or 2"); "model data file name, used when EMBED_MODEL_DATA set to 0 or 2");
...@@ -230,6 +214,11 @@ bool RunModel(const std::string &model_name, ...@@ -230,6 +214,11 @@ bool RunModel(const std::string &model_name,
new FileStorageFactory(kernel_file_path)); new FileStorageFactory(kernel_file_path));
SetKVStorageFactory(storage_factory); SetKVStorageFactory(storage_factory);
if (device_type == DeviceType::GPU) {
std::vector<std::string> opencl_binary_paths = {FLAGS_opencl_binary_file};
mace::SetOpenCLBinaryPaths(opencl_binary_paths);
}
std::vector<unsigned char> model_pb_data; std::vector<unsigned char> model_pb_data;
if (FLAGS_model_file != "") { if (FLAGS_model_file != "") {
if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) { if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
...@@ -397,11 +386,6 @@ bool RunModel(const std::string &model_name, ...@@ -397,11 +386,6 @@ bool RunModel(const std::string &model_name,
printf("time %11.3f %11.3f %11.3f\n", printf("time %11.3f %11.3f %11.3f\n",
init_millis, warmup_millis, model_run_millis); init_millis, warmup_millis, model_run_millis);
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::GPU) {
WriteOpenCLPlatformInfo(kernel_file_path);
}
#endif // MACE_ENABLE_OPENCL
for (size_t i = 0; i < output_count; ++i) { for (size_t i = 0; i < output_count; ++i) {
std::string output_name = std::string output_name =
......
...@@ -132,7 +132,6 @@ def main(unused_args): ...@@ -132,7 +132,6 @@ def main(unused_args):
# generate sources # generate sources
sh_commands.gen_encrypted_opencl_source() sh_commands.gen_encrypted_opencl_source()
sh_commands.gen_compiled_opencl_source()
sh_commands.gen_mace_version() sh_commands.gen_mace_version()
sh_commands.gen_tuning_param_code([]) sh_commands.gen_tuning_param_code([])
......
...@@ -108,6 +108,15 @@ class StringFormatter: ...@@ -108,6 +108,15 @@ class StringFormatter:
return star_line + str(message).center(line_length) + '\n' + star_line return star_line + str(message).center(line_length) + '\n' + star_line
################################
# definitions
################################
# Namespace of string constants that name the device types
# (CPU, GPU and HEXAGON).
class DeviceType(object):
    CPU = 'CPU'
    GPU = 'GPU'
    HEXAGON = 'HEXAGON'
################################ ################################
# Argument types # Argument types
################################ ################################
......
...@@ -29,6 +29,7 @@ import sh_commands ...@@ -29,6 +29,7 @@ import sh_commands
from sh_commands import BuildType from sh_commands import BuildType
from common import CaffeEnvType from common import CaffeEnvType
from common import DeviceType
from common import mace_check from common import mace_check
from common import MaceLogger from common import MaceLogger
from common import StringFormatter from common import StringFormatter
...@@ -37,13 +38,14 @@ from common import StringFormatter ...@@ -37,13 +38,14 @@ from common import StringFormatter
# common definitions # common definitions
################################ ################################
BUILD_OUTPUT_DIR = 'build' BUILD_OUTPUT_DIR = 'build'
PHONE_DATA_DIR = "/data/local/tmp/mace_run/" PHONE_DATA_DIR = "/data/local/tmp/mace_run"
MODEL_OUTPUT_DIR_NAME = 'model' MODEL_OUTPUT_DIR_NAME = 'model'
BUILD_TMP_DIR_NAME = '_tmp' BUILD_TMP_DIR_NAME = '_tmp'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general' BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
OUTPUT_LIBRARY_DIR_NAME = 'library' OUTPUT_LIBRARY_DIR_NAME = 'library'
CL_BUILT_KERNEL_FILE_NAME = "mace_cl_compiled_program.bin" OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
CL_PLATFORM_INFO_FILE_NAME = "mace_cl_platform_info.txt" OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel.bin'
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
CODEGEN_BASE_DIR = 'mace/codegen' CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models' MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
MACE_RUN_TARGET = "//mace/tools/validation:mace_run" MACE_RUN_TARGET = "//mace/tools/validation:mace_run"
...@@ -176,11 +178,11 @@ def parse_device_type(runtime): ...@@ -176,11 +178,11 @@ def parse_device_type(runtime):
device_type = "" device_type = ""
if runtime == RuntimeType.dsp: if runtime == RuntimeType.dsp:
device_type = "HEXAGON" device_type = DeviceType.HEXAGON
elif runtime == RuntimeType.gpu: elif runtime == RuntimeType.gpu:
device_type = "GPU" device_type = DeviceType.GPU
elif runtime == RuntimeType.cpu: elif runtime == RuntimeType.cpu:
device_type = "CPU" device_type = DeviceType.CPU
return device_type return device_type
...@@ -433,6 +435,13 @@ def get_build_model_dirs(library_name, model_name, target_abi, target_soc, ...@@ -433,6 +435,13 @@ def get_build_model_dirs(library_name, model_name, target_abi, target_soc,
return model_output_base_dir, model_output_dir, mace_model_dir return model_output_base_dir, model_output_dir, mace_model_dir
def get_opencl_binary_output_path(library_name):
    """Return the output path of the OpenCL binary file for a library.

    The path is built by joining, with '/', the build output directory,
    `library_name`, the OpenCL binary sub-directory name and the OpenCL
    binary file name (all taken from the module-level constants).

    Args:
        library_name: name of the library; used as the directory directly
            under BUILD_OUTPUT_DIR.

    Returns:
        The path as a string; the file is not created or checked here.
    """
    return '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR,
                            library_name,
                            OUTPUT_OPENCL_BINARY_DIR_NAME,
                            OUTPUT_OPENCL_BINARY_FILE_NAME)
################################ ################################
# build # build
################################ ################################
...@@ -440,17 +449,7 @@ def pull_opencl_binary_and_tuning_param(target_abi, ...@@ -440,17 +449,7 @@ def pull_opencl_binary_and_tuning_param(target_abi,
serialno, serialno,
model_output_dirs): model_output_dirs):
sh_commands.pull_binaries(target_abi, serialno, model_output_dirs, sh_commands.pull_binaries(target_abi, serialno, model_output_dirs,
CL_BUILT_KERNEL_FILE_NAME, CL_COMPILED_BINARY_FILE_NAME)
CL_PLATFORM_INFO_FILE_NAME)
def gen_opencl_and_tuning_code(model_output_dirs):
# generate opencl binary code
sh_commands.gen_opencl_binary_code(model_output_dirs,
CL_BUILT_KERNEL_FILE_NAME,
CL_PLATFORM_INFO_FILE_NAME)
sh_commands.gen_tuning_param_code(model_output_dirs)
def print_configuration(flags, configs): def print_configuration(flags, configs):
...@@ -612,7 +611,7 @@ def build_specific_lib(target_abi, target_soc, serial_num, ...@@ -612,7 +611,7 @@ def build_specific_lib(target_abi, target_soc, serial_num,
sh.rm("-rf", build_tmp_binary_dir) sh.rm("-rf", build_tmp_binary_dir)
os.makedirs(build_tmp_binary_dir) os.makedirs(build_tmp_binary_dir)
gen_opencl_and_tuning_code([]) sh_commands.gen_tuning_param_code(model_output_dirs)
sh_commands.bazel_build( sh_commands.bazel_build(
MACE_RUN_TARGET, MACE_RUN_TARGET,
abi=target_abi, abi=target_abi,
...@@ -639,7 +638,7 @@ def build_specific_lib(target_abi, target_soc, serial_num, ...@@ -639,7 +638,7 @@ def build_specific_lib(target_abi, target_soc, serial_num,
os.makedirs(model_output_dir) os.makedirs(model_output_dir)
# build for specified soc # build for specified soc
if not address_sanitizer and tuning and target_abi != ABIType.host \ if not address_sanitizer and target_abi != ABIType.host \
and target_soc is not None and \ and target_soc is not None and \
model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu]: model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu]:
sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
...@@ -674,7 +673,8 @@ def build_specific_lib(target_abi, target_soc, serial_num, ...@@ -674,7 +673,8 @@ def build_specific_lib(target_abi, target_soc, serial_num,
tuning=tuning, tuning=tuning,
out_of_range_check=False, out_of_range_check=False,
phone_data_dir=PHONE_DATA_DIR, phone_data_dir=PHONE_DATA_DIR,
build_type=build_type build_type=build_type,
opencl_binary_file="",
) )
pull_opencl_binary_and_tuning_param(target_abi, serial_num, pull_opencl_binary_and_tuning_param(target_abi, serial_num,
...@@ -683,7 +683,10 @@ def build_specific_lib(target_abi, target_soc, serial_num, ...@@ -683,7 +683,10 @@ def build_specific_lib(target_abi, target_soc, serial_num,
binary_changed = True binary_changed = True
if binary_changed: if binary_changed:
gen_opencl_and_tuning_code(model_output_dirs) sh_commands.merge_opencl_binaries(
model_output_dirs, CL_COMPILED_BINARY_FILE_NAME,
get_opencl_binary_output_path(library_name))
sh_commands.gen_tuning_param_code(model_output_dirs)
sh_commands.bazel_build( sh_commands.bazel_build(
MACE_RUN_TARGET, MACE_RUN_TARGET,
abi=target_abi, abi=target_abi,
...@@ -919,6 +922,7 @@ def run_specific_target(flags, configs, target_abi, ...@@ -919,6 +922,7 @@ def run_specific_target(flags, configs, target_abi,
gpu_priority_hint=flags.gpu_priority_hint, gpu_priority_hint=flags.gpu_priority_hint,
runtime_failure_ratio=flags.runtime_failure_ratio, runtime_failure_ratio=flags.runtime_failure_ratio,
address_sanitizer=flags.address_sanitizer, address_sanitizer=flags.address_sanitizer,
opencl_binary_file=get_opencl_binary_output_path(library_name),
) )
if flags.validate: if flags.validate:
model_file_path, weight_file_path = get_model_files_path( model_file_path, weight_file_path = get_model_files_path(
...@@ -1051,7 +1055,8 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): ...@@ -1051,7 +1055,8 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
omp_num_threads=flags.omp_num_threads, omp_num_threads=flags.omp_num_threads,
cpu_affinity_policy=flags.cpu_affinity_policy, cpu_affinity_policy=flags.cpu_affinity_policy,
gpu_perf_hint=flags.gpu_perf_hint, gpu_perf_hint=flags.gpu_perf_hint,
gpu_priority_hint=flags.gpu_priority_hint) gpu_priority_hint=flags.gpu_priority_hint,
opencl_binary_file=get_opencl_binary_output_path(library_name))
def benchmark_model(flags): def benchmark_model(flags):
......
...@@ -16,9 +16,11 @@ import falcon_cli ...@@ -16,9 +16,11 @@ import falcon_cli
import filelock import filelock
import glob import glob
import logging import logging
import numpy as np
import os import os
import re import re
import sh import sh
import struct
import subprocess import subprocess
import sys import sys
import time import time
...@@ -30,7 +32,6 @@ import common ...@@ -30,7 +32,6 @@ import common
sys.path.insert(0, "mace/python/tools") sys.path.insert(0, "mace/python/tools")
try: try:
from encrypt_opencl_codegen import encrypt_opencl_codegen from encrypt_opencl_codegen import encrypt_opencl_codegen
from opencl_codegen import opencl_codegen
from binary_codegen import tuning_param_codegen from binary_codegen import tuning_param_codegen
from generate_data import generate_input_data from generate_data import generate_input_data
from validate import validate from validate import validate
...@@ -362,8 +363,7 @@ def gen_mace_engine_factory_source(model_tags, ...@@ -362,8 +363,7 @@ def gen_mace_engine_factory_source(model_tags,
def pull_binaries(abi, serialno, model_output_dirs, def pull_binaries(abi, serialno, model_output_dirs,
cl_built_kernel_file_name, cl_built_kernel_file_name):
cl_platform_info_file_name):
compiled_opencl_dir = "/data/local/tmp/mace_run/interior/" compiled_opencl_dir = "/data/local/tmp/mace_run/interior/"
mace_run_param_file = "mace_run.config" mace_run_param_file = "mace_run.config"
...@@ -379,26 +379,66 @@ def pull_binaries(abi, serialno, model_output_dirs, ...@@ -379,26 +379,66 @@ def pull_binaries(abi, serialno, model_output_dirs,
if abi != "host": if abi != "host":
adb_pull(compiled_opencl_dir + cl_built_kernel_file_name, adb_pull(compiled_opencl_dir + cl_built_kernel_file_name,
cl_bin_dir, serialno) cl_bin_dir, serialno)
adb_pull(compiled_opencl_dir + cl_platform_info_file_name,
cl_bin_dir, serialno)
adb_pull("/data/local/tmp/mace_run/%s" % mace_run_param_file, adb_pull("/data/local/tmp/mace_run/%s" % mace_run_param_file,
cl_bin_dir, serialno) cl_bin_dir, serialno)
def gen_opencl_binary_code(model_output_dirs, def merge_opencl_binaries(binaries_dirs,
cl_built_kernel_file_name, cl_compiled_program_file_name,
cl_platform_info_file_name, output_file_path):
codegen_path="mace/codegen"): platform_info_key = 'mace_opencl_precompiled_platform_info_key'
opencl_codegen_file = "%s/opencl/opencl_compiled_program.cc" % codegen_path
cl_bin_dirs = [] cl_bin_dirs = []
for d in model_output_dirs: for d in binaries_dirs:
cl_bin_dirs.append(os.path.join(d, "opencl_bin")) cl_bin_dirs.append(os.path.join(d, "opencl_bin"))
cl_bin_dirs_str = ",".join(cl_bin_dirs) # create opencl binary output dir
opencl_codegen(opencl_codegen_file, opencl_binary_dir = os.path.dirname(output_file_path)
cl_bin_dirs_str, if os.path.exists(opencl_binary_dir):
cl_built_kernel_file_name, sh.rm("-rf", opencl_binary_dir)
cl_platform_info_file_name) sh.mkdir("-p", opencl_binary_dir)
kvs = {}
for binary_dir in cl_bin_dirs:
binary_path = os.path.join(binary_dir, cl_compiled_program_file_name)
if not os.path.exists(binary_path):
continue
print 'generate opencl code from', binary_path
with open(binary_path, "rb") as f:
binary_array = np.fromfile(f, dtype=np.uint8)
idx = 0
size, = struct.unpack("Q", binary_array[idx:idx + 8])
idx += 8
for _ in xrange(size):
key_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
key, = struct.unpack(
str(key_size) + "s", binary_array[idx:idx + key_size])
idx += key_size
value_size, = struct.unpack("i", binary_array[idx:idx + 4])
idx += 4
if key == platform_info_key and key in kvs:
common.mace_check(
(kvs[key] == binary_array[idx:idx + value_size]).all(),
"",
"There exists more than one OpenCL version for models:"
" %s vs %s " %
(kvs[key], binary_array[idx:idx + value_size]))
else:
kvs[key] = binary_array[idx:idx + value_size]
idx += value_size
output_byte_array = bytearray()
data_size = len(kvs)
output_byte_array.extend(struct.pack("Q", data_size))
for key, value in kvs.iteritems():
key_size = len(key)
output_byte_array.extend(struct.pack("i", key_size))
output_byte_array.extend(struct.pack(str(key_size) + "s", key))
value_size = len(value)
output_byte_array.extend(struct.pack("i", value_size))
output_byte_array.extend(value)
np.array(output_byte_array).tofile(output_file_path)
def gen_tuning_param_code(model_output_dirs, def gen_tuning_param_code(model_output_dirs,
...@@ -426,12 +466,6 @@ def gen_mace_version(codegen_path="mace/codegen"): ...@@ -426,12 +466,6 @@ def gen_mace_version(codegen_path="mace/codegen"):
"%s/version/version.cc" % codegen_path) "%s/version/version.cc" % codegen_path)
def gen_compiled_opencl_source(codegen_path="mace/codegen"):
opencl_codegen_file = "%s/opencl/opencl_compiled_program.cc" % codegen_path
sh.mkdir("-p", "%s/opencl" % codegen_path)
opencl_codegen(opencl_codegen_file)
def gen_model_code(model_codegen_dir, def gen_model_code(model_codegen_dir,
platform, platform,
model_file_path, model_file_path,
...@@ -576,6 +610,7 @@ def tuning_run(abi, ...@@ -576,6 +610,7 @@ def tuning_run(abi,
out_of_range_check, out_of_range_check,
phone_data_dir, phone_data_dir,
build_type, build_type,
opencl_binary_file,
omp_num_threads=-1, omp_num_threads=-1,
cpu_affinity_policy=1, cpu_affinity_policy=1,
gpu_perf_hint=3, gpu_perf_hint=3,
...@@ -641,6 +676,10 @@ def tuning_run(abi, ...@@ -641,6 +676,10 @@ def tuning_run(abi,
adb_push("%s/%s.data" % (mace_model_dir, model_tag), adb_push("%s/%s.data" % (mace_model_dir, model_tag),
phone_data_dir, serialno) phone_data_dir, serialno)
if device_type == common.DeviceType.GPU\
and os.path.exists(opencl_binary_file):
adb_push(opencl_binary_file, phone_data_dir, serialno)
adb_push("third_party/nnlib/libhexagon_controller.so", adb_push("third_party/nnlib/libhexagon_controller.so",
phone_data_dir, serialno) phone_data_dir, serialno)
...@@ -689,6 +728,8 @@ def tuning_run(abi, ...@@ -689,6 +728,8 @@ def tuning_run(abi,
"--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint, "--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_phone_path, "--model_file=%s" % mace_model_phone_path,
"--opencl_binary_file=%s/%s" %
(phone_data_dir, os.path.basename(opencl_binary_file)),
]) ])
adb_cmd = ' '.join(adb_cmd) adb_cmd = ' '.join(adb_cmd)
sh.adb( sh.adb(
...@@ -1005,6 +1046,7 @@ def benchmark_model(abi, ...@@ -1005,6 +1046,7 @@ def benchmark_model(abi,
device_type, device_type,
phone_data_dir, phone_data_dir,
build_type, build_type,
opencl_binary_file,
omp_num_threads=-1, omp_num_threads=-1,
cpu_affinity_policy=1, cpu_affinity_policy=1,
gpu_perf_hint=3, gpu_perf_hint=3,
...@@ -1049,6 +1091,9 @@ def benchmark_model(abi, ...@@ -1049,6 +1091,9 @@ def benchmark_model(abi,
if not embed_model_data: if not embed_model_data:
adb_push("%s/%s.data" % (mace_model_dir, model_tag), adb_push("%s/%s.data" % (mace_model_dir, model_tag),
phone_data_dir, serialno) phone_data_dir, serialno)
if device_type == common.DeviceType.GPU \
and os.path.exists(opencl_binary_file):
adb_push(opencl_binary_file, phone_data_dir, serialno)
mace_model_phone_path = "" mace_model_phone_path = ""
if build_type == BuildType.proto: if build_type == BuildType.proto:
mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag) mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag)
...@@ -1082,6 +1127,8 @@ def benchmark_model(abi, ...@@ -1082,6 +1127,8 @@ def benchmark_model(abi,
"--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint, "--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_phone_path, "--model_file=%s" % mace_model_phone_path,
"--opencl_binary_file=%s/%s" %
(phone_data_dir, os.path.basename(opencl_binary_file)),
_fg=True) _fg=True)
print("Benchmark done!\n") print("Benchmark done!\n")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册