提交 b50a6635 编写于 作者: 李寅

Merge branch 'update-docs' into 'master'

Update the document with latest APIs.

See merge request !791
......@@ -180,7 +180,6 @@ There are two common advanced use cases:
// Include the headers
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
// If the model_graph_format is code
#include "mace/public/${model_name}.h"
#include "mace/public/mace_engine_factory.h"
......@@ -199,7 +198,7 @@ There are two common advanced use cases:
device_type,
&engine);
if (create_engine_status != MaceStatus::MACE_SUCCESS) {
// Report error
// Report error or fallback
}
// ... Same as the code in basic usage
......@@ -271,13 +270,24 @@ There are two common advanced use cases:
// Include the headers
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
// 0. Set pre-compiled OpenCL binary program file paths and OpenCL parameters file path when available
if (device_type == DeviceType::GPU) {
mace::SetOpenCLBinaryPaths(path/to/opencl_binary_paths);
mace::SetOpenCLParameterPath(path/to/opencl_parameter_file);
}
// 0. Declare the device type (must be the same as ``runtime`` in the configuration file)
DeviceType device_type = DeviceType::GPU;
// 1. configuration
MaceStatus status;
MaceEngineConfig config(device_type);
std::shared_ptr<GPUContext> gpu_context;
const std::string storage_path ="path/to/storage";
gpu_context = GPUContextBuilder()
.SetStoragePath(storage_path)
.SetOpenCLBinaryPaths(path/to/opencl_binary_paths)
.SetOpenCLParameterPath(path/to/opencl_parameter_file)
.Finalize();
config.SetGPUContext(gpu_context);
config.SetGPUHints(
static_cast<GPUPerfHint>(GPUPerfHint::PERF_NORMAL),
static_cast<GPUPriorityHint>(GPUPriorityHint::PRIORITY_LOW));
// ... Same as the code in basic usage.
......
......@@ -332,21 +332,28 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. The following li
// Include the headers
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
// 0. Set the compiled OpenCL kernel cache. This is used to reduce the
// initialization time, since compiling is too slow. It's suggested
// to set this even when a pre-compiled OpenCL program file is provided,
// because an OpenCL version upgrade may also lead to kernel
// recompilations.
const std::string file_path ="path/to/opencl_cache_file";
std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(file_path));
ConfigKVStorageFactory(storage_factory);
// 1. Declare the device type (must be the same as ``runtime`` in the configuration file)
// 0. Declare the device type (must be the same as ``runtime`` in the configuration file)
DeviceType device_type = DeviceType::GPU;
// 1. configuration
MaceStatus status;
MaceEngineConfig config(device_type);
std::shared_ptr<GPUContext> gpu_context;
// Set the path to store compiled OpenCL kernel binaries.
// Please make sure your application has read/write permission for the directory.
// This is used to reduce the initialization time, since compiling is too slow.
// It's suggested to set this even when a pre-compiled OpenCL program file is provided,
// because an OpenCL version upgrade may also lead to kernel recompilations.
const std::string storage_path ="path/to/storage";
gpu_context = GPUContextBuilder()
.SetStoragePath(storage_path)
.Finalize();
config.SetGPUContext(gpu_context);
config.SetGPUHints(
static_cast<GPUPerfHint>(GPUPerfHint::PERF_NORMAL),
static_cast<GPUPriorityHint>(GPUPriorityHint::PRIORITY_LOW));
// 2. Define the input and output tensor names.
std::vector<std::string> input_names = {...};
std::vector<std::string> output_names = {...};
......
......@@ -87,9 +87,6 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) {
} else {
future.wait_fn(nullptr);
}
#ifdef MACE_ENABLE_OPENCL
device_->opencl_runtime()->command_queue().finish();
#endif
} else if (run_metadata != nullptr) {
call_stats.start_micros = NowMicros();
MACE_RETURN_IF_ERROR(op->Run(nullptr));
......
......@@ -441,6 +441,7 @@ OpenCLRuntime::OpenCLRuntime(
}
OpenCLRuntime::~OpenCLRuntime() {
command_queue_->finish();
built_program_map_.clear();
// We need to control the destruction order, which has dependencies
command_queue_.reset();
......
......@@ -120,8 +120,8 @@ DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON");
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
......
......@@ -131,10 +131,6 @@ class OpsTestNet {
op_registry_(new OperatorRegistry()) {
}
~OpsTestNet() {
Sync();
}
template <DeviceType D, typename T>
void AddInputFromArray(const std::string &name,
const std::vector<index_t> &shape,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册