提交 b50a6635 编写于 作者: 李寅

Merge branch 'update-docs' into 'master'

Update the document with latest APIs.

See merge request !791
...@@ -180,7 +180,6 @@ There are two common advanced use cases: ...@@ -180,7 +180,6 @@ There are two common advanced use cases:
// Include the headers // Include the headers
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
// If the model_graph_format is code // If the model_graph_format is code
#include "mace/public/${model_name}.h" #include "mace/public/${model_name}.h"
#include "mace/public/mace_engine_factory.h" #include "mace/public/mace_engine_factory.h"
...@@ -199,7 +198,7 @@ There are two common advanced use cases: ...@@ -199,7 +198,7 @@ There are two common advanced use cases:
device_type, device_type,
&engine); &engine);
if (create_engine_status != MaceStatus::MACE_SUCCESS) { if (create_engine_status != MaceStatus::MACE_SUCCESS) {
// Report error // Report error or fallback
} }
// ... Same with the code in basic usage // ... Same with the code in basic usage
...@@ -271,13 +270,24 @@ There are two common advanced use cases: ...@@ -271,13 +270,24 @@ There are two common advanced use cases:
// Include the headers // Include the headers
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_runtime.h" // 0. Declare the device type (must be same with ``runtime`` in configuration file)
DeviceType device_type = DeviceType::GPU;
// 0. Set pre-compiled OpenCL binary program file paths and OpenCL parameters file path when available
if (device_type == DeviceType::GPU) { // 1. configuration
mace::SetOpenCLBinaryPaths(path/to/opencl_binary_paths); MaceStatus status;
mace::SetOpenCLParameterPath(path/to/opencl_parameter_file); MaceEngineConfig config(device_type);
} std::shared_ptr<GPUContext> gpu_context;
const std::string storage_path ="path/to/storage";
gpu_context = GPUContextBuilder()
.SetStoragePath(storage_path)
.SetOpenCLBinaryPaths(path/to/opencl_binary_paths)
.SetOpenCLParameterPath(path/to/opencl_parameter_file)
.Finalize();
config.SetGPUContext(gpu_context);
config.SetGPUHints(
static_cast<GPUPerfHint>(GPUPerfHint::PERF_NORMAL),
static_cast<GPUPriorityHint>(GPUPriorityHint::PRIORITY_LOW));
// ... Same with the code in basic usage. // ... Same with the code in basic usage.
......
...@@ -332,21 +332,28 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. The following li ...@@ -332,21 +332,28 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. The following li
// Include the headers // Include the headers
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
// 0. Declare the device type (must be the same as ``runtime`` in the configuration file)
// 0. Set the compiled OpenCL kernel cache. This is used to reduce the
// initialization time, since compilation is slow. It's suggested
// to set this even when a pre-compiled OpenCL program file is provided,
// because an OpenCL version upgrade may also lead to kernel
// recompilations.
const std::string file_path ="path/to/opencl_cache_file";
std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(file_path));
ConfigKVStorageFactory(storage_factory);
// 1. Declare the device type (must be the same as ``runtime`` in the configuration file)
DeviceType device_type = DeviceType::GPU; DeviceType device_type = DeviceType::GPU;
// 1. configuration
MaceStatus status;
MaceEngineConfig config(device_type);
std::shared_ptr<GPUContext> gpu_context;
// Set the path to store compiled OpenCL kernel binaries.
// Please make sure your application has read/write permission for the directory.
// This is used to reduce the initialization time, since compilation is slow.
// It's suggested to set this even when a pre-compiled OpenCL program file is provided,
// because an OpenCL version upgrade may also lead to kernel recompilations.
const std::string storage_path ="path/to/storage";
gpu_context = GPUContextBuilder()
.SetStoragePath(storage_path)
.Finalize();
config.SetGPUContext(gpu_context);
config.SetGPUHints(
static_cast<GPUPerfHint>(GPUPerfHint::PERF_NORMAL),
static_cast<GPUPriorityHint>(GPUPriorityHint::PRIORITY_LOW));
// 2. Define the input and output tensor names. // 2. Define the input and output tensor names.
std::vector<std::string> input_names = {...}; std::vector<std::string> input_names = {...};
std::vector<std::string> output_names = {...}; std::vector<std::string> output_names = {...};
......
...@@ -87,9 +87,6 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) { ...@@ -87,9 +87,6 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) {
} else { } else {
future.wait_fn(nullptr); future.wait_fn(nullptr);
} }
#ifdef MACE_ENABLE_OPENCL
device_->opencl_runtime()->command_queue().finish();
#endif
} else if (run_metadata != nullptr) { } else if (run_metadata != nullptr) {
call_stats.start_micros = NowMicros(); call_stats.start_micros = NowMicros();
MACE_RETURN_IF_ERROR(op->Run(nullptr)); MACE_RETURN_IF_ERROR(op->Run(nullptr));
......
...@@ -441,6 +441,7 @@ OpenCLRuntime::OpenCLRuntime( ...@@ -441,6 +441,7 @@ OpenCLRuntime::OpenCLRuntime(
} }
OpenCLRuntime::~OpenCLRuntime() { OpenCLRuntime::~OpenCLRuntime() {
command_queue_->finish();
built_program_map_.clear(); built_program_map_.clear();
// We need to control the destruction order, which has dependencies // We need to control the destruction order, which has dependencies
command_queue_.reset(); command_queue_.reset();
......
...@@ -120,8 +120,8 @@ DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON"); ...@@ -120,8 +120,8 @@ DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON");
DEFINE_int32(round, 1, "round"); DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round"); DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable"); DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1, DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
......
...@@ -131,10 +131,6 @@ class OpsTestNet { ...@@ -131,10 +131,6 @@ class OpsTestNet {
op_registry_(new OperatorRegistry()) { op_registry_(new OperatorRegistry()) {
} }
~OpsTestNet() {
Sync();
}
template <DeviceType D, typename T> template <DeviceType D, typename T>
void AddInputFromArray(const std::string &name, void AddInputFromArray(const std::string &name,
const std::vector<index_t> &shape, const std::vector<index_t> &shape,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册