diff --git a/mace/ops/ops_test_util.cc b/mace/ops/ops_test_util.cc
index 5233ccde1e6a7043f864e558045b823bd63c9507..1cb835f60fd3a16e3c8bd12cfae38f91a8c57e99 100644
--- a/mace/ops/ops_test_util.cc
+++ b/mace/ops/ops_test_util.cc
@@ -100,6 +100,16 @@ void OpDefBuilder::Finalize(OperatorDef *op_def) const {
   *op_def = op_def_;
 }
 
+namespace {
+// Reads the MACE_INTERNAL_STORAGE_PATH environment variable.
+// Returns its value, or an empty string when the variable is not set.
+std::string GetStoragePathFromEnv() {
+  const char *storage_path_str = getenv("MACE_INTERNAL_STORAGE_PATH");
+  if (storage_path_str == nullptr) return "";
+  return storage_path_str;
+}
+}  // namespace
+
 OpTestContext *OpTestContext::Get(int num_threads,
                                   CPUAffinityPolicy cpu_affinity_policy,
                                   bool use_gemmlowp) {
@@ -112,7 +122,7 @@ OpTestContext *OpTestContext::Get(int num_threads,
 OpTestContext::OpTestContext(int num_threads,
                              CPUAffinityPolicy cpu_affinity_policy,
                              bool use_gemmlowp)
-    : gpu_context_(new GPUContext()),
+    : gpu_context_(new GPUContext(GetStoragePathFromEnv())),
       opencl_mem_types_({MemoryType::GPU_IMAGE}) {
   device_map_[DeviceType::CPU] = std::unique_ptr(
       new CPUDevice(num_threads,
diff --git a/mace/public/mace.h b/mace/public/mace.h
index 01818ef5719b48298bd501967bb91cb99521336f..90b1c16fe6260ce46d67e049e609534d33ef45de 100644
--- a/mace/public/mace.h
+++ b/mace/public/mace.h
@@ -146,6 +146,9 @@ class MaceStatus {
 ///
 /// There are some data in common between different MaceEngines using GPU,
 /// use one GPUContext could avoid duplication.
+///
+/// Thread-safe.
+/// One GPUContext can be shared by multiple MaceEngines running in parallel.
 class GPUContext;
 
 /// \brief GPUContext builder.
diff --git a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc
index 3a55a4dce485869d998e7350a94a8a629c2c13a0..65264cf02121f8834db763d66f1229c491196795 100644
--- a/mace/test/mace_api_mt_test.cc
+++ b/mace/test/mace_api_mt_test.cc
@@ -39,7 +39,6 @@ void MaceRunFunc(const int in_out_size) {
 
   std::shared_ptr net_def(new NetDef());
 
-
   std::vector data;
   ops::test::GenerateRandomRealTypeData(filter_shape, &data);
   AddTensor(
@@ -63,6 +62,8 @@ void MaceRunFunc(const int in_out_size) {
   }
 
   MaceEngineConfig config(DeviceType::GPU);
+  // Run this engine with the GPUContext held by OpTestContext.
+  config.SetGPUContext(mace::ops::test::OpTestContext::Get()->gpu_context());
 
   MaceEngine engine(config);
   MaceStatus status = engine.Init(net_def.get(), input_names, output_names,
diff --git a/tools/device.py b/tools/device.py
index fccbf4428bf1c0376e922fb65a3ac93640b8ed89..90683eb87afed561a96e35c1402ec58536d46478 100644
--- a/tools/device.py
+++ b/tools/device.py
@@ -901,6 +901,9 @@ class DeviceWrapper:
         stdout_buf = []
         process_output = sh_commands.make_output_processor(stdout_buf)
 
+        # Handed to the binary below through MACE_INTERNAL_STORAGE_PATH.
+        internal_storage_dir = self.create_internal_storage_dir()
+
         if simpleperf and self.system == SystemType.android:
             self.push(sh_commands.find_simpleperf_library(abi),
                       self.data_dir)
@@ -909,6 +912,7 @@ class DeviceWrapper:
                 ld_preload,
                 'MACE_OUT_OF_RANGE_CHECK=%s' % out_of_range_check,
                 'MACE_OPENCL_PROFILING=%d' % opencl_profiling,
+                'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir,
                 'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level,
                 simpleperf_cmd,
                 'stat',
@@ -928,6 +932,7 @@ class DeviceWrapper:
                 ld_preload,
                 'MACE_OUT_OF_RANGE_CHECK=%d' % out_of_range_check,
-                'MACE_OPENCL_PROFILNG=%d' % opencl_profiling,
+                'MACE_OPENCL_PROFILING=%d' % opencl_profiling,
+                'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir,
                 'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level,
                 device_bin_full_path,
                 args