提交 3bd8ef35 编写于 作者: M Megvii Engine Team

feat(mgb/compnode): add atlas compnode

GitOrigin-RevId: 19f3c330039c3d0accd9787446c391495f425b6e
上级 aa147b74
...@@ -143,6 +143,15 @@ if(CXX_SUPPORT_GOLD AND NOT ANDROID AND NOT APPLE AND NOT MSVC AND NOT WIN32) ...@@ -143,6 +143,15 @@ if(CXX_SUPPORT_GOLD AND NOT ANDROID AND NOT APPLE AND NOT MSVC AND NOT WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold")
endif() endif()
# Top-level build feature toggles. Override on the command line with
# -D<NAME>=ON/OFF. Note MGE_WITH_HALIDE is only meaningful when
# MGE_WITH_JIT is enabled (see the check below).
option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" ON)
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
if(NOT MGE_WITH_JIT) if(NOT MGE_WITH_JIT)
if(MGE_WITH_HALIDE) if(MGE_WITH_HALIDE)
message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled") message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
......
...@@ -84,6 +84,7 @@ megcoreStatus_t megcoreGetDeviceFlags( ...@@ -84,6 +84,7 @@ megcoreStatus_t megcoreGetDeviceFlags(
unsigned int *flags); unsigned int *flags);
megcoreStatus_t megcoreActivate(megcoreDeviceHandle_t handle); megcoreStatus_t megcoreActivate(megcoreDeviceHandle_t handle);
megcoreStatus_t megcoreDeactivate(megcoreDeviceHandle_t handle);
megcoreStatus_t megcoreMalloc(megcoreDeviceHandle_t handle, megcoreStatus_t megcoreMalloc(megcoreDeviceHandle_t handle,
void **devPtr, size_t sizeInBytes); void **devPtr, size_t sizeInBytes);
megcoreStatus_t megcoreFree(megcoreDeviceHandle_t handle, megcoreStatus_t megcoreFree(megcoreDeviceHandle_t handle,
......
...@@ -86,6 +86,7 @@ if (BUILD_SHARED_LIBS) ...@@ -86,6 +86,7 @@ if (BUILD_SHARED_LIBS)
else() else()
target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS}) target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS})
endif() endif()
if(CMAKE_THREAD_LIBS_INIT) if(CMAKE_THREAD_LIBS_INIT)
target_link_libraries(megdnn PRIVATE Threads::Threads) target_link_libraries(megdnn PRIVATE Threads::Threads)
endif() endif()
......
...@@ -38,6 +38,7 @@ class DeviceContext { ...@@ -38,6 +38,7 @@ class DeviceContext {
virtual size_t mem_alignment_in_bytes() const noexcept = 0; virtual size_t mem_alignment_in_bytes() const noexcept = 0;
virtual void activate() = 0; virtual void activate() = 0;
//! Release per-thread/per-device state acquired by activate().
//! Default is a no-op so existing backends that need no teardown
//! do not have to override it.
virtual void deactivate() {}
virtual void *malloc(size_t size_in_bytes) = 0; virtual void *malloc(size_t size_in_bytes) = 0;
virtual void free(void *ptr) = 0; virtual void free(void *ptr) = 0;
......
...@@ -74,6 +74,13 @@ megcoreStatus_t megcoreActivate(megcoreDeviceHandle_t handle) ...@@ -74,6 +74,13 @@ megcoreStatus_t megcoreActivate(megcoreDeviceHandle_t handle)
return megcoreSuccess; return megcoreSuccess;
} }
//! C-API counterpart of megcoreActivate(): delegates to
//! DeviceContext::deactivate() on the handle's device context
//! (a no-op for backends that do not override it).
//! Asserts that \p handle is non-null; always returns megcoreSuccess
//! once the delegate call completes.
megcoreStatus_t megcoreDeactivate(megcoreDeviceHandle_t handle)
{
megdnn_assert(handle);
handle->content->deactivate();
return megcoreSuccess;
}
megcoreStatus_t megcoreMalloc(megcoreDeviceHandle_t handle, megcoreStatus_t megcoreMalloc(megcoreDeviceHandle_t handle,
void **devPtr, size_t sizeInBytes) void **devPtr, size_t sizeInBytes)
{ {
......
...@@ -27,7 +27,6 @@ endif() ...@@ -27,7 +27,6 @@ endif()
add_executable(megdnn_test ${SOURCES}) add_executable(megdnn_test ${SOURCES})
target_link_libraries(megdnn_test gtest) target_link_libraries(megdnn_test gtest)
target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS}) target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS})
......
...@@ -246,6 +246,7 @@ SymbolVarArray _Opr::tensor_rt_runtime(const SymbolVarArray& inputs, ...@@ -246,6 +246,7 @@ SymbolVarArray _Opr::tensor_rt_runtime(const SymbolVarArray& inputs,
} }
#endif #endif
SymbolVar _Opr::timestamp(SymbolVar input, PyObject* dest, size_t dest_off, SymbolVar _Opr::timestamp(SymbolVar input, PyObject* dest, size_t dest_off,
const OperatorNodeConfig& config) { const OperatorNodeConfig& config) {
auto tensor = std::make_shared<HostTensorND>( auto tensor = std::make_shared<HostTensorND>(
......
...@@ -118,6 +118,8 @@ static SymbolVarArray tensor_rt_runtime(const SymbolVarArray& inputs, ...@@ -118,6 +118,8 @@ static SymbolVarArray tensor_rt_runtime(const SymbolVarArray& inputs,
PyObject* data_bytes, PyObject* data_bytes,
const OperatorNodeConfig& config); const OperatorNodeConfig& config);
static SymbolVar timestamp(SymbolVar input, PyObject* dest, size_t dest_off, static SymbolVar timestamp(SymbolVar input, PyObject* dest, size_t dest_off,
const OperatorNodeConfig& config); const OperatorNodeConfig& config);
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
#if MGB_ENABLE_OPR_MM #if MGB_ENABLE_OPR_MM
#include "megbrain/opr/collective_comm.h" #include "megbrain/opr/collective_comm.h"
#endif #endif
using AxisIndexer = mgb::opr::indexing::AxisIndexer; using AxisIndexer = mgb::opr::indexing::AxisIndexer;
/*! /*!
......
...@@ -88,7 +88,7 @@ if (MGB_WITH_FLATBUFFERS) ...@@ -88,7 +88,7 @@ if (MGB_WITH_FLATBUFFERS)
${CMAKE_CURRENT_BINARY_DIR}/serialization/impl/opr_param_defs.fbs ${CMAKE_CURRENT_BINARY_DIR}/serialization/impl/opr_param_defs.fbs
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${GEN_FLATBUFFERS_SCHEMA_PY} ${OPR_PARAM_DEFS_PY} ${CMAKE_CURRENT_BINARY_DIR}/serialization/impl/opr_param_defs.fbs ${PYTHON_EXECUTABLE} ${GEN_FLATBUFFERS_SCHEMA_PY} ${OPR_PARAM_DEFS_PY} ${CMAKE_CURRENT_BINARY_DIR}/serialization/impl/opr_param_defs.fbs
DEPENDS ${GEN_FLATBUFFERS_SCHEMA_PY} ${OPR_PARAM_DEFS_PY} DEPENDS ${GEN_FLATBUFFERS_SCHEMA_PY} ${OPR_PARAM_DEFS_PY}
VERBATIM VERBATIM
) )
add_custom_command( add_custom_command(
...@@ -124,7 +124,6 @@ if (MGB_WITH_FLATBUFFERS) ...@@ -124,7 +124,6 @@ if (MGB_WITH_FLATBUFFERS)
target_include_directories(megbrain PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/serialization/include) target_include_directories(megbrain PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/serialization/include)
target_compile_definitions(megbrain PUBLIC MGB_ENABLE_FBS_SERIALIZATION=1) target_compile_definitions(megbrain PUBLIC MGB_ENABLE_FBS_SERIALIZATION=1)
target_link_libraries(megbrain PUBLIC flatbuffers) target_link_libraries(megbrain PUBLIC flatbuffers)
set (GENERATED_FLATBUFFERS_CONVERTER_PATH ${CMAKE_CURRENT_BINARY_DIR}/genfiles) set (GENERATED_FLATBUFFERS_CONVERTER_PATH ${CMAKE_CURRENT_BINARY_DIR}/genfiles)
set (GEN_FLATBUFFERS_CONVERTER_PY ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_flatbuffers_converter.py) set (GEN_FLATBUFFERS_CONVERTER_PY ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_flatbuffers_converter.py)
file (MAKE_DIRECTORY ${GENERATED_FLATBUFFERS_CONVERTER_PATH}) file (MAKE_DIRECTORY ${GENERATED_FLATBUFFERS_CONVERTER_PATH})
......
...@@ -96,7 +96,7 @@ megcore::AsyncErrorInfo* MegDNNHandle::make_async_error_info( ...@@ -96,7 +96,7 @@ megcore::AsyncErrorInfo* MegDNNHandle::make_async_error_info(
cn.free_device(ptr); cn.free_device(ptr);
} }
}; };
megcore::AsyncErrorInfo zero_info{0, nullptr, "", {0,0,0,0}}; megcore::AsyncErrorInfo zero_info{0, nullptr, "", {0, 0, 0, 0}};
auto ptr = static_cast<megcore::AsyncErrorInfo*>( auto ptr = static_cast<megcore::AsyncErrorInfo*>(
env.comp_node().alloc_device(sizeof(zero_info))); env.comp_node().alloc_device(sizeof(zero_info)));
cn.copy_to_device(ptr, &zero_info, sizeof(zero_info)); cn.copy_to_device(ptr, &zero_info, sizeof(zero_info));
...@@ -106,7 +106,7 @@ megcore::AsyncErrorInfo* MegDNNHandle::make_async_error_info( ...@@ -106,7 +106,7 @@ megcore::AsyncErrorInfo* MegDNNHandle::make_async_error_info(
} }
#endif #endif
/* =================== misc =================== */ /* =================== misc =================== */
#if MGB_CUDA #if MGB_CUDA
......
...@@ -123,9 +123,9 @@ StaticDeviceMemoryManager::make_default_impl() { ...@@ -123,9 +123,9 @@ StaticDeviceMemoryManager::make_default_impl() {
} }
#endif // MGB_THREAD_SAFE #endif // MGB_THREAD_SAFE
/* ==================== CUDAAsyncVarReleaser ==================== */ /* ==================== AsyncVarReleaser ==================== */
#if MGB_CUDA #if MGB_CUDA
class VarNodeMemManager::CUDAAsyncVarReleaser { class VarNodeMemManager::AsyncVarReleaser {
struct WaiterParam { struct WaiterParam {
CompNode cn; CompNode cn;
CompNode::Event *event; CompNode::Event *event;
...@@ -133,10 +133,10 @@ class VarNodeMemManager::CUDAAsyncVarReleaser { ...@@ -133,10 +133,10 @@ class VarNodeMemManager::CUDAAsyncVarReleaser {
}; };
class Waiter final: public AsyncQueueSC<WaiterParam, Waiter> { class Waiter final: public AsyncQueueSC<WaiterParam, Waiter> {
CUDAAsyncVarReleaser *m_par_releaser; AsyncVarReleaser *m_par_releaser;
public: public:
Waiter(CUDAAsyncVarReleaser *releaser): Waiter(AsyncVarReleaser *releaser):
m_par_releaser(releaser) m_par_releaser(releaser)
{ {
} }
...@@ -159,7 +159,7 @@ class VarNodeMemManager::CUDAAsyncVarReleaser { ...@@ -159,7 +159,7 @@ class VarNodeMemManager::CUDAAsyncVarReleaser {
Spinlock m_event_pool_lock; Spinlock m_event_pool_lock;
public: public:
~CUDAAsyncVarReleaser() { ~AsyncVarReleaser() {
wait_release_finish(); wait_release_finish();
} }
...@@ -247,15 +247,16 @@ bool VarNodeMemManager::ImpureMemPlanManager::check_need_realloc() { ...@@ -247,15 +247,16 @@ bool VarNodeMemManager::ImpureMemPlanManager::check_need_realloc() {
VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl *graph): VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl *graph):
m_owner_graph(graph), m_owner_graph(graph),
m_seq_mem_opt(graph) m_seq_mem_opt(graph)
#if MGB_CUDA #if MGB_CUDA
,m_cuda_asyn_var_releaser(new CUDAAsyncVarReleaser) ,m_asyn_var_releaser(new AsyncVarReleaser)
#endif #endif
{ {
auto on_comp_seq_finish = [this](const event::CompSeqExecFinished& ev) { auto on_comp_seq_finish = [this](const event::CompSeqExecFinished& ev) {
MGB_MARK_USED_VAR(ev);
// async release is only used for sync between multiple comp nodes, and // async release is only used for sync between multiple comp nodes, and
// does not wait for device to finish // does not wait for device to finish
#if MGB_CUDA #if MGB_CUDA
m_cuda_asyn_var_releaser->wait_release_finish(); m_asyn_var_releaser->wait_release_finish();
#endif #endif
m_cpu_async_release_barrier.wait_zero(); m_cpu_async_release_barrier.wait_zero();
}; };
...@@ -295,9 +296,10 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl *graph): ...@@ -295,9 +296,10 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl *graph):
graph->event().register_receiver_permanent<event::CompSeqExecError>( graph->event().register_receiver_permanent<event::CompSeqExecError>(
on_comp_seq_error); on_comp_seq_error);
#if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER #if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && (MGB_CUDA \
)
auto on_mem_defrag_start = [this](const event::BeforeMemDefrag&) { auto on_mem_defrag_start = [this](const event::BeforeMemDefrag&) {
m_cuda_asyn_var_releaser->wait_release_finish(); m_asyn_var_releaser->wait_release_finish();
}; };
graph->event().register_receiver_permanent<event::BeforeMemDefrag>( graph->event().register_receiver_permanent<event::BeforeMemDefrag>(
on_mem_defrag_start); on_mem_defrag_start);
...@@ -1341,7 +1343,7 @@ void VarNodeMemManager::decr_var_mem_refcnt( ...@@ -1341,7 +1343,7 @@ void VarNodeMemManager::decr_var_mem_refcnt(
} }
#if MGB_CUDA #if MGB_CUDA
case DT::CUDA: case DT::CUDA:
m_cuda_asyn_var_releaser->add(dispatch_cn, var); m_asyn_var_releaser->add(dispatch_cn, var);
break; break;
#endif #endif
default: default:
......
...@@ -431,10 +431,10 @@ class VarNodeMemManager { ...@@ -431,10 +431,10 @@ class VarNodeMemManager {
SyncableCounter m_cpu_async_release_barrier; SyncableCounter m_cpu_async_release_barrier;
#if MGB_CUDA #if MGB_CUDA
//! release dynamic var on after cuda event finishes //! release dynamic var on after compnode event finishes
class CUDAAsyncVarReleaser; class AsyncVarReleaser;
std::unique_ptr<CUDAAsyncVarReleaser> m_cuda_asyn_var_releaser; std::unique_ptr<AsyncVarReleaser> m_asyn_var_releaser;
#endif #endif
VarDevMemDefragmenter m_var_dev_mem_defragmenter{this}; VarDevMemDefragmenter m_var_dev_mem_defragmenter{this};
......
...@@ -41,9 +41,9 @@ ...@@ -41,9 +41,9 @@
} \ } \
} while (0) } while (0)
#endif // MGB_ENABLE_LOGGING #endif //MGB_ENABLE_LOGGING
#endif //MGB_CUDA
#endif
//! whether to enable asynchronous initialization for CompNode and CompNodeEnv //! whether to enable asynchronous initialization for CompNode and CompNodeEnv
#define MGB_ENABLE_COMP_NODE_ASYNC_INIT (MGB_CUDA) #define MGB_ENABLE_COMP_NODE_ASYNC_INIT (MGB_CUDA)
......
...@@ -136,7 +136,6 @@ public: ...@@ -136,7 +136,6 @@ public:
* error message * error message
*/ */
static std::string get_cuda_extra_info(); static std::string get_cuda_extra_info();
CudaError(const std::string& msg); CudaError(const std::string& msg);
}; };
......
...@@ -59,9 +59,6 @@ TEST(TestCompNode, Parse) { ...@@ -59,9 +59,6 @@ TEST(TestCompNode, Parse) {
ASSERT_THROW(L::parse("cpu0:"), MegBrainError); ASSERT_THROW(L::parse("cpu0:"), MegBrainError);
ASSERT_THROW(L::parse("cpu0:x"), MegBrainError); ASSERT_THROW(L::parse("cpu0:x"), MegBrainError);
ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError); ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError);
ASSERT_THROW(L::parse("heaxgon0"), MegBrainError);
ASSERT_THROW(L::parse("rcom0"), MegBrainError);
ASSERT_THROW(L::parse("cmabricon0"), MegBrainError);
ASSERT_THROW(L::parse("multithread"), MegBrainError); ASSERT_THROW(L::parse("multithread"), MegBrainError);
ASSERT_THROW(L::parse("multithread1:"), MegBrainError); ASSERT_THROW(L::parse("multithread1:"), MegBrainError);
ASSERT_THROW(L::parse("multithread1:default"), MegBrainError); ASSERT_THROW(L::parse("multithread1:default"), MegBrainError);
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
#cmakedefine01 MEGDNN_THREADS_512 #cmakedefine01 MEGDNN_THREADS_512
#cmakedefine01 MEGDNN_ENABLE_MULTI_THREADS #cmakedefine01 MEGDNN_ENABLE_MULTI_THREADS
// whether cuda is available // whether cuda is available
#ifndef MGB_CUDA #ifndef MGB_CUDA
#define MGB_CUDA 1 #define MGB_CUDA 1
......
...@@ -15,6 +15,7 @@ if (MGE_WITH_CUDA AND MGE_WITH_TRT) ...@@ -15,6 +15,7 @@ if (MGE_WITH_CUDA AND MGE_WITH_TRT)
list(APPEND SOURCES ${SOURCES_}) list(APPEND SOURCES ${SOURCES_})
endif() endif()
add_executable(megbrain_test ${SOURCES}) add_executable(megbrain_test ${SOURCES})
target_link_libraries(megbrain_test gtest) target_link_libraries(megbrain_test gtest)
target_link_libraries(megbrain_test megengine) target_link_libraries(megbrain_test megengine)
......
...@@ -98,22 +98,48 @@ dtype, RandomDistribution::UNIFORM>::operator ()( ...@@ -98,22 +98,48 @@ dtype, RandomDistribution::UNIFORM>::operator ()(
return ret; return ret;
} }
//! Allocate a host tensor of the requested shape on \p cn (falling back
//! to "xpu0" when \p cn is invalid) and fill every element with the
//! constant value configured at construction time.
template<typename dtype>
std::shared_ptr<HostTensorND> HostTensorGenerator<
        dtype, RandomDistribution::CONSTANT>::operator ()(
        const TensorShape &shape, CompNode cn) {
    if (!cn.valid()) {
        cn = CompNode::load("xpu0");
    }
    auto result = std::make_shared<HostTensorND>(cn, shape, dtype());
    ctype* dst = result->ptr<ctype>();
    const size_t nr_elems = shape.total_nr_elems();
    size_t idx = 0;
    while (idx < nr_elems) {
        dst[idx] = m_default_val;
        ++idx;
    }
    return result;
}
// explicit instantialization of HostTensorGenerator // explicit instantialization of HostTensorGenerator
namespace mgb { namespace mgb {
template class HostTensorGenerator< template class HostTensorGenerator<
dtype::Float32, RandomDistribution::GAUSSIAN>; dtype::Float32, RandomDistribution::GAUSSIAN>;
template class HostTensorGenerator< template class HostTensorGenerator<
dtype::Float32, RandomDistribution::UNIFORM>; dtype::Float32, RandomDistribution::UNIFORM>;
template class HostTensorGenerator<
dtype::Float32, RandomDistribution::CONSTANT>;
template class HostTensorGenerator< template class HostTensorGenerator<
dtype::Float16, RandomDistribution::GAUSSIAN>; dtype::Float16, RandomDistribution::GAUSSIAN>;
template class HostTensorGenerator< template class HostTensorGenerator<
dtype::Int8, RandomDistribution::UNIFORM>; dtype::Int8, RandomDistribution::UNIFORM>;
template class HostTensorGenerator<
dtype::Int8, RandomDistribution::CONSTANT>;
template class HostTensorGenerator< template class HostTensorGenerator<
dtype::Uint8, RandomDistribution::UNIFORM>; dtype::Uint8, RandomDistribution::UNIFORM>;
template class HostTensorGenerator<
dtype::Uint8, RandomDistribution::CONSTANT>;
template class HostTensorGenerator< template class HostTensorGenerator<
dtype::Int16, RandomDistribution::UNIFORM>; dtype::Int16, RandomDistribution::UNIFORM>;
template class HostTensorGenerator<
dtype::Int16, RandomDistribution::CONSTANT>;
template class HostTensorGenerator< template class HostTensorGenerator<
dtype::Int32, RandomDistribution::UNIFORM>; dtype::Int32, RandomDistribution::UNIFORM>;
template class HostTensorGenerator<
dtype::Int32, RandomDistribution::CONSTANT>;
std::shared_ptr<HostTensorND> std::shared_ptr<HostTensorND>
HostTensorGenerator<dtype::QuantizedS8, RandomDistribution::UNIFORM>:: HostTensorGenerator<dtype::QuantizedS8, RandomDistribution::UNIFORM>::
operator()(const TensorShape& shape, CompNode cn) { operator()(const TensorShape& shape, CompNode cn) {
......
...@@ -175,7 +175,7 @@ class RNGxorshf { ...@@ -175,7 +175,7 @@ class RNGxorshf {
}; };
enum class RandomDistribution { enum class RandomDistribution {
GAUSSIAN, UNIFORM GAUSSIAN, UNIFORM, CONSTANT
}; };
template<class dtype> template<class dtype>
...@@ -322,6 +322,26 @@ class HostTensorGenerator<dtype, RandomDistribution::UNIFORM> final: ...@@ -322,6 +322,26 @@ class HostTensorGenerator<dtype, RandomDistribution::UNIFORM> final:
ctype m_lo, m_hi; ctype m_lo, m_hi;
}; };
//! const value
//! Partial specialization of HostTensorGenerator that produces tensors
//! whose elements all equal a fixed value supplied at construction.
//! The element-filling operator() is defined out of class and
//! explicitly instantiated for the supported dtypes.
template<class dtype>
class HostTensorGenerator<dtype, RandomDistribution::CONSTANT> final:
public HostTensorGeneratorBase {
public:
using ctype = typename DTypeTrait<dtype>::ctype;
// NOTE(review): single-arg ctor is not explicit, so ctype values
// convert implicitly to a generator — confirm this is intended.
HostTensorGenerator(ctype default_val)
: HostTensorGeneratorBase{next_rand_seed()},
m_default_val{default_val} {}
//! create a tensor of \p shape filled with the configured constant;
//! an invalid \p cn selects a default comp node (see the definition).
std::shared_ptr<HostTensorND> operator ()(
const TensorShape &shape, CompNode cn = {}) override;
using HostTensorGeneratorBase::operator();
private:
ctype m_default_val;  // value written into every element
};
template <> template <>
class HostTensorGenerator<dtype::QuantizedS8, RandomDistribution::UNIFORM> final class HostTensorGenerator<dtype::QuantizedS8, RandomDistribution::UNIFORM> final
: public HostTensorGeneratorBase { : public HostTensorGeneratorBase {
......
...@@ -21,8 +21,8 @@ pdef('PersistentOutputStorage').add_fields( ...@@ -21,8 +21,8 @@ pdef('PersistentOutputStorage').add_fields(
(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator'). (pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator').
add_enum('Strategy', add_enum('Strategy',
Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'), Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'),
Doc('HEURISTIC_REPRODUCIBLE', 'use heuristic to choose the fastest algorithm, ' Doc('HEURISTIC_REPRODUCIBLE', 'use heuristic to choose the fastest algorithm, '
'and the chosen algorithm is reproducible'), 'and the chosen algorithm is reproducible'),
Doc('PROFILE', Doc('PROFILE',
'run possible algorithms on real device to find the best'), 'run possible algorithms on real device to find the best'),
Doc('PROFILE_REPRODUCIBLE', Doc('PROFILE_REPRODUCIBLE',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册