diff --git a/src/core/test/comp_node.cpp b/src/core/test/comp_node.cpp index 1a8e889f1b3947919ea576914029104695081129..b4420804f661299f0502cd36c68b571c30e04d79 100644 --- a/src/core/test/comp_node.cpp +++ b/src/core/test/comp_node.cpp @@ -40,6 +40,10 @@ TEST(TestCompNode, Parse) { ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23)); ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23)); + + ASSERT_EQ(L::parse("rocmx"), make_lc(D::ROCM, -1, 0)); + ASSERT_EQ(L::parse("rocm2"), make_lc(D::ROCM, 2, 0)); + ASSERT_EQ(L::parse("rocm2:3"), make_lc(D::ROCM, 2, 3)); ASSERT_EQ(L::parse("cambriconx"), make_lc(D::CAMBRICON, -1, 0)); ASSERT_EQ(L::parse("cambricon2"), make_lc(D::CAMBRICON, 2, 0)); ASSERT_EQ(L::parse("cambricon2:3"), make_lc(D::CAMBRICON, 2, 3)); @@ -66,6 +70,7 @@ TEST(TestCompNode, Parse) { ASSERT_THROW(L::parse("cpu0:"), MegBrainError); ASSERT_THROW(L::parse("cpu0:x"), MegBrainError); ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError); + ASSERT_THROW(L::parse("rcom0"), MegBrainError); ASSERT_THROW(L::parse("cmabricon0"), MegBrainError); ASSERT_THROW(L::parse("atlast0"), MegBrainError); ASSERT_THROW(L::parse("multithread"), MegBrainError); @@ -296,6 +301,18 @@ TEST(TestCompNodeCuda, set_prealloc_config) { 4, CompNode::DeviceType::CUDA); } +#if MGB_ROCM +TEST(TestCompNodeROCm, MemNode) { + REQUIRE_AMD_GPU(2); + + auto cn00 = CompNode::load("rocm0"), + cn1 = CompNode::load("rocm1"), + cn01 = CompNode::load("rocm0:1"); + ASSERT_EQ(cn00, CompNode::load("rocm0")); + ASSERT_EQ(cn00.mem_node(), cn01.mem_node()); + ASSERT_NE(cn00.mem_node(), cn1.mem_node()); +} +#endif #if MGB_CAMBRICON TEST(TestCompNodeCambricon, MemNode) { @@ -466,6 +483,10 @@ TEST(TestCompNodeCPU, PeerCopyFromCUDA) { test_peer_copy_from_device("gpux"); } +TEST(TestCompNodeCPU, PeerCopyFromROCm) { + REQUIRE_AMD_GPU(1); + test_peer_copy_from_device("rocmx"); +} #if MGB_CAMBRICON TEST(TestCompNodeCPU, PeerCopyFromCambricon) { diff --git a/src/opr/impl/search_policy/profiler.cpp b/src/opr/impl/search_policy/profiler.cpp index 2abbc8a94e02d39bc21f69866d66f0f0f8fb8f06..d604885198b6ea3c855f1b0031dbfc2add1fa889 100644 --- a/src/opr/impl/search_policy/profiler.cpp +++ b/src/opr/impl/search_policy/profiler.cpp @@ -14,6 +14,10 @@ #include "../internal/invoke.h" +#if MGB_ROCM +#include "hcc_detail/hcc_defs_prologue.h" +#include "megcore_rocm.h" +#endif //! TODO: here has to be know some megdnn::opr when there is produced midout.h //! fix it if there is another graceful way. @@ -58,6 +62,11 @@ template typename TimedProfiler::TResult TimedProfiler::prof_impl( const TParam& raw_param) { MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_impl"))) +#if MGB_ROCM + bool miopen_algo_search_enabled; + megcore::getMIOpenAlgoSearchStatus(&miopen_algo_search_enabled); + mgb_assert(miopen_algo_search_enabled, "MIOpen algo search not enabled"); +#endif auto&& param = raw_param.as_single_pod(); CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); auto megdnn_opr = intl::create_megdnn_opr(cn); @@ -234,6 +243,9 @@ Maybe::Result> TimedProfiler::profile( template void TimedProfiler::prof_init_device(const TParam& raw_param) { MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_init_device"))) +#if MGB_ROCM + megcore::enableMIOpenAlgoSearch(true); +#endif auto&& param = raw_param.as_single_pod(); CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); // wait for cuda init, so its time does not get accounted in timeout diff --git a/test/src/helper.cpp b/test/src/helper.cpp index 113a5d035469ead11607c2b199c3c00434b1a845..e3ef4b0233ef5a9932a21236f20d78d39ab63180 100644 --- a/test/src/helper.cpp +++ b/test/src/helper.cpp @@ -327,6 +327,13 @@ bool mgb::check_gpu_available(size_t num) { return true; } +bool mgb::check_amd_gpu_available(size_t num) { + if (CompNode::get_device_count(CompNode::DeviceType::ROCM) < num) { + mgb_log_warn("skip test case that requires %zu AMD GPU(s)", num); + return false; + } + return true; +} bool mgb::check_cambricon_device_available(size_t num) { if (CompNode::get_device_count(CompNode::DeviceType::CAMBRICON) < num) { diff --git a/test/src/include/megbrain/test/helper.h b/test/src/include/megbrain/test/helper.h index 72630a2db30a743b4004a0dd5278900c053a574d..47de1c752f5c6cc5d2112fdf2543333b365d8a0d 100644 --- a/test/src/include/megbrain/test/helper.h +++ b/test/src/include/megbrain/test/helper.h @@ -460,6 +460,8 @@ std::vector load_multiple_xpus(size_t num); //! check whether given number of GPUs is available bool check_gpu_available(size_t num); +//! check whether given number of AMD GPUs is available +bool check_amd_gpu_available(size_t num); //! check whether given number of cambricon devices is available bool check_cambricon_device_available(size_t num);