Commit 712b87c8 authored by Megvii Engine Team, committed by Xinran Xu

feat(mgb/core): add comp node for cambricon

add testcase for cambricon comp node

GitOrigin-RevId: 7794faa47ffbbd67521fcac2838d46a38c4bfe12
Parent 856ef627
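As context for the diff below: MegBrain comp nodes are addressed through locator strings, and this commit wires a new Cambricon device type into that machinery. A minimal usage sketch, assuming the "cambricon0" locator follows the existing "cpu0"/"gpu0" naming scheme (the string and the surrounding calls are illustrative, not taken from this diff):

#include "megbrain/comp_node.h"
#include "megbrain/tensor.h"

void cambricon_example() {
    using namespace mgb;
    // Load device 0 of the new Cambricon device type (assumed locator).
    CompNode cn = CompNode::load("cambricon0");
    // Once loaded, a comp node is used like any CPU/CUDA one, e.g. to
    // place a device tensor on it.
    DeviceTensorND dev_tensor{cn, TensorShape{16, 16}, dtype::Float32()};
}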
@@ -261,6 +261,7 @@ if(MGE_WITH_CUDA)
set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS}")
endif()
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
......
@@ -56,4 +56,3 @@ target_link_libraries(megdnn ${MGE_BLAS_LIBS})
if(CMAKE_THREAD_LIBS_INIT)
target_link_libraries(megdnn Threads::Threads)
endif()
@@ -16,7 +16,6 @@
#include "src/cuda/megcore/cuda_device_context.hpp"
#endif
using namespace megcore;
using namespace megdnn;
......
@@ -26,6 +26,7 @@ if(MGE_WITH_CUDA)
endif()
add_executable(megdnn_test ${SOURCES})
target_link_libraries(megdnn_test gtest)
target_link_libraries(megdnn_test megdnn)
......
@@ -9,5 +9,6 @@ pushd $(dirname "${BASH_SOURCE[0]}")/.. >/dev/null
--ignore test/unit/data \
--ignore test/integration/manual \
--ignore megengine/module/pytorch \
--ignore test/unit/module/test_external.py \
megengine test
popd >/dev/null
@@ -31,6 +31,7 @@ if(MGE_WITH_CUDA AND MGE_WITH_TRT)
list(APPEND SOURCES ${SOURCES_})
endif()
set(MGB_DEF ${MGB_DEF} PARENT_SCOPE)
add_library(megbrain STATIC EXCLUDE_FROM_ALL ${SOURCES})
target_link_libraries(megbrain mgb_opr_param_defs)
......
@@ -22,7 +22,6 @@
#endif
#endif
using namespace mgb;
/* =================== MegDNNHandle =================== */
......
@@ -200,7 +200,6 @@ class DevMemAlloc: virtual public MemAllocBase {
#endif
virtual ~DevMemAlloc() = default;
/*!
......
@@ -41,7 +41,7 @@
} \
} while (0)
-#endif //MGB_ENABLE_LOGGING
+#endif // MGB_ENABLE_LOGGING
#endif
......
@@ -97,6 +97,10 @@
#endif
#ifndef MGB_CAMBRICON
#define MGB_CAMBRICON 0
#endif
// whether to enable TensorRT support
#ifndef MGB_ENABLE_TENSOR_RT
#define MGB_ENABLE_TENSOR_RT MGB_CUDA
......
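The new MGB_CAMBRICON flag above follows the same compile-time gating convention as MGB_CUDA: it defaults to 0 and is expected to be defined to 1 by the build system when the backend is enabled. A sketch of the usual pattern (illustrative, not part of this diff):

#if MGB_CAMBRICON
// Backend-specific headers, device contexts and allocators are compiled only
// when the build system defines MGB_CAMBRICON=1, so builds without the
// Cambricon toolchain never reference its symbols.
#endif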
@@ -49,7 +49,8 @@ TEST(TestCompNode, Parse) {
ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23));
ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23));
ASSERT_EQ(L::parse("xpu"), make_lc(D::UNSPEC, -1, 0));
ASSERT_EQ(L::parse("xpu"), make_lc(D::UNSPEC, -1, 0));
ASSERT_EQ(L::parse("xpux"), make_lc(D::UNSPEC, -1, 0));
ASSERT_EQ(L::parse("xpu23"), make_lc(D::UNSPEC, 23, 0));
ASSERT_EQ(L::parse("xpu23:1"), make_lc(D::UNSPEC, 23, 1));
@@ -70,6 +71,7 @@ TEST(TestCompNode, Parse) {
ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError);
ASSERT_THROW(L::parse("heaxgon0"), MegBrainError);
ASSERT_THROW(L::parse("rcom0"), MegBrainError);
ASSERT_THROW(L::parse("cmabricon0"), MegBrainError);
}
TEST(TestCompNode, SetDefaultDev) {
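A note on the parse tests above: "heaxgon0", "rcom0" and the newly added "cmabricon0" are deliberate misspellings, asserting that unknown device names raise MegBrainError. A positive parse test for the new backend would presumably mirror the make_lc pattern; the CAMBRICON device-type enumerator below is an assumption, not shown in this diff:

// Hypothetical positive cases (sketch), following the pattern above;
// D::CAMBRICON is assumed to be the new DeviceType enumerator.
ASSERT_EQ(L::parse("cambricon2"), make_lc(D::CAMBRICON, 2, 0));
ASSERT_EQ(L::parse("cambricon2:3"), make_lc(D::CAMBRICON, 2, 3));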
@@ -546,6 +548,7 @@ TEST(TestCompNode, MultipleLoad) {
}
}
namespace {
class CompNodeDepedentObjectInst final : public CompNodeDepedentObject {
int *m_dst, *m_timer;
......
@@ -464,6 +464,7 @@ public:
}
void raw_dev_free(void* ptr) override { MGB_CUDA_CHECK(cudaFree(ptr)); }
};
#endif
using Callback = std::function<void()>;
void test_free_mem(CompNode cn0, CompNode cn1, DevicePolicy* policy,
@@ -529,7 +530,7 @@ void test_gather_other(CompNode cn0, CompNode cn1) {
opr::Sleep::sleep(cn1, 0.7);
func->execute();
}
#endif
} // namespace
#if MGB_CUDA
@@ -562,4 +563,5 @@ TEST(TestCudaMemAlloc, FreeMem) {
}
#endif // MGB_CUDA
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
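The DevicePolicy class above abstracts raw allocation so the free-memory tests can run against more than one backend; the CUDA variant shown wraps cudaFree. A Cambricon counterpart would plausibly wrap the CNRT runtime the same way. Sketch only: the raw_dev_malloc signature is assumed from context, cnrtMalloc/cnrtFree are CNRT entry points, and MGB_CNRT_CHECK is a hypothetical error-checking macro analogous to MGB_CUDA_CHECK:

#if MGB_CAMBRICON
class CambriconDevicePolicy final : public DevicePolicy {
public:
    void* raw_dev_malloc(size_t size) override {
        void* ptr = nullptr;
        MGB_CNRT_CHECK(cnrtMalloc(&ptr, size));  // CNRT device allocation
        return ptr;
    }
    void raw_dev_free(void* ptr) override {
        MGB_CNRT_CHECK(cnrtFree(ptr));  // release CNRT device memory
    }
};
#endif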
@@ -70,8 +70,8 @@ dtype, RandomDistribution::GAUSSIAN>::operator ()(
auto ptr = ret->ptr<ctype>();
auto mean = m_mean, std = m_std;
for (size_t i = 0, it = shape.total_nr_elems(); i < it; i += 2) {
-        ctype u1 = (m_rng() + 1.0) / (m_rng.max() + 1.0),
-              u2 = (m_rng() + 1.0) / (m_rng.max() + 1.0),
+        ctype u1 = ctype((m_rng() + 1.0) / (m_rng.max() + 1.0)),
+              u2 = ctype((m_rng() + 1.0) / (m_rng.max() + 1.0)),
r = ctype(std * std::sqrt(-2 * std::log(u1))),
theta = ctype(2 * M_PI * u2),
z0 = ctype(r * std::cos(theta) + mean),
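For context, the loop above implements the Box-Muller transform: two independent samples u_1, u_2 drawn uniformly from (0, 1] yield a pair of Gaussian samples with mean \mu and standard deviation \sigma:

z_0 = \mu + \sigma\sqrt{-2\ln u_1}\,\cos(2\pi u_2), \qquad
z_1 = \mu + \sigma\sqrt{-2\ln u_1}\,\sin(2\pi u_2)

The (m_rng() + 1.0) / (m_rng.max() + 1.0) expression shifts the raw integer RNG output so that u_1 is strictly positive, keeping the logarithm finite; the ctype(...) casts added in this hunk presumably let the expression compile when ctype is the half-precision type instantiated below.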
@@ -104,6 +104,8 @@ namespace mgb {
dtype::Float32, RandomDistribution::GAUSSIAN>;
template class HostTensorGenerator<
dtype::Float32, RandomDistribution::UNIFORM>;
template class HostTensorGenerator<
dtype::Float16, RandomDistribution::GAUSSIAN>;
template class HostTensorGenerator<
dtype::Int8, RandomDistribution::UNIFORM>;
template class HostTensorGenerator<
......
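The newly added Float16 Gaussian specialization is used like the existing ones. A minimal sketch (shape and variable names are illustrative):

// Draw a 2x3 half-precision tensor from the Gaussian generator.
HostTensorGenerator<dtype::Float16, RandomDistribution::GAUSSIAN> gen;
auto host_x = gen({2, 3});  // host tensor filled with N(mean, std) samples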
@@ -400,6 +400,9 @@ bool check_gpu_available(size_t num);
//! check whether given number of AMD GPUs is available
bool check_amd_gpu_available(size_t num);
//! check whether given number of cambricon devices is available
bool check_cambricon_device_available(size_t num);
//! check current capability >= major.minor
bool check_compute_capability(int major, int minor);
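Like check_gpu_available and check_amd_gpu_available, the new helper lets tests skip gracefully on machines without the hardware rather than fail. A typical guard might look like this (sketch; the test name is illustrative):

TEST(TestCambricon, Example) {
    if (!check_cambricon_device_available(1))
        return;  // skip on machines without a Cambricon device
    // ... device-dependent assertions would go here ...
}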
@@ -436,6 +439,7 @@ public:
return; \
} while(0)
#if MGB_HAVE_THREAD
#define REQUIRE_THREAD()
#else
......