#include #include "megbrain/graph/extern_copr_api.h" #include "megbrain/opr/io.h" #include "megbrain/opr/utility.h" #include "megbrain/serialization/extern_c_opr_io.h" #include "megbrain/serialization/serializer.h" #include "megbrain/test/helper.h" #include "megbrain/utils/debug.h" using namespace mgb; using namespace serialization; namespace { DType dtype_c2cpp(MGBDType dtype) { switch (dtype) { case MGB_DTYPE_UINT8: return dtype::Uint8{}; case MGB_DTYPE_INT32: return dtype::Int32{}; case MGB_DTYPE_FLOAT32: return dtype::Float32{}; #if !MEGDNN_DISABLE_FLOAT16 case MGB_DTYPE_FLOAT16: return dtype::Float16{}; #endif default: mgb_throw( SerializationError, "bad dtype value: %d", static_cast(dtype)); } } const void* prev_desc_buf_addr; size_t prev_desc_buf_size; //! a custom opr to compute x + bias template class MGBOprDescImpl { struct UserData { float bias; }; static UserData* user_data(const MGBOprDesc* self) { return static_cast(self->user_data); } static void release(MGBOprDesc* self) { delete user_data(self); delete self; --nr_inst; } static size_t hash(const MGBOprDesc* self) { return mgb::hash(user_data(self)->bias); } static int is_same(const MGBOprDesc* self, const MGBOprDesc* rhs) { return user_data(self)->bias == user_data(rhs)->bias; } static void execute( const MGBOprDesc* self, const MGBTensor* input, const MGBTensor* output) { if (self->dynamic_param) { auto device_id = self->dynamic_param->device_id; mgb_assert(0 == device_id || 8 == device_id); } bool use_extern_input = (self->dynamic_param && self->dynamic_param->nr_input > 0) ? true : false; bool use_extern_output = (self->dynamic_param && self->dynamic_param->nr_output > 0) ? true : false; auto&& i = input[0].layout; auto&& o = output[0].layout; mgb_assert( i.shape.ndim == 1 && o.shape.ndim == 1 && i.shape.shape[0] == o.shape.shape[0]); mgb_assert(i.dtype == MGB_DTYPE_FLOAT32 && o.dtype == out_dtype); auto input_p = static_cast(input[0].data); if (use_extern_input) input_p = static_cast(self->dynamic_param->input[0].device_ptr); auto bias = user_data(self)->bias; if (out_dtype == MGB_DTYPE_FLOAT32) { auto output_p = static_cast(output[0].data); if (use_extern_output) output_p = static_cast(self->dynamic_param->output[0].device_ptr); for (size_t x = 0; x < i.shape.shape[0]; ++x) { output_p[x] = input_p[x] + bias; } } else if (DNN_FLOAT16_SELECT(out_dtype == MGB_DTYPE_FLOAT16, false)) { #if !MEGDNN_DISABLE_FLOAT16 auto output_p = static_cast(output[0].data); for (size_t x = 0; x < i.shape.shape[0]; ++x) { output_p[x] = input_p[x] + bias; } #endif } else { mgb_assert(out_dtype == MGB_DTYPE_INT32); auto output_p = static_cast(output[0].data); for (size_t x = 0; x < i.shape.shape[0]; ++x) { output_p[x] = input_p[x] + bias; } } } static void infer_shape( const MGBOprDesc*, const MGBTensorShape* input, MGBTensorShape* output) { output[0] = input[0]; } static void infer_dtype( const struct MGBOprDesc* self, const MGBDType* input, MGBDType* output) { output[0] = out_dtype; } static const char* name() { return out_dtype == MGB_DTYPE_FLOAT32 ? "bias_adder_f23" : (out_dtype == MGB_DTYPE_INT32 ? "bias_adder_int32" : "bias_addr_float16"); } public: static int nr_inst; static MGBOprDesc* make(float bias) { ++nr_inst; auto ud = std::make_unique(); ud->bias = bias; auto desc = std::make_unique(); mgb_init_opr_desc(desc.get(), 1, name()); desc->user_data = ud.release(); #define s(n) desc->n = &MGBOprDescImpl::n; MGB_OPR_DESC_FOREACH_MEM_FN(s); #undef s if (out_dtype != MGB_DTYPE_FLOAT32) { desc->infer_dtype = infer_dtype; } return desc.release(); } }; template int MGBOprDescImpl::nr_inst = 0; template class MGBOprLoaderImpl { static MGBOprDesc* create_desc(size_t nr_input, const void* buf, size_t buf_len) { mgb_assert(buf_len == sizeof(float)); prev_desc_buf_addr = buf; prev_desc_buf_size = buf_len; float fv; memcpy(&fv, buf, buf_len); return MGBOprDescImpl::make(fv); } public: static MGBOprLoader make() { return {name(), &create_desc}; } static const char* name() { return out_dtype == MGB_DTYPE_FLOAT32 ? "bias_adder_dump" : (out_dtype == MGB_DTYPE_INT32 ? "bias_adder_dump_i32" : "bias_adder_dump_f16"); } }; template class MGBOprLoaderReg { public: MGBOprLoaderReg() { auto api = mgb_get_extern_c_opr_api(); auto loader = MGBOprLoaderImpl::make(); auto succ = api->register_loader(&loader); mgb_assert(succ); } }; MGBOprLoaderReg loader_reg_f32; MGBOprLoaderReg loader_reg_i32; #if !MEGDNN_DISABLE_FLOAT16 MGBOprLoaderReg loader_reg_f16; #endif std::vector create_graph_dump( float bias, float extra_scale, float sleep, MGBDType dtype) { HostTensorGenerator<> gen; auto host_x = gen({1}, "cpux"); auto graph = ComputingGraph::make(); auto x = opr::Host2DeviceCopy::make(*graph, host_x); if (sleep) x = opr::Sleep::make(x, sleep); x = opr::ExternCOprRunner::make_placeholder( {x}, {TensorShape{1}}, dtype == MGB_DTYPE_FLOAT32 ? "bias_adder_dump:test" : (dtype == MGB_DTYPE_INT32 ? "bias_adder_dump_i32" : "bias_adder_dump_f16"), &bias, sizeof(bias), {}, {dtype_c2cpp(dtype)}) ->output(0); if (extra_scale) x = x * extra_scale; std::vector ret; auto dumper = GraphDumper::make(OutputFile::make_vector_proxy(&ret)); dumper->dump({x}); return ret; } void check_dump_by_compute( std::unique_ptr input_file, CompNode cn, MGBDType dtype, float bias, float scale) { GraphLoadConfig config; config.comp_node_mapper = [loc = cn.locator()](CompNode::Locator& t) { t = loc; }; auto loader = GraphLoader::make(std::move(input_file)); auto load_ret = loader->load(config); load_ret.graph->options().var_sanity_check_first_run = false; SymbolVar y; unpack_vector(load_ret.output_var_list, y); HostTensorGenerator<> gen; auto host_x = load_ret.tensor_map.begin()->second; *host_x = *gen({23}, cn); HostTensorND y_expect; y_expect.copy_from(*host_x); { auto py = y_expect.ptr(); for (int i = 0; i < 23; ++i) { auto t = py[i] + bias; if (dtype == MGB_DTYPE_INT32) { t = int(t); #if !MEGDNN_DISABLE_FLOAT16 } else if (dtype == MGB_DTYPE_FLOAT16) { t = dt_float16(t); #endif } py[i] = t * scale; } } HostTensorND host_y; auto func = load_ret.graph->compile({make_callback_copy(y, host_y)}); func->execute(); MGB_ASSERT_TENSOR_EQ(y_expect, host_y); } void check_dump_by_compute_with_param( std::unique_ptr input_file, CompNode cn, MGBDType dtype, float bias, std::shared_ptr param) { GraphLoadConfig config; config.comp_node_mapper = [loc = cn.locator()](CompNode::Locator& t) { t = loc; }; auto loader = GraphLoader::make(std::move(input_file)); auto load_ret = loader->load(config); load_ret.graph->options().var_sanity_check_first_run = false; SymbolVar y; unpack_vector(load_ret.output_var_list, y); HostTensorGenerator<> gen; auto host_x = load_ret.tensor_map.begin()->second; *host_x = *gen({23}, cn); HostTensorND y_expect; y_expect.copy_from(*host_x); { auto py = y_expect.ptr(); float* extern_input_device_ptr = nullptr; if (param->nr_input && param->input && param->input->device_ptr) { extern_input_device_ptr = static_cast(param->input->device_ptr); } for (int i = 0; i < 23; ++i) { float t = 0; //! this test code is run before config_extern_c_opr_dynamic_param //! so we need double child member ptr is valid or not if (param->nr_input && param->input && param->input->device_ptr) { t = extern_input_device_ptr[i] + bias; } else { t = py[i] + bias; } if (dtype == MGB_DTYPE_INT32) { t = int(t); #if !MEGDNN_DISABLE_FLOAT16 } else if (dtype == MGB_DTYPE_FLOAT16) { t = dt_float16(t); #endif } py[i] = t; } } HostTensorND host_y; auto func = load_ret.graph->compile({make_callback_copy(y, host_y)}); config_extern_c_opr_dynamic_param(func, param); func->execute(); if (param->nr_output) { auto ph = host_y.ptr(); auto outp = static_cast(param->output->device_ptr); for (int i = 0; i < 23; ++i) { ph[i] = outp[i]; } } MGB_ASSERT_TENSOR_EQ(y_expect, host_y); } void run_compute_test(CompNode cn, MGBDType dtype) { float bias = 1.2, scale = -2.1; auto graph_dump = create_graph_dump(bias, scale, 0.3, dtype); check_dump_by_compute( InputFile::make_mem_proxy(graph_dump.data(), graph_dump.size()), cn, dtype, bias, scale); } void run_compute_test_with_param( CompNode cn, MGBDType dtype, std::shared_ptr param) { float bias = 1.2, scale = 0; auto graph_dump = create_graph_dump(bias, scale, 0.3, dtype); check_dump_by_compute_with_param( InputFile::make_mem_proxy(graph_dump.data(), graph_dump.size()), cn, dtype, bias, param); } } // namespace TEST(TestExternCOpr, ExternCOprParam) { //! same with check_dump_by_compute_with_param constexpr int input_output_size = 23; auto c_opr_param = std::make_shared(); MGBTensorLayout input_layput, output_layput; ExternDeviceTensor input, output; float* input_device_ptr = (float*)malloc(input_output_size * sizeof(float)); float* output_device_ptr = (float*)malloc(input_output_size * sizeof(float)); auto reset = [&] { memset(c_opr_param.get(), 0, sizeof(ExternCOprParam)); memset(&input_layput, 0, sizeof(MGBTensorLayout)); memset(&input, 0, sizeof(ExternDeviceTensor)); memset(&output_layput, 0, sizeof(MGBTensorLayout)); memset(&output, 0, sizeof(ExternDeviceTensor)); memset(input_device_ptr, 0, input_output_size * sizeof(float)); memset(output_device_ptr, 0, input_output_size * sizeof(float)); for (size_t i = 0; i < input_output_size; i++) { input_device_ptr[i] = i; } }; auto run_test = [&] { run_compute_test_with_param( CompNode::load("cpux"), MGB_DTYPE_FLOAT32, c_opr_param); }; auto init_param = [&] { reset(); c_opr_param->nr_input = 1; input_layput.shape = {1, {input_output_size}}; input.layout = input_layput; input.device_ptr = input_device_ptr; c_opr_param->input = &input; c_opr_param->nr_output = 1; output_layput.shape = {1, {input_output_size}}; output.layout = output_layput; output.device_ptr = output_device_ptr; c_opr_param->output = &output; }; //! run with null param reset(); run_test(); //! run with full param init_param(); run_test(); //! run with a right index init_param(); c_opr_param->extern_c_opr_dump_name = "bias_adder_dump:test"; run_test(); //! set a wrong index init_param(); c_opr_param->extern_c_opr_dump_name = "bias_adder_dump"; ASSERT_THROW(run_test(), MegBrainError); //! set a wrong index init_param(); c_opr_param->extern_c_opr_dump_name = "sdfsdfs"; ASSERT_THROW(run_test(), MegBrainError); //! set wrong input init_param(); c_opr_param->input = nullptr; ASSERT_THROW(run_test(), MegBrainError); //! set wrong nr_input init_param(); c_opr_param->nr_input = 3; ASSERT_THROW(run_test(), MegBrainError); //! set wrong input device_ptr init_param(); c_opr_param->input->device_ptr = nullptr; ASSERT_THROW(run_test(), MegBrainError); //! set wrong input shape init_param(); c_opr_param->input->layout.shape.shape[0] = input_output_size - 2; ASSERT_THROW(run_test(), MegBrainError); //! set wrong output init_param(); c_opr_param->output = nullptr; ASSERT_THROW(run_test(), MegBrainError); //! set wrong nr_output init_param(); c_opr_param->nr_output = 3; ASSERT_THROW(run_test(), MegBrainError); //! set wrong output device_ptr init_param(); c_opr_param->output->device_ptr = nullptr; ASSERT_THROW(run_test(), MegBrainError); //! set wrong output shape init_param(); c_opr_param->output->layout.shape.shape[0] = input_output_size - 2; ASSERT_THROW(run_test(), MegBrainError); //! set wrong dtype(test MGB_DTYPE_FLOAT32) init_param(); c_opr_param->input[0].layout.dtype = MGB_DTYPE_INT32; ASSERT_THROW(run_test(), MegBrainError); //! test only device_id reset(); c_opr_param->device_id = 8; run_test(); //! free free(input_device_ptr); free(output_device_ptr); } TEST(TestExternCOpr, CPUCompute) { run_compute_test(CompNode::load("cpux"), MGB_DTYPE_FLOAT32); } TEST(TestExternCOpr, GPUCompute) { REQUIRE_GPU(1); run_compute_test(CompNode::load("gpux"), MGB_DTYPE_FLOAT32); } #if MGB_OPENCL #include "megcore_opencl.h" #define REQUIRE_OPENCL() \ do { \ if (!CompNode::get_device_count(CompNode::DeviceType::OPENCL)) { \ return; \ } \ } while (0) TEST(TestExternCOpr, OPENCLCompute) { REQUIRE_OPENCL(); run_compute_test(CompNode::load("openclx"), MGB_DTYPE_FLOAT32); } #endif TEST(TestExternCOpr, CPUComputeMultiDtype) { run_compute_test(CompNode::load("cpux"), MGB_DTYPE_INT32); #if !MEGDNN_DISABLE_FLOAT16 run_compute_test(CompNode::load("cpux"), MGB_DTYPE_FLOAT16); #endif } TEST(TestExternCOpr, Register) { auto api = mgb_get_extern_c_opr_api(); ASSERT_TRUE(api->unregister_loader("bias_adder_dump")); ASSERT_FALSE(api->unregister_loader("bias_adder_dump")); auto loader = MGBOprLoaderImpl::make(); ASSERT_TRUE(api->register_loader(&loader)); ASSERT_FALSE(api->register_loader(&loader)); } TEST(TestExternCOpr, Dedup) { ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst); { HostTensorGenerator<> gen; auto host_x = gen({1}); auto graph = ComputingGraph::make(); auto x = opr::Host2DeviceCopy::make(*graph, host_x); auto make_opr = [x](float bias) { std::string name = "test"; return opr::ExternCOprRunner::make_from_desc( name, {x.node()}, MGBOprDescImpl<>::make(bias)); }; auto y0 = make_opr(0.5), y1 = make_opr(0.6), y2 = make_opr(0.5); ASSERT_EQ(y0, y2); ASSERT_NE(y0, y1); ASSERT_EQ(2, MGBOprDescImpl<>::nr_inst); } ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst); } TEST(TestExternCOpr, DummyLoaderName) { float bias = 0.0f; ASSERT_THROW( opr::ExternCOprRunner::make_placeholder( {SymbolVar()}, {TensorShape{1}, TensorShape{1}}, "extern_opr_dummy", &bias, sizeof(bias)), MegBrainError); ASSERT_THROW( opr::ExternCOprRunner::make_placeholder( {SymbolVar()}, {TensorShape{1}, TensorShape{1}}, "extern_opr_dummy:opr0", &bias, sizeof(bias)), MegBrainError); ASSERT_THROW( opr::ExternCOprRunner::make_placeholder( {SymbolVar()}, {TensorShape{1}, TensorShape{1}}, "extern_opr_dummy:opr1", &bias, sizeof(bias)), MegBrainError); } // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}