提交 bc5d7bb6 编写于 作者: X xutianbing

Add Sparse = dense * dense unit test with Daoyuan's Function test

上级 077f936a
develop 2.0.1-rocm-post Ligoml-patch-1 OliverLPH-patch-1 OliverLPH-patch-2 PaddlePM-patch-1 PaddlePM-patch-2 ZHUI-patch-1 add_default_att add_model_benchmark_ci add_some_yaml_config addfile all_new_design_exec ascendrc ascendrelease cherry_undefined_var compile_windows delete_2.0.1-rocm-post delete_add_default_att delete_all_new_design_exec delete_ascendrc delete_compile_windows delete_delete_addfile delete_disable_iterable_dataset_unittest delete_fix_dataloader_memory_leak delete_fix_imperative_dygraph_error delete_fix_retry_ci delete_fix_undefined_var delete_improve_sccache delete_incubate/lite delete_paddle_tiny_install delete_paralleltest delete_prv-disable-more-cache delete_revert-31068-fix_conv3d_windows delete_revert-31562-mean delete_revert-33630-bug-fix delete_revert-34159-add_npu_bce_logical_dev delete_revert-34910-spinlocks_for_allocator delete_revert-35069-revert-34910-spinlocks_for_allocator delete_revert-36057-dev/read_flags_in_ut dingjiaweiww-patch-1 disable_iterable_dataset_unittest dy2static enable_eager_model_test final_state_gen_python_c final_state_intermediate fix-numpy-issue fix_concat_slice fix_dataloader_memory_leak fix_imperative_dygraph_error fix_npu_ci fix_op_flops fix_retry_ci fix_rnn_docs fix_tensor_type fix_undefined_var fixiscan fixiscan1 fixiscan2 fixiscan3 github/fork/123malin/netifaces github/fork/123malin/tdm_abacus github/fork/AshburnLee/dev_unique github/fork/ForFishes/fix_memory_matmul github/fork/ForFishes/rm_fluid github/fork/LielinJiang/move-2.0-api github/fork/LielinJiang/visual-dl-cb github/fork/LiuChiachi/add-transformer-generate-square-subsequent-mask-api github/fork/LiuChiachi/fix-example-code-for-hapi-Model github/fork/LiuChiachi/remove-input-requirment-in-dygraph-Model github/fork/MrChengmo/fix_ps_profiler github/fork/MrChengmo/update_ps_heter github/fork/PWhiddy/patch-1 github/fork/Shixiaowei02/dev/save_load_upgrade github/fork/TCChenlong/fix_hapi github/fork/TCChenlong/fix_inden github/fork/Thunderbrook/xpu_slice github/fork/XieYunshen/disable_ut_test_parallel_executor_fetch_isolated_var github/fork/XieYunshen/disable_ut_test_parallel_executor_fetch_isolated_var_2 github/fork/XieYunshen/disable_ut_test_parallel_executor_fetch_isolated_var_3 github/fork/XieYunshen/timeout_20S_ut github/fork/ZeyuChen/remove-nltk github/fork/arlesniak/arlesniak/selective__mkldnn_flags github/fork/baiyfbupt/code_doc_mig github/fork/chalsliu/set_timeout github/fork/chen-zhiyu/develop github/fork/chenwhql/ci/try_to_find_test_buffer_shared_memory_reuse_pass_error github/fork/chenwhql/dygraph/remove_scale_loss_and_apply_collective_grads github/fork/chenwhql/saveload/add_get_inference_program github/fork/chenwhql/saveload/remove_save_load_config github/fork/cryoco/pass-compatibility-trt github/fork/danleifeng/isempty_api2.0 github/fork/frankwhzhang/api_transfer github/fork/hbwx24/error_msg/cuda_kernel_error_msg github/fork/heavengate/cherry_yolo_box github/fork/heavengate/update_yolo_box github/fork/iclementine/rnn_fix github/fork/iducn/testestse github/fork/jczaja/prv-25537-fix github/fork/jeff41404/release/1.8 github/fork/jiweibo/api_2.0 github/fork/jiweibo/fix_lite_resnet50_test github/fork/juncaipeng/fix_doc_1 github/fork/lfchener/sample_code github/fork/littletomatodonkey/fix_reg_doc github/fork/liym27/dy2stat_update_assign_to_rc20 github/fork/luotao1/profiler_ut github/fork/mapingshuo/add_wait github/fork/mapingshuo/doc_2.0 github/fork/mapingshuo/zero-0.5 github/fork/miraiwk/dev github/fork/pangyoki/add-Categorical-class-branch github/fork/pangyoki/add-multinomial-op-branch github/fork/pangyoki/fix-test_distritbution-CI github/fork/qjing666/doublegrad github/fork/qjing666/fix_hdfs_download github/fork/sandyhouse/add_gather_etc github/fork/sandyhouse/add_send_recv_alltoall_etc github/fork/sandyhouse/pipeline_exe_run github/fork/seiriosPlus/feature/large_scale_kv_save_delta github/fork/seiriosPlus/fix/paddle_errors_fix github/fork/seiriosPlus/fix/paddle_op_errors github/fork/shangzhizhou/fix_test_activation_op_random_bug github/fork/smallv0221/yxp0924 github/fork/smallv0221/yxp0925 github/fork/swtkiwi/del-matplotlib github/fork/tianshuo78520a/kunlun_test github/fork/tianshuo78520a/update_dockerfile github/fork/wanghaoshuang/bert_fuse github/fork/wanghaoshuang/label_smooth github/fork/wanghuancoder/develop_CUDASynchronize github/fork/wanghuancoder/develop_Layer_doc github/fork/wanghuancoder/develop_ParameterList_doc github/fork/wanghuancoder/develop_Sequential_doc github/fork/wanghuancoder/develop_bilinear_tensor_product github/fork/wanghuancoder/develop_coverage_build_sh github/fork/wanghuancoder/develop_in_dynamic_mode_doc github/fork/wanghuancoder/develop_unique_name_doc github/fork/wangxicoding/fleet_meta_combine github/fork/wawltor/error_message_fix_5 github/fork/willthefrog/remove_l2_norm github/fork/windstamp/momentum_op github/fork/windstamp/mv_op_5 github/fork/windstamp/normal_api github/fork/wojtuss/wojtuss/fusion_gru_quantization github/fork/wojtuss/wojtuss/quantization-with-shift github/fork/wzzju/fix_err_info github/fork/wzzju/pure_fp16 github/fork/xiemoyuan/op_error_message github/fork/xiemoyuan/optimize_error_message github/fork/yaoxuefeng6/fix_doc github/fork/yaoxuefeng6/mod_dataset_v2 github/fork/yongqiangma/lod github/fork/ysh329/fix-clip-by-norm-error github/fork/ysh329/fix-error-clip-by-value github/fork/yukavio/error_info github/fork/zhangting2020/conv_filter_grad github/fork/zhangting2020/is_compile_with_cuda github/fork/zhangting2020/place_doc github/fork/zhangting2020/program github/fork/zhhsplendid/fix_any github/fork/zhhsplendid/refine_api2 github/fork/zhhsplendid/refine_api2_test github/fork/zhhsplendid/refine_api_test_ptb_lm github/fork/zhhsplendid/refine_api_test_resnet github/fork/zhhsplendid/refine_api_test_simnet github/fork/zhiqiu/dev/refine_initializer github/fork/zhiqiu/dev/remove_inplace_argument github/fork/zlsh80826/nvinfer_plugin_var_len_cuda11 improve_sccache incubate/infrt incubate/lite inplace_addto make_flag_adding_easier master move_embedding_to_phi move_histogram_to_pten move_sgd_to_phi move_slice_to_pten move_temporal_shift_to_phi move_yolo_box_to_phi npu_fix_alloc numel paddle_tiny_install paralleltest preln_ernie prv-disable-more-cache prv-md-even-more prv-onednn-2.5 pten_tensor_refactor release/0.10.0 release/0.11.0 release/0.12.0 release/0.13.0 release/0.14.0 release/0.15.0 release/1.0.0 release/1.1 release/1.2 release/1.3 release/1.4 release/1.5 release/1.6 release/1.7 release/1.8 release/2.0 release/2.0-alpha release/2.0-beta release/2.0-rc release/2.0-rc1 release/2.1 release/2.2 release/2.3 release/2.3-fc-ernie-fix release/2.4 release/lite-0.1 revert-24981-add_device_attr_for_regulization revert-26856-strategy_example2 revert-27520-disable_pr revert-31068-fix_conv3d_windows revert-31562-mean revert-32290-develop-hardlabel revert-33037-forci revert-33475-fix_cifar_label_dimension revert-33630-bug-fix revert-34159-add_npu_bce_logical_dev revert-34406-add_copy_from_tensor revert-34910-spinlocks_for_allocator revert-35069-revert-34910-spinlocks_for_allocator revert-36057-dev/read_flags_in_ut revert-36201-refine_fast_threaded_ssa_graph_executor revert-36985-add_license revert-37318-refactor_dygraph_to_eager revert-37926-eager_coreops_500 revert-37956-revert-37727-pylayer_support_tuple revert-38100-mingdong revert-38301-allocation_rearrange_pr revert-38703-numpy_bf16_package_reupload revert-38732-remove_useless_header_in_elementwise_mul_grad revert-38959-Reduce_Grad revert-39143-adjust_empty revert-39227-move_trace_op_to_pten revert-39268-dev/remove_concat_fluid_kernel revert-40170-support_partial_grad revert-41056-revert-40727-move_some_activaion_to_phi revert-41065-revert-40993-mv_ele_floordiv_pow revert-41068-revert-40790-phi_new revert-41944-smaller_inference_api_test revert-42149-do-not-reset-default-stream-for-stream-safe-cuda-allocator revert-43155-fix_ut_tempfile revert-43882-revert-41944-smaller_inference_api_test revert-45808-phi/simplify_size_op revert-46827-deform_comment rocm_dev_0217 support_weight_transpose test_benchmark_ci test_feature_precision_test_c test_model_benchmark test_model_benchmark_ci zhiqiu-patch-1 v2.4.0-rc0 v2.3.2 v2.3.1 v2.3.0 v2.3.0-rc0 v2.2.2 v2.2.1 v2.2.0 v2.2.0-rc0 v2.2.0-bak0 v2.1.3 v2.1.2 v2.1.1 v2.1.0 v2.1.0-rc0 v2.0.2 v2.0.1 v2.0.0 v2.0.0-rc1 v2.0.0-rc0 v2.0.0-beta0 v2.0.0-alpha0 v1.8.5 v1.8.4 v1.8.3 v1.8.2 v1.8.1 v1.8.0 v1.7.2 v1.7.1 v1.7.0 v1.6.3 v1.6.2 v1.6.1 v1.6.0 v1.6.0-rc0 v1.5.2 v1.5.1 v1.5.0 v1.4.1 v1.4.0 v1.3.2 v1.3.1 v1.3.0 v1.2.1 v1.2.0 v1.1.0 v1.0.2 v1.0.1 v1.0.0 v1.0.0-rc0 v0.15.0 v0.15.0-rc0 v0.14.0 v0.13.0 v0.12.0 v0.11.1a2 v0.11.1a1 v0.11.0 v0.10.0 v0.10.0rc4 v0.10.0rc lite-v0.1
无相关合并请求
......@@ -172,6 +172,7 @@ public:
bool isTransposed() const { return trans_; }
bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
virtual size_t numElements() const { return shape_.getElements(); }
const SequenceArg& sequence() const;
const SparseMatrixArg& sparse() const;
......@@ -353,6 +354,8 @@ public:
size_t nnz() const { return nnz_; }
size_t numElements() const override { return nnz_; }
SparseFormat dataFormat() const { return format_; }
SparseValueType dataType() const { return type_; }
......
......@@ -101,6 +101,34 @@ public:
output.isTransposed()));
}
/// add and init output sparse matrix
void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
cpuSparse_ = std::make_shared<CpuSparseMatrix>(output.shape()[0],
output.shape()[1],
output.nnz(),
output.dataType(),
output.dataFormat(),
output.isTransposed());
gpuSparse_ = std::make_shared<GpuSparseMatrix>(output.shape()[0],
output.shape()[1],
output.nnz(),
output.dataType(),
output.dataFormat(),
output.isTransposed());
/// init sparse matrix
hl_stream_t stream(HPPL_STREAM_1);
cpuSparse_->randomizeUniform();
gpuSparse_->copyFrom(*cpuSparse_, stream);
hl_stream_synchronize(stream);
cpuOutputs_.emplace_back(
std::make_shared<SparseMatrixArg>(*cpuSparse_, argType));
gpuOutputs_.emplace_back(
std::make_shared<SparseMatrixArg>(*gpuSparse_, argType));
}
void addInputs(const SequenceArg& input) {
size_t batchSize = input.shape()[0];
size_t numSeqs = batchSize / 10 + 1;
......@@ -199,8 +227,7 @@ protected:
void initOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) {
if (cpuOutputs_[i]->isSparseArg()) {
LOG(INFO) << "output sparse matrix already init";
continue;
continue; /// sparse matrix already init
}
initArg(*cpuOutputs_[i]);
......@@ -218,10 +245,11 @@ protected:
void compareOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) {
// TODO, Need a BufferCheck used to compare the two buffers.
auto cpu = cpuOutputs_[i];
auto gpu = gpuOutputs_[i];
CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
const auto cpu = cpuOutputs_[i];
const auto gpu = gpuOutputs_[i];
CHECK_EQ(cpu->numElements(), gpu->numElements());
CpuVector cpuVector(cpu->numElements(), (real*)cpu->data());
GpuVector gpuVector(gpu->numElements(), (real*)gpu->data());
autotest::TensorCheckErr(cpuVector, gpuVector);
}
}
......
......@@ -319,6 +319,13 @@ public:
auto outSparseMat = outputs[0].sparse().SparseMatrix<Device>();
if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() &&
outputs[0].isSparseArg()) {
/*
LOG(INFO) << "input0";
inputs[0].matrix<Device>().print(std::cout);
LOG(INFO) << "input1";
inputs[1].matrix<Device>().print(std::cout);
LOG(INFO) << "output sparse matrix";
outSparseMat.print(std::cout); */
MulOp<Device>(outSparseMat,
inputs[0].matrix<Device>(),
inputs[1].matrix<Device>(),
......
......@@ -183,75 +183,35 @@ TEST(MulOp, DDSparseMul) {
* C += A * B, A sparse, B, C dense
* sparse = dense * dense
*/
void testSparseDDMatrix(
void testFuncSparseDDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real alpha = 1.0;
real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
auto cpuMatrixA = Matrix::create(dimM, dimK, false, false);
auto gpuMatrixA = Matrix::create(dimM, dimK, false, true);
auto cpuDenseA = Matrix::create(dimM, dimK, false, false);
auto cpuMatrixB = Matrix::create(dimK, dimN, false, false);
auto gpuMatrixB = Matrix::create(dimK, dimN, false, true);
auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
CpuSparseMatrix cpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
CpuSparseMatrix gpuMatrixC_d2h(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
GpuSparseMatrix gpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
CpuMatrix cpuDenseC(dimM, dimN, false);
/*matrix init*/
hl_stream_t stream(HPPL_STREAM_1);
cpuMatrixA->randomizeUniform();
cpuMatrixB->randomizeUniform();
cpuMatrixC.randomizeUniform();
gpuMatrixA->copyFrom(*cpuMatrixA, stream);
gpuMatrixB->copyFrom(*cpuMatrixB, stream);
gpuMatrixC.copyFrom(cpuMatrixC, stream);
cpuDenseA->copyFrom(*cpuMatrixA);
cpuDenseB->copyFrom(*cpuMatrixB);
cpuDenseC.copyFrom(cpuMatrixC);
hl_stream_synchronize(stream);
/*matrix mul*/
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(*cpuMatrixA);
cpuInputs.addArg(*cpuMatrixB);
cpuOutputs.addArg(cpuMatrixC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(*gpuMatrixA);
gpuInputs.addArg(*gpuMatrixB);
gpuOutputs.addArg(gpuMatrixC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
BufferArgs denseInputs;
BufferArgs denseOutputs;
denseInputs.addArg(*cpuDenseA);
denseInputs.addArg(*cpuDenseB);
denseOutputs.addArg(cpuDenseC, ADD_TO);
cpuFunc->calc(denseInputs, denseOutputs);
// init Test object
FunctionCompare test("MulOp",
FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
// prepare input arguments
/// matrix A : M * K
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
gpuMatrixC_d2h.copyFrom(gpuMatrixC, stream);
hl_stream_synchronize(stream);
/// matrix B: K * N
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN}));
/*check result*/
checkSMatrixEqual(cpuMatrixC, gpuMatrixC_d2h);
checkSMatrixEqual2Dense(cpuMatrixC, cpuDenseC);
/// output sparse matrix C: M * N
test.addOutputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
TensorShape{dimM, dimN},
nnz,
FORMAT,
FLOAT_VALUE,
UNSPECIFIED,
false),
ADD_TO);
// run Function
test.run();
}
TEST(Matrix, SparseDDMul) {
LOG(INFO) << "test for sparse = dense * dense matrix";
TEST(MulOp, SparseDDMul) {
LOG(INFO) << "function test for sparse = dense * dense matrix";
for (const auto dimM : {10, 100, 1000}) {
for (const auto dimN : {10, 100}) {
for (const auto dimK : {3, 10}) {
......@@ -263,7 +223,7 @@ TEST(Matrix, SparseDDMul) {
<< " dimK=" << std::setw(5) << dimK
<< " nnz=" << std::setw(5) << nnz
<< " format=" << std::setw(5) << FORMAT;
testSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT);
testFuncSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT);
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部