diff --git a/paddle/fluid/operators/concat_op_mlu.cc b/paddle/fluid/operators/concat_op_mlu.cc index f254228f8a09713191682b63bdb87e84d3780fca..a4cc1c37db0cf2b8468e5d607d66167525017b6b 100644 --- a/paddle/fluid/operators/concat_op_mlu.cc +++ b/paddle/fluid/operators/concat_op_mlu.cc @@ -121,6 +121,7 @@ class ConcatGradMLUKernel : public framework::OpKernel { out_grad->dims().size())); // get output tensor that the name is not kEmptyVarName std::vector outputs_vec; + std::vector tmp_outputs_vec; std::vector output_descs; std::vector descs_vec; for (size_t j = 0; j < outs.size(); ++j) { @@ -128,11 +129,15 @@ class ConcatGradMLUKernel : public framework::OpKernel { outs[j]->numel() != 0UL) { outs[j]->mutable_data(ctx.GetPlace()); output_descs.emplace_back(MLUCnnlTensorDesc(*outs[j])); - descs_vec.push_back(output_descs.back().get()); outputs_vec.push_back(GetBasePtr(outs[j])); } else { - outputs_vec.push_back(nullptr); + Tensor tmp_tensor; + tmp_tensor.mutable_data(ins[j]->dims(), ctx.GetPlace()); + tmp_outputs_vec.push_back(tmp_tensor); + output_descs.emplace_back(MLUCnnlTensorDesc(*ins[j])); + outputs_vec.push_back(GetBasePtr(&(tmp_outputs_vec.back()))); } + descs_vec.push_back(output_descs.back().get()); } MLUCnnlTensorDesc out_grad_desc(*out_grad); diff --git a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc index ce511a12bbfdb2a685150d2a8a4980c599480ccd..a2091aa10a73bb982250129df178549f2f8d58d7 100644 --- a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc +++ b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc @@ -23,7 +23,9 @@ limitations under the Licnse. */ namespace paddle { namespace operators { +#define NO_USE_CNCL 0 #define GET_LAYOUT_OFFSET 2 + using Tensor = framework::Tensor; static std::vector supported_input_layout = { CNNL_LAYOUT_NC, CNNL_LAYOUT_NLC, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NDHWC}; @@ -165,6 +167,7 @@ class SyncBatchNormMLUKernel : public framework::OpKernel { Tensor mean_all(mean->dtype()); Tensor invstd_all(variance->dtype()); +#ifdef PADDLE_WITH_CNCL auto &dev_ctx = ctx.template device_context(); auto stream = dev_ctx.stream(); @@ -205,7 +208,9 @@ class SyncBatchNormMLUKernel : public framework::OpKernel { cncl_dtype, comm, stream)); - +#else + if (NO_USE_CNCL) { +#endif } else { count_all = input_count; mean_all.ShareDataWith(local_mean); @@ -404,6 +409,7 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel { FillMLUTensorWithHostValue( ctx, static_cast(x->numel() / C), &numel_count); +#ifdef PADDLE_WITH_CNCL auto &dev_ctx = ctx.template device_context(); auto stream = dev_ctx.stream(); @@ -440,6 +446,7 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel { comm, stream)); } +#endif if (d_x) { MLUCnnlTensorDesc desc_count(numel_count); diff --git a/python/paddle/fluid/tests/unittests/mlu/sync_batch_norm_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/sync_batch_norm_op_mlu.py index 4f80523a18254a9e5b618e7ed227714b06599621..5c1b8b602f2699feeaaaf46c1a5d957c050c57b9 100644 --- a/python/paddle/fluid/tests/unittests/mlu/sync_batch_norm_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/sync_batch_norm_op_mlu.py @@ -35,9 +35,9 @@ from multiprocessing import Process import paddle.fluid.layers as layers from functools import reduce from test_sync_batch_norm_base_mlu import TestSyncBatchNormRunnerBase, runtime_main -from paddle.fluid.tests.unittests.op_test import OpTest, _set_use_system_allocator +from op_test import OpTest, _set_use_system_allocator -from paddle.fluid.tests.unittests.test_sync_batch_norm_op import create_or_get_tensor +from test_sync_batch_norm_op import create_or_get_tensor _set_use_system_allocator(False) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_base_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_base_mlu.py index 3081ee9d38754c0ef98ac54e8fa22cdd6b9ab672..3c774e47010f9adba57616fd3387d911b29d5316 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_base_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_base_mlu.py @@ -33,9 +33,9 @@ from paddle.fluid import core from six import string_types import paddle -from paddle.fluid.tests.unittests.op_test import OpTest, _set_use_system_allocator +from op_test import OpTest, _set_use_system_allocator -from paddle.fluid.tests.unittests.test_sync_batch_norm_op import create_or_get_tensor +from test_sync_batch_norm_op import create_or_get_tensor _set_use_system_allocator(False) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu.sh b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu.sh new file mode 100644 index 0000000000000000000000000000000000000000..1417acb4be516a0383eaf1440ae1f4c4e6e20321 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +MLU_VISIBLE_DEVICES=0,1 python -m paddle.distributed.launch test_sync_batch_norm_op_mlu_baseline.py diff --git a/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_baseline.py b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_baseline.py index ac3f686cb8fe23e29e0e0dcd611bbd2b0fa79832..f524e47b54a92abe43a4f6821e350cf2ec2d4ecc 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_baseline.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_baseline.py @@ -20,7 +20,7 @@ import os import sys sys.path.append("..") -from paddle.fluid.tests.unittests.op_test import OpTest, _set_use_system_allocator +from op_test import OpTest, _set_use_system_allocator from test_sync_batch_norm_base_mlu import TestDistBase diff --git a/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_extra.py b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_extra.py index 955d9a122a292a23c82a34d386a3b575918d01f3..400d2f4afed4694467f344d3afba8bf17d2874e4 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_extra.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_extra.py @@ -29,8 +29,9 @@ import paddle.fluid as fluid import paddle.nn as nn from paddle.fluid import Program, program_guard -from paddle.fluid.tests.unittests.op_test import OpTest, _set_use_system_allocator -from paddle.fluid.tests.unittests.test_dist_base import TestDistBase +sys.path.append("..") +from op_test import OpTest, _set_use_system_allocator +from test_dist_base import TestDistBase paddle.enable_static()