diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index afe8e6bf180147cdfec44a9157d568ec82b30054..4797b0e7154e0c7f425d40bef78ebcfcb4081b1f 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -184,6 +184,4 @@ endif()
 
 if(WITH_ASCEND_CL)
 cc_test(gelu_op_npu_test SRCS gelu_op_npu_test.cc DEPS op_registry gelu_op scope device_context enforce executor)
-cc_test(mean_op_npu_test SRCS mean_op_npu_test.cc DEPS op_registry mean_op scope device_context enforce executor)
 endif()
-
diff --git a/paddle/fluid/operators/mean_op_npu.cc b/paddle/fluid/operators/mean_op_npu.cc
deleted file mode 100644
index f7dba2660496419549be3da4139ebfbf400b5404..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/mean_op_npu.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/mean_op.h"
-#include "paddle/fluid/platform/float16.h"
-#include "paddle/fluid/operators/npu_op_runner.h"
-
-
-namespace paddle {
-namespace operators {
-
-template <typename DeviceContext, typename T>
-class MeanNPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<Tensor>("X");
-    auto* out = ctx.Output<Tensor>("Out");
-
-    auto reduce_ndim = x->dims().size();
-    std::vector<int> axes;
-    for (auto i = 0; i < reduce_ndim; ++i) {
-      axes.push_back(i);
-    }
-
-    framework::NPUAttributeMap attr_input = {
-        {"keep_dims", false},
-        {"axes", axes}};
-
-    std::vector<int> out_dims;
-    out_dims.push_back(1);
-    out->Resize(framework::make_ddim(out_dims));
-    out->mutable_data<T>(ctx.GetPlace());
-
-    Tensor reduced_out(x->type());
-    std::vector<int> reduced_dout_dims;
-    reduced_dout_dims.push_back(1);
-    reduced_out.Resize(framework::make_ddim(reduced_dout_dims));
-    reduced_out.mutable_data<T>(ctx.GetPlace());
-
-    auto runner = NpuOpRunner("ReduceMeanD",
-                              {*x},
-                              {*out},
-                              attr_input);
-
-    auto stream =
-        ctx.template device_context<
-               paddle::platform::NPUDeviceContext>()
-            .stream();
-    runner.Run(stream);
-  }
-};
-
-
-template <typename DeviceContext, typename T>
-class MeanGradNPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto stream =
-        context.template device_context<
-               paddle::platform::NPUDeviceContext>()
-            .stream();
-
-    auto grad = context.Input<Tensor>(framework::GradVarName("Out"));
-
-    PADDLE_ENFORCE_EQ(grad->numel(), 1,
-                      platform::errors::InvalidArgument(
-                          "Mean Gradient Input Tensor len should be 1. But "
-                          "received Out@Grad's elements num is %d.",
-                          grad->numel()));
-
-    auto IG = context.Output<Tensor>(framework::GradVarName("X"));
-    IG->mutable_data<T>(context.GetPlace());
-
-    // ones
-    Tensor ones(grad->type());
-    std::vector<int64_t> dout_dims;
-    for (auto i = 0; i < IG->dims().size(); ++i) {
-      dout_dims.push_back(IG->dims()[i]);
-    }
-    ones.Resize(framework::make_ddim(dout_dims));
-    ones.mutable_data<T>(context.GetPlace());
-    auto runner_ones = NpuOpRunner("OnesLike", {*IG}, {ones}, {});
-    runner_ones.Run(stream);
-
-    // means
-    Tensor mean_tensor(grad->type());
-    mean_tensor.Resize({1});
-    mean_tensor.mutable_data<T>(context.GetPlace());
-    std::vector<float> mean_vec;
-    mean_vec.push_back(1.0/static_cast<float>(IG->numel()));
-    framework::TensorFromVector(mean_vec,
-                                context.device_context(),
-                                &mean_tensor);
-
-    // means mul ones
-    Tensor mean_ma(grad->type());
-    mean_ma.Resize(framework::make_ddim(dout_dims));
-    mean_ma.mutable_data<T>(context.GetPlace());
-    auto runner_mul_1 = NpuOpRunner("Mul", {mean_tensor, ones}, {mean_ma}, {});
-    runner_mul_1.Run(stream);
-
-    // and mul grad
-    auto runner_mul_2 = NpuOpRunner("Mul", {mean_ma, *grad}, {*IG}, {});
-    runner_mul_2.Run(stream);
-  }
-};
-
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-REGISTER_OP_NPU_KERNEL(
-    mean,
-    ops::MeanNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::MeanNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::MeanNPUKernel<paddle::platform::NPUDeviceContext, double>,
-    ops::MeanNPUKernel<paddle::platform::NPUDeviceContext, plat::float16>)
-
-
-REGISTER_OP_NPU_KERNEL(
-    mean_grad,
-    ops::MeanGradNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::MeanGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::MeanGradNPUKernel<paddle::platform::NPUDeviceContext, double>,
-    ops::MeanGradNPUKernel<paddle::platform::NPUDeviceContext, plat::float16>)
diff --git a/paddle/fluid/operators/mean_op_npu_test.cc b/paddle/fluid/operators/mean_op_npu_test.cc
deleted file mode 100644
index 7379955084d1b057d18859d7580d191dc888a2b5..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/mean_op_npu_test.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include <string>
-#include <thread>  // NOLINT
-#include <vector>
-
-#include "gtest/gtest.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/string/printf.h"
-
-namespace f = paddle::framework;
-namespace p = paddle::platform;
-namespace m = paddle::operators::math;
-
-USE_OP(mean);
-USE_OP_DEVICE_KERNEL(mean, NPU);
-USE_OP(mean_grad);
-USE_OP_DEVICE_KERNEL(mean_grad, NPU);
-
-template <typename T>
-void Compare(f::Scope* scope, const p::DeviceContext& ctx,
-             std::string op_type) {
-  // init
-  auto x = scope->Var("X");
-  auto tensor_x = x->GetMutable<f::LoDTensor>();
-
-  std::vector<T> init;
-  init.push_back(static_cast<T>(1.0));
-  init.push_back(static_cast<T>(2.0));
-  init.push_back(static_cast<T>(3.0));
-  init.push_back(static_cast<T>(4.0));
-
-  TensorFromVector(init, ctx, tensor_x);
-  tensor_x->Resize({4});
-
-  ctx.Wait();
-
-  auto place = ctx.GetPlace();
-  auto out = scope->Var("Out");
-  auto tensor_out = out->GetMutable<f::LoDTensor>();
-
-  auto op = f::OpRegistry::CreateOp(op_type,
-                                    {{"X", {"X"}}},
-                                    {{"Out", {"Out"}}},
-                                    {});
-
-  op->Run(*scope, place);
-
-  std::vector<T> out_vec;
-  TensorToVector(*tensor_out, ctx, &out_vec);
-
-  ctx.Wait();
-
-  EXPECT_EQ((uint32_t)out_vec.size(), (uint32_t)1);
-  EXPECT_EQ((float)out_vec[0], (float)2.5);
-}
-
-template <typename T>
-void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx,
-                 std::string op_type) {
-  // init
-  auto dout = scope->Var("DOut");
-  auto tensor_dout = dout->GetMutable<f::LoDTensor>();
-  float dvalue = 2.0;
-  tensor_dout->Resize({1});
-  std::vector<T> init_dout;
-  init_dout.push_back(static_cast<T>(dvalue));
-  TensorFromVector(init_dout, ctx, tensor_dout);
-  ctx.Wait();
-
-  auto x = scope->Var("X");
-  auto tensor_x = x->GetMutable<f::LoDTensor>();
-  tensor_x->Resize({4});
-
-  auto dx = scope->Var("DX");
-  auto tensor_dx = dx->GetMutable<f::LoDTensor>();
-  tensor_dx->Resize({4});
-
-  ctx.Wait();
-
-  auto op = f::OpRegistry::CreateOp(op_type,
-                                    {{"Out@GRAD", {"DOut"}},
-                                     {"X", {"X"}}},
-                                    {{"X@GRAD", {"DX"}}},
-                                    {});
-
-  auto place = ctx.GetPlace();
-  op->Run(*scope, place);
-
-  std::vector<T> out_vec;
-  TensorToVector(*tensor_dx, ctx, &out_vec);
-
-  ctx.Wait();
-
-  EXPECT_EQ((uint32_t)out_vec.size(), (uint32_t)4);
-  EXPECT_EQ((float)out_vec[0], (float)1.0/dvalue);
-  EXPECT_EQ((float)out_vec[1], (float)1.0/dvalue);
-  EXPECT_EQ((float)out_vec[2], (float)1.0/dvalue);
-  EXPECT_EQ((float)out_vec[3], (float)1.0/dvalue);
-}
-
-TEST(mean, NPU_fp32) {
-  f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx, "mean");
-}
-
-
-TEST(mean_grad, NPU_fp32) {
-  f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  CompareGrad<float>(&scope, ctx, "mean_grad");
-}
diff --git a/python/paddle/fluid/tests/unittests/npu/test_mean_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_mean_op_npu.py
deleted file mode 100644
index f949f5ccf66d1da72a092f8fe982671547e6e4d4..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/tests/unittests/npu/test_mean_op_npu.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-import numpy as np
-import unittest
-import sys
-sys.path.append("..")
-from op_test import OpTest
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid import core
-
-paddle.enable_static()
-SEED = 2021
-
-
-@unittest.skipIf(not paddle.is_compiled_with_npu(),
-                 "core is not compiled with NPU")
-class TestMean(OpTest):
-    def setUp(self):
-        self.set_npu()
-        self.place = paddle.NPUPlace(0)
-        self.op_type = "mean"
-        self.init_dtype()
-
-        x = np.random.random([3, 3]).astype(self.dtype)
-        self.inputs = {'X': x}
-
-        self.attrs = {}
-        np_out = np.mean(x)
-        self.outputs = {'Out': np_out}
-
-    def set_npu(self):
-        self.__class__.use_npu = True
-        self.__class__.no_need_check_grad = True
-
-    def init_dtype(self):
-        self.dtype = np.float32
-
-    def test_check_output(self):
-        self.check_output_with_place(self.place, check_dygraph=False)
-
-
-@unittest.skipIf(not paddle.is_compiled_with_npu(),
-                 "core is not compiled with NPU")
-class TestMeanFP16(OpTest):
-    def setUp(self):
-        self.set_npu()
-        self.place = paddle.NPUPlace(0)
-        self.op_type = "mean"
-        self.init_dtype()
-
-        x = np.random.random([3, 3]).astype(self.dtype)
-        self.inputs = {'X': x}
-
-        self.attrs = {}
-        np_out = np.mean(x)
-        self.outputs = {'Out': np_out}
-
-    def set_npu(self):
-        self.__class__.use_npu = True
-        self.__class__.no_need_check_grad = True
-
-    def init_dtype(self):
-        self.dtype = np.float16
-
-    def test_check_output(self):
-        self.check_output_with_place(self.place, check_dygraph=False)
-
-
-
-@unittest.skipIf(not paddle.is_compiled_with_npu(),
-                 "core is not compiled with NPU")
-class TestMeanNet(unittest.TestCase):
-    def _test(self, run_npu=True):
-        main_prog = paddle.static.Program()
-        startup_prog = paddle.static.Program()
-        main_prog.random_seed = SEED
-        startup_prog.random_seed = SEED
-        np.random.seed(SEED)
-
-        a_np = np.random.random(size=(32, 32)).astype('float32')
-        b_np = np.random.random(size=(32, 32)).astype('float32')
-        label_np = np.random.randint(2, size=(32, 1)).astype('int64')
-
-        with paddle.static.program_guard(main_prog, startup_prog):
-            a = paddle.static.data(name="a", shape=[32, 32], dtype='float32')
-            b = paddle.static.data(name="b", shape=[32, 32], dtype='float32')
-            label = paddle.static.data(
-                name="label", shape=[32, 1], dtype='int64')
-
-            c = paddle.multiply(a, b)
-            d = paddle.sqrt(c)
-
-            fc_1 = fluid.layers.fc(input=d, size=128)
-            prediction = fluid.layers.fc(input=fc_1, size=2, act='sigmoid')
-
-            cost = fluid.layers.cross_entropy(input=prediction, label=label)
-            loss = fluid.layers.mean(cost)
-            sgd = fluid.optimizer.SGD(learning_rate=0.01)
-            sgd.minimize(loss)
-
-        if run_npu:
-            place = paddle.NPUPlace(0)
-        else:
-            place = paddle.CPUPlace()
-
-        exe = paddle.static.Executor(place)
-        exe.run(startup_prog)
-
-        print("Start run on {}".format(place))
-        for epoch in range(100):
-
-            pred_res, loss_res = exe.run(
-                main_prog,
-                feed={"a": a_np,
-                      "b": b_np,
-                      "label": label_np},
-                fetch_list=[prediction, loss])
-            if epoch % 10 == 0:
-                print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
-                    epoch, pred_res[0], loss_res))
-
-        return pred_res, loss_res
-
-    def test_npu(self):
-        cpu_pred, cpu_loss = self._test(False)
-        npu_pred, npu_loss = self._test(True)
-
-        self.assertTrue(np.allclose(npu_pred, cpu_pred))
-        self.assertTrue(np.allclose(npu_loss, cpu_loss))
-
-
-if __name__ == '__main__':
-    unittest.main()
-