diff --git a/paddle/fluid/operators/arg_max_op_npu.cc b/paddle/fluid/operators/arg_max_op_npu.cc
index 38f9813ad02b40a6e879f4ea06b2ff5585bdbc76..8b70332c651c8b1b3a33f28a1c96a703c407efab 100644
--- a/paddle/fluid/operators/arg_max_op_npu.cc
+++ b/paddle/fluid/operators/arg_max_op_npu.cc
@@ -17,30 +17,49 @@ limitations under the Licnse. */
 
 namespace paddle {
 namespace operators {
+
 using Tensor = framework::Tensor;
+using NPUDeviceContext = platform::NPUDeviceContext;
 
-template <typename DeviceContext, typename T>
-class ArgMaxNPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<framework::LoDTensor>("X");
-    int64_t axis = ctx.Attr<int64_t>("axis");
-    auto dtype = ctx.Attr<int>("dtype");
+template <typename T>
+struct VisitDataArgNPUMaxFunctor {
+  const framework::ExecutionContext& ctx;
 
-    auto* out = ctx.Output<framework::LoDTensor>("Out");
-    out->mutable_data<int32_t>(ctx.GetPlace());
+  explicit VisitDataArgNPUMaxFunctor(const framework::ExecutionContext& ctx)
+      : ctx(ctx) {}
+  template <typename Tout>
+  void apply() const {
+    auto& x = *(ctx.Input<framework::LoDTensor>("X"));
+    auto& out = *(ctx.Output<framework::LoDTensor>("Out"));
+    out.template mutable_data<Tout>(ctx.GetPlace());
+    auto axis = ctx.Attr<int64_t>("axis");
+    auto dtype = ctx.Attr<int>("dtype");
 
+    auto stream = ctx.template device_context<NPUDeviceContext>().stream();
     NpuOpRunner runner;
     runner.SetType("ArgMaxV2")
-        .AddInput(*x)
+        .AddInput(x)
         .AddInput(std::vector<int64_t>{axis})
-        .AddOutput(*out)
-        .AddAttr("dtype", dtype);
+        .AddOutput(out)
+        .AddAttrDataType("dtype", dtype)
+        .Run(stream);
+  }
+};
 
-    auto stream =
-        ctx.template device_context<paddle::platform::NPUDeviceContext>()
-            .stream();
-    runner.Run(stream);
+template <typename T>
+class ArgMaxNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto& dtype = ctx.Attr<int>("dtype");
+    if (dtype < 0) {
+      framework::VisitDataTypeTiny(static_cast<framework::proto::VarType::Type>(
+                                       framework::proto::VarType::INT64),
+                                   VisitDataArgNPUMaxFunctor<T>(ctx));
+      return;
+    }
+    framework::VisitDataTypeTiny(
+        static_cast<framework::proto::VarType::Type>(dtype),
+        VisitDataArgNPUMaxFunctor<T>(ctx));
   }
 };
 
@@ -48,7 +67,5 @@ class ArgMaxNPUKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_NPU_KERNEL(
-    arg_max, ops::ArgMaxNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::ArgMaxNPUKernel<paddle::platform::NPUDeviceContext,
-                         paddle::platform::float16>);
+REGISTER_OP_NPU_KERNEL(arg_max, ops::ArgMaxNPUKernel<float>,
+                       ops::ArgMaxNPUKernel<paddle::platform::float16>);
diff --git a/paddle/fluid/operators/npu_op_runner.cc b/paddle/fluid/operators/npu_op_runner.cc
index d10e94962d6a6d63da21edc1ebd435a454bf2d3c..830e18cb8a14c09a9f999bc11d3bbda08e31f1fc 100644
--- a/paddle/fluid/operators/npu_op_runner.cc
+++ b/paddle/fluid/operators/npu_op_runner.cc
@@ -188,6 +188,21 @@ NpuOpRunner &NpuOpRunner::AddAttr(const std::string &name,
   return *this;
 }
 
+NpuOpRunner &NpuOpRunner::AddAttrDataType(const std::string &name,
+                                          const NPUAttribute &attr) {
+  PADDLE_ENFORCE_EQ(
+      (attr.type() == typeid(int)), true,
+      platform::errors::InvalidArgument(
+          "Attr type is NOT equal to framework::proto::VarType::Type."));
+  if (!attr_) {
+    attr_ = aclopCreateAttr();
+  }
+  auto dtype = ConvertToNpuDtype(
+      static_cast<framework::proto::VarType::Type>(BOOST_GET_CONST(int, attr)));
+  PADDLE_ENFORCE_NPU_SUCCESS(aclopSetAttrDataType(attr_, name.c_str(), dtype));
+  return *this;
+}
+
 NpuOpRunner &NpuOpRunner::AddAttrs(const NPUAttributeMap &attrs) {
   for (const auto &pair : attrs) {
     AddAttr(pair.first, pair.second);
diff --git a/paddle/fluid/operators/npu_op_runner.h b/paddle/fluid/operators/npu_op_runner.h
index 45e973970a956d82b228c2078de20c0de238fe39..6db5f17d67118166b5d8a8a461c98ca83b79b782 100644
--- a/paddle/fluid/operators/npu_op_runner.h
+++ b/paddle/fluid/operators/npu_op_runner.h
@@ -58,6 +58,12 @@ class NpuOpRunner {
 
   NpuOpRunner &AddAttr(const std::string &name, const NPUAttribute &attr);
 
+  // NOTE(qili93): need to add individual api for aclopSetAttrDataType
+  // as typeid(aclDataType) and typeid(framework::proto::VarType::Type)
+  // always go to attr.type() == typeid(int) to call aclopSetAttrInt
+  NpuOpRunner &AddAttrDataType(const std::string &name,
+                               const NPUAttribute &attr);
+
   NpuOpRunner &AddAttrs(const NPUAttributeMap &attrs);
 
   NpuOpRunner &AddInput(const Tensor &tensor);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py
index 9bc46697c0dfc034c33b2124bdfeb3c930650528..85ade1179b7d616d4dde2df6eecc977c9214f5de 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py
@@ -1,10 +1,10 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,30 +20,31 @@ import sys
 sys.path.append("..")
 from op_test import OpTest
 import paddle
+import paddle.fluid as fluid
 import paddle.fluid.core as core
+from paddle.fluid import Program, program_guard
 
 paddle.enable_static()
 
 
 class BaseTestCase(OpTest):
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
     def initTestCase(self):
         self.op_type = 'arg_max'
-        self.dims = (3, 4)
+        self.dims = (3, 4, 5)
         self.dtype = 'float32'
-        self.axis = 1
+        self.axis = 0
 
     def setUp(self):
+        self.set_npu()
         self.initTestCase()
-        self.__class__.use_npu = True
-        self.place = paddle.NPUPlace(0)
-        np.random.seed(2021)
-        self.x = (np.random.random(self.dims)).astype(self.dtype)
+        self.x = (1000 * np.random.random(self.dims)).astype(self.dtype)
         self.inputs = {'X': self.x}
         self.attrs = {'axis': self.axis}
-        if self.op_type == "arg_min":
-            self.outputs = {'Out': np.argmin(self.x, axis=self.axis)}
-        else:
-            self.outputs = {'Out': np.argmax(self.x, axis=self.axis)}
+        self.outputs = {'Out': np.argmax(self.x, axis=self.axis)}
 
     def test_check_output(self):
         self.check_output_with_place(self.place)
@@ -211,6 +212,64 @@ class TestArgMaxFloat32Case10(BaseTestCase):
         self.axis = 0
 
 
+class BaseTestComplex1_1(OpTest):
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def initTestCase(self):
+        self.op_type = 'arg_max'
+        self.dims = (4, 5, 6)
+        self.dtype = 'float32'
+        self.axis = 2
+
+    def setUp(self):
+        self.set_npu()
+        self.initTestCase()
+        self.x = (np.random.random(self.dims)).astype(self.dtype)
+        self.inputs = {'X': self.x}
+        self.attrs = {
+            'axis': self.axis,
+            'dtype': int(core.VarDesc.VarType.INT32)
+        }
+        self.outputs = {
+            'Out': np.argmax(
+                self.x, axis=self.axis).astype("int32")
+        }
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+class BaseTestComplex1_2(OpTest):
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def initTestCase(self):
+        self.op_type = 'arg_max'
+        self.dims = (4, 5, 6)
+        self.dtype = 'float16'
+        self.axis = 2
+
+    def setUp(self):
+        self.set_npu()
+        self.initTestCase()
+        self.x = (np.random.random(self.dims)).astype(self.dtype)
+        self.inputs = {'X': self.x}
+        self.attrs = {
+            'axis': self.axis,
+            'dtype': int(core.VarDesc.VarType.INT32)
+        }
+        self.outputs = {
+            'Out': np.argmax(
+                self.x, axis=self.axis).astype("int32")
+        }
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
 class TestArgMaxAPI(unittest.TestCase):
     def initTestCase(self):
         self.dims = (3, 4, 5)
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index b1db45ad506695a729e446e5e8913a3ac9673a88..adf93b24d3926be520de42addb78a0f9962f289b 100755
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -1675,11 +1675,16 @@ def cross_entropy(input,
                 raise ValueError(
                     "Target({}) is out of class_dimension's upper bound({})".
                     format(invalid_label[0], input.shape[axis] - 1))
-
-        _, out = _C_ops.softmax_with_cross_entropy(
-            input, label, 'soft_label', soft_label, 'ignore_index',
-            ignore_index, 'numeric_stable_mode', True, 'axis', axis,
-            'use_softmax', use_softmax)
+        if core.is_compiled_with_npu():
+            _, _, out = _C_ops.softmax_with_cross_entropy(
+                input, label, 'soft_label', soft_label, 'ignore_index',
+                ignore_index, 'numeric_stable_mode', True, 'axis', axis,
+                'use_softmax', use_softmax)
+        else:
+            _, out = _C_ops.softmax_with_cross_entropy(
+                input, label, 'soft_label', soft_label, 'ignore_index',
+                ignore_index, 'numeric_stable_mode', True, 'axis', axis,
+                'use_softmax', use_softmax)
 
         if weight is not None:
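
For reviewers, a minimal usage sketch of the behaviour this patch targets, not part of the patch itself. It assumes an Ascend-NPU build of Paddle, that device `npu:0` is available, and that `paddle.argmax` forwards its `dtype` argument as the `dtype` attribute of the `arg_max` op patched above; the tensor shape and values are arbitrary.

```python
import numpy as np
import paddle

# Assumption: Paddle was built with Ascend NPU support and NPU device 0 exists.
paddle.set_device('npu:0')

x = paddle.to_tensor(np.random.random((3, 4, 5)).astype('float32'))

# Default dtype ('int64'): per the patched kernel, the INT64 branch of
# VisitDataTypeTiny is taken instead of a fixed int32 output allocation.
idx_default = paddle.argmax(x, axis=0)

# Explicit 'int32': the attribute is forwarded through AddAttrDataType,
# which calls aclopSetAttrDataType rather than aclopSetAttrInt.
idx_int32 = paddle.argmax(x, axis=0, dtype='int32')

print(idx_default.dtype, idx_int32.dtype)  # expected: paddle.int64 paddle.int32
```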