diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7edddce65cc6f567b7df89bc9713f49792298445
--- /dev/null
+++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc
@@ -0,0 +1,115 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/fill_constant_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+#include "paddle/fluid/operators/utils.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+// NPU kernel for fill_constant_batch_size_like: fills "Out" with one constant,
+// taken from the "str_value" attribute when it is non-empty and from the float
+// "value" attribute otherwise. The fill runs on the host when "force_cpu" is
+// set or the execution place is CPU, and through the NPU "FillD" operator
+// otherwise.
+template <typename DeviceContext, typename T>
+class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto data_type =
+        static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype"));
+    auto float_value = ctx.Attr<float>("value");
+    auto str_value = ctx.Attr<std::string>("str_value");
+    auto force_cpu = ctx.Attr<bool>("force_cpu");
+
+    auto *out = ctx.Output<Tensor>("Out");
+    auto *input = ctx.Input<Tensor>("Input");
+    // Compare the attribute's value. The previous check took the attribute's
+    // address and compared it with 0, which is never true, so the batch-size
+    // update below was unreachable.
+    const int input_dim_idx = ctx.Attr<int>("input_dim_idx");
+    if (input_dim_idx == 0) {
+      // set the correct batch size.
+      auto odims = out->dims();
+      const int output_dim_idx = ctx.Attr<int>("output_dim_idx");
+      odims[output_dim_idx] = input->dims()[input_dim_idx];
+      // Only the shape is updated here; memory is allocated below on the place
+      // selected by force_cpu.
+      out->Resize(odims);
+    }
+
+    // Pick the fill value: a non-empty "str_value" wins and is parsed as
+    // int64_t when T is int64_t, and as double for every other T.
+    T value;
+    if (str_value.empty()) {
+      value = static_cast<T>(float_value);
+    } else {
+      std::stringstream convert_stream(str_value);
+      if (std::is_same<int64_t, T>::value) {
+        int64_t tmp_value;
+        convert_stream >> tmp_value;
+        value = static_cast<T>(tmp_value);
+      } else {
+        double tmp_value;
+        convert_stream >> tmp_value;
+        value = static_cast<T>(tmp_value);
+      }
+    }
+
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(ctx.GetPlace());
+    bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace();
+    if (cpu_place) {
+      // Host fill.
+      // NOTE(review): dev_ctx is looked up with ctx.GetPlace(), so with
+      // force_cpu on a non-CPU place the cast below reinterprets a non-CPU
+      // context -- confirm SetConstant tolerates this.
+      math::SetConstant<platform::CPUDeviceContext, T> functor;
+      out->mutable_data(platform::CPUPlace(), data_type);
+      functor(reinterpret_cast<const platform::CPUDeviceContext &>(dev_ctx),
+              out, static_cast<T>(value));
+    } else {
+      // Device fill: stage the scalar in a one-element tensor, then run the
+      // NPU "FillD" operator with the dims of Out as its "dims" attribute.
+      out->mutable_data(ctx.GetPlace(), data_type);
+      Tensor tensor_tmp(data_type);
+      tensor_tmp.mutable_data<T>({1}, ctx.GetPlace());
+      FillNpuTensorWithConstant<T>(&tensor_tmp, value);
+
+      auto stream =
+          ctx.template device_context<paddle::platform::NPUDeviceContext>()
+              .stream();
+      const auto &runner =
+          NpuOpRunner("FillD", {tensor_tmp}, {*out},
+                      {{"dims", framework::vectorize(out->dims())}});
+      runner.Run(stream);
+    }
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_NPU_KERNEL(
+    fill_constant_batch_size_like,
+    ops::FillConstantBatchSizeLikeOpNPUKernel<
+        paddle::platform::NPUDeviceContext, float>,
+    ops::FillConstantBatchSizeLikeOpNPUKernel<
+        paddle::platform::NPUDeviceContext, int>,
+    ops::FillConstantBatchSizeLikeOpNPUKernel<
+        paddle::platform::NPUDeviceContext, paddle::platform::float16>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_fill_constant_batch_size_like_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_fill_constant_batch_size_like_op_npu.py
new file mode 100644
index 0000000000000000000000000000000000000000..7736c85c87aa2975a459d7e1f4e6298542432f5c
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_fill_constant_batch_size_like_op_npu.py
@@ -0,0 +1,150 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import core
+
+paddle.enable_static()
+SEED = 2021
+
+
+class TestFillConstantBatchSizeLike(OpTest):
+    """Base case: FP32 fill on NPU, with Out's dim 0 taken from Input's dim 0.
+
+    Subclasses override a single init_* hook to vary one aspect of the op.
+    """
+
+    def setUp(self):
+        self.set_npu()
+        self.place = paddle.NPUPlace(0)
+        self.op_type = "fill_constant_batch_size_like"
+
+        # Each hook only assigns its own attributes on self.
+        for init_hook in (self.init_shape, self.init_value, self.init_dtype,
+                          self.init_force_cpu, self.init_dim_idx):
+            init_hook()
+
+        input_data = np.random.random(self.input_shape).astype("float32")
+        expected_out = np.full(self.output_shape, self.output_value,
+                               self.output_dtype)
+
+        self.inputs = {'Input': input_data}
+        self.attrs = {
+            'shape': self.shape,
+            'value': self.value,
+            'str_value': self.str_value,
+            'dtype': self.dtype,
+            'force_cpu': self.force_cpu,
+            'input_dim_idx': self.input_dim_idx,
+            'output_dim_idx': self.output_dim_idx
+        }
+        self.outputs = {'Out': expected_out}
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+
+    def init_shape(self):
+        # Expected Out keeps 'shape' except dim 0, which comes from Input (4).
+        self.input_shape = [4, 5]
+        self.shape = [123, 92]
+        self.output_shape = (4, 92)
+
+    def init_value(self):
+        # Empty 'str_value': the expected fill is the float 'value'.
+        self.value = 3.8
+        self.str_value = ''
+        self.output_value = 3.8
+
+    def init_dtype(self):
+        self.dtype = core.VarDesc.VarType.FP32
+        self.output_dtype = np.float32
+
+    def init_force_cpu(self):
+        self.force_cpu = False
+
+    def init_dim_idx(self):
+        self.input_dim_idx = 0
+        self.output_dim_idx = 0
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+class TestFillConstantBatchSizeLike2(TestFillConstantBatchSizeLike):
+    """Shape variation: 4-D Input with a 3-D 'shape' attribute."""
+
+    def init_shape(self):
+        self.input_shape = [4, 5, 6, 7]
+        self.shape = [10, 123, 92]
+        self.output_shape = (4, 123, 92)
+
+
+class TestFillConstantBatchSizeLike3(TestFillConstantBatchSizeLike):
+    """Non-empty 'str_value' is expected to be used rather than 'value'."""
+
+    def init_value(self):
+        self.value = 3.8
+        self.str_value = '4.5'
+        self.output_value = 4.5
+
+
+class TestFillConstantBatchSizeLike6(TestFillConstantBatchSizeLike):
+    """FP16 output, checked with an absolute tolerance of 1e-2."""
+
+    def init_dtype(self):
+        self.dtype = core.VarDesc.VarType.FP16
+        self.output_dtype = np.float16
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, atol=1e-2)
+
+
+class TestFillConstantBatchSizeLike7(TestFillConstantBatchSizeLike):
+    """INT32 output."""
+
+    def init_dtype(self):
+        self.dtype = core.VarDesc.VarType.INT32
+        self.output_dtype = np.int32
+
+
+class TestFillConstantBatchSizeLike8(TestFillConstantBatchSizeLike):
+    """Same fill with the 'force_cpu' attribute enabled."""
+
+    def init_force_cpu(self):
+        self.force_cpu = True
+
+
+class TestFillConstantBatchSizeLike9(TestFillConstantBatchSizeLike):
+    """Input dim 0 is expected in output dim 1 instead of output dim 0."""
+
+    def init_shape(self):
+        self.input_shape = [4, 5]
+        self.shape = [123, 92]
+        self.output_shape = (123, 4)
+
+    def init_dim_idx(self):
+        self.input_dim_idx = 0
+        self.output_dim_idx = 1
+
+
+if __name__ == '__main__':
+    unittest.main()