未验证 提交 7d86737c 编写于 作者: P pangyoki 提交者: GitHub

add fill_constant_batch_size_like npu op (#34563)

上级 aefec228
...@@ -32,13 +32,12 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel<T> { ...@@ -32,13 +32,12 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel<T> {
auto force_cpu = ctx.Attr<bool>("force_cpu"); auto force_cpu = ctx.Attr<bool>("force_cpu");
auto *out = ctx.Output<Tensor>("Out"); auto *out = ctx.Output<Tensor>("Out");
auto *input = ctx.Input<Tensor>("Input"); auto *in = ctx.Input<framework::LoDTensor>("Input");
if (&ctx.Attr<int>("input_dim_idx") == 0) { if (in->lod().size() && ctx.Attr<int>("input_dim_idx") == 0) {
// set the correct batch size. // set the correct batch size for the LoDTensor.
auto odims = out->dims(); auto odims = out->dims();
int input_dim_idx = ctx.Attr<int>("input_dim_idx");
int output_dim_idx = ctx.Attr<int>("output_dim_idx"); int output_dim_idx = ctx.Attr<int>("output_dim_idx");
odims[output_dim_idx] = input->dims()[input_dim_idx]; odims[output_dim_idx] = static_cast<int>(in->lod().back().size()) - 1;
out->mutable_data<T>(odims, ctx.GetPlace()); out->mutable_data<T>(odims, ctx.GetPlace());
} }
...@@ -46,15 +45,24 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel<T> { ...@@ -46,15 +45,24 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel<T> {
if (str_value.empty()) { if (str_value.empty()) {
value = static_cast<T>(float_value); value = static_cast<T>(float_value);
} else { } else {
std::stringstream convert_stream(str_value); // handle NaN/Inf first, which cannot be read from stream.
if (std::is_same<int64_t, T>::value) { if (str_value == "inf") {
int64_t tmp_value; value = static_cast<T>(std::numeric_limits<double>::infinity());
convert_stream >> tmp_value; } else if (str_value == "-inf") {
value = static_cast<T>(tmp_value); value = static_cast<T>(-std::numeric_limits<double>::infinity());
} else if (str_value == "nan") {
value = static_cast<T>(std::numeric_limits<double>::quiet_NaN());
} else { } else {
double tmp_value; std::stringstream convert_stream(str_value);
convert_stream >> tmp_value; if (std::is_same<int64_t, T>::value) {
value = static_cast<T>(tmp_value); int64_t tmp_value;
convert_stream >> tmp_value;
value = static_cast<T>(tmp_value);
} else {
double tmp_value;
convert_stream >> tmp_value;
value = static_cast<T>(tmp_value);
}
} }
} }
......
...@@ -99,6 +99,22 @@ class TestFillConstantBatchSizeLike3(TestFillConstantBatchSizeLike): ...@@ -99,6 +99,22 @@ class TestFillConstantBatchSizeLike3(TestFillConstantBatchSizeLike):
self.output_value = 4.5 self.output_value = 4.5
class TestFillConstantBatchSizeLike4(TestFillConstantBatchSizeLike):
    """Exercise the str_value == 'inf' branch of the NPU kernel."""

    def init_value(self):
        # 'str_value' takes precedence over 'value' in the kernel, so the
        # expected output is +inf even though a finite float is also set.
        self.value = 3.8
        self.str_value = 'inf'
        self.output_value = float('inf')
class TestFillConstantBatchSizeLike5(TestFillConstantBatchSizeLike):
    """Exercise the str_value == '-inf' branch of the NPU kernel."""

    def init_value(self):
        # 'str_value' takes precedence over 'value' in the kernel, so the
        # expected output is -inf even though a finite float is also set.
        self.value = 3.8
        self.str_value = '-inf'
        self.output_value = -float('inf')
class TestFillConstantBatchSizeLike6(TestFillConstantBatchSizeLike): class TestFillConstantBatchSizeLike6(TestFillConstantBatchSizeLike):
def init_dtype(self): def init_dtype(self):
self.dtype = core.VarDesc.VarType.FP16 self.dtype = core.VarDesc.VarType.FP16
...@@ -130,5 +146,54 @@ class TestFillConstantBatchSizeLike9(TestFillConstantBatchSizeLike): ...@@ -130,5 +146,54 @@ class TestFillConstantBatchSizeLike9(TestFillConstantBatchSizeLike):
self.output_dim_idx = 1 self.output_dim_idx = 1
class TestFillConstantBatchSizeLikeLodTensor(TestFillConstantBatchSizeLike):
    """Feed 'Input' as a LoDTensor so the kernel derives the batch size
    from the last LoD level rather than from the dense dims."""

    def setUp(self):
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "fill_constant_batch_size_like"
        self.init_shape()
        self.init_value()
        self.init_dtype()
        self.init_force_cpu()
        self.init_dim_idx()
        # Three sequences of lengths 3, 2 and 5 -> LoD-derived batch size 3.
        lod = [[3, 2, 5]]
        input_tensor = np.random.random(self.input_shape).astype("float32")
        self.inputs = {'Input': (input_tensor, lod)}
        self.attrs = {
            'shape': self.shape,
            'value': self.value,
            'str_value': self.str_value,
            'dtype': self.dtype,
            'force_cpu': self.force_cpu,
            'input_dim_idx': self.input_dim_idx,
            'output_dim_idx': self.output_dim_idx,
        }
        self.outputs = {
            'Out': np.full(self.output_shape, self.output_value,
                           self.output_dtype)
        }

    def init_shape(self):
        # lod [[3, 2, 5]] holds 3 sequences, so output dim 0 becomes 3,
        # overriding the 123 requested in 'shape'.
        self.input_shape = [10, 20]
        self.shape = [123, 92]
        self.output_shape = (3, 92)
class TestFillConstantBatchSizeLikeLodTensor2(
        TestFillConstantBatchSizeLikeLodTensor):
    """LoDTensor input with input_dim_idx != 0: the LoD-derived batch size
    must not be used, so output dim 0 is copied from input dim 1."""

    def init_shape(self):
        self.input_shape = [10, 20]
        self.shape = [123, 92]
        # Output dim 0 comes from input dim 1 (20), not from the LoD.
        self.output_shape = (20, 92)

    def init_dim_idx(self):
        self.input_dim_idx = 1
        self.output_dim_idx = 0
if __name__ == '__main__':
    # Run all test cases in this module.
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册