Commit 04d68d6c authored by ShawnXuan, committed by Li Xinqi

Dev python add n (#2552)

* add add_n in math_ops.py

* add_n bug fix

* add test_add_n.py

* rm useless debug code

* check inputs length > 1 for add_n

* rm useless lines

* rm r = None
Parent 17549fa4
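
For reviewers, a minimal usage sketch of the new flow.math.add_n export; it mirrors test_add_n.py below, so nothing beyond the tested API is assumed:

import numpy as np
import oneflow as flow

func_config = flow.FunctionConfig()
func_config.default_data_type(flow.float)

@flow.function(func_config)
def AddJob(xs=[flow.FixedTensorDef((5, 2))] * 3):
    # elementwise sum of all input blobs; add_n asserts len(inputs) > 1
    return flow.math.add_n(xs)

inputs = [np.random.rand(5, 2).astype(np.float32) for _ in range(3)]
out = AddJob(inputs).get().ndarray()  # equals sum(inputs) elementwise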
@@ -31,12 +31,18 @@ struct AddUtil {
     if (r) {
       tuple_switch(r, add_kernel->tp_,
                    AdditionFunction<device_type, T, decltype(add_kernel)>{
-                       out_blob, std::move(BnInOp2Blob), ctx.device_ctx, 0, add_kernel});
+                       out_blob, BnInOp2Blob, ctx.device_ctx, 0, add_kernel});
     }
     for (; r < in_num; r += kWidth) {
-      Addition<device_type, T>(ctx.device_ctx, out_blob, out_blob, in_blob(r), in_blob(r + 1),
-                               in_blob(r + 2), in_blob(r + 3), in_blob(r + 4), in_blob(r + 5),
-                               in_blob(r + 6), in_blob(r + 7));
+      if (r == 0) {
+        Addition<device_type, T>(ctx.device_ctx, out_blob, in_blob(r), in_blob(r + 1),
+                                 in_blob(r + 2), in_blob(r + 3), in_blob(r + 4), in_blob(r + 5),
+                                 in_blob(r + 6), in_blob(r + 7));
+      } else {
+        Addition<device_type, T>(ctx.device_ctx, out_blob, out_blob, in_blob(r), in_blob(r + 1),
+                                 in_blob(r + 2), in_blob(r + 3), in_blob(r + 4), in_blob(r + 5),
+                                 in_blob(r + 6), in_blob(r + 7));
+      }
     }
   }
 };
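
What this hunk fixes: the old loop always passed out_blob as both destination and first operand, so when in_num is an exact multiple of kWidth (r == 0 on entry, i.e. the tuple_switch remainder dispatch is skipped) the first iteration read an uninitialized out_blob. The fix writes the first full chunk directly from the inputs. A Python sketch of the intended control flow, with numpy arrays standing in for blobs and kWidth assumed to be 8 (the loop consumes in_blob(r) .. in_blob(r + 7)):

import numpy as np

def add_n_chunked(inputs, k_width=8):
    # Sketch only, not the real kernel: sum a list of arrays in chunks.
    n = len(inputs)
    r = n % k_width
    out = sum(inputs[:r]) if r else None  # remainder first, like tuple_switch
    while r < n:
        chunk = sum(inputs[r:r + k_width])  # one unrolled Addition call
        # the fixed r == 0 branch: write out directly rather than
        # accumulating into an uninitialized out
        out = chunk if out is None else out + chunk
        r += k_width
    return out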
......
@@ -22,7 +22,7 @@ class AddKernel final : public KernelIf<device_type> {
   const PbMessage& GetCustomizedOpConf() const override;
-  decltype(make_tuple_from_sequence<7>()) tp_;
+  decltype(make_tuple_from_sequence<8>()) tp_;
 };
 void HalfGpuAdd(DeviceCtx* ctx, const int64_t n, float16* out_dptr,
......
@@ -20,6 +20,23 @@ def add(x, y, name=None):
     else:
         return broadcast_add(x, y, name)
 
+@oneflow_export("math.add_n")
+def add_n(inputs, name=None):
+    op_conf = op_conf_util.OperatorConf()
+    setattr(
+        op_conf,
+        "name",
+        name if name is not None else id_util.UniqueStr("AddN_"),
+    )
+    assert len(inputs) > 1
+    for blob in inputs:
+        getattr(op_conf.add_conf, "in").append(blob.logical_blob_name)
+    op_conf.add_conf.out = "out"
+    compile_context.CurJobAddOp(op_conf)
+    lbi = logical_blob_id_util.LogicalBlobId()
+    lbi.op_name = op_conf.name
+    lbi.blob_name = "out"
+    return remote_blob_util.RemoteBlob(lbi)
+
 @oneflow_export("math.subtract")
 def subtract(x, y, name=None):
......
import oneflow as flow
import numpy as np

func_config = flow.FunctionConfig()
func_config.default_data_type(flow.float)


def test_naive(test_case):
    @flow.function(func_config)
    def AddJob(xs=[flow.FixedTensorDef((5, 2))] * 3):
        return flow.math.add_n(xs)

    inputs = [np.random.rand(5, 2).astype(np.float32) for i in range(3)]
    r = AddJob(inputs).get().ndarray()
    test_case.assertTrue(np.allclose(r, sum(inputs)))


def test_2_inputs(test_case):
    GenerateTest(test_case, (64, 64), 2)


def test_3_inputs(test_case):
    GenerateTest(test_case, (64, 64), 3)


def test_4_inputs(test_case):
    GenerateTest(test_case, (64, 64), 4)


def test_5_inputs(test_case):
    GenerateTest(test_case, (64, 64), 5)


def test_6_inputs(test_case):
    GenerateTest(test_case, (64, 64), 6)


def test_7_inputs(test_case):
    GenerateTest(test_case, (64, 64), 7)


def test_8_inputs(test_case):
    GenerateTest(test_case, (64, 64), 8)


def test_9_inputs(test_case):
    GenerateTest(test_case, (64, 64), 9)


def test_10_inputs(test_case):
    GenerateTest(test_case, (64, 64), 10)


def test_11_inputs(test_case):
    GenerateTest(test_case, (64, 64), 11)


def test_12_inputs(test_case):
    GenerateTest(test_case, (64, 64), 12)


def test_13_inputs(test_case):
    GenerateTest(test_case, (64, 64), 13)


def test_14_inputs(test_case):
    GenerateTest(test_case, (64, 64), 14)


def test_15_inputs(test_case):
    GenerateTest(test_case, (64, 64), 15)


def test_16_inputs(test_case):
    GenerateTest(test_case, (64, 64), 16)


def GenerateTest(test_case, shape, num_inputs):
    @flow.function(func_config)
    def AddJob(xs=[flow.FixedTensorDef(shape)] * num_inputs):
        return flow.math.add_n(xs)

    inputs = [np.random.rand(*shape).astype(np.float32) for i in range(num_inputs)]
    r = AddJob(inputs).get().ndarray()
    test_case.assertTrue(np.allclose(r, sum(inputs)))
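
Note: the 8- and 16-input cases are the ones that hit the fixed r == 0 path in the kernel above, since the input count is then an exact multiple of the unroll width.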