提交 9714a9e3 编写于 作者: X Xinqi Li

add tensor with different dimensions


Former-commit-id: 29d73e1c3fae3b050a8d6362493a4decd6ff626d
上级 d7d745f2
......@@ -28,25 +28,28 @@ class Buffer final {
const Shape& shape() const { return shape_; }
// Read-only linear-index access; valid for any buffer of rank >= 1.
double At(size_t index) const {
  CHECK(shape_.NumAxes() >= 1);
  return data_.at(index);
}
// Read-only 2-index access; trailing axes (if any) are flattened into y's
// stride via Count(1), so rank >= 2 suffices.
double At(size_t x, size_t y) const {
  CHECK(shape_.NumAxes() >= 2);
  return data_.at(x * shape_.Count(1) + y);
}
// Read-only 3-index access; strides come from Count(1)/Count(2), so any
// rank >= 3 is accepted.
double At(size_t x, size_t y, size_t z) const {
  CHECK(shape_.NumAxes() >= 3);
  return data_.at(x * shape_.Count(1) + y * shape_.Count(2) + z);
}
// Mutable linear-index access; valid for any buffer of rank >= 1.
double& At(size_t index) {
  CHECK(shape_.NumAxes() >= 1);
  return data_.at(index);
}
// Mutable 2-index access; rank >= 2 required (see const overload).
double& At(size_t x, size_t y) {
  CHECK(shape_.NumAxes() >= 2);
  return data_.at(x * shape_.Count(1) + y);
}
// Mutable 3-index access; rank >= 3 required. (The pre-diff code checked
// NumAxes() == 2 here, which was wrong for a 3-index accessor.)
double& At(size_t x, size_t y, size_t z) {
  CHECK(shape_.NumAxes() >= 3);
  return data_.at(x * shape_.Count(1) + y * shape_.Count(2) + z);
}
......
......@@ -5,15 +5,47 @@ namespace oneflow {
namespace df {
void DifferentialDemo() {
Tensor vec(Shape({3, 3}), [](size_t index) { return (index + 1) * 0.095; });
FOR_RANGE(int, i, 0, 300) {
double lr = 0.01;
if (i > 200) { lr = 0.001; }
const auto& x = Clone(Update(vec, lr), 2);
Backward(Max(TensorProduct(MatrixRowSum(x.at(0)),
Reciprocal(MatrixColSum(x.at(1))))));
for (double x : vec.buffer().data()) { std::cout << x << " "; }
Tensor vec(Shape({4, 4}), [](size_t index) { return index % 2 ? 0 : 1000; });
Tensor output;
Tensor table;
Tensor row;
Tensor col;
Tensor epsilon(0.000000001);
FOR_RANGE(int, i, 0, 2000) {
double lr = 1;
if (i < 400) {
lr = 0.1;
} else if (i < 800) {
lr = 0.01;
} else if (i < 1200) {
lr = 0.001;
} else {
lr = 0.0001;
}
const auto& x =
Clone(Tee(Add(Square(FixedExpectation(Update(&vec, lr), 1)), epsilon),
&output),
3);
const auto& load = Clone(MatrixColSum(x.at(1)), 2);
Backward(Add(
Max(Tee(ElemWiseMul(TensorProduct(Tee(MatrixRowSum(x.at(0)), &row),
Reciprocal(Tee(load.at(0), &col))),
x.at(2)),
&table)),
Max(load.at(1))));
std::cout << "output: ";
for (double x : output.buffer().data()) { std::cout << x << " "; }
std::cout << std::endl;
std::cout << "row: ";
for (double x : row.buffer().data()) { std::cout << x << " "; }
std::cout << std::endl;
std::cout << "col: ";
for (double x : col.buffer().data()) { std::cout << x << " "; }
std::cout << std::endl;
std::cout << "table: ";
for (double x : table.buffer().data()) { std::cout << x << " "; }
std::cout << std::endl << std::endl;
}
// Tensor var(Shape({1}), 0.5);
// FOR_RANGE(int, i, 0, 40) {
......
......@@ -4,9 +4,9 @@ namespace oneflow {
namespace df {
Tensor Update(Tensor var, double lr) {
auto buffer = var.mut_buffer_ptr();
return Tensor(var, [buffer, lr](const Buffer& diff) {
Tensor Update(Tensor* var, double lr) {
auto buffer = var->mut_buffer_ptr();
return Tensor(*var, [=](const Buffer& diff) {
CHECK(buffer->data().size() == diff.data().size());
FOR_RANGE(int, i, 0, buffer->data().size()) {
double& w = buffer->mut_data()->at(i);
......@@ -44,13 +44,68 @@ Tensor Minus(const Tensor& input) {
});
}
// Element-wise absolute value with autodiff.
// Forward: |x| per element. Backward: scales the incoming diff by the sign
// of the corresponding input element (non-positive inputs use sign -1,
// matching the forward's treatment of zero).
Tensor Abs(const Tensor& input) {
  std::shared_ptr<Buffer> result(new Buffer(input.buffer()));
  FOR_RANGE(int, idx, 0, result->Size()) {
    double& val = result->mut_data()->at(idx);
    if (val <= 0) { val = -val; }
  }
  return Tensor(result, [=](const Buffer& out_diff) {
    Buffer in_diff(out_diff);
    FOR_RANGE(int, idx, 0, in_diff.Size()) {
      // d|x|/dx is +1 for positive inputs and -1 otherwise.
      if (input.buffer().data().at(idx) <= 0) {
        in_diff.mut_data()->at(idx) = -in_diff.mut_data()->at(idx);
      }
    }
    input.HandleDiff(in_diff);
  });
}
// Identity pass-through that additionally snapshots the tensor into `*out`
// so intermediate values can be inspected (e.g. printed) by the caller.
Tensor Tee(const Tensor& input, Tensor* out) {
  *out = input;  // side-channel copy for later inspection
  Tensor passthrough(input);
  return passthrough;
}
// Element-wise exponential with autodiff.
// Backward: d(exp(x))/dx = exp(x), so the saved forward output buffer is
// reused to scale the incoming diff.
Tensor Exp(const Tensor& input) {
  std::shared_ptr<Buffer> result(new Buffer(input.buffer()));
  FOR_RANGE(int, idx, 0, result->Size()) {
    double& val = result->mut_data()->at(idx);
    val = exp(val);
  }
  return Tensor(result, [=](const Buffer& out_diff) {
    Buffer in_diff(out_diff);
    FOR_RANGE(int, idx, 0, in_diff.Size()) {
      in_diff.mut_data()->at(idx) *= result->data().at(idx);
    }
    input.HandleDiff(in_diff);
  });
}
// Broadcasting addition with autodiff. As shown, the hunk contains BOTH the
// removed and the added implementation interleaved (duplicate `out`
// declaration, stale size CHECK, stray a/b HandleDiff calls), which cannot
// compile; this is the reconstructed post-state implementation.
//
// The larger operand's size must be a whole multiple of the smaller's. Each
// element i of the smaller tensor is added to the contiguous group of
// group_size = big.Size() / small.Size() elements starting at i*group_size.
// Backward: `big` receives out_diff unchanged; `small` receives out_diff
// summed over each group.
Tensor Add(const Tensor& a, const Tensor& b) {
  Tensor big = a;
  Tensor small = b;
  if (a.Size() < b.Size()) {
    big = b;
    small = a;
  }
  CHECK(big.Size() % small.Size() == 0);
  std::shared_ptr<Buffer> out(new Buffer(big.buffer()));
  size_t small_size = small.Size();
  size_t group_size = big.Size() / small_size;
  FOR_RANGE(int, i, 0, small_size) {
    FOR_RANGE(int, j, 0, group_size) {
      out->At(i * group_size + j) += small.At(i);
    }
  }
  return Tensor(out, [=](const Buffer& out_diff) {
    big.HandleDiff(out_diff);
    // Reduce each broadcast group back into the smaller operand's slot.
    Buffer small_diff(small.shape(), 0);
    FOR_RANGE(int, i, 0, small_size) {
      FOR_RANGE(int, j, 0, group_size) {
        small_diff.At(i) += out_diff.At(i * group_size + j);
      }
    }
    small.HandleDiff(small_diff);
  });
}
......
......@@ -7,11 +7,15 @@ namespace oneflow {
namespace df {
Tensor Update(Tensor var, double lr);
Tensor Update(Tensor* var, double lr);
std::vector<Tensor> Clone(const Tensor& input, size_t n);
Tensor Minus(const Tensor& input);
Tensor Abs(const Tensor& input);
Tensor Exp(const Tensor& input);
Tensor Tee(const Tensor& input, Tensor* out);
Tensor Add(const Tensor& a, const Tensor& b);
......
......@@ -8,16 +8,19 @@ namespace df {
class Tensor final {
public:
Tensor() = default;
Tensor(const Tensor&) = default;
explicit Tensor(const Shape& shape, double init)
explicit Tensor(std::shared_ptr<Buffer> buffer)
: buffer_(buffer), diff_handler_([](const Buffer&) {}) {}
Tensor(const Shape& shape, double init)
: buffer_(std::shared_ptr<Buffer>(new Buffer(shape, init))),
diff_handler_([](const Buffer&) {}) {}
explicit Tensor(const Shape& shape,
const std::function<double(size_t)>& Getter)
Tensor(double init)
: buffer_(std::shared_ptr<Buffer>(new Buffer(Shape({1}), init))),
diff_handler_([](const Buffer&) {}) {}
Tensor(const Shape& shape, const std::function<double(size_t)>& Getter)
: buffer_(std::shared_ptr<Buffer>(new Buffer(shape, Getter))),
diff_handler_([](const Buffer&) {}) {}
explicit Tensor(std::shared_ptr<Buffer> buffer)
: buffer_(buffer), diff_handler_([](const Buffer&) {}) {}
Tensor(std::shared_ptr<Buffer> buffer,
const std::function<void(const Buffer&)>& diff_handler)
: buffer_(buffer), diff_handler_(diff_handler) {}
......@@ -42,7 +45,7 @@ class Tensor final {
const Buffer& buffer() const { return *buffer_; }
const std::shared_ptr<Buffer>& buffer_ptr() const { return buffer_; }
std::shared_ptr<Buffer> mut_buffer_ptr() { return buffer_; }
void HandleDiff(const Buffer& diff) const { diff_handler_(diff); }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册