From b314a69523195d1178f79a8854220ac395072adc Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Tue, 31 Jul 2018 19:53:59 +0800
Subject: [PATCH] make softmax supporting tensors

---
 paddle/fluid/operators/softmax_op.cc | 26 +++++++++++++-------------
 paddle/fluid/operators/softmax_op.h  | 22 ++++++++++++++++++++--
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc
index 31a7458f637..f2821cd1e86 100644
--- a/paddle/fluid/operators/softmax_op.cc
+++ b/paddle/fluid/operators/softmax_op.cc
@@ -37,10 +37,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of SoftmaxOp should not be null.");
 
-    auto x_dims = ctx->GetInputDim("X");
-    PADDLE_ENFORCE(x_dims.size() == 2UL,
-                   "The input of softmax op must be a matrix.");
-    ctx->SetOutputDim("Out", x_dims);
+    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
     ctx->ShareLoD("X", /*->*/ "Out");
   }
 
@@ -81,8 +78,8 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "The input tensor of softmax. "
-             "2-D with shape [batch_size, input_feature_dimensions].");
+             "The input tensor of softmax, "
+             "whose last dimension is the input_feature_dimensions.");
     AddOutput("Out", "The normalized values with the same shape as X.")
         .Reuse("X");
     AddAttr<bool>(
@@ -105,20 +102,23 @@
     AddComment(R"DOC(
 Softmax Operator.
 
-The input of the softmax operator is a 2-D tensor with shape N x K (N is the
-batch_size, K is the dimension of input feature). The output tensor has the
-same shape as the input tensor.
+The input of the softmax operator is a tensor of any rank. The output tensor
+has the same shape as the input.
 
-For each row of the input tensor, the softmax operator squashes the
-K-dimensional vector of arbitrary real values to a K-dimensional vector of real
-values in the range [0, 1] that add up to 1.
+The input tensor is first logically flattened to a 2-D matrix. The matrix's
+second dimension (row length) is the same as the last dimension of the input
+tensor, and the first dimension (column length) is the product of all the other
+dimensions of the input tensor. For each row of the matrix, the softmax operator
+squashes the K-dimensional (K is the width of the matrix, i.e. the size of the
+input tensor's last dimension) vector of arbitrary real values to a
+K-dimensional vector of real values in the range [0, 1] that add up to 1.
 It computes the exponential of each element of the K-dimensional input vector
 and the sum of the exponentials of all the elements. The ratio of the
 exponential of a given element to that sum is the corresponding output of
 the softmax operator.
 
-For each row $i$ and each column $j$ in Input(X), we have:
+For each row $i$ and each column $j$ in the matrix, we have:
     $$Out[i, j] = \frac{\exp(X[i, j])}{\sum_{j'} \exp(X[i, j'])}$$
 
 )DOC");
   }
diff --git a/paddle/fluid/operators/softmax_op.h b/paddle/fluid/operators/softmax_op.h
index 600da45a0bb..34c35ee421d 100644
--- a/paddle/fluid/operators/softmax_op.h
+++ b/paddle/fluid/operators/softmax_op.h
@@ -31,8 +31,16 @@ class SoftmaxKernel : public framework::OpKernel<T> {
     // allocate memory on device.
     Out->mutable_data<T>(context.GetPlace());
 
+    auto dims = X->dims();
+    auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
+    framework::LoDTensor flattened_x;
+    framework::LoDTensor flattened_out;
+    flattened_x.ShareDataWith(*X).Resize(flattened_dims);
+    flattened_out.ShareDataWith(*Out).Resize(flattened_dims);
+
     math::SoftmaxFunctor<DeviceContext, T>()(
-        context.template device_context<DeviceContext>(), X, Out);
+        context.template device_context<DeviceContext>(), &flattened_x,
+        &flattened_out);
   }
 };
 
@@ -47,8 +55,18 @@ class SoftmaxGradKernel : public framework::OpKernel<T> {
     // allocate memory on device.
     dX->mutable_data<T>(context.GetPlace());
 
+    auto dims = Out->dims();
+    auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
+    framework::LoDTensor flattened_out;
+    framework::LoDTensor flattened_d_out;
+    framework::LoDTensor flattened_d_x;
+    flattened_out.ShareDataWith(*Out).Resize(flattened_dims);
+    flattened_d_out.ShareDataWith(*dOut).Resize(flattened_dims);
+    flattened_d_x.ShareDataWith(*dX).Resize(flattened_dims);
+
     math::SoftmaxGradFunctor<DeviceContext, T>()(
-        context.template device_context<DeviceContext>(), Out, dOut, dX);
+        context.template device_context<DeviceContext>(), &flattened_out,
+        &flattened_d_out, &flattened_d_x);
   }
 };
-- 
GitLab
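
The kernel change above is purely a reshaping trick: ShareDataWith makes flattened_x/flattened_out views over the same device memory as X/Out, and Resize reinterprets their shape as the 2-D [product of leading dims, last dim] matrix described in the DOC string, so the existing 2-D SoftmaxFunctor is reused with no data copy. Below is a minimal, self-contained sketch of the same idea in plain C++/STL, with no Paddle types; the helper name SoftmaxLastDim and the subtract-the-row-max stabilization are illustrative additions, not part of the patch.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

// Softmax over the last dimension of a contiguous tensor with shape `dims`:
// view the data as a [rows, k] matrix, where k is the last dimension and
// rows is the product of all the other dimensions, then normalize each row.
void SoftmaxLastDim(std::vector<float>* data, const std::vector<size_t>& dims) {
  const size_t k = dims.back();
  const size_t rows = std::accumulate(dims.begin(), dims.end() - 1, size_t{1},
                                      std::multiplies<size_t>());
  for (size_t r = 0; r < rows; ++r) {
    float* row = data->data() + r * k;
    // Subtract the row max before exponentiating for numerical stability.
    const float max_val = *std::max_element(row, row + k);
    float sum = 0.f;
    for (size_t j = 0; j < k; ++j) {
      row[j] = std::exp(row[j] - max_val);
      sum += row[j];
    }
    for (size_t j = 0; j < k; ++j) row[j] /= sum;  // each row now sums to 1
  }
}

int main() {
  // A 2 x 2 x 3 tensor flattens to a 4 x 3 matrix; softmax runs per row of 3.
  std::vector<float> x = {1, 2, 3, 1, 1, 1, 0, 0, 5, -1, -2, -3};
  SoftmaxLastDim(&x, {2, 2, 3});
  for (size_t r = 0; r < 4; ++r) {
    std::printf("%.4f %.4f %.4f\n", x[3 * r], x[3 * r + 1], x[3 * r + 2]);
  }
  return 0;
}

On the 2 x 2 x 3 example the data is treated as a 4 x 3 matrix and each printed row sums to 1, which is the same row-wise normalization the flattened view hands to the 2-D softmax functor in the patch.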