- Removed noise adding for the test phase of softmax

c1fccc29 · Jacek Czaja · ff28b1ff · c1fccc29 · c1fccc29 · c1fccc29
5 changed files
--- a/paddle/fluid/operators/math/softmax.cc
+++ b/paddle/fluid/operators/math/softmax.cc
@@ -19,8 +19,10 @@ namespace paddle {
 namespace operators {
 namespace math {

-template class SoftmaxFunctor<platform::CPUDeviceContext, float>;
-template class SoftmaxFunctor<platform::CPUDeviceContext, double>;
+template class SoftmaxFunctor<platform::CPUDeviceContext, float, true>;
+template class SoftmaxFunctor<platform::CPUDeviceContext, float, false>;
+template class SoftmaxFunctor<platform::CPUDeviceContext, double, true>;
+template class SoftmaxFunctor<platform::CPUDeviceContext, double, false>;
 template class SoftmaxGradFunctor<platform::CPUDeviceContext, float>;
 template class SoftmaxGradFunctor<platform::CPUDeviceContext, double>;


--- a/paddle/fluid/operators/math/softmax.h
+++ b/paddle/fluid/operators/math/softmax.h
@@ -19,7 +19,7 @@ namespace paddle {
 namespace operators {
 namespace math {

-template <typename DeviceContext, typename T>
+template <typename DeviceContext, typename T, bool is_test>
 class SoftmaxFunctor {
 public:
  void operator()(const DeviceContext& context, const framework::Tensor* X,

--- a/paddle/fluid/operators/math/softmax_impl.h
+++ b/paddle/fluid/operators/math/softmax_impl.h
@@ -32,8 +32,8 @@ struct ValueClip {
  }
 };

-template <typename DeviceContext, typename T>
-void SoftmaxFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
+template <typename DeviceContext, typename T, bool is_test>
+void SoftmaxFunctor<DeviceContext, T, is_test>::operator()(const DeviceContext& context,
                                                  const framework::Tensor* X,
                                                  framework::Tensor* Y) {
  auto logits = EigenMatrix<T>::From(*X);
@@ -65,6 +65,42 @@ void SoftmaxFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
                                                 .broadcast(one_by_class));
 }

+// Specialization selected when is_test == true: subtracts each row's maximum
+// from the logits, exponentiates, then divides each row by its sum.
+template <typename DeviceContext, typename T>
+class SoftmaxFunctor<DeviceContext, T, true> {
+ public:  // `class` members default to private; callers must reach operator()
+  void operator()(const DeviceContext& context, const framework::Tensor* X,
+                  framework::Tensor* Y) {
+    auto logits = EigenMatrix<T>::From(*X);
+    auto softmax = EigenMatrix<T>::From(*Y);
+
+    const int kBatchDim = 0;
+    const int kClassDim = 1;
+    const int batch_size = logits.dimension(kBatchDim);
+    const int num_classes = logits.dimension(kClassDim);
+
+    Eigen::DSizes<int, 1> along_class(kClassDim);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+
+    // Shifting by the row-wise maximum keeps every exp() argument <= 0.
+    auto shifted_logits = (logits -
+                           logits.maximum(along_class)
+                               .eval()
+                               .reshape(batch_by_one)
+                               .broadcast(one_by_class));
+
+    softmax.device(*context.eigen_device()) = shifted_logits.exp();
+    softmax.device(*context.eigen_device()) = (softmax *
+                                               softmax.sum(along_class)
+                                                   .inverse()
+                                                   .eval()
+                                                   .reshape(batch_by_one)
+                                                   .broadcast(one_by_class));
+  }
+};
+
 template <typename DeviceContext, typename T>
 void SoftmaxGradFunctor<DeviceContext, T>::operator()(
    const DeviceContext& context, const framework::Tensor* y,

--- a/paddle/fluid/operators/softmax_op.h
+++ b/paddle/fluid/operators/softmax_op.h
@@ -35,8 +35,14 @@ class SoftmaxKernel : public framework::OpKernel<T> {
    Tensor X_2d = framework::ReshapeToMatrix(*X, rank - 1);
    Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);

-    math::SoftmaxFunctor<DeviceContext, T>()(
-        context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
+    const bool is_test = context.Attr<bool>("is_test");
+    if( is_test == true) {
+      math::SoftmaxFunctor<DeviceContext, T,true>()(
+          context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
+    } else {
+      math::SoftmaxFunctor<DeviceContext, T,false>()(
+          context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
+    }
  }
 };


--- a/paddle/fluid/operators/softmax_with_cross_entropy_op.h
+++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.h
@@ -42,7 +42,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {

    auto& dev_ctx =
        context.template device_context<platform::CPUDeviceContext>();
-    math::SoftmaxFunctor<platform::CPUDeviceContext, T>()(dev_ctx, logits,
+    math::SoftmaxFunctor<platform::CPUDeviceContext, T, false>()(dev_ctx, logits,
                                                          softmax);
    math::CrossEntropyFunctor<platform::CPUDeviceContext, T>()(
        dev_ctx, loss, softmax, labels, context.Attr<bool>("soft_label"),