From 1d04b19ce86ccf055f58955142447aab577d6171 Mon Sep 17 00:00:00 2001
From: Abhinav Arora
Date: Wed, 6 Dec 2017 01:55:12 +0530
Subject: [PATCH] Fix the rendering of latex equation for adamax op (#6294)

* Using latex fraction syntax in sigmoid and logsigmoid op

* Fixing the rendering of the latex equations in adamax operator
---
 paddle/operators/activation_op.cc |  8 ++++----
 paddle/operators/adamax_op.cc     | 10 ++++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc
index 154c618e8e..83262f950e 100644
--- a/paddle/operators/activation_op.cc
+++ b/paddle/operators/activation_op.cc
@@ -44,9 +44,9 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "Input of Sigmoid operator");
     AddOutput("Y", "Output of Sigmoid operator");
     AddComment(R"DOC(
-Sigmoid Activation Operator.
+Sigmoid Activation Operator
 
-$y = 1 / (1 + e^{-x})$
+$$y = \frac{1}{1 + e^{-x}}$$
 
 )DOC");
   }
@@ -60,9 +60,9 @@ class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "Input of LogSigmoid operator");
     AddOutput("Y", "Output of LogSigmoid operator");
     AddComment(R"DOC(
-Logsigmoid Activation Operator.
+Logsigmoid Activation Operator
 
-$y = \log(1 / (1 + e^{-x}))$
+$$y = \log \frac{1}{1 + e^{-x}}$$
 
 )DOC");
   }
diff --git a/paddle/operators/adamax_op.cc b/paddle/operators/adamax_op.cc
index d5bbc672e1..867ddd9790 100644
--- a/paddle/operators/adamax_op.cc
+++ b/paddle/operators/adamax_op.cc
@@ -107,10 +107,12 @@ Adam algorithm based on the infinity norm.
 
 Adamax updates:
 
-$$momentOut = \beta_1 * moment + (1 - \beta_1) * grad \break
-infNormOut = max(\beta_2 * infNorm + \epsilon, |grad|) \break
-learningRate = learningRate /(1 - \beta_1_{pow}) \break
-paramOut = param - learningRate * momentPut / infNormOut$$
+$$
+ momentOut = \beta_{1} * moment + (1 - \beta_{1}) * grad \\
+ infNormOut = max(\beta_{2} * infNorm + \epsilon, |grad|) \\
+ learningRate = \frac{learningRate}{1 - \beta_{1}^{Beta1Pow}} \\
+ paramOut = param - learningRate * \frac{momentOut}{infNormOut}
+$$
 
 The original paper does not have an epsilon attribute. However, it is added
 here for numerical stability to prevent the
--
GitLab
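
For readers who want to sanity-check the rewritten LaTeX, the following is a minimal, self-contained C++ sketch of the scalar Adamax step that the doc comment describes. It is not part of the patch: names such as `AdamaxState` and `AdamaxUpdate` are invented for illustration, and `Beta1Pow` is read as beta1 accumulated over the step count (the bias-correction term from the Adamax paper); the real operator works element-wise on tensors.

```cpp
// Illustrative sketch only, not part of the patch or the PaddlePaddle kernel.
// Scalar form of the Adamax update from the doc comment above.
#include <algorithm>
#include <cmath>
#include <cstdio>

struct AdamaxState {
  double moment = 0.0;     // exponential moving average of the gradient
  double inf_norm = 0.0;   // exponentially weighted infinity norm
  double beta1_pow = 1.0;  // beta1^t, i.e. the Beta1Pow term in the doc comment
};

// One Adamax step for a single parameter; returns paramOut.
double AdamaxUpdate(double param, double grad, AdamaxState* s,
                    double learning_rate, double beta1 = 0.9,
                    double beta2 = 0.999, double epsilon = 1e-8) {
  // momentOut = beta1 * moment + (1 - beta1) * grad
  s->moment = beta1 * s->moment + (1.0 - beta1) * grad;
  // infNormOut = max(beta2 * infNorm + epsilon, |grad|)
  s->inf_norm = std::max(beta2 * s->inf_norm + epsilon, std::fabs(grad));
  // learningRate = learningRate / (1 - beta1^t)
  s->beta1_pow *= beta1;
  const double lr_t = learning_rate / (1.0 - s->beta1_pow);
  // paramOut = param - learningRate * momentOut / infNormOut
  return param - lr_t * s->moment / s->inf_norm;
}

int main() {
  AdamaxState state;
  double param = 1.0;
  // Toy loss 0.5 * param^2, whose gradient is simply param.
  for (int step = 0; step < 5; ++step) {
    param = AdamaxUpdate(param, /*grad=*/param, &state, /*learning_rate=*/0.1);
    std::printf("step %d: param = %f\n", step, param);
  }
  return 0;
}
```

Note that, as the surrounding doc text in adamax_op.cc explains, epsilon is added to the decayed infinity norm purely for numerical stability; the original Adamax paper omits it.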