提交 3d77360b 编写于 作者: C caoying03

Add negative clipping for softmax: shifted logits are clipped from below at -64 before exponentiation.

上级 360bde9a
...@@ -25,6 +25,14 @@ template <typename T, int MajorType = Eigen::RowMajor, ...@@ -25,6 +25,14 @@ template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex> typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>; using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
// Element-wise functor that bounds its argument from below.
//
// Returns the input unchanged unless it is strictly less than -64, in which
// case -64 is returned instead. Values for which the comparison is false
// (including anything unordered with respect to the bound) pass through
// untouched.
template <typename T>
struct ValueClip {
  HOSTDEVICE T operator()(const T& x) const {
    // Smallest value this functor will ever emit.
    const T lower_bound = -64.;
    if (x < lower_bound) {
      return lower_bound;
    }
    return x;
  }
};
template <typename Place, typename T> template <typename Place, typename T>
class SoftmaxFunctor { class SoftmaxFunctor {
public: public:
...@@ -47,7 +55,8 @@ class SoftmaxFunctor { ...@@ -47,7 +55,8 @@ class SoftmaxFunctor {
logits.maximum(along_class) logits.maximum(along_class)
.eval() .eval()
.reshape(batch_by_one) .reshape(batch_by_one)
.broadcast(one_by_class)); .broadcast(one_by_class))
.unaryExpr(ValueClip<T>());
softmax.device(context.GetEigenDevice<Place>()) = shifted_logits.exp(); softmax.device(context.GetEigenDevice<Place>()) = shifted_logits.exp();
softmax.device(context.GetEigenDevice<Place>()) = softmax.device(context.GetEigenDevice<Place>()) =
......
...@@ -5,7 +5,7 @@ from op_test import OpTest ...@@ -5,7 +5,7 @@ from op_test import OpTest
def stable_softmax(x):
    """Compute the softmax of vector x in a numerically stable way.

    The input is shifted by its maximum so the largest exponent is 0, and the
    shifted values are then clipped from below at -64 before exponentiation.

    Args:
        x: NumPy array of logits; the max and the normalizing sum are taken
            over all of its elements.

    Returns:
        NumPy array of the same shape as ``x`` whose elements sum to 1.
    """
    # Parenthesize the subtraction: the clip must apply to the shifted
    # values, not to the scalar maximum. `x - np.max(x).clip(-64.)` would
    # only bound the max and leave very negative entries unclipped.
    shiftx = (x - np.max(x)).clip(-64.)
    exps = np.exp(shiftx)
    return exps / np.sum(exps)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册