Commit a07da949 authored by hedaoyuan, committed by Yu Yang

fix floating-point overflow problem of tanh (#355)

Parent 56b23d18
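Every code path below applies the same idea. Paddle evaluates tanh(a) as 2 / (1 + exp(-2 * a)) - 1, and single-precision exp overflows once its argument exceeds ln(FLT_MAX) ≈ 88.72, i.e. for any a < -44.36. Since tanh has saturated to -1 long before that point, the argument of exp can be clamped without changing the result. A minimal standalone sketch of the pattern (illustrative names, not the patched code itself):

#include <cmath>
#include <cstdio>

// Mirrors EXP_MAX_INPUT from the patch.
static const float kExpMaxInput = 40.0f;

float clampedTanh(float a) {
  float tmp = -2.0f * a;
  if (tmp > kExpMaxInput) tmp = kExpMaxInput;  // clamp before calling exp
  return 2.0f / (1.0f + std::exp(tmp)) - 1.0f;
}

int main() {
  // With a = -90 the naive formula would ask for exp(180) ~= 1.5e78,
  // which overflows float and trips FE_OVERFLOW when trapping is enabled.
  std::printf("%f\n", clampedTanh(-90.0f));  // prints -1.000000
  return 0;
}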
@@ -209,6 +209,15 @@ typedef struct {
 #define HL_FLOAT_MIN 2.2250738585072014e-308
 #endif
+/**
+ * The maximum input value for exp, used to avoid overflow problem.
+ *
+ * Currently only used for tanh function.
+ */
+#define EXP_MAX_INPUT 40.0
 /**
  * @brief DIVUP(x, y) is similar to ceil(x / y).
  * @note For CUDA, DIVUP will be used to specify
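Why 40.0 is a safe cap whichever precision `real` is compiled as; a quick numeric check (my arithmetic, not part of the patch):

#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
  // exp overflows float  once its argument exceeds ln(FLT_MAX) ~= 88.72,
  // and double once it exceeds ln(DBL_MAX) ~= 709.78; 40 is below both.
  std::printf("float threshold:  %f\n", std::log((double)FLT_MAX));
  std::printf("double threshold: %f\n", std::log(DBL_MAX));
  // At the cap the formula has already saturated: 2 / (1 + exp(40)) is
  // ~8.5e-18, smaller than DBL_EPSILON (~2.2e-16), so the clamped tanh
  // is indistinguishable from -1 in both precisions.
  std::printf("residual at cap:  %g\n", 2.0 / (1.0 + std::exp(40.0)));
  return 0;
}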
@@ -38,7 +38,9 @@ namespace hppl {
 }
 __m256 tanh(const __m256 a) {
+  __m256 max = _mm256_set1_ps(EXP_MAX_INPUT);
   __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);
+  tmp = _mm256_min_ps(tmp, max);
   tmp = exp(tmp);
   return _mm256_sub_ps(
       _mm256_div_ps(_mm256_set1_ps(2.0f),
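The vector path cannot branch per element, so the clamp becomes an element-wise minimum against a broadcast cap. A small self-contained illustration of that intrinsic (compile with -mavx; the values are my own):

#include <immintrin.h>
#include <cstdio>

int main() {
  // Two lanes (180, 90) would overflow a single-precision exp.
  __m256 tmp = _mm256_setr_ps(180.f, 90.f, 40.f, 1.f, 0.f, -1.f, -40.f, -90.f);
  __m256 max = _mm256_set1_ps(40.0f);  // broadcast EXP_MAX_INPUT
  tmp = _mm256_min_ps(tmp, max);       // per-lane min == branch-free clamp
  float out[8];
  _mm256_storeu_ps(out, tmp);
  for (float v : out) std::printf("%g ", v);  // 40 40 40 1 0 -1 -40 -90
  std::printf("\n");
  return 0;
}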
@@ -30,7 +30,9 @@ namespace hppl {
 }
 real tanh(const real a) {
-  return (2.0 / (1.0 + exp(-2.0*a))) - 1.0;
+  real tmp = -2.0 * a;
+  tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+  return (2.0 / (1.0 + exp(tmp))) - 1.0;
 }
 real linear(const real a) {
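Note the clamp is one-sided. When tmp = -2 * a is very negative (a large and positive), exp simply underflows toward zero and the formula returns 1; underflow is not among the exceptions the new test traps. A quick check of the asymmetry (my example, not from the patch):

#include <cmath>
#include <cstdio>

int main() {
  volatile float big = 180.0f;  // volatile: keep the calls at runtime
  // Harmless side: exp(-180) underflows toward 0, the result is exactly 1.
  std::printf("%f\n", 2.0f / (1.0f + std::exp(-big)) - 1.0f);
  // Dangerous side: exp(+180) would exceed FLT_MAX ~= 3.4e38 and raise
  // FE_OVERFLOW, which is why tmp is capped before exp is called.
  return 0;
}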
@@ -995,7 +995,7 @@ TEST(Layer, LstmLayer) {
   TestConfig config;
   config.layerConfig.set_type("lstmemory");
   config.layerConfig.set_size(4);
-  config.layerConfig.set_active_type("sigmoid");
+  config.layerConfig.set_active_type("tanh");
   config.layerConfig.set_active_state_type("sigmoid");
   config.layerConfig.set_active_gate_type("sigmoid");
   config.biasSize = 28;
@@ -369,7 +369,7 @@ TEST(Layer, LstmLayer) {
   LayerConfig layerConfig;
   layerConfig.set_type("lstmemory");
   layerConfig.set_active_type("relu");
-  layerConfig.set_active_state_type("sigmoid");
+  layerConfig.set_active_state_type("tanh");
   layerConfig.set_active_gate_type("sigmoid");
   layerConfig.add_inputs();
@@ -625,7 +625,10 @@ void BaseMatrixT<T>::squareDerivative(BaseMatrixT& b) {
   applyBinary(binary::SquareDerivative<T>(), b);
 }
-DEFINE_MATRIX_BINARY_OP(Tanh, b = 2.0 / (1.0 + exp(-2 * a)) - 1.0);
+DEFINE_MATRIX_BINARY_OP(Tanh,
+  T tmp = -2.0 * a;
+  tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+  b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template<>
 void BaseMatrixT<real>::tanh(BaseMatrixT& b) {
   applyBinary(binary::Tanh<real>(), b);
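DEFINE_MATRIX_BINARY_OP wraps its statement list in a functor that applyBinary then maps over every (a, b) element pair, so the same clamped body serves both BaseMatrixT::tanh above and the vTanh wrapper in the next hunk. A hedged sketch of roughly what the macro expands to (the real Paddle macro also adds device qualifiers for CUDA builds and may differ in detail):

#include <cmath>

#ifndef EXP_MAX_INPUT
#define EXP_MAX_INPUT 40.0
#endif

namespace binary {
template <class T>
struct Tanh {
  // a is the input element, b the output element.
  inline void operator()(T& a, T& b) const {
    T tmp = -2.0 * a;
    tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
    b = 2.0 / (1.0 + std::exp(tmp)) - 1.0;
  }
};
}  // namespace binary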
@@ -200,7 +200,10 @@ void vLog1p(const int n, const T* a, T* r) {
       binary::vLog1p<T>(), const_cast<T*>(a), r, 1, n, n, n);
 }
-DEFINE_MATRIX_BINARY_OP(vTanh, b = 2.0 / (1.0 + std::exp(-2 * a)) - 1.0);
+DEFINE_MATRIX_BINARY_OP(vTanh,
+  T tmp = -2.0 * a;
+  tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+  b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template<class T>
 void vTanh(const int n, const T* a, T* r) {
   hl_cpu_apply_binary_op<T, binary::vTanh<T>, 0, 0>(
@@ -3471,9 +3471,7 @@ void CpuMatrix::tanh(Matrix& output) {
   size_t dim = getWidth();
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(output.getWidth(), dim);
-  errno = 0;
   vTanh(numSamples * dim, getData(), output.getData());
-  CHECK_EQ(errno, 0) << "vTanh error";
 }
 void CpuMatrix::tanhDerivative(Matrix& output) {
@@ -3495,10 +3493,8 @@ void CpuMatrix::softrelu(Matrix& output) {
       out[j] = x;
     }
   }
-  errno = 0;
   vExp(numSamples * dim, output.getData(), output.getData());
   vLog1p(numSamples * dim, output.getData(), output.getData());
-  CHECK_EQ(errno, 0) << "vExp+vLog1p error";
 }
 void CpuMatrix::softreluDerivative(Matrix& output) {
@@ -3513,9 +3509,7 @@ void CpuMatrix::softreluDerivative(Matrix& output) {
   MatrixPtr tmpMat = Matrix::create(numSamples, dim);
   real* tmp = tmpMat->getData();
-  errno = 0;
   vExp(size, output.getData(), tmpMat->getData());
-  CHECK_EQ(errno, 0) << "vExp error";
   for (size_t i = 0; i < size; ++i) {
     grad[i] *= (1.0 - 1.0 / tmp[i]);
@@ -3538,10 +3532,7 @@ void CpuMatrix::scaledTanh(Matrix& output, real p1, real p2) {
     out[i] = p2 * in[i];
   }
   // out = tanh(out)
-  errno = 0;
   vTanh(numSamples * dim, out, out);
-  CHECK_EQ(errno, 0) << "vTanh error";
   // out = p1 * out
   for (size_t i = 0; i < numSamples * dim; ++i) {
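The errno checks around vTanh/vExp are dropped: whether libm sets errno at all is implementation-dependent (see C99's math_errhandling), and polling it only detects a problem after the whole vector call has returned. The new unit test registered below takes the opposite approach and turns floating-point exceptions into immediate traps. A minimal illustration of that mechanism (glibc-specific; feenableexcept is not portable):

#include <cmath>
#include <cstdio>
#include <fenv.h>  // feenableexcept is a glibc extension

int main() {
  feenableexcept(FE_OVERFLOW);  // unmask the trap: overflow -> SIGFPE
  volatile float a = 180.0f;    // volatile so exp runs at runtime
  float x = std::exp(a);        // single-precision overflow traps here
  std::printf("not reached: %f\n", x);
  return 0;
}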
@@ -13,3 +13,4 @@ add_simple_unittest(test_sparseMatrixCompare)
 add_simple_unittest(test_perturbation)
 add_simple_unittest(test_CpuGpuVector)
 add_simple_unittest(test_Allocator)
+add_simple_unittest(test_FPException)
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/**
* This test is about floating point calculation exception.
* Paddle catches FE_INVALID, FE_DIVBYZERO and FE_OVERFLOW exceptions.
*
* Some exceptions occur in the middle of a set of formulas
* and can be circumvented by small tricks.
* For example, calculate tanh:
*   b = 2.0 / (1.0 + exp(-2 * a)) - 1.0
*
* If the result of (-2 * a) is too large,
* an FE_OVERFLOW exception occurs when calculating exp.
* But tanh itself cannot overflow,
* so we add a trick to keep exp from computing an excessive value.
*
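* Concretely, with single-precision floats exp overflows once its
* argument exceeds ln(FLT_MAX) ~= 88.72; an input of a = -90 asks for
* exp(180) ~= 1.5e78. Clamping the argument at EXP_MAX_INPUT = 40
* still yields 2 / (1 + exp(40)) - 1 ~= -1 + 8.5e-18, i.e. the
* saturated value of tanh, well within float precision.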
*/
#include <fenv.h>
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
#include "paddle/utils/Excepts.h"
using namespace paddle; // NOLINT
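// Overwrite one randomly chosen element in each row of `matrix` with
// `value`. For a GpuMatrix the buffer lives in device memory, so the
// write has to go through hl_memcpy rather than a direct store.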
void SetTensorValue(Matrix& matrix, real value) {
  int height = matrix.getHeight();
  int width = matrix.getWidth();
  int stride = matrix.getStride();
  real* data = matrix.getData();
  for (int i = 0; i < height; i++) {
    int j = rand() % width;  // NOLINT
    if (typeid(matrix) == typeid(CpuMatrix)) {
      data[i * stride + j] = value;
    } else if (typeid(matrix) == typeid(GpuMatrix)) {
      hl_memcpy(&data[i * stride + j], &value, sizeof(real));
    } else {
      LOG(FATAL) << "should not reach here";
    }
  }
}
template<typename Matrix>
void testTanh(real illegal) {
  MatrixPtr A = std::make_shared<Matrix>(10, 10);
  MatrixPtr B = std::make_shared<Matrix>(10, 10);
  A->randomizeUniform();
  B->randomizeUniform();
  SetTensorValue(*A, illegal);
  A->tanh(*B);
}

template<typename Matrix>
void testSigmoid(real illegal) {
  MatrixPtr A = std::make_shared<Matrix>(10, 10);
  MatrixPtr B = std::make_shared<Matrix>(10, 10);
  A->randomizeUniform();
  B->randomizeUniform();
  SetTensorValue(*A, illegal);
  A->sigmoid(*B);
}
TEST(fp, overflow) {
  for (auto illegal : {-90.0, 90.0}) {
    LOG(INFO) << " illegal=" << illegal;
    testTanh<CpuMatrix>(illegal);
    testSigmoid<CpuMatrix>(illegal);
  }
}
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);
  feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  return RUN_ALL_TESTS();
}