Commit 84a0574a authored by H hedaoyuan

add a PerfUtils.h

Parent 8d736813
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

// Performance check: EXPRESSION_PERFORMANCE runs `expression` once to warm
// up, then 20 timed iterations under REGISTER_TIMER, and logs the accumulated
// statistics; the timer name is the stringified expression, truncated with
// ".." if it exceeds the 30-character buffer. With PADDLE_DISABLE_TIMER
// defined, the expression is simply executed once.
#ifdef PADDLE_DISABLE_TIMER

#define EXPRESSION_PERFORMANCE(expression) expression;

#else

#include "paddle/utils/Stat.h"

#define EXPRESSION_PERFORMANCE(expression)                             \
  do {                                                                 \
    char expr[30];                                                     \
    strncpy(expr, #expression, 30);                                    \
    if (expr[29] != '\0') {                                            \
      expr[27] = '.';                                                  \
      expr[28] = '.';                                                  \
      expr[29] = '\0';                                                 \
    }                                                                  \
    expression;                                                        \
    for (int i = 0; i < 20; i++) {                                     \
      REGISTER_TIMER(expr);                                            \
      expression;                                                      \
    }                                                                  \
    LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ') \
              << *globalStat.getStat(expr);                            \
    globalStat.reset();                                                \
  } while (0)

#endif
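A minimal usage sketch of the new macro (not part of this commit; the timeVectorAdd helper and the Vector.h include path are illustrative, and only APIs already used in the tests below — Vector::create, rand(), add() — are assumed):

#include "PerfUtils.h"
#include "paddle/math/Vector.h"

using paddle::Vector;
using paddle::VectorPtr;

// Hypothetical helper: measures a scalar add on a vector of the given size.
void timeVectorAdd(size_t size, bool useGpu) {
  VectorPtr vec = Vector::create(size, useGpu);
  vec->rand();
  // Runs the expression once untimed, then 20 timed iterations, and logs the
  // accumulated stat; with PADDLE_DISABLE_TIMER it just runs the expression.
  EXPRESSION_PERFORMANCE(vec->add(1.0));
}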
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/math/TrainingAlgorithmOp.h"
#include "OriginalOptimizerApi.h"
#include "TensorCheck.h"
#include "PerfUtils.h"

using namespace paddle;  // NOLINT

@@ -32,21 +33,20 @@ public:
    max_diff_ = FLAGS_max_diff;
    FLAGS_max_diff = max_diff;
  }
  ~SetMaxDiff() { FLAGS_max_diff = max_diff_; }

private:
  double max_diff_;
};

#define COPY_VECTOR_TO_CPU(cpuVec, vector)               \
  do {                                                   \
    if (vector->useGpu()) {                              \
      cpuVec = Vector::create(vector->getSize(), false); \
      cpuVec->copyFrom(*vector);                         \
    } else {                                             \
      cpuVec = vector;                                   \
    }                                                    \
  } while (0)

int VectorCheckErr(const Vector& vector1, const Vector& vector2) {
@@ -79,8 +79,8 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
#ifdef PADDLE_DISABLE_TIMER
#define CHECK_VECTORPTR(vector1, vector2) \
  EXPECT_EQ(VectorCheckErr(vector1, vector2), 0)
#else

@@ -96,8 +96,20 @@ void testCase(testMatrixFunc matrixFunc) {
#else
  for (auto useGpu : {false}) {
#endif
    for (auto size : {1,
                      32,
                      64,
                      128,
                      512,
                      1024,
                      4096,
                      32768,
                      65536,
                      131072,
                      262144,
                      524288,
                      1048576,
                      2097152}) {
      LOG(INFO) << " size=" << size << " useGpu=" << useGpu;
      matrixFunc(size, useGpu);
    }

@@ -105,10 +117,10 @@ void testCase(testMatrixFunc matrixFunc) {
}
#define INIT_VECTOR(vec1, vec2, type, size, useGpu) \
  vec1[type] = Vector::create(size, useGpu);        \
  vec2[type] = Vector::create(size, useGpu);        \
  vec1[type]->rand();                               \
  vec2[type]->copyFrom(*vec1[type]);

void testAdagrad(size_t size, bool useGpu) {
  VectorPtr bufs1[NUM_PARAMETER_TYPES];

@@ -120,13 +132,13 @@ void testAdagrad(size_t size, bool useGpu) {
  INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM1, size, useGpu);
  INIT_VECTOR(bufs1, bufs2, PARAMETER_LEARNING_RATE, size, useGpu);

  real epsilon = (real)rand() / (real)RAND_MAX;        // NOLINT
  real learningRate = (real)rand() / (real)RAND_MAX;   // NOLINT
  real momentum = (real)rand() / (real)RAND_MAX;       // NOLINT
  real decayRate = (real)rand() / (real)RAND_MAX;      // NOLINT

  EXPRESSION_PERFORMANCE(AdagradParameterOptimizer(
      bufs1, epsilon, learningRate, momentum, decayRate));

  BaseMatrix& value = *bufs2[PARAMETER_VALUE];
  BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];

@@ -135,8 +147,16 @@ void testAdagrad(size_t size, bool useGpu) {
  BaseMatrix& accum = *bufs2[PARAMETER_GRADIENT_SQURESUM1];
  BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE];

  EXPRESSION_PERFORMANCE(adagradApply(value,
                                      grad,
                                      mom,
                                      accum_buffer,
                                      accum,
                                      lr,
                                      epsilon,
                                      learningRate,
                                      momentum,
                                      decayRate));

  CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
  CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);

@@ -146,9 +166,7 @@ void testAdagrad(size_t size, bool useGpu) {
                  bufs2[PARAMETER_LEARNING_RATE]);
}

TEST(Training, Adagrad) { testCase(testAdagrad); }
void testAdaDelta(size_t size, bool useGpu) {
  VectorPtr bufs1[NUM_PARAMETER_TYPES];

@@ -160,14 +178,14 @@ void testAdaDelta(size_t size, bool useGpu) {
  INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM1, size, useGpu);
  INIT_VECTOR(bufs1, bufs2, PARAMETER_LEARNING_RATE, size, useGpu);

  real rou = (real)rand() / (real)RAND_MAX;            // NOLINT
  real epsilon = (real)rand() / (real)RAND_MAX;        // NOLINT
  real learningRate = (real)rand() / (real)RAND_MAX;   // NOLINT
  real momentum = (real)rand() / (real)RAND_MAX;       // NOLINT
  real decayRate = (real)rand() / (real)RAND_MAX;      // NOLINT

  EXPRESSION_PERFORMANCE(AdaDeltaParameterOptimizer(
      bufs1, rou, epsilon, learningRate, momentum, decayRate));

  BaseMatrix& value = *bufs2[PARAMETER_VALUE];
  BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];

@@ -176,8 +194,17 @@ void testAdaDelta(size_t size, bool useGpu) {
  BaseMatrix& accum_update = *bufs2[PARAMETER_GRADIENT_SQURESUM1];
  BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE];

  EXPRESSION_PERFORMANCE(adadeltaApply(value,
                                       grad,
                                       mom,
                                       accum,
                                       accum_update,
                                       lr,
                                       rou,
                                       epsilon,
                                       learningRate,
                                       momentum,
                                       decayRate));

  CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
  CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);

@@ -189,11 +216,9 @@ void testAdaDelta(size_t size, bool useGpu) {
                  bufs2[PARAMETER_LEARNING_RATE]);
}

TEST(Training, AdaDelta) { testCase(testAdaDelta); }
template <bool isFirstTime>
void testRMSProp(size_t size, bool useGpu) {
  VectorPtr bufs1[NUM_PARAMETER_TYPES];
  VectorPtr bufs2[NUM_PARAMETER_TYPES];

@@ -207,18 +232,23 @@ void testRMSProp(size_t size, bool useGpu) {
  /* make sure 'g - f.square()' greater than 0 */
  bufs1[PARAMETER_GRADIENT_SQURESUM]->add(1.0);
  bufs2[PARAMETER_GRADIENT_SQURESUM]->copyFrom(
      *bufs1[PARAMETER_GRADIENT_SQURESUM]);

  real rou = (real)rand() / (real)RAND_MAX;            // NOLINT
  real epsilon = (real)rand() / (real)RAND_MAX;        // NOLINT
  real learningRate = (real)rand() / (real)RAND_MAX;   // NOLINT
  real momentum = (real)rand() / (real)RAND_MAX;       // NOLINT
  real decayRate = (real)rand() / (real)RAND_MAX;      // NOLINT
  real accumulatedRou = rou;

  EXPRESSION_PERFORMANCE(RMSPropParameterOptimizer(bufs1,
                                                   accumulatedRou,
                                                   rou,
                                                   epsilon,
                                                   learningRate,
                                                   momentum,
                                                   decayRate,
                                                   isFirstTime));

  BaseMatrix& value = *bufs2[PARAMETER_VALUE];
  BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];

@@ -227,9 +257,19 @@ void testRMSProp(size_t size, bool useGpu) {
  BaseMatrix& sum1 = *bufs2[PARAMETER_GRADIENT_SQURESUM1];
  BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE];

  EXPRESSION_PERFORMANCE(rmspropApply(value,
                                      grad,
                                      mom,
                                      sum,
                                      sum1,
                                      lr,
                                      accumulatedRou,
                                      rou,
                                      epsilon,
                                      learningRate,
                                      momentum,
                                      decayRate,
                                      isFirstTime));

  CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
  CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);

@@ -246,7 +286,7 @@ TEST(Training, RMSProp) {
  testCase(testRMSProp<false>);
}
template <bool isFirstTime>
void testDecayedAdagrad(size_t size, bool useGpu) {
  VectorPtr bufs1[NUM_PARAMETER_TYPES];
  VectorPtr bufs2[NUM_PARAMETER_TYPES];

@@ -256,11 +296,11 @@ void testDecayedAdagrad(size_t size, bool useGpu) {
  INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM, size, useGpu);
  INIT_VECTOR(bufs1, bufs2, PARAMETER_LEARNING_RATE, size, useGpu);

  real rou = (real)rand() / (real)RAND_MAX;            // NOLINT
  real epsilon = (real)rand() / (real)RAND_MAX;        // NOLINT
  real learningRate = (real)rand() / (real)RAND_MAX;   // NOLINT
  real momentum = (real)rand() / (real)RAND_MAX;       // NOLINT
  real decayRate = (real)rand() / (real)RAND_MAX;      // NOLINT
  real accumulatedRou = rou;

  if (isFirstTime) {

@@ -269,8 +309,13 @@ void testDecayedAdagrad(size_t size, bool useGpu) {
  }

  EXPRESSION_PERFORMANCE(DecayedAdagradParameterOptimizer(bufs1,
                                                          accumulatedRou,
                                                          rou,
                                                          epsilon,
                                                          learningRate,
                                                          momentum,
                                                          decayRate,
                                                          isFirstTime));

  BaseMatrix& value = *bufs2[PARAMETER_VALUE];
  BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];

@@ -278,9 +323,18 @@ void testDecayedAdagrad(size_t size, bool useGpu) {
  BaseMatrix& sum = *bufs2[PARAMETER_GRADIENT_SQURESUM];
  BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE];

  EXPRESSION_PERFORMANCE(decayedAdagradApply(value,
                                             grad,
                                             mom,
                                             sum,
                                             lr,
                                             accumulatedRou,
                                             rou,
                                             epsilon,
                                             learningRate,
                                             momentum,
                                             decayRate,
                                             isFirstTime));

  CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
  CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);
@@ -303,23 +357,31 @@ void testAdam(size_t size, bool useGpu) {
  INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM, size, useGpu);
  INIT_VECTOR(bufs1, bufs2, PARAMETER_SECOND_MOMENTUM, size, useGpu);

  real beta1 = (real)rand() / (real)RAND_MAX;          // NOLINT
  real beta2 = (real)rand() / (real)RAND_MAX;          // NOLINT
  real beta1_power = (real)rand() / (real)RAND_MAX;    // NOLINT
  real beta2_power = (real)rand() / (real)RAND_MAX;    // NOLINT
  real epsilon = (real)rand() / (real)RAND_MAX;        // NOLINT
  real learningRate = (real)rand() / (real)RAND_MAX;   // NOLINT

  EXPRESSION_PERFORMANCE(AdamParameterOptimizer(
      bufs1, beta1, beta2, beta1_power, beta2_power, epsilon, learningRate));

  BaseMatrix& value = *bufs2[PARAMETER_VALUE];
  BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];
  BaseMatrix& mom = *bufs2[PARAMETER_MOMENTUM];
  BaseMatrix& v = *bufs2[PARAMETER_SECOND_MOMENTUM];

  EXPRESSION_PERFORMANCE(adamApply(value,
                                   grad,
                                   mom,
                                   v,
                                   beta1,
                                   beta2,
                                   beta1_power,
                                   beta2_power,
                                   epsilon,
                                   learningRate));

  CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
  CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);

@@ -327,9 +389,7 @@ void testAdam(size_t size, bool useGpu) {
                  bufs2[PARAMETER_SECOND_MOMENTUM]);
}

TEST(Training, Adam) { testCase(testAdam); }
void testAdamax(size_t size, bool useGpu) {
  VectorPtr bufs1[NUM_PARAMETER_TYPES];

@@ -344,16 +404,16 @@ void testAdamax(size_t size, bool useGpu) {
  real alpha = (real)rand() / (real)RAND_MAX;  // NOLINT
  int64_t step = 2;

  EXPRESSION_PERFORMANCE(
      AdamaxParameterOptimizer(bufs1, beta1, beta2, step, alpha));

  BaseMatrix& value = *bufs2[PARAMETER_VALUE];
  BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];
  BaseMatrix& mom = *bufs2[PARAMETER_MOMENTUM];
  BaseMatrix& u = *bufs2[PARAMETER_WEIGHTED_INFINITY_NORM];

  EXPRESSION_PERFORMANCE(
      adamaxApply(value, grad, mom, u, beta1, beta2, step, alpha));

  CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
  CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);
@@ -376,33 +436,29 @@ void testSparseMomentum(size_t size, bool useGpu) {
  INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM_UT, size, useGpu);
  INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM_VT, size, useGpu);

  real alpha = (real)rand() / (real)RAND_MAX;          // NOLINT
  real beta = (real)rand() / (real)RAND_MAX;           // NOLINT
  real gamma = (real)rand() / (real)RAND_MAX;          // NOLINT
  real tau = (real)rand() / (real)RAND_MAX;            // NOLINT
  real learningRate = (real)rand() / (real)RAND_MAX;   // NOLINT

  EXPRESSION_PERFORMANCE(SparseMomentumParameterOptimizer(
      bufs1, alpha, beta, gamma, tau, learningRate));

  BaseMatrix& value = *bufs2[PARAMETER_VALUE];
  BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];
  BaseMatrix& momU = *bufs2[PARAMETER_MOMENTUM_UT];
  BaseMatrix& momV = *bufs2[PARAMETER_MOMENTUM_VT];

  EXPRESSION_PERFORMANCE(sparseMomentumApply(
      value, grad, momU, momV, alpha, beta, gamma, tau, learningRate));

  CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
  CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM_UT], bufs2[PARAMETER_MOMENTUM_UT]);
  CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM_VT], bufs2[PARAMETER_MOMENTUM_VT]);
}

TEST(Training, SparseMomentum) { testCase(testSparseMomentum); }
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);

@@ -411,4 +467,3 @@ int main(int argc, char** argv) {
  hl_init(FLAGS_gpu_id);
  return RUN_ALL_TESTS();
}
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/math/Matrix.h"
#include "paddle/math/TensorAssign.h"
#include "TensorCheck.h"
#include "PerfUtils.h"

using namespace paddle;  // NOLINT
using namespace std;     // NOLINT