提交 84a0574a 编写于 作者: H hedaoyuan

add a PerfUtils.h

上级 8d736813
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// Performance check helper.
//
// EXPRESSION_PERFORMANCE(expression) expands to ONE statement that evaluates
// `expression`; always terminate the invocation with a `;`.
//
//  - PADDLE_DISABLE_TIMER defined: `expression` is evaluated exactly once and
//    nothing is timed or logged.
//  - otherwise: `expression` is evaluated once untimed (presumably a warm-up
//    run -- confirm), then 20 more times with REGISTER_TIMER active in each
//    iteration. The stat looked up under the expression text (truncated to 29
//    characters, ending in ".." when it had to be cut) is written to
//    LOG(INFO), and globalStat.reset() is called afterwards. Side effects of
//    `expression` therefore happen 21 times in this mode.
//
// Both variants are wrapped in `do { ... } while (0)` so the macro behaves
// like an ordinary statement, e.g. as the unbraced body of an `if` that has
// an `else`. Locals introduced by the macro carry a trailing underscore so
// they do not capture identifiers such as `i` or `expr` used inside
// `expression`.
//
// NOTE(review): the timed variant names `globalStat` unqualified, so the
// expansion site presumably needs namespace paddle in scope -- confirm.
#ifdef PADDLE_DISABLE_TIMER
#define EXPRESSION_PERFORMANCE(expression) \
  do {                                     \
    expression;                            \
  } while (0)
#else
#include <cstring>
#include <iomanip>
#include "paddle/utils/Stat.h"
#define EXPRESSION_PERFORMANCE(expression)                                  \
  do {                                                                      \
    /* Fixed-size label: the stringified expression, cut to fit. */         \
    char perfExprLabel_[30];                                                \
    std::strncpy(perfExprLabel_, #expression, sizeof(perfExprLabel_));      \
    /* strncpy zero-pads short input, so a non-NUL last byte means the */   \
    /* text did not fit; mark the cut with ".." and terminate.         */   \
    if (perfExprLabel_[sizeof(perfExprLabel_) - 1] != '\0') {               \
      perfExprLabel_[sizeof(perfExprLabel_) - 3] = '.';                     \
      perfExprLabel_[sizeof(perfExprLabel_) - 2] = '.';                     \
      perfExprLabel_[sizeof(perfExprLabel_) - 1] = '\0';                    \
    }                                                                       \
    /* One untimed evaluation before the timed loop. */                     \
    expression;                                                             \
    for (int perfExprIter_ = 0; perfExprIter_ < 20; perfExprIter_++) {      \
      REGISTER_TIMER(perfExprLabel_);                                       \
      expression;                                                           \
    }                                                                       \
    LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ')      \
              << *globalStat.getStat(perfExprLabel_);                       \
    globalStat.reset();                                                     \
  } while (0)
#endif
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/math/TrainingAlgorithmOp.h"
#include "OriginalOptimizerApi.h"
#include "TensorCheck.h"
#include "PerfUtils.h"
using namespace paddle; // NOLINT
......@@ -32,21 +33,20 @@ public:
max_diff_ = FLAGS_max_diff;
FLAGS_max_diff = max_diff;
}
~SetMaxDiff() {
FLAGS_max_diff = max_diff_;
}
~SetMaxDiff() { FLAGS_max_diff = max_diff_; }
private:
double max_diff_;
};
#define COPY_VECTOR_TO_CPU(cpuVec, vector) \
do {\
if (vector->useGpu()) {\
cpuVec = Vector::create(vector->getSize(), false);\
cpuVec->copyFrom(*vector);\
} else {\
cpuVec = vector;\
}\
do { \
if (vector->useGpu()) { \
cpuVec = Vector::create(vector->getSize(), false); \
cpuVec->copyFrom(*vector); \
} else { \
cpuVec = vector; \
} \
} while (0)
int VectorCheckErr(const Vector& vector1, const Vector& vector2) {
......@@ -96,8 +96,20 @@ void testCase(testMatrixFunc matrixFunc) {
#else
for (auto useGpu : {false}) {
#endif
for (auto size : {1, 32, 64, 128, 512, 1024, 4096, 32768, 65536, 131072,
262144, 524288, 1048576, 2097152}) {
for (auto size : {1,
32,
64,
128,
512,
1024,
4096,
32768,
65536,
131072,
262144,
524288,
1048576,
2097152}) {
LOG(INFO) << " size=" << size << " useGpu=" << useGpu;
matrixFunc(size, useGpu);
}
......@@ -125,8 +137,8 @@ void testAdagrad(size_t size, bool useGpu) {
real momentum = (real)rand() / (real)RAND_MAX; // NOLINT
real decayRate = (real)rand() / (real)RAND_MAX; // NOLINT
EXPRESSION_PERFORMANCE(AdagradParameterOptimizer(bufs1,
epsilon, learningRate, momentum, decayRate));
EXPRESSION_PERFORMANCE(AdagradParameterOptimizer(
bufs1, epsilon, learningRate, momentum, decayRate));
BaseMatrix& value = *bufs2[PARAMETER_VALUE];
BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];
......@@ -135,8 +147,16 @@ void testAdagrad(size_t size, bool useGpu) {
BaseMatrix& accum = *bufs2[PARAMETER_GRADIENT_SQURESUM1];
BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE];
EXPRESSION_PERFORMANCE(adagradApply(value, grad, mom, accum_buffer, accum, lr,
epsilon, learningRate, momentum, decayRate));
EXPRESSION_PERFORMANCE(adagradApply(value,
grad,
mom,
accum_buffer,
accum,
lr,
epsilon,
learningRate,
momentum,
decayRate));
CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);
......@@ -146,9 +166,7 @@ void testAdagrad(size_t size, bool useGpu) {
bufs2[PARAMETER_LEARNING_RATE]);
}
TEST(Training, Adagrad) {
testCase(testAdagrad);
}
TEST(Training, Adagrad) { testCase(testAdagrad); }
void testAdaDelta(size_t size, bool useGpu) {
VectorPtr bufs1[NUM_PARAMETER_TYPES];
......@@ -166,8 +184,8 @@ void testAdaDelta(size_t size, bool useGpu) {
real momentum = (real)rand() / (real)RAND_MAX; // NOLINT
real decayRate = (real)rand() / (real)RAND_MAX; // NOLINT
EXPRESSION_PERFORMANCE(AdaDeltaParameterOptimizer(bufs1,
rou, epsilon, learningRate, momentum, decayRate));
EXPRESSION_PERFORMANCE(AdaDeltaParameterOptimizer(
bufs1, rou, epsilon, learningRate, momentum, decayRate));
BaseMatrix& value = *bufs2[PARAMETER_VALUE];
BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];
......@@ -176,8 +194,17 @@ void testAdaDelta(size_t size, bool useGpu) {
BaseMatrix& accum_update = *bufs2[PARAMETER_GRADIENT_SQURESUM1];
BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE];
EXPRESSION_PERFORMANCE(adadeltaApply(value, grad, mom, accum, accum_update,
lr, rou, epsilon, learningRate, momentum, decayRate));
EXPRESSION_PERFORMANCE(adadeltaApply(value,
grad,
mom,
accum,
accum_update,
lr,
rou,
epsilon,
learningRate,
momentum,
decayRate));
CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);
......@@ -189,11 +216,9 @@ void testAdaDelta(size_t size, bool useGpu) {
bufs2[PARAMETER_LEARNING_RATE]);
}
TEST(Training, AdaDelta) {
testCase(testAdaDelta);
}
TEST(Training, AdaDelta) { testCase(testAdaDelta); }
template<bool isFirstTime>
template <bool isFirstTime>
void testRMSProp(size_t size, bool useGpu) {
VectorPtr bufs1[NUM_PARAMETER_TYPES];
VectorPtr bufs2[NUM_PARAMETER_TYPES];
......@@ -217,7 +242,12 @@ void testRMSProp(size_t size, bool useGpu) {
real accumulatedRou = rou;
EXPRESSION_PERFORMANCE(RMSPropParameterOptimizer(bufs1,
accumulatedRou, rou, epsilon, learningRate, momentum, decayRate,
accumulatedRou,
rou,
epsilon,
learningRate,
momentum,
decayRate,
isFirstTime));
BaseMatrix& value = *bufs2[PARAMETER_VALUE];
......@@ -227,8 +257,18 @@ void testRMSProp(size_t size, bool useGpu) {
BaseMatrix& sum1 = *bufs2[PARAMETER_GRADIENT_SQURESUM1];
BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE];
EXPRESSION_PERFORMANCE(rmspropApply(value, grad, mom, sum, sum1, lr,
accumulatedRou, rou, epsilon, learningRate, momentum, decayRate,
EXPRESSION_PERFORMANCE(rmspropApply(value,
grad,
mom,
sum,
sum1,
lr,
accumulatedRou,
rou,
epsilon,
learningRate,
momentum,
decayRate,
isFirstTime));
CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
......@@ -246,7 +286,7 @@ TEST(Training, RMSProp) {
testCase(testRMSProp<false>);
}
template<bool isFirstTime>
template <bool isFirstTime>
void testDecayedAdagrad(size_t size, bool useGpu) {
VectorPtr bufs1[NUM_PARAMETER_TYPES];
VectorPtr bufs2[NUM_PARAMETER_TYPES];
......@@ -269,7 +309,12 @@ void testDecayedAdagrad(size_t size, bool useGpu) {
}
EXPRESSION_PERFORMANCE(DecayedAdagradParameterOptimizer(bufs1,
accumulatedRou, rou, epsilon, learningRate, momentum, decayRate,
accumulatedRou,
rou,
epsilon,
learningRate,
momentum,
decayRate,
isFirstTime));
BaseMatrix& value = *bufs2[PARAMETER_VALUE];
......@@ -278,8 +323,17 @@ void testDecayedAdagrad(size_t size, bool useGpu) {
BaseMatrix& sum = *bufs2[PARAMETER_GRADIENT_SQURESUM];
BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE];
EXPRESSION_PERFORMANCE(decayedAdagradApply(value, grad, mom, sum, lr,
accumulatedRou, rou, epsilon, learningRate, momentum, decayRate,
EXPRESSION_PERFORMANCE(decayedAdagradApply(value,
grad,
mom,
sum,
lr,
accumulatedRou,
rou,
epsilon,
learningRate,
momentum,
decayRate,
isFirstTime));
CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
......@@ -310,16 +364,24 @@ void testAdam(size_t size, bool useGpu) {
real epsilon = (real)rand() / (real)RAND_MAX; // NOLINT
real learningRate = (real)rand() / (real)RAND_MAX; // NOLINT
EXPRESSION_PERFORMANCE(AdamParameterOptimizer(bufs1,
beta1, beta2, beta1_power, beta2_power, epsilon, learningRate));
EXPRESSION_PERFORMANCE(AdamParameterOptimizer(
bufs1, beta1, beta2, beta1_power, beta2_power, epsilon, learningRate));
BaseMatrix& value = *bufs2[PARAMETER_VALUE];
BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];
BaseMatrix& mom = *bufs2[PARAMETER_MOMENTUM];
BaseMatrix& v = *bufs2[PARAMETER_SECOND_MOMENTUM];
EXPRESSION_PERFORMANCE(adamApply(value, grad, mom, v,
beta1, beta2, beta1_power, beta2_power, epsilon, learningRate));
EXPRESSION_PERFORMANCE(adamApply(value,
grad,
mom,
v,
beta1,
beta2,
beta1_power,
beta2_power,
epsilon,
learningRate));
CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);
......@@ -327,9 +389,7 @@ void testAdam(size_t size, bool useGpu) {
bufs2[PARAMETER_SECOND_MOMENTUM]);
}
TEST(Training, Adam) {
testCase(testAdam);
}
TEST(Training, Adam) { testCase(testAdam); }
void testAdamax(size_t size, bool useGpu) {
VectorPtr bufs1[NUM_PARAMETER_TYPES];
......@@ -344,16 +404,16 @@ void testAdamax(size_t size, bool useGpu) {
real alpha = (real)rand() / (real)RAND_MAX; // NOLINT
int64_t step = 2;
EXPRESSION_PERFORMANCE(AdamaxParameterOptimizer(bufs1,
beta1, beta2, step, alpha));
EXPRESSION_PERFORMANCE(
AdamaxParameterOptimizer(bufs1, beta1, beta2, step, alpha));
BaseMatrix& value = *bufs2[PARAMETER_VALUE];
BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];
BaseMatrix& mom = *bufs2[PARAMETER_MOMENTUM];
BaseMatrix& u = *bufs2[PARAMETER_WEIGHTED_INFINITY_NORM];
EXPRESSION_PERFORMANCE(adamaxApply(value, grad, mom, u,
beta1, beta2, step, alpha));
EXPRESSION_PERFORMANCE(
adamaxApply(value, grad, mom, u, beta1, beta2, step, alpha));
CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]);
......@@ -382,27 +442,23 @@ void testSparseMomentum(size_t size, bool useGpu) {
real tau = (real)rand() / (real)RAND_MAX; // NOLINT
real learningRate = (real)rand() / (real)RAND_MAX; // NOLINT
EXPRESSION_PERFORMANCE(SparseMomentumParameterOptimizer(bufs1,
alpha, beta, gamma, tau, learningRate));
EXPRESSION_PERFORMANCE(SparseMomentumParameterOptimizer(
bufs1, alpha, beta, gamma, tau, learningRate));
BaseMatrix& value = *bufs2[PARAMETER_VALUE];
BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT];
BaseMatrix& momU = *bufs2[PARAMETER_MOMENTUM_UT];
BaseMatrix& momV = *bufs2[PARAMETER_MOMENTUM_VT];
EXPRESSION_PERFORMANCE(sparseMomentumApply(value, grad, momU, momV,
alpha, beta, gamma, tau, learningRate));
EXPRESSION_PERFORMANCE(sparseMomentumApply(
value, grad, momU, momV, alpha, beta, gamma, tau, learningRate));
CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]);
CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM_UT],
bufs2[PARAMETER_MOMENTUM_UT]);
CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM_VT],
bufs2[PARAMETER_MOMENTUM_VT]);
CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM_UT], bufs2[PARAMETER_MOMENTUM_UT]);
CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM_VT], bufs2[PARAMETER_MOMENTUM_VT]);
}
TEST(Training, SparseMomentum) {
testCase(testSparseMomentum);
}
TEST(Training, SparseMomentum) { testCase(testSparseMomentum); }
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
......@@ -411,4 +467,3 @@ int main(int argc, char** argv) {
hl_init(FLAGS_gpu_id);
return RUN_ALL_TESTS();
}
......@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/math/Matrix.h"
#include "paddle/math/TensorAssign.h"
#include "TensorCheck.h"
#include "PerfUtils.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册