From d6a27ade5469dbaf832983fabaa32ec70ab4c2f5 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Mon, 25 Sep 2017 17:13:05 +0800
Subject: [PATCH] add OMP SGD to speedup with CPUs

---
 paddle/math/Vector.h                          | 22 +++++++++++++++++++
 paddle/parameter/FirstOrderOptimizer.h        | 10 +++++++++
 paddle/parameter/ParameterUpdateFunctions.cpp |  3 +++
 3 files changed, 35 insertions(+)

diff --git a/paddle/math/Vector.h b/paddle/math/Vector.h
index 80b9775fccf..7dbf3cfb0d5 100644
--- a/paddle/math/Vector.h
+++ b/paddle/math/Vector.h
@@ -92,6 +92,28 @@ public:
   const T* getData() const { return this->data_; }
   T* getData() { return this->data_; }
 
+#ifdef PADDLE_USE_MKLDNN
+  /**
+   * sgd update with openmp to speedup
+   */
+  void sgdUpdateWithOMP(VectorT& gradVec,
+                        VectorT& momVec,
+                        T learningRate,
+                        T momentum,
+                        T decayRate) {
+    size_t size = this->getSize();
+    T* val = this->getData();
+    T* grd = gradVec.getData();
+    T* mom = momVec.getData();
+    decayRate *= learningRate;
+#pragma omp parallel for
+    for (size_t i = 0; i < size; ++i) {
+      mom[i] = momentum * mom[i] - learningRate * grd[i] - decayRate * val[i];
+      val[i] += mom[i];
+    }
+  }
+#endif
+
   virtual void zeroMem() = 0;
   // set all elements to value
   virtual void reset(const T& value) = 0;
diff --git a/paddle/parameter/FirstOrderOptimizer.h b/paddle/parameter/FirstOrderOptimizer.h
index caa78acd98e..73e09aee236 100644
--- a/paddle/parameter/FirstOrderOptimizer.h
+++ b/paddle/parameter/FirstOrderOptimizer.h
@@ -37,6 +37,15 @@ public:
     real torch_learningRate = optConfig_.learning_method() == "torch_momentum"
                                   ? 1.0 - paraConfig.momentum()
                                   : 1.0;
+#ifdef PADDLE_USE_MKLDNN
+    vecs[PARAMETER_VALUE]->sgdUpdateWithOMP(
+        *vecs[PARAMETER_GRADIENT],
+        *vecs[PARAMETER_MOMENTUM],
+        learningRate_ * paraConfig.learning_rate() *
+            (firstTime_ ? 1.0 : torch_learningRate),
+        paraConfig.momentum(),
+        applyDecay_ ? paraConfig.decay_rate() : 0);
+#else
     vecs[PARAMETER_VALUE]->sgdUpdate(
         *vecs[PARAMETER_GRADIENT],
         *vecs[PARAMETER_MOMENTUM],
@@ -44,6 +53,7 @@
         (firstTime_ ? 1.0 : torch_learningRate),
         paraConfig.momentum(),
         applyDecay_ ? paraConfig.decay_rate() : 0);
+#endif
   }
   virtual void finishBatch() { firstTime_ = false; }
 };
diff --git a/paddle/parameter/ParameterUpdateFunctions.cpp b/paddle/parameter/ParameterUpdateFunctions.cpp
index c8af7105c78..8b3be062b65 100644
--- a/paddle/parameter/ParameterUpdateFunctions.cpp
+++ b/paddle/parameter/ParameterUpdateFunctions.cpp
@@ -30,6 +30,9 @@ void sgdUpdateCpu(real learningRate,
                   const real* grad,
                   real* momentumVec) {
   decayRate *= learningRate;
+#ifdef PADDLE_USE_MKLDNN
+#pragma omp parallel for
+#endif
   for (size_t i = 0; i < size; ++i) {
     momentumVec[i] = momentum * momentumVec[i] - learningRate * grad[i] -
                      decayRate * value[i];
-- 
GitLab