diff --git a/paddle/math/Vector.h b/paddle/math/Vector.h index 80b9775fccf10c57bb48145ef56165ec7c86d8b8..7dbf3cfb0d5433c1b44947fe7e24c7ab1f9ec183 100644 --- a/paddle/math/Vector.h +++ b/paddle/math/Vector.h @@ -92,6 +92,28 @@ public: const T* getData() const { return this->data_; } T* getData() { return this->data_; } +#ifdef PADDLE_USE_MKLDNN + /** + * In-place momentum SGD step on every element, parallelized with OpenMP + */ + void sgdUpdateWithOMP(VectorT& gradVec, + VectorT& momVec, + T learningRate, + T momentum, + T decayRate) { + size_t size = this->getSize(); + T* val = this->getData(); + T* grd = gradVec.getData(); + T* mom = momVec.getData(); + decayRate *= learningRate; +#pragma omp parallel for + for (size_t i = 0; i < size; ++i) { + mom[i] = momentum * mom[i] - learningRate * grd[i] - decayRate * val[i]; + val[i] += mom[i]; + } + } +#endif + virtual void zeroMem() = 0; // set all elements to value virtual void reset(const T& value) = 0; diff --git a/paddle/parameter/FirstOrderOptimizer.h b/paddle/parameter/FirstOrderOptimizer.h index caa78acd98ea4b35fc69643689cfce23026275e0..73e09aee2366bed095be532ab11f3c0d40f6d01f 100644 --- a/paddle/parameter/FirstOrderOptimizer.h +++ b/paddle/parameter/FirstOrderOptimizer.h @@ -37,6 +37,15 @@ public: real torch_learningRate = optConfig_.learning_method() == "torch_momentum" ? 1.0 - paraConfig.momentum() : 1.0; +#ifdef PADDLE_USE_MKLDNN + vecs[PARAMETER_VALUE]->sgdUpdateWithOMP( + *vecs[PARAMETER_GRADIENT], + *vecs[PARAMETER_MOMENTUM], + learningRate_ * paraConfig.learning_rate() * + (firstTime_ ? 1.0 : torch_learningRate), + paraConfig.momentum(), + applyDecay_ ? paraConfig.decay_rate() : 0); +#else vecs[PARAMETER_VALUE]->sgdUpdate( *vecs[PARAMETER_GRADIENT], *vecs[PARAMETER_MOMENTUM], @@ -44,6 +53,7 @@ public: (firstTime_ ? 1.0 : torch_learningRate), paraConfig.momentum(), applyDecay_ ? 
paraConfig.decay_rate() : 0); +#endif } virtual void finishBatch() { firstTime_ = false; } }; diff --git a/paddle/parameter/ParameterUpdateFunctions.cpp b/paddle/parameter/ParameterUpdateFunctions.cpp index c8af7105c78dcbf9f625a348b7f38efcf278469e..8b3be062b654a52e667626199be8c8bb4a2a96d7 100644 --- a/paddle/parameter/ParameterUpdateFunctions.cpp +++ b/paddle/parameter/ParameterUpdateFunctions.cpp @@ -30,6 +30,9 @@ void sgdUpdateCpu(real learningRate, const real* grad, real* momentumVec) { decayRate *= learningRate; +#ifdef PADDLE_USE_MKLDNN +#pragma omp parallel for +#endif for (size_t i = 0; i < size; ++i) { momentumVec[i] = momentum * momentumVec[i] - learningRate * grad[i] - decayRate * value[i];