diff --git a/paddle/parameter/FirstOrderOptimizer.cpp b/paddle/parameter/FirstOrderOptimizer.cpp
index 02e600adb9e88803fbb54792c307ba4abaf8e570..207fb33f4e88cc8a4467ed7dc35003f348fbf3cf 100644
--- a/paddle/parameter/FirstOrderOptimizer.cpp
+++ b/paddle/parameter/FirstOrderOptimizer.cpp
@@ -305,12 +305,13 @@ void AdamaxParameterOptimizer::update(const VectorPtr vecs[],
 void OptimizerWithGradientClipping::update(const VectorPtr vecs[],
                                            const ParameterConfig& config,
                                            size_t sparseId) const {
-  // globalGradientClipping(vecs, config, FLAGS_log_clipping);
   real global_thres_ = optConfig_.gradient_clipping_threshold();
   real local_thres_ = config.gradient_clipping_threshold();
 
   real threshold;
   std::string field;
+  // Take the minimum of the local and global thresholds
+  // as the actual threshold for clipping.
   if (global_thres_ > 0.0f && local_thres_ > 0.0f) {
     threshold = global_thres_ < local_thres_ ? global_thres_ : local_thres_;
     field = global_thres_ < local_thres_ ? "global" : "local";
diff --git a/paddle/parameter/ParameterOptimizer.h b/paddle/parameter/ParameterOptimizer.h
index 38d432ba9bc4d84edcdcfd93faf00b7181b385a5..f98ba569b569379b30d034739a7f84aaf97108db 100644
--- a/paddle/parameter/ParameterOptimizer.h
+++ b/paddle/parameter/ParameterOptimizer.h
@@ -170,9 +170,6 @@ public:
 
   real getLearningRate() const { return learningRate_; }
 
-  // real getGradientClippingThreshold() const {return
-  // gradientClippingThreshold_;}
-
  virtual void setNoDecay() { applyDecay_ = false; }
 
   static ParameterOptimizer* create(const OptimizationConfig& optConfig,
@@ -206,11 +203,6 @@ protected:
   */
   real learningRate_;
 
-  /**
-   * global threshold for gradient clipping,
-   * init value is opt_config.gradient_clipping_thresholod
-   */
-
   std::unique_ptr<LearningRateScheduler> learningRateScheduler_;
   int64_t pass_;  // current training pass (starting from 0)
   bool firstTime_;
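
Note: a minimal standalone sketch of the threshold-selection rule that the new comment documents. When both the per-parameter (local) and global clipping thresholds are positive, the smaller, i.e. stricter, one wins. The fallback branch is an assumption, since the hunk above ends before the `else`; `pickClippingThreshold` and `main` are hypothetical, not part of Paddle.

    #include <iostream>
    #include <string>
    #include <utility>

    using real = float;  // assumption: Paddle's `real` typedef in a float build

    // Hypothetical helper mirroring OptimizerWithGradientClipping::update.
    // A non-positive threshold means clipping is disabled for that scope.
    std::pair<real, std::string> pickClippingThreshold(real global_thres,
                                                       real local_thres) {
      if (global_thres > 0.0f && local_thres > 0.0f) {
        // Both configured: the minimum (stricter) threshold is used.
        return global_thres < local_thres
                   ? std::make_pair(global_thres, std::string("global"))
                   : std::make_pair(local_thres, std::string("local"));
      }
      // Assumed fallback (not shown in the hunk): take whichever is positive.
      return local_thres > 0.0f
                 ? std::make_pair(local_thres, std::string("local"))
                 : std::make_pair(global_thres, std::string("global"));
    }

    int main() {
      auto r = pickClippingThreshold(10.0f, 3.0f);
      std::cout << "clip with " << r.second << " threshold " << r.first << "\n";
      // prints: clip with local threshold 3
    }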