未验证 提交 189e0d44 编写于 作者: W wangzhen38 提交者: GitHub

Patch del (#52754)

* [DO NOT MERGE] adadelta lr support

* [DO NOT MERGE] gpu support

* [test] follow torch

* fix acc update order

* for ci

* [bug fix] update master para

* [bug fix] update test

* [bug fix] for ci test

* for ci

* fix xpu

* [adadelta fix] del fluid head file

* for ci

* del notes
上级 8d7c15a7
@@ -13,10 +13,6 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/kernels/adadelta_kernel.h" #include "paddle/phi/kernels/adadelta_kernel.h"
#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -67,11 +63,6 @@ void AdadeltaKernel(const Context& dev_ctx,
-(((eigen_avg_squared_update + epsilon_).sqrt()) / -(((eigen_avg_squared_update + epsilon_).sqrt()) /
((eigen_avg_squared_grad_out + epsilon_).sqrt()) * eigen_grad_cast); ((eigen_avg_squared_grad_out + epsilon_).sqrt()) * eigen_grad_cast);
Eigen::DSizes<int, 1> m_dsize(avg_squared_update_out->numel()); Eigen::DSizes<int, 1> m_dsize(avg_squared_update_out->numel());
if (paddle::platform::is_cpu_place(dev_ctx.GetPlace())) {
auto* lr = learning_rate.data<T>();
eigen_param_out.device(place) =
eigen_param + lr[0] * update.template cast<T>();
} else {
auto lr = EigenVector<MPDType>::Flatten(learning_rate); auto lr = EigenVector<MPDType>::Flatten(learning_rate);
if (multi_precision) { if (multi_precision) {
auto eigen_master_param_out = auto eigen_master_param_out =
@@ -80,14 +71,13 @@ void AdadeltaKernel(const Context& dev_ctx,
eigen_master_param_out.device(place) = eigen_master_param_out.device(place) =
eigen_master_param + lr.broadcast(m_dsize) * update; eigen_master_param + lr.broadcast(m_dsize) * update;
eigen_param_out.device(place) = (eigen_param.template cast<MPDType>() + eigen_param_out.device(place) =
lr.broadcast(m_dsize) * update) (eigen_param.template cast<MPDType>() + lr.broadcast(m_dsize) * update)
.template cast<T>(); .template cast<T>();
} else { } else {
eigen_param_out.device(place) = eigen_param_out.device(place) =
eigen_param + (lr.broadcast(m_dsize) * update).template cast<T>(); eigen_param + (lr.broadcast(m_dsize) * update).template cast<T>();
} }
}
eigen_avg_squared_update_out.device(place) = eigen_avg_squared_update_out.device(place) =
rho_ * eigen_avg_squared_update + (1 - rho_) * update.square(); rho_ * eigen_avg_squared_update + (1 - rho_) * update.square();
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册