Unverified · Commit 189e0d44 · Authored by wangzhen38, committed by GitHub

Patch del (#52754)

* [DO NOT MERGE] adadelta lr support

* [DO NOT MERGE] gpu support

* [test] follow torch

* fix acc update order

* for ci

* [bug fix] update master param

* [bug fix] update test

* [bug fix] for ci test

* for ci

* fix xpu

* [adadelta fix] del fluid head file

* for ci

* del notes
Parent 8d7c15a7
...
@@ -13,10 +13,6 @@
 // limitations under the License.
 
 #pragma once
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/eigen/eigen_function.h"
-
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/kernels/adadelta_kernel.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -67,26 +63,20 @@ void AdadeltaKernel(const Context& dev_ctx,
       -(((eigen_avg_squared_update + epsilon_).sqrt()) /
        ((eigen_avg_squared_grad_out + epsilon_).sqrt()) * eigen_grad_cast);
   Eigen::DSizes<int, 1> m_dsize(avg_squared_update_out->numel());
-  if (paddle::platform::is_cpu_place(dev_ctx.GetPlace())) {
-    auto* lr = learning_rate.data<T>();
-    eigen_param_out.device(place) =
-        eigen_param + lr[0] * update.template cast<T>();
-  } else {
-    auto lr = EigenVector<MPDType>::Flatten(learning_rate);
-    if (multi_precision) {
-      auto eigen_master_param_out =
-          EigenVector<MPDType>::Flatten(*master_param_outs);
-      auto eigen_master_param = EigenVector<MPDType>::Flatten(*master_param);
-      eigen_master_param_out.device(place) =
-          eigen_master_param + lr.broadcast(m_dsize) * update;
-      eigen_param_out.device(place) = (eigen_param.template cast<MPDType>() +
-                                       lr.broadcast(m_dsize) * update)
-                                          .template cast<T>();
-    } else {
-      eigen_param_out.device(place) =
-          eigen_param + (lr.broadcast(m_dsize) * update).template cast<T>();
-    }
-  }
+  auto lr = EigenVector<MPDType>::Flatten(learning_rate);
+  if (multi_precision) {
+    auto eigen_master_param_out =
+        EigenVector<MPDType>::Flatten(*master_param_outs);
+    auto eigen_master_param = EigenVector<MPDType>::Flatten(*master_param);
+    eigen_master_param_out.device(place) =
+        eigen_master_param + lr.broadcast(m_dsize) * update;
+    eigen_param_out.device(place) =
+        (eigen_param.template cast<MPDType>() + lr.broadcast(m_dsize) * update)
+            .template cast<T>();
+  } else {
+    eigen_param_out.device(place) =
+        eigen_param + (lr.broadcast(m_dsize) * update).template cast<T>();
+  }
   eigen_avg_squared_update_out.device(place) =
       rho_ * eigen_avg_squared_update + (1 - rho_) * update.square();
...
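The reworked hunk drops the CPU-only scalar-lr branch and applies the learning rate through a single broadcast Eigen expression; when multi_precision is set, an FP32 master copy of the parameter is updated and the result is cast back to T. For reference, here is a minimal scalar C++ sketch of one Adadelta step as this kernel computes it (variable names and hyper-parameter values are illustrative, not the kernel's actual signature):

// Scalar sketch of one Adadelta step, mirroring the kernel's arithmetic.
// rho, epsilon, and lr are illustrative values, not kernel defaults.
#include <cmath>
#include <cstdio>

int main() {
  double param = 1.0, grad = 0.5;
  double avg_sq_grad = 0.0, avg_sq_update = 0.0;
  const double rho = 0.95, epsilon = 1.0e-6, lr = 1.0;

  // E[g^2] <- rho * E[g^2] + (1 - rho) * g^2   (computed above the hunk)
  avg_sq_grad = rho * avg_sq_grad + (1 - rho) * grad * grad;

  // update = -sqrt(E[dx^2] + eps) / sqrt(E[g^2] + eps) * g
  const double update = -std::sqrt(avg_sq_update + epsilon) /
                        std::sqrt(avg_sq_grad + epsilon) * grad;

  // param <- param + lr * update   (lr is broadcast in the kernel)
  param += lr * update;

  // E[dx^2] <- rho * E[dx^2] + (1 - rho) * update^2
  avg_sq_update = rho * avg_sq_update + (1 - rho) * update * update;

  std::printf("param = %f, update = %f\n", param, update);
  return 0;
}

With these values, one step gives update = -sqrt(1e-6) / sqrt(0.0125 + 1e-6) * 0.5 ≈ -0.00447, moving param from 1.0 to about 0.99553; the kernel runs the same arithmetic element-wise over the parameter tensor via Eigen expressions.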