Commit 00e8791f authored by: D dzhwinter

fix compile in cpu error. test=develop

Parent d239cf2e
@@ -45,12 +45,15 @@ class MomentumOp : public framework::OperatorWithKernel {
                    "Output(VelocityOut) of Momentum should not be null.");
 
     auto param_dim = ctx->GetInputDim("Param");
+    if (ctx->GetInputsVarType("Grad")[0] ==
+        framework::proto::VarType::LOD_TENSOR) {
       PADDLE_ENFORCE_EQ(
           param_dim, ctx->GetInputDim("Grad"),
           "Param and Grad input of MomentumOp should have the same dimension.");
       PADDLE_ENFORCE_EQ(
           param_dim, ctx->GetInputDim("Velocity"),
           "Param and Velocity of MomentumOp should have the same dimension.");
+    }
     PADDLE_ENFORCE_EQ(framework::product(ctx->GetInputDim("LearningRate")), 1,
                       "Learning_rate should be a scalar");
......
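
Note on the hunk above (my reading; the commit message does not spell this out): a SelectedRows gradient stores only the rows that were actually updated, so its value tensor has shape [len(rows), row_numel] rather than Param's [height, row_numel]. The dense equality checks would therefore reject valid sparse gradients, which is presumably why they are now applied only when Grad is a LOD_TENSOR. A minimal standalone sketch of the mismatch, using illustrative shapes rather than the Paddle API:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Hypothetical parameter of shape [height, row_numel].
  const int64_t height = 10, row_numel = 12;
  const std::vector<int64_t> param_dims = {height, row_numel};

  // A sparse (SelectedRows-style) gradient that touches only rows {0, 4, 7}
  // carries a value tensor of shape [number_of_touched_rows, row_numel].
  const std::vector<int64_t> touched_rows = {0, 4, 7};
  const std::vector<int64_t> sparse_grad_dims = {
      static_cast<int64_t>(touched_rows.size()), row_numel};

  // A Param/Grad dimension-equality check would fail here even though the
  // sparse gradient is valid, hence the check is skipped for SelectedRows.
  assert(param_dims != sparse_grad_dims);
  return 0;
}
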
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+#include <string>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/algorithm.h"
@@ -303,28 +304,30 @@ class MomentumOpKernel : public framework::OpKernel<T> {
       auto* merged_grad = const_cast<framework::Scope&>(ctx.scope())
                               .Var()
                               ->GetMutable<framework::SelectedRows>();
       math::scatter::MergeAdd<DeviceContext, T> merge_func;
       merge_func(ctx.template device_context<DeviceContext>(), *grad,
                  merged_grad);
-      platform::ForRange<DeviceContext> for_range(
-          static_cast<const DeviceContext&>(ctx.device_context()),
-          param->numel());
 
       const int64_t* rows = nullptr;
+#ifdef PADDLE_WITH_CUDA
       if (platform::is_gpu_place(ctx.GetPlace())) {
         rows = merged_grad->rows().CUDAData(ctx.GetPlace());
       } else {
+#endif
         rows = merged_grad->rows().data();
+#ifdef PADDLE_WITH_CUDA
       }
+#endif
+      int64_t row_numel =
+          merged_grad->value().numel() / merged_grad->rows().size();
+      platform::ForRange<DeviceContext> for_range(
+          static_cast<const DeviceContext&>(ctx.device_context()),
+          param->numel());
       if (use_nesterov) {
         SparseMomentumFunctor<T, UseNesterov> functor(
             param->data<T>(), merged_grad->value().data<T>(),
-            velocity->data<T>(), learning_rate->data<T>(), mu, rows,
+            velocity->data<T>(), learning_rate->data<T>(), mu, rows, row_numel,
             static_cast<int64_t>(merged_grad->rows().size()),
-            static_cast<int64_t>(merged_grad->height()),
             param_out->mutable_data<T>(ctx.GetPlace()),
             velocity_out->mutable_data<T>(ctx.GetPlace()));
         for_range(functor);
@@ -332,9 +335,8 @@ class MomentumOpKernel : public framework::OpKernel<T> {
       } else {
         SparseMomentumFunctor<T, NoNesterov> functor(
             param->data<T>(), merged_grad->value().data<T>(),
-            velocity->data<T>(), learning_rate->data<T>(), mu, rows,
+            velocity->data<T>(), learning_rate->data<T>(), mu, rows, row_numel,
             static_cast<int64_t>(merged_grad->rows().size()),
-            static_cast<int64_t>(merged_grad->height()),
             param_out->mutable_data<T>(ctx.GetPlace()),
             velocity_out->mutable_data<T>(ctx.GetPlace()));
         for_range(functor);
......
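
The two hunks above contain the CPU-build fix the commit message refers to: the CUDAData() accessor only exists when Paddle is built with CUDA, so even a never-taken is_gpu_place() branch breaks a CPU-only compile; guarding the GPU branch with #ifdef PADDLE_WITH_CUDA removes it from the translation unit entirely. The same change also moves the ForRange setup after the rows handling and passes row_numel (elements per row of the merged gradient) to SparseMomentumFunctor in place of the SelectedRows height. A minimal sketch of the guard pattern, assuming a hypothetical container whose device_data() accessor is compiled only for GPU builds:

#include <vector>

// Stand-in for a rows container: device_data() exists only in the GPU build,
// much like CUDAData() in the kernel above. Names here are illustrative.
struct Rows {
  std::vector<long long> host;
  const long long* data() const { return host.data(); }
#ifdef PADDLE_WITH_CUDA
  // Placeholder for a device-side pointer accessor.
  const long long* device_data() const { return host.data(); }
#endif
};

const long long* PickRowsPointer(const Rows& rows, bool on_gpu) {
  // A runtime `if (on_gpu)` alone is not enough: the GPU-only call must not
  // appear in the source at all when the build lacks CUDA, otherwise the
  // CPU-only build fails at compile time. The preprocessor guard handles it.
#ifdef PADDLE_WITH_CUDA
  if (on_gpu) {
    return rows.device_data();
  } else {
#endif
    return rows.data();
#ifdef PADDLE_WITH_CUDA
  }
#endif
}
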
@@ -121,22 +121,13 @@ class TestSparseMomentumOp(unittest.TestCase):
         grad_tensor = grad_selected_rows.get_tensor()
         grad_tensor.set(grad_np_array, place)
 
-        velocity_selected_rows = scope.var('Velocity').get_selected_rows()
-        velocity_selected_rows.set_height(height)
-        velocity_selected_rows.set_rows(rows)
-        velocity_np_array = np.ones((len(rows), row_numel)).astype("float32")
-        velocity_np_array[0, 0] = 2.0
-        velocity_np_array[2, 8] = 2.0
-        velocity_tensor = velocity_selected_rows.get_tensor()
-        velocity_tensor.set(velocity_np_array, place)
-        velocity_out_selected_rows = scope.var('VelocityOut').get_selected_rows(
-        )
-        velocity_out_selected_rows.set_height(height)
-        velocity_out_selected_rows.set_rows(rows)
-        velocity_out_np_array = np.full((len(rows), row_numel),
+        velocity = scope.var('Velocity').get_tensor()
+        velocity_np_array = np.ones((height, row_numel)).astype("float32")
+        velocity.set(velocity_np_array, place)
+        velocity_out = scope.var('VelocityOut').get_tensor()
+        velocity_out_np_array = np.full((height, row_numel),
                                         0.0).astype("float32")
-        velocity_out_tensor = velocity_out_selected_rows.get_tensor()
-        velocity_out_tensor.set(velocity_out_np_array, place)
+        velocity_out.set(velocity_out_np_array, place)
 
         # create and initialize LeraningRate Variable
         lr = scope.var('LearningRate').get_tensor()
@@ -158,19 +149,22 @@ class TestSparseMomentumOp(unittest.TestCase):
         # get and compare result
         param_out_np_array = np.array(param_out)
-        velocity_out_np_array = np.array(velocity_out_tensor)
+        velocity_out_np_array = np.array(velocity_out)
 
         # TODO(dzh): add a more suitable general numpy interface
         # for sparse update.
-        _velocity_out = mu * velocity_np_array + grad_np_array
-        _param = param_array[rows]
+        _grad_np_array = np.full((height, row_numel), 0.0).astype("float32")
+        for i in range(len(rows)):
+            _grad_np_array[rows[i]] = grad_np_array[i]
+        _velocity_out = mu * velocity_np_array + _grad_np_array
+        _param = param_array
         if use_nesterov:
-            _param_out = _param - grad_np_array * lr_array - \
-                         _velocity_out * mu * lr_array
+            _param_out = _param - (_grad_np_array + _velocity_out * mu
+                                   ) * lr_array
         else:
-            _param_out = _param - lr * _velocity_out
-        self.assertTrue((_param_out == param_out_np_array[rows]).all())
+            _param_out = _param - lr_array * _velocity_out
         self.assertTrue((_velocity_out == velocity_out_np_array).all())
+        self.assertTrue((_param_out == param_out_np_array).all())
 
     def init_kernel(self):
         pass
......
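
For reference, my reading of the updated check: the test now scatters the sparse gradient into a dense [height, row_numel] array (zeros for untouched rows) and verifies the dense momentum rule

    dense_grad[rows[i]] = grad[i]                                  (all other rows stay zero)
    velocity_out = mu * velocity + dense_grad
    param_out    = param - (dense_grad + mu * velocity_out) * lr   (Nesterov)
    param_out    = param - lr * velocity_out                       (plain momentum)

Since untouched rows have zero gradient, their velocities are still scaled by mu and their parameter rows still move by the velocity term, which is why the assertions now compare the full arrays instead of only the selected rows.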