提交 a20f8e29 编写于 作者: Q Qiao Longfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into...

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dist-table-support-optimizer-regular
......@@ -119,8 +119,8 @@ struct SparseAdagradFunctor<platform::CPUDeviceContext, T> {
auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
// 2. m += g_m * g_m
math::scatter::Mul<platform::CPUDeviceContext, T> sqare_func;
auto grad_square = sqare_func(context, grad_merge, grad_merge);
auto grad_square =
SquareSelectedRows<platform::CPUDeviceContext, T>(context, grad_merge);
math::SelectedRowsAddToTensor<platform::CPUDeviceContext, T> functor;
functor(context, grad_square, moment);
......
......@@ -84,8 +84,8 @@ struct SparseAdagradFunctor<platform::CUDADeviceContext, T> {
auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
framework::Vector<int64_t> merge_rows(grad_merge.rows());
// 2. m += g_m * g_m
math::scatter::Mul<platform::CUDADeviceContext, T> sqare_func;
auto grad_square = sqare_func(context, grad_merge, grad_merge);
auto grad_square =
SquareSelectedRows<platform::CUDADeviceContext, T>(context, grad_merge);
math::SelectedRowsAddToTensor<platform::CUDADeviceContext, T> functor;
functor(context, grad_square, moment);
......
......@@ -28,6 +28,20 @@ struct SparseAdagradFunctor {
framework::Tensor *moment, framework::Tensor *param);
};
// Builds a new SelectedRows whose value tensor is the element-wise square of
// `input`'s value tensor.
//
// The result copies `input`'s rows and height, and its value tensor is
// allocated with `input.value().dims()` on `context.GetPlace()`. The squaring
// is evaluated on `context.eigen_device()`.
template <typename DeviceContext, typename T>
framework::SelectedRows SquareSelectedRows(
    const DeviceContext &context, const framework::SelectedRows &input) {
  framework::SelectedRows squared;
  squared.set_height(input.height());
  squared.set_rows(input.rows());

  // Allocate the destination buffer before taking a flattened view of it.
  squared.mutable_value()->mutable_data<T>(input.value().dims(),
                                           context.GetPlace());

  auto dst = framework::EigenVector<T>::Flatten(*(squared.mutable_value()));
  auto src = framework::EigenVector<T>::Flatten(input.value());
  dst.device(*context.eigen_device()) = src.square();
  return squared;
}
template <typename DeviceContext, typename T>
class AdagradOpKernel : public framework::OpKernel<T> {
public:
......
......@@ -168,9 +168,11 @@ class VAddKernelImpl : public VAddKernel<T> {
#endif
this->Compute = VAddRefer<T>;
}
#ifdef PADDLE_WITH_XBYAK
private:
std::unique_ptr<gen::VVVJitCode> jitcode_{nullptr};
#endif
};
#ifdef PADDLE_WITH_XBYAK
......@@ -210,9 +212,11 @@ class VAddReluKernelImpl : public VAddReluKernel<T> {
#endif
this->Compute = VAddReluRefer<T>;
}
#ifdef PADDLE_WITH_XBYAK
private:
std::unique_ptr<gen::VVVJitCode> jitcode_{nullptr};
#endif
};
#ifdef PADDLE_WITH_XBYAK
......
......@@ -88,57 +88,6 @@ struct MergeAdd {
framework::SelectedRows* output);
};
// Functor that adds the value tensors of two SelectedRows element-wise.
//
// The result takes its rows, height and value dims from `input1`; `input2`
// is only read through its flattened value tensor.
// NOTE(review): presumably both inputs must share rows and dims, but nothing
// in this functor checks that; confirm against callers.
template <typename DeviceContext, typename T>
struct Add {
  framework::SelectedRows operator()(const DeviceContext& context,
                                     const framework::SelectedRows& input1,
                                     const framework::SelectedRows& input2) {
    framework::SelectedRows sum;
    sum.set_height(input1.height());
    sum.set_rows(input1.rows());

    // Allocate the destination buffer before taking a flattened view of it.
    sum.mutable_value()->mutable_data<T>(input1.value().dims(),
                                         context.GetPlace());

    auto dst = framework::EigenVector<T>::Flatten(*(sum.mutable_value()));
    auto lhs = framework::EigenVector<T>::Flatten(input1.value());
    auto rhs = framework::EigenVector<T>::Flatten(input2.value());
    dst.device(*context.eigen_device()) = lhs + rhs;
    return sum;
  }
};
// Functor that multiplies the value tensor of a SelectedRows element-wise,
// either by the value tensor of another SelectedRows or by a scalar.
//
// In both overloads the result takes its rows, height and value dims from
// `input1`, and the product is evaluated on `context.eigen_device()`.
template <typename DeviceContext, typename T>
struct Mul {
  // SelectedRows * SelectedRows (element-wise on the flattened values).
  // NOTE(review): presumably both inputs must share rows and dims, but
  // nothing here checks that; confirm against callers.
  framework::SelectedRows operator()(const DeviceContext& context,
                                     const framework::SelectedRows& input1,
                                     const framework::SelectedRows& input2) {
    framework::SelectedRows product;
    product.set_height(input1.height());
    product.set_rows(input1.rows());

    // Allocate the destination buffer before taking a flattened view of it.
    product.mutable_value()->mutable_data<T>(input1.value().dims(),
                                             context.GetPlace());

    auto dst = framework::EigenVector<T>::Flatten(*(product.mutable_value()));
    auto lhs = framework::EigenVector<T>::Flatten(input1.value());
    auto rhs = framework::EigenVector<T>::Flatten(input2.value());
    dst.device(*context.eigen_device()) = lhs * rhs;
    return product;
  }

  // scalar * SelectedRows: every element of input1's value is scaled by
  // input2.
  framework::SelectedRows operator()(const DeviceContext& context,
                                     const framework::SelectedRows& input1,
                                     const T input2) {
    framework::SelectedRows scaled;
    scaled.set_height(input1.height());
    scaled.set_rows(input1.rows());

    // Allocate the destination buffer before taking a flattened view of it.
    scaled.mutable_value()->mutable_data<T>(input1.value().dims(),
                                            context.GetPlace());

    auto dst = framework::EigenVector<T>::Flatten(*(scaled.mutable_value()));
    auto src = framework::EigenVector<T>::Flatten(input1.value());
    dst.device(*context.eigen_device()) = input2 * src;
    return scaled;
  }
};
enum class ScatterOps { ASSIGN, ADD, SUB, SUBBY, MUL, DIV, DIVBY };
// out = selected_rows_in / tensor
......
......@@ -16,6 +16,7 @@ from __future__ import print_function
import paddle.dataset.conll05 as conll05
import paddle.fluid as fluid
import paddle.fluid.core as core
import unittest
import paddle
import numpy as np
......@@ -177,32 +178,36 @@ class TestCRFModel(unittest.TestCase):
# Runs check_network_convergence with is_sparse=True and a BuildStrategy whose
# reduce_strategy is ReduceStrategy.AllReduce.
# NOTE(review): this listing looks like a diff rendered without +/- markers
# and with indentation stripped. The first, unguarded use_cuda=True call is
# presumably the pre-change line that the guarded call below replaces;
# confirm against the real file.
def test_update_sparse_parameter_all_reduce(self):
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=True)
# Guard on whether Paddle was compiled with CUDA.
if core.is_compiled_with_cuda():
self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=True)
# Same check with use_cuda=False.
self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=False)
# Runs check_network_convergence with is_sparse=False and a BuildStrategy whose
# reduce_strategy is ReduceStrategy.AllReduce.
# NOTE(review): this listing looks like a diff rendered without +/- markers
# and with indentation stripped. The first, unguarded use_cuda=True call is
# presumably the pre-change line that the guarded call below replaces;
# confirm against the real file.
def test_update_dense_parameter_all_reduce(self):
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
self.check_network_convergence(
is_sparse=False, build_strategy=build_strategy, use_cuda=True)
# Guard on whether Paddle was compiled with CUDA.
if core.is_compiled_with_cuda():
self.check_network_convergence(
is_sparse=False, build_strategy=build_strategy, use_cuda=True)
# Same check with use_cuda=False.
self.check_network_convergence(
is_sparse=False, build_strategy=build_strategy, use_cuda=False)
# Runs check_network_convergence with is_sparse=True and a BuildStrategy whose
# reduce_strategy is ReduceStrategy.Reduce.
# NOTE(review): this listing looks like a diff rendered without +/- markers
# and with indentation stripped. The first, unguarded use_cuda=True call is
# presumably the pre-change line that the guarded call below replaces;
# confirm against the real file.
def test_update_sparse_parameter_reduce(self):
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=True)
# Guard on whether Paddle was compiled with CUDA.
if core.is_compiled_with_cuda():
self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=True)
# Same check with use_cuda=False.
self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=False)
# Runs check_network_convergence with is_sparse=False and a BuildStrategy whose
# reduce_strategy is ReduceStrategy.Reduce.
# NOTE(review): this listing looks like a diff rendered without +/- markers
# and with indentation stripped. The first, unguarded use_cuda=True call is
# presumably the pre-change line that the guarded call below replaces;
# confirm against the real file.
def test_update_dense_parameter_reduce(self):
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
self.check_network_convergence(
is_sparse=False, build_strategy=build_strategy, use_cuda=True)
# Guard on whether Paddle was compiled with CUDA.
if core.is_compiled_with_cuda():
self.check_network_convergence(
is_sparse=False, build_strategy=build_strategy, use_cuda=True)
# Same check with use_cuda=False.
self.check_network_convergence(
is_sparse=False, build_strategy=build_strategy, use_cuda=False)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册