From 8b7c50f49a16456d8e517c349c2cc1133078121b Mon Sep 17 00:00:00 2001
From: guofei <52460041+gfwm2013@users.noreply.github.com>
Date: Thu, 19 Dec 2019 11:16:07 +0800
Subject: [PATCH] Make While Op able to run on GPU place and add while_loop
 unittests (#21672)

1. Make while_op accept GPU conditional data
2. Add more complex test cases for the while_loop API
---
 .../fluid/operators/controlflow/while_op.cc   |  11 +-
 .../operators/controlflow/while_op_helper.cc  |  20 +-
 .../operators/controlflow/while_op_helper.h   |   2 +
 .../tests/unittests/test_while_loop_op.py     | 180 +++++++++++++++++-
 4 files changed, 198 insertions(+), 15 deletions(-)

diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index 0953256b97..2e35941279 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -74,25 +74,26 @@ class WhileOp : public framework::OperatorBase {
     }
     PADDLE_ENFORCE_EQ(step_scopes->size(), 0,
                       "The StepScope should be empty.");
-    PADDLE_ENFORCE(platform::is_cpu_place(cond.place()),
-                   "Condition of while op must in CPU memory.");
+    bool cond_data = GetCondData(cond);
     bool is_test = Attr<bool>("is_test");
     auto &skip_vars = Attr<std::vector<std::string>>(kSkipEagerDeletionVars);
     VLOG(2) << GetSkipEagerDeletionVarsDebugString(skip_vars);
 
     auto ctx = executor.Prepare(*program, block->ID(), skip_vars);
     if (!is_test) {
-      while (cond.data<bool>()[0]) {
+      while (cond_data) {
         auto &current_scope = scope.NewScope();
         step_scopes->push_back(&current_scope);
         executor.RunPreparedContext(ctx.get(), &current_scope, false, true,
                                     true);
+        cond_data =
+            GetCondData(scope.FindVar(Input(kCondition))->Get<framework::LoDTensor>());
       }
     } else {
       auto &current_scope = scope.NewScope();
       executor.CreateVariables(*program, &current_scope, block->ID());
-      while (cond.data<bool>()[0]) {
+      while (cond_data) {
         for (auto &name : current_scope.LocalVarNames()) {
           auto *var = current_scope.Var(name);
           if (var->IsType<framework::LoDTensor>()) {
@@ -108,6 +109,8 @@
         }
         executor.RunPreparedContext(ctx.get(), &current_scope, false, false,
                                     false);
+        cond_data =
+            GetCondData(scope.FindVar(Input(kCondition))->Get<framework::LoDTensor>());
       }
       scope.DeleteScope(&current_scope);
     }
diff --git a/paddle/fluid/operators/controlflow/while_op_helper.cc b/paddle/fluid/operators/controlflow/while_op_helper.cc
index 8f1e3f6092..e9a7dc4382 100644
--- a/paddle/fluid/operators/controlflow/while_op_helper.cc
+++ b/paddle/fluid/operators/controlflow/while_op_helper.cc
@@ -17,9 +17,10 @@
 #include <memory>
 #include <string>
 #include <utility>
-
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/controlflow/op_variant.h"
+#include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/string/string_helper.h"
 
 namespace paddle {
 namespace operators {
@@ -196,5 +197,22 @@ void PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(
       &bwd_ops);
 }
 
+// Make while_op able to run on GPU place
+bool GetCondData(const framework::LoDTensor &cond) {
+  if (platform::is_cpu_place(cond.place())) {
+    return cond.data<bool>()[0];
+  }
+  // when platform::is_gpu_place(cond.place()) is true
+  std::unique_ptr<framework::LoDTensor> cpu_cond{new framework::LoDTensor()};
+#ifdef PADDLE_WITH_CUDA
+  framework::TensorCopySync(cond, platform::CPUPlace(), cpu_cond.get());
+#else
+  PADDLE_THROW(platform::errors::PreconditionNotMet(
+      "This version of PaddlePaddle does NOT support GPU but got GPU tensor "
+      "Cond in WhileOp. Please compile WITH_GPU option."));
+#endif
+  return cpu_cond->data<bool>()[0];
+}
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/controlflow/while_op_helper.h b/paddle/fluid/operators/controlflow/while_op_helper.h
index e2cfece658..4f9d93c91f 100644
--- a/paddle/fluid/operators/controlflow/while_op_helper.h
+++ b/paddle/fluid/operators/controlflow/while_op_helper.h
@@ -40,5 +40,7 @@ void PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(
     const std::vector<framework::OperatorBase *> &while_ops,
     const std::vector<framework::OperatorBase *> &while_grad_ops);
 
+bool GetCondData(const framework::LoDTensor &cond);
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/python/paddle/fluid/tests/unittests/test_while_loop_op.py b/python/paddle/fluid/tests/unittests/test_while_loop_op.py
index 78ceb5250c..dc74f650c5 100644
--- a/python/paddle/fluid/tests/unittests/test_while_loop_op.py
+++ b/python/paddle/fluid/tests/unittests/test_while_loop_op.py
@@ -22,6 +22,7 @@
 import paddle.fluid.layers as layers
 import paddle.fluid.framework as framework
 from paddle.fluid.executor import Executor
 from paddle.fluid.framework import Program, program_guard
+from paddle.fluid.backward import append_backward
 
 class TestApiWhileLoop(unittest.TestCase):
@@ -40,7 +41,8 @@
             ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
             out = layers.while_loop(cond, body, (i, ))
 
-        place = fluid.CPUPlace()
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
         exe = fluid.Executor(place)
         res = exe.run(main_program, fetch_list=out)
         self.assertTrue(
@@ -60,14 +62,19 @@
         with program_guard(main_program, startup_program):
             i = layers.zeros(shape=[1], dtype='int64')
             ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
-            mem = layers.data(name="mem", shape=[10], dtype='float32')
+            mem = layers.data(
+                name='mem',
+                shape=[10],
+                dtype='float32',
+                append_batch_size=False)
             one = layers.fill_constant(shape=[10], dtype='float32', value=1)
             out = layers.while_loop(cond, body, [i, mem])
 
         data = np.random.rand(10).astype('float32')
         data_one = np.ones(10).astype('float32')
-        place = fluid.CPUPlace()
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
         exe = fluid.Executor(place)
         res = exe.run(main_program, feed={'mem': data}, fetch_list=out)
         for i in range(10):
@@ -104,30 +111,183 @@ class TestApiWhileLoop_Nested(unittest.TestCase):
         with program_guard(main_program, startup_program):
             i = layers.zeros(shape=[1], dtype='int64')
             j = layers.zeros(shape=[1], dtype='int64')
-            init = layers.data(name="init", shape=[3, 3], dtype='float32')
-            sums = layers.data(name="sums", shape=[3, 3], dtype='float32')
+            init = layers.data(
+                name='init',
+                shape=[3, 3],
+                dtype='float32',
+                append_batch_size=False)
+            sums = layers.data(
+                name='sums',
+                shape=[3, 3],
+                dtype='float32',
+                append_batch_size=False)
             loop_len1 = layers.fill_constant(shape=[1], dtype='int64', value=2)
             loop_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3)
             ones = layers.fill_constant(shape=[3, 3], dtype='float32', value=1)
 
-            res = layers.while_loop(external_cond, external_body,
+            out = layers.while_loop(external_cond, external_body,
                                     [i, j, init, sums])
 
         data = np.random.rand(3, 3).astype('float32')
         data_sums = np.zeros([3, 3]).astype('float32')
-        place = fluid.CPUPlace()
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
         exe = fluid.Executor(place)
-        ret = exe.run(main_program,
+        res = exe.run(main_program,
                       feed={'init': data,
                             'sums': data_sums},
-                      fetch_list=res)
+                      fetch_list=out)
         for i in range(3):
             data = np.add(data, 1)
             data_sums = np.add(data, data_sums)
             for j in range(2):
                 data_sums = np.add(data, data_sums)
-        self.assertTrue(np.allclose(np.asarray(ret[3]), data_sums))
+        self.assertTrue(np.allclose(np.asarray(res[3]), data_sums))
+
+
+class TestApiWhileLoop_Backward(unittest.TestCase):
+    def test_while_loop_backward(self):
+        def cond(i, x):
+            return layers.less_than(i, eleven)
+
+        def body(i, x):
+            x = layers.elementwise_mul(x=i, y=i)
+            i = layers.increment(i)
+            return [i, x]
+
+        main_program = Program()
+        startup_program = Program()
+        with fluid.program_guard(main_program, startup_program):
+            i = layers.data(
+                name='i', shape=[1], dtype='float32', append_batch_size=False)
+            i.stop_gradient = False
+            eleven = layers.fill_constant(shape=[1], dtype='float32', value=11)
+            one = layers.fill_constant(shape=[1], dtype='float32', value=1)
+            x = layers.data(
+                name='x', shape=[1], dtype='float32', append_batch_size=False)
+            x.stop_gradient = False
+
+            out = layers.while_loop(cond, body, [i, x])
+            mean = layers.mean(out[1])
+            append_backward(mean)
+
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        feed_i = np.ones(1).astype('float32')
+        feed_x = np.ones(1).astype('float32')
+        data = np.asarray([100]).astype('float32')
+        i_grad = np.asarray([110]).astype('float32')
+
+        res = exe.run(main_program,
+                      feed={'i': feed_i,
+                            'x': feed_x},
+                      fetch_list=[mean.name, i.grad_name])
+        self.assertTrue(np.allclose(np.asarray(res[0]), data))
+        self.assertTrue(np.allclose(np.asarray(res[1]), i_grad))
+
+
+class TestApiWhileLoop_NestedWithBackward(unittest.TestCase):
+    def test_nested_net_with_backward(self):
+        def external_cond(i, x, y):
+            return layers.less_than(i, ten)
+
+        def external_body(i, x, y):
+            def internal_cond(i, x, y):
+                return layers.less_than(i, five)
+
+            def internal_body(i, x, y):
+                x = layers.elementwise_add(x=i, y=i)
+                i = layers.increment(i)
+                return [i, x, y]
+
+            temp = layers.while_loop(internal_cond, internal_body, [i, x, y])
+            y = layers.elementwise_add(x=temp[1], y=i)
+            i = layers.increment(i)
+            return [i, x, y]
+
+        main_program = Program()
+        startup_program = Program()
+
+        with fluid.program_guard(main_program, startup_program):
+            i = layers.data(
+                name='i', shape=[1], dtype='float32', append_batch_size=False)
+            i.stop_gradient = False
+            ten = layers.fill_constant(shape=[1], dtype='float32', value=10)
+            five = layers.fill_constant(shape=[1], dtype='float32', value=5)
+            x = layers.data(
+                name='x', shape=[1], dtype='float32', append_batch_size=False)
+            x.stop_gradient = False
+            y = layers.data(
+                name='y', shape=[1], dtype='float32', append_batch_size=False)
+            y.stop_gradient = False
+            out = layers.while_loop(external_cond, external_body, [i, x, y])
+
+            mean = layers.mean(out[2])
+            append_backward(mean)
+
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        data = np.asarray([17]).astype('float32')
+        feed_x = np.zeros(1).astype('float32')
+        feed_i = np.ones(1).astype('float32')
+        feed_y = np.zeros(1).astype('float32')
+        i_grad = np.asarray(13).astype('int32')
+
+        res = exe.run(main_program,
+                      feed={'i': feed_i,
+                            'x': feed_x,
+                            'y': feed_y},
+                      fetch_list=[mean.name, i.grad_name])
+
+        self.assertTrue(np.allclose(np.asarray(res[0]), data))
+        self.assertTrue(np.allclose(np.asarray(res[1]), i_grad))
+
+
+class TestApiWhileLoopWithSwitchCase(unittest.TestCase):
+    def test_with_switch_case(self):
+        def cond(i):
+            return layers.less_than(i, ten)
+
+        def body(i):
+            def fn_add_three():
+                data_add_three = layers.elementwise_add(x=i, y=three)
+                return data_add_three
+
+            def fn_square():
+                data_mul_data = layers.elementwise_mul(x=i, y=i)
+                return data_mul_data
+
+            def fn_add_one():
+                data_add_one = layers.elementwise_add(x=i, y=one)
+                return data_add_one
+
+            return layers.switch_case(
+                branch_index=i,
+                branch_fns={2: fn_add_three,
+                            5: fn_square},
+                default=fn_add_one)
+
+        main_program = Program()
+        startup_program = Program()
+        with fluid.program_guard(main_program, startup_program):
+            i = layers.fill_constant(shape=[1], dtype='int64', value=1)
+            ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
+            three = layers.fill_constant(shape=[1], dtype='int64', value=3)
+            one = layers.fill_constant(shape=[1], dtype='int64', value=1)
+            out = layers.while_loop(cond, body, [i])
+
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        res = exe.run(main_program, fetch_list=out)
+
+        data = np.asarray([25]).astype('int64')
+        self.assertTrue(np.allclose(np.asarray(res[0]), data))
 
 
 class TestApiWhileLoop_Error(unittest.TestCase):
-- 
GitLab
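
Note: the following is a minimal, self-contained sketch (not part of the patch) of the behavior this change enables, using only the fluid APIs already exercised by the tests above; the names cond, body, and limit are illustrative. Before this change, while_op enforced that the condition tensor live in CPU memory ("Condition of while op must in CPU memory."); with GetCondData the GPU-resident condition is copied to the CPU on each iteration instead, so the same program can run on a CUDA place.

    import paddle.fluid as fluid
    import paddle.fluid.core as core
    import paddle.fluid.layers as layers


    def cond(i):
        # `limit` is captured from the enclosing scope; loop runs while i < limit.
        return layers.less_than(i, limit)


    def body(i):
        # Increment the loop counter; the returned structure matches loop_vars.
        return [layers.increment(i)]


    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        i = layers.zeros(shape=[1], dtype='int64')
        limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
        out = layers.while_loop(cond, body, [i])

    # When CUDA is available, the boolean condition produced by less_than lives on
    # the GPU; while_op now copies it to CPU internally via GetCondData.
    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program, fetch_list=out)
    print(res[0])  # should print [5]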