Commit 8b7c50f4 authored by guofei, committed by Huihuang Zheng

Make while_op run on GPU place and add while_loop unittests (#21672)

1. Make while_op accept conditional data placed on the GPU (see the usage sketch below)
2. Add more complex test cases for the while_loop API
Parent 17299b8d
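A minimal usage sketch (an editor's illustration, not code from this commit) of what the change enables: a fluid.layers.while_loop program executed on a CUDA place, where the boolean condition tensor produced inside the loop may now live in GPU memory. The variable names and loop bound are arbitrary; the calls follow the fluid 1.x API used in the tests below.

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers


def cond(i):
    # The condition is recomputed every iteration; with this commit the
    # while_op can read it even when the tensor is placed on the GPU.
    return layers.less_than(i, ten)


def body(i):
    return [layers.increment(i)]


main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
    i = layers.zeros(shape=[1], dtype='int64')
    ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
    out = layers.while_loop(cond, body, [i])

# Before this change the condition had to sit in CPU memory; GetCondData now
# copies it back from the GPU when needed.
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
exe = fluid.Executor(place)
res = exe.run(main_program, fetch_list=out)
print(res[0])  # expected to be [10] once the loop terminates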
@@ -74,25 +74,26 @@ class WhileOp : public framework::OperatorBase {
     }
     PADDLE_ENFORCE_EQ(step_scopes->size(), 0, "The StepScope should be empty.");
-    PADDLE_ENFORCE(platform::is_cpu_place(cond.place()),
-                   "Condition of while op must in CPU memory.");
+    bool cond_data = GetCondData(cond);
     bool is_test = Attr<bool>("is_test");
     auto &skip_vars = Attr<std::vector<std::string>>(kSkipEagerDeletionVars);
     VLOG(2) << GetSkipEagerDeletionVarsDebugString(skip_vars);
 
     auto ctx = executor.Prepare(*program, block->ID(), skip_vars);
     if (!is_test) {
-      while (cond.data<bool>()[0]) {
+      while (cond_data) {
         auto &current_scope = scope.NewScope();
         step_scopes->push_back(&current_scope);
         executor.RunPreparedContext(ctx.get(), &current_scope, false, true,
                                     true);
+        cond_data =
+            GetCondData(scope.FindVar(Input(kCondition))->Get<LoDTensor>());
       }
     } else {
       auto &current_scope = scope.NewScope();
       executor.CreateVariables(*program, &current_scope, block->ID());
-      while (cond.data<bool>()[0]) {
+      while (cond_data) {
         for (auto &name : current_scope.LocalVarNames()) {
           auto *var = current_scope.Var(name);
           if (var->IsType<framework::LoDTensor>()) {
@@ -108,6 +109,8 @@ class WhileOp : public framework::OperatorBase {
         }
         executor.RunPreparedContext(ctx.get(), &current_scope, false, false,
                                     false);
+        cond_data =
+            GetCondData(scope.FindVar(Input(kCondition))->Get<LoDTensor>());
       }
       scope.DeleteScope(&current_scope);
     }
......
@@ -17,9 +17,10 @@
 #include <string>
 #include <unordered_set>
 #include <utility>
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/controlflow/op_variant.h"
+#include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/string/string_helper.h"
 
 namespace paddle {
@@ -196,5 +197,22 @@ void PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(
       &bwd_ops);
 }
 
+// Make while_op run on GPU place
+bool GetCondData(const framework::LoDTensor &cond) {
+  if (platform::is_cpu_place(cond.place())) {
+    return cond.data<bool>()[0];
+  }
+  // when platform::is_gpu_place(cond.place()) is true
+  std::unique_ptr<framework::LoDTensor> cpu_cond{new framework::LoDTensor()};
+#ifdef PADDLE_WITH_CUDA
+  framework::TensorCopySync(cond, platform::CPUPlace(), cpu_cond.get());
+#else
+  PADDLE_THROW(platform::errors::PreconditionNotMet(
+      "This version of PaddlePaddle does NOT support GPU but got GPU tensor "
+      "Cond in WhileOp. Please compile with the WITH_GPU option."));
+#endif
+  return cpu_cond->data<bool>()[0];
+}
+
 }  // namespace operators
 }  // namespace paddle
@@ -40,5 +40,7 @@ void PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(
     const std::vector<framework::OperatorBase *> &while_ops,
     const std::vector<framework::OperatorBase *> &while_grad_ops);
 
+bool GetCondData(const framework::LoDTensor &cond);
+
 }  // namespace operators
 }  // namespace paddle
@@ -22,6 +22,7 @@ import paddle.fluid.layers as layers
 import paddle.fluid.framework as framework
 from paddle.fluid.executor import Executor
 from paddle.fluid.framework import Program, program_guard
+from paddle.fluid.backward import append_backward
 
 
 class TestApiWhileLoop(unittest.TestCase):
@@ -40,7 +41,8 @@ class TestApiWhileLoop(unittest.TestCase):
             ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
             out = layers.while_loop(cond, body, (i, ))
 
-        place = fluid.CPUPlace()
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
         exe = fluid.Executor(place)
         res = exe.run(main_program, fetch_list=out)
         self.assertTrue(
@@ -60,14 +62,19 @@
         with program_guard(main_program, startup_program):
             i = layers.zeros(shape=[1], dtype='int64')
             ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
-            mem = layers.data(name="mem", shape=[10], dtype='float32')
+            mem = layers.data(
+                name='mem',
+                shape=[10],
+                dtype='float32',
+                append_batch_size=False)
             one = layers.fill_constant(shape=[10], dtype='float32', value=1)
             out = layers.while_loop(cond, body, [i, mem])
 
         data = np.random.rand(10).astype('float32')
         data_one = np.ones(10).astype('float32')
 
-        place = fluid.CPUPlace()
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
         exe = fluid.Executor(place)
         res = exe.run(main_program, feed={'mem': data}, fetch_list=out)
         for i in range(10):
@@ -104,30 +111,183 @@ class TestApiWhileLoop_Nested(unittest.TestCase):
         with program_guard(main_program, startup_program):
             i = layers.zeros(shape=[1], dtype='int64')
             j = layers.zeros(shape=[1], dtype='int64')
-            init = layers.data(name="init", shape=[3, 3], dtype='float32')
-            sums = layers.data(name="sums", shape=[3, 3], dtype='float32')
+            init = layers.data(
+                name='init',
+                shape=[3, 3],
+                dtype='float32',
+                append_batch_size=False)
+            sums = layers.data(
+                name='sums',
+                shape=[3, 3],
+                dtype='float32',
+                append_batch_size=False)
             loop_len1 = layers.fill_constant(shape=[1], dtype='int64', value=2)
             loop_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3)
             ones = layers.fill_constant(shape=[3, 3], dtype='float32', value=1)
 
-            res = layers.while_loop(external_cond, external_body,
+            out = layers.while_loop(external_cond, external_body,
                                     [i, j, init, sums])
 
         data = np.random.rand(3, 3).astype('float32')
         data_sums = np.zeros([3, 3]).astype('float32')
 
-        place = fluid.CPUPlace()
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
         exe = fluid.Executor(place)
-        ret = exe.run(main_program,
+        res = exe.run(main_program,
                       feed={'init': data,
                             'sums': data_sums},
-                      fetch_list=res)
+                      fetch_list=out)
         for i in range(3):
             data = np.add(data, 1)
             data_sums = np.add(data, data_sums)
             for j in range(2):
                 data_sums = np.add(data, data_sums)
-        self.assertTrue(np.allclose(np.asarray(ret[3]), data_sums))
+        self.assertTrue(np.allclose(np.asarray(res[3]), data_sums))
 
 
+class TestApiWhileLoop_Backward(unittest.TestCase):
+    def test_while_loop_backward(self):
+        def cond(i, x):
+            return layers.less_than(i, eleven)
+
+        def body(i, x):
+            x = layers.elementwise_mul(x=i, y=i)
+            i = layers.increment(i)
+            return [i, x]
+
+        main_program = Program()
+        startup_program = Program()
+        with fluid.program_guard(main_program, startup_program):
+            i = layers.data(
+                name='i', shape=[1], dtype='float32', append_batch_size=False)
+            i.stop_gradient = False
+            eleven = layers.fill_constant(shape=[1], dtype='float32', value=11)
+            one = layers.fill_constant(shape=[1], dtype='float32', value=1)
+            x = layers.data(
+                name='x', shape=[1], dtype='float32', append_batch_size=False)
+            x.stop_gradient = False
+
+            out = layers.while_loop(cond, body, [i, x])
+            mean = layers.mean(out[1])
+            append_backward(mean)
+
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        feed_i = np.ones(1).astype('float32')
+        feed_x = np.ones(1).astype('float32')
+        data = np.asarray([100]).astype('float32')
+        i_grad = np.asarray([110]).astype('float32')
+
+        res = exe.run(main_program,
+                      feed={'i': feed_i,
+                            'x': feed_x},
+                      fetch_list=[mean.name, i.grad_name])
+        self.assertTrue(np.allclose(np.asarray(res[0]), data))
+        self.assertTrue(np.allclose(np.asarray(res[1]), i_grad))
+
+
+class TestApiWhileLoop_NestedWithBackward(unittest.TestCase):
+    def test_nested_net_with_backward(self):
+        def external_cond(i, x, y):
+            return layers.less_than(i, ten)
+
+        def external_body(i, x, y):
+            def internal_cond(i, x, y):
+                return layers.less_than(i, five)
+
+            def internal_body(i, x, y):
+                x = layers.elementwise_add(x=i, y=i)
+                i = layers.increment(i)
+                return [i, x, y]
+
+            temp = layers.while_loop(internal_cond, internal_body, [i, x, y])
+            y = layers.elementwise_add(x=temp[1], y=i)
+            i = layers.increment(i)
+            return [i, x, y]
+
+        main_program = Program()
+        startup_program = Program()
+        with fluid.program_guard(main_program, startup_program):
+            i = layers.data(
+                name='i', shape=[1], dtype='float32', append_batch_size=False)
+            i.stop_gradient = False
+            ten = layers.fill_constant(shape=[1], dtype='float32', value=10)
+            five = layers.fill_constant(shape=[1], dtype='float32', value=5)
+            x = layers.data(
+                name='x', shape=[1], dtype='float32', append_batch_size=False)
+            x.stop_gradient = False
+            y = layers.data(
+                name='y', shape=[1], dtype='float32', append_batch_size=False)
+            y.stop_gradient = False
+
+            out = layers.while_loop(external_cond, external_body, [i, x, y])
+            mean = layers.mean(out[2])
+            append_backward(mean)
+
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        data = np.asarray([17]).astype('float32')
+        feed_x = np.zeros(1).astype('float32')
+        feed_i = np.ones(1).astype('float32')
+        feed_y = np.zeros(1).astype('float32')
+        i_grad = np.asarray(13).astype('int32')
+
+        res = exe.run(main_program,
+                      feed={'i': feed_i,
+                            'x': feed_x,
+                            'y': feed_y},
+                      fetch_list=[mean.name, i.grad_name])
+        self.assertTrue(np.allclose(np.asarray(res[0]), data))
+        self.assertTrue(np.allclose(np.asarray(res[1]), i_grad))
+
+
+class TestApiWhileLoopWithSwitchCase(unittest.TestCase):
+    def test_with_switch_case(self):
+        def cond(i):
+            return layers.less_than(i, ten)
+
+        def body(i):
+            def fn_add_three():
+                data_add_three = layers.elementwise_add(x=i, y=three)
+                return data_add_three
+
+            def fn_square():
+                data_mul_data = layers.elementwise_mul(x=i, y=i)
+                return data_mul_data
+
+            def fn_add_one():
+                data_add_one = layers.elementwise_add(x=i, y=one)
+                return data_add_one
+
+            return layers.switch_case(
+                branch_index=i,
+                branch_fns={2: fn_add_three,
+                            5: fn_square},
+                default=fn_add_one)
+
+        main_program = Program()
+        startup_program = Program()
+        with fluid.program_guard(main_program, startup_program):
+            i = layers.fill_constant(shape=[1], dtype='int64', value=1)
+            ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
+            three = layers.fill_constant(shape=[1], dtype='int64', value=3)
+            one = layers.fill_constant(shape=[1], dtype='int64', value=1)
+            out = layers.while_loop(cond, body, [i])
+
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        res = exe.run(main_program, fetch_list=out)
+
+        data = np.asarray([25]).astype('int64')
+        self.assertTrue(np.allclose(np.asarray(res[0]), data))
+
+
 class TestApiWhileLoop_Error(unittest.TestCase):
......