diff --git a/paddle/fluid/operators/compare_op.cc b/paddle/fluid/operators/compare_op.cc index 86f7046058c7001fcaa588727b1cdc0f3f20c35f..9a139ab27ec53395a8d1ab1347dbce93ea68fd8e 100644 --- a/paddle/fluid/operators/compare_op.cc +++ b/paddle/fluid/operators/compare_op.cc @@ -29,6 +29,11 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker { AddInput("Y", string::Sprintf( "(LoDTensor) the right hand operand of %s operator", comment.type)); + AddAttr<bool>("force_cpu", + "(bool, default false) Force fill output variable to cpu " + "memory. Otherwise, fill output variable to the running " + "device") + .SetDefault(false); AddOutput("Out", string::Sprintf( "(LoDTensor) n-dim bool tensor. Each element is %s", comment.equation)); @@ -75,7 +80,9 @@ class CompareOp : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { framework::OpKernelType kt = OperatorWithKernel::GetExpectedKernelType(ctx); // CompareOp kernel's device type is decided by input tensor place - kt.place_ = ctx.Input("X")->place(); + bool force_cpu = ctx.Attr<bool>("force_cpu"); + kt.place_ = force_cpu ? 
platform::CPUPlace() + : ctx.Input("X")->place(); return kt; } }; diff --git a/paddle/fluid/operators/while_op.cc b/paddle/fluid/operators/while_op.cc index 8c1a2549e0390dd6e0603dd1bed429ff25ad3220..8b62b242cf8745378eb216db10605388b294ca75 100644 --- a/paddle/fluid/operators/while_op.cc +++ b/paddle/fluid/operators/while_op.cc @@ -54,6 +54,8 @@ class WhileOp : public framework::OperatorBase { auto step_scopes = scope.FindVar(Output(kStepScopes))->GetMutable(); + PADDLE_ENFORCE(platform::is_cpu_place(cond.place()), + "Condition of while op must in CPU memory."); while (cond.data()[0]) { auto &current_scope = scope.NewScope(); step_scopes->push_back(&current_scope); diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index af55ef49beaeab612f17369c766385d661aa4ab7..fbfc383d118acca42e16d27a8dc962b0ba0a539b 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -18,6 +18,7 @@ from tensor import assign, fill_constant from .. import core from ..framework import Program, Variable, Operator from ..layer_helper import LayerHelper, unique_name +from ..initializer import force_init_on_cpu from ops import logical_and, logical_not, logical_or __all__ = [ @@ -949,7 +950,7 @@ def create_array(dtype): dtype=dtype) -def less_than(x, y, cond=None, **ignored): +def less_than(x, y, force_cpu=True, cond=None, **ignored): """ **Less than** @@ -958,6 +959,7 @@ def less_than(x, y, cond=None, **ignored): Args: x(Variable): First operand of *less_than* y(Variable): Second operand of *less_than* + force_cpu(Bool|True): The output data will be on CPU if set true. 
cond(Variable|None): Optional output variable to store the result of *less_than* Returns: @@ -974,8 +976,11 @@ def less_than(x, y, cond=None, **ignored): cond.stop_gradient = True helper.append_op( - type='less_than', inputs={'X': [x], - 'Y': [y]}, outputs={'Out': [cond]}) + type='less_than', + inputs={'X': [x], + 'Y': [y]}, + outputs={'Out': [cond]}, + attrs={'force_cpu': force_cpu or force_init_on_cpu()}) return cond @@ -1395,7 +1400,8 @@ class DynamicRNN(object): type='less_than', inputs={'X': self.step_idx, 'Y': self.max_seq_len}, - outputs={'Out': self.cond}) + outputs={'Out': self.cond}, + attrs={'force_cpu': True}) input_array = parent_block.create_var( name=unique_name.generate('dynamic_rnn_input_array'), @@ -1443,7 +1449,11 @@ class DynamicRNN(object): for new_mem, mem_array in self.mem_link: array_write(x=new_mem, i=self.step_idx, array=mem_array) - less_than(x=self.step_idx, y=self.max_seq_len, cond=self.cond) + less_than( + x=self.step_idx, + y=self.max_seq_len, + force_cpu=True, + cond=self.cond) self.status = DynamicRNN.AFTER_RNN for each_array in self.output_array: