提交 52574733 编写于 作者: J JiayiFeng

Add KernelType switch for IncrementOp kernel

上级 0ac43217
......@@ -33,6 +33,15 @@ class IncrementOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", "Out");
}
protected:
// Selects the kernel type for IncrementOp.
//
// The default kernel type is obtained from OperatorWithKernel, and its place
// is then overwritten with the place of the input tensor "X", so the device
// the kernel is chosen for follows where that input lives.
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext &ctx) const override {
  framework::OpKernelType kernel_type =
      OperatorWithKernel::GetExpectedKernelType(ctx);
  // The device is dictated by the input tensor rather than the default choice.
  const auto *x_tensor = ctx.Input<framework::LoDTensor>("X");
  kernel_type.place_ = x_tensor->place();
  return kernel_type;
}
};
class IncrementOpMaker : public framework::OpProtoAndCheckerMaker {
......
......@@ -1362,7 +1362,8 @@ class DynamicRNN(object):
self.lod_rank_table = None
self.max_seq_len = None
self.step_idx = None
self.zero_idx = fill_constant(shape=[1], value=0, dtype='int64')
self.zero_idx = fill_constant(
shape=[1], value=0, dtype='int64', force_cpu=True)
self.mem_dict = dict()
self.output_array = []
self.outputs = []
......@@ -1439,7 +1440,8 @@ class DynamicRNN(object):
def block(self):
if self.status != DynamicRNN.BEFORE_RNN:
raise ValueError("rnn.block() can only be invoke once")
self.step_idx = fill_constant(shape=[1], dtype='int64', value=0)
self.step_idx = fill_constant(
shape=[1], dtype='int64', value=0, force_cpu=True)
self.step_idx.stop_gradient = False
self.status = DynamicRNN.IN_RNN
with self.while_op.block():
......
......@@ -3307,7 +3307,8 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
name=counter_name, dtype='int64', shape=[1], persistable=True)
if is_new_var:
helper.set_variable_initializer(
counter, initializer=Constant(value=begin - 1))
counter, initializer=Constant(
value=begin - 1, force_cpu=True))
helper.main_program.global_block().prepend_op(
type='increment',
inputs={'X': [counter]},
......
......@@ -83,7 +83,7 @@ def decoder_train(context, is_sparse):
def decoder_decode(context, is_sparse):
init_state = context
array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
counter = pd.zeros(shape=[1], dtype='int64')
counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)
# fill the first element with init_state
state_array = pd.create_array('float32')
......
......@@ -33,7 +33,8 @@ class TestProfiler(unittest.TestCase):
image = fluid.layers.data(name='x', shape=[784], dtype='float32')
hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
i = layers.zeros(shape=[1], dtype='int64')
counter = fluid.layers.zeros(shape=[1], dtype='int64')
counter = fluid.layers.zeros(
shape=[1], dtype='int64', force_cpu=True)
until = layers.fill_constant([1], dtype='int64', value=10)
data_arr = layers.array_write(hidden1, i)
cond = fluid.layers.less_than(x=counter, y=until)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册