diff --git a/paddle/fluid/framework/new_executor/data_transfer.cc b/paddle/fluid/framework/new_executor/data_transfer.cc index 581701c1e119c876464fd62c25ab2815c20cf1e9..d9dcef62237e114621ab4f0616c7cca3dcbafc7a 100644 --- a/paddle/fluid/framework/new_executor/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/data_transfer.cc @@ -305,11 +305,6 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, var_scope->GetIdByName(var_name); op_func_node->output_index[pair.first][j] = var_scope->VarId(new_var_name); - // NOTE(zhiqiu): The inplace op with `transfer` also changes - // original output after that - // so add original output as well - op_func_node->output_index[pair.first].push_back( - var_scope->VarId(var_name)); } } } diff --git a/paddle/fluid/framework/new_executor/interpretercore_util.cc b/paddle/fluid/framework/new_executor/interpretercore_util.cc index b89683dc4399167e3c702dadb1bc9e25f31c0677..07fb60bd2186c218bf544da6158562c9d04bb185 100644 --- a/paddle/fluid/framework/new_executor/interpretercore_util.cc +++ b/paddle/fluid/framework/new_executor/interpretercore_util.cc @@ -667,6 +667,23 @@ std::map> build_op_downstream_map( } } } + + // NOTE(zhiqiu): The inplace op with `transfer` also changes + // original output after that so add original output as well + // original: a->op->a + // after: a->data_transfer->a'->op->a'->transfer_back->a + // which means op writes a and a' + if (!vec_instruction[op_idx].InplaceBackMap().empty()) { + auto& m = vec_instruction[op_idx].InplaceBackMap(); + for (auto& p : m) { + auto var = p.second; + var2recent_write_op[var] = op_idx; + // var in input list and in output list, so remove it. + if (remove_duplicate.count(var) == 0) { + update_var_min_rw_op(op2dependences, &var2min_rw_op, op_idx, var); + } + } + } } return std::move(get_downstream_map(op2dependences)); } diff --git a/paddle/phi/kernels/cpu/adam_kernel.cc b/paddle/phi/kernels/cpu/adam_kernel.cc index 661d30095d2cd141f3d91274a640b3739f56b82a..1e0f5c4df9fd67bb64318a4d1a905576cf24c718 100644 --- a/paddle/phi/kernels/cpu/adam_kernel.cc +++ b/paddle/phi/kernels/cpu/adam_kernel.cc @@ -71,8 +71,8 @@ void AdamDenseKernel(const Context& dev_ctx, phi::Copy(dev_ctx, param, dev_ctx.GetPlace(), false, param_out); phi::Copy(dev_ctx, moment1, dev_ctx.GetPlace(), false, moment1_out); phi::Copy(dev_ctx, moment2, dev_ctx.GetPlace(), false, moment2_out); - phi::Copy(dev_ctx, beta1_pow, dev_ctx.GetPlace(), false, beta1_pow_out); - phi::Copy(dev_ctx, beta2_pow, dev_ctx.GetPlace(), false, beta2_pow_out); + phi::Copy(dev_ctx, beta1_pow, beta1_pow.place(), false, beta1_pow_out); + phi::Copy(dev_ctx, beta2_pow, beta2_pow.place(), false, beta2_pow_out); return; } diff --git a/paddle/phi/kernels/gpu/adam_kernel.cu b/paddle/phi/kernels/gpu/adam_kernel.cu index 68bf1757a99122b52851c21ea1531732149e9cfd..d3317e258e5382d5d2ca49916da056e8f8506527 100644 --- a/paddle/phi/kernels/gpu/adam_kernel.cu +++ b/paddle/phi/kernels/gpu/adam_kernel.cu @@ -172,8 +172,8 @@ void AdamDenseKernel(const Context& dev_ctx, phi::Copy(dev_ctx, param, dev_ctx.GetPlace(), false, param_out); phi::Copy(dev_ctx, moment1, dev_ctx.GetPlace(), false, moment1_out); phi::Copy(dev_ctx, moment2, dev_ctx.GetPlace(), false, moment2_out); - phi::Copy(dev_ctx, beta1_pow, dev_ctx.GetPlace(), false, beta1_pow_out); - phi::Copy(dev_ctx, beta2_pow, dev_ctx.GetPlace(), false, beta2_pow_out); + phi::Copy(dev_ctx, beta1_pow, beta1_pow.place(), false, beta1_pow_out); + phi::Copy(dev_ctx, beta2_pow, beta2_pow.place(), false, beta2_pow_out); return; }