未验证 提交 8b59f5e0 作者: Baibaifan 提交者: GitHub

mode_npu_gather_v2 (#34194)

上级 fb55e00e
@@ -40,6 +40,9 @@ class LookupTableV2NPUKernel : public framework::OpKernel<T> {
platform::errors::InvalidArgument("npu only accept LoDTensor")); platform::errors::InvalidArgument("npu only accept LoDTensor"));
output_t->mutable_data<T>(ctx.GetPlace()); output_t->mutable_data<T>(ctx.GetPlace());
std::vector<int> ids;
TensorToVector(*ids_t, ctx.device_context(), &ids);
NpuOpRunner runner; NpuOpRunner runner;
runner.SetType("GatherV2") runner.SetType("GatherV2")
.AddInput(*table_t) .AddInput(*table_t)
......
@@ -374,7 +374,7 @@ class ShardingOptimizer(MetaOptimizerBase):
'w') as f: 'w') as f:
f.writelines(str(main_block.program)) f.writelines(str(main_block.program))
if core.is_compiled_with_cuda(): # GPU and NPU need to wait server ready
self._wait() self._wait()
return optimize_ops, params_grads return optimize_ops, params_grads
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册