diff --git a/paddle/fluid/operators/array_operator.h b/paddle/fluid/operators/array_operator.h
index 1beff472ecaf75e531e9ca8874d45b9379ce39d7..44063f233caf80455f6ef76c3939412bb2c4bd48 100644
--- a/paddle/fluid/operators/array_operator.h
+++ b/paddle/fluid/operators/array_operator.h
@@ -20,6 +20,7 @@ limitations under the License. */
 
 namespace paddle {
 namespace operators {
+
 class ArrayOp : public framework::OperatorBase {
  public:
   ArrayOp(const std::string &type, const framework::VariableNameMap &inputs,
@@ -45,7 +46,8 @@ class ArrayOp : public framework::OperatorBase {
     auto &dev_ctx = *pool.Get(place);
 
     size_t offset;
-    if (platform::is_gpu_place(i_tensor.place())) {
+    if (platform::is_gpu_place(i_tensor.place()) ||
+        platform::is_xpu_place(i_tensor.place())) {
       // FIXME: Avoid copy from GPU to CPU
       framework::Tensor t;
       framework::TensorCopy(i_tensor, platform::CPUPlace(), dev_ctx, &t);
diff --git a/paddle/fluid/operators/concat_op_xpu.cc b/paddle/fluid/operators/concat_op_xpu.cc
index 4ebe92801e623aef0ca4e90927e2b2d0fce4d9e7..aa0002cc6d1777dab6e598fc7c123e5255d0f094 100644
--- a/paddle/fluid/operators/concat_op_xpu.cc
+++ b/paddle/fluid/operators/concat_op_xpu.cc
@@ -47,19 +47,6 @@ class ConcatXPUKernel : public framework::OpKernel<T> {
                           "size is %d.",
                           axis, ins[0]->dims().size()));
 
-    auto place = ctx.GetPlace();
-    out->mutable_data<T>(place);
-    std::vector<int> choose_idx;
-    int n = 0;
-    for (unsigned int i = 0; i < ins.size(); ++i) {
-      if (ins[i] && ins[i]->numel() > 0) {
-        choose_idx.push_back(i);
-        n++;
-      }
-    }
-    PADDLE_ENFORCE_GT(
-        n, 0, platform::errors::InvalidArgument("No tensor need concat?"));
-
     // If axis is 0, the lod of the output is not the same as inputs.
     if (axis == 0 && ins[0]->lod().size() > 0) {
       size_t lod_size_0 = ins[0]->lod().size();
@@ -87,30 +74,32 @@ class ConcatXPUKernel : public framework::OpKernel<T> {
         }
       }
     }
-
-    auto input_dims = ins[0]->dims();
-    std::vector<std::vector<int>> xdims_list(n);
-    for (int i = 0; i < n; ++i) {
-      std::vector<int> tmp_dims(input_dims.size());
-      for (int j = 0; j < input_dims.size(); ++j) {
-        tmp_dims[j] = ins[i]->dims()[j];
+    auto place = ctx.GetPlace();
+    out->mutable_data<T>(place);
+    std::vector<std::vector<int>> xdims_list;
+    std::vector<const T*> ptrs;
+    for (unsigned int i = 0; i < ins.size(); ++i) {
+      if (ins[i] && ins[i]->numel() > 0) {
+        ptrs.push_back(ins[i]->data<T>());
+        int size = ins[i]->dims().size();
+        std::vector<int> tmp_dims(size);
+        for (int j = 0; j < size; ++j) {
+          tmp_dims[j] = ins[i]->dims()[j];
+        }
+        xdims_list.push_back(tmp_dims);
       }
-      xdims_list[i] = tmp_dims;
     }
+    PADDLE_ENFORCE_GT(xdims_list.size(), 0, platform::errors::InvalidArgument(
+                                                "No tensor need concat"));
 
     auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-    std::vector<const T*> ptrs;
-    for (int i = 0; i < n; ++i) {
-      ptrs.push_back(ins[choose_idx[i]]->data<T>());
-    }
+
     int r = xpu::concat<T>(dev_ctx.x_context(), ptrs, out->data<T>(),
                            xdims_list, axis);
-    PADDLE_ENFORCE_EQ(
-        r, XPU_SUCCESS,
-        platform::errors::External(
-            "XPU API return wrong value[%d], please check whether "
-            "Baidu Kunlun Card is properly installed.",
-            r));
+    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
+                      platform::errors::External(
+                          "XPU concat kernel return wrong value[%d %s]", r,
+                          XPUAPIErrorMsg[r]));
   }
 };
 
diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc
index 7b93ea15de3da3c0b5d6f0a93b1e96dbbc75fd2f..41f631f5547369a491e886434b243336fc57b0b4 100644
--- a/paddle/fluid/operators/reshape_op.cc
+++ b/paddle/fluid/operators/reshape_op.cc
@@ -380,11 +380,20 @@ class ReshapeKernel {
 #ifdef PADDLE_WITH_XPU
     if (platform::is_xpu_place(ctx.GetPlace())) {
-      auto &dev_ctx =
-          ctx.template device_context<paddle::platform::XPUDeviceContext>();
-      xpu::memcpy_device(
-          dev_ctx.x_context(), out->data<void>(), in->data<void>(),
-          in->numel() * paddle::framework::SizeOfType(in->type()));
+      void *out_ptr = out->data<void>();
+      const void *in_ptr = in->data<void>();
+      if ((out_ptr != nullptr) && (in_ptr != nullptr) &&
+          (paddle::framework::SizeOfType(in->type()) > 0)) {
+        auto &dev_ctx =
+            ctx.template device_context<paddle::platform::XPUDeviceContext>();
+        int r = xpu::memcpy_device(
+            dev_ctx.x_context(), out_ptr, in_ptr,
+            in->numel() * paddle::framework::SizeOfType(in->type()));
+        PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
+                          platform::errors::External(
+                              "XPU memcpy_device return wrong value[%d %s]", r,
+                              XPUAPIErrorMsg[r]));
+      }
     } else {
 #endif
       framework::TensorCopy(