提交 2c29cf1e 编写于 作者: W wanghaoshuang

Use Tensor as the temporary variables instead of raw CUDA API calls

上级 8d9d537b
...@@ -27,12 +27,12 @@ class CropOp : public framework::OperatorWithKernel { ...@@ -27,12 +27,12 @@ class CropOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const framework::InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
auto x_dim = ctx.Input<LoDTensor>("X")->dims();
auto Y = ctx.Input<LoDTensor>("Y");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
"Input(X) of CropOp should not be null."); "Input(X) of CropOp should not be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"),
"Output(Out) of CropOp should not be null."); "Output(Out) of CropOp should not be null.");
auto x_dim = ctx.Input<LoDTensor>("X")->dims();
auto Y = ctx.Input<LoDTensor>("Y");
if (Y == nullptr) { if (Y == nullptr) {
auto shape = Attr<std::vector<int>>("shape"); auto shape = Attr<std::vector<int>>("shape");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -40,7 +40,7 @@ class CropOp : public framework::OperatorWithKernel { ...@@ -40,7 +40,7 @@ class CropOp : public framework::OperatorWithKernel {
"Shape size should be equal to dimention size of input tensor."); "Shape size should be equal to dimention size of input tensor.");
std::vector<int64_t> tensor_shape(shape.size()); std::vector<int64_t> tensor_shape(shape.size());
for (size_t i = 0; i < shape.size(); ++i) { for (size_t i = 0; i < shape.size(); ++i) {
tensor_shape[i] = (int64_t)shape[i]; tensor_shape[i] = static_cast<int64_t>(shape[i]);
} }
ctx.Output<LoDTensor>("Out")->Resize(framework::make_ddim(tensor_shape)); ctx.Output<LoDTensor>("Out")->Resize(framework::make_ddim(tensor_shape));
} else { } else {
...@@ -65,6 +65,15 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -65,6 +65,15 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", AddOutput("Out",
"The output of crop op " "The output of crop op "
"with the same dimension as X."); "with the same dimension as X.");
AddAttr<std::vector<int>>("offsets",
"A list<int> describing offsets to be cropped."
"The size of offsets list should be as same as "
"dimension size of input X.");
AddAttr<std::vector<int>>("shape",
"A list<int> describing the shape of output."
"The size of shape list should be as same as "
"dimension size of input X.")
.SetDefault(std::vector<int>());
AddComment(R"DOC( AddComment(R"DOC(
Crop Operator. Crop Operator.
Crop input into output, as specified by offsets and shape. Crop input into output, as specified by offsets and shape.
...@@ -81,33 +90,24 @@ The input should be a k-D tensor(k > 0 and k < 7). As an example: ...@@ -81,33 +90,24 @@ The input should be a k-D tensor(k > 0 and k < 7). As an example:
Given: Given:
X = [[0, 1, 2, 0, 0] X = [[0, 1, 2, 0, 0]
[0, 3, 4, 0, 0] [0, 3, 4, 0, 0]
[0, 0, 0, 0, 0]] [0, 0, 0, 0, 0]]
and and
offsets = [0, 1] offsets = [0, 1]
and and
shape = [2, 2] shape = [2, 2]
then we get then we get
Out = [[1, 2], Out = [[1, 2],
[3, 4]] [3, 4]]
)DOC"); )DOC");
AddAttr<std::vector<int>>("offsets",
"A list<int> describing offsets to be cropped."
"The size of offsets list should be as same as "
"dimension size of input X.");
AddAttr<std::vector<int>>("shape",
"A list<int> describing the shape of output."
"The size of shape list should be as same as "
"dimension size of input X.")
.SetDefault(std::vector<int>());
} }
}; };
...@@ -149,17 +149,17 @@ template <typename T> ...@@ -149,17 +149,17 @@ template <typename T>
class CropCPUKernel : public framework::OpKernel { class CropCPUKernel : public framework::OpKernel {
public: public:
void Compute(const framework::ExecutionContext &context) const override { void Compute(const framework::ExecutionContext &context) const override {
auto *x = context.Input<LoDTensor>("X"); auto *x = context.Input<Tensor>("X");
auto *out = context.Output<LoDTensor>("Out"); auto *out = context.Output<Tensor>("Out");
auto x_data = x->data<T>(); auto x_data = x->data<T>();
T *out_data = out->mutable_data<T>(context.GetPlace()); T *out_data = out->mutable_data<T>(context.GetPlace());
auto x_dims = x->dims(); auto x_dims = x->dims();
auto out_dims = out->dims(); auto out_dims = out->dims();
int64_t out_count = framework::product(out_dims); int64_t out_count = out->numel();
std::vector<int64_t> x_shape = framework::vectorize(x_dims); std::vector<int64_t> x_shape = framework::vectorize(x_dims);
std::vector<int64_t> out_shape = framework::vectorize(out_dims); std::vector<int64_t> out_shape = framework::vectorize(out_dims);
auto offsets = context.op().Attr<std::vector<int>>("offsets"); auto offsets = context.Attr<std::vector<int>>("offsets");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
x_dims.size(), offsets.size(), x_dims.size(), offsets.size(),
"Offsets size should be equal to dimension size of input tensor."); "Offsets size should be equal to dimension size of input tensor.");
......
...@@ -20,6 +20,7 @@ namespace paddle { ...@@ -20,6 +20,7 @@ namespace paddle {
namespace operators { namespace operators {
using framework::LoDTensor; using framework::LoDTensor;
using framework::Tensor;
template <typename T, int D> template <typename T, int D>
__global__ void CropKernel(const int N, const int64_t* out_shape, __global__ void CropKernel(const int N, const int64_t* out_shape,
...@@ -54,35 +55,36 @@ void CropCUDAFunctoin(const framework::ExecutionContext& context) { ...@@ -54,35 +55,36 @@ void CropCUDAFunctoin(const framework::ExecutionContext& context) {
T* out_data = out->mutable_data<T>(paddle::platform::GPUPlace()); T* out_data = out->mutable_data<T>(paddle::platform::GPUPlace());
auto x_dims = x->dims(); auto x_dims = x->dims();
auto out_dims = out->dims(); auto out_dims = out->dims();
int64_t out_count = framework::product(out_dims); int64_t out_count = out->numel();
int64_t x_shape[D]; Tensor x_shape;
int64_t out_shape[D]; Tensor out_shape;
int64_t* x_shape_data =
x_shape.mutable_data<int64_t>({D}, paddle::platform::CPUPlace());
int64_t* out_shape_data =
out_shape.mutable_data<int64_t>({D}, paddle::platform::CPUPlace());
for (int i = 0; i < D; ++i) { for (int i = 0; i < D; ++i) {
x_shape[i] = x_dims[i]; x_shape_data[i] = x_dims[i];
out_shape[i] = out_dims[i]; out_shape_data[i] = out_dims[i];
} }
int64_t* x_shape_gpu; Tensor x_shape_gpu;
int64_t* out_shape_gpu; Tensor out_shape_gpu;
cudaMalloc((void**)&x_shape_gpu, sizeof(int64_t) * D); x_shape_gpu.CopyFrom<int64_t>(x_shape, paddle::platform::GPUPlace());
cudaMemcpy(x_shape_gpu, x_shape, sizeof(int64_t) * D, cudaMemcpyHostToDevice); out_shape_gpu.CopyFrom<int64_t>(out_shape, paddle::platform::GPUPlace());
cudaMalloc((void**)&out_shape_gpu, sizeof(int64_t) * D);
cudaMemcpy(out_shape_gpu, out_shape, sizeof(int64_t) * D,
cudaMemcpyHostToDevice);
auto offsets = context.op().Attr<std::vector<int>>("offsets"); auto offsets = context.op().Attr<std::vector<int>>("offsets");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
D, offsets.size(), D, offsets.size(),
"Offsets size should be equal to dimension size of input tensor."); "Offsets size should be equal to dimension size of input tensor.");
int crop_rules[D * 2]; Tensor crop_rules;
for (size_t i = 0; i < x_dims.size(); ++i) { int* crop_rules_data =
crop_rules[i * 2] = offsets[i]; crop_rules.mutable_data<int>({D * 2}, paddle::platform::CPUPlace());
crop_rules[i * 2 + 1] = x_dims[i] - out_dims[i] - offsets[i]; for (size_t i = 0; i < D; ++i) {
crop_rules_data[i * 2] = offsets[i];
crop_rules_data[i * 2 + 1] = x_dims[i] - out_dims[i] - offsets[i];
} }
int* crop_rules_gpu; Tensor crop_rules_gpu;
cudaMalloc((void**)&crop_rules_gpu, sizeof(int) * D * 2); crop_rules_gpu.CopyFrom<int>(crop_rules, paddle::platform::GPUPlace());
cudaMemcpy(crop_rules_gpu, crop_rules, sizeof(int) * D * 2,
cudaMemcpyHostToDevice);
int n = out_dims[0]; int n = out_dims[0];
int d = out_dims[1]; int d = out_dims[1];
...@@ -94,11 +96,9 @@ void CropCUDAFunctoin(const framework::ExecutionContext& context) { ...@@ -94,11 +96,9 @@ void CropCUDAFunctoin(const framework::ExecutionContext& context) {
CropKernel<T, CropKernel<T,
D><<<grid, block, 0, D><<<grid, block, 0,
reinterpret_cast<platform::CUDADeviceContext*>(device_context) reinterpret_cast<platform::CUDADeviceContext*>(device_context)
->stream()>>>(out_count, out_shape_gpu, x_shape_gpu, ->stream()>>>(
crop_rules_gpu, x_data, out_data); out_count, out_shape_gpu.data<int64_t>(), x_shape_gpu.data<int64_t>(),
cudaFree(crop_rules_gpu); crop_rules_gpu.data<int>(), x_data, out_data);
cudaFree(x_shape_gpu);
cudaFree(out_shape_gpu);
} }
template <typename T> template <typename T>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册