Commit 47269273 authored by peterzhang2029

refine memory transform

Parent f5cb52ca
paddle/operators/bilinear_tensor_product_op.cc

...
@@ -34,34 +34,34 @@ class BilinearTensorProductOp : public framework::OperatorWithKernel {
     auto y_dims = ctx->GetInputDim("Y");
     auto weight_dims = ctx->GetInputDim("Weight");
-    PADDLE_ENFORCE_EQ(x_dims.size(), 2, "The input X must be a 2D Tensor.");
-    PADDLE_ENFORCE_EQ(y_dims.size(), 2, "The input Y must be a 2D Tensor.");
-    PADDLE_ENFORCE_EQ(weight_dims.size(), 3,
+    PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The input X must be a 2D Tensor.");
+    PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The input Y must be a 2D Tensor.");
+    PADDLE_ENFORCE_EQ(weight_dims.size(), 3UL,
                       "The input Weight must be a 3D tensor.");
-    PADDLE_ENFORCE_GT(weight_dims[0], 0,
-                      "The first dimension of Weight must be larger than 0.");
-    PADDLE_ENFORCE_GT(weight_dims[1], 0,
-                      "The second dimension of Weight must be larger than 0.");
-    PADDLE_ENFORCE_GT(weight_dims[2], 0,
-                      "The third dimension of Weight must be larger than 0.");
+    PADDLE_ENFORCE(weight_dims[0],
+                   "The first dimension of Weight must be larger than 0.");
+    PADDLE_ENFORCE(weight_dims[1],
+                   "The second dimension of Weight must be larger than 0.");
+    PADDLE_ENFORCE(weight_dims[2],
+                   "The third dimension of Weight must be larger than 0.");
     PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0],
                       "The first dimension(batch_size) of X must be "
-                      "equal with the first dimension of the Y.");
+                      "equal to the first dimension of the Y.");
     PADDLE_ENFORCE_EQ(x_dims[1], weight_dims[1],
-                      "The second dimension of X must be equal with the second "
+                      "The second dimension of X must be equal to the second "
                       "dimension of the Weight.");
     PADDLE_ENFORCE_EQ(y_dims[1], weight_dims[2],
-                      "The second dimension of Y must be equal with the third "
+                      "The second dimension of Y must be equal to the third "
                       "dimension of the Weight.");
     if (ctx->HasInput("Bias")) {
       auto bias_dims = ctx->GetInputDim("Bias");
-      PADDLE_ENFORCE_EQ(bias_dims.size(), 2,
+      PADDLE_ENFORCE_EQ(bias_dims.size(), 2UL,
                         "The input Bias must have 2 dimensions.");
-      PADDLE_ENFORCE_EQ(bias_dims[0], 1,
+      PADDLE_ENFORCE_EQ(bias_dims[0], 1UL,
                         "The first dimension of input Bias must be 1.");
       PADDLE_ENFORCE_EQ(bias_dims[1], weight_dims[0],
-                        "The second dimension of Bias must be equal with the "
+                        "The second dimension of Bias must be equal to the "
                         "first dimension of the Weight.");
     }
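Taken together, these checks fix the operator's shape contract: X is [batch_size, d1], Y is [batch_size, d2], Weight is [d0, d1, d2], the optional Bias is [1, d0], and (per the gradient checks further down) Out is [batch_size, d0]. For instance, with hypothetical sizes chosen only for illustration: X [32, 5], Y [32, 6], Weight [3, 5, 6], Bias [1, 3], Out [32, 3].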
...
@@ -75,12 +75,12 @@ class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker {
   BilinearTensorProductOpMaker(framework::OpProto* proto,
                                framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The first input of BilinearTensorProduct op");
-    AddInput("Y", "The second input of BilinearTensorProduct op");
-    AddInput("Weight", "The input weight of BilinearTensorProduct op");
-    AddInput("Bias", "The input bias of BilinearTensorProduct op")
+    AddInput("X", "The first input of BilinearTensorProduct op.");
+    AddInput("Y", "The second input of BilinearTensorProduct op.");
+    AddInput("Weight", "The input weight of BilinearTensorProduct op.");
+    AddInput("Bias", "The input bias of BilinearTensorProduct op.")
         .AsDispensable();
-    AddOutput("Out", "The output of BilinearTensorProduct op");
+    AddOutput("Out", "The output of BilinearTensorProduct op.");
     AddComment(R"DOC(
 Bilinear Tensor Product operator.
 Given input X and Y, a 3D tensor weight, and bias. Each column of the
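The DOC comment is cut off by the diff at this point. Reading the shared kernel in bilinear_tensor_product_op.h below, the computation it describes can be written as (a reconstruction from the kernel code, not the commit's own text):

    Out_{j,i} = X_{j,:} W_i Y_{j,:}^T + b_i,    i = 0, ..., d0 - 1,

where W_i = Weight[i, :, :] is the i-th [d1, d2] slice of Weight, j indexes the batch, and b_i is the i-th entry of the optional Bias.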
...
@@ -99,30 +99,32 @@ class BilinearTensorProductOpGrad : public framework::OperatorWithKernel {
  protected:
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
-    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
-    PADDLE_ENFORCE(ctx->HasInput("Weight"), "Input(Weight) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Weight"),
+                   "Input(Weight) should not be null.");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
-                   "Input (Out@GRAD) should not be null");
+                   "Input (Out@GRAD) should not be null.");
     auto x_dims = ctx->GetInputDim("X");
     auto y_dims = ctx->GetInputDim("Y");
     auto weight_dims = ctx->GetInputDim("Weight");
     auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
-    PADDLE_ENFORCE_EQ(out_dims.size(), 2, "The Out@GRAD must be a 2D Tensor.");
+    PADDLE_ENFORCE_EQ(out_dims.size(), 2UL,
+                      "The Out@GRAD must be a 2D Tensor.");
     PADDLE_ENFORCE_EQ(
         x_dims[0], out_dims[0],
-        "The first dimension(batch_size) of Out@GRAD must be equal with "
-        "the first dimension of the X.");
+        "The first dimension(batch_size) of Out@GRAD must be equal to "
+        "the first dimension of the Input(X).");
     PADDLE_ENFORCE_EQ(weight_dims[0], out_dims[1],
-                      "The second dimension of Out@GRAD must be equal with "
-                      "the third dimension of the Weight.");
+                      "The second dimension of Out@GRAD must be equal to "
+                      "the third dimension of the Input(Weight).");
     if (ctx->HasInput("Bias")) {
       auto bias_dims = ctx->GetInputDim("Bias");
       PADDLE_ENFORCE_EQ(bias_dims[1], out_dims[1],
-                        "The second dimension of Bias must be equal with "
-                        "the second dimension of the Out@GRAD.");
+                        "The second dimension of Out@GRAD must be equal to "
+                        "the second dimension of the Input(Bias).");
       auto bias_grad_name = framework::GradVarName("Bias");
       if (ctx->HasOutput(bias_grad_name))
         ctx->SetOutputDim(bias_grad_name, bias_dims);
...
paddle/operators/bilinear_tensor_product_op.cu

 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #define EIGEN_USE_GPU
 #include "paddle/operators/bilinear_tensor_product_op.h"
-namespace paddle {
-namespace operators {
-template <typename Place, typename T>
-class BilinearTensorProductCUDAKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto* weight = ctx.Input<Tensor>("Weight");
-    auto* bias = ctx.Input<Tensor>("Bias");
-    auto* out = ctx.Output<Tensor>("Out");
-    out->mutable_data<T>(ctx.GetPlace());
-    auto y_mat = EigenMatrix<T>::From(*y);
-    auto batch_size = x->dims()[0];
-    auto weight_dims = weight->dims();
-    auto place = ctx.GetEigenDevice<Place>();
-    auto cpu_place = ctx.GetEigenDevice<platform::CPUPlace>();
-    // Copy the output to cpu.
-    Tensor output_cpu;
-    output_cpu.CopyFrom(*out, platform::CPUPlace(), ctx.device_context());
-    auto* output_cpu_ptr = output_cpu.data<T>();
-    auto output_cpu_mat = EigenMatrix<T>::From(output_cpu);
-    // Create the temporary variables.
-    Tensor left_mul;
-    left_mul.mutable_data<T>(framework::make_ddim({batch_size, weight_dims[2]}),
-                             ctx.GetPlace());
-    auto left_mul_mat = EigenMatrix<T>::From(left_mul);
-    Tensor output_col;
-    output_col.mutable_data<T>(framework::make_ddim({batch_size}),
-                               ctx.GetPlace());
-    auto output_col_vec = EigenVector<T>::From(output_col);
-    for (size_t i = 0; i < weight_dims[0]; ++i) {
-      Tensor weight_mat = weight->Slice(i, i + 1).Resize(
-          framework::make_ddim({weight_dims[1], weight_dims[2]}));
-      math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
-                           batch_size, weight_dims[2], weight_dims[1], 1,
-                           x->data<T>(), weight_mat.data<T>(), 0,
-                           left_mul.data<T>());
-      output_col_vec.device(place) =
-          (left_mul_mat * y_mat).sum(Eigen::DSizes<int, 1>(1));
-      // Copy the output_col to cpu.
-      Tensor output_col_cpu;
-      output_col_cpu.CopyFrom(output_col, platform::CPUPlace(),
-                              ctx.device_context());
-      auto* output_col_ptr = output_col_cpu.data<T>();
-      for (size_t j = 0; j < batch_size; ++j) {
-        output_cpu_ptr[i + j * weight_dims[0]] = output_col_ptr[j];
-      }
-    }
-    if (bias) {
-      // Copy the bias to cpu.
-      Tensor bias_cpu;
-      bias_cpu.CopyFrom(*bias, platform::CPUPlace(), ctx.device_context());
-      auto bias_vec = EigenMatrix<T>::From(bias_cpu);
-      Eigen::DSizes<int, 2> bcast(batch_size, 1);
-      output_cpu_mat.device(cpu_place) =
-          bias_vec.broadcast(bcast) + output_cpu_mat;
-    }
-    // Copy the output to gpu.
-    out->CopyFrom(output_cpu, platform::GPUPlace(), ctx.device_context());
-  }
-};
-}  // namespace operators
-}  // namespace paddle
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(
     bilinear_tensor_product,
-    ops::BilinearTensorProductCUDAKernel<paddle::platform::GPUPlace, float>);
+    ops::BilinearTensorProductKernel<paddle::platform::GPUPlace, float>);
 REGISTER_OP_GPU_KERNEL(
     bilinear_tensor_product_grad,
     ops::BilinearTensorProductGradKernel<paddle::platform::GPUPlace, float>);
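The deleted CUDA kernel above staged every result through host memory (output_cpu, output_col_cpu, bias_cpu) and copied the final output back to the GPU; the shared BilinearTensorProductKernel now registered in its place writes each output column directly on the device via Eigen's chip() (see the .h diff below). A minimal standalone sketch of that column-assignment pattern, assuming only Eigen's unsupported Tensor module (illustrative, not commit code):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  // `out` plays the role of output_mat: batch_size x weight_dims[0].
  Eigen::Tensor<float, 2> out(4, 3);
  out.setZero();
  // `col` plays the role of one (left_mul_mat * y_mat).sum(...) result.
  Eigen::Tensor<float, 1> col(4);
  col.setConstant(1.0f);
  // chip(1, 1) is a writable view of column 1: assigning to it updates
  // `out` in place, with no intermediate buffer or host round-trip.
  out.chip(1, 1) = col;
  return 0;
}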
paddle/operators/bilinear_tensor_product_op.h

 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
-You may not use this file except in compliance with the License.
+you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
...
@@ -21,7 +21,7 @@
 namespace paddle {
 namespace operators {
-using Tensor = framework::Tensor;
+using framework::Tensor;
 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
...
@@ -49,34 +49,27 @@ class BilinearTensorProductKernel : public framework::OpKernel<T> {
     auto weight_dims = weight->dims();
     auto place = ctx.GetEigenDevice<Place>();
-    // Create the temporary variables.
+    // Create the intermediate variables.
     Tensor left_mul;
     left_mul.mutable_data<T>(framework::make_ddim({batch_size, weight_dims[2]}),
                              ctx.GetPlace());
     auto left_mul_mat = EigenMatrix<T>::From(left_mul);
-    Tensor output_col;
-    output_col.mutable_data<T>(framework::make_ddim({weight_dims[0]}),
-                               ctx.GetPlace());
-    auto output_col_vec = EigenVector<T>::From(output_col);
     for (size_t i = 0; i < weight_dims[0]; ++i) {
+      auto output_col_vec = output_mat.chip(i, 1);
       Tensor weight_mat = weight->Slice(i, i + 1).Resize(
           framework::make_ddim({weight_dims[1], weight_dims[2]}));
       math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
                            batch_size, weight_dims[2], weight_dims[1], 1,
                            x->data<T>(), weight_mat.data<T>(), 0,
                            left_mul.data<T>());
-      output_col_vec = (left_mul_mat * y_mat).sum(Eigen::DSizes<int, 1>(1));
-      for (size_t j = 0; j < batch_size; ++j) {
-        output_mat(j, i) = output_col_vec(j);
-      }
+      output_col_vec.device(place) =
+          (left_mul_mat * y_mat).sum(Eigen::DSizes<int, 1>(1));
     }
     if (bias) {
       auto bias_vec = EigenMatrix<T>::From(*bias);
       Eigen::DSizes<int, 2> bcast(batch_size, 1);
       output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat;
-    } else {
-      output_mat.device(place) = output_mat;
     }
   }
 };
...
@@ -102,7 +95,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
     auto d_out_mat = EigenMatrix<T>::From(*d_out);
     auto place = ctx.GetEigenDevice<Place>();
-    // Create the temporary variables for gradient.
+    // Create the intermediate variables for gradient.
     Tensor x_scale;
     x_scale.mutable_data<T>(framework::make_ddim({batch_size, weight_dims[1]}),
                             ctx.GetPlace());
...
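For completeness, the gradients that BilinearTensorProductGradKernel accumulates follow directly from the forward formula given earlier (derived here for reference; the derivation is not part of the commit):

    dX_{j,:} = sum_i dOut_{j,i} * (Y_{j,:} W_i^T)
    dY_{j,:} = sum_i dOut_{j,i} * (X_{j,:} W_i)
    dW_i     = sum_j dOut_{j,i} * (X_{j,:}^T Y_{j,:})
    db_i     = sum_j dOut_{j,i}

The x_scale buffer above (and its y_scale counterpart in the collapsed lines) presumably stages the dOut-scaled operands used to form these products.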