Unverified commit 342252c9, authored by Leo Chen, committed by GitHub

[NPU] change transpose to transpose2 (#31734)

* change transpose to transpose2

* fix bug
Parent 7b450e78
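For context on what the rename implies for callers: unlike transpose, the transpose2 op also produces an XShape output that records the input shape for the backward pass, so any code that constructs the op must bind that extra output, as the updated test below does. A minimal sketch under that assumption, mirroring the test's CreateOp call; the helper name MakeTranspose2Op is hypothetical and not part of the patch:

#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_registry.h"

namespace f = paddle::framework;

// Hypothetical helper: build a transpose2 op the same way the updated unit
// test does. transpose2 requires the XShape output that plain transpose lacks.
std::unique_ptr<f::OperatorBase> MakeTranspose2Op() {
  f::AttributeMap attrs = {{"axis", std::vector<int>({1, 0})},
                           {"data_format", std::string("AnyLayout")}};
  return f::OpRegistry::CreateOp("transpose2", {{"X", {"X"}}},
                                 {{"Out", {"Out"}}, {"XShape", {"XShape"}}},
                                 attrs);
}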
@@ -9,75 +9,73 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_ASCEND_CL
#include <iostream>
#include <memory>
#include <string>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/expand_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class TransposeNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<framework::LoDTensor>("X");
    auto* out = ctx.Output<framework::LoDTensor>("Out");
    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
    framework::NPUAttributeMap attr_input = {{"perm", axis}};
    out->mutable_data<T>(ctx.device_context().GetPlace());
    auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};

template <typename T>
class TransposeGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* out_grad =
        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
    auto* x_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
    std::vector<int> reversed_axis(axis);
    for (size_t i = 0; i < axis.size(); i++) {
      reversed_axis[axis[i]] = i;
    }
    x_grad->mutable_data<T>(ctx.GetPlace());
    framework::NPUAttributeMap attr_input = {{"perm", reversed_axis}};
    auto runner =
        NpuOpRunner("TransposeD", {*out_grad}, {*x_grad}, attr_input);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_NPU_KERNEL(
    transpose2,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, float>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext,
                            paddle::platform::float16>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, int>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, uint8_t>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, int8_t>);

REGISTER_OP_NPU_KERNEL(transpose2_grad, ops::TransposeGradNPUKernel<float>,
                       ops::TransposeGradNPUKernel<paddle::platform::float16>,
                       ops::TransposeGradNPUKernel<int>,
                       ops::TransposeGradNPUKernel<uint8_t>,
                       ops::TransposeGradNPUKernel<int8_t>);

#endif
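The gradient kernel above inverts the forward permutation before calling TransposeD, via reversed_axis[axis[i]] = i. A minimal standalone illustration of that inversion; the function name and the main() driver are only for exposition and are not part of the patch:

#include <iostream>
#include <vector>

// Invert a permutation: if the forward transpose maps input dim axis[i] to
// output dim i, the backward transpose maps output-grad dim i back to input
// dim axis[i], which is exactly inv[axis[i]] = i.
std::vector<int> InvertPermutation(const std::vector<int>& axis) {
  std::vector<int> inv(axis.size());
  for (size_t i = 0; i < axis.size(); ++i) {
    inv[axis[i]] = static_cast<int>(i);
  }
  return inv;
}

int main() {
  // Forward perm {1, 2, 0} inverts to {2, 0, 1}; {1, 0} is its own inverse.
  for (int v : InvertPermutation({1, 2, 0})) std::cout << v << " ";
  std::cout << std::endl;  // prints: 2 0 1
  return 0;
}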
@@ -13,12 +13,12 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <cmath>
#include <iostream>
#include <numeric>
#include <string>
#include <thread>  // NOLINT
#include <vector>

#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
@@ -32,17 +32,18 @@ namespace f = paddle::framework;
namespace p = paddle::platform;
namespace m = paddle::operators::math;
USE_OP(transpose2);
USE_OP_DEVICE_KERNEL(transpose2, NPU);

template <typename T>
void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  // init
  auto x = scope->Var("X");
  auto out = scope->Var("Out");
  auto xshape = scope->Var("XShape");
  auto* x_t = x->GetMutable<f::LoDTensor>();
  auto* out_t = out->GetMutable<f::LoDTensor>();
  auto* xshape_t = xshape->GetMutable<f::LoDTensor>();

  auto place = ctx.GetPlace();
  int dim0 = 2;
@@ -54,12 +55,13 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  ctx.Wait();
  out_t->mutable_data<T>(place);
  ctx.Wait();

  xshape_t->Resize({dim0, dim1});
  xshape_t->mutable_data<T>(place);

  f::AttributeMap attrs = {{"axis", std::vector<int>({1, 0})},
                           {"data_format", std::string("AnyLayout")}};
  auto op = f::OpRegistry::CreateOp("transpose2", {{"X", {"X"}}},
                                    {{"Out", {"Out"}}, {"XShape", {"XShape"}}},
                                    attrs);

  ctx.Wait();
  op->Run(*scope, place);
  ctx.Wait();
@@ -76,47 +78,42 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  EXPECT_EQ(out_v[5], 5);
}

template <typename T>
void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
  // init
  auto xshape = scope->Var("XShape");
  auto x_grad = scope->Var("X@GRAD");
  auto out_grad = scope->Var("Out@GRAD");

  auto* x_grad_t = x_grad->GetMutable<f::LoDTensor>();
  auto* xshape_t = xshape->GetMutable<f::LoDTensor>();
  auto* out_grad_t = out_grad->GetMutable<f::LoDTensor>();

  int dim0 = 2;
  int dim1 = 3;
  auto place = ctx.GetPlace();

  TensorFromVector(std::vector<T>({0, 1, 2, 3, 4, 5}), ctx, out_grad_t);
  ctx.Wait();

  x_grad_t->Resize({dim0, dim1});
  xshape_t->Resize(
      {0, dim0,
       dim1});  // NOTE(zhiqiu): 0 is needed, see its infershape function
  out_grad_t->Resize({dim0, dim1});

  x_grad_t->mutable_data<T>(place);
  ctx.Wait();

  f::AttributeMap attrs = {{"axis", std::vector<int>({1, 0})},
                           {"data_format", std::string("AnyLayout")}};

  auto op = f::OpRegistry::CreateOp(
      "transpose2_grad", {{"Out@GRAD", {"Out@GRAD"}}, {"XShape", {"XShape"}}},
      {{"X@GRAD", {"X@GRAD"}}}, attrs);

  op->Run(*scope, place);
  ctx.Wait();
  std::vector<T> out_v;
  TensorToVector(*x_grad_t, ctx, &out_v);
  ctx.Wait();

  EXPECT_EQ(x_grad_t->numel(), dim0 * dim1);
  EXPECT_EQ(out_v[0], 0);
@@ -125,19 +122,16 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
  EXPECT_EQ(out_v[3], 4);
  EXPECT_EQ(out_v[4], 2);
  EXPECT_EQ(out_v[5], 5);
}

TEST(transpose2, NPU_fp32) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(0));
  Compare<float>(&scope, ctx);
}

TEST(transpose2_grad, NPU_fp32) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(0));
  CompareGrad<float>(&scope, ctx);
}
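The xshape_t->Resize({0, dim0, dim1}) call in CompareGrad follows the convention flagged by the NOTE in the test: transpose2's shape-recording output stores the input dims prefixed with a leading 0, so the grad op can recover the input shape by dropping the first entry. A small illustrative sketch of that convention; the helper name is an assumption for exposition, not code from this patch:

#include <algorithm>
#include <cstdint>
#include <vector>

// Assumed illustration: XShape dims are the real input dims with a 0
// prepended, matching Resize({0, dim0, dim1}) in the test above.
std::vector<int64_t> MakeXShapeDims(const std::vector<int64_t>& x_dims) {
  std::vector<int64_t> dims(x_dims.size() + 1, 0);
  std::copy(x_dims.begin(), x_dims.end(), dims.begin() + 1);
  return dims;  // e.g. {2, 3} -> {0, 2, 3}
}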
@@ -30,7 +30,7 @@ paddle.enable_static()
class TestTransposeOp(OpTest):
    def setUp(self):
        self.set_npu()
        self.op_type = "transpose2"
        self.place = paddle.NPUPlace(0)
        self.init_dtype()
        self.init_input_output()