Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
342252c9
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
342252c9
编写于
3月 19, 2021
作者:
L
Leo Chen
提交者:
GitHub
3月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] change transpose to transpose2 (#31734)
* change transpose to transpose2 * fix bug
上级
7b450e78
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
69 additions
and
77 deletions
+69
-77
paddle/fluid/operators/transpose_op_npu.cc
paddle/fluid/operators/transpose_op_npu.cc
+36
-38
paddle/fluid/operators/transpose_op_npu_test.cc
paddle/fluid/operators/transpose_op_npu_test.cc
+32
-38
python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
...paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
+1
-1
未找到文件。
paddle/fluid/operators/transpose_op_npu.cc
浏览文件 @
342252c9
...
...
@@ -9,75 +9,73 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#i
fdef PADDLE_WITH_ASCEND_CL
#i
nclude <iostream>
#include <memory>
#include <string>
#include <iostream>
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/expand_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
namespace
paddle
{
namespace
operators
{
// NPU kernel for the forward transpose2 op.
// Reads input tensor "X", permutes its dimensions according to the integer
// attribute vector "axis", and writes the result to output "Out" by invoking
// the Ascend "TransposeD" operator on the NPU stream.
// NOTE(review): the diff view had duplicated old/new Compute bodies; this is
// the single post-commit implementation.
template <typename DeviceContext, typename T>
class TransposeNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<framework::LoDTensor>("X");
    auto* out = ctx.Output<framework::LoDTensor>("Out");
    // "axis" holds the permutation: out dim i comes from x dim axis[i].
    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
    framework::NPUAttributeMap attr_input = {{"perm", axis}};
    out->mutable_data<T>(ctx.device_context().GetPlace());
    auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};
// NPU kernel for the transpose2 gradient op.
// Computes d(X) from d(Out) by applying the inverse permutation of "axis":
// if the forward used perm, the backward uses perm^-1, built so that
// reversed_axis[axis[i]] = i.
// NOTE(review): the diff view duplicated the Compute signature and the
// `stream` declaration; this is the single post-commit implementation.
template <typename T>
class TransposeGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* out_grad =
        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
    auto* x_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
    // Invert the forward permutation.
    std::vector<int> reversed_axis(axis);
    for (size_t i = 0; i < axis.size(); i++) {
      reversed_axis[axis[i]] = i;
    }
    x_grad->mutable_data<T>(ctx.GetPlace());
    framework::NPUAttributeMap attr_input = {{"perm", reversed_axis}};
    auto runner =
        NpuOpRunner("TransposeD", {*out_grad}, {*x_grad}, attr_input);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};
}
}
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;

// Register the forward kernel for transpose2 (the op name after this commit;
// the pre-commit "transpose" registration is intentionally gone).
// float16 is listed once — the diff artifact duplicated it.
REGISTER_OP_NPU_KERNEL(
    transpose2,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, float>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext,
                            paddle::platform::float16>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, int>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, uint8_t>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, int8_t>);

// Register the backward kernel for transpose2_grad with the same dtype set.
REGISTER_OP_NPU_KERNEL(
    transpose2_grad, ops::TransposeGradNPUKernel<float>,
    ops::TransposeGradNPUKernel<paddle::platform::float16>,
    ops::TransposeGradNPUKernel<int>,
    ops::TransposeGradNPUKernel<uint8_t>,
    ops::TransposeGradNPUKernel<int8_t>);

#endif  // PADDLE_WITH_ASCEND_CL
paddle/fluid/operators/transpose_op_npu_test.cc
浏览文件 @
342252c9
...
...
@@ -13,12 +13,12 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <string>
#include <cmath>
#include <iostream>
#include <numeric>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <numeric>
#include <iostream>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -32,17 +32,18 @@ namespace f = paddle::framework;
// Shorthand namespace aliases used throughout the test file.
namespace p = paddle::platform;
namespace m = paddle::operators::math;

// Pull in the transpose2 op definition and its NPU device kernel so the
// test binary links them. (The old USE_OP(transpose) lines were removed by
// this commit; the diff view showed both.)
USE_OP(transpose2);
USE_OP_DEVICE_KERNEL(transpose2, NPU);
template
<
typename
T
>
void
Compare
(
f
::
Scope
*
scope
,
const
p
::
DeviceContext
&
ctx
)
{
// init
// init
auto
x
=
scope
->
Var
(
"X"
);
auto
out
=
scope
->
Var
(
"Out"
);
auto
xshape
=
scope
->
Var
(
"XShape"
);
auto
*
x_t
=
x
->
GetMutable
<
f
::
LoDTensor
>
();
auto
*
out_t
=
out
->
GetMutable
<
f
::
LoDTensor
>
();
auto
*
xshape_t
=
xshape
->
GetMutable
<
f
::
LoDTensor
>
();
auto
place
=
ctx
.
GetPlace
();
int
dim0
=
2
;
...
...
@@ -54,12 +55,13 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
ctx
.
Wait
();
out_t
->
mutable_data
<
T
>
(
place
);
ctx
.
Wait
();
f
::
AttributeMap
attrs
=
{
{
"axis"
,
std
::
vector
<
int
>
({
1
,
0
})},
{
"data_format"
,
std
::
string
(
"AnyLayout"
)}
};
auto
op
=
f
::
OpRegistry
::
CreateOp
(
"transpose"
,
{{
"X"
,
{
"X"
}}},
{{
"Out"
,
{
"Out"
}}},
attrs
);
xshape_t
->
Resize
({
dim0
,
dim1
});
xshape_t
->
mutable_data
<
T
>
(
place
);
f
::
AttributeMap
attrs
=
{{
"axis"
,
std
::
vector
<
int
>
({
1
,
0
})},
{
"data_format"
,
std
::
string
(
"AnyLayout"
)}};
auto
op
=
f
::
OpRegistry
::
CreateOp
(
"transpose2"
,
{{
"X"
,
{
"X"
}}},
{{
"Out"
,
{
"Out"
}},
{
"XShape"
,
{
"XShape"
}}},
attrs
);
ctx
.
Wait
();
op
->
Run
(
*
scope
,
place
);
ctx
.
Wait
();
...
...
@@ -76,47 +78,42 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
EXPECT_EQ
(
out_v
[
5
],
5
);
}
template
<
typename
T
>
void
CompareGrad
(
f
::
Scope
*
scope
,
const
p
::
DeviceContext
&
ctx
)
{
// init
auto
x
=
scope
->
Var
(
"X
"
);
// init
auto
x
shape
=
scope
->
Var
(
"XShape
"
);
auto
x_grad
=
scope
->
Var
(
"X@GRAD"
);
auto
out
=
scope
->
Var
(
"Out"
);
auto
out_grad
=
scope
->
Var
(
"Out@GRAD"
);
auto
*
x_grad_t
=
x_grad
->
GetMutable
<
f
::
LoDTensor
>
();
auto
*
x
_t
=
x
->
GetMutable
<
f
::
LoDTensor
>
();
auto
*
x
shape_t
=
xshape
->
GetMutable
<
f
::
LoDTensor
>
();
auto
*
out_grad_t
=
out_grad
->
GetMutable
<
f
::
LoDTensor
>
();
auto
*
out_t
=
out
->
GetMutable
<
f
::
LoDTensor
>
();
int
dim0
=
2
;
int
dim1
=
3
;
auto
place
=
ctx
.
GetPlace
();
TensorFromVector
(
std
::
vector
<
T
>
({
0
,
1
,
2
,
3
,
4
,
5
}),
ctx
,
out_grad_t
);
TensorFromVector
(
std
::
vector
<
T
>
({
0
,
1
,
2
,
3
,
4
,
5
}),
ctx
,
x_t
);
ctx
.
Wait
();
x_grad_t
->
Resize
({
dim0
,
dim1
});
x_t
->
Resize
({
dim0
,
dim1
});
xshape_t
->
Resize
(
{
0
,
dim0
,
dim1
});
// NOTE(zhiqiu): 0 is needed, see its infershape function
out_grad_t
->
Resize
({
dim0
,
dim1
});
out_t
->
Resize
({
dim0
,
dim1
});
x_grad_t
->
mutable_data
<
T
>
(
place
);
out_t
->
mutable_data
<
T
>
(
place
);
ctx
.
Wait
();
f
::
AttributeMap
attrs
=
{
{
"axis"
,
std
::
vector
<
int
>
({
1
,
0
})},
{
"data_format"
,
std
::
string
(
"AnyLayout"
)}
};
f
::
AttributeMap
attrs
=
{{
"axis"
,
std
::
vector
<
int
>
({
1
,
0
})},
{
"data_format"
,
std
::
string
(
"AnyLayout"
)}};
auto
op
=
f
::
OpRegistry
::
CreateOp
(
"transpose_grad"
,
{{
"Out@GRAD"
,
{
"Out@GRAD"
}},
{
"X"
,
{
"X"
}},
{
"Out"
,
{
"Out"
}}},
"transpose2_grad"
,
{{
"Out@GRAD"
,
{
"Out@GRAD"
}},
{
"XShape"
,
{
"XShape"
}}},
{{
"X@GRAD"
,
{
"X@GRAD"
}}},
attrs
);
op
->
Run
(
*
scope
,
place
);
ctx
.
Wait
();
ctx
.
Wait
();
std
::
vector
<
T
>
out_v
;
TensorToVector
(
*
x_grad_t
,
ctx
,
&
out_v
);
ctx
.
Wait
();
ctx
.
Wait
();
EXPECT_EQ
(
x_grad_t
->
numel
(),
dim0
*
dim1
);
EXPECT_EQ
(
out_v
[
0
],
0
);
...
...
@@ -125,19 +122,16 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
EXPECT_EQ
(
out_v
[
3
],
4
);
EXPECT_EQ
(
out_v
[
4
],
2
);
EXPECT_EQ
(
out_v
[
5
],
5
);
}
// Forward test: runs the transpose2 NPU kernel with float32 inputs on NPU 0.
// (Diff view merged the old TEST(transpose, ...) header with the new
// TEST(transpose2, ...) header; only the latter exists post-commit.)
TEST(transpose2, NPU_fp32) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(0));
  Compare<float>(&scope, ctx);
}
// Backward test: runs the transpose2_grad NPU kernel with float32 inputs on
// NPU 0. (Diff view merged the old TEST(transpose_grad, ...) header with the
// new TEST(transpose2_grad, ...) header; only the latter exists post-commit.)
TEST(transpose2_grad, NPU_fp32) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(0));
  CompareGrad<float>(&scope, ctx);
}
python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
浏览文件 @
342252c9
...
...
@@ -30,7 +30,7 @@ paddle.enable_static()
class
TestTransposeOp
(
OpTest
):
def
setUp
(
self
):
self
.
set_npu
()
self
.
op_type
=
"transpose"
self
.
op_type
=
"transpose
2
"
self
.
place
=
paddle
.
NPUPlace
(
0
)
self
.
init_dtype
()
self
.
init_input_output
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录