Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
941308c2
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
941308c2
编写于
6月 04, 2021
作者:
L
limingshu
提交者:
GitHub
6月 04, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Reimplement logical functors with the new optimized elementwise function (#33089)
上级
8752c912
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
120 addition
and
58 deletion
+120
-58
paddle/fluid/operators/controlflow/logical_op.cu
paddle/fluid/operators/controlflow/logical_op.cu
+64
-8
paddle/fluid/operators/elementwise/elementwise_max_op.cu
paddle/fluid/operators/elementwise/elementwise_max_op.cu
+0
-1
paddle/fluid/operators/elementwise/elementwise_min_op.cu
paddle/fluid/operators/elementwise/elementwise_min_op.cu
+0
-1
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
+2
-41
paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h
...fluid/operators/elementwise/elementwise_op_broadcast.cu.h
+6
-0
paddle/fluid/operators/elementwise/elementwise_op_function.h
paddle/fluid/operators/elementwise/elementwise_op_function.h
+48
-7
未找到文件。
paddle/fluid/operators/controlflow/logical_op.cu
浏览文件 @
941308c2
...
@@ -13,12 +13,68 @@ See the License for the specific language governing permissions and
...
@@ -13,12 +13,68 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/controlflow/logical_op.h"
#include "paddle/fluid/operators/controlflow/logical_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
REGISTER_BINARY_LOGICAL_KERNEL
(
logical_and
,
CUDA
,
namespace
ops
=
paddle
::
operators
;
paddle
::
operators
::
LogicalAndFunctor
);
namespace
plat
=
paddle
::
platform
;
REGISTER_BINARY_LOGICAL_KERNEL
(
logical_or
,
CUDA
,
paddle
::
operators
::
LogicalOrFunctor
);
namespace
paddle
{
REGISTER_UNARY_LOGICAL_KERNEL
(
logical_not
,
CUDA
,
namespace
operators
{
paddle
::
operators
::
LogicalNotFunctor
);
REGISTER_BINARY_LOGICAL_KERNEL
(
logical_xor
,
CUDA
,
#define LOGICAL_BINARY_FUNCTOR(func_name, op) \
paddle
::
operators
::
LogicalXorFunctor
);
template <typename T> \
struct func_name { \
using ELEMENT_TYPE = T; \
HOSTDEVICE bool operator()(const T* args) const { \
return args[0] op args[1]; \
} \
};
LOGICAL_BINARY_FUNCTOR
(
CudaOrFunctor
,
||
)
LOGICAL_BINARY_FUNCTOR
(
CudaAndFunctor
,
&&
)
LOGICAL_BINARY_FUNCTOR
(
CudaXorFunctor
,
^
)
#undef LOGICAL_BINARY_FUNCTOR
template
<
typename
T
>
struct
CudaNotFunctor
{
using
ELEMENT_TYPE
=
T
;
HOSTDEVICE
bool
operator
()(
const
T
*
args
)
const
{
return
!
args
[
0
];
}
};
template
<
typename
Functor
>
class
BinaryLogicalOpKernel
<
platform
::
CUDADeviceContext
,
Functor
>
:
public
framework
::
OpKernel
<
typename
Functor
::
ELEMENT_TYPE
>
{
public:
using
InT
=
typename
Functor
::
ELEMENT_TYPE
;
using
OutT
=
bool
;
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
functor
=
Functor
();
std
::
vector
<
const
framework
::
Tensor
*>
ins
;
std
::
vector
<
framework
::
Tensor
*>
outs
;
const
auto
&
cuda_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
int
axis
=
PackTensorsIntoVector
<
OutT
>
(
ctx
,
&
ins
,
&
outs
);
if
(
ins
.
size
()
==
1
)
{
LaunchElementwiseCudaKernel
<
ElementwiseType
::
kUnary
,
InT
,
OutT
>
(
cuda_ctx
,
ins
,
&
outs
,
axis
,
functor
);
}
else
{
LaunchElementwiseCudaKernel
<
ElementwiseType
::
kBinary
,
InT
,
OutT
>
(
cuda_ctx
,
ins
,
&
outs
,
axis
,
functor
);
}
}
};
}
// namespace operators
}
// namespace paddle
#define REGISTER_LOGICAL_CUDA_KERNEL(op_name, func) \
REGISTER_OP_CUDA_KERNEL( \
op_name, \
ops::BinaryLogicalOpKernel<plat::CUDADeviceContext, ops::func<bool>>);
REGISTER_LOGICAL_CUDA_KERNEL
(
logical_or
,
CudaOrFunctor
)
REGISTER_LOGICAL_CUDA_KERNEL
(
logical_and
,
CudaAndFunctor
)
REGISTER_LOGICAL_CUDA_KERNEL
(
logical_xor
,
CudaXorFunctor
)
REGISTER_LOGICAL_CUDA_KERNEL
(
logical_not
,
CudaNotFunctor
)
#undef REGISTER_LOGICAL_CUDA_KERNEL
paddle/fluid/operators/elementwise/elementwise_max_op.cu
浏览文件 @
941308c2
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.cu.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/elementwise/elementwise_min_op.cu
浏览文件 @
941308c2
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_min_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_min_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.cu.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
浏览文件 @
941308c2
...
@@ -36,52 +36,13 @@ class ElementwiseMulKernel<platform::CUDADeviceContext, T>
...
@@ -36,52 +36,13 @@ class ElementwiseMulKernel<platform::CUDADeviceContext, T>
:
public
framework
::
OpKernel
<
T
>
{
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
int
axis
=
-
1
;
framework
::
Tensor
x_for_selectedrows
;
auto
x_var
=
ctx
.
InputVar
(
"X"
);
PADDLE_ENFORCE_NOT_NULL
(
x_var
,
platform
::
errors
::
InvalidArgument
(
"Cannot get input Variable X, Variable name = %s."
,
ctx
.
InputName
(
"X"
)));
auto
*
y
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Y"
);
framework
::
Tensor
x
,
*
z
;
std
::
vector
<
const
framework
::
Tensor
*>
ins
;
std
::
vector
<
const
framework
::
Tensor
*>
ins
;
std
::
vector
<
framework
::
Tensor
*>
outs
;
std
::
vector
<
framework
::
Tensor
*>
outs
;
const
auto
&
cuda_ctx
=
const
auto
&
cuda_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
if
(
x_var
->
IsType
<
framework
::
LoDTensor
>
())
{
int
axis
=
PackTensorsIntoVector
<
T
>
(
ctx
,
&
ins
,
&
outs
,
&
x_for_selectedrows
);
x
=
x_var
->
Get
<
framework
::
LoDTensor
>
();
z
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
axis
=
PackTensorsIntoVector
<
T
>
(
ctx
,
&
ins
,
&
outs
);
}
else
if
(
x_var
->
IsType
<
framework
::
SelectedRows
>
())
{
PADDLE_ENFORCE_EQ
(
y
->
dims
().
size
()
==
1
&&
y
->
dims
()[
0
]
==
1
,
true
,
platform
::
errors
::
InvalidArgument
(
"For elementwise_op, if X is Sparse, Y must be "
"scalar. But reveived the size of Y = %s."
,
y
->
dims
().
size
()));
auto
&
x_sele
=
x_var
->
Get
<
framework
::
SelectedRows
>
();
auto
out_sele
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
"Out"
);
x
=
x_sele
.
value
();
out_sele
->
set_rows
(
x_sele
.
rows
());
out_sele
->
set_height
(
x_sele
.
height
());
out_sele
->
mutable_value
()
->
Resize
(
x_sele
.
value
().
dims
());
out_sele
->
mutable_value
()
->
mutable_data
(
ctx
.
GetPlace
(),
x
.
type
());
z
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
"Out"
)
->
mutable_value
();
z
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
outs
.
emplace_back
(
z
);
ins
.
emplace_back
(
&
x
);
ins
.
emplace_back
(
y
);
axis
=
ctx
.
HasAttr
(
"axis"
)
?
ctx
.
Attr
<
int
>
(
"axis"
)
:
-
1
;
axis
=
axis
==
-
1
?
std
::
abs
(
y
->
dims
().
size
()
-
x
.
dims
().
size
())
:
axis
;
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"X's type[%s] is not supported by elementwise_op. X's type should be "
"LoDTensor or SelectedRows."
,
framework
::
ToTypeName
(
x_var
->
Type
())));
}
LaunchElementwiseCudaKernel
<
ElementwiseType
::
kBinary
,
T
,
T
>
(
LaunchElementwiseCudaKernel
<
ElementwiseType
::
kBinary
,
T
,
T
>
(
cuda_ctx
,
ins
,
&
outs
,
axis
,
CudaMulFunctor
<
T
>
());
cuda_ctx
,
ins
,
&
outs
,
axis
,
CudaMulFunctor
<
T
>
());
}
}
...
...
paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h
浏览文件 @
941308c2
...
@@ -509,15 +509,21 @@ void LaunchElementwiseCudaKernel(
...
@@ -509,15 +509,21 @@ void LaunchElementwiseCudaKernel(
const
platform
::
CUDADeviceContext
&
cuda_ctx
,
const
platform
::
CUDADeviceContext
&
cuda_ctx
,
const
std
::
vector
<
const
framework
::
Tensor
*>
&
ins
,
const
std
::
vector
<
const
framework
::
Tensor
*>
&
ins
,
std
::
vector
<
framework
::
Tensor
*>
*
outs
,
int
axis
,
Functor
func
)
{
std
::
vector
<
framework
::
Tensor
*>
*
outs
,
int
axis
,
Functor
func
)
{
std
::
vector
<
int
>
dims_size
;
bool
no_broadcast_flag
=
true
;
bool
no_broadcast_flag
=
true
;
for
(
auto
*
in
:
ins
)
{
for
(
auto
*
in
:
ins
)
{
no_broadcast_flag
=
ins
[
0
]
->
dims
()
==
in
->
dims
();
no_broadcast_flag
=
ins
[
0
]
->
dims
()
==
in
->
dims
();
dims_size
.
emplace_back
(
in
->
dims
().
size
());
}
}
if
(
no_broadcast_flag
)
{
if
(
no_broadcast_flag
)
{
LaunchSameDimsElementwiseCudaKernel
<
ET
,
InT
,
OutT
>
(
cuda_ctx
,
ins
,
outs
,
LaunchSameDimsElementwiseCudaKernel
<
ET
,
InT
,
OutT
>
(
cuda_ctx
,
ins
,
outs
,
func
);
func
);
}
else
{
}
else
{
axis
=
axis
==
-
1
?
*
std
::
max_element
(
dims_size
.
begin
(),
dims_size
.
end
())
-
*
std
::
min_element
(
dims_size
.
begin
(),
dims_size
.
end
())
:
axis
;
LaunchBroadcastElementwiseCudaKernel
<
ET
,
InT
,
OutT
>
(
cuda_ctx
,
ins
,
outs
,
LaunchBroadcastElementwiseCudaKernel
<
ET
,
InT
,
OutT
>
(
cuda_ctx
,
ins
,
outs
,
axis
,
func
);
axis
,
func
);
}
}
...
...
paddle/fluid/operators/elementwise/elementwise_op_function.h
浏览文件 @
941308c2
...
@@ -61,25 +61,66 @@ namespace paddle {
...
@@ -61,25 +61,66 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
/*
/*
* To pack the input and output tnesors into vector for
* Pack input and output tensors into respective vectors with
* LaunchElementwiseCudaKernel
* consideration of varible X`s class type.
* Input variable X is supported to be whether LoDTensor or
* SelectedRows class type in this package function, once X
* was SelectedRows type, a valid pointer x_for_selectedrows
* is excepted to be passed in from op kernel for acquisition
* of the valid address of LoDTensor created ahead in the function.
*/
*/
template
<
typename
OutT
>
template
<
typename
OutT
>
int
PackTensorsIntoVector
(
const
framework
::
ExecutionContext
&
ctx
,
int
PackTensorsIntoVector
(
const
framework
::
ExecutionContext
&
ctx
,
std
::
vector
<
const
framework
::
Tensor
*>
*
ins
,
std
::
vector
<
const
framework
::
Tensor
*>
*
ins
,
std
::
vector
<
framework
::
Tensor
*>
*
outs
)
{
std
::
vector
<
framework
::
Tensor
*>
*
outs
,
framework
::
Tensor
*
x_for_selectedrows
=
nullptr
)
{
int
axis
=
-
1
;
int
axis
=
-
1
;
auto
*
x
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
auto
x_var
=
ctx
.
InputVar
(
"X"
);
PADDLE_ENFORCE_NOT_NULL
(
x_var
,
platform
::
errors
::
InvalidArgument
(
"Unable to get input Variable X, Variable name is %s.
\n
"
,
ctx
.
InputName
(
"X"
)));
auto
*
y
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Y"
);
auto
*
y
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Y"
);
auto
*
z
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
framework
::
Tensor
*
z
;
if
(
x_var
->
IsType
<
framework
::
LoDTensor
>
())
{
auto
*
x
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
z
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
ins
->
emplace_back
(
x
);
}
else
if
(
x_var
->
IsType
<
framework
::
SelectedRows
>
())
{
PADDLE_ENFORCE_EQ
(
y
->
dims
().
size
()
==
1
&&
y
->
dims
()[
0
]
==
1
,
true
,
platform
::
errors
::
InvalidArgument
(
"For elementwise_op, if X is Sparse, Y must be "
"scalar. But reveived the size of Y = %d."
,
y
->
dims
().
size
()));
PADDLE_ENFORCE_NOT_NULL
(
x_for_selectedrows
,
platform
::
errors
::
InvalidArgument
(
"The parameter x_for_selectedrows is excepted to "
"be valid, once input varible X`s class type is "
"SelectedRows.
\n
"
));
auto
&
x_sele
=
x_var
->
Get
<
framework
::
SelectedRows
>
();
auto
out_sele
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
"Out"
);
*
x_for_selectedrows
=
x_sele
.
value
();
out_sele
->
set_rows
(
x_sele
.
rows
());
out_sele
->
set_height
(
x_sele
.
height
());
out_sele
->
mutable_value
()
->
Resize
(
x_sele
.
value
().
dims
());
out_sele
->
mutable_value
()
->
mutable_data
(
ctx
.
GetPlace
(),
x_for_selectedrows
->
type
());
z
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
"Out"
)
->
mutable_value
();
ins
->
emplace_back
(
x_for_selectedrows
);
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"X's type[%s] is not supported by elementwise_op. X's type should be "
"LoDTensor or SelectedRows."
,
framework
::
ToTypeName
(
x_var
->
Type
())));
}
z
->
mutable_data
<
OutT
>
(
ctx
.
GetPlace
());
z
->
mutable_data
<
OutT
>
(
ctx
.
GetPlace
());
outs
->
emplace_back
(
z
);
outs
->
emplace_back
(
z
);
ins
->
emplace_back
(
x
);
if
(
y
!=
nullptr
)
{
if
(
y
!=
nullptr
)
{
ins
->
emplace_back
(
y
);
ins
->
emplace_back
(
y
);
axis
=
ctx
.
HasAttr
(
"axis"
)
?
ctx
.
Attr
<
int
>
(
"axis"
)
:
-
1
;
axis
=
ctx
.
HasAttr
(
"axis"
)
?
ctx
.
Attr
<
int
>
(
"axis"
)
:
-
1
;
axis
=
axis
==
-
1
?
std
::
abs
(
y
->
dims
().
size
()
-
x
->
dims
().
size
())
:
axis
;
}
}
return
axis
;
return
axis
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录