PaddlePaddle / Paddle
Commit 878e117b (unverified)
Authored Mar 19, 2021 by Chen Weihang; committed via GitHub, Mar 19, 2021.
[CustomOp] Support float16 in custom op (#31725)
* support float16 in custom op
* fix failed unittests
Parent: c9e1d9dc
Showing 13 changed files with 116 additions and 29 deletions (+116 −29).
cmake/inference_lib.cmake                                          +3   −0
paddle/fluid/extension/include/ext_dispatch.h                      +16  −0
paddle/fluid/extension/include/ext_dtype.h                         +6   −0
paddle/fluid/extension/src/ext_tensor.cc                           +16  −0
paddle/fluid/framework/custom_tensor_test.cc                       +11  −0
paddle/fluid/framework/custom_tensor_utils.h                       +4   −0
python/paddle/fluid/tests/custom_op/CMakeLists.txt                 +2   −11
python/paddle/fluid/tests/custom_op/custom_relu_op.cu              +5   −4
python/paddle/fluid/tests/custom_op/dispatch_test_op.cc            +18  −0
python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py     +14  −6
python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py   +12  −2
python/paddle/fluid/tests/custom_op/test_dispatch_jit.py           +6   −0
python/setup.py.in                                                 +3   −6
cmake/inference_lib.cmake
```diff
@@ -198,6 +198,9 @@ copy(inference_lib_dist
 copy(inference_lib_dist
         SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex128.h
         DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
+copy(inference_lib_dist
+        SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h
+        DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
 
 # CAPI inference library for only inference
 set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_install_dir" CACHE STRING
```
paddle/fluid/extension/include/ext_dispatch.h
```diff
@@ -47,6 +47,22 @@ namespace paddle {
     } \
   }()
 
+#define PD_DISPATCH_FLOATING_AND_HALF_TYPES(TYPE, NAME, ...)                 \
+  [&] {                                                                      \
+    const auto& __dtype__ = TYPE;                                            \
+    switch (__dtype__) {                                                     \
+      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float,         \
+                           __VA_ARGS__)                                      \
+      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double,        \
+                           __VA_ARGS__)                                      \
+      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT16,                \
+                           paddle::float16, __VA_ARGS__)                     \
+      default:                                                               \
+        PD_THROW("function " #NAME " is not implemented for data type `",    \
+                 ::paddle::ToString(__dtype__), "`");                        \
+    }                                                                        \
+  }()
+
 ///////// Integral Dispatch Marco ///////////
 
 #define PD_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...)                          \
```
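For orientation, here is a minimal sketch of how a custom CPU op can drive the new macro; it mirrors the test op added in `dispatch_test_op.cc` later in this commit, with the `assign_cpu_kernel` helper written out so the sketch is self-contained (the `paddle/extension.h` umbrella include is an assumption):

```cpp
#include <vector>
#include "paddle/extension.h"  // assumed umbrella header for the extension API

// Element-wise copy kernel, templated over the element type.
template <typename data_t>
void assign_cpu_kernel(const data_t* x, data_t* out, int64_t numel) {
  for (int64_t i = 0; i < numel; ++i) {
    out[i] = x[i];
  }
}

std::vector<paddle::Tensor> AssignForward(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
  out.reshape(x.shape());
  // Instantiates the lambda body as data_t = float, double, or
  // paddle::float16 depending on x.type(); any other dtype hits the
  // PD_THROW default case of the macro.
  PD_DISPATCH_FLOATING_AND_HALF_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
      }));
  return {out};
}
```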
paddle/fluid/extension/include/ext_dtype.h
```diff
@@ -19,11 +19,13 @@ limitations under the License. */
 #include "complex128.h"     // NOLINT
 #include "complex64.h"      // NOLINT
 #include "ext_exception.h"  // NOLINT
+#include "float16.h"        // NOLINT
 
 namespace paddle {
 
 using complex64 = paddle::platform::complex64;
 using complex128 = paddle::platform::complex128;
+using float16 = paddle::platform::float16;
 
 enum class DataType {
   BOOL,
@@ -32,6 +34,7 @@ enum class DataType {
   INT16,
   INT32,
   INT64,
+  FLOAT16,
   FLOAT32,
   FLOAT64,
   COMPLEX64,
@@ -53,6 +56,8 @@ inline std::string ToString(DataType dtype) {
       return "int32_t";
     case DataType::INT64:
       return "int64_t";
+    case DataType::FLOAT16:
+      return "float16";
    case DataType::FLOAT32:
       return "float";
    case DataType::FLOAT64:
@@ -73,6 +78,7 @@ inline std::string ToString(DataType dtype) {
   _(int16_t, DataType::INT16)       \
   _(int, DataType::INT32)           \
   _(int64_t, DataType::INT64)       \
+  _(float16, DataType::FLOAT16)     \
   _(float, DataType::FLOAT32)       \
   _(double, DataType::FLOAT64)      \
   _(complex64, DataType::COMPLEX64) \
```
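A small usage sketch of the extended enum, assuming only what this header declares:

```cpp
#include <iostream>
#include "ext_dtype.h"  // paddle::DataType, paddle::float16, paddle::ToString

int main() {
  paddle::DataType dt = paddle::DataType::FLOAT16;
  // Before this commit FLOAT16 did not exist in the enum and ToString had
  // no case for it; with this change the line below prints "float16".
  std::cout << paddle::ToString(dt) << std::endl;
  return 0;
}
```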
paddle/fluid/extension/src/ext_tensor.cc
```diff
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/complex128.h"
 #include "paddle/fluid/platform/complex64.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/float16.h"
 #include "paddle/fluid/platform/transform.h"
 
 namespace paddle {
@@ -170,6 +171,8 @@ DataType Tensor::type() const {
     return DataType::COMPLEX64;
   } else if (type == framework::proto::VarType::COMPLEX128) {
     return DataType::COMPLEX128;
+  } else if (type == framework::proto::VarType::FP16) {
+    return DataType::FLOAT16;
   }
   // TODO(JiabinYang) Support more dtype here
   return DataType::FLOAT32;
@@ -229,6 +232,8 @@ template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex64>(
     const PlaceType &target_place) const;
 template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex128>(
     const PlaceType &target_place) const;
+template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::float16>(
+    const PlaceType &target_place) const;
 template PD_DLL_DECL float *Tensor::data<float>() const;
 template PD_DLL_DECL double *Tensor::data<double>() const;
@@ -242,6 +247,8 @@ template PD_DLL_DECL paddle::platform::complex64 *
 Tensor::data<paddle::platform::complex64>() const;
 template PD_DLL_DECL paddle::platform::complex128 *
 Tensor::data<paddle::platform::complex128>() const;
+template PD_DLL_DECL paddle::platform::float16 *
+Tensor::data<paddle::platform::float16>() const;
 
 template PD_DLL_DECL float *Tensor::mutable_data<float>();
 template PD_DLL_DECL double *Tensor::mutable_data<double>();
@@ -255,6 +262,8 @@ template PD_DLL_DECL paddle::platform::complex64 *
 Tensor::mutable_data<paddle::platform::complex64>();
 template PD_DLL_DECL paddle::platform::complex128 *
 Tensor::mutable_data<paddle::platform::complex128>();
+template PD_DLL_DECL paddle::platform::float16 *
+Tensor::mutable_data<paddle::platform::float16>();
 
 template PD_DLL_DECL float *Tensor::mutable_data<float>(const PlaceType &place);
 template PD_DLL_DECL double *Tensor::mutable_data<double>(
@@ -274,6 +283,8 @@ template PD_DLL_DECL paddle::platform::complex64 *
 Tensor::mutable_data<paddle::platform::complex64>(const PlaceType &place);
 template PD_DLL_DECL paddle::platform::complex128 *
 Tensor::mutable_data<paddle::platform::complex128>(const PlaceType &place);
+template PD_DLL_DECL paddle::platform::float16 *
+Tensor::mutable_data<paddle::platform::float16>(const PlaceType &place);
 
 std::vector<int64_t> Tensor::shape() const {
   GET_CASTED_TENSOR
@@ -344,6 +355,11 @@ Tensor Tensor::cast(const DataType &target_type) const {
           CastDataType<paddle::platform::complex128>(
               *tensor, rlt_tensor_, ctx));
       break;
+    case framework::proto::VarType::FP16:
+      framework::VisitDataType(
+          dst_type,
+          CastDataType<paddle::platform::float16>(*tensor, rlt_tensor_, ctx));
+      break;
     // TODO(JiabinYang) Support more dtype here
     default:
      PADDLE_THROW(platform::errors::Unimplemented(
```
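Taken together, the new explicit instantiations and the FP16 branch in `Tensor::cast` let extension code round-trip half-precision tensors; a sketch under the same assumed `paddle/extension.h` umbrella header, error handling omitted:

```cpp
#include "paddle/extension.h"  // assumed umbrella header

void HalfPrecisionExample(const paddle::Tensor& x) {
  // Exercises the new FP16 case in Tensor::cast().
  paddle::Tensor x_fp16 = x.cast(paddle::DataType::FLOAT16);

  // Exercises the new explicit instantiations added in this file:
  // data<float16>() and copy_to<float16>().
  const paddle::float16* raw = x_fp16.data<paddle::float16>();
  paddle::Tensor on_gpu =
      x_fp16.copy_to<paddle::float16>(paddle::PlaceType::kGPU);
  (void)raw;
  (void)on_gpu;
}
```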
paddle/fluid/framework/custom_tensor_test.cc
```diff
@@ -113,6 +113,8 @@ void GroupTestCopy() {
   TestCopyTensor<paddle::complex64>();
   VLOG(2) << "complex128 cpu-cpu-gpu-gpu-cpu";
   TestCopyTensor<paddle::complex128>();
+  VLOG(2) << "Fp16 cpu-cpu-gpu-gpu-cpu";
+  TestCopyTensor<paddle::float16>();
 }
 
 void GroupTestCast() {
@@ -134,6 +136,8 @@ void GroupTestCast() {
   TestCast<paddle::complex64>(paddle::DataType::FLOAT32);
   VLOG(2) << "complex128 cast";
   TestCast<paddle::complex128>(paddle::DataType::FLOAT32);
+  VLOG(2) << "float16 cast";
+  TestCast<paddle::float16>(paddle::DataType::FLOAT16);
 }
 
 void GroupTestDtype() {
@@ -146,6 +150,7 @@ void GroupTestDtype() {
   CHECK(TestDtype<uint8_t>() == paddle::DataType::UINT8);
   CHECK(TestDtype<paddle::complex64>() == paddle::DataType::COMPLEX64);
   CHECK(TestDtype<paddle::complex128>() == paddle::DataType::COMPLEX128);
+  CHECK(TestDtype<paddle::float16>() == paddle::DataType::FLOAT16);
 }
 
 void GroupTestDtypeConvert() {
@@ -178,6 +183,9 @@ void GroupTestDtypeConvert() {
   CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
             paddle::DataType::COMPLEX128) ==
         paddle::framework::proto::VarType::COMPLEX128);
+  CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
+            paddle::DataType::FLOAT16) ==
+        paddle::framework::proto::VarType::FP16);
   // proto -> enum
   CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
             paddle::framework::proto::VarType::FP64) ==
@@ -207,6 +215,9 @@ void GroupTestDtypeConvert() {
   CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
             paddle::framework::proto::VarType::COMPLEX128) ==
         paddle::DataType::COMPLEX128);
+  CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
+            paddle::framework::proto::VarType::FP16) ==
+        paddle::DataType::FLOAT16);
 }
 
 TEST(CustomTensor, copyTest) {
```
paddle/fluid/framework/custom_tensor_utils.h
```diff
@@ -60,6 +60,8 @@ class CustomTensorUtils {
         return framework::proto::VarType::COMPLEX64;
       case paddle::DataType::COMPLEX128:
         return framework::proto::VarType::COMPLEX128;
+      case paddle::DataType::FLOAT16:
+        return framework::proto::VarType::FP16;
       case paddle::DataType::BOOL:
         return framework::proto::VarType::BOOL;
       default:
@@ -91,6 +93,8 @@ class CustomTensorUtils {
         return paddle::DataType::COMPLEX64;
       case framework::proto::VarType::COMPLEX128:
         return paddle::DataType::COMPLEX128;
+      case framework::proto::VarType::FP16:
+        return paddle::DataType::FLOAT16;
       case framework::proto::VarType::BOOL:
         return paddle::DataType::BOOL;
       default:
```
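The two switches are inverses of each other, and the new FP16 cases keep that symmetry, as the checks added in `custom_tensor_test.cc` above verify. A round-trip sketch (the helper name `Fp16DtypeRoundTrip` is ours, for illustration):

```cpp
#include "paddle/fluid/framework/custom_tensor_utils.h"

void Fp16DtypeRoundTrip() {
  namespace fw = paddle::framework;
  // enum -> proto: the first new case.
  auto proto_dt = fw::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
      paddle::DataType::FLOAT16);  // yields fw::proto::VarType::FP16
  // proto -> enum: the second new case, undoing the first.
  auto enum_dt =
      fw::CustomTensorUtils::ConvertInnerDTypeToEnumDType(proto_dt);
  // enum_dt is paddle::DataType::FLOAT16 again.
  (void)enum_dt;
}
```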
python/paddle/fluid/tests/custom_op/CMakeLists.txt
```diff
@@ -13,24 +13,15 @@ endif()
 py_test(test_sysconfig SRCS test_sysconfig.py)
 
-# 'test_dispatch' compile .cc file
+# CPU custom op tests: only compile .cc file
 py_test(test_dispatch_jit SRCS test_dispatch_jit.py)
-set_tests_properties(test_dispatch_jit PROPERTIES TIMEOUT 120)
-
 py_test(test_multi_out_jit SRCS test_multi_out_jit.py)
-set_tests_properties(test_multi_out_jit PROPERTIES TIMEOUT 120)
-
 py_test(test_custom_attrs_jit SRCS test_custom_attrs_jit.py)
-set_tests_properties(test_custom_attrs_jit PROPERTIES TIMEOUT 120)
-
 py_test(test_custom_concat SRCS test_custom_concat.py)
-set_tests_properties(test_custom_concat PROPERTIES TIMEOUT 120)
-
 py_test(test_custom_conj SRCS test_custom_conj.py)
-set_tests_properties(test_custom_conj PROPERTIES TIMEOUT 120)
 
+# other tests
 py_test(test_check_abi SRCS test_check_abi.py)
 cc_test(test_check_error SRCS test_check_error.cc DEPS gtest)
 
 if(NOT LINUX)
```
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
```diff
@@ -20,7 +20,7 @@ __global__ void relu_cuda_forward_kernel(const data_t* x,
                                          const int num) {
   int gid = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
-    y[i] = max(x[i], static_cast<data_t>(0.));
+    y[i] = x[i] > static_cast<data_t>(0.) ? x[i] : static_cast<data_t>(0.);
   }
 }
 
@@ -31,7 +31,8 @@ __global__ void relu_cuda_backward_kernel(const data_t* dy,
                                           const int num) {
   int gid = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
-    dx[i] = dy[i] * (y[i] > 0 ? 1. : 0.);
+    dx[i] = dy[i] * (y[i] > static_cast<data_t>(0.) ? static_cast<data_t>(1.)
+                                                    : static_cast<data_t>(0.));
   }
 }
 
@@ -42,7 +43,7 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
   int numel = x.size();
   int block = 512;
   int grid = (numel + block - 1) / block;
-  PD_DISPATCH_FLOATING_TYPES(
+  PD_DISPATCH_FLOATING_AND_HALF_TYPES(
       x.type(), "relu_cuda_forward_kernel", ([&] {
         relu_cuda_forward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
             x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel);
@@ -60,7 +61,7 @@ std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
   int numel = out.size();
   int block = 512;
   int grid = (numel + block - 1) / block;
-  PD_DISPATCH_FLOATING_TYPES(
+  PD_DISPATCH_FLOATING_AND_HALF_TYPES(
       out.type(), "relu_cuda_backward_kernel", ([&] {
         relu_cuda_backward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
             grad_out.data<data_t>(),
```
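The kernel edits follow one pattern: every literal and comparison now goes through `static_cast<data_t>`, since `data_t` may now be `paddle::float16`, which presumably lacks the mixed half/double operator overloads and the device `max` overload the old expressions relied on. A distilled sketch of the pattern (function names are ours, for illustration):

```cpp
// Dtype-generic ReLU forward/backward expressions: all constants are cast
// to data_t so the same code instantiates cleanly for float, double, and
// paddle::float16 (assumption: float16 defines operator> and operator*
// only against its own type).
template <typename data_t>
data_t relu_fwd(data_t x) {
  return x > static_cast<data_t>(0.) ? x : static_cast<data_t>(0.);
}

template <typename data_t>
data_t relu_bwd(data_t dy, data_t y) {
  return dy * (y > static_cast<data_t>(0.) ? static_cast<data_t>(1.)
                                           : static_cast<data_t>(0.));
}
```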
python/paddle/fluid/tests/custom_op/dispatch_test_op.cc
```diff
@@ -118,3 +118,21 @@ PD_BUILD_OP(dispatch_test_float_and_integer_and_complex)
     .Inputs({"X"})
     .Outputs({"Out"})
     .SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex));
+
+std::vector<paddle::Tensor> DispatchTestFloatAndHalf(const paddle::Tensor& x) {
+  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
+  out.reshape(x.shape());
+
+  PD_DISPATCH_FLOATING_AND_HALF_TYPES(
+      x.type(), "assign_cpu_kernel", ([&] {
+        assign_cpu_kernel<data_t>(
+            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
+      }));
+
+  return {out};
+}
+
+PD_BUILD_OP(dispatch_test_float_and_half)
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(DispatchTestFloatAndHalf));
```
python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
```diff
@@ -50,11 +50,17 @@ class TestJITLoad(unittest.TestCase):
             custom_module.custom_relu, custom_module.custom_relu_dup
         ]
         self.dtypes = ['float32', 'float64']
-        self.devices = ['cpu', 'gpu']
+        if paddle.is_compiled_with_cuda():
+            self.dtypes.append('float16')
+        self.devices = ['cpu']
+        if paddle.is_compiled_with_cuda():
+            self.devices.append('gpu')
 
     def test_static(self):
         for device in self.devices:
             for dtype in self.dtypes:
+                if device == 'cpu' and dtype == 'float16':
+                    continue
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
                     out = custom_relu_static(custom_op, device, dtype, x)
@@ -68,6 +74,8 @@ class TestJITLoad(unittest.TestCase):
     def test_dynamic(self):
         for device in self.devices:
             for dtype in self.dtypes:
+                if device == 'cpu' and dtype == 'float16':
+                    continue
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
                     out, x_grad = custom_relu_dynamic(custom_op, device, dtype,
@@ -87,7 +95,7 @@ class TestJITLoad(unittest.TestCase):
         caught_exception = False
         try:
             x = np.random.uniform(-1, 1, [4, 8]).astype('int32')
-            custom_relu_dynamic(custom_module.custom_relu, 'cpu', 'float32', x)
+            custom_relu_dynamic(custom_module.custom_relu, 'cpu', 'int32', x)
         except OSError as e:
             caught_exception = True
             self.assertTrue(
@@ -105,15 +113,15 @@ class TestJITLoad(unittest.TestCase):
         caught_exception = False
         try:
-            x = np.random.uniform(-1, 1, [4, 8]).astype('int64')
-            custom_relu_dynamic(custom_module.custom_relu, 'gpu', 'float32', x)
+            x = np.random.uniform(-1, 1, [4, 8]).astype('int32')
+            custom_relu_dynamic(custom_module.custom_relu, 'gpu', 'int32', x)
         except OSError as e:
             caught_exception = True
             self.assertTrue(
-                "function \"relu_cuda_forward_kernel\" is not implemented for data type `int64_t`"
+                "function \"relu_cuda_forward_kernel\" is not implemented for data type `int32_t`"
                 in str(e))
             self.assertTrue(
-                "python/paddle/fluid/tests/custom_op/custom_relu_op.cu:49" in
+                "python/paddle/fluid/tests/custom_op/custom_relu_op.cu:50" in
                 str(e))
         self.assertTrue(caught_exception)
```
python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
```diff
@@ -26,7 +26,7 @@ from paddle.utils.cpp_extension.extension_utils import run_cmd
 def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     paddle.set_device(device)
 
-    t = paddle.to_tensor(np_x)
+    t = paddle.to_tensor(np_x, dtype=dtype)
     t.stop_gradient = False
 
     out = func(t) if use_func else paddle.nn.functional.relu(t)
@@ -171,7 +171,11 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         ]
         self.dtypes = ['float32', 'float64']
-        self.devices = ['cpu', 'gpu']
+        if paddle.is_compiled_with_cuda():
+            self.dtypes.append('float16')
+        self.devices = ['cpu']
+        if paddle.is_compiled_with_cuda():
+            self.devices.append('gpu')
 
         # config seed
         SEED = 2021
@@ -181,6 +185,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
     def test_static(self):
         for device in self.devices:
             for dtype in self.dtypes:
+                if device == 'cpu' and dtype == 'float16':
+                    continue
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
                     out = custom_relu_static(custom_op, device, dtype, x)
@@ -194,6 +200,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
     def test_static_pe(self):
         for device in self.devices:
             for dtype in self.dtypes:
+                if device == 'cpu' and dtype == 'float16':
+                    continue
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
                     out = custom_relu_static_pe(custom_op, device, dtype, x)
@@ -207,6 +215,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
     def test_dynamic(self):
         for device in self.devices:
             for dtype in self.dtypes:
+                if device == 'cpu' and dtype == 'float16':
+                    continue
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
                     out, x_grad = custom_relu_dynamic(custom_op, device, dtype,
```
python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
```diff
@@ -83,6 +83,12 @@ class TestJitDispatch(unittest.TestCase):
             self.run_dispatch_test(
                 dispatch_op.dispatch_test_float_and_integer_and_complex, dtype)
 
+    def test_dispatch_float_and_half(self):
+        dtypes = ["float32", "float64", "float16"]
+        for dtype in dtypes:
+            self.run_dispatch_test(
+                dispatch_op.dispatch_test_float_and_half, dtype)
+
 
 if __name__ == '__main__':
     unittest.main()
```
python/setup.py.in
```diff
@@ -453,15 +453,12 @@ class InstallHeaders(Command):
     def copy_data_type_headers(self, header):
         if os.name == 'nt':
-            data_type_headers = ['platform\\complex64.h', 'platform\\complex128.h']
+            data_type_headers = ['platform\\complex64.h', 'platform\\complex128.h', 'platform\\float16.h']
         else:
-            data_type_headers = ['platform/complex64.h', 'platform/complex128.h']
+            data_type_headers = ['platform/complex64.h', 'platform/complex128.h', 'platform/float16.h']
         for dtype_header in data_type_headers:
             if dtype_header in header:
-                if os.name == 'nt':
-                    install_dir = os.path.join(self.install_dir, "paddle\\fluid\\extension\\include")
-                else:
-                    install_dir = os.path.join(self.install_dir, "paddle/fluid/extension/include")
+                install_dir = os.path.join(self.install_dir, "paddle/fluid/extension/include")
                 if not os.path.exists(install_dir):
                     self.mkpath(install_dir)
                 return self.copy_file(header, install_dir)
```