Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
878e117b
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
878e117b
编写于
3月 19, 2021
作者:
C
Chen Weihang
提交者:
GitHub
3月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[CustomOp] Support float16 in custom op (#31725)
* support float16 in custom op * fix failed unittests
上级
c9e1d9dc
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
116 addition
and
29 deletion
+116
-29
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+3
-0
paddle/fluid/extension/include/ext_dispatch.h
paddle/fluid/extension/include/ext_dispatch.h
+16
-0
paddle/fluid/extension/include/ext_dtype.h
paddle/fluid/extension/include/ext_dtype.h
+6
-0
paddle/fluid/extension/src/ext_tensor.cc
paddle/fluid/extension/src/ext_tensor.cc
+16
-0
paddle/fluid/framework/custom_tensor_test.cc
paddle/fluid/framework/custom_tensor_test.cc
+11
-0
paddle/fluid/framework/custom_tensor_utils.h
paddle/fluid/framework/custom_tensor_utils.h
+4
-0
python/paddle/fluid/tests/custom_op/CMakeLists.txt
python/paddle/fluid/tests/custom_op/CMakeLists.txt
+2
-11
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
+5
-4
python/paddle/fluid/tests/custom_op/dispatch_test_op.cc
python/paddle/fluid/tests/custom_op/dispatch_test_op.cc
+18
-0
python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
...n/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
+14
-6
python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
...paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
+12
-2
python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
+6
-0
python/setup.py.in
python/setup.py.in
+3
-6
未找到文件。
cmake/inference_lib.cmake
浏览文件 @
878e117b
...
@@ -198,6 +198,9 @@ copy(inference_lib_dist
...
@@ -198,6 +198,9 @@ copy(inference_lib_dist
copy
(
inference_lib_dist
copy
(
inference_lib_dist
SRCS
${
PADDLE_SOURCE_DIR
}
/paddle/fluid/platform/complex128.h
SRCS
${
PADDLE_SOURCE_DIR
}
/paddle/fluid/platform/complex128.h
DSTS
${
PADDLE_INFERENCE_INSTALL_DIR
}
/paddle/include/experimental/
)
DSTS
${
PADDLE_INFERENCE_INSTALL_DIR
}
/paddle/include/experimental/
)
copy
(
inference_lib_dist
SRCS
${
PADDLE_SOURCE_DIR
}
/paddle/fluid/platform/float16.h
DSTS
${
PADDLE_INFERENCE_INSTALL_DIR
}
/paddle/include/experimental/
)
# CAPI inference library for only inference
# CAPI inference library for only inference
set
(
PADDLE_INFERENCE_C_INSTALL_DIR
"
${
CMAKE_BINARY_DIR
}
/paddle_inference_c_install_dir"
CACHE STRING
set
(
PADDLE_INFERENCE_C_INSTALL_DIR
"
${
CMAKE_BINARY_DIR
}
/paddle_inference_c_install_dir"
CACHE STRING
...
...
paddle/fluid/extension/include/ext_dispatch.h
浏览文件 @
878e117b
...
@@ -47,6 +47,22 @@ namespace paddle {
...
@@ -47,6 +47,22 @@ namespace paddle {
} \
} \
}()
}()
#define PD_DISPATCH_FLOATING_AND_HALF_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT16, paddle::float16, \
__VA_ARGS__) \
default: \
PD_THROW("function " #NAME " is not implemented for data type `", \
::paddle::ToString(__dtype__), "`"); \
} \
}()
///////// Integral Dispatch Marco ///////////
///////// Integral Dispatch Marco ///////////
#define PD_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...) \
#define PD_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...) \
...
...
paddle/fluid/extension/include/ext_dtype.h
浏览文件 @
878e117b
...
@@ -19,11 +19,13 @@ limitations under the License. */
...
@@ -19,11 +19,13 @@ limitations under the License. */
#include "complex128.h" // NOLINT
#include "complex128.h" // NOLINT
#include "complex64.h" // NOLINT
#include "complex64.h" // NOLINT
#include "ext_exception.h" // NOLINT
#include "ext_exception.h" // NOLINT
#include "float16.h" // NOLINT
namespace
paddle
{
namespace
paddle
{
using
complex64
=
paddle
::
platform
::
complex64
;
using
complex64
=
paddle
::
platform
::
complex64
;
using
complex128
=
paddle
::
platform
::
complex128
;
using
complex128
=
paddle
::
platform
::
complex128
;
using
float16
=
paddle
::
platform
::
float16
;
enum
class
DataType
{
enum
class
DataType
{
BOOL
,
BOOL
,
...
@@ -32,6 +34,7 @@ enum class DataType {
...
@@ -32,6 +34,7 @@ enum class DataType {
INT16
,
INT16
,
INT32
,
INT32
,
INT64
,
INT64
,
FLOAT16
,
FLOAT32
,
FLOAT32
,
FLOAT64
,
FLOAT64
,
COMPLEX64
,
COMPLEX64
,
...
@@ -53,6 +56,8 @@ inline std::string ToString(DataType dtype) {
...
@@ -53,6 +56,8 @@ inline std::string ToString(DataType dtype) {
return
"int32_t"
;
return
"int32_t"
;
case
DataType
::
INT64
:
case
DataType
::
INT64
:
return
"int64_t"
;
return
"int64_t"
;
case
DataType
::
FLOAT16
:
return
"float16"
;
case
DataType
::
FLOAT32
:
case
DataType
::
FLOAT32
:
return
"float"
;
return
"float"
;
case
DataType
::
FLOAT64
:
case
DataType
::
FLOAT64
:
...
@@ -73,6 +78,7 @@ inline std::string ToString(DataType dtype) {
...
@@ -73,6 +78,7 @@ inline std::string ToString(DataType dtype) {
_(int16_t, DataType::INT16) \
_(int16_t, DataType::INT16) \
_(int, DataType::INT32) \
_(int, DataType::INT32) \
_(int64_t, DataType::INT64) \
_(int64_t, DataType::INT64) \
_(float16, DataType::FLOAT16) \
_(float, DataType::FLOAT32) \
_(float, DataType::FLOAT32) \
_(double, DataType::FLOAT64) \
_(double, DataType::FLOAT64) \
_(complex64, DataType::COMPLEX64) \
_(complex64, DataType::COMPLEX64) \
...
...
paddle/fluid/extension/src/ext_tensor.cc
浏览文件 @
878e117b
...
@@ -22,6 +22,7 @@ limitations under the License. */
...
@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/platform/complex128.h"
#include "paddle/fluid/platform/complex128.h"
#include "paddle/fluid/platform/complex64.h"
#include "paddle/fluid/platform/complex64.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/transform.h"
#include "paddle/fluid/platform/transform.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -170,6 +171,8 @@ DataType Tensor::type() const {
...
@@ -170,6 +171,8 @@ DataType Tensor::type() const {
return
DataType
::
COMPLEX64
;
return
DataType
::
COMPLEX64
;
}
else
if
(
type
==
framework
::
proto
::
VarType
::
COMPLEX128
)
{
}
else
if
(
type
==
framework
::
proto
::
VarType
::
COMPLEX128
)
{
return
DataType
::
COMPLEX128
;
return
DataType
::
COMPLEX128
;
}
else
if
(
type
==
framework
::
proto
::
VarType
::
FP16
)
{
return
DataType
::
FLOAT16
;
}
}
// TODO(JiabinYang) Support more dtype here
// TODO(JiabinYang) Support more dtype here
return
DataType
::
FLOAT32
;
return
DataType
::
FLOAT32
;
...
@@ -229,6 +232,8 @@ template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex64>(
...
@@ -229,6 +232,8 @@ template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex64>(
const
PlaceType
&
target_place
)
const
;
const
PlaceType
&
target_place
)
const
;
template
PD_DLL_DECL
Tensor
Tensor
::
copy_to
<
paddle
::
platform
::
complex128
>(
template
PD_DLL_DECL
Tensor
Tensor
::
copy_to
<
paddle
::
platform
::
complex128
>(
const
PlaceType
&
target_place
)
const
;
const
PlaceType
&
target_place
)
const
;
template
PD_DLL_DECL
Tensor
Tensor
::
copy_to
<
paddle
::
platform
::
float16
>(
const
PlaceType
&
target_place
)
const
;
template
PD_DLL_DECL
float
*
Tensor
::
data
<
float
>()
const
;
template
PD_DLL_DECL
float
*
Tensor
::
data
<
float
>()
const
;
template
PD_DLL_DECL
double
*
Tensor
::
data
<
double
>()
const
;
template
PD_DLL_DECL
double
*
Tensor
::
data
<
double
>()
const
;
...
@@ -242,6 +247,8 @@ template PD_DLL_DECL paddle::platform::complex64 *
...
@@ -242,6 +247,8 @@ template PD_DLL_DECL paddle::platform::complex64 *
Tensor
::
data
<
paddle
::
platform
::
complex64
>()
const
;
Tensor
::
data
<
paddle
::
platform
::
complex64
>()
const
;
template
PD_DLL_DECL
paddle
::
platform
::
complex128
*
template
PD_DLL_DECL
paddle
::
platform
::
complex128
*
Tensor
::
data
<
paddle
::
platform
::
complex128
>()
const
;
Tensor
::
data
<
paddle
::
platform
::
complex128
>()
const
;
template
PD_DLL_DECL
paddle
::
platform
::
float16
*
Tensor
::
data
<
paddle
::
platform
::
float16
>()
const
;
template
PD_DLL_DECL
float
*
Tensor
::
mutable_data
<
float
>();
template
PD_DLL_DECL
float
*
Tensor
::
mutable_data
<
float
>();
template
PD_DLL_DECL
double
*
Tensor
::
mutable_data
<
double
>();
template
PD_DLL_DECL
double
*
Tensor
::
mutable_data
<
double
>();
...
@@ -255,6 +262,8 @@ template PD_DLL_DECL paddle::platform::complex64 *
...
@@ -255,6 +262,8 @@ template PD_DLL_DECL paddle::platform::complex64 *
Tensor
::
mutable_data
<
paddle
::
platform
::
complex64
>();
Tensor
::
mutable_data
<
paddle
::
platform
::
complex64
>();
template
PD_DLL_DECL
paddle
::
platform
::
complex128
*
template
PD_DLL_DECL
paddle
::
platform
::
complex128
*
Tensor
::
mutable_data
<
paddle
::
platform
::
complex128
>();
Tensor
::
mutable_data
<
paddle
::
platform
::
complex128
>();
template
PD_DLL_DECL
paddle
::
platform
::
float16
*
Tensor
::
mutable_data
<
paddle
::
platform
::
float16
>();
template
PD_DLL_DECL
float
*
Tensor
::
mutable_data
<
float
>(
const
PlaceType
&
place
);
template
PD_DLL_DECL
float
*
Tensor
::
mutable_data
<
float
>(
const
PlaceType
&
place
);
template
PD_DLL_DECL
double
*
Tensor
::
mutable_data
<
double
>(
template
PD_DLL_DECL
double
*
Tensor
::
mutable_data
<
double
>(
...
@@ -274,6 +283,8 @@ template PD_DLL_DECL paddle::platform::complex64 *
...
@@ -274,6 +283,8 @@ template PD_DLL_DECL paddle::platform::complex64 *
Tensor
::
mutable_data
<
paddle
::
platform
::
complex64
>(
const
PlaceType
&
place
);
Tensor
::
mutable_data
<
paddle
::
platform
::
complex64
>(
const
PlaceType
&
place
);
template
PD_DLL_DECL
paddle
::
platform
::
complex128
*
template
PD_DLL_DECL
paddle
::
platform
::
complex128
*
Tensor
::
mutable_data
<
paddle
::
platform
::
complex128
>(
const
PlaceType
&
place
);
Tensor
::
mutable_data
<
paddle
::
platform
::
complex128
>(
const
PlaceType
&
place
);
template
PD_DLL_DECL
paddle
::
platform
::
float16
*
Tensor
::
mutable_data
<
paddle
::
platform
::
float16
>(
const
PlaceType
&
place
);
std
::
vector
<
int64_t
>
Tensor
::
shape
()
const
{
std
::
vector
<
int64_t
>
Tensor
::
shape
()
const
{
GET_CASTED_TENSOR
GET_CASTED_TENSOR
...
@@ -344,6 +355,11 @@ Tensor Tensor::cast(const DataType &target_type) const {
...
@@ -344,6 +355,11 @@ Tensor Tensor::cast(const DataType &target_type) const {
CastDataType
<
paddle
::
platform
::
complex128
>
(
CastDataType
<
paddle
::
platform
::
complex128
>
(
*
tensor
,
rlt_tensor_
,
ctx
));
*
tensor
,
rlt_tensor_
,
ctx
));
break
;
break
;
case
framework
::
proto
::
VarType
::
FP16
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
paddle
::
platform
::
float16
>
(
*
tensor
,
rlt_tensor_
,
ctx
));
break
;
// TODO(JiabinYang) Support more dtype here
// TODO(JiabinYang) Support more dtype here
default:
default:
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
...
...
paddle/fluid/framework/custom_tensor_test.cc
浏览文件 @
878e117b
...
@@ -113,6 +113,8 @@ void GroupTestCopy() {
...
@@ -113,6 +113,8 @@ void GroupTestCopy() {
TestCopyTensor
<
paddle
::
complex64
>
();
TestCopyTensor
<
paddle
::
complex64
>
();
VLOG
(
2
)
<<
"complex128 cpu-cpu-gpu-gpu-cpu"
;
VLOG
(
2
)
<<
"complex128 cpu-cpu-gpu-gpu-cpu"
;
TestCopyTensor
<
paddle
::
complex128
>
();
TestCopyTensor
<
paddle
::
complex128
>
();
VLOG
(
2
)
<<
"Fp16 cpu-cpu-gpu-gpu-cpu"
;
TestCopyTensor
<
paddle
::
float16
>
();
}
}
void
GroupTestCast
()
{
void
GroupTestCast
()
{
...
@@ -134,6 +136,8 @@ void GroupTestCast() {
...
@@ -134,6 +136,8 @@ void GroupTestCast() {
TestCast
<
paddle
::
complex64
>
(
paddle
::
DataType
::
FLOAT32
);
TestCast
<
paddle
::
complex64
>
(
paddle
::
DataType
::
FLOAT32
);
VLOG
(
2
)
<<
"complex128 cast"
;
VLOG
(
2
)
<<
"complex128 cast"
;
TestCast
<
paddle
::
complex128
>
(
paddle
::
DataType
::
FLOAT32
);
TestCast
<
paddle
::
complex128
>
(
paddle
::
DataType
::
FLOAT32
);
VLOG
(
2
)
<<
"float16 cast"
;
TestCast
<
paddle
::
float16
>
(
paddle
::
DataType
::
FLOAT16
);
}
}
void
GroupTestDtype
()
{
void
GroupTestDtype
()
{
...
@@ -146,6 +150,7 @@ void GroupTestDtype() {
...
@@ -146,6 +150,7 @@ void GroupTestDtype() {
CHECK
(
TestDtype
<
uint8_t
>
()
==
paddle
::
DataType
::
UINT8
);
CHECK
(
TestDtype
<
uint8_t
>
()
==
paddle
::
DataType
::
UINT8
);
CHECK
(
TestDtype
<
paddle
::
complex64
>
()
==
paddle
::
DataType
::
COMPLEX64
);
CHECK
(
TestDtype
<
paddle
::
complex64
>
()
==
paddle
::
DataType
::
COMPLEX64
);
CHECK
(
TestDtype
<
paddle
::
complex128
>
()
==
paddle
::
DataType
::
COMPLEX128
);
CHECK
(
TestDtype
<
paddle
::
complex128
>
()
==
paddle
::
DataType
::
COMPLEX128
);
CHECK
(
TestDtype
<
paddle
::
float16
>
()
==
paddle
::
DataType
::
FLOAT16
);
}
}
void
GroupTestDtypeConvert
()
{
void
GroupTestDtypeConvert
()
{
...
@@ -178,6 +183,9 @@ void GroupTestDtypeConvert() {
...
@@ -178,6 +183,9 @@ void GroupTestDtypeConvert() {
CHECK
(
paddle
::
framework
::
CustomTensorUtils
::
ConvertEnumDTypeToInnerDType
(
CHECK
(
paddle
::
framework
::
CustomTensorUtils
::
ConvertEnumDTypeToInnerDType
(
paddle
::
DataType
::
COMPLEX128
)
==
paddle
::
DataType
::
COMPLEX128
)
==
paddle
::
framework
::
proto
::
VarType
::
COMPLEX128
);
paddle
::
framework
::
proto
::
VarType
::
COMPLEX128
);
CHECK
(
paddle
::
framework
::
CustomTensorUtils
::
ConvertEnumDTypeToInnerDType
(
paddle
::
DataType
::
FLOAT16
)
==
paddle
::
framework
::
proto
::
VarType
::
FP16
);
// proto -> enum
// proto -> enum
CHECK
(
paddle
::
framework
::
CustomTensorUtils
::
ConvertInnerDTypeToEnumDType
(
CHECK
(
paddle
::
framework
::
CustomTensorUtils
::
ConvertInnerDTypeToEnumDType
(
paddle
::
framework
::
proto
::
VarType
::
FP64
)
==
paddle
::
framework
::
proto
::
VarType
::
FP64
)
==
...
@@ -207,6 +215,9 @@ void GroupTestDtypeConvert() {
...
@@ -207,6 +215,9 @@ void GroupTestDtypeConvert() {
CHECK
(
paddle
::
framework
::
CustomTensorUtils
::
ConvertInnerDTypeToEnumDType
(
CHECK
(
paddle
::
framework
::
CustomTensorUtils
::
ConvertInnerDTypeToEnumDType
(
paddle
::
framework
::
proto
::
VarType
::
COMPLEX128
)
==
paddle
::
framework
::
proto
::
VarType
::
COMPLEX128
)
==
paddle
::
DataType
::
COMPLEX128
);
paddle
::
DataType
::
COMPLEX128
);
CHECK
(
paddle
::
framework
::
CustomTensorUtils
::
ConvertInnerDTypeToEnumDType
(
paddle
::
framework
::
proto
::
VarType
::
FP16
)
==
paddle
::
DataType
::
FLOAT16
);
}
}
TEST
(
CustomTensor
,
copyTest
)
{
TEST
(
CustomTensor
,
copyTest
)
{
...
...
paddle/fluid/framework/custom_tensor_utils.h
浏览文件 @
878e117b
...
@@ -60,6 +60,8 @@ class CustomTensorUtils {
...
@@ -60,6 +60,8 @@ class CustomTensorUtils {
return
framework
::
proto
::
VarType
::
COMPLEX64
;
return
framework
::
proto
::
VarType
::
COMPLEX64
;
case
paddle
::
DataType
::
COMPLEX128
:
case
paddle
::
DataType
::
COMPLEX128
:
return
framework
::
proto
::
VarType
::
COMPLEX128
;
return
framework
::
proto
::
VarType
::
COMPLEX128
;
case
paddle
::
DataType
::
FLOAT16
:
return
framework
::
proto
::
VarType
::
FP16
;
case
paddle
::
DataType
::
BOOL
:
case
paddle
::
DataType
::
BOOL
:
return
framework
::
proto
::
VarType
::
BOOL
;
return
framework
::
proto
::
VarType
::
BOOL
;
default:
default:
...
@@ -91,6 +93,8 @@ class CustomTensorUtils {
...
@@ -91,6 +93,8 @@ class CustomTensorUtils {
return
paddle
::
DataType
::
COMPLEX64
;
return
paddle
::
DataType
::
COMPLEX64
;
case
framework
::
proto
::
VarType
::
COMPLEX128
:
case
framework
::
proto
::
VarType
::
COMPLEX128
:
return
paddle
::
DataType
::
COMPLEX128
;
return
paddle
::
DataType
::
COMPLEX128
;
case
framework
::
proto
::
VarType
::
FP16
:
return
paddle
::
DataType
::
FLOAT16
;
case
framework
::
proto
::
VarType
::
BOOL
:
case
framework
::
proto
::
VarType
::
BOOL
:
return
paddle
::
DataType
::
BOOL
;
return
paddle
::
DataType
::
BOOL
;
default:
default:
...
...
python/paddle/fluid/tests/custom_op/CMakeLists.txt
浏览文件 @
878e117b
...
@@ -13,24 +13,15 @@ endif()
...
@@ -13,24 +13,15 @@ endif()
py_test
(
test_sysconfig SRCS test_sysconfig.py
)
py_test
(
test_sysconfig SRCS test_sysconfig.py
)
#
'test_dispatch'
compile .cc file
#
CPU custom op tests: only
compile .cc file
py_test
(
test_dispatch_jit SRCS test_dispatch_jit.py
)
py_test
(
test_dispatch_jit SRCS test_dispatch_jit.py
)
set_tests_properties
(
test_dispatch_jit PROPERTIES TIMEOUT 120
)
py_test
(
test_multi_out_jit SRCS test_multi_out_jit.py
)
py_test
(
test_multi_out_jit SRCS test_multi_out_jit.py
)
set_tests_properties
(
test_multi_out_jit PROPERTIES TIMEOUT 120
)
py_test
(
test_custom_attrs_jit SRCS test_custom_attrs_jit.py
)
py_test
(
test_custom_attrs_jit SRCS test_custom_attrs_jit.py
)
set_tests_properties
(
test_custom_attrs_jit PROPERTIES TIMEOUT 120
)
py_test
(
test_custom_concat SRCS test_custom_concat.py
)
py_test
(
test_custom_concat SRCS test_custom_concat.py
)
set_tests_properties
(
test_custom_concat PROPERTIES TIMEOUT 120
)
py_test
(
test_custom_conj SRCS test_custom_conj.py
)
py_test
(
test_custom_conj SRCS test_custom_conj.py
)
set_tests_properties
(
test_custom_conj PROPERTIES TIMEOUT 120
)
# other tests
py_test
(
test_check_abi SRCS test_check_abi.py
)
py_test
(
test_check_abi SRCS test_check_abi.py
)
cc_test
(
test_check_error SRCS test_check_error.cc DEPS gtest
)
cc_test
(
test_check_error SRCS test_check_error.cc DEPS gtest
)
if
(
NOT LINUX
)
if
(
NOT LINUX
)
...
...
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
浏览文件 @
878e117b
...
@@ -20,7 +20,7 @@ __global__ void relu_cuda_forward_kernel(const data_t* x,
...
@@ -20,7 +20,7 @@ __global__ void relu_cuda_forward_kernel(const data_t* x,
const
int
num
)
{
const
int
num
)
{
int
gid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
gid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
int
i
=
gid
;
i
<
num
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
gid
;
i
<
num
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
y
[
i
]
=
max
(
x
[
i
],
static_cast
<
data_t
>
(
0.
)
);
y
[
i
]
=
x
[
i
]
>
static_cast
<
data_t
>
(
0.
)
?
x
[
i
]
:
static_cast
<
data_t
>
(
0.
);
}
}
}
}
...
@@ -31,7 +31,8 @@ __global__ void relu_cuda_backward_kernel(const data_t* dy,
...
@@ -31,7 +31,8 @@ __global__ void relu_cuda_backward_kernel(const data_t* dy,
const
int
num
)
{
const
int
num
)
{
int
gid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
gid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
int
i
=
gid
;
i
<
num
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
gid
;
i
<
num
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
dx
[
i
]
=
dy
[
i
]
*
(
y
[
i
]
>
0
?
1.
:
0.
);
dx
[
i
]
=
dy
[
i
]
*
(
y
[
i
]
>
static_cast
<
data_t
>
(
0.
)
?
static_cast
<
data_t
>
(
1.
)
:
static_cast
<
data_t
>
(
0.
));
}
}
}
}
...
@@ -42,7 +43,7 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
...
@@ -42,7 +43,7 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
int
numel
=
x
.
size
();
int
numel
=
x
.
size
();
int
block
=
512
;
int
block
=
512
;
int
grid
=
(
numel
+
block
-
1
)
/
block
;
int
grid
=
(
numel
+
block
-
1
)
/
block
;
PD_DISPATCH_FLOATING_TYPES
(
PD_DISPATCH_FLOATING_
AND_HALF_
TYPES
(
x
.
type
(),
"relu_cuda_forward_kernel"
,
([
&
]
{
x
.
type
(),
"relu_cuda_forward_kernel"
,
([
&
]
{
relu_cuda_forward_kernel
<
data_t
><<<
grid
,
block
,
0
,
x
.
stream
()
>>>
(
relu_cuda_forward_kernel
<
data_t
><<<
grid
,
block
,
0
,
x
.
stream
()
>>>
(
x
.
data
<
data_t
>
(),
out
.
mutable_data
<
data_t
>
(
x
.
place
()),
numel
);
x
.
data
<
data_t
>
(),
out
.
mutable_data
<
data_t
>
(
x
.
place
()),
numel
);
...
@@ -60,7 +61,7 @@ std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
...
@@ -60,7 +61,7 @@ std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
int
numel
=
out
.
size
();
int
numel
=
out
.
size
();
int
block
=
512
;
int
block
=
512
;
int
grid
=
(
numel
+
block
-
1
)
/
block
;
int
grid
=
(
numel
+
block
-
1
)
/
block
;
PD_DISPATCH_FLOATING_TYPES
(
PD_DISPATCH_FLOATING_
AND_HALF_
TYPES
(
out
.
type
(),
"relu_cuda_backward_kernel"
,
([
&
]
{
out
.
type
(),
"relu_cuda_backward_kernel"
,
([
&
]
{
relu_cuda_backward_kernel
<
data_t
><<<
grid
,
block
,
0
,
x
.
stream
()
>>>
(
relu_cuda_backward_kernel
<
data_t
><<<
grid
,
block
,
0
,
x
.
stream
()
>>>
(
grad_out
.
data
<
data_t
>
(),
grad_out
.
data
<
data_t
>
(),
...
...
python/paddle/fluid/tests/custom_op/dispatch_test_op.cc
浏览文件 @
878e117b
...
@@ -118,3 +118,21 @@ PD_BUILD_OP(dispatch_test_float_and_integer_and_complex)
...
@@ -118,3 +118,21 @@ PD_BUILD_OP(dispatch_test_float_and_integer_and_complex)
.
Inputs
({
"X"
})
.
Inputs
({
"X"
})
.
Outputs
({
"Out"
})
.
Outputs
({
"Out"
})
.
SetKernelFn
(
PD_KERNEL
(
DispatchTestFloatAndIntegerAndComplex
));
.
SetKernelFn
(
PD_KERNEL
(
DispatchTestFloatAndIntegerAndComplex
));
std
::
vector
<
paddle
::
Tensor
>
DispatchTestFloatAndHalf
(
const
paddle
::
Tensor
&
x
)
{
auto
out
=
paddle
::
Tensor
(
paddle
::
PlaceType
::
kCPU
);
out
.
reshape
(
x
.
shape
());
PD_DISPATCH_FLOATING_AND_HALF_TYPES
(
x
.
type
(),
"assign_cpu_kernel"
,
([
&
]
{
assign_cpu_kernel
<
data_t
>
(
x
.
data
<
data_t
>
(),
out
.
mutable_data
<
data_t
>
(),
x
.
size
());
}));
return
{
out
};
}
PD_BUILD_OP
(
dispatch_test_float_and_half
)
.
Inputs
({
"X"
})
.
Outputs
({
"Out"
})
.
SetKernelFn
(
PD_KERNEL
(
DispatchTestFloatAndHalf
));
python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
浏览文件 @
878e117b
...
@@ -50,11 +50,17 @@ class TestJITLoad(unittest.TestCase):
...
@@ -50,11 +50,17 @@ class TestJITLoad(unittest.TestCase):
custom_module
.
custom_relu
,
custom_module
.
custom_relu_dup
custom_module
.
custom_relu
,
custom_module
.
custom_relu_dup
]
]
self
.
dtypes
=
[
'float32'
,
'float64'
]
self
.
dtypes
=
[
'float32'
,
'float64'
]
self
.
devices
=
[
'cpu'
,
'gpu'
]
if
paddle
.
is_compiled_with_cuda
():
self
.
dtypes
.
append
(
'float16'
)
self
.
devices
=
[
'cpu'
]
if
paddle
.
is_compiled_with_cuda
():
self
.
devices
.
append
(
'gpu'
)
def
test_static
(
self
):
def
test_static
(
self
):
for
device
in
self
.
devices
:
for
device
in
self
.
devices
:
for
dtype
in
self
.
dtypes
:
for
dtype
in
self
.
dtypes
:
if
device
==
'cpu'
and
dtype
==
'float16'
:
continue
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
for
custom_op
in
self
.
custom_ops
:
for
custom_op
in
self
.
custom_ops
:
out
=
custom_relu_static
(
custom_op
,
device
,
dtype
,
x
)
out
=
custom_relu_static
(
custom_op
,
device
,
dtype
,
x
)
...
@@ -68,6 +74,8 @@ class TestJITLoad(unittest.TestCase):
...
@@ -68,6 +74,8 @@ class TestJITLoad(unittest.TestCase):
def
test_dynamic
(
self
):
def
test_dynamic
(
self
):
for
device
in
self
.
devices
:
for
device
in
self
.
devices
:
for
dtype
in
self
.
dtypes
:
for
dtype
in
self
.
dtypes
:
if
device
==
'cpu'
and
dtype
==
'float16'
:
continue
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
for
custom_op
in
self
.
custom_ops
:
for
custom_op
in
self
.
custom_ops
:
out
,
x_grad
=
custom_relu_dynamic
(
custom_op
,
device
,
dtype
,
out
,
x_grad
=
custom_relu_dynamic
(
custom_op
,
device
,
dtype
,
...
@@ -87,7 +95,7 @@ class TestJITLoad(unittest.TestCase):
...
@@ -87,7 +95,7 @@ class TestJITLoad(unittest.TestCase):
caught_exception
=
False
caught_exception
=
False
try
:
try
:
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
'int32'
)
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
'int32'
)
custom_relu_dynamic
(
custom_module
.
custom_relu
,
'cpu'
,
'
floa
t32'
,
x
)
custom_relu_dynamic
(
custom_module
.
custom_relu
,
'cpu'
,
'
in
t32'
,
x
)
except
OSError
as
e
:
except
OSError
as
e
:
caught_exception
=
True
caught_exception
=
True
self
.
assertTrue
(
self
.
assertTrue
(
...
@@ -105,15 +113,15 @@ class TestJITLoad(unittest.TestCase):
...
@@ -105,15 +113,15 @@ class TestJITLoad(unittest.TestCase):
caught_exception
=
False
caught_exception
=
False
try
:
try
:
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
'int
64
'
)
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
'int
32
'
)
custom_relu_dynamic
(
custom_module
.
custom_relu
,
'gpu'
,
'
floa
t32'
,
x
)
custom_relu_dynamic
(
custom_module
.
custom_relu
,
'gpu'
,
'
in
t32'
,
x
)
except
OSError
as
e
:
except
OSError
as
e
:
caught_exception
=
True
caught_exception
=
True
self
.
assertTrue
(
self
.
assertTrue
(
"function
\"
relu_cuda_forward_kernel
\"
is not implemented for data type `int
64
_t`"
"function
\"
relu_cuda_forward_kernel
\"
is not implemented for data type `int
32
_t`"
in
str
(
e
))
in
str
(
e
))
self
.
assertTrue
(
self
.
assertTrue
(
"python/paddle/fluid/tests/custom_op/custom_relu_op.cu:
49
"
in
"python/paddle/fluid/tests/custom_op/custom_relu_op.cu:
50
"
in
str
(
e
))
str
(
e
))
self
.
assertTrue
(
caught_exception
)
self
.
assertTrue
(
caught_exception
)
...
...
python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
浏览文件 @
878e117b
...
@@ -26,7 +26,7 @@ from paddle.utils.cpp_extension.extension_utils import run_cmd
...
@@ -26,7 +26,7 @@ from paddle.utils.cpp_extension.extension_utils import run_cmd
def
custom_relu_dynamic
(
func
,
device
,
dtype
,
np_x
,
use_func
=
True
):
def
custom_relu_dynamic
(
func
,
device
,
dtype
,
np_x
,
use_func
=
True
):
paddle
.
set_device
(
device
)
paddle
.
set_device
(
device
)
t
=
paddle
.
to_tensor
(
np_x
)
t
=
paddle
.
to_tensor
(
np_x
,
dtype
=
dtype
)
t
.
stop_gradient
=
False
t
.
stop_gradient
=
False
out
=
func
(
t
)
if
use_func
else
paddle
.
nn
.
functional
.
relu
(
t
)
out
=
func
(
t
)
if
use_func
else
paddle
.
nn
.
functional
.
relu
(
t
)
...
@@ -171,7 +171,11 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
...
@@ -171,7 +171,11 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
]
]
self
.
dtypes
=
[
'float32'
,
'float64'
]
self
.
dtypes
=
[
'float32'
,
'float64'
]
self
.
devices
=
[
'cpu'
,
'gpu'
]
if
paddle
.
is_compiled_with_cuda
():
self
.
dtypes
.
append
(
'float16'
)
self
.
devices
=
[
'cpu'
]
if
paddle
.
is_compiled_with_cuda
():
self
.
devices
.
append
(
'gpu'
)
# config seed
# config seed
SEED
=
2021
SEED
=
2021
...
@@ -181,6 +185,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
...
@@ -181,6 +185,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
def
test_static
(
self
):
def
test_static
(
self
):
for
device
in
self
.
devices
:
for
device
in
self
.
devices
:
for
dtype
in
self
.
dtypes
:
for
dtype
in
self
.
dtypes
:
if
device
==
'cpu'
and
dtype
==
'float16'
:
continue
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
for
custom_op
in
self
.
custom_ops
:
for
custom_op
in
self
.
custom_ops
:
out
=
custom_relu_static
(
custom_op
,
device
,
dtype
,
x
)
out
=
custom_relu_static
(
custom_op
,
device
,
dtype
,
x
)
...
@@ -194,6 +200,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
...
@@ -194,6 +200,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
def
test_static_pe
(
self
):
def
test_static_pe
(
self
):
for
device
in
self
.
devices
:
for
device
in
self
.
devices
:
for
dtype
in
self
.
dtypes
:
for
dtype
in
self
.
dtypes
:
if
device
==
'cpu'
and
dtype
==
'float16'
:
continue
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
for
custom_op
in
self
.
custom_ops
:
for
custom_op
in
self
.
custom_ops
:
out
=
custom_relu_static_pe
(
custom_op
,
device
,
dtype
,
x
)
out
=
custom_relu_static_pe
(
custom_op
,
device
,
dtype
,
x
)
...
@@ -207,6 +215,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
...
@@ -207,6 +215,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
def
test_dynamic
(
self
):
def
test_dynamic
(
self
):
for
device
in
self
.
devices
:
for
device
in
self
.
devices
:
for
dtype
in
self
.
dtypes
:
for
dtype
in
self
.
dtypes
:
if
device
==
'cpu'
and
dtype
==
'float16'
:
continue
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
for
custom_op
in
self
.
custom_ops
:
for
custom_op
in
self
.
custom_ops
:
out
,
x_grad
=
custom_relu_dynamic
(
custom_op
,
device
,
dtype
,
out
,
x_grad
=
custom_relu_dynamic
(
custom_op
,
device
,
dtype
,
...
...
python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
浏览文件 @
878e117b
...
@@ -83,6 +83,12 @@ class TestJitDispatch(unittest.TestCase):
...
@@ -83,6 +83,12 @@ class TestJitDispatch(unittest.TestCase):
self
.
run_dispatch_test
(
self
.
run_dispatch_test
(
dispatch_op
.
dispatch_test_float_and_integer_and_complex
,
dtype
)
dispatch_op
.
dispatch_test_float_and_integer_and_complex
,
dtype
)
def
test_dispatch_float_and_half
(
self
):
dtypes
=
[
"float32"
,
"float64"
,
"float16"
]
for
dtype
in
dtypes
:
self
.
run_dispatch_test
(
dispatch_op
.
dispatch_test_float_and_half
,
dtype
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/setup.py.in
浏览文件 @
878e117b
...
@@ -453,15 +453,12 @@ class InstallHeaders(Command):
...
@@ -453,15 +453,12 @@ class InstallHeaders(Command):
def copy_data_type_headers(self, header):
def copy_data_type_headers(self, header):
if os.name == 'nt':
if os.name == 'nt':
data_type_headers = ['platform\\complex64.h', 'platform\\complex128.h']
data_type_headers = ['platform\\complex64.h', 'platform\\complex128.h'
, 'platform\\float16.h'
]
else:
else:
data_type_headers = ['platform/complex64.h', 'platform/complex128.h']
data_type_headers = ['platform/complex64.h', 'platform/complex128.h'
, 'platform/float16.h'
]
for dtype_header in data_type_headers:
for dtype_header in data_type_headers:
if dtype_header in header:
if dtype_header in header:
if os.name == 'nt':
install_dir = os.path.join(self.install_dir, "paddle/fluid/extension/include")
install_dir = os.path.join(self.install_dir, "paddle\\fluid\\extension\\include")
else:
install_dir = os.path.join(self.install_dir, "paddle/fluid/extension/include")
if not os.path.exists(install_dir):
if not os.path.exists(install_dir):
self.mkpath(install_dir)
self.mkpath(install_dir)
return self.copy_file(header, install_dir)
return self.copy_file(header, install_dir)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录