Commit d3dcbd37, authored Mar 01, 2022 by phlrain

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into move_sgd_to_phi

Parents: 943dedec, 852a872f

Showing 48 changed files with 1138 additions and 383 deletions (+1138 / -383)
paddle/fluid/eager/api/utils/hook_utils.cc (+29, -34)
paddle/fluid/eager/auto_code_generator/eager_generator.cc (+15, -10)
paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py (+21, -1)
paddle/fluid/eager/backward.cc (+4, -3)
paddle/fluid/eager/grad_node_info.cc (+1, -1)
paddle/fluid/eager/utils.cc (+9, -0)
paddle/fluid/framework/infershape_utils.cc (+5, -1)
paddle/fluid/framework/operator.h (+2, -0)
paddle/fluid/framework/phi_utils.cc (+9, -0)
paddle/fluid/inference/tests/api/CMakeLists.txt (+29, -0)
paddle/fluid/inference/tests/api/analyzer_bfloat16_image_classification_tester.cc (+13, -2)
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc (+6, -1)
paddle/fluid/inference/tests/api/tester_helper.h (+18, -12)
paddle/fluid/operators/reduce_ops/reduce_mean_op.cc (+9, -1)
paddle/fluid/operators/reduce_ops/reduce_sum_op.cc (+9, -1)
paddle/fluid/operators/selu_op.cc (+0, -8)
paddle/fluid/operators/selu_op.cu (+0, -22)
paddle/fluid/platform/device/ipu/ipu_strategy.cc (+175, -131)
paddle/fluid/platform/device/ipu/ipu_strategy.h (+37, -35)
paddle/fluid/pybind/pybind.cc (+2, -0)
paddle/infrt/dialect/phi/pass/proto_arg_map_context.h (+2, -0)
paddle/phi/core/compat/arg_map_context.h (+4, -0)
paddle/phi/infermeta/unary.cc (+50, -10)
paddle/phi/infermeta/unary.h (+11, -4)
paddle/phi/kernels/cpu/selu_grad_kernel.cc (+21, -0)
paddle/phi/kernels/cpu/selu_kernel.cc (+21, -0)
paddle/phi/kernels/gpu/selu_grad_kernel.cu (+22, -0)
paddle/phi/kernels/gpu/selu_kernel.cu (+21, -0)
paddle/phi/kernels/impl/selu_grad_kernel_impl.h (+35, -0)
paddle/phi/kernels/impl/selu_kernel_impl.h (+88, -0)
paddle/phi/kernels/math_kernel.h (+1, -1)
paddle/phi/kernels/primitive/compute_primitives.h (+215, -69)
paddle/phi/kernels/primitive/compute_primitives_xpu2.h (+23, -0)
paddle/phi/kernels/primitive/datamover_primitives.h (+32, -0)
paddle/phi/kernels/primitive/datamover_primitives_xpu2.h (+41, -0)
paddle/phi/kernels/selu_grad_kernel.h (+29, -0)
paddle/phi/kernels/selu_kernel.h (+28, -0)
paddle/phi/ops/compat/reduce_sig.cc (+21, -13)
paddle/phi/ops/compat/selu_sig.cc (+28, -0)
paddle/phi/tests/ops/test_op_signature.h (+2, -0)
paddle/scripts/paddle_build.sh (+1, -1)
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt (+9, -2)
python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt (+8, -0)
python/paddle/fluid/tests/unittests/test_transpose_op.py (+8, -0)
python/paddle/fluid/tests/unittests/test_var_base.py (+4, -0)
python/paddle/tensor/creation.py (+15, -16)
python/paddle/tensor/manipulation.py (+4, -3)
python/paddle/utils/code_gen/api.yaml (+1, -1)
paddle/fluid/eager/api/utils/hook_utils.cc (view file @ d3dcbd37)

@@ -52,49 +52,44 @@ void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,

Before: a static helper plus a thin public wrapper.

    static void RetainGradForRegularNode(
        const paddle::experimental::Tensor& tensor) {
      AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
      if (meta->RetainGrads()) {
        return;
      } else {
        meta->SetRetainGrads(true);
      }

      std::weak_ptr<paddle::experimental::Tensor> weak_grad_tensor =
          meta->WeakGrad();
      // ... hook definition and registration, unchanged ...
    }

    void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
      if (IsLeafTensor(tensor)) {
        // Leaf tensor's grad will always be retained
        // Refer to implementation of AccumulationNode for more details
        return;
      } else {
        RetainGradForRegularNode(tensor);
      }
    }

After: the helper is folded into RetainGradForTensor.

    void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
      if (IsLeafTensor(tensor)) {
        // Leaf tensor's grad will always be retained
        // Refer to implementation of AccumulationNode for more details
        return;
      } else {
        AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
        if (meta->RetainGrads()) {
          return;
        } else {
          meta->SetRetainGrads(true);
        }

        std::weak_ptr<paddle::experimental::Tensor> weak_grad_tensor =
            meta->WeakGrad();

        // Define Hook
        auto hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) {
          if (!weak_grad_tensor.expired()) {
            auto grad_tensor = weak_grad_tensor.lock();
            if (t.defined()) {
              VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
              // Simply Copy impl() to grad_tensor
              grad_tensor->set_impl(t.impl());
              return *grad_tensor.get();
            } else {
              VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
              return paddle::experimental::Tensor();
            }
          } else {
            VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
            return paddle::experimental::Tensor();
          }
        };

        // Append to GradientHooks
        RegisterGradientHookForTensor(tensor,
                                      std::make_shared<egr::CppTensorHook>(hook));
      }
    }
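Why the hook captures a std::weak_ptr rather than a shared_ptr: the hook is stored on the tensor's gradient machinery, so owning the grad buffer from inside the hook would create a reference cycle and could also write through a dangling pointer after teardown. A minimal standalone sketch of that pattern (illustrative names only, not the Paddle API):

    #include <iostream>
    #include <memory>

    int main() {
      auto grad = std::make_shared<int>(0);  // owned elsewhere (the autograd meta)
      std::weak_ptr<int> weak_grad = grad;   // captured by the hook

      auto hook = [weak_grad](int incoming) {
        if (auto locked = weak_grad.lock()) {  // owner still alive?
          *locked = incoming;                  // copy into the grad slot
        }                                      // else: drop silently, no use-after-free
      };

      hook(42);
      std::cout << *grad << "\n";  // prints 42

      grad.reset();  // owner destroyed
      hook(7);       // safe no-op: the weak_ptr has expired
    }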
paddle/fluid/eager/auto_code_generator/eager_generator.cc (view file @ d3dcbd37)

Both hunks wrap the SetHistory emission in an `if (!output.intermediate())` guard; the second hunk does the same just before the CheckAndRetainGrad emission.

@@ -1156,11 +1156,13 @@ static std::string GenerateGradNodeCreationContent(
       grad_node_creation_str += paddle::string::Sprintf(
           SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);

-      const char* SET_HISTORY_TEMPLATE =
-          "    egr::EagerUtils::SetHistory(&%s, grad_node);\n";
-      grad_node_creation_str +=
-          paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+      // Intermediate Tensor does not require SetHistory
+      if (!output.intermediate()) {
+        const char* SET_HISTORY_TEMPLATE =
+            "    egr::EagerUtils::SetHistory(&%s, grad_node);\n";
+        grad_node_creation_str +=
+            paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+      }
       const char* SET_GRAD_IN_META_TEMPLATE =
           "    grad_node->SetGradInMeta(&%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(

@@ -1173,17 +1175,20 @@ static std::string GenerateGradNodeCreationContent(
       grad_node_creation_str += paddle::string::Sprintf(
           SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);

-      const char* SET_HISTORY_TEMPLATE =
-          "    egr::EagerUtils::SetHistory(%s, grad_node);\n";
-      grad_node_creation_str +=
-          paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+      // Intermediate Tensor does not require SetHistory
+      if (!output.intermediate()) {
+        const char* SET_HISTORY_TEMPLATE =
+            "    egr::EagerUtils::SetHistory(%s, grad_node);\n";
+        grad_node_creation_str +=
+            paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+      }
       const char* SET_GRAD_IN_META_TEMPLATE =
           "    grad_node->SetGradInMeta(%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(
           SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position);
     }

     // Intermediate Tensor does not require CheckAndRetainGrad
     if (!output.intermediate()) {
       VLOG(6) << "Generated Call RetainGradForTensor";
       const char* RETAIN_GRAD_TEMPLATE =
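The generator builds C++ source by substituting variable names into printf-style templates via paddle::string::Sprintf. A minimal sketch of that substitution step using the standard library instead (the variable name is hypothetical):

    #include <cstdio>
    #include <string>

    int main() {
      // One codegen template plus one per-output name yields one generated line.
      const char* kSetHistoryTemplate =
          "    egr::EagerUtils::SetHistory(&%s, grad_node);\n";
      char buf[128];
      std::snprintf(buf, sizeof(buf), kSetHistoryTemplate, "out_autograd_meta");
      std::string grad_node_creation_str = buf;
      std::printf("%s", grad_node_creation_str.c_str());
      // prints:     egr::EagerUtils::SetHistory(&out_autograd_meta, grad_node);
    }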
paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py (view file @ d3dcbd37)

@@ -24,6 +24,17 @@ core_ops_args_info = {}
core_ops_args_type_info = {}

yaml_types_mapping = {
    'int' : 'int', 'int32_t' : 'int32_t', 'int64_t' : 'int64_t', 'size_t' : 'size_t', \
    'float' : 'float', 'double' : 'double', 'bool' : 'bool', \
    'Backend' : 'Backend', 'DataLayout' : 'DataLayout', 'DataType' : 'DataType', \
    'int64_t[]' : 'std::vector<int64_t>', 'int[]' : 'std::vector<int>',
    'Tensor' : 'Tensor',
    'Tensor[]' : 'std::vector<Tensor>',
    'Tensor[Tensor[]]' : 'std::vector<std::vector<Tensor>>'
}

def ParseArguments():
    parser = argparse.ArgumentParser(
        description='Eager Code Generator Args Parser')

@@ -59,7 +70,9 @@ def IsPlainTensorType(string):
def IsVectorTensorType(string):
-    vector_tensor_types = ['list(Tensor)']
+    vector_tensor_types = [
+        'std::vector<std::vector<Tensor>>', 'std::vector<Tensor>'
+    ]
    if string in vector_tensor_types:
        return True
    return False

@@ -180,6 +193,9 @@ def ParseYamlArgs(string):
        arg_name = m.group(3).split("=")[0].strip()
        default_value = m.group(3).split("=")[1].strip() if len(
            m.group(3).split("=")) > 1 else None
+
+        assert arg_type in yaml_types_mapping.keys()
+        arg_type = yaml_types_mapping[arg_type]

        if "Tensor" in arg_type:
            assert default_value is None
            inputs_list.append([arg_name, arg_type, i])

@@ -219,6 +235,10 @@ def ParseYamlReturnsWithName(string):
        m = re.search(pattern, ret)
        ret_type = m.group(1)
        ret_name = m.group(2)
+
+        assert ret_type in yaml_types_mapping.keys()
+        ret_type = yaml_types_mapping[ret_type]

        assert "Tensor" in ret_type
        returns_list.append([ret_name, ret_type, i])
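The mapping above translates YAML type tags from api.yaml into the C++ types emitted into the generated eager API. The lookup itself is a plain table access; the same idea in C++ (table abbreviated to a few entries from the diff):

    #include <iostream>
    #include <map>
    #include <string>

    int main() {
      // YAML type tag -> emitted C++ type, mirroring yaml_types_mapping.
      const std::map<std::string, std::string> yaml_types_mapping = {
          {"int64_t[]", "std::vector<int64_t>"},
          {"Tensor", "Tensor"},
          {"Tensor[]", "std::vector<Tensor>"},
      };
      std::cout << yaml_types_mapping.at("int64_t[]") << "\n";
      // prints: std::vector<int64_t>
    }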
paddle/fluid/eager/backward.cc (view file @ d3dcbd37)

@@ -221,10 +221,11 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
               << " 's name is: " << grad_output_tensor.name();

       auto* next_node = next_node_shared.get();

       if (!node_input_buffers_dict.count(next_node)) {
-        node_input_buffers_dict[next_node] =
-            std::make_unique<GradTensorHolder>(next_node->InputMeta());
+        const auto& input_meta = next_node->InputMeta();
+        auto grad_tensor_holder = std::make_unique<GradTensorHolder>(input_meta);
+        node_input_buffers_dict[next_node] = std::move(grad_tensor_holder);
       }

       VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first
               << ", rank: " << edge_rank.second;
paddle/fluid/eager/grad_node_info.cc (view file @ d3dcbd37)

The single-line change re-wraps a broken comment.

@@ -244,7 +244,7 @@ GradNodeBase::ApplyGradientHooks(
       if (!out.defined() || !out.initialized()) {
         out = (*hook)(tensors[slot_id][rank]);
       } else {
-        //
-        If more than one hook is registered, the input to the next hook func
+        // If more than one hook is registered, the input to the next hook func
         // should be the output of the previous hook
         out = (*hook)(out);
       }
paddle/fluid/eager/utils.cc (view file @ d3dcbd37)

@@ -122,12 +122,21 @@ paddle::experimental::Tensor* EagerUtils::mutable_grad(
void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
                            const std::shared_ptr<GradNodeBase>& grad_node) {
  for (const auto& autograd_meta : *autograd_metas) {
+    if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
+      VLOG(6) << "Warning: Reseting GradNodeAccumulation for leaf tensor is "
+                 "detected";
+    }
    autograd_meta->SetGradNode(grad_node);
  }
}

void EagerUtils::SetHistory(AutogradMeta* autograd_meta,
                            const std::shared_ptr<GradNodeBase>& grad_node) {
+  if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
+    VLOG(6) << "Warning: Reseting GradNodeAccumulation for leaf tensor is detected";
+  }
  autograd_meta->SetGradNode(grad_node);
}
paddle/fluid/framework/infershape_utils.cc (view file @ d3dcbd37)

@@ -88,6 +88,8 @@ class InferShapeArgumentMappingContext : public phi::ArgumentMappingContext {
    return var_types[0] == proto::VarType::SELECTED_ROWS;
  }

+  bool IsForInferShape() const override { return true; }
+
 private:
  const InferShapeContext& ctx_;
};

@@ -127,7 +129,9 @@ class CompatMetaTensor : public phi::MetaTensor {
      }
    } else {
      auto* var = BOOST_GET_CONST(VarDesc*, var_);
-      return phi::make_ddim(var->GetShape());
+      return var->GetShape().empty() ? phi::make_ddim({0UL})
+                                     : phi::make_ddim(var->GetShape());
    }
  }
paddle/fluid/framework/operator.h (view file @ d3dcbd37)

@@ -489,6 +489,8 @@ class ExecutionArgumentMappingContext : public phi::ArgumentMappingContext {
    return ctx_.OutputVar(name)->IsType<phi::SelectedRows>();
  }

+  bool IsForInferShape() const override { return false; }
+
 private:
  const ExecutionContext& ctx_;
};
paddle/fluid/framework/phi_utils.cc (view file @ d3dcbd37)

@@ -125,6 +125,15 @@ phi::KernelKey FallBackToCpu(const OpKernelType& expected_kernel_key,
    return phi::KernelKey(phi::Backend::CPU, kernel_key.layout(),
                          kernel_key.dtype());
  }
#endif
+#ifdef PADDLE_WITH_IPU
+  if (platform::is_ipu_place(expected_kernel_key.place_)) {
+    VLOG(3) << "pten missing IPU kernel: " << op.Type()
+            << ", expected_kernel_key:" << expected_kernel_key
+            << ", fallbacking to CPU one!";
+    return phi::KernelKey(phi::Backend::CPU, kernel_key.layout(),
+                          kernel_key.dtype());
+  }
+#endif
  return phi::KernelKey();
}
paddle/fluid/inference/tests/api/CMakeLists.txt (view file @ d3dcbd37)

@@ -453,6 +453,23 @@ if(WITH_MKLDNN)
    download_int8_data_without_verify(${INT8_GOOGLENET_MODEL_DIR} "GoogleNet_int8_model.tar.gz")
    inference_analysis_api_int8_test_run_custom_warmup_batch_size(
        test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP}
        ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} 10)

+   # mobilenetv3_large_x1_0 int8
+   set(INT8_MOBILENETV3_LARGE_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv3_large")
+   set(INT8_MOBILENETV3_FILE_NAME "MobileNetV3_large_x1_0_infer.tar")
+   if(NOT EXISTS ${INT8_MOBILENETV3_LARGE_MODEL_DIR}/${INT8_MOBILENETV3_FILE_NAME})
+     inference_download_and_uncompress_without_verify(
+         ${INT8_MOBILENETV3_LARGE_MODEL_DIR}
+         "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/"
+         ${INT8_MOBILENETV3_FILE_NAME})
+   endif()
+   inference_analysis_test_run(test_analyzer_int8_mobilenetv3_large
+       COMMAND ${INT8_IMG_CLASS_TEST_APP}
+       ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
+            --infer_data=${IMAGENET_DATA_PATH}
+            --warmup_batch_size=50
+            --batch_size=1
+            --enable_int8=true
+            --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
+            --iterations=100
+            --with_accuracy_layer=false)

    ### BFLOAT16 tests
    # build test binary to be used in subsequent tests

@@ -472,6 +489,17 @@ if(WITH_MKLDNN)
    # mobilenetv2 bfloat16
    inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_mobilenetv2
        ${BF16_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH})

+   # mobilenetv3_large
+   inference_analysis_test_run(test_analyzer_bfloat16_mobilenetv3_large
+       COMMAND ${BF16_IMG_CLASS_TEST_APP}
+       ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
+            --infer_data=${IMAGENET_DATA_PATH}
+            --batch_size=1
+            --enable_bf16=true
+            --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
+            --iterations=100
+            --with_accuracy_layer=false)

    ### Object detection models
    set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_300.bin")
    set(INT8_OBJ_DETECT_TEST_APP "test_analyzer_int8_object_detection")

@@ -739,6 +767,7 @@ if(WITH_MKLDNN)
    set_tests_properties(test_analyzer_quant_performance_benchmark PROPERTIES TIMEOUT 120)
    set_tests_properties(test_analyzer_int8_mobilenetv2 PROPERTIES TIMEOUT 120)
    set_tests_properties(test_analyzer_int8_mobilenetv1 PROPERTIES TIMEOUT 120)
+   set_tests_properties(test_analyzer_int8_mobilenetv3_large PROPERTIES TIMEOUT 120)
  endif()

  set_tests_properties(lite_resnet50_test PROPERTIES TIMEOUT 120)
paddle/fluid/inference/tests/api/analyzer_bfloat16_image_classification_tester.cc (view file @ d3dcbd37)

@@ -14,13 +14,19 @@ limitations under the License. */
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
#include "paddle/fluid/platform/cpu_info.h"

namespace paddle {
namespace inference {
namespace analysis {

void SetConfig(AnalysisConfig *cfg) {
-  cfg->SetModel(FLAGS_infer_model);
+  std::ifstream model_file(FLAGS_infer_model + "/__model__");
+  if (model_file.good())
+    cfg->SetModel(FLAGS_infer_model);
+  else
+    cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
+                  FLAGS_infer_model + "/inference.pdiparams");
  cfg->DisableGpu();
  cfg->SwitchIrOptim();
  cfg->SwitchSpecifyInputNames();

@@ -38,7 +44,12 @@ TEST(Analyzer_bfloat16_image_classification, bfloat16) {
  // read data from file and prepare batches with test data
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInputs(&input_slots_all);
-  b_cfg.EnableMkldnnBfloat16();
+  if (FLAGS_enable_bf16 &&
+      platform::MayIUse(platform::cpu_isa_t::avx512_bf16)) {
+    b_cfg.EnableMkldnnBfloat16();
+  } else {
+    FLAGS_enable_bf16 = false;
+  }
  CompareBFloat16AndAnalysis(&cfg, &b_cfg, input_slots_all);
}
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc (view file @ d3dcbd37)

@@ -22,7 +22,12 @@ namespace inference {
namespace analysis {

void SetConfig(AnalysisConfig *cfg) {
-  cfg->SetModel(FLAGS_infer_model);
+  std::ifstream model_file(FLAGS_infer_model + "/__model__");
+  if (model_file.good())
+    cfg->SetModel(FLAGS_infer_model);
+  else
+    cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
+                  FLAGS_infer_model + "/inference.pdiparams");
  cfg->DisableGpu();
  cfg->SwitchIrOptim();
  cfg->SwitchSpecifyInputNames();
paddle/fluid/inference/tests/api/tester_helper.h (view file @ d3dcbd37)

All four hunks make the label tensor and the accuracy comparison conditional on FLAGS_with_accuracy_layer.

@@ -213,15 +213,15 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
            element_in_batch * 3 * 224 * 224, 3 * 224 * 224,
            static_cast<float *>(images.data.data()) + i * 3 * 224 * 224);

-    std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
-                    element_in_batch,
-                1, static_cast<int64_t *>(labels.data.data()) + i);
+    if (FLAGS_with_accuracy_layer)
+      std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
+                      element_in_batch,
+                  1, static_cast<int64_t *>(labels.data.data()) + i);
  }
-  auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(2);
+  auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(
+      FLAGS_with_accuracy_layer ? 2 : 1);
  (*warmup_data)[0] = std::move(images);
-  (*warmup_data)[1] = std::move(labels);
+  if (FLAGS_with_accuracy_layer) (*warmup_data)[1] = std::move(labels);
  return warmup_data;
}

@@ -254,9 +254,13 @@ void SetInputs(std::vector<std::vector<PaddleTensor>> *inputs,
  }
  for (auto i = 0; i < iterations; i++) {
    auto images = image_reader.NextBatch();
-    auto labels = label_reader.NextBatch();
-    inputs->emplace_back(
-        std::vector<PaddleTensor>{std::move(images), std::move(labels)});
+    std::vector<PaddleTensor> tmp_vec;
+    tmp_vec.push_back(std::move(images));
+    if (FLAGS_with_accuracy_layer) {
+      auto labels = label_reader.NextBatch();
+      tmp_vec.push_back(std::move(labels));
+    }
+    inputs->push_back(std::move(tmp_vec));
  }
}

@@ -825,7 +829,8 @@ void CompareQuantizedAndAnalysis(
  SummarizePerformance("FP32", sample_latency_fp32, "INT8",
                       sample_latency_int8);
-  CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
+  if (FLAGS_with_accuracy_layer)
+    CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
}

void CompareBFloat16AndAnalysis(

@@ -864,7 +869,8 @@ void CompareBFloat16AndAnalysis(
  SummarizePerformance("FP32", sample_latency_fp32, "BF16",
                       sample_latency_bf16);
-  CompareAccuracy(bf16_outputs, analysis_outputs, compared_idx);
+  if (FLAGS_with_accuracy_layer)
+    CompareAccuracy(bf16_outputs, analysis_outputs, compared_idx);
}

void CompareAnalysisAndAnalysis(
paddle/fluid/operators/reduce_ops/reduce_mean_op.cc (view file @ d3dcbd37)

@@ -18,6 +18,10 @@
#include <utility>
#include <vector>

+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/unary.h"
+
namespace paddle {
namespace operators {

@@ -92,9 +96,13 @@ class __reduce_meanMaker__ : public ops::ReduceOpMaker {
  virtual std::string GetOpType() const { return "Reduce reduce_mean"; }
};

+DELCARE_INFER_SHAPE_FUNCTOR(reduce_mean, ReduceMeanInferShapeFunctor,
+                            PT_INFER_META(phi::MeanRawInferMeta));
+
REGISTER_OPERATOR(reduce_mean, ops::ReduceOp, __reduce_meanMaker__,
                  ops::ReduceMeanOpGradMaker<paddle::framework::OpDesc>,
-                 ops::ReduceMeanOpGradMaker<paddle::imperative::OpBase>);
+                 ops::ReduceMeanOpGradMaker<paddle::imperative::OpBase>,
+                 ReduceMeanInferShapeFunctor);
REGISTER_OPERATOR(reduce_mean_grad, ops::ReduceGradOp,
                  ops::ReduceMeanDoubleGradDescMaker,
                  ops::ReduceMeanDoubleGradOpBaseMaker,
paddle/fluid/operators/reduce_ops/reduce_sum_op.cc (view file @ d3dcbd37)

@@ -16,6 +16,10 @@
#include <string>

+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/unary.h"
+
namespace paddle {
namespace framework {
class OpDesc;

@@ -98,10 +102,14 @@ class ReduceSumOpMaker : public ops::ReduceOpMaker {
  virtual std::string GetOpType() const { return "Reduce reduce_sum"; }
};

+DELCARE_INFER_SHAPE_FUNCTOR(reduce_sum, ReduceSumInferShapeFunctor,
+                            PT_INFER_META(phi::ReduceInferMetaBase));
+
REGISTER_OPERATOR(reduce_sum, ops::ReduceOp, ReduceSumOpMaker,
                  ops::ReduceSumVarTypeInference,
                  ops::ReduceSumOpGradMaker<paddle::framework::OpDesc>,
-                 ops::ReduceSumOpGradMaker<paddle::imperative::OpBase>);
+                 ops::ReduceSumOpGradMaker<paddle::imperative::OpBase>,
+                 ReduceSumInferShapeFunctor);
REGISTER_OPERATOR(reduce_sum_grad, ops::ReduceGradOp,
                  ops::ReduceSumDoubleOpGradMaker<paddle::framework::OpDesc>,
                  ops::ReduceSumDoubleOpGradMaker<paddle::imperative::OpBase>,
paddle/fluid/operators/selu_op.cc (view file @ d3dcbd37)

@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

-#include "paddle/fluid/operators/selu_op.h"
-
#include <memory>
#include <string>
#include <unordered_map>

@@ -127,9 +125,3 @@ REGISTER_OPERATOR(selu, ops::SeluOp, ops::SeluOpMaker, ops::SeluOpInferVarType,
                  ops::SeluGradMaker<paddle::framework::OpDesc>,
                  ops::SeluGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(selu_grad, ops::SeluGradOp);
-REGISTER_OP_CPU_KERNEL(
-    selu, ops::SeluKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::SeluKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    selu_grad, ops::SeluGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::SeluGradKernel<paddle::platform::CPUDeviceContext, double>);
paddle/fluid/operators/selu_op.cu (deleted, mode 100644 → 0; view file @ 943dedec)

Deleted file content:

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/selu_op.h"

namespace ops = paddle::operators;

REGISTER_OP_CUDA_KERNEL(
    selu, ops::SeluKernel<paddle::platform::CUDADeviceContext, float>,
    ops::SeluKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
    selu_grad, ops::SeluGradKernel<paddle::platform::CUDADeviceContext, float>,
    ops::SeluGradKernel<paddle::platform::CUDADeviceContext, double>);
paddle/fluid/platform/device/ipu/ipu_strategy.cc (view file @ d3dcbd37)

This diff (+175 / -131) was collapsed by the viewer and is not reproduced here.
paddle/fluid/platform/device/ipu/ipu_strategy.h (view file @ d3dcbd37)

@@ -24,7 +24,8 @@ namespace paddle {
namespace platform {
namespace ipu {

-struct IpuStrategy {
+class IpuStrategy {
+ public:
  IpuStrategy();

  // TODO(alleng) create PaddleOptions

The remaining hunks (@@ -75,22 +76,30 @@ and @@ -117,27 +126,20 @@) reorder the members; previously the setters, getters and option maps lived in one public block after the private set() helper. Reorganized declarations:

  // custom ops
  std::vector<IpuCustomOpIdentifier> custom_ops;

 private:
  std::map<std::string, std::function<void(bool)>> bool_options;
  std::map<std::string, std::function<void(std::uint64_t)>> uint64_options;
  std::map<std::string, std::function<void(double)>> double_options;
  std::map<std::string, std::function<void(std::string)>> string_options;
  std::map<std::string,
           std::function<void(std::pair<std::string, std::string>)>>
      container_options;

 public:
  void AddBoolOption(const std::string& option, bool value);
  void AddUint64Option(const std::string& option, std::uint64_t value);
  void AddDoubleOption(const std::string& option, double value);
  void AddStringOption(const std::string& option, const std::string& value);
  void InsertStringOption(const std::string& option, const std::string& value);
  void InsertStringPairOption(const std::string& option, const std::string& key,
                              const std::string& value);
  void SetTensorLocation(const std::string& tensor, const std::string& option,
                         std::uint64_t value);
  void AddCustomOp(const std::string& paddle_op, const std::string& popart_op,
                   const std::string& domain, int version);

  std::map<std::string, std::function<std::string()>> options_getter;
  std::map<std::string, std::function<std::vector<std::string>()>>
      vector_options_getter;
  std::map<std::string, std::function<std::map<std::string, std::string>()>>
      map_options_getter;
  std::map<std::string, std::string> options_type;

  std::string GetOption(const std::string&);
  std::vector<std::string> GetVectorOption(const std::string&);
  std::map<std::string, std::string> GetMapOption(const std::string&);
  std::string GetOptionType(const std::string&);
  std::vector<std::string> GetAllOptionNames();

  void EnablePattern(const std::string& t);
  void DisablePattern(const std::string& t);
  const bool IsPatternEnabled(const std::string& t);

 private:
  template <typename ValueType>
  void set(const std::string& key, ValueType value, ...);
};

}  // namespace ipu
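IpuStrategy dispatches string-named options to typed setters through those per-type maps of std::function. A minimal standalone sketch of that registry pattern (class and option names here are illustrative, not the Paddle API):

    #include <functional>
    #include <iostream>
    #include <map>
    #include <stdexcept>
    #include <string>

    // Each option name maps to a closure that writes into the real config field,
    // so callers can set options by string without knowing the storage layout.
    class OptionRegistry {
     public:
      void RegisterBool(const std::string& name, std::function<void(bool)> setter) {
        bool_setters_[name] = std::move(setter);
      }
      void SetBool(const std::string& name, bool value) {
        auto it = bool_setters_.find(name);
        if (it == bool_setters_.end())
          throw std::out_of_range("unknown option: " + name);
        it->second(value);  // invoke the stored setter
      }

     private:
      std::map<std::string, std::function<void(bool)>> bool_setters_;
    };

    int main() {
      bool enable_fp16 = false;
      OptionRegistry reg;
      reg.RegisterBool("enable_fp16", [&](bool v) { enable_fp16 = v; });
      reg.SetBool("enable_fp16", true);  // dispatch by string name
      std::cout << enable_fp16 << "\n";  // prints 1
    }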
paddle/fluid/pybind/pybind.cc (view file @ d3dcbd37)

@@ -3919,6 +3919,8 @@ All parameter, weight, gradient are variables in Paddle.
             }
             return res;
           })
+      .def("get_all_option_names",
+           &platform::ipu::IpuStrategy::GetAllOptionNames)
      .def("enable_pattern", &platform::ipu::IpuStrategy::EnablePattern)
      .def("disable_pattern", &platform::ipu::IpuStrategy::DisablePattern)
      .def("is_pattern_enabled", &platform::ipu::IpuStrategy::IsPatternEnabled);
paddle/infrt/dialect/phi/pass/proto_arg_map_context.h (view file @ d3dcbd37)

@@ -46,6 +46,8 @@ class ProtoArgumentMappingContext : public phi::ArgumentMappingContext {
  bool IsDenseTensorOutput(const std::string& name) const override;
  bool IsSelectedRowsOutput(const std::string& name) const override;

+  bool IsForInferShape() const override { return false; }
+
 private:
  mlir::Operation* op_;
  const std::unordered_map<std::string, uint8_t>& input_map_;
paddle/phi/core/compat/arg_map_context.h (view file @ d3dcbd37)

@@ -91,6 +91,10 @@ class ArgumentMappingContext {
  virtual bool IsDenseTensorOutput(const std::string& name) const = 0;
  virtual bool IsSelectedRowsOutput(const std::string& name) const = 0;

+  // use this function to mark it comes from InferShapeArgumentMappingContext
+  // and will be used in infershape
+  virtual bool IsForInferShape() const = 0;
};

}  // namespace phi
paddle/phi/infermeta/unary.cc (view file @ d3dcbd37)

@@ -375,7 +375,7 @@ void ReshapeWithXShapeInferMeta(const MetaTensor& x,
  ReshapeInferMeta(x, shape, out, config);
}

-/* Why not use ReduceInferMeta directly?
+/* Why not use ReduceInferMetaBase directly?
   Because we need make InferMetaFunction's args follow the design of api.yaml
*/
void SumInferMeta(const MetaTensor& x,

@@ -383,22 +383,53 @@ void SumInferMeta(const MetaTensor& x,
                  DataType dtype,
                  bool keep_dim,
                  MetaTensor* out) {
-  ReduceInferMetaBase(x, axis, keep_dim, dtype, out);
+  bool reduce_all = false;
+  ReduceInferMetaBase(x, axis, keep_dim, reduce_all, dtype, out);
}

void ReduceInferMetaBase(const MetaTensor& x,
                         const std::vector<int64_t>& axis,
                         bool keep_dim,
+                        bool reduce_all,
                         DataType dtype,
                         MetaTensor* out) {
-  bool reduce_all = true;
-  std::set<int64_t> dims_set(axis.begin(), axis.end());
+  auto x_rank = x.dims().size();
+  std::vector<int64_t> formated_axis = axis;
+  for (size_t i = 0; i < axis.size(); ++i) {
+    PADDLE_ENFORCE_LT(axis[i], x_rank,
+                      errors::InvalidArgument(
+                          "The reduce dim index %d should be in the "
+                          "range [-dimension(X), dimension(X)] "
+                          "which dimesion = %d. But received dim index = %d.",
+                          i, x_rank, axis[i]));
+    PADDLE_ENFORCE_GE(axis[i], -x_rank,
+                      errors::InvalidArgument(
+                          "The reduce dim index %d should be in the "
+                          "range [-dimension(X), dimension(X)] "
+                          "which dimesion = %d. But received dim index = %d.",
+                          i, x_rank, axis[i]));
+    if (axis[i] < 0) {
+      formated_axis[i] = axis[i] + x_rank;
+    }
+  }
+
+  bool full_dim = true;
+  std::set<int64_t> dims_set(formated_axis.begin(), formated_axis.end());
  for (int64_t i = 0; i < x.dims().size(); ++i) {
    if (dims_set.find(i) == dims_set.end()) {
-      reduce_all = false;
+      full_dim = false;
      break;
    }
  }
+  reduce_all = reduce_all || full_dim;

  std::vector<int64_t> out_dim_vector;
  if (keep_dim) {

@@ -441,11 +472,20 @@ void ReduceInferMetaBase(const MetaTensor& x,
  out->set_layout(x.layout());
}

-void ReduceInferMeta(const MetaTensor& x,
-                     const std::vector<int64_t>& axis,
-                     bool keep_dim,
-                     MetaTensor* out) {
-  ReduceInferMetaBase(x, axis, keep_dim, DataType::UNDEFINED, out);
+void MeanRawInferMeta(const MetaTensor& x,
+                      const std::vector<int64_t>& axis,
+                      bool keep_dim,
+                      bool reduce_all,
+                      MetaTensor* out) {
+  ReduceInferMetaBase(x, axis, keep_dim, reduce_all, DataType::UNDEFINED, out);
+}
+
+void MeanInferMeta(const MetaTensor& x,
+                   const std::vector<int64_t>& axis,
+                   bool keep_dim,
+                   MetaTensor* out) {
+  bool reduce_all = false;
+  ReduceInferMetaBase(x, axis, keep_dim, reduce_all, DataType::UNDEFINED, out);
}

void TransferLayoutInferMeta(const MetaTensor& x,
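ReduceInferMetaBase wraps negative axes into [0, rank) and promotes to reduce_all when the axis set covers every dimension. A standalone sketch of just that logic (the helper name is hypothetical, not the phi API):

    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <set>
    #include <vector>

    // Wrap negative axes and report whether the reduction covers all dims.
    bool NormalizeReduceAxes(int64_t rank, std::vector<int64_t>* axes) {
      std::set<int64_t> seen;
      for (auto& a : *axes) {
        assert(a >= -rank && a < rank);  // phi raises InvalidArgument here
        if (a < 0) a += rank;            // e.g. axis -1 on rank 3 becomes 2
        seen.insert(a);
      }
      // Reducing over every dimension is equivalent to reduce_all = true.
      return static_cast<int64_t>(seen.size()) == rank;
    }

    int main() {
      std::vector<int64_t> axes = {-1, 0, 1};
      bool reduce_all = NormalizeReduceAxes(3, &axes);
      std::cout << reduce_all << "\n";  // prints 1: {2, 0, 1} covers rank 3
    }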
paddle/phi/infermeta/unary.h (view file @ d3dcbd37)

@@ -86,13 +86,20 @@ void ReshapeWithXShapeInferMeta(const MetaTensor& x,
void ReduceInferMetaBase(const MetaTensor& x,
                         const std::vector<int64_t>& axis,
                         bool keep_dim,
+                        bool reduce_all,
                         DataType dtype,
                         MetaTensor* out);

void ReduceInferMeta(const MetaTensor& x,
                     const std::vector<int64_t>& axis,
                     bool keep_dim,
                     MetaTensor* out);

+void MeanRawInferMeta(const MetaTensor& x,
+                      const std::vector<int64_t>& axis,
+                      bool keep_dim,
+                      bool reduce_all,
+                      MetaTensor* out);
+
+void MeanInferMeta(const MetaTensor& x,
+                   const std::vector<int64_t>& axis,
+                   bool keep_dim,
+                   MetaTensor* out);
+
void SumInferMeta(const MetaTensor& x,
                  const std::vector<int64_t>& axis,
paddle/phi/kernels/cpu/selu_grad_kernel.cc (new file, 0 → 100644; view file @ d3dcbd37)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/selu_grad_kernel.h"

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/selu_grad_kernel_impl.h"

PD_REGISTER_KERNEL(
    selu_grad, CPU, ALL_LAYOUT, phi::SeluGradKernel, float, double) {}
paddle/phi/kernels/cpu/selu_kernel.cc (new file, 0 → 100644; view file @ d3dcbd37)

// (Apache-2.0 header as in selu_grad_kernel.cc above)

#include "paddle/phi/kernels/selu_kernel.h"

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/selu_kernel_impl.h"

PD_REGISTER_KERNEL(selu, CPU, ALL_LAYOUT, phi::SeluKernel, float, double) {}
paddle/phi/kernels/gpu/selu_grad_kernel.cu (new file, 0 → 100644; view file @ d3dcbd37)

// (Apache-2.0 header as in selu_grad_kernel.cc above)

#include "paddle/phi/kernels/selu_grad_kernel.h"

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/selu_grad_kernel_impl.h"

PD_REGISTER_KERNEL(
    selu_grad, GPU, ALL_LAYOUT, phi::SeluGradKernel, float, double) {}
paddle/phi/kernels/gpu/selu_kernel.cu (new file, 0 → 100644; view file @ d3dcbd37)

// (Apache-2.0 header as in selu_grad_kernel.cc above)

#include "paddle/phi/kernels/selu_kernel.h"

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/selu_kernel_impl.h"

PD_REGISTER_KERNEL(selu, GPU, ALL_LAYOUT, phi::SeluKernel, float, double) {}
paddle/phi/kernels/impl/selu_grad_kernel_impl.h (new file, 0 → 100644; view file @ d3dcbd37)

// (Apache-2.0 header as in selu_grad_kernel.cc above)

#pragma once

#include "paddle/phi/kernels/impl/selu_kernel_impl.h"

#include "paddle/phi/core/dense_tensor.h"

namespace phi {

template <typename T, typename Context>
void SeluGradKernel(const Context& dev_ctx,
                    const DenseTensor& out,
                    const DenseTensor& dout,
                    float scale,
                    float alpha,
                    DenseTensor* dx) {
  auto dx_ptr = dev_ctx.template Alloc<T>(dx);
  SeluGradFunctor<T> functor(
      out.data<T>(), dout.data<T>(), alpha, scale, dx_ptr);
  size_t limit = static_cast<size_t>(out.numel());
  paddle::platform::ForRange<Context> for_range(dev_ctx, limit);
  for_range(functor);
}

}  // namespace phi
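Note that the gradient kernel consumes the forward output rather than the input: for x <= 0, selu'(x) = scale*alpha*e^x, and since y = scale*alpha*(e^x - 1) this equals y + scale*alpha, so no exp is needed in the backward pass. A host-side reference of that identity (the alpha/scale defaults are assumptions here; Paddle passes them as op attributes, and SeluGradFunctor's body is not fully shown in this diff):

    #include <cstdio>

    // Reference SELU derivative expressed through the forward output y:
    // for y > 0 the slope is scale; otherwise scale*alpha*e^x = y + alpha*scale.
    double selu_grad_ref(double y, double dy,
                         double alpha = 1.6732632423543772,
                         double scale = 1.0507009873554805) {
      return dy * (y > 0 ? scale : y + alpha * scale);
    }

    int main() {
      // At x = -1 the forward output is ~-1.111330, so the local slope is
      // y + alpha*scale ~= 0.646765.
      std::printf("%f\n", selu_grad_ref(-1.111330, 1.0));
    }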
paddle/fluid/operators/selu_op.h → paddle/phi/kernels/impl/selu_kernel_impl.h (renamed; view file @ d3dcbd37)

-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
- ... (old block-comment Apache-2.0 header) ... */
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// ... (line-comment Apache-2.0 header) ...

#pragma once
-#include <string>
-#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/core/dense_tensor.h"

-namespace paddle {
-namespace operators {
+namespace phi {

template <typename T>
struct SeluFunctor {

@@ -32,7 +31,7 @@ struct SeluFunctor {
  HOSTDEVICE void operator()(size_t idx) const {
    T x_ele = x_data_ptr_[idx];
    if (x_ele <= 0) {
-      x_ele = alpha_ * real_exp(x_ele) - alpha_;
+      x_ele = alpha_ * paddle::operators::real_exp(x_ele) - alpha_;
    }
    y_data_ptr_[idx] = scale_ * x_ele;
  }

@@ -44,8 +43,11 @@ struct SeluFunctor {
template <typename T>
struct SeluGradFunctor {
-  SeluGradFunctor(const T* y_data_ptr, const T* dy_data_ptr, float alpha,
-                  float scale, T* dx_data_ptr)
+  SeluGradFunctor(const T* y_data_ptr,
+                  const T* dy_data_ptr,
+                  float alpha,
+                  float scale,
+                  T* dx_data_ptr)
      : y_data_ptr_(y_data_ptr),
        dy_data_ptr_(dy_data_ptr),
        alpha_(alpha),

@@ -71,53 +73,16 @@ struct SeluGradFunctor {
  T* dx_data_ptr_;
};

-template <typename DeviceContext, typename T>
-class SeluKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    using Tensor = framework::Tensor;
-
-    auto* x = context.Input<Tensor>("X");
-    auto* out = context.Output<Tensor>("Out");
-    float alpha = context.Attr<float>("alpha");
-    float scale = context.Attr<float>("scale");
-    auto out_ptr = out->mutable_data<T>(context.GetPlace());
-    SeluFunctor<T> functor(x->data<T>(), alpha, scale, out_ptr);
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-    size_t limit = static_cast<size_t>(x->numel());
-    platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
-    for_range(functor);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class SeluGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    using Tensor = framework::Tensor;
-
-    auto* out = context.Input<Tensor>("Out");
-    auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
-    auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
-    float alpha = context.Attr<float>("alpha");
-    float scale = context.Attr<float>("scale");
-    auto dx_ptr = dx->mutable_data<T>(context.GetPlace());
-    SeluGradFunctor<T> functor(out->data<T>(), dout->data<T>(), alpha, scale,
-                               dx_ptr);
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-    size_t limit = static_cast<size_t>(out->numel());
-    platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
-    for_range(functor);
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
+template <typename T, typename Context>
+void SeluKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                float scale,
+                float alpha,
+                DenseTensor* out) {
+  auto out_ptr = dev_ctx.template Alloc<T>(out);
+  SeluFunctor<T> functor(x.data<T>(), alpha, scale, out_ptr);
+  size_t limit = static_cast<size_t>(x.numel());
+  paddle::platform::ForRange<Context> for_range(dev_ctx, limit);
+  for_range(functor);
+}
+
+}  // namespace phi
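For reference, SeluFunctor implements selu(x) = scale * x for x > 0 and scale * alpha * (e^x - 1) otherwise. A minimal host-side sketch (the alpha/scale defaults are the usual SELU constants and are an assumption here; Paddle receives them as op attributes):

    #include <cmath>
    #include <cstdio>

    // Host-side reference of the SELU forward computed by SeluFunctor above;
    // the defaults follow Klambauer et al. (2017).
    double selu_ref(double x, double alpha = 1.6732632423543772,
                    double scale = 1.0507009873554805) {
      return x > 0 ? scale * x : scale * (alpha * std::exp(x) - alpha);
    }

    int main() {
      std::printf("%f\n", selu_ref(1.0));   // ~1.050701
      std::printf("%f\n", selu_ref(-1.0));  // ~-1.111330
    }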
paddle/phi/kernels/math_kernel.h (view file @ d3dcbd37)

@@ -156,7 +156,7 @@ DenseTensor Mean(const Context& dev_ctx,
                 bool keep_dim) {
  auto dense_out = phi::Empty<T, Context>(dev_ctx);
  MetaTensor meta_out(&dense_out);
-  ReduceInferMetaBase(x, axis, keep_dim, x.dtype(), &meta_out);
+  ReduceInferMetaBase(x, axis, keep_dim, false, x.dtype(), &meta_out);
  MeanKernel<T, Context>(dev_ctx, x, axis, keep_dim, &dense_out);
  return dense_out;
}
paddle/phi/kernels/primitive/compute_primitives.h (view file @ d3dcbd37)

@@ -136,7 +136,9 @@ __device__ __forceinline__ T BlockYReduce(T val, ReduceOp reducer) {
  return shared_memory[threadIdx.x];
}

-// Swap data
+/**
+ * @brief Swap data
+ */
template <typename T>
__device__ __forceinline__ void Swap(T* first_value, T* second_value) {
  T t_value;

@@ -145,7 +147,9 @@ __device__ __forceinline__ void Swap(T* first_value, T* second_value) {
  (*second_value) = t_value;
}

-// swap with monotonic_type
+/**
+ * @brief Swap data according to monotonic_type.
+ */
template <typename T>
__device__ __forceinline__ void Comparator(T* first_value,
                                           T* second_value,

@@ -155,6 +159,9 @@ __device__ __forceinline__ void Comparator(T* first_value,
  }
}

+/**
+ * @brief Swap data and data index according to monotonic_type.
+ */
template <typename T, typename IndexType>
__device__ __forceinline__ void ComparatorWithIndex(T* first_value,

@@ -170,6 +177,18 @@ __device__ __forceinline__ void ComparatorWithIndex(T* first_value,
  }
}

+/**
+ * @brief get the last pow of 2
+ */
+__device__ inline int GetLastPow2(int n) {
+  n |= (n >> 1);
+  n |= (n >> 2);
+  n |= (n >> 4);
+  n |= (n >> 8);
+  n |= (n >> 16);
+  return std::max(1, n - (n >> 1));
+}
+
}  // namespace details
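GetLastPow2 works by smearing the highest set bit of n into every lower position, so the shifted value becomes 2^k - 1 where 2^k - 1 >= n; then n - (n >> 1) isolates the top bit, i.e. the largest power of two not exceeding the original n. A host-side check of the same bit trick (illustrative, not part of Paddle):

    #include <cassert>

    // Host copy of details::GetLastPow2: after the shifts every bit below the
    // highest set bit is set, so n - (n >> 1) leaves only that top bit
    // (clamped to 1 for n <= 1).
    int GetLastPow2Host(int n) {
      n |= (n >> 1);
      n |= (n >> 2);
      n |= (n >> 4);
      n |= (n >> 8);
      n |= (n >> 16);
      return n < 1 ? 1 : n - (n >> 1);
    }

    int main() {
      assert(GetLastPow2Host(1) == 1);
      assert(GetLastPow2Host(20) == 16);    // 20 -> 0b11111 -> 16
      assert(GetLastPow2Host(1024) == 1024);
    }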
@@ -453,6 +472,29 @@ __device__ __forceinline__ void Reduce(T* out,
  }
}

+/*
+ * @brief Fill register with a constant according to OpFunc
+ *
+ * @template paraments
+ * InT: The data type of in1 and in2.
+ * OutT: The data type of out.
+ * NX: The number of data columns loaded by each thread.
+ * NY: The number of data rows loaded by each thread.
+ * BlockSize: Identifies the current device thread index method. Currently only
+ * GPU was supported.
+ * OpFunc: Compute functor which has an operator() as following
+ *     template <typename InT>
+ *     struct XxxFunctor {
+ *       HOSTDEVICE InT operator()() const {
+ *         return a;
+ *       }
+ *     };
+ *
+ * @param
+ * out: The register pointer of out, the size is NX * NY.
+ * compute: Compute function which was declared like OpFunc<InT>().
+ */
template <typename InT, typename OutT, int NX, ...

@@ -466,6 +508,33 @@ __device__ __forceinline__ void ElementwiseConstant(OutT* out, OpFunc compute) {
  }
}

+/*
+ * @brief Get ReturnsCount random data fromm compute according to state, state
+ * can be curandStatePhilox4_32_10_t, hiprandStatePhilox4_32_10_t which has beed
+ * initialized.
+ *
+ * @template paraments
+ * StateType: the type of state, can be curandStatePhilox4_32_10_t or
+ * hiprandStatePhilox4_32_10_t.
+ * OutT: the type of out register.
+ * ReturnsCount: The number of random data generated by OpFunc.
+ * BlockSize: Identifies the current device thread index method. Currently only
+ * GPU was supported.
+ * OpFunc: Compute functor which has an operator() as following
+ *     template <typename T>
+ *     struct XxxFunctor {
+ *       HOSTDEVICE InT operator()(StateType state) const {
+ *         return ranomd(state);  // Returns ReturnsCount random numbers with
+ *                                // data type T
+ *       }
+ *     };
+ *
+ * @param
+ * out: The register pointer of out, the size is NX * NY.
+ * compute: Compute function which was declared like OpFunc<T>().
+ */
template <typename StateType, typename OutT, int ReturnsCount, ...
@@ -481,131 +550,208 @@ __device__ __forceinline__ void ElementwiseRandom(OutT* out,
  }
}

-// attention please set share_size = blockDim.x;
-// data and b are the register pointer
-#define shared_size 64
-template <typename InT, typename OutT, int NX, int NY, int BlockSize,
-          class OpFunc>
+/*
+ * @brief Complete the prefix and in the block, each thread calculates 2 data,
+ * the size of out and in is 2, and BlockDim.x must be less then 512.
+ *
+ * @template paraments
+ * InT: the type of input register.
+ * OutT: the type of out register.
+ * BlockSize: Identifies the current device thread index method. Currently only
+ * GPU was supported.
+ * OpFunc: Compute functor which has an operator() as following
+ *     template <typename T>
+ *     struct XxxFunctor {
+ *       HOSTDEVICE InT operator()(T a, T b) const {
+ *         return a + b;
+ *       }
+ *     };
+ *
+ * @param
+ * out: The register pointer of out, the size is 2;
+ * in: The register pointer of input, the size is 2;
+ * compute: Compute function which was declared like OpFunc<T>().
+ */
+#define SHARED_SIZE_LIMIT 512
+template <typename InT, typename OutT, int BlockSize, class OpFunc>
__device__ __forceinline__ void Cumsum(OutT* out, const InT* in, OpFunc compute) {
-  __shared__ InT temp[shared_size * 2 + (shared_size * 2) / 32];
+  constexpr int kSize = SHARED_SIZE_LIMIT * 2 + (SHARED_SIZE_LIMIT * 2) / 32;
+  __shared__ InT temp[kSize];
+  int stride_size = blockDim.x;
  int tidx = threadIdx.x;
  temp[tidx + tidx / 32] = in[0];
-  temp[shared_size + tidx + (shared_size + tidx) / 32] = in[1];
-  for (int stride = 1; stride <= blockDim.x; stride *= 2) {
+  temp[stride_size + tidx + (stride_size + tidx) / 32] = in[1];
+  for (int stride = 1; stride <= stride_size; stride *= 2) {
    __syncthreads();
    int index = (tidx + 1) * 2 * stride - 1;
    if (index < (blockDim.x * 2)) {
-      temp[index + index / 32] += temp[index - stride + (index - stride) / 32];
+      temp[index + index / 32] =
+          compute(temp[index + index / 2],
+                  temp[index - stride + (index - stride) / 32]);
    }
  }
  for (int stride = (blockDim.x * 2) / 4; stride > 0; stride /= 2) {
    __syncthreads();
    int index = (tidx + 1) * 2 * stride - 1;
    if ((index + stride) < (blockDim.x * 2)) {
-      temp[index + stride + (stride + index) / 32] +=
-          temp[index + (index) / 32];
+      temp[index + stride + (stride + index) / 32] =
+          compute(temp[index + stride + (stride + index) / 32],
+                  temp[index + (index) / 32]);
    }
  }

  __syncthreads();
  out[0] = static_cast<OutT>(temp[tidx + tidx / 32]);
  out[1] = static_cast<OutT>(
-      temp[tidx + shared_size + (tidx + shared_size) / 32]);
+      temp[tidx + stride_size + (tidx + stride_size) / 32]);
}
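Cumsum is a two-phase (up-sweep then down-sweep) block scan over 2 * blockDim.x elements; the recurring index / 32 terms pad shared-memory addresses to avoid bank conflicts. A plain CPU reference of the inclusive scan the block is expected to produce, useful for checking the kernel (illustrative, not part of Paddle):

    #include <cstdio>
    #include <vector>

    // CPU reference: inclusive prefix sum over the same data the block-level
    // Cumsum above processes (2 elements per thread).
    std::vector<int> InclusiveScanRef(const std::vector<int>& in) {
      std::vector<int> out(in.size());
      int running = 0;
      for (size_t i = 0; i < in.size(); ++i) {
        running += in[i];
        out[i] = running;
      }
      return out;
    }

    int main() {
      auto out = InclusiveScanRef({1, 2, 3, 4});
      for (int v : out) std::printf("%d ", v);  // prints: 1 3 6 10
      std::printf("\n");
    }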
#define SHARED_SIZE_LIMIT \
1024 // each thread load 2 data from global memory so SHARED_SIZE_LIMIT must
// larger than blockDim.x * 2
// if monotonic_type = 1 then increase
// if gridDim.x > 1 please set monotonic_type = blockIdx.x & 1; blockIdx.x % 2
// == 1 the increase
template
<
typename
T
>
__device__
__forceinline__
void
Sort
(
T
*
dst
,
const
T
*
src_data
,
#undef SHARED_SIZE_LIMIT
/*
* @brief Sort data in this block, each thread calculates 2 data, the size of out
* and in is 2, and BlockDim.x must be less then 512.
*
* @template paraments
* InT: the type of input register.
* OutT: the type of out register.
* BlockSize: Identifies the current device thread index method. Currently only
* GPU was supported.
*
* @param
* out: The register pointer of out, the size is 2.
* in: The register pointer of input, the size is 2.
* num: The num of this block
* monotonic_type: if monotonic_type = 1 then sorted in ascending order, eles
* sorted in escending.
*/
#define SHARED_SIZE_LIMIT 1024
// each thread load 2 data from global memory so SHARED_SIZE_LIMIT must
// larger than blockDim.x * 2
template
<
typename
InT
,
typename
OutT
,
int
BlockSize
>
__device__
__forceinline__
void
Sort
(
OutT
*
out
,
const
InT
*
in
,
int
num
,
int
monotonic_type
)
{
// todo: set num = Pow2(num)
int
upper_bound
=
blockDim
.
x
;
// update upper_bound
upper_bound
=
std
::
min
(
details
::
GetLastPow2
(
num
),
upper_bound
);
// shareMem for value and index num must smaller than SHARED_SIZE_LIMIT / 2
__shared__
T
value
[
SHARED_SIZE_LIMIT
];
// shareMem's size must larger than
// blockDim * 2
// Copy value and index from src and src_index
value
[
threadIdx
.
x
]
=
src_data
[
0
];
value
[
threadIdx
.
x
+
(
SHARED_SIZE_LIMIT
/
2
)]
=
src_data
[
1
];
__shared__
InT
value
[
SHARED_SIZE_LIMIT
];
int
stride_size
=
blockDim
.
x
;
// shareMem's size must larger than blockDim * 2
// Copy value from in
value
[
threadIdx
.
x
]
=
in
[
0
];
value
[
threadIdx
.
x
+
stride_size
]
=
in
[
1
];
// make bitonicSort
for
(
int
size
=
2
;
size
<
num
;
size
<<=
1
)
{
for
(
int
size
=
2
;
size
<
upper_bound
;
size
<<=
1
)
{
int
bitonic_type
=
(
threadIdx
.
x
&
(
size
/
2
))
!=
0
;
for
(
int
stride
=
size
/
2
;
stride
>
0
;
stride
>>=
1
)
{
__syncthreads
();
int
pos
=
2
*
threadIdx
.
x
-
(
threadIdx
.
x
&
(
stride
-
1
));
details
::
Comparator
<
T
>
(
&
value
[
pos
],
&
value
[
pos
+
stride
],
bitonic_type
);
details
::
Comparator
<
In
T
>
(
&
value
[
pos
],
&
value
[
pos
+
stride
],
bitonic_type
);
}
}
// last sort
for
(
int
stride
=
SHARED_SIZE_LIMIT
/
2
;
stride
>
0
;
stride
>>=
1
)
{
for
(
int
stride
=
stride_size
;
stride
>
0
;
stride
>>=
1
)
{
__syncthreads
();
int
pos
=
2
*
threadIdx
.
x
-
(
threadIdx
.
x
&
(
stride
-
1
));
// last sort when monotonic_type = 1 then increase
details
::
Comparator
<
T
>
(
&
value
[
pos
],
&
value
[
pos
+
stride
],
monotonic_type
);
details
::
Comparator
<
In
T
>
(
&
value
[
pos
],
&
value
[
pos
+
stride
],
monotonic_type
);
}
__syncthreads
();
dst
[
0
]
=
value
[
threadIdx
.
x
]
;
dst
[
1
]
=
value
[
threadIdx
.
x
+
(
SHARED_SIZE_LIMIT
/
2
)]
;
out
[
0
]
=
static_cast
<
OutT
>
(
value
[
threadIdx
.
x
])
;
out
[
1
]
=
static_cast
<
OutT
>
(
value
[
threadIdx
.
x
+
stride_size
])
;
}
/*
 * @brief Sort data with data_index in this block, each thread calculates 2
 * data, the size of out and in is 2, and blockDim.x must be less than 512.
 *
 * @template parameters
 * InT: The type of input register.
 * OutT: The type of out register.
 * IndexType: The type of index.
 * BlockSize: Identifies the current device thread index method. Currently
 * only GPU is supported.
 *
 * @param
 * out: The register pointer of out, the size is 2.
 * out_index: The register pointer of out_index, the size is 2.
 * in: The register pointer of input, the size is 2.
 * in_index: The register pointer of in_index, the size is 2.
 * num: The num of this block.
 * monotonic_type: if monotonic_type = 1 the result is sorted in ascending
 * order, else in descending order.
 */
template <typename InT, typename OutT, typename IndexType, int BlockSize>
__device__ __forceinline__ void Sort(OutT* out,
                                     IndexType* out_index,
                                     const InT* in,
                                     IndexType* in_index,
                                     int num,
                                     int monotonic_type) {
  // todo: set num = Pow2(num)
  int upper_bound = blockDim.x;
  // update upper_bound
  upper_bound = std::min(details::GetLastPow2(num), upper_bound);
  // shared memory for value and index; num must be smaller than
  // SHARED_SIZE_LIMIT / 2
  __shared__ InT value[SHARED_SIZE_LIMIT];  // shared memory size must be
                                            // larger than blockDim.x * 2
  __shared__ IndexType index[SHARED_SIZE_LIMIT];
  // Copy value and index from in and in_index
  int stride_size = blockDim.x;
  value[threadIdx.x] = in[0];
  value[threadIdx.x + stride_size] = in[1];
  // index
  index[threadIdx.x] = in_index[0];
  index[threadIdx.x + stride_size] = in_index[1];
  // make bitonicSort
  for (int size = 2; size < upper_bound; size <<= 1) {
    int bitonic_type = (threadIdx.x & (size / 2)) != 0;
    for (int stride = size / 2; stride > 0; stride >>= 1) {
      __syncthreads();
      int pos = 2 * threadIdx.x - (threadIdx.x & (stride - 1));
      details::ComparatorWithIndex<InT, IndexType>(&value[pos],
                                                   &value[pos + stride],
                                                   &index[pos],
                                                   &index[pos + stride],
                                                   bitonic_type);
    }
  }
  for (int stride = stride_size; stride > 0; stride >>= 1) {
    __syncthreads();
    int pos = 2 * threadIdx.x - (threadIdx.x & (stride - 1));
    // last sort: when monotonic_type = 1 the final order is increasing
    details::ComparatorWithIndex<InT, IndexType>(&value[pos],
                                                 &value[pos + stride],
                                                 &index[pos],
                                                 &index[pos + stride],
                                                 monotonic_type);
  }
  __syncthreads();
  out[0] = static_cast<OutT>(value[threadIdx.x]);
  out[1] = static_cast<OutT>(value[threadIdx.x + stride_size]);
  out_index[0] = index[threadIdx.x];
  out_index[1] = index[threadIdx.x + stride_size];
}
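A matching sketch for the indexed variant (again an assumption, not text from the diff): seeding in_index with each thread's original positions turns the block sort into a block-local argsort.

template <typename T>
__global__ void BlockArgSortKernel(T* data, int* indices, int num) {
  T in[2], out[2];
  int in_idx[2], out_idx[2];
  in[0] = data[threadIdx.x];
  in[1] = data[threadIdx.x + blockDim.x];
  in_idx[0] = threadIdx.x;               // original position of in[0]
  in_idx[1] = threadIdx.x + blockDim.x;  // original position of in[1]
  // template arguments: <InT, OutT, IndexType, BlockSize>
  kps::Sort<T, T, int, 1>(out, out_idx, in, in_idx, num, 1);
  data[threadIdx.x] = out[0];
  data[threadIdx.x + blockDim.x] = out[1];
  indices[threadIdx.x] = out_idx[0];
  indices[threadIdx.x + blockDim.x] = out_idx[1];
}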
template <typename T1, typename T2, typename OutT, typename OpFunc>
HOSTDEVICE __forceinline__ void OperatorTernary(
    OutT* out, const T1* in1, const T2* in2, OpFunc func, int num) {
  func(out, in1, in2, num);
}
template <typename InT, typename OutT, typename OpFunc>
HOSTDEVICE __forceinline__ void OperatorBinary(OutT* out,
                                               const InT* in,
                                               OpFunc func,
                                               int num) {
  func(out, in, num);
}
}  // namespace kps
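To make the OpFunc contract concrete, here is a hypothetical functor that would satisfy OperatorBinary (the name CastFunctor and its body are illustrative assumptions, not part of the commit):

template <typename InT, typename OutT>
struct CastFunctor {
  HOSTDEVICE void operator()(OutT* out, const InT* in, int num) const {
    for (int i = 0; i < num; ++i) {
      out[i] = static_cast<OutT>(in[i]);  // per-element conversion
    }
  }
};

A call would then look like kps::OperatorBinary<InT, OutT, CastFunctor<InT, OutT>>(out, in, CastFunctor<InT, OutT>(), num).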
...
...
paddle/phi/kernels/primitive/compute_primitives_xpu2.h
...
...
@@ -348,6 +348,29 @@ __device__ __forceinline__ void Reduce(T* out,
}
}
/*
 * @brief Fill register with a constant according to OpFunc.
 *
 * @template parameters
 * InT: The data type returned by OpFunc.
 * OutT: The data type of out.
 * NX: The number of data columns loaded by each thread.
 * NY: The number of data rows loaded by each thread.
 * BlockSize: Identifies the current device thread index method. For xpu,
 * core_id() is used as the index.
 * OpFunc: Compute functor which has an operator() as following:
 *     template <typename InT>
 *     struct XxxFunctor {
 *       HOSTDEVICE InT operator()() const {
 *         return a;
 *       }
 *     };
 *
 * @param
 * out: The register pointer of out, the size is NX * NY.
 * compute: Compute function which was declared like OpFunc<InT>().
 */
template <typename InT, typename OutT, int NX,
...
...
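Following the operator() shape given in the comment above, a minimal example functor (OneFunctor is a hypothetical name, not part of the diff):

template <typename InT>
struct OneFunctor {
  HOSTDEVICE InT operator()() const {
    return static_cast<InT>(1);  // constant used to fill each register slot
  }
};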
paddle/phi/kernels/primitive/datamover_primitives.h
...
...
@@ -297,6 +297,24 @@ __device__ __forceinline__ void ReadData(T* dst,
/**
 * @brief Read 1D data from global memory to register. The difference
 * from the above function is that it supports different data types of inputs.
 *
 * @template parameters
 * T: The type of data.
 * NX: Each thread loads NX data from global memory continuously.
 * NY: Each thread needs to load NY rows; only NY = 1 is supported.
 * ArgsT: The type of dst; ArgsT can be std::tuple<T> or std::tuple<Args>.
 * Index: The index of data stored in dst.
 * BlockSize: Identifies the current device thread index method. For GPU,
 * threadIdx.x is used as the thread index. Currently only GPU is supported.
 * IsBoundary: Whether to make an out-of-bounds judgment on access to memory.
 * When the number of data processed by this block is less than
 * NX x NY x blockDim.x, boundary judgment is required to avoid memory access
 * crossing the boundary.
 *
 * @param:
 * dst: The register pointer of the thread, the size is NX * NY.
 * src: The data pointer of the current block.
 * size: The current block needs to load size data continuously.
 */
template <typename T, int NX,
...
...
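The IsBoundary idea, reduced to a standalone sketch (ReadTile is an illustrative name; the actual primitive's signature is elided by the diff): when the block's remaining element count is smaller than a full tile, each access is guarded, otherwise the guard compiles away.

template <typename T, int NX, bool IsBoundary>
__device__ __forceinline__ void ReadTile(T* dst, const T* src, int size) {
#pragma unroll
  for (int i = 0; i < NX; ++i) {
    if (IsBoundary && i >= size) {
      break;  // partial tile: stop before crossing the boundary
    }
    dst[i] = src[i];
  }
}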
@@ -714,6 +732,20 @@ __device__ __forceinline__ void ReadDataBc(
}
}
/**
 * @brief Initialize register with data index.
 *
 * @template parameters
 * T: Data type of register.
 * NX: Number of data to initialize.
 * NY: Number of data to initialize; NY can only be 1.
 * BlockSize: Identifies the current device thread index method. For GPU,
 * threadIdx.x is used as the thread index. Currently only GPU is supported.
 *
 * @param:
 * dst: The register pointer of the thread, the size is NX.
 * block_offset: The global data offset of this block.
 */
template <typename T, int NX, int NY, int BlockSize>
__device__ __forceinline__ void InitWithDataIndex(T* dst, int block_offset) {
  int thread_offset = block_offset + threadIdx.x * NX;
...
...
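A small assumed usage sketch: materialize each thread's flattened global offsets in registers, e.g. to seed the in_index operand of the indexed Sort primitive shown earlier.

__global__ void MakeIndices(int* out, int block_offset) {
  constexpr int kNX = 2;
  int idx[kNX];
  // fills idx[i] with block_offset + threadIdx.x * kNX + i
  kps::InitWithDataIndex<int, kNX, 1, 1>(idx, block_offset);
  out[threadIdx.x * kNX + 0] = idx[0];
  out[threadIdx.x * kNX + 1] = idx[1];
}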
paddle/phi/kernels/primitive/datamover_primitives_xpu2.h
...
...
@@ -244,6 +244,24 @@ __device__ __inline__ void ReadData(T* dst,
/**
 * @brief Read 1D data from global memory to register. The difference
 * from the above function is that it supports different data types of inputs.
 *
 * @template parameters
 * T: The type of data.
 * NX: Each thread loads NX data from global memory continuously.
 * NY: Each thread needs to load NY rows; only NY = 1 is supported.
 * ArgsT: The type of dst; ArgsT can be std::tuple<T> or std::tuple<Args>.
 * Index: The index of data stored in dst.
 * BlockSize: Identifies the current device thread index method. For xpu,
 * core_id() is used as the index.
 * IsBoundary: Whether to make an out-of-bounds judgment on access to memory.
 * When the number of data processed by this block is less than
 * NX x NY x blockDim.x, boundary judgment is required to avoid memory access
 * crossing the boundary.
 *
 * @param:
 * dst: The register pointer of the thread, the size is NX * NY.
 * src: The data pointer of the current block.
 * size: The current block needs to load size data continuously.
 */
template <typename T, int NX,
...
...
@@ -646,5 +664,28 @@ __device__ __inline__ void ReadDataBc(
}
}
/**
 * @brief Initialize register with data index.
 *
 * @template parameters
 * T: Data type of register.
 * NX: Number of data to initialize.
 * NY: Number of data to initialize; NY can only be 1.
 * BlockSize: Identifies the current device thread index method. For xpu,
 * core_id() is used as the index.
 *
 * @param:
 * dst: The register pointer of the thread, the size is NX.
 * block_offset: The global data offset of this block.
 */
template <typename T, int NX, int NY, int BlockSize>
__device__ __forceinline__ void InitWithDataIndex(T* dst, int block_offset) {
  int thread_offset = block_offset + core_id() * NX;
#pragma unroll
  for (int nx = 0; nx < NX; ++nx) {
    dst[nx] = static_cast<T>(thread_offset + nx);
  }
}
}  // namespace kps
}  // namespace phi
paddle/phi/kernels/selu_grad_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {

template <typename T, typename Context>
void SeluGradKernel(const Context& dev_ctx,
                    const DenseTensor& out,
                    const DenseTensor& d_out,
                    float scale,
                    float alpha,
                    DenseTensor* d_x);

}  // namespace phi
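As background, the gradient can be expressed purely in terms of the forward output, which is why SeluGradKernel takes out rather than x. A scalar sketch of the expected semantics (assuming the standard SELU definition; the actual kernels live in paddle/phi/kernels/impl/selu_grad_kernel_impl.h added by this commit):

template <typename T>
T SeluGradReference(T out, T d_out, float scale, float alpha) {
  // x > 0:  out = scale * x, so d(out)/dx = scale
  // x <= 0: out = scale * alpha * (exp(x) - 1),
  //         so d(out)/dx = scale * alpha * exp(x) = out + scale * alpha
  return out > T(0) ? d_out * static_cast<T>(scale)
                    : d_out * (out + static_cast<T>(scale * alpha));
}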
paddle/phi/kernels/selu_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {

template <typename T, typename Context>
void SeluKernel(const Context& dev_ctx,
                const DenseTensor& x,
                float scale,
                float alpha,
                DenseTensor* out);

}  // namespace phi
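For reference, the standard SELU definition this kernel is expected to implement (a scalar sketch for orientation; the device/host kernels themselves live in paddle/phi/kernels/impl/selu_kernel_impl.h):

#include <cmath>

template <typename T>
T SeluReference(T x, float scale, float alpha) {
  // scale * x for positives, scaled ELU branch otherwise
  return x > T(0) ? static_cast<T>(scale) * x
                  : static_cast<T>(scale * alpha) * (std::exp(x) - T(1));
}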
paddle/phi/ops/compat/reduce_sig.cc
...
...
@@ -17,28 +17,36 @@ limitations under the License. */
namespace phi {

KernelSignature ReduceSumOpArgumentMapping(const ArgumentMappingContext& ctx) {
  if (ctx.IsDenseTensorInput("X")) {
    bool reduce_all = paddle::any_cast<bool>(ctx.Attr("reduce_all"));
    // When ctx is InferShapeArgumentMappingContext, reduce_all is used in
    // InferShape, so we must return the "sum_raw" KernelSignature.
    // And the InferMeta function (i.e. ReduceInferMetaBase) is in accordance
    // with the "sum_raw" KernelSignature.
    if (ctx.IsForInferShape() || reduce_all) {
      return KernelSignature("sum_raw",
                             {"X"},
                             {"dim", "keep_dim", "reduce_all", "out_dtype"},
                             {"Out"});
    }
    return KernelSignature(
        "sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"});
  }
  return KernelSignature("unregistered", {}, {}, {});
}
KernelSignature ReduceMeanOpArgumentMapping(const ArgumentMappingContext& ctx) {
  if (ctx.IsDenseTensorInput("X")) {
    bool reduce_all = paddle::any_cast<bool>(ctx.Attr("reduce_all"));
    // When ctx is InferShapeArgumentMappingContext, reduce_all is used in
    // InferShape, so we must return the "mean_raw" KernelSignature.
    // And the InferMeta function (i.e. MeanRawInferMeta) is in accordance
    // with the "mean_raw" KernelSignature.
    if (ctx.IsForInferShape() || reduce_all) {
      return KernelSignature(
          "mean_raw", {"X"}, {"dim", "keep_dim", "reduce_all"}, {"Out"});
    }
    return KernelSignature("mean", {"X"}, {"dim", "keep_dim"}, {"Out"});
  }
  return KernelSignature("unregistered", {}, {}, {});
}
...
...
paddle/phi/ops/compat/selu_sig.cc
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {

KernelSignature SeluGradGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  return KernelSignature("selu_grad",
                         {"Out", GradVarName("Out")},
                         {"scale", "alpha"},
                         {GradVarName("X")});
}

}  // namespace phi

PD_REGISTER_ARG_MAPPING_FN(selu_grad, phi::SeluGradGradOpArgumentMapping);
paddle/phi/tests/ops/test_op_signature.h
...
...
@@ -80,6 +80,8 @@ class TestArgumentMappingContext : public phi::ArgumentMappingContext {
    return selected_rows_outputs.count(name) > 0;
  }

  bool IsForInferShape() const override { return false; }

 private:
  const std::unordered_set<std::string> dense_tensor_inputs;
  const std::unordered_set<std::string> selected_rows_inputs;
...
...
paddle/scripts/paddle_build.sh
...
...
@@ -1266,7 +1266,7 @@ function card_test() {
    elif [ "${WITH_ASCEND_CL}" == "ON" ];then
        CUDA_DEVICE_COUNT=1
    elif [ "${WITH_ROCM}" == "ON" ];then
        CUDA_DEVICE_COUNT=$(rocm-smi -i | grep GPU | wc -l)
    else
        CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
    fi
...
...
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
...
...
@@ -25,6 +25,12 @@ function(inference_analysis_python_api_int8_test_mkldnn target model_dir data_pa
    _inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_path} ${filename} True)
endfunction()

function(download_data install_dir url data_file check_sum)
    if (NOT EXISTS ${install_dir}/${data_file})
        inference_download_and_uncompress(${install_dir} ${url} ${data_file} ${check_sum})
    endif()
endfunction()

function(download_quant_data install_dir data_file check_sum)
    if (NOT EXISTS ${install_dir}/${data_file})
        inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 ${data_file} ${check_sum})
...
...
@@ -290,8 +296,9 @@ if(LINUX AND WITH_MKLDNN)
    ### PTQ INT8
    # PTQ int8 lstm model
    set(LSTM_DATA_FILE "quant_lstm_input_data.tar.gz")
    set(LSTM_URL "${INFERENCE_URL}/int8/unittest_model_data")
    download_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_URL} ${LSTM_DATA_FILE} add84c754e9b792fea1fbd728d134ab7)
    set(QUANT2_FP32_LSTM_MODEL_ARCHIVE "lstm_fp32_model.tar.gz")
    download_lstm_model(${QUANT2_INT8_LSTM_SAVE_PATH} ${QUANT2_FP32_LSTM_MODEL_ARCHIVE} eecd9f44d69a84acc1cf2235c4b8b743)
    inference_quant2_int8_lstm_model_test(test_quant2_int8_lstm_mkldnn ${QUANT2_INT8_LSTM_SAVE_PATH}/lstm_fp32_model ${QUANT2_LSTM_MODEL_DIR}/lstm_quant ${QUANT2_INT8_LSTM_SAVE_PATH}/quant_lstm_input_data)
...
...
python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt
0 → 100644
if(WITH_IPU)
    file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
    string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

    foreach(TEST_OP ${TEST_OPS})
        py_test_modules(${TEST_OP} MODULES ${TEST_OP})
    endforeach(TEST_OP)
endif()
python/paddle/fluid/tests/unittests/test_transpose_op.py
...
...
@@ -423,6 +423,14 @@ class TestMoveAxis(unittest.TestCase):
        self.assertEqual(np.array_equal(out.numpy(), expected), True)
        paddle.enable_static()

    def test_moveaxis3(self):
        paddle.disable_static()
        x = paddle.to_tensor(
            [[1 + 1j, -1 - 1j], [1 + 1j, -1 - 1j], [1 + 1j, -1 - 1j]])
        out = x.moveaxis(0, 1)
        self.assertEqual(out.shape, [2, 3])
        paddle.enable_static()

    def test_error(self):
        x = paddle.randn([2, 3, 4, 5])
        # src must have the same number of elements as dst
...
...
python/paddle/fluid/tests/unittests/test_var_base.py
...
...
@@ -51,6 +51,10 @@ class TestVarBase(unittest.TestCase):
                np.array_equal(x.numpy(), np.array([1.2], 'float16')))
            self.assertEqual(x.dtype, core.VarDesc.VarType.FP16)

            # set_default_dtype takes effect on int
            x = paddle.to_tensor(1, place=place)
            self.assertTrue(x.dtype, core.VarDesc.VarType.INT64)

            # set_default_dtype takes effect on float
            x = paddle.to_tensor(1.2, place=place, stop_gradient=False)
            self.assertTrue(
...
...
python/paddle/tensor/creation.py
...
...
@@ -110,12 +110,6 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
"'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace, paddle.NPUPlace, paddle.XPUPlace, paddle.CustomPlace"
)
#Todo(zhouwei): Support allocate tensor on any other specified card
if
isinstance
(
place
,
core
.
CUDAPlace
)
and
isinstance
(
_current_expected_place
(),
core
.
CUDAPlace
)
and
place
.
_get_device_id
(
)
!=
_current_expected_place
().
_get_device_id
():
place
=
_current_expected_place
()
if
not
isinstance
(
data
,
np
.
ndarray
):
def
_handle_dtype
(
data
,
dtype
):
...
...
@@ -139,7 +133,7 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
            data.stop_gradient = stop_gradient
            return data
        elif isinstance(data, (core.LoDTensor, core.Tensor)):
            # shouldn't expose it to users, just for internal use.
            # convert core.Tensor/core.LoDTensor to VarBase first
            # Currently, there is no copy when places are the same
            data = paddle.Tensor(data)
...
...
@@ -152,15 +146,20 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
        raise TypeError(
            "Can't construct a 'paddle.Tensor' with data type {}, data type must be scalar|list|tuple|numpy.ndarray|paddle.Tensor".
            format(type(data)))

    if not dtype:
        if data.dtype in [
                'float16', 'float32', 'float64', 'complex64', 'complex128'
        ]:
            default_type = paddle.get_default_dtype()
            if np.iscomplexobj(data):
                default_type = 'complex64' if default_type in [
                    'float16', 'float32'
                ] else 'complex128'
            data = data.astype(default_type)
        # Windows default type is 'int32', while Linux/Mac is 'int64'. Unify them.
        if data.dtype in ['int32']:
            default_type = "int64"
            data = data.astype(default_type)

    if dtype and convert_dtype(dtype) != data.dtype:
        data = data.astype(convert_dtype(dtype))
...
...
python/paddle/tensor/manipulation.py
...
...
@@ -2737,9 +2737,10 @@ def moveaxis(x, source, destination, name=None):
        out, _ = _C_ops.transpose2(x, 'axis', perm)
        return out

    check_variable_and_dtype(
        x, 'x', [
            'bool', 'float16', 'float32', 'float64', 'int32', 'int64',
            'complex64', 'complex128'
        ], 'moveaxis')

    helper = LayerHelper('moveaxis', **locals())
    out = helper.create_variable_for_type_inference(x.dtype)
...
...
python/paddle/utils/code_gen/api.yaml
...
...
@@ -124,7 +124,7 @@
  args : (Tensor x, int64_t[] axis={}, bool keep_dim=false)
  output : Tensor
  infer_meta :
    func : MeanInferMeta
  kernel :
    func : mean
...
...