Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
bea300dd
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
bea300dd
编写于
12月 29, 2020
作者:
石
石晓伟
提交者:
GitHub
12月 29, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[cherry-pick] #26920 , #22924 (#29948)
上级
160b3477
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
138 addition
and
1 deletion
+138
-1
cmake/configure.cmake
cmake/configure.cmake
+5
-0
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+1
-1
paddle/fluid/framework/naive_executor.cc
paddle/fluid/framework/naive_executor.cc
+2
-0
paddle/fluid/inference/api/details/reset_tensor_array.h
paddle/fluid/inference/api/details/reset_tensor_array.h
+6
-0
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+1
-0
paddle/fluid/platform/denormal.cc
paddle/fluid/platform/denormal.cc
+80
-0
paddle/fluid/platform/denormal.h
paddle/fluid/platform/denormal.h
+43
-0
未找到文件。
cmake/configure.cmake
浏览文件 @
bea300dd
...
...
@@ -31,6 +31,11 @@ elseif(SSE3_FOUND)
set
(
SIMD_FLAG
${
SSE3_FLAG
}
)
endif
()
# Publish SSE3 availability to the C++ sources as the PADDLE_WITH_SSE3
# compile definition.
# TODO: Runtime detection should be used here.
if(SSE3_FOUND)
  add_definitions(-DPADDLE_WITH_SSE3)
endif()
if
(
WIN32
)
# windows header option for all targets.
add_definitions
(
-D_XKEYCHECK_H
)
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
bea300dd
...
...
@@ -196,7 +196,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library
(
feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog
)
cc_library
(
variable_helper SRCS variable_helper.cc DEPS lod_tensor
)
cc_library
(
naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper
)
cc_library
(
naive_executor SRCS naive_executor.cc DEPS op_registry denormal device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper
)
cc_library
(
executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector
)
if
(
WITH_DISTRIBUTE
)
...
...
paddle/fluid/framework/naive_executor.cc
浏览文件 @
bea300dd
...
...
@@ -24,6 +24,7 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/platform/denormal.h"
#include "paddle/fluid/string/pretty_log.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
...
...
@@ -47,6 +48,7 @@ void NaiveExecutor::Run() {
#ifdef PADDLE_WITH_MKLDNN
platform
::
AttachPointerHashToMKLDNNKey
(
this
,
place_
);
#endif
platform
::
ScopedFlushDenormal
flush
;
for
(
auto
&
op
:
ops_
)
{
VLOG
(
4
)
<<
std
::
this_thread
::
get_id
()
<<
" run "
<<
op
->
DebugStringEx
(
scope_
)
<<
" on scope "
<<
scope_
;
...
...
paddle/fluid/inference/api/details/reset_tensor_array.h
浏览文件 @
bea300dd
...
...
@@ -38,8 +38,14 @@ struct TensorArrayBatchCleaner {
constexpr
auto
kTensorId
=
framework
::
VarTypeTrait
<
framework
::
Tensor
>::
kId
;
constexpr
auto
kLoDTensorId
=
framework
::
VarTypeTrait
<
framework
::
LoDTensor
>::
kId
;
constexpr
auto
kSelectedRowsId
=
framework
::
VarTypeTrait
<
framework
::
SelectedRows
>::
kId
;
constexpr
auto
kFetchListId
=
framework
::
VarTypeTrait
<
framework
::
FetchList
>::
kId
;
valid_types_
.
insert
(
kTensorId
);
valid_types_
.
insert
(
kLoDTensorId
);
valid_types_
.
insert
(
kSelectedRowsId
);
valid_types_
.
insert
(
kFetchListId
);
}
// Collect the variables that are not Tensor or LoDTensor, and reset them to a
// bool(trick), because some of them are containers, and some operators just
...
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
bea300dd
...
...
@@ -32,6 +32,7 @@ if (WITH_PYTHON)
endif
()
cc_library
(
flags SRCS flags.cc DEPS gflags
)
cc_library
(
denormal SRCS denormal.cc DEPS
)
cc_library
(
errors SRCS errors.cc DEPS error_codes_proto
)
cc_test
(
errors_test SRCS errors_test.cc DEPS errors enforce
)
...
...
paddle/fluid/platform/denormal.cc
0 → 100644
浏览文件 @
bea300dd
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/denormal.h"
#include <tuple>
#include <utility>
// Refer to https://github.com/tensorflow/tensorflow/pull/17141
// If we're on gcc 4.8 or older, there's a known bug that prevents the use of
// intrinsics when the architecture is not defined in the flags. See
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202
#if !defined(__SSE3__) && !defined(__clang__) && defined(__GNUC__) && \
    ((__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 9)))
#define GCC_WITHOUT_INTRINSICS
#endif

// <pmmintrin.h> is an x86-only header, so the intrinsics path is enabled only
// when the compiler reports an x86 target. Other targets fall back to the
// no-op implementation instead of failing on the include below, even when
// PADDLE_WITH_ARM was not set by the build.
#if (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || \
     defined(_M_IX86)) &&                                            \
    !defined(GCC_WITHOUT_INTRINSICS) && !defined(PADDLE_WITH_ARM)
#define DENORM_USE_INTRINSICS
#endif

#ifdef DENORM_USE_INTRINSICS
#include <pmmintrin.h>
#endif
namespace
paddle
{
namespace
platform
{
// Sets the SSE flush-to-zero (FTZ) and denormals-are-zero (DAZ) modes.
//
// flush_zero_mode:     true selects _MM_FLUSH_ZERO_ON, false selects
//                      _MM_FLUSH_ZERO_OFF.
// denormals_zero_mode: true selects _MM_DENORMALS_ZERO_ON, false selects
//                      _MM_DENORMALS_ZERO_OFF.
//
// The body is compiled out (the call does nothing) unless both
// DENORM_USE_INTRINSICS and PADDLE_WITH_SSE3 are defined.
static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) {
#if defined(DENORM_USE_INTRINSICS) && defined(PADDLE_WITH_SSE3)
  // Intel's C and Fortran compilers enable the denormals-are-zero (DAZ) and
  // flush-to-zero (FTZ) flags for SSE by default for optimization levels higher
  // than -O0.
  // AArch32 NEON (SIMD) FPU always uses a flush-to-zero mode.
  // Refer to https://en.wikipedia.org/wiki/Denormal_number
  // and https://software.intel.com/sites/landingpage/IntrinsicsGuide/
  const auto ftz_mode =
      flush_zero_mode ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF;
  _MM_SET_FLUSH_ZERO_MODE(ftz_mode);

  const auto daz_mode =
      denormals_zero_mode ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF;
  _MM_SET_DENORMALS_ZERO_MODE(daz_mode);
#endif
}
// Reads the current SSE flush-to-zero / denormals-are-zero modes.
//
// Returns {flush_zero_mode, denormals_zero_mode}; each element is true when
// the corresponding mode is currently ON. When DENORM_USE_INTRINSICS and
// PADDLE_WITH_SSE3 are not both defined, {false, false} is returned.
static std::pair<bool, bool> GetDenormalState() {
#if defined(DENORM_USE_INTRINSICS) && defined(PADDLE_WITH_SSE3)
  const bool ftz_enabled = (_MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON);
  const bool daz_enabled =
      (_MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON);
  return std::make_pair(ftz_enabled, daz_enabled);
#else
  return std::make_pair(false, false);
#endif
}
// Records the flush-to-zero / denormals-are-zero state reported by
// GetDenormalState() so the destructor can put it back.
ScopedRestoreFlushDenormalState::ScopedRestoreFlushDenormalState() {
  const std::pair<bool, bool> saved_state = GetDenormalState();
  flush_zero_mode_ = saved_state.first;
  denormals_zero_mode_ = saved_state.second;
}
// Re-applies the flush-to-zero / denormals-are-zero state that was captured
// by the constructor.
ScopedRestoreFlushDenormalState::~ScopedRestoreFlushDenormalState() {
  SetDenormalState(flush_zero_mode_, denormals_zero_mode_);
}
// Requests both flush-to-zero and denormals-are-zero via SetDenormalState().
// The restore_ member is constructed before this body runs, so the state it
// saved is the one from before this change.
ScopedFlushDenormal::ScopedFlushDenormal() {
  constexpr bool kFlushZero = true;
  constexpr bool kDenormalsZero = true;
  SetDenormalState(kFlushZero, kDenormalsZero);
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/denormal.h
0 → 100644
浏览文件 @
bea300dd
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/platform/macros.h"
namespace
paddle
{
namespace
platform
{
// Scope guard used to restore the initial flush-to-zero / denormals-are-zero
// state at the end of the scope: the constructor saves the current state and
// the destructor writes it back.
class ScopedRestoreFlushDenormalState {
 public:
  // Saves the current state.
  ScopedRestoreFlushDenormalState();
  // Restores the state saved at construction.
  ~ScopedRestoreFlushDenormalState();

 private:
  // Saved flush-to-zero mode (true == ON).
  bool flush_zero_mode_;
  // Saved denormals-are-zero mode (true == ON).
  bool denormals_zero_mode_;

  DISABLE_COPY_AND_ASSIGN(ScopedRestoreFlushDenormalState);
};
// Scope guard that requests flush-to-zero and denormals-are-zero for its
// lifetime. The previous state is held by restore_ and is written back when
// this object (and therefore restore_) is destroyed.
class ScopedFlushDenormal {
 public:
  // Turns both modes on.
  ScopedFlushDenormal();

 private:
  // Captures the state on construction and restores it on destruction.
  ScopedRestoreFlushDenormalState restore_;

  DISABLE_COPY_AND_ASSIGN(ScopedFlushDenormal);
};
}
// namespace platform
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录