未验证 提交 bea300dd 编写于 作者: 石晓伟 提交者: GitHub

[cherry-pick] #26920 , #22924 (#29948)

上级 160b3477
...@@ -31,6 +31,11 @@ elseif(SSE3_FOUND) ...@@ -31,6 +31,11 @@ elseif(SSE3_FOUND)
set(SIMD_FLAG ${SSE3_FLAG}) set(SIMD_FLAG ${SSE3_FLAG})
endif() endif()
if (SSE3_FOUND)
# TODO: Runtime detection should be used here.
add_definitions(-DPADDLE_WITH_SSE3)
endif()
if(WIN32) if(WIN32)
# windows header option for all targets. # windows header option for all targets.
add_definitions(-D_XKEYCHECK_H) add_definitions(-D_XKEYCHECK_H)
......
...@@ -196,7 +196,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) ...@@ -196,7 +196,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(variable_helper SRCS variable_helper.cc DEPS lod_tensor) cc_library(variable_helper SRCS variable_helper.cc DEPS lod_tensor)
cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper) cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry denormal device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector) cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/platform/denormal.h"
#include "paddle/fluid/string/pretty_log.h" #include "paddle/fluid/string/pretty_log.h"
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
...@@ -47,6 +48,7 @@ void NaiveExecutor::Run() { ...@@ -47,6 +48,7 @@ void NaiveExecutor::Run() {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
platform::AttachPointerHashToMKLDNNKey(this, place_); platform::AttachPointerHashToMKLDNNKey(this, place_);
#endif #endif
platform::ScopedFlushDenormal flush;
for (auto &op : ops_) { for (auto &op : ops_) {
VLOG(4) << std::this_thread::get_id() << " run " VLOG(4) << std::this_thread::get_id() << " run "
<< op->DebugStringEx(scope_) << " on scope " << scope_; << op->DebugStringEx(scope_) << " on scope " << scope_;
......
...@@ -38,8 +38,14 @@ struct TensorArrayBatchCleaner { ...@@ -38,8 +38,14 @@ struct TensorArrayBatchCleaner {
constexpr auto kTensorId = framework::VarTypeTrait<framework::Tensor>::kId; constexpr auto kTensorId = framework::VarTypeTrait<framework::Tensor>::kId;
constexpr auto kLoDTensorId = constexpr auto kLoDTensorId =
framework::VarTypeTrait<framework::LoDTensor>::kId; framework::VarTypeTrait<framework::LoDTensor>::kId;
constexpr auto kSelectedRowsId =
framework::VarTypeTrait<framework::SelectedRows>::kId;
constexpr auto kFetchListId =
framework::VarTypeTrait<framework::FetchList>::kId;
valid_types_.insert(kTensorId); valid_types_.insert(kTensorId);
valid_types_.insert(kLoDTensorId); valid_types_.insert(kLoDTensorId);
valid_types_.insert(kSelectedRowsId);
valid_types_.insert(kFetchListId);
} }
// Collect the variables that are not Tensor or LoDTensor, and reset them to a // Collect the variables that are not Tensor or LoDTensor, and reset them to a
// bool(trick), because some of them are containers, and some operators just // bool(trick), because some of them are containers, and some operators just
......
...@@ -32,6 +32,7 @@ if (WITH_PYTHON) ...@@ -32,6 +32,7 @@ if (WITH_PYTHON)
endif() endif()
cc_library(flags SRCS flags.cc DEPS gflags) cc_library(flags SRCS flags.cc DEPS gflags)
cc_library(denormal SRCS denormal.cc DEPS)
cc_library(errors SRCS errors.cc DEPS error_codes_proto) cc_library(errors SRCS errors.cc DEPS error_codes_proto)
cc_test(errors_test SRCS errors_test.cc DEPS errors enforce) cc_test(errors_test SRCS errors_test.cc DEPS errors enforce)
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/denormal.h"
#include <tuple>
#include <utility>
// Compile-time selection of the SSE control-register intrinsics used by the
// helpers in this file. The approach follows
// https://github.com/tensorflow/tensorflow/pull/17141
//
// GCC 4.8 and older have a known bug that prevents the use of intrinsics when
// the target architecture is not enabled in the compiler flags, see
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202
// GCC_WITHOUT_INTRINSICS marks exactly that situation: __SSE3__ is not
// defined, the compiler is not clang, and the GCC version is below 4.9.
#if !defined(__SSE3__) && !defined(__clang__) && \
(defined(__GNUC__) && (__GNUC__ < 4) || \
((__GNUC__ == 4) && (__GNUC_MINOR__ < 9)))
#define GCC_WITHOUT_INTRINSICS
#endif
// The intrinsics are used unless the compiler cannot handle them (see above)
// or the build targets ARM (PADDLE_WITH_ARM).
// NOTE(review): any other target without <pmmintrin.h> that does not define
// PADDLE_WITH_ARM would also take this path -- confirm no such target is built.
#if !defined(GCC_WITHOUT_INTRINSICS) && !defined(PADDLE_WITH_ARM)
#define DENORM_USE_INTRINSICS
#endif
// Pulled in for the _MM_*_FLUSH_ZERO_MODE / _MM_*_DENORMALS_ZERO_MODE macros
// referenced by SetDenormalState() and GetDenormalState() below.
#ifdef DENORM_USE_INTRINSICS
#include <pmmintrin.h>
#endif
namespace paddle {
namespace platform {
// Applies the requested flush-to-zero (FTZ) and denormals-are-zero (DAZ)
// modes through the SSE intrinsics macros.
//
// flush_zero_mode:     true selects _MM_FLUSH_ZERO_ON, false selects
//                      _MM_FLUSH_ZERO_OFF.
// denormals_zero_mode: true selects _MM_DENORMALS_ZERO_ON, false selects
//                      _MM_DENORMALS_ZERO_OFF.
//
// The body is compiled only when both DENORM_USE_INTRINSICS and
// PADDLE_WITH_SSE3 are defined; otherwise the function does nothing.
static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) {
#if defined(DENORM_USE_INTRINSICS) && defined(PADDLE_WITH_SSE3)
  // Background: Intel's C and Fortran compilers enable the DAZ and FTZ flags
  // for SSE by default for optimization levels higher than -O0, and the
  // AArch32 NEON (SIMD) FPU always uses a flush-to-zero mode.
  // Refer to https://en.wikipedia.org/wiki/Denormal_number
  // and https://software.intel.com/sites/landingpage/IntrinsicsGuide/
  if (flush_zero_mode) {
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  } else {
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
  }
  if (denormals_zero_mode) {
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
  } else {
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
  }
#endif
}
// Reads the current flush-to-zero (FTZ) and denormals-are-zero (DAZ) modes.
//
// Returns a pair {flush_zero_mode, denormals_zero_mode}; each element is true
// when the corresponding mode is reported as ON by the SSE intrinsics macros.
// When DENORM_USE_INTRINSICS or PADDLE_WITH_SSE3 is not defined, nothing is
// queried and {false, false} is returned.
static std::pair<bool, bool> GetDenormalState() {
  std::pair<bool, bool> state(false, false);
#if defined(DENORM_USE_INTRINSICS) && defined(PADDLE_WITH_SSE3)
  state.first = (_MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON);
  state.second = (_MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON);
#endif
  return state;
}
// Records the flush-to-zero / denormals-are-zero state reported by
// GetDenormalState() so that the destructor can re-apply it later.
ScopedRestoreFlushDenormalState::ScopedRestoreFlushDenormalState() {
  const std::pair<bool, bool> saved_state = GetDenormalState();
  flush_zero_mode_ = saved_state.first;
  denormals_zero_mode_ = saved_state.second;
}
// Re-applies the flush-to-zero / denormals-are-zero modes that were recorded
// by the constructor.
ScopedRestoreFlushDenormalState::~ScopedRestoreFlushDenormalState() {
SetDenormalState(flush_zero_mode_, denormals_zero_mode_);
}
// Requests both flush-to-zero and denormals-are-zero modes. The restore_
// member is constructed before this body runs, so it has already recorded the
// previous state and puts it back when this object is destroyed.
ScopedFlushDenormal::ScopedFlushDenormal() { SetDenormalState(true, true); }
} // namespace platform
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace platform {
// Scope guard for the floating-point flush-to-zero / denormals-are-zero
// modes: the constructor records the current modes and the destructor applies
// the recorded values again, so changes made inside the scope are undone when
// the scope ends.
class ScopedRestoreFlushDenormalState {
public:
// Records the current modes.
ScopedRestoreFlushDenormalState();
// Re-applies the modes recorded at construction.
~ScopedRestoreFlushDenormalState();
private:
// Flush-to-zero mode recorded at construction.
bool flush_zero_mode_;
// Denormals-are-zero mode recorded at construction.
bool denormals_zero_mode_;
DISABLE_COPY_AND_ASSIGN(ScopedRestoreFlushDenormalState);
};
// Scope guard that requests flush-to-zero and denormals-are-zero modes for
// its lifetime. The previous modes are put back by the restore_ member when
// the object is destroyed.
class ScopedFlushDenormal {
public:
// Turns both modes on; the prior state has already been captured by restore_.
ScopedFlushDenormal();
private:
// Captures the modes in effect before the constructor body runs and restores
// them on destruction.
ScopedRestoreFlushDenormalState restore_;
DISABLE_COPY_AND_ASSIGN(ScopedFlushDenormal);
};
} // namespace platform
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册