提交 49a04d75 编写于 作者: F fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_data_balance

...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html) [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html) [![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html)
[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
......
...@@ -96,6 +96,20 @@ if(NOT APPLE AND NOT ANDROID) ...@@ -96,6 +96,20 @@ if(NOT APPLE AND NOT ANDROID)
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
endif(NOT APPLE AND NOT ANDROID) endif(NOT APPLE AND NOT ANDROID)
set_property(GLOBAL PROPERTY FLUID_MODULES "")
# find all fluid modules is used for paddle fluid static library
# for building inference libs
function(find_fluid_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "fluid" pos)
if(pos GREATER 1)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
set(fluid_modules ${fluid_modules} ${TARGET_NAME})
set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}")
endif()
endfunction(find_fluid_modules)
function(merge_static_libs TARGET_NAME) function(merge_static_libs TARGET_NAME)
set(libs ${ARGN}) set(libs ${ARGN})
list(REMOVE_DUPLICATES libs) list(REMOVE_DUPLICATES libs)
...@@ -250,6 +264,7 @@ function(cc_test TARGET_NAME) ...@@ -250,6 +264,7 @@ function(cc_test TARGET_NAME)
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if (${cc_test_SERIAL}) if (${cc_test_SERIAL})
set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1) set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
endif() endif()
endif() endif()
endfunction(cc_test) endfunction(cc_test)
...@@ -314,6 +329,7 @@ function(nv_test TARGET_NAME) ...@@ -314,6 +329,7 @@ function(nv_test TARGET_NAME)
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
if (nv_test_SERIAL) if (nv_test_SERIAL)
set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1) set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
endif() endif()
endif() endif()
endfunction(nv_test) endfunction(nv_test)
...@@ -561,7 +577,7 @@ function(py_test TARGET_NAME) ...@@ -561,7 +577,7 @@ function(py_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS ENVS) set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} COMMAND env FLAGS_init_allocated_mem=true PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif() endif()
......
...@@ -12,19 +12,6 @@ ...@@ -12,19 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
set_property(GLOBAL PROPERTY FLUID_MODULES "")
# find all fluid modules is used for paddle fluid static library
function(find_fluid_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "fluid" pos)
if(pos GREATER 1)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
set(fluid_modules ${fluid_modules} ${TARGET_NAME})
set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}")
endif()
endfunction(find_fluid_modules)
# make package for paddle fluid shared and static library # make package for paddle fluid shared and static library
function(copy TARGET) function(copy TARGET)
set(options "") set(options "")
...@@ -163,9 +150,9 @@ if(WITH_CONTRIB) ...@@ -163,9 +150,9 @@ if(WITH_CONTRIB)
list(APPEND inference_deps contrib_anakin_inference_lib) list(APPEND inference_deps contrib_anakin_inference_lib)
endif() endif()
copy(contrib_inference_lib DEPS paddle_inference_api copy(contrib_inference_lib DEPS paddle_inference_api paddle_inference_api_shared
SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h
${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api.* ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api*
DSTS ${contrib_dst_dir} ${contrib_dst_dir}) DSTS ${contrib_dst_dir} ${contrib_dst_dir})
list(APPEND inference_deps contrib_inference_lib) list(APPEND inference_deps contrib_inference_lib)
endif() endif()
......
...@@ -74,10 +74,10 @@ void OperatorWithKernel::Run( ...@@ -74,10 +74,10 @@ void OperatorWithKernel::Run(
auto kernel_type_for_var = this->GetKernelTypeForVar(...); auto kernel_type_for_var = this->GetKernelTypeForVar(...);
if (kernel_type_for_var.place_ != expected_kernel_key.place_) { if (kernel_type_for_var.place_ != expected_kernel_key.place_) {
auto* trans_var = new_scope.Var(var_name); auto* trans_var = new_scope.Var(var_name);
auto* out = DataTransform(expected_kernel_key, auto* out = TransformData(expected_kernel_key,
kernel_type_for_var, kernel_type_for_var,
*tensor_in); *tensor_in);
CopyVariableWithTensor(...); SetTensorToVariable(...);
} }
} }
......
...@@ -46,6 +46,10 @@ cc_library(paddle_inference_api ...@@ -46,6 +46,10 @@ cc_library(paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
cc_library(paddle_inference_api_shared SHARED
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
cc_test(test_paddle_inference_api cc_test(test_paddle_inference_api
SRCS test_paddle_inference_api.cc SRCS test_paddle_inference_api.cc
DEPS paddle_inference_api) DEPS paddle_inference_api)
......
...@@ -147,9 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, ...@@ -147,9 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
"Input tensor type is not supported: ", in.type().name()); "Input tensor type is not supported: ", in.type().name());
memory::data_type out_type = in_type; memory::data_type out_type = in_type;
auto in_format = MKLDNNFormatForSize(in_tz.size(), in.format()); auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format());
auto out_format = auto out_format =
MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout)); platform::MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout));
void* in_data = GetDataFromTensor(in, in_type); void* in_data = GetDataFromTensor(in, in_type);
......
...@@ -62,12 +62,6 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) { ...@@ -62,12 +62,6 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) {
return MKLDNNDataType::data_undef; return MKLDNNDataType::data_undef;
} }
inline MKLDNNFormat MKLDNNFormatForSize(size_t dims_size,
MKLDNNFormat default_format) {
return (dims_size == 1
? mkldnn::memory::format::x
: dims_size == 2 ? mkldnn::memory::format::nc : default_format);
}
#endif #endif
void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
......
...@@ -18,17 +18,21 @@ limitations under the License. */ ...@@ -18,17 +18,21 @@ limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/data_type_transform.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle { namespace paddle {
namespace framework { namespace framework {
static void PassTensorData(Tensor* from, Tensor* to) { static void PassTensorData(Tensor *from, Tensor *to) {
to->ShareDataWith(*from); to->ShareDataWith(*from);
*from = Tensor(); *from = Tensor();
} }
void DataTransform(const OpKernelType& expected_kernel_type, void TransformData(const OpKernelType &expected_kernel_type,
const OpKernelType& kernel_type_for_var, const OpKernelType &kernel_type_for_var,
const Tensor& input_tensor, Tensor* output_tensor) { const Tensor &input_tensor, Tensor *output_tensor) {
bool transformed = false; bool transformed = false;
Tensor in; Tensor in;
in.ShareDataWith(input_tensor); in.ShareDataWith(input_tensor);
...@@ -48,8 +52,8 @@ void DataTransform(const OpKernelType& expected_kernel_type, ...@@ -48,8 +52,8 @@ void DataTransform(const OpKernelType& expected_kernel_type,
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Just set layout/format. No real transform occur // Just set layout/format. No real transform occur
auto out_format = auto out_format = platform::MKLDNNFormatForSize(in.dims().size(),
MKLDNNFormatForSize(in.dims().size(), ToMKLDNNFormat(lin)); ToMKLDNNFormat(lin));
out.ShareDataWith(input_tensor); out.ShareDataWith(input_tensor);
out.set_layout(DataLayout::kMKLDNN); out.set_layout(DataLayout::kMKLDNN);
...@@ -89,17 +93,17 @@ void DataTransform(const OpKernelType& expected_kernel_type, ...@@ -89,17 +93,17 @@ void DataTransform(const OpKernelType& expected_kernel_type,
output_tensor->ShareDataWith(in); output_tensor->ShareDataWith(in);
} }
void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, void SetTensorToVariable(const Variable &in_var, const Tensor &tensor,
Variable* out_var) { Variable *out_var) {
if (in_var.IsType<LoDTensor>()) { if (in_var.IsType<LoDTensor>()) {
auto& in_lod_tensor = in_var.Get<LoDTensor>(); auto &in_lod_tensor = in_var.Get<LoDTensor>();
auto* tran_lod_tensor = out_var->GetMutable<LoDTensor>(); auto *tran_lod_tensor = out_var->GetMutable<LoDTensor>();
tran_lod_tensor->set_lod(in_lod_tensor.lod()); tran_lod_tensor->set_lod(in_lod_tensor.lod());
tran_lod_tensor->set_layout(in_lod_tensor.layout()); tran_lod_tensor->set_layout(in_lod_tensor.layout());
tran_lod_tensor->ShareDataWith(tensor); tran_lod_tensor->ShareDataWith(tensor);
} else if (in_var.IsType<SelectedRows>()) { } else if (in_var.IsType<SelectedRows>()) {
auto& in_selected_rows = in_var.Get<SelectedRows>(); auto &in_selected_rows = in_var.Get<SelectedRows>();
auto* trans_selected_rows = out_var->GetMutable<SelectedRows>(); auto *trans_selected_rows = out_var->GetMutable<SelectedRows>();
trans_selected_rows->set_height(in_selected_rows.height()); trans_selected_rows->set_height(in_selected_rows.height());
trans_selected_rows->set_rows(in_selected_rows.rows()); trans_selected_rows->set_rows(in_selected_rows.rows());
trans_selected_rows->mutable_value()->ShareDataWith(tensor); trans_selected_rows->mutable_value()->ShareDataWith(tensor);
......
...@@ -30,12 +30,15 @@ limitations under the License. */ ...@@ -30,12 +30,15 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
void DataTransform(const OpKernelType& expected_kernel_type, void TransformData(const OpKernelType &expected_kernel_type,
const OpKernelType& kernel_type_for_var, const OpKernelType &kernel_type_for_var,
const Tensor& input_tensor, Tensor* out); const Tensor &input_tensor, Tensor *out);
void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, /**
Variable* out_var); * Set OutVar from InVar, except the tensor is shared with `tensor`
*/
void SetTensorToVariable(const Variable &in_var, const Tensor &tensor,
Variable *out_var);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -97,7 +97,7 @@ inline bool NeedTransformLayout(const DataLayout& l, const DataLayout& r) { ...@@ -97,7 +97,7 @@ inline bool NeedTransformLayout(const DataLayout& l, const DataLayout& r) {
return ret; return ret;
} }
inline bool TransFromNeeded(const OpKernelType& l, const OpKernelType& r) { inline bool NeedTransform(const OpKernelType& l, const OpKernelType& r) {
return (!platform::places_are_same_class(l.place_, r.place_)) || return (!platform::places_are_same_class(l.place_, r.place_)) ||
(l.data_type_ != r.data_type_) || (l.data_type_ != r.data_type_) ||
NeedTransformLayout(l.data_layout_, r.data_layout_); NeedTransformLayout(l.data_layout_, r.data_layout_);
......
...@@ -620,8 +620,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -620,8 +620,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
"There are no kernels which are registered in the %s operator.", type_); "There are no kernels which are registered in the %s operator.", type_);
} }
ExecutionContext ctx(*this, scope, *dev_ctx);
OpKernelMap& kernels = kernels_iter->second; OpKernelMap& kernels = kernels_iter->second;
// TODO(dzhwinter) : kernel fallback mechanism will be added when all the // TODO(dzhwinter) : kernel fallback mechanism will be added when all the
...@@ -631,7 +629,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -631,7 +629,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// Do selection // Do selection
// } // }
auto expected_kernel_key = this->GetExpectedKernelType(ctx); auto expected_kernel_key =
this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
VLOG(3) << "expected_kernel_key:" << expected_kernel_key; VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
auto kernel_iter = kernels.find(expected_kernel_key); auto kernel_iter = kernels.find(expected_kernel_key);
...@@ -640,56 +639,34 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -640,56 +639,34 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
KernelTypeToString(expected_kernel_key)); KernelTypeToString(expected_kernel_key));
} }
// do data transform // do data transformScope &transfer_scope;
Scope& new_scope = scope.NewScope(); std::vector<std::string> transfered_inplace_vars;
auto* transfer_scope =
TryTransferData(scope, expected_kernel_key, &transfered_inplace_vars);
std::vector<std::string> inplace_vars; // exec scope is the scope that kernel actually executed on.
for (auto& var_name_item : this->Inputs()) { const Scope& exec_scope =
for (auto& var_name : var_name_item.second) { (transfer_scope == nullptr ? scope : *transfer_scope);
auto* var = scope.FindVar(var_name);
if (var && VarIsTensor(var)) { if (!(expected_kernel_key.place_ == dev_ctx->GetPlace())) {
auto* tensor_in = GetTensorFromVar(var); dev_ctx = pool.Get(expected_kernel_key.place_);
if (tensor_in->IsInitialized()) {
auto kernel_type_for_var = this->GetKernelTypeForVar(
var_name_item.first, *tensor_in, expected_kernel_key);
if (TransFromNeeded(kernel_type_for_var, expected_kernel_key)) {
auto out_var_names = OutputVars(true);
if (std::find(out_var_names.begin(), out_var_names.end(),
var_name) != out_var_names.end()) {
inplace_vars.push_back(var_name);
}
VLOG(3) << "Transform Variable " << var_name << " from "
<< kernel_type_for_var << " to " << expected_kernel_key;
auto* trans_var = new_scope.Var(var_name);
std::shared_ptr<Tensor> out(new Tensor);
DataTransform(expected_kernel_key, kernel_type_for_var, *tensor_in,
out.get());
CopyVariableWithTensor(*var, *(out.get()), trans_var);
}
}
}
}
} }
auto* new_dev_ctx = pool.Get(expected_kernel_key.place_); kernel_iter->second->Compute(ExecutionContext(*this, exec_scope, *dev_ctx));
kernel_iter->second->Compute(
ExecutionContext(*this, new_scope, *new_dev_ctx));
for (auto& var_name : inplace_vars) { if (!transfered_inplace_vars.empty()) {
VLOG(3) << "share inplace var " + var_name + " back to it's original scope"; // there is inplace variable has been transfered.
auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name)); TransferInplaceVarsBack(scope, transfered_inplace_vars, *transfer_scope);
auto* transformed_tensor = GetTensorFromVar(new_scope.FindVar(var_name));
original_tensor->ShareDataWith(*transformed_tensor);
} }
/*For profiling/benchmark only*/ /*For profiling/benchmark only*/
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
new_dev_ctx->Wait(); dev_ctx->Wait();
} }
if (FLAGS_check_nan_inf) { if (FLAGS_check_nan_inf) {
for (auto& vname : OutputVars(true)) { for (auto& vname : OutputVars(true)) {
auto* var = new_scope.FindVar(vname); auto* var = exec_scope.FindVar(vname);
if (var == nullptr) continue; if (var == nullptr) continue;
if (var->IsType<framework::LoDTensor>()) { if (var->IsType<framework::LoDTensor>()) {
CheckTensorNANOrInf(vname, var->Get<framework::LoDTensor>()); CheckTensorNANOrInf(vname, var->Get<framework::LoDTensor>());
...@@ -697,6 +674,64 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -697,6 +674,64 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
} }
} }
} }
void OperatorWithKernel::TransferInplaceVarsBack(
const Scope& scope, const std::vector<std::string>& inplace_vars,
const Scope& transfer_scope) const {
for (auto& var_name : inplace_vars) {
VLOG(3) << "share inplace var " + var_name + " back to it's original scope";
auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name));
auto* transformed_tensor =
GetTensorFromVar(transfer_scope.FindVar(var_name));
original_tensor->ShareDataWith(*transformed_tensor);
}
}
Scope* OperatorWithKernel::TryTransferData(
const Scope& scope, const OpKernelType& expected_kernel_key,
std::vector<std::string>* transfered_inplace_vars) const {
Scope* new_scope = nullptr;
for (auto& var_name_item : Inputs()) {
for (auto& var_name : var_name_item.second) {
auto* var = scope.FindVar(var_name);
// Only tensor can be tranfer to another device.
if (var == nullptr || !VarIsTensor(var)) {
continue;
}
auto* tensor_in = GetTensorFromVar(var);
if (!tensor_in->IsInitialized()) {
continue;
}
auto kernel_type_for_var = GetKernelTypeForVar(
var_name_item.first, *tensor_in, expected_kernel_key);
if (!NeedTransform(kernel_type_for_var, expected_kernel_key)) {
continue;
}
auto out_var_names = OutputVars(true);
if (std::find(out_var_names.begin(), out_var_names.end(), var_name) !=
out_var_names.end()) {
transfered_inplace_vars->emplace_back(var_name);
}
VLOG(3) << "Transform Variable " << var_name << " from "
<< kernel_type_for_var << " to " << expected_kernel_key;
if (new_scope == nullptr) {
new_scope = &scope.NewScope();
}
auto* trans_var = new_scope->Var(var_name);
Tensor out;
TransformData(expected_kernel_key, kernel_type_for_var, *tensor_in, &out);
SetTensorToVariable(*var, out, trans_var);
}
}
return new_scope;
}
proto::VarType::Type OperatorWithKernel::IndicateDataType( proto::VarType::Type OperatorWithKernel::IndicateDataType(
const ExecutionContext& ctx) const { const ExecutionContext& ctx) const {
......
...@@ -384,6 +384,20 @@ class OperatorWithKernel : public OperatorBase { ...@@ -384,6 +384,20 @@ class OperatorWithKernel : public OperatorBase {
// same. // same.
proto::VarType::Type IndicateDataType(const ExecutionContext& ctx) const; proto::VarType::Type IndicateDataType(const ExecutionContext& ctx) const;
void RunImpl(const Scope& scope, const platform::Place& place) const final; void RunImpl(const Scope& scope, const platform::Place& place) const final;
/**
* Transfer data from scope to a transfered scope. If there is no data need to
* be tranfered, it returns nullptr.
*
* * transfered_inplace_vars is a output vector.
*/
Scope* TryTransferData(
const Scope& scope, const OpKernelType& expected_kernel_key,
std::vector<std::string>* transfered_inplace_vars) const;
void TransferInplaceVarsBack(const Scope& scope,
const std::vector<std::string>& inplace_vars,
const Scope& exec_scope) const;
}; };
extern bool OpSupportGPU(const std::string& op_type); extern bool OpSupportGPU(const std::string& op_type);
......
...@@ -253,9 +253,6 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( ...@@ -253,9 +253,6 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
t->set_lod(lod_tensors[j].lod()); t->set_lod(lod_tensors[j].lod());
} }
} }
for (auto &p : member_->places_) {
platform::DeviceContextPool::Instance().Get(p)->Wait();
}
} }
ParallelExecutor::~ParallelExecutor() { ParallelExecutor::~ParallelExecutor() {
......
...@@ -69,7 +69,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -69,7 +69,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto stream = auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); if (platform::is_same_place(src_place, dst_place)) {
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
} else {
if (platform::is_same_place(ctx_place, src_place)) {
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
platform::DeviceContextPool::Instance().Get(src.place())->Wait();
} else if (platform::is_same_place(ctx_place, dst_place)) {
platform::DeviceContextPool::Instance().Get(src.place())->Wait();
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
} else {
PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place.");
}
}
} }
#endif #endif
} }
...@@ -78,10 +93,10 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -78,10 +93,10 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) { Tensor* dst) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
const platform::DeviceContext* dev_ctx; const platform::DeviceContext* dev_ctx;
if (platform::is_gpu_place(src.place())) { if (platform::is_gpu_place(dst_place)) {
dev_ctx = pool.Get(src.place());
} else {
dev_ctx = pool.Get(dst_place); dev_ctx = pool.Get(dst_place);
} else {
dev_ctx = pool.Get(src.place());
} }
TensorCopy(src, dst_place, *dev_ctx, dst); TensorCopy(src, dst_place, *dev_ctx, dst);
} }
......
...@@ -23,10 +23,25 @@ limitations under the License. */ ...@@ -23,10 +23,25 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
// NOTE(zcd): Because TensorCopy is an async operation, when the src_place
// and dst_place are two different GPU, to ensure that the operation can
// be carried out correctly, there is a src_ctx wait operation in TensorCopy.
// If ctx_place and src_place are the same, src_ctx.Wait() is added
// after memory::Copy; if ctx_place and dst_place are the same,
// src_ctx.Wait() is added before memory::Copy.
void TensorCopy(const Tensor& src, const platform::Place& dst_place, void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst); const platform::DeviceContext& ctx, Tensor* dst);
// NOTE(zcd): If the src.place() and dst_place are two different GPU,
// the copy operation is carried out on the dst_place's stream. This is
// very important, because TensorCopy is an async operator, and in most
// case, once this copy operator returns, dst is to be used in dst_place's
// stream, if this copy operation is carried out on the src_place's stream,
// when dst is used in dst_place's stream the copy operation may be
// not completed.
void TensorCopy(const Tensor& src, const platform::Place& dst_place, void TensorCopy(const Tensor& src, const platform::Place& dst_place,
Tensor* dst); Tensor* dst);
void TensorCopySync(const Tensor& src, const platform::Place& dst_place, void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Tensor* dst); Tensor* dst);
......
# Inference Analysis
The `inference/analysis` module is used to analyze and optimize the inference program,
it references some philosophy from `LLVM/analysis`,
and make the various optimization features be pluggable and co-exist in a pipeline.
We borrowed some concepts from LLVM, such as
- [Pass](./pass.h)es to implement optimization that traverse the inference program,
- [DataFlowGraph](./data_flow_graph.h) to represent the data flow graph built from a program,
- [PassManager](./pass_manager.h) to manage a sequence of `Pass`es over a graph.
There are some other basic concepts here
- [Node](./node.h), the node in a `DataFlowGraph`,
- `Function`, the Operator in Fluid,
- `Value`, the Variable in Fluid;
- [Argument](./argument.h), the argument that treat as the input and output of all `Pass`es in the pipeline,
## How it works
The `inference/analysis` module make all the passes in a pipeline, and works in such way:
1. Build a `DataFlowGraph` from a Fluid inference ProgramDesc,
2. Call the middle passes one by one, the same `DataFlowGraph` is passed across all the passes,
3. Transform a new ProgramDesc from the modified `DataFlowGraph`.
The new optimization features can be added as an independent `Pass` and controlled by gflags,
each pass will generate unified debug information or visualization for better debugging.
## Supported Passes
### `FluidToDataFlowGraphPass`
Transform the fluid `ProgramDesc` to a `DataFlowGraph` to give an abstract representation for all the middle passes,
this should be the first pass of the pipeline.
### `DataFlowGraphToFluidPass`
Generate a final `ProgramDesc` from a data flow graph, this should be the last pass of the pipeline.
### `TensorRTSubgraphNodeMarkPass`
Mark the `Node` that are supported by TensorRT,
this pass will generate a visualization file which can be used for debugging.
### `TensorRTSubGraphPass`
Split the sub-graph that are can be accelerated by TensorRT.
### `DFG_GraphvizDrawPass`
This pass is just for debug, it will visualize the `DataFlowGraph` using the [graphviz](http://www.graphviz.org) tool.
It can be used as a helper class that draws the modified graph after each pass.
## Utilities
There is some helper function/class for analysis.
- [dot.h](./dot.h) give a easy to use interface for generating `DOT` codes,
- [graph_traits.h](./graph_traits.h) contains the graph traversal algorithms, it uses `iterator` to make the algorithms easy to share across different passes.
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/analysis/analyzer.h"
#include <string>
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" #include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
...@@ -79,4 +80,4 @@ void Analyzer::Run(Argument* argument) { ...@@ -79,4 +80,4 @@ void Analyzer::Run(Argument* argument) {
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
\ No newline at end of file
...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once
/* /*
* This file contains Analyzer, an class that exposed as a library that analyze * This file contains Analyzer, an class that exposed as a library that analyze
* and optimize * and optimize
......
...@@ -138,7 +138,7 @@ struct GraphTraits<DataFlowGraph> { ...@@ -138,7 +138,7 @@ struct GraphTraits<DataFlowGraph> {
// sub-graph is the inputs nodes and output nodes that doesn't inside the // sub-graph is the inputs nodes and output nodes that doesn't inside the
// sub-graph. // sub-graph.
static std::pair<std::vector<Node *>, std::vector<Node *>> static std::pair<std::vector<Node *>, std::vector<Node *>>
ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) { ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) { // NOLINT
std::unordered_set<Node *> nodes(graph.begin(), graph.end()); std::unordered_set<Node *> nodes(graph.begin(), graph.end());
std::unordered_set<Node *> inputs; std::unordered_set<Node *> inputs;
std::unordered_set<Node *> outputs; std::unordered_set<Node *> outputs;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" #include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include <vector>
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/proto_desc.h" #include "paddle/fluid/framework/proto_desc.h"
...@@ -150,13 +151,14 @@ namespace { ...@@ -150,13 +151,14 @@ namespace {
class DFG_DebuggerPass : public DFG_GraphvizDrawPass { class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
public: public:
using Config = DFG_GraphvizDrawPass::Config; using Config = DFG_GraphvizDrawPass::Config;
DFG_DebuggerPass(const Config& config) : DFG_GraphvizDrawPass(config) {} explicit DFG_DebuggerPass(const Config& config)
: DFG_GraphvizDrawPass(config) {}
std::string repr() const override { return "dfg-to-fluid-debuger-pass"; } std::string repr() const override { return "dfg-to-fluid-debuger-pass"; }
bool Finalize() override { return true; } bool Finalize() override { return true; }
}; };
} } // namespace
Pass* DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const { Pass* DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const {
return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config(
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#pragma once #pragma once
#include <string>
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/pass.h" #include "paddle/fluid/inference/analysis/pass.h"
......
...@@ -46,7 +46,7 @@ class DFG_GraphvizDrawPass : public DataFlowGraphPass { ...@@ -46,7 +46,7 @@ class DFG_GraphvizDrawPass : public DataFlowGraphPass {
const bool display_deleted_node; const bool display_deleted_node;
}; };
DFG_GraphvizDrawPass(const Config &config) : config_(config) {} explicit DFG_GraphvizDrawPass(const Config &config) : config_(config) {}
bool Initialize(Argument *argument) override { return true; } bool Initialize(Argument *argument) override { return true; }
void Run(DataFlowGraph *graph) override; void Run(DataFlowGraph *graph) override;
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "analyzer.h" #include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
...@@ -88,7 +88,8 @@ namespace { ...@@ -88,7 +88,8 @@ namespace {
class DFG_DebuggerPass : public DFG_GraphvizDrawPass { class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
public: public:
using Config = DFG_GraphvizDrawPass::Config; using Config = DFG_GraphvizDrawPass::Config;
DFG_DebuggerPass(const Config &config) : DFG_GraphvizDrawPass(config) {} explicit DFG_DebuggerPass(const Config &config)
: DFG_GraphvizDrawPass(config) {}
std::string repr() const override { return "fluid-to-dfg-debuger-pass"; } std::string repr() const override { return "fluid-to-dfg-debuger-pass"; }
bool Finalize() override { return true; } bool Finalize() override { return true; }
}; };
......
...@@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/inference/analysis/pass_manager.h" #include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" #include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/ut_helper.h" #include "paddle/fluid/inference/analysis/ut_helper.h"
#include <gtest/gtest.h>
namespace paddle { namespace paddle {
namespace inference { namespace inference {
namespace analysis { namespace analysis {
......
...@@ -12,10 +12,12 @@ ...@@ -12,10 +12,12 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" #include <string>
#include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/node_attr_flags.h" #include "paddle/fluid/inference/analysis/node_attr_flags.h"
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
...@@ -29,7 +31,7 @@ void TensorRTSubgraphNodeMarkPass::Run(DataFlowGraph *graph) { ...@@ -29,7 +31,7 @@ void TensorRTSubgraphNodeMarkPass::Run(DataFlowGraph *graph) {
class DfgDebuggerPass : public DFG_GraphvizDrawPass { class DfgDebuggerPass : public DFG_GraphvizDrawPass {
public: public:
DfgDebuggerPass(const DFG_GraphvizDrawPass::Config &config) explicit DfgDebuggerPass(const DFG_GraphvizDrawPass::Config &config)
: DFG_GraphvizDrawPass(config) {} : DFG_GraphvizDrawPass(config) {}
std::string repr() const override { std::string repr() const override {
......
...@@ -20,6 +20,12 @@ limitations under the License. */ ...@@ -20,6 +20,12 @@ limitations under the License. */
#include "paddle/fluid/memory/detail/system_allocator.h" #include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h" #include "paddle/fluid/platform/gpu_info.h"
DEFINE_bool(init_allocated_mem, false,
"It is a mistake that the values of the memory allocated by "
"BuddyAllocator are always zeroed in some op's implementation. "
"To find this error in time, we use init_allocated_mem to indicate "
"that initializing the allocated memory with a small value "
"during unit testing.");
DECLARE_double(fraction_of_gpu_memory_to_use); DECLARE_double(fraction_of_gpu_memory_to_use);
namespace paddle { namespace paddle {
...@@ -41,6 +47,9 @@ template <> ...@@ -41,6 +47,9 @@ template <>
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) { void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
void* p = GetCPUBuddyAllocator()->Alloc(size); void* p = GetCPUBuddyAllocator()->Alloc(size);
if (FLAGS_init_allocated_mem) {
memset(p, 0xEF, size);
}
VLOG(10) << " pointer=" << p; VLOG(10) << " pointer=" << p;
return p; return p;
} }
...@@ -104,6 +113,9 @@ void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) { ...@@ -104,6 +113,9 @@ void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
LOG(WARNING) << "GPU memory used: " << Used<platform::CUDAPlace>(place); LOG(WARNING) << "GPU memory used: " << Used<platform::CUDAPlace>(place);
platform::SetDeviceId(cur_dev); platform::SetDeviceId(cur_dev);
} }
if (FLAGS_init_allocated_mem) {
cudaMemset(ptr, 0xEF, size);
}
return ptr; return ptr;
} }
...@@ -137,6 +149,9 @@ void* Alloc<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place, ...@@ -137,6 +149,9 @@ void* Alloc<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place,
LOG(WARNING) << "cudaMallocHost Cannot allocate " << size LOG(WARNING) << "cudaMallocHost Cannot allocate " << size
<< " bytes in CUDAPinnedPlace"; << " bytes in CUDAPinnedPlace";
} }
if (FLAGS_init_allocated_mem) {
memset(ptr, 0xEF, size);
}
return ptr; return ptr;
} }
......
...@@ -115,9 +115,12 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -115,9 +115,12 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu; if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu;
// create mkldnn memory from input x tensor // create mkldnn memory from input x tensor
auto src_memory = mkldnn::memory::format input_format =
memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine}, platform::MKLDNNFormatForSize(src_tz.size(), x->format());
to_void_cast(x_data));
auto src_memory = memory(
{{{src_tz}, memory::data_type::f32, input_format}, mkldnn_engine},
to_void_cast(x_data));
// create primitive descriptor for batch norm forward // create primitive descriptor for batch norm forward
using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>; using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>;
...@@ -251,15 +254,21 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -251,15 +254,21 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
using bn_bwd_types = bn_type_traits<mkldnn::batch_normalization_backward>; using bn_bwd_types = bn_type_traits<mkldnn::batch_normalization_backward>;
// create mkldnn memory from input diff_y tensor // create mkldnn memory from input diff_y tensor
auto user_diff_dst_memory =
memory({{{diff_dst_tz}, memory::data_type::f32, diff_y->format()}, mkldnn::memory::format dst_format =
mkldnn_engine}, platform::MKLDNNFormatForSize(src_tz.size(), diff_y->format());
to_void_cast(diff_y_data));
auto user_diff_dst_memory = memory(
{{{diff_dst_tz}, memory::data_type::f32, dst_format}, mkldnn_engine},
to_void_cast(diff_y_data));
// create mkldnn memory from input x tensor // create mkldnn memory from input x tensor
auto src_memory = mkldnn::memory::format input_format =
memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine}, platform::MKLDNNFormatForSize(src_tz.size(), x->format());
to_void_cast(x_data));
auto src_memory = memory(
{{{src_tz}, memory::data_type::f32, input_format}, mkldnn_engine},
to_void_cast(x_data));
// for diff_dst, try to use same format as dst in forward pass // for diff_dst, try to use same format as dst in forward pass
auto diff_dst_pd = batch_norm_fwd_pd.get()->dst_primitive_desc(); auto diff_dst_pd = batch_norm_fwd_pd.get()->dst_primitive_desc();
......
...@@ -228,7 +228,7 @@ class MKLDNNHandler { ...@@ -228,7 +228,7 @@ class MKLDNNHandler {
return dstr; return dstr;
}; };
return dims2str(operand_dims) + suffix; return dims2str(operand_dims) + suffix;
}; }
protected: protected:
const MKLDNNDeviceContext& dev_ctx_; const MKLDNNDeviceContext& dev_ctx_;
...@@ -237,5 +237,15 @@ class MKLDNNHandler { ...@@ -237,5 +237,15 @@ class MKLDNNHandler {
bool is_reusing_; bool is_reusing_;
}; };
inline mkldnn::memory::format MKLDNNFormatForSize(
size_t dims_size, mkldnn::memory::format data_format) {
if (dims_size == 1) {
return mkldnn::memory::format::x;
} else if (dims_size == 2) {
return mkldnn::memory::format::nc;
}
return data_format;
}
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -118,7 +118,8 @@ def __bootstrap__(): ...@@ -118,7 +118,8 @@ def __bootstrap__():
read_env_flags = [ read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb' 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
'init_allocated_mem'
] ]
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
read_env_flags += [ read_env_flags += [
......
...@@ -18,6 +18,8 @@ import unittest ...@@ -18,6 +18,8 @@ import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
import time import time
import numpy as np import numpy as np
import math
import sys
__all__ = ['TestParallelExecutorBase'] __all__ = ['TestParallelExecutorBase']
...@@ -93,6 +95,12 @@ class TestParallelExecutorBase(unittest.TestCase): ...@@ -93,6 +95,12 @@ class TestParallelExecutorBase(unittest.TestCase):
print "%.4f Instance per second" % ( print "%.4f Instance per second" % (
(batch_size * iter + 2) / (end - begin)) (batch_size * iter + 2) / (end - begin))
avg_last_loss_val = np.array(last_loss).mean()
avg_first_loss_val = np.array(first_loss).mean()
if math.isnan(float(avg_last_loss_val)) or math.isnan(
float(avg_first_loss_val)):
sys.exit("got NaN loss, training failed.")
print first_loss, last_loss print first_loss, last_loss
# self.assertGreater(first_loss[0], last_loss[0]) # self.assertGreater(first_loss[0], last_loss[0])
return first_loss, last_loss return first_loss, last_loss
...@@ -16,6 +16,8 @@ import paddle.fluid as fluid ...@@ -16,6 +16,8 @@ import paddle.fluid as fluid
import numpy as np import numpy as np
import unittest import unittest
import os import os
import sys
import math
def simple_fc_net(): def simple_fc_net():
...@@ -73,6 +75,14 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase): ...@@ -73,6 +75,14 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
train_loss, = train_exe.run([loss.name], feed=feed_dict) train_loss, = train_exe.run([loss.name], feed=feed_dict)
avg_test_loss_val = np.array(test_loss).mean()
if math.isnan(float(avg_test_loss_val)):
sys.exit("got NaN loss, testing failed.")
avg_train_loss_val = np.array(train_loss).mean()
if math.isnan(float(avg_train_loss_val)):
sys.exit("got NaN loss, training failed.")
self.assertTrue( self.assertTrue(
np.allclose( np.allclose(
train_loss, test_loss, atol=1e-8), train_loss, test_loss, atol=1e-8),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册