Commit a318a490 authored by minqiyang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into accelerate_ddpg

test=develop
......@@ -57,46 +57,43 @@ int main()
return 0;
}" SSE3_FOUND)
# disable AVX by default on windows
if(NOT WIN32)
# Check AVX
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}" AVX_FOUND)
# Check AVX 2
set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}" AVX2_FOUND)
# Check AVX512F
set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}" AVX512F_FOUND)
endif(NOT WIN32)
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND)
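For context, each CHECK_CXX_SOURCE_RUNS block above compiles and executes a tiny intrinsics program with the matching flag placed in CMAKE_REQUIRED_FLAGS; the result variable (e.g. AVX_FOUND) is set only if the program builds and exits with 0. Below is a minimal standalone C++ sketch of such a probe, extended with a store so the result is observable; the verification step is an illustration, not part of the original snippet. Build it with an AVX flag such as -mavx.

// Standalone AVX probe: it compiles and runs only if the toolchain and CPU
// support AVX, which is exactly what CHECK_CXX_SOURCE_RUNS tests above.
#include <immintrin.h>

int main() {
  __m256 a = _mm256_set_ps(-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
  __m256 b = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
  __m256 sum = _mm256_add_ps(a, b);
  float out[8];
  _mm256_storeu_ps(out, sum);
  // The lowest lane holds 4.0f + 4.0f; a non-zero exit would mark AVX_FOUND false.
  return out[0] == 8.0f ? 0 : 1;
}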
......@@ -7,27 +7,17 @@ function(windows_symbolic TARGET)
cmake_parse_arguments(windows_symbolic "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(final_path ${CMAKE_CURRENT_SOURCE_DIR}/${windows_symbolic_PATH})
foreach(src ${windows_symbolic_SRCS})
get_filename_component(src ${src} NAME_WE)
if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cu)
message(FATAL " ${src}.cc and ${src}.cu must exist, and ${src}.cu must be a symbolic file.")
endif()
#only copy xx.cc to .xx.cu when the content is modified
set(copy_flag 1)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc SOURCE_STR)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu TARGET_STR)
if (SOURCE_STR STREQUAL TARGET_STR)
set(copy_flag 0)
endif()
endif()
if (copy_flag)
add_custom_command(OUTPUT .${src}.cu
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc" "${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu"
COMMENT "create hidden file of ${src}.cu")
endif(copy_flag)
add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
get_filename_component(src ${src} NAME_WE)
if (NOT EXISTS ${final_path}/${src}.cc OR NOT EXISTS ${final_path}/${src}.cu)
message(FATAL " ${src}.cc and ${src}.cu must exist, and ${src}.cu must be a symbolic file.")
endif()
file(GENERATE OUTPUT ${final_path}/.${src}.cu INPUT ${final_path}/${src}.cc)
add_custom_command(OUTPUT ${final_path}/.${src}.cu
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" "${final_path}/.${src}.cu"
COMMENT "create hidden file of ${src}.cu")
add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
endforeach()
endfunction()
......@@ -78,18 +68,23 @@ cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memor
cc_library(reader SRCS reader.cc DEPS lod_tensor ddim)
cc_test(reader_test SRCS reader_test.cc DEPS reader)
cc_test(variable_test SRCS variable_test.cc)
cc_library(threadpool SRCS threadpool.cc DEPS enforce)
cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash)
cc_library(var_type_traits SRCS var_type_traits.cc DEPS lod_tensor selected_rows framework_proto)
if (WITH_GPU)
target_link_libraries(var_type_traits dynload_cuda)
endif()
cc_test(var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits)
cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash var_type_traits)
cc_library(scope_pool SRCS scope_pool.cc DEPS scope)
cc_test(scope_test SRCS scope_test.cc DEPS scope)
cc_test(variable_test SRCS variable_test.cc DEPS tensor var_type_traits)
cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
nv_test(data_device_transform_test SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function)
DEPS operator op_registry device_context math_function scope)
if(WITH_GPU)
if (WIN32)
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
......
......@@ -88,7 +88,7 @@ void EagerDeletionOpHandle::RunImpl() {
}
} else {
PADDLE_THROW("Type %s of %s is not supported eager deletion",
var->Type().name(), name);
framework::ToTypeName(var->Type()), name);
}
}
......
......@@ -25,7 +25,7 @@ struct ExecutionStrategy {
size_t num_threads_{0};
bool use_cuda_{true};
bool allow_op_delay_{false};
size_t num_iteration_per_drop_scope_{1};
size_t num_iteration_per_drop_scope_{100};
ExecutorType type_{kDefault};
bool dry_run_{false};
};
......
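The hunk above raises the default num_iteration_per_drop_scope_ from 1 to 100, so the executor's temporary local scopes are cleaned up far less often. A hedged sketch of tuning that knob follows; the struct mirrors the fields shown in the hunk, while how the strategy is consumed (e.g. by a parallel executor) is assumed rather than shown in this diff, and the kExperimental enumerator is an assumption.

// Sketch only: the struct mirrors the ExecutionStrategy fields in the hunk
// above; consumption of the strategy by an executor is not shown in the diff.
#include <cstddef>

struct ExecutionStrategy {
  enum ExecutorType { kDefault, kExperimental };  // kExperimental is assumed
  size_t num_threads_{0};
  bool use_cuda_{true};
  bool allow_op_delay_{false};
  size_t num_iteration_per_drop_scope_{100};  // new default in this commit
  ExecutorType type_{kDefault};
  bool dry_run_{false};
};

int main() {
  ExecutionStrategy strategy;
  // Drop local scopes only every 100 iterations; lowering this trades speed
  // for earlier memory reclamation.
  strategy.num_iteration_per_drop_scope_ = 100;
  return strategy.num_iteration_per_drop_scope_ == 100 ? 0 : 1;
}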
......@@ -45,7 +45,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
#endif
int GetVarDeviceID(
const ir::Graph &graph, const std::string &varname,
const std::string &varname,
const std::unordered_map<std::string, int> &sharded_var_device) const;
bool IsScaleLossOp(ir::Node *node) const;
......@@ -57,12 +57,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
ir::Graph *result, ir::Node *node,
std::unordered_map<std::string, int> *sharded_var_device) const;
std::vector<std::string> FindDistTrainSendVars(
const std::vector<ir::Node *> &nodes) const;
std::vector<std::string> FindDistTrainRecvVars(
const std::vector<ir::Node *> &nodes) const;
void CreateComputationalOps(ir::Graph *result, ir::Node *node,
size_t num_places) const;
......@@ -77,7 +71,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
int dev_id) const;
int GetOpDeviceID(
const ir::Graph &graph, ir::Node *node,
ir::Node *node,
const std::unordered_map<std::string, int> &sharded_var_device) const;
void InsertAllReduceOp(ir::Graph *result, const std::string &og) const;
......@@ -100,6 +94,15 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void SetCommunicationContext(OpHandleBase *op_handle,
const platform::Place &p) const;
std::vector<ir::Node *> SortForReduceMode(
const std::vector<ir::Node *> &) const;
int GetOpDeviceID(
ir::Node *node,
const std::unordered_map<std::string, int> &shared_var_device,
std::unordered_map<std::string, std::vector<ir::Node *>> *delay_ops)
const;
mutable std::string loss_var_name_;
mutable std::vector<platform::Place> places_;
mutable std::vector<Scope *> local_scopes_;
......
......@@ -85,7 +85,6 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
drop_scope_counter_ = 0;
}
if (eptr) {
std::rethrow_exception(eptr);
} else {
......
......@@ -24,7 +24,7 @@ static void VisitVariable(Variable* var, Func* func) {
} else if (var->IsType<SelectedRows>()) {
(*func)(var->GetMutable<SelectedRows>());
} else {
PADDLE_THROW("Not supported type %s", var->Type().name());
PADDLE_THROW("Not supported type %s", ToTypeName(var->Type()));
}
}
......@@ -35,7 +35,7 @@ static void VisitVariable(const Variable& var, Func* func) {
} else if (var.IsType<SelectedRows>()) {
(*func)(var.Get<SelectedRows>());
} else {
PADDLE_THROW("Not supported type %s", var.Type().name());
PADDLE_THROW("Not supported type %s", ToTypeName(var.Type()));
}
}
......
......@@ -119,7 +119,7 @@ static void DeleteUnusedTensors(
}
} else {
PADDLE_THROW("Type %s of %s is not supported eager deletion",
var->Type().name(), name);
framework::ToTypeName(var->Type()), name);
}
}
}
......
......@@ -40,18 +40,20 @@ framework::proto::OpDesc PrepareOpDesc(
const std::string& output) {
auto proto = base_desc;
framework::OpDesc desc(proto, nullptr);
desc.SetType("conv2d_fusion");
desc.SetInput("Bias", {bias});
desc.SetInput("ResidualData", {bias1});
desc.SetAttr("activation", activation);
desc.SetOutput("Output", {output});
desc.SetAttr("is_test", true);
desc.SetAttr("use_cudnn", false);
desc.Flush();
return *desc.Proto();
}
std::unique_ptr<ir::Graph> ConvElementwiseAdd2ActFusePass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
const std::string pattern_name = "conv_elementwise_add_act_fuse";
const std::string pattern_name = "conv_elementwise_add2_act_fuse";
FusePassBase::Init(pattern_name, graph.get());
GraphPatternDetector gpd;
......@@ -76,22 +78,23 @@ std::unique_ptr<ir::Graph> ConvElementwiseAdd2ActFusePass::ApplyImpl(
framework::OpDesc new_op_desc(new_op_proto, nullptr);
// Create a new node for the fused op.
graph->CreateOpNode(&new_op_desc);
auto* new_conv_op = graph->CreateOpNode(&new_op_desc);
// Link inputs and outputs.
PADDLE_ENFORCE(subgraph.count(x));
auto* conv_in_node = subgraph.at(x);
IR_NODE_LINK_TO(conv_in_node, conv_op); // Input
IR_NODE_LINK_TO(conv_filter, conv_op); // Filter
IR_NODE_LINK_TO(conv_op, conv_out); // Output
IR_NODE_LINK_TO(elementwise_add_in_y, conv_op); // Bias
IR_NODE_LINK_TO(elementwise_add_in_y_1, conv_op); // Bias
IR_NODE_LINK_TO(conv_in_node, new_conv_op); // Input
IR_NODE_LINK_TO(conv_filter, new_conv_op); // Filter
IR_NODE_LINK_TO(elementwise_add_in_y, new_conv_op); // Bias
IR_NODE_LINK_TO(elementwise_add_in_y_1, new_conv_op); // Bias
IR_NODE_LINK_TO(new_conv_op, act_out); // Output
// Delete the unneeded nodes.
GraphSafeRemoveNodes(graph.get(),
{conv_op, elementwise_add_op, elementwise_add_op_1,
elementwise_add_out});
GraphSafeRemoveNodes(
graph.get(),
{conv_op, conv_out, elementwise_add_op, elementwise_add_op_1,
elementwise_add_out, elementwise_add_out_1, act_op});
};
gpd(graph.get(), handler);
return graph;
......
......@@ -20,102 +20,11 @@ limitations under the License. */
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/var_desc.h"
DEFINE_bool(enforce_when_check_program, true,
"Checking whether the program is correct or not. We will log "
"errors rather than throwing exceptions if this flag turned off");
namespace paddle {
namespace framework {
namespace ir {
namespace {
void CheckProgram(const ProgramDesc &program) {
#define _INT(role) static_cast<int>(role)
std::map<int, bool> visit;
for (OpDesc *op : program.Block(0).AllOps()) {
// For backward compatibility, some program doesn't have role added.
if (!op->HasAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) continue;
int role_id =
boost::get<int>(op->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
visit[role_id] = true;
switch (role_id) {
case _INT(OpRole::kForward):
if (visit.find(_INT(OpRole::kBackward)) != visit.end()) {
LOG(ERROR) << "Cannot add backward operator before forward operator "
<< op->Type();
}
break;
case _INT(OpRole::kBackward):
case _INT(OpRole::kBackward) | _INT(OpRole::kLoss):
if (!FLAGS_enforce_when_check_program) {
PADDLE_ENFORCE(
visit.find(_INT(OpRole::kOptimize)) == visit.end(),
"Cannot add backward operator %s after optimize operator.",
op->Type());
} else {
if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
LOG(ERROR)
<< "Cannot add backward operator %s after optimize operator."
<< op->Type();
}
}
break;
case _INT(OpRole::kForward) | _INT(OpRole::kLoss):
if (!FLAGS_enforce_when_check_program) {
PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
_INT(OpRole::kLoss)) == visit.end(),
"Cannot add backward|loss operator before "
"forward|loss operator %s.",
op->Type());
PADDLE_ENFORCE(
visit.find(_INT(OpRole::kOptimize)) == visit.end(),
"Cannot add forward|loss operator %s after optimize operator.",
op->Type());
} else {
if (visit.find(_INT(OpRole::kBackward) | _INT(OpRole::kLoss)) !=
visit.end()) {
LOG(ERROR) << "Cannot add backward|loss operator before "
<< "forward|loss operator %s." << op->Type();
}
if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
LOG(ERROR) << "Cannot add forward|loss operator %s after optimize "
"operator."
<< op->Type();
}
}
break;
case _INT(OpRole::kOptimize):
case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched):
if (!FLAGS_enforce_when_check_program) {
PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
"Optimize operators %s must follow backward operator.",
op->Type());
} else {
if (visit.find(_INT(OpRole::kBackward)) == visit.end()) {
LOG(ERROR) << "Optimize operators %s must follow backward operator."
<< op->Type();
}
}
break;
case _INT(OpRole::kLRSched):
case _INT(OpRole::kDist):
case _INT(OpRole::kRPC):
case _INT(OpRole::kNotSpecified):
break;
default:
LOG(FATAL) << "Unknown operator role. Don't add new role because "
"you don't know what you are doing.";
}
}
#undef _INT
}
} // namespace
Graph::Graph(const ProgramDesc &program) : program_(program) {
CheckProgram(program_);
auto var_nodes = InitFromProgram(program_);
ResolveHazard(var_nodes);
}
......
......@@ -1101,9 +1101,7 @@ PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) {
return out_var;
}
std::unordered_set<std::string> conv_act_set({"identity", "sigmoid", "relu",
"relu6", "relux", "tanh",
"band_pass"});
std::unordered_set<std::string> conv_act_set({"identity", "relu"});
PDNode *patterns::ConvElementwiseaddAct::operator()(PDNode *conv_in) {
conv_in->AsInput();
......@@ -1169,13 +1167,13 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
->AsInput();
auto elementwise_add_out = pattern->NewNode(elementwise_add_out_repr())
->assert_is_op_output("elementwise_add")
->assert_is_op_input("elementwise_add", "X")
->assert_is_op_input("elementwise_add", "Y")
->AsIntermediate();
auto elementwise_add_op_1 = pattern->NewNode(elementwise_add_op_1_repr())
->assert_is_op("elementwise_add");
auto elementwise_add_in_y_1 = pattern->NewNode(elementwise_add_in_y_1_repr())
->assert_is_op_input("elementwise_add", "Y")
->assert_is_op_input("elementwise_add", "X")
->AsInput();
auto elementwise_add_out_1 = pattern->NewNode(elementwise_add_out_1_repr())
->assert_is_op_output("elementwise_add")
......@@ -1203,8 +1201,8 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
conv_op->LinksFrom({conv_in, conv_filter}).LinksTo({conv_out});
elementwise_add_op->LinksFrom({conv_out, elementwise_add_in_y})
.LinksTo({elementwise_add_out});
elementwise_add_op_1->LinksFrom(
{elementwise_add_out, elementwise_add_in_y_1});
elementwise_add_op_1->LinksFrom({elementwise_add_out, elementwise_add_in_y_1})
.LinksTo({elementwise_add_out_1});
act_op->LinksFrom({elementwise_add_out_1}).LinksTo({act_out});
return act_out;
}
......
......@@ -215,8 +215,8 @@ class Vector {
auto stream = dev_ctx->stream();
void *src = gpu_->ptr();
void *dst = cpu_.data();
memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
gpu_->size(), stream);
paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
gpu_->size(), stream);
dev_ctx->Wait();
}
......@@ -261,8 +261,8 @@ class Vector {
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place));
auto stream = dev_ctx->stream();
memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
gpu_->size(), stream);
paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
gpu_->size(), stream);
}
void ImmutableCPU() const {
......@@ -284,7 +284,7 @@ class Vector {
bool IsInCPU() const { return flag_ & kDataInCPU; }
mutable std::vector<T> cpu_;
mutable memory::AllocationPtr gpu_;
mutable paddle::memory::AllocationPtr gpu_;
mutable int flag_;
mutable std::mutex mtx_;
......
......@@ -82,10 +82,6 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namescope.")
.SetDefault("");
AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
"Callstack for Op Creatation.")
.SetDefault({});
Validate();
}
......
......@@ -47,7 +47,6 @@ class OpProtoAndCheckerMaker {
static const char *OpRoleAttrName() { return "op_role"; }
static const char *OpRoleVarAttrName() { return "op_role_var"; }
static const char *OpNamescopeAttrName() { return "op_namescope"; }
static const char *OpCreationCallstackAttrName() { return "op_callstack"; }
void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
......
......@@ -23,7 +23,8 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#include "glog/logging.h" // For VLOG()
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
#include "paddle/fluid/framework/framework.pb.h"
......
......@@ -16,15 +16,10 @@ limitations under the License. */
#include <glog/logging.h>
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
......@@ -162,59 +157,27 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
}
void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
try {
if (VLOG_IS_ON(4)) {
VLOG(4) << place << " " << DebugStringEx(&scope);
}
if (platform::is_gpu_place(place)) {
VLOG(4) << place << " " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place);
PADDLE_THROW("Cannot run operator on place %s", place);
#else
auto dev_id = boost::get<platform::CUDAPlace>(place).device;
platform::SetDeviceId(dev_id);
auto dev_id = boost::get<platform::CUDAPlace>(place).device;
platform::SetDeviceId(dev_id);
#endif
}
// The profile has a process-wide mutex, results in serious performance
// issue
// in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
if (platform::IsProfileEnabled()) {
platform::DeviceContextPool& pool =
platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place));
RunImpl(scope, place);
} else {
RunImpl(scope, place);
}
if (VLOG_IS_ON(3)) {
VLOG(3) << place << " " << DebugStringEx(&scope);
}
} catch (platform::EnforceNotMet exception) {
if (Attrs().count("sub_block") != 0) {
throw exception;
}
auto& callstack = Attr<std::vector<std::string>>(
OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
}
if (callstack.empty()) {
throw exception;
}
std::ostringstream sout;
sout << "Invoke operator " << Type() << " error.\n";
sout << "Python Callstacks: \n";
for (auto& line : callstack) {
sout << line;
}
sout << "C++ Callstacks: \n";
sout << exception.err_str_;
exception.err_str_ = sout.str();
throw exception;
} catch (...) {
std::rethrow_exception(std::current_exception());
// The profiler has a process-wide mutex, which causes a serious performance
// issue in concurrency scenarios. The `if` below works around it.
// Please do not remove the `if`; ask @Superjomn if there is any concern.
if (platform::IsProfileEnabled()) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place));
RunImpl(scope, place);
} else {
RunImpl(scope, place);
}
VLOG(3) << place << " " << DebugStringEx(&scope);
}
bool OperatorBase::HasInputs(const std::string& name) const {
......@@ -417,7 +380,7 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
return &(var.Get<SelectedRows>().value());
} else {
PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
var.Type().name());
ToTypeName(var.Type()));
}
}
......@@ -428,7 +391,7 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
return var->GetMutable<SelectedRows>()->mutable_value();
} else {
PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
var->Type().name());
ToTypeName(var->Type()));
}
}
......@@ -522,7 +485,7 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
PADDLE_ENFORCE(
var->IsType<LoDTensor>(),
"should be LoDTensor, but the received type is %s",
var->Type().name());
ToTypeName(var->Type()));
return &(var->Get<LoDTensor>());
});
return res;
......@@ -541,7 +504,7 @@ const std::vector<const Tensor*> ExecutionContext::LegacyMultiInput<Tensor>(
PADDLE_ENFORCE(
var->IsType<LoDTensor>(),
"%s should be LoDTensor, but the received type is %s",
sub_name, var->Type().name());
sub_name, ToTypeName(var->Type()));
return &(var->Get<LoDTensor>());
});
return res;
......@@ -570,7 +533,7 @@ std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
PADDLE_ENFORCE(
var->IsType<LoDTensor>(),
"%s should be LoDTensor, but the received type is %s",
sub_name, var->Type().name());
sub_name, ToTypeName(var->Type()));
return var->GetMutable<LoDTensor>();
});
return res;
......@@ -812,7 +775,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
PADDLE_THROW(
"Only LoDTensor/SelectedRows support 'GetDim', but Variables "
"type_id is %s.",
var->Type().name());
ToTypeName(var->Type()));
}
}
......@@ -835,7 +798,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
var->GetMutable<SelectedRows>()->set_height(dim[0]);
} else {
PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
var->Type().name());
ToTypeName(var->Type()));
}
}
......
......@@ -49,6 +49,8 @@ constexpr char kTempVarName[] = "@TEMP@";
/// e.g. Variable "x@GRAD" is the gradient of variable "x".
constexpr char kGradVarSuffix[] = "@GRAD";
constexpr size_t kGradVarSuffixSize = 5U;
/// Variables with this suffix are supposed to be filled up with zeros.
constexpr char kZeroVarSuffix[] = "@ZERO";
......@@ -60,7 +62,11 @@ constexpr char kNewGradSuffix[] = "@NEWGRAD@";
extern std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
inline std::string GradVarName(const std::string& var_name) {
return var_name + kGradVarSuffix;
std::string result;
result.reserve(var_name.size() + kGradVarSuffixSize);
result += var_name;
result += kGradVarSuffix;
return result;
}
proto::VarType::Type GetDataTypeOfVar(const Variable* var);
......@@ -110,8 +116,8 @@ class OperatorBase {
bool HasAttr(const std::string& name) const { return attrs_.count(name); }
template <typename T>
inline const T& Attr(const std::string& name) const {
PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should be in AttributeMap",
name);
PADDLE_ENFORCE(attrs_.find(name) != attrs_.end(),
"%s should be in AttributeMap", name);
return boost::get<T>(attrs_.at(name));
}
const AttributeMap& Attrs() const { return attrs_; }
......
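The GradVarName change above replaces `var_name + kGradVarSuffix` with an explicit reserve-and-append, avoiding a possible extra allocation on a hot path. A minimal self-contained sketch of the same pattern; the two constants are copied from the hunk, and the main() check is illustrative.

// Reserve-then-append string building, as in the GradVarName() hunk above.
#include <cassert>
#include <string>

constexpr char kGradVarSuffix[] = "@GRAD";
constexpr size_t kGradVarSuffixSize = 5U;

inline std::string GradVarName(const std::string& var_name) {
  std::string result;
  result.reserve(var_name.size() + kGradVarSuffixSize);  // single allocation
  result += var_name;
  result += kGradVarSuffix;
  return result;
}

int main() {
  assert(GradVarName("x") == "x@GRAD");  // gradient variable name of "x"
  return 0;
}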
......@@ -320,6 +320,7 @@ void ParallelExecutor::BCastParamsToDevices(
if (paddle::platform::is_gpu_place(main_tensor.place())) {
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::vector<void *> buffers;
buffers.reserve(member_->places_.size());
size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
for (size_t i = 0; i < member_->places_.size(); ++i) {
......@@ -353,9 +354,7 @@ void ParallelExecutor::BCastParamsToDevices(
#endif
} else {
platform::CPUPlace cpu;
for (size_t i = 0; i < member_->places_.size(); ++i) {
if (i == 0) continue;
for (size_t i = 1; i < member_->places_.size(); ++i) {
auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
......
......@@ -47,15 +47,9 @@ DEFINE_bool(fast_eager_deletion_mode, false,
// the mutex will cause a serious performance issue.
// So the mutex is disabled when `ON_INFER`.
#ifdef PADDLE_ON_INFERENCE
#define SCOPE_KIDS_READER_LOCK
#define SCOPE_KIDS_WRITER_LOCK
#define SCOPE_VARS_READER_LOCK
#define SCOPE_VARS_WRITER_LOCK
#define SCOPE_LOCK_GUARD
#else
#define SCOPE_KIDS_READER_LOCK AutoRDLock auto_lock(&kids_lock_);
#define SCOPE_KIDS_WRITER_LOCK AutoWRLock auto_lock(&kids_lock_);
#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
#endif
namespace paddle {
......@@ -73,69 +67,64 @@ bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; }
Scope::~Scope() { DropKids(); }
Scope& Scope::NewScope() const {
Scope* child = new Scope(this);
{
SCOPE_KIDS_WRITER_LOCK
kids_.push_back(child);
}
return *child;
SCOPE_LOCK_GUARD
kids_.push_back(new Scope(this));
return *kids_.back();
}
Variable* Scope::Var(const std::string& name) {
SCOPE_VARS_WRITER_LOCK
SCOPE_LOCK_GUARD
return VarInternal(name);
}
Variable* Scope::Var(std::string* name) {
SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
if (name != nullptr) {
*name = new_name;
}
SCOPE_VARS_WRITER_LOCK
return VarInternal(new_name);
}
Variable* Scope::FindVar(const std::string& name) const {
SCOPE_VARS_READER_LOCK
SCOPE_LOCK_GUARD
return FindVarInternal(name);
}
Variable* Scope::FindLocalVar(const std::string& name) const {
SCOPE_VARS_READER_LOCK
SCOPE_LOCK_GUARD
return FindVarLocally(name);
}
const Scope* Scope::FindScope(const Variable* var) const {
SCOPE_VARS_READER_LOCK
SCOPE_LOCK_GUARD
return FindScopeInternal(var);
}
void Scope::DropKids() {
SCOPE_KIDS_WRITER_LOCK
SCOPE_LOCK_GUARD
for (Scope* s : kids_) delete s;
kids_.clear();
}
bool Scope::HasKid(const Scope* scope) const {
SCOPE_KIDS_READER_LOCK
SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
return it != this->kids_.end();
}
std::vector<std::string> Scope::LocalVarNames() const {
SCOPE_LOCK_GUARD
std::vector<std::string> known_vars;
{
SCOPE_VARS_READER_LOCK
known_vars.reserve(this->vars_.size());
for (auto& p : vars_) {
known_vars.emplace_back(p.first);
}
known_vars.reserve(this->vars_.size());
for (auto& p : vars_) {
known_vars.emplace_back(p.first);
}
return known_vars;
}
void Scope::DeleteScope(Scope* scope) const {
SCOPE_KIDS_WRITER_LOCK
SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope",
this, scope);
......@@ -149,8 +138,8 @@ void Scope::DeleteScope(Scope* scope) const {
}
void Scope::EraseVars(const std::vector<std::string>& var_names) {
SCOPE_LOCK_GUARD
std::set<std::string> var_set(var_names.begin(), var_names.end());
SCOPE_VARS_WRITER_LOCK
for (auto it = vars_.begin(); it != vars_.end();) {
if (var_set.find(it->first) != var_set.end()) {
it = vars_.erase(it);
......@@ -162,12 +151,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const {
SCOPE_VARS_WRITER_LOCK
SCOPE_LOCK_GUARD
RenameInternal(origin_name, new_name);
}
std::string Scope::Rename(const std::string& origin_name) const {
SCOPE_VARS_WRITER_LOCK
SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
RenameInternal(origin_name, new_name);
return new_name;
......@@ -176,11 +165,9 @@ std::string Scope::Rename(const std::string& origin_name) const {
Variable* Scope::VarInternal(const std::string& name) {
auto* v = FindVarLocally(name);
if (v != nullptr) return v;
v = new Variable();
vars_[name].reset(v);
vars_.emplace(name, std::unique_ptr<Variable>(v));
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
}
......
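The scope.cc hunk above collapses the separate read/write locks for kids_ and vars_ into a single SCOPE_LOCK_GUARD backed by one std::mutex, and compiles the guard out entirely under PADDLE_ON_INFERENCE (single-threaded inference). A toy sketch of that pattern using a hypothetical TinyScope class, not Paddle's Scope:

// Toy illustration of the SCOPE_LOCK_GUARD pattern from scope.cc above.
#include <mutex>
#include <string>
#include <unordered_map>

#ifdef PADDLE_ON_INFERENCE
#define SCOPE_LOCK_GUARD
#else
#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
#endif

class TinyScope {  // hypothetical stand-in for framework::Scope
 public:
  void Set(const std::string& name, int value) {
    SCOPE_LOCK_GUARD  // expands to nothing when PADDLE_ON_INFERENCE is set
    vars_[name] = value;
  }
  int Get(const std::string& name) const {
    SCOPE_LOCK_GUARD
    return vars_.at(name);
  }

 private:
  std::unordered_map<std::string, int> vars_;
  mutable std::mutex mutex_;  // mutable so const readers can lock it
};

int main() {
  TinyScope scope;
  scope.Set("x", 1);
  return scope.Get("x") == 1 ? 0 : 1;
}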
......@@ -14,19 +14,12 @@ limitations under the License. */
#pragma once
extern "C" {
#include <xxhash.h>
}
#include <functional>
#include <list>
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/macros.h"
......@@ -138,8 +131,7 @@ class Scope {
DISABLE_COPY_AND_ASSIGN(Scope);
private:
mutable RWLock kids_lock_;
mutable RWLock vars_lock_;
mutable std::mutex mutex_;
};
// Generate some debug string about the inheritance structure of scope, quite
......
......@@ -19,52 +19,50 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/variable.h"
namespace paddle {
namespace framework {
template <typename T>
inline bool IsType(const std::type_index& type_index) {
return type_index == std::type_index(typeid(T));
inline bool IsType(const std::type_index& type) {
return type == typeid(T);
}
inline proto::VarType::Type ToVarType(std::type_index type) {
if (IsType<LoDTensor>(type)) {
return proto::VarType_Type_LOD_TENSOR;
} else if (IsType<LoDRankTable>(type)) {
return proto::VarType_Type_LOD_RANK_TABLE;
} else if (IsType<LoDTensorArray>(type)) {
return proto::VarType_Type_LOD_TENSOR_ARRAY;
} else if (IsType<SelectedRows>(type)) {
return proto::VarType_Type_SELECTED_ROWS;
} else if (IsType<ReaderHolder>(type)) {
return proto::VarType_Type_READER;
} else {
PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
inline proto::VarType::Type ToVarType(int type) {
switch (type) {
case proto::VarType::LOD_TENSOR:
case proto::VarType::SELECTED_ROWS:
case proto::VarType::LOD_RANK_TABLE:
case proto::VarType::LOD_TENSOR_ARRAY:
case proto::VarType::READER:
return static_cast<proto::VarType::Type>(type);
default:
PADDLE_THROW("ToVarType:Unsupported type %d", type);
}
}
template <typename Visitor>
inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
switch (ToVarType(var.Type())) {
case proto::VarType_Type_LOD_TENSOR:
switch (var.Type()) {
case proto::VarType::LOD_TENSOR:
visitor(var.Get<LoDTensor>());
return;
case proto::VarType_Type_LOD_RANK_TABLE:
case proto::VarType::LOD_RANK_TABLE:
visitor(var.Get<LoDRankTable>());
return;
case proto::VarType_Type_LOD_TENSOR_ARRAY:
case proto::VarType::LOD_TENSOR_ARRAY:
visitor(var.Get<LoDTensorArray>());
return;
case proto::VarType_Type_SELECTED_ROWS:
case proto::VarType::SELECTED_ROWS:
visitor(var.Get<SelectedRows>());
return;
case proto::VarType_Type_READER:
case proto::VarType::READER:
visitor(var.Get<ReaderHolder>());
return;
default:
PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
PADDLE_THROW("Not supported visit type, %s", ToTypeName(var.Type()));
}
}
......
......@@ -108,7 +108,7 @@ TEST(InferVarType, sum_op_without_infer_var_type) {
op->InferVarType(prog.MutableBlock(0));
ASSERT_EQ(proto::VarType_Type_LOD_TENSOR,
ASSERT_EQ(proto::VarType::LOD_TENSOR,
prog.MutableBlock(0)->Var("test2_out")->GetType());
}
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/macros.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include <cudnn.h>
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif
namespace paddle {
namespace framework {
// Besides registering variable type id, it is helpful to register a
// var_id -> std::type_index map (for example, get type names according to id)
namespace detail {
template <int kStart, int kEnd, bool kStop>
struct VarIdToTypeIndexMapInitializerImpl {
template <typename MapType1, typename MapType2>
static void Init(MapType1 *id_to_type, MapType2 *type_to_id) {
using Type =
typename std::tuple_element<kStart, VarTypeRegistry::ArgTuple>::type;
static_assert(!std::is_same<Type, void>::value, "Type cannot be void");
constexpr int kId = VarTypeTrait<Type>::kId;
auto type = std::type_index(typeid(Type));
PADDLE_ENFORCE(id_to_type->count(kId) == 0,
"Registered duplicate type id %d for type %s", kId,
type.name());
PADDLE_ENFORCE(type_to_id->count(type) == 0,
"Registered duplicate type_index %s for id %d", type.name(),
kId);
id_to_type->emplace(kId, type);
type_to_id->emplace(type, kId);
VarIdToTypeIndexMapInitializerImpl<kStart + 1, kEnd,
kStart + 1 == kEnd>::Init(id_to_type,
type_to_id);
}
};
template <int kStart, int kEnd>
struct VarIdToTypeIndexMapInitializerImpl<kStart, kEnd, true> {
template <typename MapType1, typename MapType2>
static void Init(MapType1 *, MapType2 *) {}
};
// VarIdToTypeIndexMapInitializer is designed to initialize var_id ->
// std::type_index map and std::type_index -> var_id map
using VarIdToTypeIndexMapInitializer =
VarIdToTypeIndexMapInitializerImpl<0, VarTypeRegistry::kRegisteredTypeNum,
VarTypeRegistry::kRegisteredTypeNum ==
0>;
struct VarIdToTypeIndexMapHolder {
DISABLE_COPY_AND_ASSIGN(VarIdToTypeIndexMapHolder);
public:
static const std::type_index &ToTypeIndex(int var_id) {
auto it = Instance().id_to_type_map_.find(var_id);
PADDLE_ENFORCE(it != Instance().id_to_type_map_.end(),
"VarId %d is not registered.", var_id);
return it->second;
}
static int ToTypeId(const std::type_index &type) {
auto it = Instance().type_to_id_map_.find(type);
PADDLE_ENFORCE(it != Instance().type_to_id_map_.end(),
"VarType %s is not registered.", type.name());
return it->second;
}
private:
VarIdToTypeIndexMapHolder() {
VarIdToTypeIndexMapInitializer::Init(&id_to_type_map_, &type_to_id_map_);
}
static const VarIdToTypeIndexMapHolder &Instance() {
static const VarIdToTypeIndexMapHolder instance;
return instance;
}
std::unordered_map<int, std::type_index> id_to_type_map_;
std::unordered_map<std::type_index, int> type_to_id_map_;
};
} // namespace detail
const std::type_index &ToTypeIndex(int var_id) {
return detail::VarIdToTypeIndexMapHolder::ToTypeIndex(var_id);
}
const char *ToTypeName(int var_id) { return ToTypeIndex(var_id).name(); }
int ToTypeId(const std::type_index &type) {
return detail::VarIdToTypeIndexMapHolder::ToTypeId(type);
}
} // namespace framework
} // namespace paddle
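The maps built above give O(1) translation between a registered type's integer id and its std::type_index. A usage sketch of the round trip, assuming it is compiled inside the Paddle source tree; the include paths, LoDTensor type, and the ToTypeName/ToTypeId/ToTypeIndex functions come from this diff, while the main() wrapper is illustrative.

// Round trip between compile-time var ids and std::type_index (sketch).
#include <iostream>
#include <typeindex>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/var_type_traits.h"

int main() {
  using paddle::framework::LoDTensor;
  using paddle::framework::VarTypeTrait;

  constexpr int kId = VarTypeTrait<LoDTensor>::kId;  // compile-time id
  std::type_index idx(typeid(LoDTensor));

  // Runtime lookups provided by var_type_traits.cc above:
  std::cout << paddle::framework::ToTypeName(kId) << "\n";  // mangled type name
  bool ok = paddle::framework::ToTypeId(idx) == kId &&
            paddle::framework::ToTypeIndex(kId) == idx;
  return ok ? 0 : 1;
}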
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <tuple>
#include <typeindex>
#include <vector>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#ifndef _WIN32
#include <nccl.h>
#endif
#endif
// Users should add forward declarations here
namespace paddle {
namespace platform {
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
class Communicator;
#endif
#endif
} // namespace platform
namespace framework {
class Tensor;
class LoDTensor;
class SelectedRows;
class LoDRankTable;
class ReaderHolder;
class Scope;
} // namespace framework
namespace operators {
template <typename T>
class AlgorithmsCache;
class CudnnRNNCache;
namespace reader {
class LoDTensorBlockingQueueHolder;
} // namespace reader
} // namespace operators
} // namespace paddle
namespace paddle {
namespace framework {
const char *ToTypeName(int var_id);
const std::type_index &ToTypeIndex(int var_id);
int ToTypeId(const std::type_index &type);
namespace detail {
template <bool kStop, int kStart, int kEnd, typename T1, typename T2,
typename... Args>
struct TypePosFinderImpl {
static constexpr int kPos =
std::is_same<T1, T2>::value
? kStart
: TypePosFinderImpl<kStart + 2 == kEnd, kStart + 1, kEnd, T1,
Args...>::kPos;
};
template <int kStart, int kEnd, typename T1, typename T2>
struct TypePosFinderImpl<true, kStart, kEnd, T1, T2> {
static constexpr int kPos = std::is_same<T1, T2>::value ? kStart : -1;
};
// TypePosFinder helps to find the position in which T is inside Args...
// If T is not inside Args..., kPos would be -1
template <typename T, typename... Args>
struct TypePosFinder {
static constexpr int kPos =
TypePosFinderImpl<sizeof...(Args) == 1, 0, sizeof...(Args), T,
Args...>::kPos;
};
template <typename... Args>
struct VarTypeRegistryImpl {
static constexpr size_t kRegisteredTypeNum = sizeof...(Args);
using ArgTuple = std::tuple<Args...>;
// TypePos() returns the position in which T is inside Args...
// If T is not inside Args..., return -1
template <typename T>
static constexpr int TypePos() {
return TypePosFinder<T, Args...>::kPos;
}
// IsRegistered() returns whether T is registered inside RegistryImpl
template <typename T>
static constexpr bool IsRegistered() {
return TypePos<T>() >= 0;
}
};
} // namespace detail
#define REG_PROTO_VAR_TYPE_TRAIT(type, proto_id) \
template <> \
struct VarTypeTrait<type> { \
static_assert(VarTypeRegistry::IsRegistered<type>(), \
"Must be registered type"); \
using Type = type; \
static constexpr int kId = static_cast<int>(proto_id); \
}
/**
* The following code registers variable types.
* Only registered types can be stored in Variable.
* This registry mechanism is designed to speed up Variable.
*
* Caution: If you want to add more var types, please consider carefully
* whether you really need to add it.
*/
// Users should add other variable types below.
// Paddle generates a unique id for each registered variable type.
using VarTypeRegistry = detail::VarTypeRegistryImpl<
Tensor, LoDTensor, SelectedRows, std::vector<Scope *>, LoDRankTable,
LoDTensorArray, platform::PlaceList, ReaderHolder, std::string, Scope *,
std::map<size_t, Tensor>, operators::reader::LoDTensorBlockingQueueHolder,
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
ncclUniqueId, platform::Communicator,
#endif
operators::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>,
operators::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>,
operators::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>,
operators::CudnnRNNCache,
#endif
int, float>;
template <typename T>
struct VarTypeTrait {
static_assert(VarTypeRegistry::IsRegistered<T>(), "Must be registered type");
using Type = T;
/**
* Unique VarType Id generation.
*
* The auto-generated id must not collide with any protobuf id defined in
* framework.proto. More protobuf ids may be added in the future, so to avoid
* changing this id generation scheme frequently, the id is generated by
* adding the type position to twice the current maximum protobuf id (i.e.,
* proto::VarType::TUPLE).
*/
static constexpr int kId = VarTypeRegistry::TypePos<T>() +
static_cast<int>(proto::VarType::TUPLE) * 2;
};
// Users should set some of variable type ids to be what is defined in
// framework.proto below
REG_PROTO_VAR_TYPE_TRAIT(LoDTensor, proto::VarType::LOD_TENSOR);
REG_PROTO_VAR_TYPE_TRAIT(SelectedRows, proto::VarType::SELECTED_ROWS);
REG_PROTO_VAR_TYPE_TRAIT(std::vector<Scope *>, proto::VarType::STEP_SCOPES);
REG_PROTO_VAR_TYPE_TRAIT(LoDRankTable, proto::VarType::LOD_RANK_TABLE);
REG_PROTO_VAR_TYPE_TRAIT(LoDTensorArray, proto::VarType::LOD_TENSOR_ARRAY);
REG_PROTO_VAR_TYPE_TRAIT(platform::PlaceList, proto::VarType::PLACE_LIST);
REG_PROTO_VAR_TYPE_TRAIT(ReaderHolder, proto::VarType::READER);
REG_PROTO_VAR_TYPE_TRAIT(int, proto::VarType::INT32);
REG_PROTO_VAR_TYPE_TRAIT(float, proto::VarType::FP32);
/** End of variable type registration */
template <typename T>
inline constexpr bool IsRegisteredVarType() {
return VarTypeRegistry::IsRegistered<T>();
}
#undef REG_PROTO_VAR_TYPE_TRAIT
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif
namespace paddle {
namespace framework {
template <int kPos, int kEnd, bool kStop>
struct TypeIndexChecker {
template <typename SetType1, typename SetType2>
static void Check(SetType1 *var_id_set, SetType2 *type_index_set) {
using Type =
typename std::tuple_element<kPos, VarTypeRegistry::ArgTuple>::type;
static_assert(std::is_same<typename VarTypeTrait<Type>::Type, Type>::value,
"Type must be the same");
constexpr auto kId = VarTypeTrait<Type>::kId;
std::type_index actual_type(typeid(Type));
EXPECT_EQ(std::string(ToTypeName(kId)), std::string(actual_type.name()));
EXPECT_EQ(ToTypeIndex(kId), actual_type);
EXPECT_EQ(ToTypeId(actual_type), kId);
EXPECT_EQ(ToTypeIndex(ToTypeId(actual_type)), actual_type);
EXPECT_EQ(ToTypeId(ToTypeIndex(kId)), kId);
EXPECT_TRUE(var_id_set->count(kId) == 0); // NOLINT
EXPECT_TRUE(type_index_set->count(actual_type) == 0); // NOLINT
var_id_set->insert(kId);
type_index_set->insert(std::type_index(typeid(Type)));
TypeIndexChecker<kPos + 1, kEnd, kPos + 1 == kEnd>::Check(var_id_set,
type_index_set);
}
};
template <int kPos, int kEnd>
struct TypeIndexChecker<kPos, kEnd, true> {
template <typename SetType1, typename SetType2>
static void Check(SetType1 *, SetType2 *) {}
};
TEST(var_type_traits, check_no_duplicate_registry) {
constexpr size_t kRegisteredNum = VarTypeRegistry::kRegisteredTypeNum;
std::unordered_set<int> var_id_set;
std::unordered_set<std::type_index> type_index_set;
TypeIndexChecker<0, kRegisteredNum, kRegisteredNum == 0>::Check(
&var_id_set, &type_index_set);
}
template <typename T>
bool CheckVarId(int proto_id) {
static_assert(std::is_same<typename VarTypeTrait<T>::Type, T>::value,
"Type must be the same");
return VarTypeTrait<T>::kId == proto_id;
}
TEST(var_type_traits, check_proto_type_id) {
ASSERT_TRUE(CheckVarId<LoDTensor>(proto::VarType::LOD_TENSOR));
ASSERT_TRUE(CheckVarId<SelectedRows>(proto::VarType::SELECTED_ROWS));
ASSERT_TRUE(CheckVarId<std::vector<Scope *>>(proto::VarType::STEP_SCOPES));
ASSERT_TRUE(CheckVarId<LoDRankTable>(proto::VarType::LOD_RANK_TABLE));
ASSERT_TRUE(CheckVarId<LoDTensorArray>(proto::VarType::LOD_TENSOR_ARRAY));
ASSERT_TRUE(CheckVarId<platform::PlaceList>(proto::VarType::PLACE_LIST));
ASSERT_TRUE(CheckVarId<ReaderHolder>(proto::VarType::READER));
ASSERT_TRUE(CheckVarId<int>(proto::VarType::INT32));
ASSERT_TRUE(CheckVarId<float>(proto::VarType::FP32));
ASSERT_EQ(proto::VarType_Type_LOD_TENSOR, proto::VarType::LOD_TENSOR);
ASSERT_EQ(proto::VarType_Type_SELECTED_ROWS, proto::VarType::SELECTED_ROWS);
ASSERT_EQ(proto::VarType_Type_STEP_SCOPES, proto::VarType::STEP_SCOPES);
ASSERT_EQ(proto::VarType_Type_LOD_RANK_TABLE, proto::VarType::LOD_RANK_TABLE);
ASSERT_EQ(proto::VarType_Type_LOD_TENSOR_ARRAY,
proto::VarType::LOD_TENSOR_ARRAY);
ASSERT_EQ(proto::VarType_Type_PLACE_LIST, proto::VarType::PLACE_LIST);
ASSERT_EQ(proto::VarType_Type_READER, proto::VarType::READER);
ASSERT_EQ(proto::VarType_Type_FEED_MINIBATCH, proto::VarType::FEED_MINIBATCH);
ASSERT_EQ(proto::VarType_Type_FETCH_LIST, proto::VarType::FETCH_LIST);
ASSERT_EQ(proto::VarType_Type_RAW, proto::VarType::RAW);
ASSERT_EQ(proto::VarType_Type_TUPLE, proto::VarType::TUPLE);
ASSERT_EQ(proto::VarType_Type_INT32, proto::VarType::INT32);
ASSERT_EQ(proto::VarType_Type_FP32, proto::VarType::FP32);
}
TEST(var_type_traits, test_registry) {
using Registry = detail::VarTypeRegistryImpl<int8_t, int32_t, size_t, double>;
ASSERT_TRUE(Registry::TypePos<int8_t>() == 0);
ASSERT_TRUE(Registry::TypePos<int32_t>() == 1);
ASSERT_TRUE(Registry::TypePos<size_t>() == 2);
ASSERT_TRUE(Registry::TypePos<double>() == 3);
ASSERT_TRUE(Registry::TypePos<float>() == -1);
}
} // namespace framework
} // namespace paddle
......@@ -18,7 +18,7 @@
#include <typeindex>
#include <typeinfo>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/framework/var_type_traits.h"
namespace paddle {
namespace framework {
......@@ -27,10 +27,14 @@ class Variable {
public:
template <typename T>
const T& Get() const {
static_assert(
IsRegisteredVarType<T>(),
"Not registered type. Please register T inside var_type_traits.h");
PADDLE_ENFORCE(holder_ != nullptr, "Variable must hold some thing");
PADDLE_ENFORCE(IsType<T>(),
PADDLE_ENFORCE(holder_->Type() == VarTypeTrait<T>::kId,
"Variable must be type %s, the holding type is %s",
typeid(T).name(), holder_->Type().name());
ToTypeName(VarTypeTrait<T>::kId),
ToTypeName(holder_->Type()));
return *static_cast<const T*>(holder_->Ptr());
}
......@@ -39,61 +43,61 @@ class Variable {
template <typename T>
T* GetMutable() {
if (!holder_) {
holder_.reset(new PlaceholderImpl<T>(new T()));
holder_.reset(new PlaceholderImpl<T>());
} else {
PADDLE_ENFORCE(IsType<T>(),
PADDLE_ENFORCE(holder_->Type() == VarTypeTrait<T>::kId,
"Variable must be type %s, the holding type is %s",
typeid(T).name(), holder_->Type().name());
ToTypeName(VarTypeTrait<T>::kId),
ToTypeName(holder_->Type()));
}
return static_cast<T*>(holder_->Ptr());
}
template <typename T>
bool IsType() const {
return holder_ != nullptr &&
std::type_index(typeid(T)) == std::type_index(holder_->Type());
return holder_ && holder_->Type() == VarTypeTrait<T>::kId;
}
void Clear() { holder_.reset(); }
std::type_index Type() const {
int Type() const {
PADDLE_ENFORCE(holder_ != nullptr, "Must hold memory");
return holder_->Type();
}
private:
struct Placeholder {
virtual ~Placeholder() {}
virtual const std::type_info& Type() const = 0;
virtual void* Ptr() const = 0;
virtual ~Placeholder() = default;
inline int Type() const { return type_; }
inline const void* Ptr() const { return ptr_; }
inline void* Ptr() { return ptr_; }
protected:
inline void Init(void* p, int type) {
ptr_ = p;
type_ = type;
}
void* ptr_;
int type_;
};
// Placeholder hides type T, so it doesn't appear as a template
// parameter of Variable.
template <typename T>
struct PlaceholderImpl : public Placeholder {
explicit PlaceholderImpl(T* ptr) : ptr_(ptr), type_(typeid(T)) {}
virtual const std::type_info& Type() const { return type_; }
virtual void* Ptr() const { return static_cast<void*>(ptr_.get()); }
static_assert(
IsRegisteredVarType<T>(),
"Not registered type. Please register T inside var_type_traits.h");
PlaceholderImpl() { this->Init(&obj_, VarTypeTrait<T>::kId); }
std::unique_ptr<T> ptr_;
const std::type_info& type_;
private:
T obj_;
};
std::unique_ptr<Placeholder>
holder_; // pointers to a PlaceholderImpl object indeed.
// name_ is only meaningful with a Scope and accessible by it.
//
// NOTE: Please don't expose name_ by adding methods like
// Variable::Name or Scope::VarName! A variable could have a human
// readable name or an auto-generated scope-unique name. In the
// former case, the caller knows the name and doesn't need to access
// the name; in the latter case, the variable should be identified
// by its address but not the unreadable name.
friend class Scope;
const std::string* name_;
// pointers to a PlaceholderImpl object indeed.
std::unique_ptr<Placeholder> holder_;
};
} // namespace framework
......
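The variable.h hunk above replaces the old PlaceholderImpl, which heap-allocated T behind a std::unique_ptr and identified it via std::type_info, with one that stores T inline and tags it with the integer id from VarTypeTrait. A toy, self-contained sketch of that design choice; the names and the hard-coded id are illustrative, not Paddle's.

// Type erasure with inline storage and an integer type tag (toy version).
#include <cassert>
#include <memory>

struct Placeholder {
  virtual ~Placeholder() = default;
  int Type() const { return type_; }
  void* Ptr() { return ptr_; }

 protected:
  void Init(void* p, int type) { ptr_ = p; type_ = type; }
  void* ptr_;
  int type_;
};

template <typename T, int kId>
struct PlaceholderImpl : Placeholder {
  PlaceholderImpl() { this->Init(&obj_, kId); }

 private:
  T obj_;  // stored inline: one allocation for the placeholder, none for T
};

int main() {
  std::unique_ptr<Placeholder> holder(new PlaceholderImpl<int, /*kId=*/7>());
  assert(holder->Type() == 7);               // cheap integer comparison, no RTTI
  *static_cast<int*>(holder->Ptr()) = 42;
  return *static_cast<int*>(holder->Ptr()) == 42 ? 0 : 1;
}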
......@@ -16,27 +16,28 @@
#include <string>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
TEST(Variable, GetMutable) {
using paddle::framework::Variable;
struct Tensor {
int content_;
};
namespace paddle {
namespace framework {
TEST(Variable, GetMutable) {
std::unique_ptr<Variable> v(new Variable());
Tensor* t = v->GetMutable<Tensor>();
t->content_ = 1234;
auto* t = v->GetMutable<std::string>();
*t = "1234";
const Tensor& tt = v->Get<Tensor>();
EXPECT_EQ(1234, tt.content_);
const auto& tt = v->Get<std::string>();
EXPECT_EQ("1234", tt);
try {
v->GetMutable<std::string>();
v->GetMutable<Tensor>();
} catch (std::exception& e) {
return;
}
EXPECT_TRUE(false);
}
} // namespace framework
} // namespace paddle
......@@ -69,17 +69,17 @@ void TestWord2vecPrediction(const std::string& model_path) {
std::vector<PaddleTensor> outputs;
CHECK(predictor->Run(slots, &outputs));
PADDLE_ENFORCE(outputs.size(), 1UL);
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL);
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(static_cast<size_t>(5UL), num_elements);
i++) {
LOG(INFO) << "data: "
<< static_cast<float*>(outputs.front().data.data())[i];
LOG(INFO) << "data: " << static_cast<float*>(outputs.front().data.data())[i]
<< " result: " << result[i];
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
}
......
......@@ -25,7 +25,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
// TODO(Superjomn) should avoid the case when a TensorArray is a
// parameter.
if (var_name == "feed" || var_name == "fetch") continue;
if (var->Type() == typeid(framework::LoDTensorArray)) {
if (var->IsType<framework::LoDTensorArray>()) {
VLOG(4) << "collect " << var_name;
arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
}
......
......@@ -27,8 +27,11 @@ namespace details {
// training phase.
struct TensorArrayBatchCleaner {
TensorArrayBatchCleaner() {
valid_types_.insert(typeid(framework::Tensor));
valid_types_.insert(typeid(framework::LoDTensor));
constexpr auto kTensorId = framework::VarTypeTrait<framework::Tensor>::kId;
constexpr auto kLoDTensorId =
framework::VarTypeTrait<framework::LoDTensor>::kId;
valid_types_.insert(kTensorId);
valid_types_.insert(kLoDTensorId);
}
// Collect the variables that are not Tensor or LoDTensor, and reset them to a
// bool(trick), because some of them are containers, and some operators just
......@@ -46,7 +49,7 @@ struct TensorArrayBatchCleaner {
bool no_tensor_flag_{true};
std::vector<framework::LoDTensorArray *> arrays_;
std::unordered_set<std::type_index> valid_types_;
std::unordered_set<int> valid_types_;
std::unordered_set<framework::Variable *> no_tensor_vars_;
};
......
......@@ -113,6 +113,16 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
template <typename T>
static void TensorAssignData(PaddleTensor *tensor,
const std::vector<std::vector<T>> &data,
const std::vector<size_t> &lod) {
int size = lod[lod.size() - 1];
tensor->shape.assign({size, 1});
tensor->lod.assign({lod});
TensorAssignData(tensor, data);
}
template <typename T>
static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
const std::vector<std::vector<T>> &data) {
......
......@@ -98,10 +98,8 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
auto one_batch = data->NextBatch();
PaddleTensor input_tensor;
input_tensor.name = "word";
input_tensor.shape.assign({static_cast<int>(one_batch.data.size()), 1});
input_tensor.lod.assign({one_batch.lod});
input_tensor.dtype = PaddleDType::INT64;
TensorAssignData<int64_t>(&input_tensor, {one_batch.data});
TensorAssignData<int64_t>(&input_tensor, {one_batch.data}, one_batch.lod);
PADDLE_ENFORCE_EQ(batch_size, static_cast<int>(one_batch.lod.size() - 1));
input_slots->assign({input_tensor});
}
......
......@@ -19,11 +19,9 @@ namespace inference {
using contrib::AnalysisConfig;
struct DataRecord {
std::vector<std::vector<int64_t>> query_data_all, title_data_all;
std::vector<std::vector<int64_t>> query, title;
std::vector<size_t> lod1, lod2;
size_t batch_iter{0};
size_t batch_size{1};
size_t num_samples; // total number of samples
size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) {
......@@ -33,22 +31,9 @@ struct DataRecord {
DataRecord data;
size_t batch_end = batch_iter + batch_size;
// NOTE: skip the final batch if not enough data is provided.
if (batch_end <= query_data_all.size()) {
data.query_data_all.assign(query_data_all.begin() + batch_iter,
query_data_all.begin() + batch_end);
data.title_data_all.assign(title_data_all.begin() + batch_iter,
title_data_all.begin() + batch_end);
// Prepare LoDs
data.lod1.push_back(0);
data.lod2.push_back(0);
CHECK(!data.query_data_all.empty());
CHECK(!data.title_data_all.empty());
CHECK_EQ(data.query_data_all.size(), data.title_data_all.size());
for (size_t j = 0; j < data.query_data_all.size(); j++) {
// calculate lod
data.lod1.push_back(data.lod1.back() + data.query_data_all[j].size());
data.lod2.push_back(data.lod2.back() + data.title_data_all[j].size());
}
if (batch_end <= query.size()) {
GetInputPerBatch(query, &data.query, &data.lod1, batch_iter, batch_end);
GetInputPerBatch(title, &data.title, &data.lod2, batch_iter, batch_end);
}
batch_iter += batch_size;
return data;
......@@ -67,8 +52,8 @@ struct DataRecord {
// load title data
std::vector<int64_t> title_data;
split_to_int64(data[1], ' ', &title_data);
query_data_all.push_back(std::move(query_data));
title_data_all.push_back(std::move(title_data));
query.push_back(std::move(query_data));
title.push_back(std::move(title_data));
}
num_samples = num_lines;
}
......@@ -80,15 +65,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_query_tensor.name = "left";
lod_title_tensor.name = "right";
auto one_batch = data->NextBatch();
int size1 = one_batch.lod1[one_batch.lod1.size() - 1]; // token batch size
int size2 = one_batch.lod2[one_batch.lod2.size() - 1]; // token batch size
lod_query_tensor.shape.assign({size1, 1});
lod_query_tensor.lod.assign({one_batch.lod1});
lod_title_tensor.shape.assign({size2, 1});
lod_title_tensor.lod.assign({one_batch.lod2});
// assign data
TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query_data_all);
TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title_data_all);
TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query, one_batch.lod1);
TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title, one_batch.lod2);
// Set inputs.
input_slots->assign({lod_query_tensor, lod_title_tensor});
for (auto &tensor : *input_slots) {
......
......@@ -19,11 +19,9 @@ namespace inference {
using contrib::AnalysisConfig;
struct DataRecord {
std::vector<std::vector<int64_t>> word_data_all, mention_data_all;
std::vector<std::vector<int64_t>> word, mention;
std::vector<size_t> lod; // two inputs have the same lod info.
size_t batch_iter{0};
size_t batch_size{1};
size_t num_samples; // total number of samples
size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) {
......@@ -33,20 +31,10 @@ struct DataRecord {
DataRecord data;
size_t batch_end = batch_iter + batch_size;
// NOTE: skip the final batch if not enough data is provided.
if (batch_end <= word_data_all.size()) {
data.word_data_all.assign(word_data_all.begin() + batch_iter,
word_data_all.begin() + batch_end);
data.mention_data_all.assign(mention_data_all.begin() + batch_iter,
mention_data_all.begin() + batch_end);
// Prepare LoDs
data.lod.push_back(0);
CHECK(!data.word_data_all.empty());
CHECK(!data.mention_data_all.empty());
CHECK_EQ(data.word_data_all.size(), data.mention_data_all.size());
for (size_t j = 0; j < data.word_data_all.size(); j++) {
// calculate lod
data.lod.push_back(data.lod.back() + data.word_data_all[j].size());
}
if (batch_end <= word.size()) {
GetInputPerBatch(word, &data.word, &data.lod, batch_iter, batch_end);
GetInputPerBatch(mention, &data.mention, &data.lod, batch_iter,
batch_end);
}
batch_iter += batch_size;
return data;
......@@ -65,8 +53,8 @@ struct DataRecord {
// load mention data
std::vector<int64_t> mention_data;
split_to_int64(data[3], ' ', &mention_data);
word_data_all.push_back(std::move(word_data));
mention_data_all.push_back(std::move(mention_data));
word.push_back(std::move(word_data));
mention.push_back(std::move(mention_data));
}
num_samples = num_lines;
}
......@@ -78,14 +66,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_word_tensor.name = "word";
lod_mention_tensor.name = "mention";
auto one_batch = data->NextBatch();
int size = one_batch.lod[one_batch.lod.size() - 1]; // token batch size
lod_word_tensor.shape.assign({size, 1});
lod_word_tensor.lod.assign({one_batch.lod});
lod_mention_tensor.shape.assign({size, 1});
lod_mention_tensor.lod.assign({one_batch.lod});
// assign data
TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word_data_all);
TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention_data_all);
TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word, one_batch.lod);
TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention,
one_batch.lod);
// Set inputs.
input_slots->assign({lod_word_tensor, lod_mention_tensor});
for (auto &tensor : *input_slots) {
......
......@@ -18,12 +18,9 @@ namespace paddle {
namespace inference {
struct DataRecord {
std::vector<std::vector<int64_t>> title1_all, title2_all, title3_all, l1_all;
std::vector<std::vector<int64_t>> title1, title2, title3, l1;
std::vector<size_t> title1_lod, title2_lod, title3_lod, l1_lod;
size_t batch_iter{0};
size_t batch_size{1};
size_t num_samples; // total number of samples
std::vector<size_t> lod1, lod2, lod3, l1_lod;
size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) {
......@@ -33,41 +30,11 @@ struct DataRecord {
DataRecord data;
size_t batch_end = batch_iter + batch_size;
// NOTE: skip the final batch if not enough data is provided.
if (batch_end <= title1_all.size()) {
data.title1_all.assign(title1_all.begin() + batch_iter,
title1_all.begin() + batch_end);
data.title2_all.assign(title2_all.begin() + batch_iter,
title2_all.begin() + batch_end);
data.title3_all.assign(title3_all.begin() + batch_iter,
title3_all.begin() + batch_end);
data.l1_all.assign(l1_all.begin() + batch_iter,
l1_all.begin() + batch_end);
// Prepare LoDs
data.title1_lod.push_back(0);
data.title2_lod.push_back(0);
data.title3_lod.push_back(0);
data.l1_lod.push_back(0);
CHECK(!data.title1_all.empty());
CHECK(!data.title2_all.empty());
CHECK(!data.title3_all.empty());
CHECK(!data.l1_all.empty());
CHECK_EQ(data.title1_all.size(), data.title2_all.size());
CHECK_EQ(data.title1_all.size(), data.title3_all.size());
CHECK_EQ(data.title1_all.size(), data.l1_all.size());
for (size_t j = 0; j < data.title1_all.size(); j++) {
data.title1.push_back(data.title1_all[j]);
data.title2.push_back(data.title2_all[j]);
data.title3.push_back(data.title3_all[j]);
data.l1.push_back(data.l1_all[j]);
// calculate lod
data.title1_lod.push_back(data.title1_lod.back() +
data.title1_all[j].size());
data.title2_lod.push_back(data.title2_lod.back() +
data.title2_all[j].size());
data.title3_lod.push_back(data.title3_lod.back() +
data.title3_all[j].size());
data.l1_lod.push_back(data.l1_lod.back() + data.l1_all[j].size());
}
if (batch_end <= title1.size()) {
GetInputPerBatch(title1, &data.title1, &data.lod1, batch_iter, batch_end);
GetInputPerBatch(title2, &data.title2, &data.lod2, batch_iter, batch_end);
GetInputPerBatch(title3, &data.title3, &data.lod3, batch_iter, batch_end);
GetInputPerBatch(l1, &data.l1, &data.l1_lod, batch_iter, batch_end);
}
batch_iter += batch_size;
return data;
......@@ -92,10 +59,10 @@ struct DataRecord {
// load l1 data
std::vector<int64_t> l1_data;
split_to_int64(data[3], ' ', &l1_data);
title1_all.push_back(std::move(title1_data));
title2_all.push_back(std::move(title2_data));
title3_all.push_back(std::move(title3_data));
l1_all.push_back(std::move(l1_data));
title1.push_back(std::move(title1_data));
title2.push_back(std::move(title2_data));
title3.push_back(std::move(title3_data));
l1.push_back(std::move(l1_data));
}
num_samples = num_lines;
}
......@@ -109,24 +76,11 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
title3_tensor.name = "title3";
l1_tensor.name = "l1";
auto one_batch = data->NextBatch();
int title1_size = one_batch.title1_lod[one_batch.title1_lod.size() - 1];
title1_tensor.shape.assign({title1_size, 1});
title1_tensor.lod.assign({one_batch.title1_lod});
int title2_size = one_batch.title2_lod[one_batch.title2_lod.size() - 1];
title2_tensor.shape.assign({title2_size, 1});
title2_tensor.lod.assign({one_batch.title2_lod});
int title3_size = one_batch.title3_lod[one_batch.title3_lod.size() - 1];
title3_tensor.shape.assign({title3_size, 1});
title3_tensor.lod.assign({one_batch.title3_lod});
int l1_size = one_batch.l1_lod[one_batch.l1_lod.size() - 1];
l1_tensor.shape.assign({l1_size, 1});
l1_tensor.lod.assign({one_batch.l1_lod});
// assign data
TensorAssignData<int64_t>(&title1_tensor, one_batch.title1);
TensorAssignData<int64_t>(&title2_tensor, one_batch.title2);
TensorAssignData<int64_t>(&title3_tensor, one_batch.title3);
TensorAssignData<int64_t>(&l1_tensor, one_batch.l1);
TensorAssignData<int64_t>(&title1_tensor, one_batch.title1, one_batch.lod1);
TensorAssignData<int64_t>(&title2_tensor, one_batch.title2, one_batch.lod2);
TensorAssignData<int64_t>(&title3_tensor, one_batch.title3, one_batch.lod3);
TensorAssignData<int64_t>(&l1_tensor, one_batch.l1, one_batch.l1_lod);
// Set inputs.
input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor});
for (auto &tensor : *input_slots) {
......
......@@ -176,6 +176,18 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
(*inputs).emplace_back(input_slots);
}
void GetInputPerBatch(const std::vector<std::vector<int64_t>> &in,
std::vector<std::vector<int64_t>> *out,
std::vector<size_t> *lod, size_t batch_iter,
size_t batch_end) {
lod->clear();
lod->push_back(0);
for (auto it = in.begin() + batch_iter; it < in.begin() + batch_end; it++) {
out->push_back(*it);
lod->push_back(lod->back() + (*it).size()); // calculate lod
}
}
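A minimal usage sketch (not part of this diff) of how GetInputPerBatch pairs with the lod-aware TensorAssignData overload when a test prepares one input slot; the tensor name is illustrative.
// Sketch: slice one batch out of `word`, collect its LoD, and assign both
// to a PaddleTensor in a single call.
std::vector<std::vector<int64_t>> batch_word;
std::vector<size_t> batch_lod;
GetInputPerBatch(word, &batch_word, &batch_lod, batch_iter, batch_end);
PaddleTensor word_tensor;
word_tensor.name = "word";               // illustrative input name
word_tensor.dtype = PaddleDType::INT64;
TensorAssignData<int64_t>(&word_tensor, batch_word, batch_lod);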
void TestOneThreadPrediction(
const PaddlePredictor::Config *config,
const std::vector<std::vector<PaddleTensor>> &inputs,
......
......@@ -3,14 +3,16 @@ set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
"A path setting inference demo download directories.")
function (inference_download install_dir url filename)
message(STATUS "Download inference test stuff from ${url}/${filename}")
execute_process(COMMAND bash -c "mkdir -p ${install_dir}")
execute_process(COMMAND bash -c "cd ${install_dir} && wget -q ${url}/${filename}")
file(DOWNLOAD "${url}/${filename}" "${install_dir}/${filename}")
message(STATUS "finish downloading ${filename}")
endfunction()
function (inference_download_and_uncompress install_dir url filename)
inference_download(${install_dir} ${url} ${filename})
execute_process(COMMAND bash -c "cd ${install_dir} && tar xzf ${filename}")
execute_process(
COMMAND ${CMAKE_COMMAND} -E tar xzf ${install_dir}/${filename}
WORKING_DIRECTORY ${install_dir}
)
endfunction()
set(WORD2VEC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/word2vec")
......
......@@ -46,7 +46,7 @@ endif()
register_operators(EXCLUDES py_func_op warpctc_op conv_fusion_op DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
# warpctc_op needs cudnn 7 above
if (WITH_GPU AND NOT WIN32)
if (WITH_GPU)
if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc)
else()
......
......@@ -64,7 +64,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
output->mutable_data<T>(context.GetPlace());
} else {
PADDLE_THROW("Unexpected branch, input variable type is %s",
in_var->Type().name());
framework::ToTypeName(in_var->Type()));
}
PADDLE_ENFORCE_NOT_NULL(input);
......
......@@ -175,14 +175,13 @@ class WhileGradOp : public framework::OperatorBase {
auto &og_inside =
detail::Ref(cur_scope.Var(inside_og_name),
"Cannot find inside gradient %s", inside_og_name);
if (framework::IsType<framework::LoDTensor>(og_outside.Type())) {
if (og_outside.IsType<framework::LoDTensor>()) {
auto &outside_tensor = og_outside.Get<framework::LoDTensor>();
auto &inside_tensor =
detail::Ref(og_inside.GetMutable<framework::LoDTensor>());
inside_tensor.set_lod(outside_tensor.lod());
inside_tensor.ShareDataWith(outside_tensor);
} else if (framework::IsType<framework::LoDTensorArray>(
og_outside.Type())) {
} else if (og_outside.IsType<framework::LoDTensorArray>()) {
auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
auto &inside_array =
detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
......@@ -256,7 +255,7 @@ class WhileGradOp : public framework::OperatorBase {
var->IsType<LoDTensor>(),
"Currently the type of var only can be LoDTensorArray, "
"or LoDTensor, but the received var[%s] is %s.",
inside_grad_name, var->Type().name());
inside_grad_name, framework::ToTypeName(var->Type()));
if (var->IsType<LoDTensor>()) {
auto &inside_tensor = var->Get<framework::LoDTensor>();
......
......@@ -22,7 +22,7 @@ DECLARE_bool(cudnn_exhaustive_search);
namespace paddle {
namespace operators {
#if CUDNN_VERSION >= 7001
#if CUDNN_VERSION >= 7100
using Tensor = framework::Tensor;
using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
......@@ -204,7 +204,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
} // namespace operators
} // namespace paddle
#if CUDNN_VERSION >= 7001
#if CUDNN_VERSION >= 7100
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(conv2d_fusion, ops::CUDNNConvFusionOpKernel<float>,
ops::CUDNNConvFusionOpKernel<double>);
......
......@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace paddle {
namespace operators {
......@@ -22,239 +22,6 @@ namespace operators {
using LoDTensor = framework::LoDTensor;
using Tensor = framework::Tensor;
struct CudnnRNNCache {
CudnnRNNCache() {
x_desc_ = NULL;
y_desc_ = NULL;
dx_desc_ = NULL;
dy_desc_ = NULL;
}
~CudnnRNNCache() { release(); }
cudnnRNNDescriptor_t rnn_desc_;
cudnnTensorDescriptor_t *x_desc_;
cudnnTensorDescriptor_t *y_desc_;
cudnnTensorDescriptor_t *dx_desc_;
cudnnTensorDescriptor_t *dy_desc_;
cudnnTensorDescriptor_t hx_desc_;
cudnnTensorDescriptor_t cx_desc_;
cudnnTensorDescriptor_t hy_desc_;
cudnnTensorDescriptor_t cy_desc_;
cudnnTensorDescriptor_t dhx_desc_;
cudnnTensorDescriptor_t dcx_desc_;
cudnnTensorDescriptor_t dhy_desc_;
cudnnTensorDescriptor_t dcy_desc_;
cudnnTensorDescriptor_t output_x_desc_;
cudnnTensorDescriptor_t output_y_desc_;
cudnnDropoutDescriptor_t dropout_desc_;
size_t weights_size_;
cudnnFilterDescriptor_t w_desc_;
cudnnFilterDescriptor_t dw_desc_;
size_t workspace_size_;
size_t reserve_size_;
Tensor reserve_data_;
Tensor workspace_data_;
Tensor dropout_state_;
size_t max_length_;
float dropout_prob_;
bool is_bidirec_;
int batch_size_;
int input_size_;
int hidden_size_;
int num_layers_;
int seed_;
void init(cudnnHandle_t handle, const framework::ExecutionContext &ctx,
size_t max_len, int batch_size, int input_size, int hidden_size,
int num_layers, float dropout_prob, bool is_bidirec, int seed,
int weight_numel) {
max_length_ = max_len;
batch_size_ = batch_size;
input_size_ = input_size;
hidden_size_ = hidden_size;
num_layers_ = num_layers;
dropout_prob_ = dropout_prob;
is_bidirec_ = is_bidirec;
seed_ = seed;
x_desc_ = new cudnnTensorDescriptor_t[max_length_];
y_desc_ = new cudnnTensorDescriptor_t[max_length_];
dx_desc_ = new cudnnTensorDescriptor_t[max_length_];
dy_desc_ = new cudnnTensorDescriptor_t[max_length_];
int dim_a[3];
int stride_a[3];
for (size_t i = 0; i < max_length_; ++i) {
CUDNN_ENFORCE(
platform::dynload::cudnnCreateTensorDescriptor(&x_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnCreateTensorDescriptor(&y_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnCreateTensorDescriptor(&dx_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnCreateTensorDescriptor(&dy_desc_[i]));
dim_a[0] = batch_size_;
dim_a[1] = input_size_;
dim_a[2] = 1;
stride_a[0] = dim_a[2] * dim_a[1];
stride_a[1] = dim_a[2];
stride_a[2] = 1;
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
x_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dx_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
dim_a[0] = batch_size_;
dim_a[1] = is_bidirec_ ? hidden_size_ * 2 : hidden_size_;
dim_a[2] = 1;
stride_a[0] = dim_a[2] * dim_a[1];
stride_a[1] = dim_a[2];
stride_a[2] = 1;
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
y_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dy_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
}
dim_a[0] = num_layers_ * (is_bidirec_ ? 2 : 1);
dim_a[1] = batch_size_;
dim_a[2] = hidden_size_;
stride_a[0] = dim_a[2] * dim_a[1];
stride_a[1] = dim_a[2];
stride_a[2] = 1;
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&hx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&cx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&hy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&cy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dhx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dcx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dhy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dcy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
hx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
cx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
hy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
cy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dhx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dcx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dhy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dcy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(
platform::dynload::cudnnCreateDropoutDescriptor(&dropout_desc_));
size_t state_size;
CUDNN_ENFORCE(
    platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
dropout_state_.Resize({static_cast<int64_t>(state_size)});
auto *dropout_state_data =
dropout_state_.mutable_data<uint8_t>(ctx.GetPlace());
CUDNN_ENFORCE(platform::dynload::cudnnSetDropoutDescriptor(
dropout_desc_, handle, dropout_prob_, dropout_state_data, state_size,
seed_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateRNNDescriptor(&rnn_desc_));
#if CUDNN_VERSION >= 6000
CUDNN_ENFORCE(platform::dynload::cudnnSetRNNDescriptor_v6(
handle, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
CUDNN_RNN_ALGO_STANDARD, CUDNN_DATA_FLOAT));
#else
CUDNN_ENFORCE(platform::dynload::cudnnSetRNNDescriptor(
rnn_desc_, hidden_size_, num_layers_, dropout_desc_, CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
CUDNN_DATA_FLOAT));
#endif
CUDNN_ENFORCE(platform::dynload::cudnnCreateFilterDescriptor(&w_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateFilterDescriptor(&dw_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnGetRNNParamsSize(
handle, rnn_desc_, x_desc_[0], &weights_size_, CUDNN_DATA_FLOAT));
PADDLE_ENFORCE_EQ(weights_size_, sizeof(float) * weight_numel,
"cudnn lstm weight size should be SAME");
int dim_w[3];
dim_w[0] = weights_size_ / sizeof(float);
dim_w[1] = 1;
dim_w[2] = 1;
CUDNN_ENFORCE(platform::dynload::cudnnSetFilterNdDescriptor(
w_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w));
CUDNN_ENFORCE(platform::dynload::cudnnSetFilterNdDescriptor(
dw_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w));
CUDNN_ENFORCE(platform::dynload::cudnnGetRNNWorkspaceSize(
handle, rnn_desc_, max_length_, x_desc_, &workspace_size_));
CUDNN_ENFORCE(platform::dynload::cudnnGetRNNTrainingReserveSize(
handle, rnn_desc_, max_length_, x_desc_, &reserve_size_));
reserve_data_.Resize({static_cast<int64_t>(reserve_size_)});
reserve_data_.mutable_data<uint8_t>(ctx.GetPlace());
workspace_data_.Resize({static_cast<int64_t>(workspace_size_)});
workspace_data_.mutable_data<uint8_t>(ctx.GetPlace());
}
void release() {
for (size_t i = 0; i < max_length_; ++i) {
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyTensorDescriptor(x_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyTensorDescriptor(y_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyTensorDescriptor(dx_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyTensorDescriptor(dy_desc_[i]));
}
delete[] x_desc_;
delete[] y_desc_;
delete[] dx_desc_;
delete[] dy_desc_;
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(hx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(cx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(hy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(cy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dhx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dcx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dhy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dcy_desc_));
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyDropoutDescriptor(dropout_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyRNNDescriptor(rnn_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyFilterDescriptor(w_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyFilterDescriptor(dw_desc_));
}
};
template <typename T>
class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
public:
......@@ -315,9 +82,9 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
auto input_w_numel = w->numel();
auto batch_size = x->dims()[1];
cudnn_rnn_cache->init(handle, ctx, max_len, batch_size, input_size,
hidden_size, num_layers, dropout_prob, is_bidirec,
seed, input_w_numel);
cudnn_rnn_cache->init(handle, ctx.GetPlace(), max_len, batch_size,
input_size, hidden_size, num_layers, dropout_prob,
is_bidirec, seed, input_w_numel);
}
auto run_seq_len = x->dims()[0];
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace paddle {
namespace operators {
struct CudnnRNNCache {
CudnnRNNCache() {
x_desc_ = NULL;
y_desc_ = NULL;
dx_desc_ = NULL;
dy_desc_ = NULL;
}
~CudnnRNNCache() { release(); }
cudnnRNNDescriptor_t rnn_desc_;
cudnnTensorDescriptor_t *x_desc_;
cudnnTensorDescriptor_t *y_desc_;
cudnnTensorDescriptor_t *dx_desc_;
cudnnTensorDescriptor_t *dy_desc_;
cudnnTensorDescriptor_t hx_desc_;
cudnnTensorDescriptor_t cx_desc_;
cudnnTensorDescriptor_t hy_desc_;
cudnnTensorDescriptor_t cy_desc_;
cudnnTensorDescriptor_t dhx_desc_;
cudnnTensorDescriptor_t dcx_desc_;
cudnnTensorDescriptor_t dhy_desc_;
cudnnTensorDescriptor_t dcy_desc_;
cudnnTensorDescriptor_t output_x_desc_;
cudnnTensorDescriptor_t output_y_desc_;
cudnnDropoutDescriptor_t dropout_desc_;
size_t weights_size_;
cudnnFilterDescriptor_t w_desc_;
cudnnFilterDescriptor_t dw_desc_;
size_t workspace_size_;
size_t reserve_size_;
framework::Tensor reserve_data_;
framework::Tensor workspace_data_;
framework::Tensor dropout_state_;
size_t max_length_;
float dropout_prob_;
bool is_bidirec_;
int batch_size_;
int input_size_;
int hidden_size_;
int num_layers_;
int seed_;
void init(cudnnHandle_t handle, const platform::Place &place, size_t max_len,
int batch_size, int input_size, int hidden_size, int num_layers,
float dropout_prob, bool is_bidirec, int seed, int weight_numel) {
max_length_ = max_len;
batch_size_ = batch_size;
input_size_ = input_size;
hidden_size_ = hidden_size;
num_layers_ = num_layers;
dropout_prob_ = dropout_prob;
is_bidirec_ = is_bidirec;
seed_ = seed;
x_desc_ = new cudnnTensorDescriptor_t[max_length_];
y_desc_ = new cudnnTensorDescriptor_t[max_length_];
dx_desc_ = new cudnnTensorDescriptor_t[max_length_];
dy_desc_ = new cudnnTensorDescriptor_t[max_length_];
int dim_a[3];
int stride_a[3];
for (size_t i = 0; i < max_length_; ++i) {
CUDNN_ENFORCE(
platform::dynload::cudnnCreateTensorDescriptor(&x_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnCreateTensorDescriptor(&y_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnCreateTensorDescriptor(&dx_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnCreateTensorDescriptor(&dy_desc_[i]));
dim_a[0] = batch_size_;
dim_a[1] = input_size_;
dim_a[2] = 1;
stride_a[0] = dim_a[2] * dim_a[1];
stride_a[1] = dim_a[2];
stride_a[2] = 1;
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
x_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dx_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
dim_a[0] = batch_size_;
dim_a[1] = is_bidirec_ ? hidden_size_ * 2 : hidden_size_;
dim_a[2] = 1;
stride_a[0] = dim_a[2] * dim_a[1];
stride_a[1] = dim_a[2];
stride_a[2] = 1;
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
y_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dy_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
}
dim_a[0] = num_layers_ * (is_bidirec_ ? 2 : 1);
dim_a[1] = batch_size_;
dim_a[2] = hidden_size_;
stride_a[0] = dim_a[2] * dim_a[1];
stride_a[1] = dim_a[2];
stride_a[2] = 1;
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&hx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&cx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&hy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&cy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dhx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dcx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dhy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dcy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
hx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
cx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
hy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
cy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dhx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dcx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dhy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
dcy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
CUDNN_ENFORCE(
platform::dynload::cudnnCreateDropoutDescriptor(&dropout_desc_));
size_t state_size;
CUDNN_ENFORCE(
    platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
dropout_state_.Resize({static_cast<int64_t>(state_size)});
auto *dropout_state_data = dropout_state_.mutable_data<uint8_t>(place);
CUDNN_ENFORCE(platform::dynload::cudnnSetDropoutDescriptor(
dropout_desc_, handle, dropout_prob_, dropout_state_data, state_size,
seed_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateRNNDescriptor(&rnn_desc_));
#if CUDNN_VERSION >= 6000
CUDNN_ENFORCE(platform::dynload::cudnnSetRNNDescriptor_v6(
handle, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
CUDNN_RNN_ALGO_STANDARD, CUDNN_DATA_FLOAT));
#else
CUDNN_ENFORCE(platform::dynload::cudnnSetRNNDescriptor(
rnn_desc_, hidden_size_, num_layers_, dropout_desc_, CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
CUDNN_DATA_FLOAT));
#endif
CUDNN_ENFORCE(platform::dynload::cudnnCreateFilterDescriptor(&w_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnCreateFilterDescriptor(&dw_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnGetRNNParamsSize(
handle, rnn_desc_, x_desc_[0], &weights_size_, CUDNN_DATA_FLOAT));
PADDLE_ENFORCE_EQ(weights_size_, sizeof(float) * weight_numel,
"cudnn lstm weight size should be SAME");
int dim_w[3];
dim_w[0] = weights_size_ / sizeof(float);
dim_w[1] = 1;
dim_w[2] = 1;
CUDNN_ENFORCE(platform::dynload::cudnnSetFilterNdDescriptor(
w_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w));
CUDNN_ENFORCE(platform::dynload::cudnnSetFilterNdDescriptor(
dw_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w));
CUDNN_ENFORCE(platform::dynload::cudnnGetRNNWorkspaceSize(
handle, rnn_desc_, max_length_, x_desc_, &workspace_size_));
CUDNN_ENFORCE(platform::dynload::cudnnGetRNNTrainingReserveSize(
handle, rnn_desc_, max_length_, x_desc_, &reserve_size_));
reserve_data_.Resize({static_cast<int64_t>(reserve_size_)});
reserve_data_.mutable_data<uint8_t>(place);
workspace_data_.Resize({static_cast<int64_t>(workspace_size_)});
workspace_data_.mutable_data<uint8_t>(place);
}
void release() {
for (size_t i = 0; i < max_length_; ++i) {
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyTensorDescriptor(x_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyTensorDescriptor(y_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyTensorDescriptor(dx_desc_[i]));
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyTensorDescriptor(dy_desc_[i]));
}
delete[] x_desc_;
delete[] y_desc_;
delete[] dx_desc_;
delete[] dy_desc_;
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(hx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(cx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(hy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(cy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dhx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dcx_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dhy_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dcy_desc_));
CUDNN_ENFORCE(
platform::dynload::cudnnDestroyDropoutDescriptor(dropout_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyRNNDescriptor(rnn_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyFilterDescriptor(w_desc_));
CUDNN_ENFORCE(platform::dynload::cudnnDestroyFilterDescriptor(dw_desc_));
}
};
} // namespace operators
} // namespace paddle
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <array>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
......
......@@ -25,7 +25,7 @@ namespace detail {
*/
template <typename T, typename... ARGS>
inline T& Ref(T* ptr, ARGS&&... args) {
PADDLE_ENFORCE(ptr != nullptr, args...);
PADDLE_ENFORCE(ptr != nullptr, ::paddle::string::Sprintf(args...));
return *ptr;
}
......
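A hedged usage sketch of detail::Ref with the message arguments now funnelled through Sprintf, mirroring the while_op call site earlier in this diff.
// Sketch: Ref() dereferences the pointer after enforcing it is non-null,
// formatting the error message with string::Sprintf as wired above.
auto &og_inside = detail::Ref(cur_scope.Var(inside_og_name),
                              "Cannot find inside gradient %s",
                              inside_og_name);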
......@@ -84,7 +84,9 @@ class ProtoEncodeHelper {
~ProtoEncodeHelper() {
#define REPLACE_ENFORCE_GLOG 1
// Make sure callers didn't do operations that went over the promised max_size
paddle::platform::throw_on_error(p_ <= limit_);
if (paddle::platform::is_error(p_ <= limit_)) {
paddle::platform::throw_on_error(p_ <= limit_);
}
#undef REPLACE_ENFORCE_GLOG
}
......
......@@ -33,7 +33,7 @@ register_operators(EXCLUDES gen_nccl_id_op DEPS ${DISTRIBUTE_DEPS})
if(WITH_GPU AND NOT WIN32)
set(DISTRIBUTE_DEPS ${DISTRIBUTE_DEPS} nccl_common)
op_library(gen_nccl_id_op ${DISTRIBUTE_DEPS} nccl_common)
op_library(gen_nccl_id_op DEPS ${DISTRIBUTE_DEPS} nccl_common)
endif()
set(OPERATOR_DEPS ${OPERATOR_DEPS} ${DISTRIBUTE_DEPS} PARENT_SCOPE)
......
......@@ -116,7 +116,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
} else {
PADDLE_THROW(
"% should be LoDTensor or SelectedRows, but the received type is %s",
ctx.Inputs("Ids")[0], ids_var->Type().name());
ctx.Inputs("Ids")[0], framework::ToTypeName(ids_var->Type()));
}
}
};
......
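Most call sites touched by this commit follow the same pattern, sketched here with an illustrative variable: error messages now report types through framework::ToTypeName instead of the raw type_info name.
// Sketch of the recurring replacement: human-readable type names in errors.
// Before: PADDLE_THROW("... type is %s", var->Type().name());
// After:
PADDLE_THROW("Unexpected variable type %s",
             framework::ToTypeName(var->Type()));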
......@@ -83,7 +83,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
z = ctx.Output<framework::LoDTensor>("Out");
} else {
PADDLE_THROW("X's type[%s] is not supported by elementwise_op.",
x_var->Type().name());
framework::ToTypeName(x_var->Type()));
}
z->mutable_data<T>(ctx.GetPlace());
......
......@@ -50,8 +50,8 @@ template <typename T>
class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(std::is_same<T, float>::value,
"MKLDNN LRN must use float data.");
const bool is_float_type = std::is_same<T, float>::value;
PADDLE_ENFORCE(is_float_type, "MKLDNN LRN must use float data.");
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"MKLDNN LRN must use CPUPlace.");
......@@ -132,8 +132,8 @@ template <typename T>
class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(std::is_same<T, float>::value,
"MKLDNN LRN must use float data.");
const bool is_float_type = std::is_same<T, float>::value;
PADDLE_ENFORCE(is_float_type, "MKLDNN LRN must use float data.");
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"MKLDNN LRN must use CPUPlace.");
PADDLE_ENFORCE(
......
......@@ -27,12 +27,14 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Param").front(), param_var->Type().name());
ctx.Inputs("Param").front(),
framework::ToTypeName(param_var->Type()));
const auto* grad_var = ctx.InputVar("Grad");
PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Grad").front(), grad_var->Type().name());
ctx.Inputs("Grad").front(),
framework::ToTypeName(grad_var->Type()));
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto avg_squared_grad_out_tensor =
......
......@@ -50,7 +50,8 @@ class AdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Param").front(), param_var->Type().name());
ctx.Inputs("Param").front(),
framework::ToTypeName(param_var->Type()));
auto *param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto *moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
......
......@@ -347,7 +347,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Param").front(), param_var->Type().name());
ctx.Inputs("Param").front(),
framework::ToTypeName(param_var->Type()));
using paddle::framework::LoDTensor;
using paddle::operators::detail::Ref;
......
......@@ -27,12 +27,14 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Param").front(), param_var->Type().name());
ctx.Inputs("Param").front(),
framework::ToTypeName(param_var->Type()));
const auto* grad_var = ctx.InputVar("Grad");
PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Grad").front(), grad_var->Type().name());
ctx.Inputs("Grad").front(),
framework::ToTypeName(grad_var->Type()));
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
......
......@@ -27,12 +27,14 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Param").front(), param_var->Type().name());
ctx.Inputs("Param").front(),
framework::ToTypeName(param_var->Type()));
const auto* grad_var = ctx.InputVar("Grad");
PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Grad").front(), grad_var->Type().name());
ctx.Inputs("Grad").front(),
framework::ToTypeName(grad_var->Type()));
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
......
......@@ -32,12 +32,14 @@ class FTRLOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Param").front(), param_var->Type().name());
ctx.Inputs("Param").front(),
framework::ToTypeName(param_var->Type()));
const auto* grad_var = ctx.InputVar("Grad");
PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Grad").front(), grad_var->Type().name());
ctx.Inputs("Grad").front(),
framework::ToTypeName(grad_var->Type()));
auto* param_out = ctx.Output<Tensor>("ParamOut");
auto* sq_accum_out = ctx.Output<Tensor>("SquaredAccumOut");
......
......@@ -395,7 +395,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
PADDLE_THROW(
string::Sprintf("MomentumOp only supports LoDTensor or SelectedRows "
"gradient, but the received Variable Type is %s",
grad_var->Type().name()));
framework::ToTypeName(grad_var->Type())));
}
}
};
......
......@@ -60,7 +60,8 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.Inputs("Param").front(), param_var->Type().name());
ctx.Inputs("Param").front(),
framework::ToTypeName(param_var->Type()));
auto* param = ctx.Input<framework::Tensor>("Param");
auto* param_out = ctx.Output<framework::Tensor>("ParamOut");
......
......@@ -245,7 +245,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
}
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
out_var->Type().name());
framework::ToTypeName(out_var->Type()));
}
}
};
......
......@@ -126,7 +126,7 @@ class SumOp : public framework::OperatorWithKernel {
PADDLE_THROW("Cannot find the input data type by all input data");
}
PADDLE_THROW("Unexpected branch. Input type is %s",
x_vars[0]->Type().name());
framework::ToTypeName(x_vars[0]->Type()));
}
};
......
......@@ -163,7 +163,7 @@ class SumKernel : public framework::OpKernel<T> {
}
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
out_var->Type().name());
framework::ToTypeName(out_var->Type()));
}
}
};
......
......@@ -140,68 +140,72 @@ struct EOFException : public std::exception {
#define LIKELY(condition) (condition)
#endif
inline bool is_error(bool stat) { return !stat; }
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
bool stat, const Args&... args) {
if (UNLIKELY(!(stat))) {
#ifndef REPLACE_ENFORCE_GLOG
throw std::runtime_error(string::Sprintf(args...));
throw std::runtime_error(string::Sprintf(args...));
#else
LOG(FATAL) << string::Sprintf(args...);
LOG(FATAL) << string::Sprintf(args...);
#endif
}
}
#ifdef PADDLE_WITH_CUDA
inline bool is_error(cudaError_t e) { return UNLIKELY(e); }
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
cudaError_t e, const Args&... args) {
if (UNLIKELY(e)) {
#ifndef REPLACE_ENFORCE_GLOG
throw thrust::system_error(e, thrust::cuda_category(),
string::Sprintf(args...));
throw thrust::system_error(e, thrust::cuda_category(),
string::Sprintf(args...));
#else
LOG(FATAL) << string::Sprintf(args...);
LOG(FATAL) << string::Sprintf(args...);
#endif
}
}
inline bool is_error(curandStatus_t stat) {
return stat != CURAND_STATUS_SUCCESS;
}
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
curandStatus_t stat, const Args&... args) {
if (stat != CURAND_STATUS_SUCCESS) {
#ifndef REPLACE_ENFORCE_GLOG
throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(),
string::Sprintf(args...));
throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(),
string::Sprintf(args...));
#else
LOG(FATAL) << string::Sprintf(args...);
LOG(FATAL) << string::Sprintf(args...);
#endif
}
}
inline bool is_error(cudnnStatus_t stat) {
return stat != CUDNN_STATUS_SUCCESS;
}
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
cudnnStatus_t stat, const Args&... args) {
if (stat == CUDNN_STATUS_SUCCESS) {
return;
} else {
#ifndef REPLACE_ENFORCE_GLOG
throw std::runtime_error(platform::dynload::cudnnGetErrorString(stat) +
string::Sprintf(args...));
throw std::runtime_error(platform::dynload::cudnnGetErrorString(stat) +
string::Sprintf(args...));
#else
LOG(FATAL) << string::Sprintf(args...);
LOG(FATAL) << string::Sprintf(args...);
#endif
}
}
inline bool is_error(cublasStatus_t stat) {
return stat != CUBLAS_STATUS_SUCCESS;
}
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
cublasStatus_t stat, const Args&... args) {
std::string err;
if (stat == CUBLAS_STATUS_SUCCESS) {
return;
} else if (stat == CUBLAS_STATUS_NOT_INITIALIZED) {
if (stat == CUBLAS_STATUS_NOT_INITIALIZED) {
err = "CUBLAS: not initialized, ";
} else if (stat == CUBLAS_STATUS_ALLOC_FAILED) {
err = "CUBLAS: alloc failed, ";
......@@ -254,21 +258,49 @@ inline void throw_on_error(T e) {
#define PADDLE_THROW(...) \
throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)
#define __PADDLE_THROW_ERROR_I(_, _9, _8, _7, _6, _5, _4, _3, _2, X_, ...) X_;
#define __THROW_ON_ERROR_ONE_ARG(COND, ARG) \
::paddle::platform::throw_on_error(COND, ::paddle::string::Sprintf(ARG));
#define __PADDLE_THROW_ON_ERROR(COND, ...) \
__PADDLE_THROW_ERROR_I( \
__VA_ARGS__, ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
__THROW_ON_ERROR_ONE_ARG(COND, __VA_ARGS__))
#define __PADDLE_UNARY_COMPARE(COND, ...) \
do { \
auto __cond = COND; \
if (UNLIKELY(::paddle::platform::is_error(__cond))) { \
__PADDLE_THROW_ON_ERROR(__cond, __VA_ARGS__); \
} \
} while (0)
#ifndef REPLACE_ENFORCE_GLOG
#define PADDLE_ENFORCE(...) \
#define __PADDLE_ENFORCE_I(COND, ...) \
do { \
try { \
::paddle::platform::throw_on_error(__VA_ARGS__); \
__PADDLE_UNARY_COMPARE(COND, __VA_ARGS__); \
} catch (...) { \
throw ::paddle::platform::EnforceNotMet(std::current_exception(), \
__FILE__, __LINE__); \
} \
} while (false)
} while (0)
#else
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__);
#define __PADDLE_ENFORCE_I(COND, ...) __PADDLE_UNARY_COMPARE(COND, __VA_ARGS__);
#endif // REPLACE_ENFORCE_GLOG
#define __PADDLE_ENFORCE(__args) __PADDLE_ENFORCE_I __args
#define PADDLE_ENFORCE(...) __PADDLE_ENFORCE((__VA_ARGS__))
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
......
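The test below exercises the reworked macro family; for clarity, a hedged sketch of the three call shapes it is meant to accept (bare condition, plain message, formatted message), all routed through is_error() and throw_on_error() as defined above. Variables are illustrative.
// Sketch: each form compiles against the macros defined above.
PADDLE_ENFORCE(ptr != nullptr);
PADDLE_ENFORCE(ptr != nullptr, "pointer must not be null");
PADDLE_ENFORCE(rows > 0, "invalid rows: %d", rows);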
......@@ -37,6 +37,25 @@ TEST(ENFORCE, FAILED) {
HasPrefix(StringPiece(error.what()), "Enforce is not ok 123 at all"));
}
EXPECT_TRUE(caught_exception);
caught_exception = false;
try {
PADDLE_ENFORCE(false, "Enforce is not ok at all");
} catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
EXPECT_TRUE(
HasPrefix(StringPiece(error.what()), "Enforce is not ok at all"));
}
EXPECT_TRUE(caught_exception);
caught_exception = false;
try {
PADDLE_ENFORCE(false);
} catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
EXPECT_NE(std::string(error.what()).find(" at "), 0);
}
EXPECT_TRUE(caught_exception);
}
TEST(ENFORCE, NO_ARG_OK) {
......
......@@ -12,6 +12,7 @@ limitations under the License. */
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "gtest/gtest.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/init.h"
......
......@@ -11,6 +11,7 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h"
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <bitset>
......
......@@ -49,9 +49,6 @@ void BindConstValue(pybind11::module* m) {
op_proto_and_checker_maker.def(
"kOpNameScopeAttrName",
framework::OpProtoAndCheckerMaker::OpNamescopeAttrName);
op_proto_and_checker_maker.def(
"kOpCreationCallstackAttrName",
framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName);
}
} // namespace pybind
......
......@@ -87,7 +87,7 @@ void Fprintf(std::ostream& out, const char* fmt, const Args&... args) {
template <typename... Args>
std::string Sprintf(const Args&... args) {
std::ostringstream oss;
Fprintf(oss, "");
Fprintf(oss, "%s", args...);
return oss.str();
}
......
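For reference, a hedged sketch of how Sprintf is used elsewhere in this commit; the identifiers are illustrative.
// Sketch: printf-style formatting, as in the MomentumOp error path above.
std::string msg = paddle::string::Sprintf(
    "unsupported gradient type %s", framework::ToTypeName(grad_var->Type()));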
......@@ -20,7 +20,6 @@ import os
import re
import six
import sys
import traceback
import numpy as np
......@@ -605,10 +604,6 @@ class Operator(object):
if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
del op_attrs[role_var_name]
callstack_var_name = op_maker.kOpCreationCallstackAttrName()
op_attrs[callstack_var_name] = list(
reversed(traceback.format_stack()))[1:]
if len(self.desc.type()) != 0:
return
if type is None:
......
......@@ -148,7 +148,7 @@ class ParallelExecutor(object):
trainers_endpoints), "num_trainers == len(end_points)"
build_strategy.trainers_endpoints = trainers_endpoints
# step5: get persistable_vars, parameter_vars, places. persistable_vars
# step6: get persistable_vars, places. persistable_vars
# need be broadcast to other local_scope.
persistable_vars = set([
cpt.to_text(v.name) for v in [
......@@ -164,7 +164,7 @@ class ParallelExecutor(object):
places = list(map(place_obj, self._places))
# step6: init ParallelExecutor
# step7: init ParallelExecutor
self.executor = core.ParallelExecutor(
places, persistable_vars, main.desc,
cpt.to_text(loss_name)
......
......@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
set(mul_op.attr_names),
set([
"x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
"op_namescope", "op_callstack"
"op_namescope"
]))
self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import unittest
from functools import partial
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
def get_places():
places = []
if core.is_compiled_with_cuda():
places.append(core.CUDAPlace(0))
return places
@contextlib.contextmanager
def prog_scope_guard(main_prog, startup_prog):
scope = fluid.core.Scope()
with fluid.unique_name.guard():
with fluid.scope_guard(scope):
with fluid.program_guard(main_prog, startup_prog):
yield
def bow_net(data,
label,
dict_dim,
is_sparse=False,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2):
"""
BOW net
This model is from https://github.com/PaddlePaddle/models:
fluid/PaddleNLP/text_classification/nets.py
"""
emb = fluid.layers.embedding(
input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim])
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return avg_cost
class TestWeightDecay(unittest.TestCase):
def setUp(self):
self.word_dict = paddle.dataset.imdb.word_dict()
reader = paddle.batch(
paddle.dataset.imdb.train(self.word_dict), batch_size=4)()
self.train_data = [next(reader) for _ in range(5)]
self.learning_rate = .5
def run_executor(self, place, feed_list, loss):
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
exe.run(fluid.default_startup_program())
main_prog = fluid.default_main_program()
loss_set = []
for data in self.train_data:
out = exe.run(main_prog,
feed=feeder.feed(data),
fetch_list=[loss.name])
print("loss %s" % (np.average(out)))
loss_set.append(np.average(out))
return loss_set
def run_parallel_exe(self,
place,
feed_list,
loss,
use_cuda=True,
use_reduce=False,
use_fast_executor=False,
use_ir_memory_optimize=False):
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
exe.run(fluid.default_startup_program())
exec_strategy = fluid.ExecutionStrategy()
if use_fast_executor:
exec_strategy.use_experimental_executor = True
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce \
if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce
build_strategy.memory_optimize = use_ir_memory_optimize
parallel_exe = fluid.ParallelExecutor(
use_cuda,
loss_name=loss.name,
exec_strategy=exec_strategy,
build_strategy=build_strategy)
loss_set = []
for data in self.train_data:
out = parallel_exe.run(feed=feeder.feed(data),
fetch_list=[loss.name])
print("loss %s" % (np.average(out)))
loss_set.append(np.average(out))
return loss_set
def check_weight_decay(self,
place,
model,
use_parallel_exe=False,
use_reduce=False):
main_prog = fluid.framework.Program()
startup_prog = fluid.framework.Program()
startup_prog.random_seed = 1
with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
avg_cost = model(data, label, len(self.word_dict))
param_list = [(var, var * self.learning_rate)
for var in main_prog.block(0).all_parameters()]
optimizer = fluid.optimizer.Adagrad(
learning_rate=self.learning_rate)
optimizer.minimize(avg_cost)
for params in param_list:
updated_p = fluid.layers.elementwise_sub(
x=params[0], y=params[1])
fluid.layers.assign(input=updated_p, output=params[0])
if use_parallel_exe:
loss = self.run_parallel_exe(
place, [data, label],
loss=avg_cost,
use_cuda=True,
use_reduce=use_reduce)
else:
loss = self.run_executor(place, [data, label], loss=avg_cost)
return loss
def test_weight_decay(self):
model = partial(bow_net, is_sparse=False)
for place in get_places():
loss = self.check_weight_decay(place, model, use_parallel_exe=False)
loss2 = self.check_weight_decay(
place, model, use_parallel_exe=True, use_reduce=False)
for i in range(len(loss)):
assert np.isclose(a=loss[i], b=loss2[i], rtol=5e-5)
loss3 = self.check_weight_decay(
place, model, use_parallel_exe=True, use_reduce=True)
for i in range(len(loss)):
assert np.isclose(a=loss[i], b=loss3[i], rtol=5e-5)
if __name__ == '__main__':
unittest.main()