diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index c2a57a95ee6aa1b03a687f07de74810e8e753f29..7436e8c228db2caeb1421f8d78ddcf55f00deee4 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -21,6 +21,8 @@ cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc DEPS glog) cc_test(scope_test SRCS scope_test.cc DEPS scope) +cc_library(data_transform SRCS data_transform.cc DEPS tensor framework_proto) +cc_test(data_transform_test SRCS data_transform_test.cc DEPS data_transform device_context) cc_library(attribute SRCS attribute.cc DEPS framework_proto) cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc @@ -29,7 +31,8 @@ cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute) -cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog shape_inference) +cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog + shape_inference data_transform) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog) @@ -64,4 +67,4 @@ cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool) cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece) cc_test(init_test SRCS init_test.cc DEPS init) -cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context) +cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto) diff --git a/paddle/framework/data_transform.cc b/paddle/framework/data_transform.cc new file mode 100644 index 0000000000000000000000000000000000000000..35f16025a9ae44bd70e15b19b25deb08299bea88 --- /dev/null +++ b/paddle/framework/data_transform.cc @@ -0,0 +1,26 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/data_transform.h" + +namespace paddle { +namespace framework { + +DataTransformFnMap& DataTransformFnMap::Instance() { + static DataTransformFnMap data_transform_map; + return data_transform_map; +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/data_transform.h b/paddle/framework/data_transform.h new file mode 100644 index 0000000000000000000000000000000000000000..73f894a3e20ab779f8607e63a67139b0e8cce79a --- /dev/null +++ b/paddle/framework/data_transform.h @@ -0,0 +1,109 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "paddle/framework/op_kernel_type.h" +#include "paddle/framework/tensor.h" +#include "paddle/framework/variable.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/macros.h" + +namespace paddle { +namespace framework { + +using DataTransformFN = + std::function ctx, + const Variable& in, Variable* out)>; +using KernelTypePair = std::pair; + +struct KernelTypePairHash { + static void HashCombine(const OpKernelType& t, std::size_t* seed) { + OpKernelType::Hash kernel_type_hasher; + (*seed) ^= kernel_type_hasher(t) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2); + } + + size_t operator()(const KernelTypePair& kernel_pair) const { + std::size_t seed = 0; + HashCombine(kernel_pair.first, &seed); + HashCombine(kernel_pair.second, &seed); + return seed; + } +}; + +using DataTransformMap = + std::unordered_map; + +class DataTransformFnMap { + public: + static DataTransformFnMap& Instance(); + + bool Has(const KernelTypePair& key_pair) const { + return map_.find(key_pair) != map_.end(); + } + + void Insert(const OpKernelType& left, const OpKernelType& right, + const DataTransformFN& data_tranform_fn) { + Insert(std::make_pair(left, right), data_tranform_fn); + } + + void Insert(const KernelTypePair& kernel_type_pair, + const DataTransformFN& data_tranform_fn) { + PADDLE_ENFORCE(!Has(kernel_type_pair), + "KernelTypePair %s has been registered", ""); + map_.insert({kernel_type_pair, data_tranform_fn}); + } + + const DataTransformFN& Get(const KernelTypePair& key_pair) const { + auto data_transformer = GetNullable(key_pair); + PADDLE_ENFORCE_NOT_NULL(data_transformer, + "DataTransformFN should not be NULL"); + return *data_transformer; + } + + const DataTransformFN* GetNullable(const KernelTypePair& key_pair) const { + auto it = map_.find(key_pair); + if (it == map_.end()) { + return nullptr; + } else { + return &(it->second); + } + } + + const DataTransformMap& Map() const { return map_; } + + private: + DataTransformFnMap() = default; + DataTransformMap map_; + DISABLE_COPY_AND_ASSIGN(DataTransformFnMap); +}; + +// generate unique name with __LINE__ +// refs https://stackoverflow.com/questions/1597007 +#define TOKENPASTE(x, y) x##y +#define TOKENPASTE2(x, y) TOKENPASTE(x, y) +#define REGISTER_DATA_TRANSFORM_FN(from, to, fn) \ + static int TOKENPASTE2(fn_, __LINE__)() { \ + ::paddle::framework::DataTransformFnMap::Instance().Insert(from, to, fn); \ + return 0; \ + } \ + static int TOKENPASTE2(var_, __LINE__) __attribute__((unused)) = \ + TOKENPASTE2(fn_, __LINE__)() + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/data_transform_test.cc b/paddle/framework/data_transform_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..f93a47eeb567c4fc984954aa5198362c9939c556 --- /dev/null +++ b/paddle/framework/data_transform_test.cc @@ -0,0 +1,78 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
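To make the registration macro defined above easier to follow: REGISTER_DATA_TRANSFORM_FN(from, to, fn) token-pastes __LINE__ into two otherwise hidden static symbols so that the Insert() call runs during static initialization. A rough sketch of what it expands to when invoked on, say, line 42 of a translation unit (the line number is purely illustrative):

static int fn_42() {
  ::paddle::framework::DataTransformFnMap::Instance().Insert(from, to, fn);
  return 0;
}
static int var_42 __attribute__((unused)) = fn_42();

Tying both names to __LINE__ keeps several registrations in the same file from colliding, and the unused dummy variable is what forces fn_42() to run before main().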
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/data_transform.h" +#include + +namespace paddle { +namespace framework { + +using namespace platform; + +int test_value = 0; + +OpKernelType kernel_type_1(proto::DataType::FP32, CPUPlace(), DataLayout::kNCHW, + LibraryType::kCUDNN); +OpKernelType kernel_type_2(proto::DataType::FP32, CUDAPlace(0), + DataLayout::kNCHW, LibraryType::kCUDNN); +OpKernelType kernel_type_3(proto::DataType::FP16, CUDAPlace(0), + DataLayout::kNCHW, LibraryType::kCUDNN); + +void type1_to_type2(std::vector ctx, + const Variable& in, Variable* out) { + test_value++; +} + +void type2_to_type3(std::vector ctx, + const Variable& in, Variable* out) { + test_value--; +} + +void type1_to_type3(std::vector ctx, + const Variable& in, Variable* out) { + test_value += 2; +} + +} // namespace framework +} // namespace paddle + +namespace frw = paddle::framework; + +REGISTER_DATA_TRANSFORM_FN(frw::kernel_type_1, frw::kernel_type_2, + frw::type1_to_type2); +REGISTER_DATA_TRANSFORM_FN(frw::kernel_type_2, frw::kernel_type_3, + frw::type2_to_type3); +REGISTER_DATA_TRANSFORM_FN(frw::kernel_type_1, frw::kernel_type_3, + frw::type1_to_type3); + +TEST(DataTransform, Register) { + using namespace paddle::framework; + using namespace paddle::platform; + + auto& instance = DataTransformFnMap::Instance(); + ASSERT_EQ(instance.Map().size(), 3UL); + std::vector ctx; + paddle::framework::Variable in; + paddle::framework::Variable out; + + instance.Get(std::make_pair(frw::kernel_type_1, frw::kernel_type_2))(ctx, in, + &out); + ASSERT_EQ(test_value, 1); + instance.Get(std::make_pair(frw::kernel_type_2, frw::kernel_type_3))(ctx, in, + &out); + ASSERT_EQ(test_value, 0); + instance.Get(std::make_pair(frw::kernel_type_1, frw::kernel_type_3))(ctx, in, + &out); + ASSERT_EQ(test_value, 2); +} diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 997773c1689efad4ce5a86c09ce58bd3a40185e0..9ee2ddb7c3e8d69a12fa97c67eb525a6c65915d5 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -14,18 +14,17 @@ limitations under the License. */ #include "paddle/framework/executor.h" -#include -#include -#include #include -#include +#include "gflags/gflags.h" #include "paddle/framework/feed_fetch_type.h" #include "paddle/framework/lod_rank_table.h" -#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/scope.h" + +DEFINE_bool(check_nan_inf, false, + "Checking whether operator produce NAN/INF or not. 
It will be " + "extremely slow so please use this flag wisely."); namespace paddle { namespace framework { @@ -58,6 +57,19 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { } } +static void CheckTensorNANOrInf(const std::string& name, + const framework::Tensor& tensor) { + if (tensor.type().hash_code() != typeid(float).hash_code() && + tensor.type().hash_code() != typeid(double).hash_code()) { + return; + } + if (tensor.memory_size() == 0) { + return; + } + PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name); + PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name); +} + void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, bool create_local_scope, bool create_vars) { // TODO(tonyyang-svail): @@ -101,6 +113,15 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); VLOG(3) << op->DebugString(); op->Run(*local_scope, place_); + if (FLAGS_check_nan_inf) { + for (auto& vname : op->OutputVars(true)) { + auto* var = local_scope->FindVar(vname); + if (var == nullptr) continue; + if (var->IsType()) { + CheckTensorNANOrInf(vname, var->Get()); + } + } + } } if (create_local_scope) { scope->DeleteScope(local_scope); diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc index d6601090d5b6150a5aa467210038d3693c3e67a8..682cff168d4d31e0565fc987604f97a671566fbd 100644 --- a/paddle/framework/init.cc +++ b/paddle/framework/init.cc @@ -71,7 +71,7 @@ bool InitDevices(const std::vector &devices) { places.emplace_back(platform::CPUPlace()); LOG(WARNING) << "Not specified CPU device, create CPU by Default."; } - platform::DeviceContextPool::Create(places); + platform::DeviceContextPool::Init(places); return true; } diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 66840a2e037e7ca0fd1eacc64421865b170b47f8..e8d4be86758c89df3ef14544d3d6a8a1b1b92189 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include +#include "paddle/framework/data_transform.h" #include "paddle/framework/executor.h" #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/operator.h" @@ -387,8 +388,8 @@ void OperatorWithKernel::Run(const Scope& scope, const platform::Place& place) const { RuntimeInferShapeContext infer_shape_ctx(*this, scope); this->InferShape(&infer_shape_ctx); - platform::DeviceContextPool& pool = platform::DeviceContextPool::Get(); - auto dev_ctx = pool.Borrow(place); + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto dev_ctx = pool.Get(place); // check if op[type] has kernel registered. 
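As a usage note for the new check_nan_inf flag introduced in executor.cc above: it is a plain gflag, so any driver binary that links the framework can switch the check on from the command line or in code. A minimal sketch, assuming a hypothetical driver that parses gflags in its own main() (not part of this patch):

#include "gflags/gflags.h"

DECLARE_bool(check_nan_inf);  // defined in paddle/framework/executor.cc

int main(int argc, char** argv) {
  // Pass --check_nan_inf=true on the command line, or force it on here:
  google::ParseCommandLineFlags(&argc, &argv, true);
  FLAGS_check_nan_inf = true;
  // ... build the ProgramDesc/Scope and call Executor::Run() as usual; each
  // float/double LoDTensor output is then scanned after every op, and
  // PADDLE_ENFORCE aborts on the first Inf or NaN found.
  return 0;
}

The kernel-dispatch code that follows still carries TODO placeholders for the transform device contexts and the transform function itself; presumably the registered function will eventually be looked up through the new map, along the lines of the following sketch (an assumption, not part of this patch):

auto kernel_pair = std::make_pair(actual_kernel_key, expected_kernel_key);
const auto& trans_fun =
    framework::DataTransformFnMap::Instance().Get(kernel_pair);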
auto& all_op_kernels = AllOpKernels(); @@ -411,7 +412,38 @@ void OperatorWithKernel::Run(const Scope& scope, expected_kernel_key); } - kernel_iter->second->Compute(ctx); + if (actual_kernel_key == expected_kernel_key) { + kernel_iter->second->Compute(ctx); + } else { + Scope& op_scope = scope.NewScope(); + auto input_vars = this->InputVars(); + for (auto var_name : input_vars) { + op_scope.Var(var_name); + } + + // TODO(qijun) get appropriate DeviceContext from DeviceContext pool + platform::DeviceContext* trans_dev_ctx = nullptr; + std::vector trans_dev_ctx_vec{trans_dev_ctx}; + + // TODO(qijun) get appropriate DataTransformFN from global map + framework::DataTransformFN trans_fun = nullptr; + + // Wait for transform starting + dev_ctx->Wait(); + + for (auto var_name : input_vars) { + trans_fun(trans_dev_ctx_vec, *(scope.FindVar(var_name)), + op_scope.FindVar(var_name)); + } + // Wait for data transform finishing + for (auto ctx : trans_dev_ctx_vec) { + ctx->Wait(); + } + + // Create a new ExecutionContext + ExecutionContext op_ctx(*this, op_scope, *dev_ctx); + kernel_iter->second->Compute(op_ctx); + } } OpKernelType OperatorWithKernel::GetActualKernelType( diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h index ea4e4f22ea82ccc9f8b683d2fd773a7bc37f78a3..7d786ad6141fa41f51ce8ec45ba7a37d513bec35 100644 --- a/paddle/framework/tensor_util.h +++ b/paddle/framework/tensor_util.h @@ -13,7 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include "paddle/framework/data_type.h" +#include "paddle/framework/eigen.h" #include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace framework { @@ -205,5 +208,100 @@ inline void CopyToVector(const Tensor& src, std::vector* dst) { src_ptr, size); } +template +struct AnyDTypeVisitor { + Predicate predicate_; + const Tensor& tensor_; + const DevCtx& ctx_; + Tensor* out_; + + AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx, + Tensor* out) + : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {} + + template + void operator()() const { + auto t = EigenVector::Flatten(tensor_); + auto o = EigenScalar::From(*out_); + o.device(*ctx_.eigen_device()) = predicate_(t).any(); + } +}; + +template +inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor, + const DevCtx& ctx, framework::Tensor* out) { + VisitDataType(ToDataType(tensor.type()), AnyDTypeVisitor( + predicate, tensor, ctx, out)); +} + +template +struct AnyVisitor : public boost::static_visitor { + const framework::Tensor& tensor_; + Predicate predicate_; + + AnyVisitor(const framework::Tensor& tensor, Predicate predicate) + : tensor_(tensor), predicate_(std::move(predicate)) {} + + template + bool operator()(const Place& place) const { + framework::Tensor out; + out.Resize({1}); + out.mutable_data(place); + auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place); + AnyImpl(predicate_, tensor_, *ctx, &out); + return this->GetResult(out, place); + } + + bool GetResult(const framework::Tensor& out, + const platform::CUDAPlace& gpu) const { + platform::CPUPlace cpu; + framework::Tensor tmp; + tmp.Resize({1}); + tmp.mutable_data(cpu); + platform::DeviceContextPool::Instance().Get(gpu)->Wait(); + CopyFrom(out, cpu, &tmp); + platform::DeviceContextPool::Instance().Get(gpu)->Wait(); + return GetResult(tmp, cpu); + } + + bool GetResult(const framework::Tensor& out, + const platform::CPUPlace& cpu) 
const { + return *out.data(); + } +}; + +template +inline bool Any(const framework::Tensor& tensor, Predicate predicate) { + AnyVisitor visitor(tensor, predicate); + auto place = tensor.place(); + return platform::VisitPlace(place, visitor); +} + +struct HasNANPredicate { + template + auto operator()(const T& eigen_vec) const + -> decltype(std::declval().isnan()) { + return eigen_vec.isnan(); + } +}; + +inline bool HasNAN(const framework::Tensor& tensor) { + HasNANPredicate predicate; + return Any(tensor, predicate); +} + +struct HasInfPredicate { + template + auto operator()(const T& eigen_vec) const + -> decltype(std::declval().isinf()) { + return eigen_vec.isinf(); + } +}; + +inline bool HasInf(const framework::Tensor& tensor) { + HasInfPredicate predicate; + return Any(tensor, predicate); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc index f388c19f28ed28335818733f946d8eaf18464627..01dfd4deb9d126b9e28391b3643156cd1b0dfc9d 100644 --- a/paddle/framework/tensor_util_test.cc +++ b/paddle/framework/tensor_util_test.cc @@ -13,6 +13,7 @@ #include "paddle/framework/tensor_util.h" #include +#include #include namespace paddle { @@ -230,5 +231,28 @@ TEST(CopyToVector, Tensor) { #endif } +TEST(IsNAN, CPU) { + using namespace paddle::framework; + using namespace paddle::platform; + Tensor src; + float* buf = src.mutable_data({3}, CPUPlace()); + buf[0] = 0.0; + buf[1] = NAN; + buf[2] = 0.0; + + ASSERT_TRUE(HasNAN(src)); +} + +TEST(IsInf, CPU) { + using namespace paddle::framework; + using namespace paddle::platform; + Tensor src; + double* buf = src.mutable_data({3}, CPUPlace()); + buf[0] = 1.0; + buf[1] = INFINITY; + buf[2] = 0.0; + ASSERT_TRUE(HasInf(src)); +} + } // namespace framework } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLRNLayer.cpp b/paddle/gserver/layers/MKLDNNLRNLayer.cpp index 741984bb68d3881f6ac26eaca7790190ed6e572a..ac217f1363dbd0360645bbe07cd71a17cc931a79 100644 --- a/paddle/gserver/layers/MKLDNNLRNLayer.cpp +++ b/paddle/gserver/layers/MKLDNNLRNLayer.cpp @@ -29,7 +29,7 @@ bool MKLDNNLRNLayer::init(const LayerMap& layerMap, } /* the size of inputs for norm-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1UL); + CHECK_EQ(config_.inputs_size(), 1); const NormConfig& conf = config_.inputs(0).norm_conf(); localSize_ = conf.size(); alpha_ = conf.scale(); diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index 55f673c22df53027ccf1a39f8b70b766ea45d8f0..4188858a90daf8b2c10eb6960393de977d467371 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -22,8 +22,8 @@ class ActivationOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - ctx->SetOutputDim("Y", ctx->GetInputDim("X")); - ctx->ShareLoD("X", /*->*/ "Y"); + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->ShareLoD("X", /*->*/ "Out"); } }; @@ -32,7 +32,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Y")); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Out")); } }; @@ -41,11 +41,11 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) : 
framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Sigmoid operator"); - AddOutput("Y", "Output of Sigmoid operator"); + AddOutput("Out", "Output of Sigmoid operator"); AddComment(R"DOC( Sigmoid Activation Operator -$$y = \frac{1}{1 + e^{-x}}$$ +$$out = \frac{1}{1 + e^{-x}}$$ )DOC"); } @@ -56,11 +56,11 @@ class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { LogSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of LogSigmoid operator"); - AddOutput("Y", "Output of LogSigmoid operator"); + AddOutput("Out", "Output of LogSigmoid operator"); AddComment(R"DOC( Logsigmoid Activation Operator -$$y = \log \frac{1}{1 + e^{-x}}$$ +$$out = \log \frac{1}{1 + e^{-x}}$$ )DOC"); } @@ -71,11 +71,11 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker { ExpOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Exp operator"); - AddOutput("Y", "Output of Exp operator"); + AddOutput("Out", "Output of Exp operator"); AddComment(R"DOC( Exp Activation Operator. -$y = e^x$ +$out = e^x$ )DOC"); } @@ -86,11 +86,11 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker { ReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Relu operator"); - AddOutput("Y", "Output of Relu operator"); + AddOutput("Out", "Output of Relu operator"); AddComment(R"DOC( Relu Activation Operator. -$y = \max(x, 0)$ +$out = \max(x, 0)$ )DOC"); } @@ -101,12 +101,12 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of LeakyRelu operator"); - AddOutput("Y", "Output of LeakyRelu operator"); + AddOutput("Out", "Output of LeakyRelu operator"); AddAttr("alpha", "The small negative slope").SetDefault(0.02f); AddComment(R"DOC( LeakyRelu Activation Operator. -$y = \max(x, \alpha * x)$ +$out = \max(x, \alpha * x)$ )DOC"); } @@ -117,13 +117,13 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softshrink operator"); - AddOutput("Y", "Output of Softshrink operator"); + AddOutput("Out", "Output of Softshrink operator"); AddAttr("lambda", "non-negative offset").SetDefault(0.5f); AddComment(R"DOC( Softshrink Activation Operator. $$ -y = \begin{cases} +out = \begin{cases} x - \lambda, \text{if } x > \lambda \\ x + \lambda, \text{if } x < -\lambda \\ 0, \text{otherwise} @@ -139,11 +139,11 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker { TanhOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Tanh operator"); - AddOutput("Y", "Output of Tanh operator"); + AddOutput("Out", "Output of Tanh operator"); AddComment(R"DOC( Tanh Activation Operator. 
-$$y = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ +$$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"); } @@ -154,11 +154,11 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker { TanhShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of TanhShrink operator"); - AddOutput("Y", "Output of TanhShrink operator"); + AddOutput("Out", "Output of TanhShrink operator"); AddComment(R"DOC( TanhShrink Activation Operator. -$$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ +$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"); } @@ -169,14 +169,14 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardShrink operator"); - AddOutput("Y", "Output of HardShrink operator"); + AddOutput("Out", "Output of HardShrink operator"); AddAttr("threshold", "The value of threshold for HardShrink") .SetDefault(0.5f); AddComment(R"DOC( HardShrink Activation Operator. $$ -y = \begin{cases} +out = \begin{cases} x, \text{if } x > \lambda \\ x, \text{if } x < -\lambda \\ 0, \text{otherwise} @@ -192,11 +192,11 @@ class SqrtOpMaker : public framework::OpProtoAndCheckerMaker { SqrtOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Sqrt operator"); - AddOutput("Y", "Output of Sqrt operator"); + AddOutput("Out", "Output of Sqrt operator"); AddComment(R"DOC( Sqrt Activation Operator. -$y = \sqrt{x}$ +$out = \sqrt{x}$ )DOC"); } @@ -207,11 +207,11 @@ class AbsOpMaker : public framework::OpProtoAndCheckerMaker { AbsOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Abs operator"); - AddOutput("Y", "Output of Abs operator"); + AddOutput("Out", "Output of Abs operator"); AddComment(R"DOC( Abs Activation Operator. -$y = |x|$ +$out = |x|$ )DOC"); } @@ -222,11 +222,11 @@ class CeilOpMaker : public framework::OpProtoAndCheckerMaker { CeilOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Ceil operator"); - AddOutput("Y", "Output of Ceil operator"); + AddOutput("Out", "Output of Ceil operator"); AddComment(R"DOC( Ceil Activation Operator. -$y = ceil(x)$ +$out = ceil(x)$ )DOC"); } @@ -237,11 +237,11 @@ class FloorOpMaker : public framework::OpProtoAndCheckerMaker { FloorOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Floor operator"); - AddOutput("Y", "Output of Floor operator"); + AddOutput("Out", "Output of Floor operator"); AddComment(R"DOC( Floor Activation Operator. -$y = floor(x)$ +$out = floor(x)$ )DOC"); } @@ -252,11 +252,11 @@ class RoundOpMaker : public framework::OpProtoAndCheckerMaker { RoundOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Round operator"); - AddOutput("Y", "Output of Round operator"); + AddOutput("Out", "Output of Round operator"); AddComment(R"DOC( Round Activation Operator. 
-$y = [x]$ +$out = [x]$ )DOC"); } @@ -267,11 +267,11 @@ class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker { ReciprocalOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Reciprocal operator"); - AddOutput("Y", "Output of Reciprocal operator"); + AddOutput("Out", "Output of Reciprocal operator"); AddComment(R"DOC( Reciprocal Activation Operator. -$$y = \frac{1}{x}$$ +$$out = \frac{1}{x}$$ )DOC"); } @@ -282,11 +282,11 @@ class LogOpMaker : public framework::OpProtoAndCheckerMaker { LogOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Log operator"); - AddOutput("Y", "Output of Log operator"); + AddOutput("Out", "Output of Log operator"); AddComment(R"DOC( Log Activation Operator. -$y = \ln(x)$ +$out = \ln(x)$ Natural logarithm of x. @@ -299,11 +299,11 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker { SquareOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Square operator"); - AddOutput("Y", "Output of Square operator"); + AddOutput("Out", "Output of Square operator"); AddComment(R"DOC( Square Activation Operator. -$y = x^2$ +$out = x^2$ )DOC"); } @@ -314,11 +314,11 @@ class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker { SoftplusOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softplus operator"); - AddOutput("Y", "Output of Softplus operator"); + AddOutput("Out", "Output of Softplus operator"); AddComment(R"DOC( Softplus Activation Operator. -$y = \ln(1 + e^{x})$ +$out = \ln(1 + e^{x})$ )DOC"); } @@ -329,11 +329,11 @@ class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker { SoftsignOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softsign operator"); - AddOutput("Y", "Output of Softsign operator"); + AddOutput("Out", "Output of Softsign operator"); AddComment(R"DOC( Softsign Activation Operator. -$$y = \frac{x}{1 + |x|}$$ +$$out = \frac{x}{1 + |x|}$$ )DOC"); } @@ -344,7 +344,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker { BReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of BRelu operator"); - AddOutput("Y", "Output of BRelu operator"); + AddOutput("Out", "Output of BRelu operator"); AddAttr("t_min", "The min marginal value of BRelu") .SetDefault(static_cast(0)); AddAttr("t_max", "The max marginal value of BRelu") @@ -352,7 +352,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( BRelu Activation Operator. -$y = \max(\min(x, t_{min}), t_{max})$ +$out = \max(\min(x, t_{min}), t_{max})$ )DOC"); } @@ -363,13 +363,13 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker { SoftReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of SoftRelu operator"); - AddOutput("Y", "Output of SoftRelu operator"); + AddOutput("Out", "Output of SoftRelu operator"); AddAttr("threshold", "The threshold value of SoftRelu") .SetDefault(40.0f); AddComment(R"DOC( SoftRelu Activation Operator. 
-$y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$ +$out = \ln(1 + \exp(\max(\min(x, threshold), threshold))$ )DOC"); } @@ -380,7 +380,7 @@ class ELUOpMaker : public framework::OpProtoAndCheckerMaker { ELUOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of ELU operator"); - AddOutput("Y", "Output of ELU operator"); + AddOutput("Out", "Output of ELU operator"); AddAttr("alpha", "The alpha value of ELU").SetDefault(1.0f); AddComment(R"DOC( ELU Activation Operator. @@ -388,7 +388,7 @@ ELU Activation Operator. Applies the following element-wise computation on the input according to https://arxiv.org/abs/1511.07289. -$y = \max(0, x) + \min(0, \alpha * (e^x - 1))$ +$out = \max(0, x) + \min(0, \alpha * (e^x - 1))$ )DOC"); } @@ -399,13 +399,13 @@ class Relu6OpMaker : public framework::OpProtoAndCheckerMaker { Relu6OpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Relu6 operator"); - AddOutput("Y", "Output of Relu6 operator"); + AddOutput("Out", "Output of Relu6 operator"); AddAttr("threshold", "The threshold value of Relu6") .SetDefault(6.0f); AddComment(R"DOC( Relu6 Activation Operator. -$y = \min(\max(0, x), 6)$ +$out = \min(\max(0, x), 6)$ )DOC"); } @@ -416,12 +416,12 @@ class PowOpMaker : public framework::OpProtoAndCheckerMaker { PowOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Pow operator"); - AddOutput("Y", "Output of Pow operator"); + AddOutput("Out", "Output of Pow operator"); AddAttr("factor", "The exponential factor of Pow").SetDefault(1.0f); AddComment(R"DOC( Pow Activation Operator. -$y = x^{factor}$ +$out = x^{factor}$ )DOC"); } @@ -432,7 +432,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker { STanhOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of STanh operator"); - AddOutput("Y", "Output of STanh operator"); + AddOutput("Out", "Output of STanh operator"); AddAttr("scale_a", "The scale parameter of a for the input") .SetDefault(2.0f / 3.0f); AddAttr("scale_b", "The scale parameter of b for the input") @@ -440,7 +440,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( STanh Activation Operator. -$$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ +$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ )DOC"); } @@ -451,14 +451,14 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { ThresholdedReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of ThresholdedRelu operator"); - AddOutput("Y", "Output of ThresholdedRelu operator"); + AddOutput("Out", "Output of ThresholdedRelu operator"); AddAttr("threshold", "The threshold location of activation") .SetDefault(1.0f); AddComment(R"DOC( ThresholdedRelu Activation Operator. 
$$ -y = \begin{cases} +out = \begin{cases} x, \text{if } x > threshold \\ 0, \text{otherwise} \end{cases} @@ -473,7 +473,7 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { HardSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardSigmoid operator"); - AddOutput("Y", "Output of HardSigmoid operator"); + AddOutput("Out", "Output of HardSigmoid operator"); AddAttr("slope", "Slope for linear approximation of sigmoid") .SetDefault(0.2f); AddAttr("offset", "Offset for linear approximation of sigmoid") @@ -484,7 +484,7 @@ HardSigmoid Activation Operator. Segment-wise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391), which is much faster than sigmoid. -$y = \max(0, \min(1, slope * x + shift))$ +$out = \max(0, \min(1, slope * x + shift))$ The slope should be positive. The offset can be either positive or negative. The default slope and shift are set according to the above reference. @@ -499,12 +499,12 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker { SwishOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Swish operator"); - AddOutput("Y", "Output of Swish operator"); + AddOutput("Out", "Output of Swish operator"); AddAttr("beta", "Constant beta of swish operator").SetDefault(1.0f); AddComment(R"DOC( Swish Activation Operator. -$$y = \frac{x}{1 + e^{- \beta x}}$$ +$$out = \frac{x}{1 + e^{- \beta x}}$$ )DOC"); } diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index 7b433df995afae7340ac97f721f962efdd2f07c0..0885f7c570b9b52dc51597347295734fd689da8d 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -27,11 +27,11 @@ class ActivationKernel void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); - auto* Y = context.Output("Y"); - Y->mutable_data(context.GetPlace()); + auto* Out = context.Output("Out"); + Out->mutable_data(context.GetPlace()); auto x = framework::EigenVector::Flatten(*X); - auto y = framework::EigenVector::Flatten(*Y); + auto out = framework::EigenVector::Flatten(*Out); auto* place = context.template device_context().eigen_device(); Functor functor; @@ -40,7 +40,7 @@ class ActivationKernel for (auto& attr : attrs) { *attr.second = context.Attr(attr.first); } - functor(*place, x, y); + functor(*place, x, out); } }; @@ -51,14 +51,15 @@ class ActivationGradKernel using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); - auto* Y = context.Input("Y"); - auto* dY = context.Input(framework::GradVarName("Y")); + auto* Out = context.Input("Out"); + auto* dOut = + context.Input(framework::GradVarName("Out")); auto* dX = context.Output(framework::GradVarName("X")); dX->mutable_data(context.GetPlace()); - auto dy = framework::EigenVector::Flatten(*dY); + auto dout = framework::EigenVector::Flatten(*dOut); auto x = framework::EigenVector::Flatten(*X); - auto y = framework::EigenVector::Flatten(*Y); + auto out = framework::EigenVector::Flatten(*Out); auto dx = framework::EigenVector::Flatten(*dX); auto* place = context.template device_context().eigen_device(); @@ -67,7 +68,7 @@ class ActivationGradKernel for (auto& attr : attrs) { *attr.second = context.Attr(attr.first); } - functor(*place, x, y, dy, dx); + functor(*place, x, out, dout, dx); } }; @@ -83,17 +84,18 @@ struct 
BaseActivationFunctor { // sigmoid(x) = 1 / (1 + exp(-x)) template struct SigmoidFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = static_cast(1) / (static_cast(1) + (-x).exp()); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = static_cast(1) / (static_cast(1) + (-x).exp()); } }; template struct SigmoidGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * y * (static_cast(1) - y); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * out * (static_cast(1) - out); } }; @@ -101,7 +103,7 @@ struct SigmoidGradFunctor : public BaseActivationFunctor { // For numerical stability, we can use the log-sum-exp trick: // https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/ // We can rewrite the above equation as: -// y = -log( exp(0) + exp(-x)) [since exp(0) = 1] +// out = -log( exp(0) + exp(-x)) [since exp(0) = 1] // = -log( exp(max(-x, 0) - max(-x, 0)) + exp(-x + max(-x, 0) - max(-x, 0))) // = -log( exp(max(-x, 0)) * exp(-max(-x, 0)) - exp(max(-x, 0)) * exp(-x - // max(-x, 0))) @@ -112,10 +114,10 @@ struct SigmoidGradFunctor : public BaseActivationFunctor { // + exp(-x - max(-x, 0)))) template struct LogSigmoidFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto temp = (-x).cwiseMax(static_cast(0)); // temp = max(-x, 0) - y.device(d) = -temp - (((-temp).exp() + (-x - temp).exp()).log()); + out.device(d) = -temp - (((-temp).exp() + (-x - temp).exp()).log()); } }; @@ -124,62 +126,66 @@ struct LogSigmoidFunctor : public BaseActivationFunctor { // exp(-x - max(-x, 0))) template struct LogSigmoidGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp = (-x).cwiseMax(static_cast(0)); // temp = max(-x, 0) dx.device(d) = - dy * ((-x - temp).exp() / ((-temp).exp() + (-x - temp).exp())); + dout * ((-x - temp).exp() / ((-temp).exp() + (-x - temp).exp())); } }; // exp(x) = e^x template struct ExpFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.exp(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.exp(); } }; template struct ExpGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * y; + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * out; } }; // relu(x) = max(x, 0) template struct ReluFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.cwiseMax(static_cast(0)); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(0)); } }; template struct ReluGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (x > static_cast(0)).template cast(); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (x > static_cast(0)).template cast(); } }; // tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) template struct TanhFunctor : public BaseActivationFunctor { - 
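Because the template parameter lists make these functor hunks dense, here is how the sigmoid pair reads after the Y-to-Out renaming, written out in full as a reference for the pattern every forward/backward functor in this file now follows (forward functors write Out; backward functors take X, Out, dOut, dX):

template <typename T>
struct SigmoidFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = static_cast<T>(1) / (static_cast<T>(1) + (-x).exp());
  }
};

template <typename T>
struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    // d(sigmoid)/dx = out * (1 - out), expressed via the forward output
    dx.device(d) = dout * out * (static_cast<T>(1) - out);
  }
};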
template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.tanh(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.tanh(); } }; template struct TanhGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (static_cast(1) - y * y); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (static_cast(1) - out * out); } }; @@ -187,17 +193,18 @@ struct TanhGradFunctor : public BaseActivationFunctor { // where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) template struct TanhShrinkFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x - x.tanh(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x - x.tanh(); } }; template struct TanhShrinkGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (x.tanh() * x.tanh()); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (x.tanh() * x.tanh()); } }; @@ -210,11 +217,11 @@ struct HardShrinkFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto temp1 = (x < static_cast(threshold * -1)).template cast().eval(); auto temp2 = (x > static_cast(threshold)).template cast().eval(); - y.device(d) = x * (temp1 + temp2); + out.device(d) = x * (temp1 + temp2); } }; @@ -226,11 +233,12 @@ struct HardShrinkGradFunctor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp1 = (x < static_cast(threshold * -1)).template cast().eval(); auto temp2 = (x > static_cast(threshold)).template cast().eval(); - dx.device(d) = dy * (temp1 + temp2).template cast(); + dx.device(d) = dout * (temp1 + temp2).template cast(); } }; @@ -243,12 +251,12 @@ struct SoftShrinkFunctor : public BaseActivationFunctor { return {{"lambda", &lambda}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto lambdaT = static_cast(lambda); auto temp1 = (x > lambdaT).template cast().eval(); auto temp2 = (x < -lambdaT).template cast().eval(); - y.device(d) = temp1 * (x - lambdaT) + temp2 * (x + lambdaT); + out.device(d) = temp1 * (x - lambdaT) + temp2 * (x + lambdaT); } }; @@ -258,46 +266,49 @@ struct SoftShrinkGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"lambda", &lambda}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto lambdaT = static_cast(lambda); auto temp1 = (x > lambdaT).template cast().eval(); auto temp2 = (x < -lambdaT).template cast().eval(); - dx.device(d) = dy * (temp1 + temp2).template cast(); + dx.device(d) = dout * (temp1 + temp2).template cast(); } }; // sqrt(x) = x^(1/2) template struct SqrtFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.sqrt(); + template + void operator()(Device d, X x, Out out) const { + 
out.device(d) = x.sqrt(); } }; template struct SqrtGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - const Y y_conj = Eigen::numext::conj(y); - dx.device(d) = static_cast(0.5) * dy / y_conj; + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + const Out out_conj = Eigen::numext::conj(out); + dx.device(d) = static_cast(0.5) * dout / out_conj; } }; // ceil(x) = ceiling(x) template struct CeilFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.ceil(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.ceil(); } }; template struct ZeroGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { dx.device(d) = static_cast(0) / x; } }; @@ -305,86 +316,90 @@ struct ZeroGradFunctor : public BaseActivationFunctor { // floor(x) = flooring(x) template struct FloorFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.ceil(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.ceil(); } }; // round(x) = [x] template struct RoundFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.round(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.round(); } }; // abs(x) = |x| template struct AbsFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.abs(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.abs(); } }; template struct AbsGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * x.sign(); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * x.sign(); } }; // reciprocal(x) = 1 / x template struct ReciprocalFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = static_cast(1) / x; + template + void operator()(Device d, X x, Out out) const { + out.device(d) = static_cast(1) / x; } }; template struct ReciprocalGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * static_cast(-1) * y * y; + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(-1) * out * out; } }; // log(x) = natural logarithm of x template struct LogFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.log(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.log(); } }; template struct LogGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (static_cast(1) / x); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (static_cast(1) / x); } }; // square(x) = x^2 template struct SquareFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.square(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.square(); } 
}; template struct SquareGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * static_cast(2) * x; + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(2) * x; } }; @@ -399,9 +414,9 @@ struct BReluFunctor : public BaseActivationFunctor { return {{"t_min", &t_min}, {"t_max", &t_max}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(t_min)).cwiseMin(static_cast(t_max)); } }; @@ -413,9 +428,10 @@ struct BReluGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"t_min", &t_min}, {"t_max", &t_max}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * ((x > static_cast(t_min)) * (x < static_cast(t_max))) .template cast(); } @@ -430,9 +446,9 @@ struct Relu6Functor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(0)).cwiseMin(static_cast(threshold)); } }; @@ -443,9 +459,10 @@ struct Relu6GradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * ((x > static_cast(0)) * (x < static_cast(threshold))) .template cast(); } @@ -458,10 +475,10 @@ struct Relu6GradFunctor : public BaseActivationFunctor { // Then: softplus(x) = max(x, 0) + log(exp(-max(x, 0)) + exp(x - max(x, 0))) template struct SoftplusFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) { + template + void operator()(Device d, X x, Out out) { auto temp = x.cwiseMax(static_cast(0)); // temp = max(x, 0) - y.device(d) = temp + (((-temp).exp() + (x - temp).exp()).log()); + out.device(d) = temp + (((-temp).exp() + (x - temp).exp()).log()); } }; @@ -471,19 +488,21 @@ struct SoftplusFunctor : public BaseActivationFunctor { // exp(x - max(x, 0))) template struct SoftplusGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) { auto temp = x.cwiseMax(static_cast(0)); // temp = max(x, 0) - dx.device(d) = dy * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp())); + dx.device(d) = + dout * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp())); } }; // softsign(x) = x / (1 + |x|) template struct SoftsignFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) { - y.device(d) = x / (static_cast(1) + x.abs()); + template + void operator()(Device d, X x, Out out) { + out.device(d) = x / (static_cast(1) + x.abs()); } }; @@ -491,10 +510,11 @@ struct SoftsignFunctor : public BaseActivationFunctor { // Taken from https://en.wikipedia.org/wiki/Activation_function template struct SoftsignGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) { + template + void operator()(Device d, 
X x, Out out, dOut dout, dX dx) { dx.device(d) = - dy * (static_cast(1) / (static_cast(1) + x.abs()).square()); + dout * (static_cast(1) / (static_cast(1) + x.abs()).square()); } }; @@ -505,11 +525,11 @@ struct SoftReluFunctor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto tmp = static_cast(threshold); auto temp = x.cwiseMax(-tmp).cwiseMin(tmp); - y.device(d) = (static_cast(1) + temp.exp()).log(); + out.device(d) = (static_cast(1) + temp.exp()).log(); } }; @@ -519,11 +539,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto tmp = static_cast(threshold); auto temp = ((x > -tmp) * (x < tmp)).template cast().eval(); - dx.device(d) = dy * (static_cast(1) - (-y).exp()) * temp; + dx.device(d) = dout * (static_cast(1) - (-out).exp()) * temp; } }; @@ -534,9 +555,9 @@ struct LeakyReluFunctor : public BaseActivationFunctor { return {{"alpha", &alpha}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.cwiseMax(static_cast(alpha) * x); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(alpha) * x); } }; @@ -546,12 +567,13 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"alpha", &alpha}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp1 = static_cast(alpha) * (x < static_cast(0)).template cast().eval(); auto temp2 = (x >= static_cast(0)).template cast().eval(); - dx.device(d) = dy * (temp1 + temp2).template cast(); + dx.device(d) = dout * (temp1 + temp2).template cast(); } }; @@ -562,11 +584,11 @@ struct ELUFunctor : public BaseActivationFunctor { return {{"alpha", &alpha}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.cwiseMax(static_cast(0)) + - (static_cast(alpha) * (x.exp() - static_cast(1))) - .cwiseMin(static_cast(0)); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(0)) + + (static_cast(alpha) * (x.exp() - static_cast(1))) + .cwiseMin(static_cast(0)); } }; @@ -576,10 +598,11 @@ struct ELUGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"alpha", &alpha}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (x > static_cast(0)).template cast() + - dy * (y + static_cast(alpha)) * + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (x > static_cast(0)).template cast() + + dout * (out + static_cast(alpha)) * (x < static_cast(0)).template cast(); } }; @@ -591,9 +614,9 @@ struct PowFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"factor", &factor}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.pow(static_cast(factor)); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.pow(static_cast(factor)); } }; @@ -603,9 +626,10 @@ struct PowGradFunctor : public BaseActivationFunctor { typename 
BaseActivationFunctor::AttrPair GetAttrs() { return {{"factor", &factor}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * static_cast(factor) * + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(factor) * x.pow(static_cast(factor - static_cast(1))); } }; @@ -618,9 +642,9 @@ struct STanhFunctor : public BaseActivationFunctor { return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = + template + void operator()(Device d, X x, Out out) const { + out.device(d) = static_cast(scale_b) * (static_cast(scale_a) * x).tanh(); } }; @@ -633,12 +657,13 @@ struct STanhGradFunctor : public BaseActivationFunctor { return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto a = static_cast(scale_a); auto b = static_cast(scale_b); auto temp = (a * x).tanh() * (a * x).tanh(); - dx.device(d) = dy * a * b * (static_cast(1) - temp); + dx.device(d) = dout * a * b * (static_cast(1) - temp); } }; @@ -649,10 +674,10 @@ struct ThresholdedReluFunctor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto th = static_cast(threshold); - y.device(d) = (x > th).template cast() * x; + out.device(d) = (x > th).template cast() * x; } }; @@ -663,10 +688,11 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto th = static_cast(threshold); - dx.device(d) = dy * (x > th).template cast(); + dx.device(d) = dout * (x > th).template cast(); } }; @@ -678,10 +704,11 @@ struct HardSigmoidFunctor : public BaseActivationFunctor { return {{"slope", &slope}, {"offset", &offset}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto temp = x * static_cast(slope) + static_cast(offset); - y.device(d) = temp.cwiseMax(static_cast(0)).cwiseMin(static_cast(1)); + out.device(d) = + temp.cwiseMax(static_cast(0)).cwiseMin(static_cast(1)); } }; @@ -693,12 +720,13 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor { return {{"slope", &slope}, {"offset", &offset}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = - dy * - ((y > static_cast(0)) * (y < static_cast(1))).template cast() * - static_cast(slope); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * + ((out > static_cast(0)) * (out < static_cast(1))) + .template cast() * + static_cast(slope); } }; @@ -709,9 +737,9 @@ struct SwishFunctor : public BaseActivationFunctor { return {{"beta", &beta}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x / (static_cast(1) + (static_cast(-beta) * x).exp()); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x / (static_cast(1) + (static_cast(-beta) * x).exp()); } }; @@ -722,12 +750,13 @@ struct SwishGradFunctor : public BaseActivationFunctor { return {{"beta", &beta}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + 
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp1 = static_cast(1) / (static_cast(1) + (static_cast(-beta) * x).exp()); - auto temp2 = temp1 * (static_cast(1) - (beta * y)); - dx.device(d) = dy * ((beta * y) + temp2); + auto temp2 = temp1 * (static_cast(1) - (beta * out)); + dx.device(d) = dout * ((beta * out) + temp2); } }; diff --git a/paddle/operators/array_operator.h b/paddle/operators/array_operator.h index 060ffac8273724fad48753d9a7e2b0affbb6f25e..e0eef5d9f93d70930ee82d663de9610cc0176e33 100644 --- a/paddle/operators/array_operator.h +++ b/paddle/operators/array_operator.h @@ -35,8 +35,8 @@ class ArrayOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(i_tensor.numel(), 1); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); size_t offset; if (platform::is_gpu_place(i_tensor.place())) { diff --git a/paddle/operators/array_to_lod_tensor_op.cc b/paddle/operators/array_to_lod_tensor_op.cc index 0aa04c268bd65dc246341a2f335c362496050bdb..49366fee8df5a44a97b7b4e87cbf0b7c813a414a 100644 --- a/paddle/operators/array_to_lod_tensor_op.cc +++ b/paddle/operators/array_to_lod_tensor_op.cc @@ -106,8 +106,9 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { } auto slice = out->Slice(out_offset, out_offset + len); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(x[x_idx].Slice(start_offset, end_offset), place, dev_ctx, &slice); diff --git a/paddle/operators/assign_op.cc b/paddle/operators/assign_op.cc index 0560040509026e84eb543331996a6846751b8506..7d77be3be1034bb38f6c92c181aa525214073eec 100644 --- a/paddle/operators/assign_op.cc +++ b/paddle/operators/assign_op.cc @@ -82,8 +82,8 @@ class AssignOp : public framework::OperatorBase { out != nullptr, "The Output(Out) should not be null if the Input(X) is set."); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::VisitVarType(*x, AssignFunctor(out, dev_ctx)); } diff --git a/paddle/operators/beam_search_decode_op.cc b/paddle/operators/beam_search_decode_op.cc index 52c28e7f532f9751589176c8d37362620167cf63..72e05607b0b612807d552b4c45b58f9d9ce9c2af 100644 --- a/paddle/operators/beam_search_decode_op.cc +++ b/paddle/operators/beam_search_decode_op.cc @@ -57,8 +57,8 @@ class BeamSearchDecodeOp : public framework::OperatorBase { : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope& scope, const platform::Place& dev_place) const override { - platform::DeviceContextPool& pool = platform::DeviceContextPool::Get(); - auto& dev_ctx = *pool.Borrow(dev_place); + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto& dev_ctx = *pool.Get(dev_place); framework::ExecutionContext ctx(*this, scope, dev_ctx); diff --git a/paddle/operators/cond_op.cc b/paddle/operators/cond_op.cc index 455fbd8ca3f5083fac51776524daca6f6a029667..e333002bfd1ab40c62882f09cd207a12a0939648 100644 --- a/paddle/operators/cond_op.cc +++ b/paddle/operators/cond_op.cc @@ -195,8 +195,8 @@ void 
CondOp::MergeDataFromSubnet(const framework::Scope& scope, void CondOp::Run(const Scope& scope, const platform::Place& place) const { // get device context from pool - platform::DeviceContextPool& pool = platform::DeviceContextPool::Get(); - auto& dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto& dev_ctx = *pool.Get(place); PrepareDataForSubnet(scope, dev_ctx); std::vector& sub_scopes = GetSubScopes(scope); diff --git a/paddle/operators/feed_op.cc b/paddle/operators/feed_op.cc index cecbb7226aedabc3bee63c518d0ab9da496fca11..48da52c3b68879a1da8550a5448090f9f1e715d3 100644 --- a/paddle/operators/feed_op.cc +++ b/paddle/operators/feed_op.cc @@ -49,8 +49,8 @@ class FeedOp : public framework::OperatorBase { auto *out_item = out_var->GetMutable(); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(feed_item, place, dev_ctx, out_item); out_item->set_lod(feed_item.lod()); diff --git a/paddle/operators/fetch_op.cc b/paddle/operators/fetch_op.cc index fa20a06540efef05d6a15669b1f8dfb8bd5927bc..387d1e0a747f71d85826b52d140c2838112227f6 100644 --- a/paddle/operators/fetch_op.cc +++ b/paddle/operators/fetch_op.cc @@ -52,8 +52,8 @@ class FetchOp : public framework::OperatorBase { // FIXME(yuyang18): Should we assume the fetch operator always generate // CPU outputs? - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); CopyFrom(src_item, platform::CPUPlace(), dev_ctx, &dst_item); dev_ctx.Wait(); diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index fe0706c4a9da864025737584b72c02cca83c956b..dcd43a30c86b62d79f52ac640f14b295a062146c 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -49,8 +49,8 @@ class FillConstantOp : public framework::OperatorBase { out.mutable_data(dev_place, framework::ToTypeIndex(data_type)); } - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(dev_place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); math::set_constant(dev_ctx, &out, value); } }; diff --git a/paddle/operators/fill_op.cc b/paddle/operators/fill_op.cc index 57b4ec69384a55cef6d6d8d0c0145caae837b3f6..084ba1db62de0a6bf6829f8e9f4c274fb777e879 100644 --- a/paddle/operators/fill_op.cc +++ b/paddle/operators/fill_op.cc @@ -69,8 +69,9 @@ class FillOp : public framework::OperatorBase { if (!force_cpu && platform::is_gpu_place(place)) { // Copy tensor to out - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(tensor, place, dev_ctx, &out); } } diff --git a/paddle/operators/load_op.cc b/paddle/operators/load_op.cc index 5425375c1ffa16c223ec1b05506ffb4b4394d6e1..65f021d91931541b712bd46aebc06e68144b2af0 100644 --- a/paddle/operators/load_op.cc +++ b/paddle/operators/load_op.cc @@ -40,8 +40,8 @@ class LoadOp : public framework::OperatorBase { auto *tensor = 
out_var->GetMutable(); framework::DeserializeFromStream(fin, tensor); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); if (platform::is_gpu_place(place)) { // copy CPU to GPU diff --git a/paddle/operators/lod_tensor_to_array_op.cc b/paddle/operators/lod_tensor_to_array_op.cc index ed99915bb7fc312cac955b50ffbc3237aee503b0..8d164b4abc54722a95a176dfe8ed341f8c5125d1 100644 --- a/paddle/operators/lod_tensor_to_array_op.cc +++ b/paddle/operators/lod_tensor_to_array_op.cc @@ -88,8 +88,9 @@ class LoDTensorToArrayOp : public framework::OperatorBase { auto slice = out[i].Slice(static_cast(offset), static_cast(offset + len)); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(x.Slice(static_cast(each_range.begin), static_cast(each_range.end)), diff --git a/paddle/operators/merge_lod_tensor_op.cc b/paddle/operators/merge_lod_tensor_op.cc index 2287f347910e83c25d2155b80670f9d991c1e5b2..3f999e404f8afe6bded09c820509fa0f36d30bf6 100644 --- a/paddle/operators/merge_lod_tensor_op.cc +++ b/paddle/operators/merge_lod_tensor_op.cc @@ -30,8 +30,8 @@ class MergeLoDTensorOp : public framework::OperatorBase { void Run(const framework::Scope &scope, const platform::Place &dev_place) const override { // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(dev_place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); auto &x = scope.FindVar(Input("X"))->Get(); auto &mask = scope.FindVar(Input("Mask"))->Get(); diff --git a/paddle/operators/nccl_op_test.cu.cc b/paddle/operators/nccl_op_test.cu.cc index 34a6e1a58d4bb8b267f3b6b3a0ac689c17036c37..6546096069d4c3fbc4908a16c2dba2ac6d7e6421 100644 --- a/paddle/operators/nccl_op_test.cu.cc +++ b/paddle/operators/nccl_op_test.cu.cc @@ -305,7 +305,7 @@ int main(int argc, char **argv) { } VLOG(0) << " DeviceCount " << count; - paddle::platform::DeviceContextPool::Create(places); + paddle::platform::DeviceContextPool::Init(places); testing::InitGoogleTest(&argc, argv); diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 71769e67c7032f2d808d1394883cbe93f826b2f6..056fa46949cd623845956521b068109085a8795e 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -272,8 +272,9 @@ class RecurrentOp : public RecurrentBase { false /*create_local_scope*/); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); // Copy inside::output -> outside::output // outside::output[seq_offset: seq_offset + 1] = inside::output @@ -326,8 +327,8 @@ class RecurrentGradOp : public RecurrentBase { auto *program = block->Program(); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); for (size_t step_id = 0; step_id < 
seq_len; ++step_id) { size_t seq_offset = reverse ? step_id : seq_len - step_id - 1; diff --git a/paddle/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/operators/reorder_lod_tensor_by_rank_op.cc index 1063388e2539d47ca6ab56cd5fb0b946b6fb9147..8d652ff806461cea3d0e8d3bd70704b4b6bc2173 100644 --- a/paddle/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/operators/reorder_lod_tensor_by_rank_op.cc @@ -131,8 +131,8 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase { auto x_sliced = x.Slice(x_offset, x_offset + len); auto out_sliced = out->Slice(out_offset, out_offset + len); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(x_sliced, out_sliced.place(), dev_ctx, &out_sliced); out_offset += len; return out_offset; diff --git a/paddle/operators/save_op.cc b/paddle/operators/save_op.cc index d045a8b5b8d852278fd5140b9cf8707462b93c93..4b1cbe88836e340c94f797806243a6768410ed3d 100644 --- a/paddle/operators/save_op.cc +++ b/paddle/operators/save_op.cc @@ -91,8 +91,8 @@ class SaveOp : public framework::OperatorBase { auto &tensor = var->Get(); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::SerializeToStream(fout, tensor, dev_ctx); } diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc index e8a4773547861478b1771562ecd9e8b0bb3c3739..e5ef0740b6f385de7f17a3a419000cb8c897d986 100644 --- a/paddle/operators/shrink_rnn_memory_op.cc +++ b/paddle/operators/shrink_rnn_memory_op.cc @@ -106,8 +106,8 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { dx_tensor.mutable_data(x_tensor.place(), x_tensor.type()); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); if (dout_var == nullptr) { // dx_tensor fill zero math::set_constant(dev_ctx, &dx_tensor, 0.0f); diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 6b3f19bb46c45b7dd8ec6c23ee449521b36d759c..e7306bc5f13377813e0bd49846bc834d501602eb 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -24,13 +24,13 @@ class SoftmaxOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of SoftmaxOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Y"), - "Output(Y) of SoftmaxOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of SoftmaxOp should not be null."); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE(x_dims.size() == 2UL, "The input of softmax op must be a matrix."); - ctx->SetOutputDim("Y", x_dims); + ctx->SetOutputDim("Out", x_dims); } }; @@ -41,7 +41,7 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "The input tensor of softmax. 
" "2-D with shape [batch_size, input_feature_dimensions]."); - AddOutput("Y", "The normalized values with the same shape as X."); + AddOutput("Out", "The normalized values with the same shape as X."); AddComment(R"DOC( Softmax Operator. @@ -59,7 +59,7 @@ exponential values of all the other dimensions is the output of the softmax operator. For each row $i$ and each column $j$ in Input(X), we have: - $$Y[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$ + $$Out[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$ )DOC"); } @@ -70,12 +70,12 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should be not null."); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), - "Input(Y@GRAD) should be not null."); - PADDLE_ENFORCE_EQ(ctx->GetInputDim("Y"), - ctx->GetInputDim(framework::GradVarName("Y")), - "Input(Y) and its gradients should have a same shape."); + PADDLE_ENFORCE(ctx->HasInput("Out"), "Input(Out) should be not null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) should be not null."); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Out"), + ctx->GetInputDim(framework::GradVarName("Out")), + "Input(Out) and its gradients should have a same shape."); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index 0f8998b99e93b5ed6c9b43ad7adabc2d515c1ff1..63e379a3b31a6c75aab0c56b4ce1b988fa7f0318 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -26,13 +26,13 @@ class SoftmaxKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); - auto* Y = context.Output("Y"); + auto* Out = context.Output("Out"); // allocate memory on device. - Y->mutable_data(context.GetPlace()); + Out->mutable_data(context.GetPlace()); math::SoftmaxFunctor()( - context.template device_context(), X, Y); + context.template device_context(), X, Out); } }; @@ -40,15 +40,15 @@ template class SoftmaxGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* Y = context.Input("Y"); - auto* dY = context.Input(framework::GradVarName("Y")); + auto* Out = context.Input("Out"); + auto* dOut = context.Input(framework::GradVarName("Out")); auto* dX = context.Output(framework::GradVarName("X")); // allocate memory on device. 
dX->mutable_data(context.GetPlace()); math::SoftmaxGradFunctor()( - context.template device_context(), Y, dY, dX); + context.template device_context(), Out, dOut, dX); } }; diff --git a/paddle/operators/split_lod_tensor_op.cc b/paddle/operators/split_lod_tensor_op.cc index 89826ca6ee98d579f8b7c8795b6dc33cfa158ee1..2d8787d740c70f1d4696fdec381b572ecf031f57 100644 --- a/paddle/operators/split_lod_tensor_op.cc +++ b/paddle/operators/split_lod_tensor_op.cc @@ -45,8 +45,8 @@ class SplitLoDTensorOp : public framework::OperatorBase { auto &x_lod = x.lod(); auto &mask_dim = mask.dims(); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(dev_place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); std::unique_ptr cpu_mask{new framework::LoDTensor()}; if (platform::is_cpu_place(mask.place())) { diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index 9529aab573a73f55f7a67d39cde99aca023f473e..53e38ec70336ca7f2d7c142e5fb1bbe427ab2957 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -40,8 +40,9 @@ class WriteToArrayOp : public ArrayOp { if (x_tensor.memory_size() > 0) { auto *out_tensor = &out->at(offset); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); CopyFrom(x_tensor, place, dev_ctx, out_tensor); out_tensor->set_lod(x_tensor.lod()); @@ -132,8 +133,9 @@ class ReadFromArrayOp : public ArrayOp { auto *out_tensor = out->GetMutable(); size_t offset = GetOffset(scope, place); if (offset < x_array.size()) { - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(x_array[offset], place, dev_ctx, out_tensor); out_tensor->set_lod(x_array[offset].lod()); } else { diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index e450ef32a432a9dede05835341a166454a551889..ea07f2e002cb76d09a11f7a5305c2d45b780e7bd 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -17,7 +17,7 @@ namespace platform { DeviceContextPool* DeviceContextPool::pool = nullptr; -const platform::DeviceContext* DeviceContextPool::Borrow( +const platform::DeviceContext* DeviceContextPool::Get( const platform::Place& place) { auto it = device_contexts_.find(place); if (it == device_contexts_.end()) { @@ -28,24 +28,6 @@ const platform::DeviceContext* DeviceContextPool::Borrow( return it->second; } -std::vector DeviceContextPool::Borrow( - const std::vector& places) { - PADDLE_ENFORCE_GT(places.size(), 0); - PADDLE_ENFORCE_LE(places.size(), device_contexts_.size()); - std::vector borrowed_contexts; - for (auto& place : places) { - auto it = device_contexts_.find(place); - if (it != device_contexts_.end()) { - borrowed_contexts.emplace_back(it->second); - } else { - PADDLE_THROW( - "'Place' is not supported, Please re-compile with WITH_GPU " - "option"); - } - } - return borrowed_contexts; -} - DeviceContextPool::DeviceContextPool( const std::vector& places) { PADDLE_ENFORCE_GT(places.size(), 0); diff --git a/paddle/platform/device_context.h 
b/paddle/platform/device_context.h index 8ba12e1657b6671b36fa9ad3f498820d61af585e..fd441d27f97e463f59177e9aab2355766f04db73 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -52,6 +52,14 @@ class CPUDeviceContext : public DeviceContext { std::unique_ptr eigen_device_; }; +template +struct DefaultDeviceContextType; + +template <> +struct DefaultDeviceContextType { + using TYPE = CPUDeviceContext; +}; + #ifdef PADDLE_WITH_CUDA class EigenCudaStreamDevice; @@ -90,6 +98,11 @@ class CUDADeviceContext : public DeviceContext { cublasHandle_t cublas_handle_; }; +template <> +struct DefaultDeviceContextType { + using T = CUDADeviceContext; +}; + class CUDNNDeviceContext : public CUDADeviceContext { public: explicit CUDNNDeviceContext(CUDAPlace place); @@ -109,13 +122,13 @@ class DeviceContextPool { public: explicit DeviceContextPool(const std::vector& places); - static DeviceContextPool& Get() { + static DeviceContextPool& Instance() { PADDLE_ENFORCE_NOT_NULL(pool, "Need to Create DeviceContextPool first!"); return *pool; } /*! \brief Create should only called by Init function */ - static DeviceContextPool& Create(const std::vector& places) { + static DeviceContextPool& Init(const std::vector& places) { if (pool == nullptr) { pool = new DeviceContextPool(places); } @@ -123,13 +136,14 @@ class DeviceContextPool { } /*! \brief Return handle of single device context. */ - const platform::DeviceContext* Borrow(const platform::Place& place); - - /*! \brief Return handle of multi-device context. */ - std::vector Borrow( - const std::vector& places); + const platform::DeviceContext* Get(const platform::Place& place); - ~DeviceContextPool() {} + template + const typename DefaultDeviceContextType::TYPE* GetByPlace( + const Place& place) { + return reinterpret_cast< + const typename DefaultDeviceContextType::TYPE*>(Get(place)); + } private: static DeviceContextPool* pool; diff --git a/paddle/platform/device_context_test.cu b/paddle/platform/device_context_test.cu index 91011bf71ccf4295075aa2ed73dffa92cb22d773..ca10cf34639376798bf5ba05970c9c734e5a1ef8 100644 --- a/paddle/platform/device_context_test.cu +++ b/paddle/platform/device_context_test.cu @@ -71,35 +71,20 @@ TEST(Device, DeviceContextPool) { using paddle::platform::CPUPlace; using paddle::platform::CUDAPlace; - DeviceContextPool& pool = DeviceContextPool::Get(); - auto cpu_dev_ctx1 = pool.Borrow(CPUPlace()); - auto cpu_dev_ctx2 = pool.Borrow(CPUPlace()); - EXPECT_TRUE(cpu_dev_ctx2 == cpu_dev_ctx1); + DeviceContextPool& pool = DeviceContextPool::Instance(); + auto cpu_dev_ctx1 = pool.Get(CPUPlace()); + auto cpu_dev_ctx2 = pool.Get(CPUPlace()); + ASSERT_EQ(cpu_dev_ctx2, cpu_dev_ctx1); std::vector gpu_places; int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < count; ++i) { - gpu_places.emplace_back(CUDAPlace(i)); - } - auto dev_ctxs = pool.Borrow(gpu_places); - for (size_t i = 0; i < dev_ctxs.size(); ++i) { - auto* dev_ctx = static_cast(dev_ctxs[i]); - - // check same as CUDAPlace(i) - CUDAPlace place = boost::get(dev_ctx->GetPlace()); - EXPECT_EQ(place.GetDeviceId(), static_cast(i)); + auto dev_ctx = pool.Get(CUDAPlace(i)); + ASSERT_NE(dev_ctx, nullptr); } } int main(int argc, char** argv) { - int dev_count = paddle::platform::GetCUDADeviceCount(); - if (dev_count <= 1) { - LOG(WARNING) << "Cannot test multi-gpu DeviceContextPool, because the CUDA " - "device count is " - << dev_count; - return 0; - } - std::vector places; places.emplace_back(paddle::platform::CPUPlace()); @@ -109,7 +94,7 @@ int 
main(int argc, char** argv) { } VLOG(0) << " DeviceCount " << count; - paddle::platform::DeviceContextPool::Create(places); + paddle::platform::DeviceContextPool::Init(places); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/paddle/platform/nccl_test.cu b/paddle/platform/nccl_test.cu index 8f815863a72b118a0a258a2aa4276a33b95b0e46..ef6d845874745af1150e4425f8d6be416cc44ece 100644 --- a/paddle/platform/nccl_test.cu +++ b/paddle/platform/nccl_test.cu @@ -144,7 +144,7 @@ int main(int argc, char** argv) { } VLOG(0) << " DeviceCount " << count; - paddle::platform::DeviceContextPool::Create(places); + paddle::platform::DeviceContextPool::Init(places); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/paddle/platform/place.h b/paddle/platform/place.h index d25eaa689f4a4baa951db5c61bbf99288e365ee1..76b5c502cc48431a4e9b13b07505978884576e1d 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include - +#include "paddle/platform/enforce.h" #include "paddle/platform/variant.h" namespace paddle { @@ -64,5 +64,31 @@ bool places_are_same_class(const Place &, const Place &); std::ostream &operator<<(std::ostream &, const Place &); +template +struct PlaceVisitorWrapper + : public boost::static_visitor { + const Visitor &visitor_; + explicit PlaceVisitorWrapper(const Visitor &visitor) : visitor_(visitor) {} + + typename Visitor::result_type operator()(const CPUPlace &cpu) const { + return visitor_(cpu); + } + + typename Visitor::result_type operator()(const CUDAPlace &cuda) const { +#ifdef PADDLE_WITH_CUDA + return visitor_(cuda); +#else + PADDLE_THROW("Paddle is not compiled with CUDA. Cannot visit cuda device"); + return typename Visitor::result_type(); +#endif + } +}; + +template +typename Visitor::result_type VisitPlace(const Place &place, + const Visitor &visitor) { + return boost::apply_visitor(PlaceVisitorWrapper(visitor), place); +} + } // namespace platform } // namespace paddle diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h index 67244d82602906231ac1fc870adccc7e82869407..4d5e73e2c266b301de4f19e09be7ab4009c936d3 100644 --- a/paddle/pybind/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -63,9 +63,10 @@ struct CastToPyBufferImpl { auto *dst_ptr = static_cast(dst_tensor.mutable_data( tensor.dims(), platform::CPUPlace())); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); auto dev_ctx = static_cast( - pool.Borrow(tensor.place())); + pool.Get(tensor.place())); paddle::platform::GpuMemcpyAsync( dst_ptr, src_ptr, sizeof(CUR_TYPE) * tensor.numel(), @@ -137,9 +138,9 @@ void PyCUDATensorSetFromArray( self.Resize(framework::make_ddim(dims)); auto *dst = self.mutable_data(place); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto dev_ctx = - static_cast(pool.Borrow(place)); + static_cast(pool.Get(place)); paddle::platform::GpuMemcpyAsync(dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice, dev_ctx->stream()); } diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index c72b5730695dbc4f772015f1fb8dec6814cd1837..225b41c5043b5792abb90bbad53cbbfce9a3156e 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -36,7 +36,7 @@ def __read_gflags_from_env__(): """ import sys import 
core - read_env_flags = ['use_pinned_memory'] + read_env_flags = ['use_pinned_memory', 'check_nan_inf'] if core.is_compile_gpu(): read_env_flags.append('fraction_of_gpu_memory_to_use') core.init_gflags([sys.argv[0]] + diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index e147ac22ad289eb00c83def66974d875fcdc31f8..69a732fc45a1946f260cdd9a9c2da150b87c3ddd 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -180,10 +180,22 @@ def save_inference_model(dirname, :return: None """ + if isinstance(feeded_var_names, basestring): + feeded_var_names = [feeded_var_names] + else: + if not (bool(feeded_var_names) and all( + isinstance(name, basestring) for name in feeded_var_names)): + raise ValueError("'feed_var_names' should be a list of str.") + + if isinstance(target_vars, Variable): + feeded_var_names = [feeded_var_names] + else: + if not (bool(target_vars) and all( + isinstance(var, Variable) for var in target_vars)): + raise ValueError("'target_vars' should be a list of Variable.") + if main_program is None: main_program = default_main_program() - if not isinstance(target_vars, list): - target_vars = [target_vars] if not os.path.isdir(dirname): os.makedirs(dirname) diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index a076f26f7ff279812f762bd62f72195dc2376378..4469f7285efe1c31d0955c6dd4ba3ecac08070af 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -184,7 +184,7 @@ class LayerHelper(object): self.append_op( type=act_type, inputs={"X": [input_var]}, - outputs={"Y": [tmp]}, + outputs={"Out": [tmp]}, attrs=act) return tmp diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index ee9d7554fb1f0cc3b9229f046f8bcdd4dbd7c35d..2a462ee6cb47496b17a8f584e56ac2c8934b319a 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -386,7 +386,8 @@ def square_error_cost(input, label, **kwargs): square_out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( - type='square', inputs={'X': [minus_out]}, outputs={'Y': [square_out]}) + type='square', inputs={'X': [minus_out]}, + outputs={'Out': [square_out]}) return square_out @@ -604,7 +605,7 @@ def sequence_pool(input, pool_type, **kwargs): sqrt : out.data = [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2), 6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2) max : out.data = [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1) - + Args: input(variable): The input variable which is a LoDTensor. pool_type (string): The pooling type of sequence_pool. @@ -616,7 +617,7 @@ def sequence_pool(input, pool_type, **kwargs): Examples: .. code-block:: python - + x = fluid.layers.data(name='x', shape=[7, 1], dtype='float32', lod_level=1) avg_x = fluid.layers.sequence_pool(input=x, pool_type='average') @@ -654,7 +655,7 @@ def sequence_first_step(input, **kwargs): out.dim = [3, 1] with condition len(x.lod[-1]) - 1 == out.dims[0] out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1) - + Args: input(variable): The input variable which is a LoDTensor. @@ -664,7 +665,7 @@ def sequence_first_step(input, **kwargs): Examples: .. 
code-block:: python - + x = fluid.layers.data(name='x', shape=[7, 1], dtype='float32', lod_level=1) x_first_step = fluid.layers.sequence_first_step(input=x) @@ -687,7 +688,7 @@ def sequence_last_step(input, **kwargs): out.dim = [3, 1] with condition len(x.lod[-1]) - 1 == out.dims[0] out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1) - + Args: input(variable): The input variable which is a LoDTensor. @@ -697,7 +698,7 @@ def sequence_last_step(input, **kwargs): Examples: .. code-block:: python - + x = fluid.layers.data(name='x', shape=[7, 1], dtype='float32', lod_level=1) x_last_step = fluid.layers.sequence_last_step(input=x) @@ -1132,7 +1133,7 @@ def reduce_sum(input, dim=None, keep_dim=False): Returns: Variable: The reduced Tensor variable. - + Examples: .. code-block:: python @@ -1176,7 +1177,7 @@ def reduce_mean(input, dim=None, keep_dim=False): Returns: Variable: The reduced Tensor variable. - + Examples: .. code-block:: python diff --git a/python/paddle/v2/fluid/tests/test_activation_op.py b/python/paddle/v2/fluid/tests/test_activation_op.py index b052374dc7ec3c5684d6adfda6b9d000c5e19fe0..03eb7deb9a35933e5a1676a262a371c69151e6d1 100644 --- a/python/paddle/v2/fluid/tests/test_activation_op.py +++ b/python/paddle/v2/fluid/tests/test_activation_op.py @@ -10,13 +10,13 @@ class TestExp(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.exp(self.inputs['X'])} + self.outputs = {'Out': np.exp(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSigmoid(OpTest): @@ -25,13 +25,13 @@ class TestSigmoid(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))} + self.outputs = {'Out': 1 / (1 + np.exp(-self.inputs['X']))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.008) + self.check_grad(['X'], 'Out', max_relative_error=0.008) class TestLogSigmoid(OpTest): @@ -40,13 +40,13 @@ class TestLogSigmoid(OpTest): self.inputs = { 'X': np.random.uniform(-1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.log(1 / (1 + np.exp(-self.inputs['X'])))} + self.outputs = {'Out': np.log(1 / (1 + np.exp(-self.inputs['X'])))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.008) + self.check_grad(['X'], 'Out', max_relative_error=0.008) class TestTanh(OpTest): @@ -55,13 +55,13 @@ class TestTanh(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.tanh(self.inputs['X'])} + self.outputs = {'Out': np.tanh(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestTanhShrink(OpTest): @@ -70,13 +70,13 @@ class TestTanhShrink(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [10, 17]).astype("float32") } - self.outputs = {'Y': self.inputs['X'] - np.tanh(self.inputs['X'])} + self.outputs = {'Out': self.inputs['X'] - np.tanh(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.008) + self.check_grad(['X'], 'Out', 
max_relative_error=0.008) class TestHardShrink(OpTest): @@ -90,13 +90,13 @@ class TestHardShrink(OpTest): t = np.copy(x) t[(t >= -threshold) & (t <= threshold)] = 0 - self.outputs = {'Y': t} + self.outputs = {'Out': t} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.005) + self.check_grad(['X'], 'Out', max_relative_error=0.005) class TestSoftShrink(OpTest): @@ -110,13 +110,13 @@ class TestSoftShrink(OpTest): y = np.copy(self.inputs['X']) y = (y < -lambda_val) * (y + lambda_val) + (y > lambda_val) * ( y - lambda_val) - self.outputs = {'Y': y} + self.outputs = {'Out': y} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSqrt(OpTest): @@ -125,13 +125,13 @@ class TestSqrt(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.sqrt(self.inputs['X'])} + self.outputs = {'Out': np.sqrt(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestAbs(OpTest): @@ -144,13 +144,13 @@ class TestAbs(OpTest): # we should avoid this x[np.abs(x) < 0.005] = 0.02 self.inputs = {'X': x} - self.outputs = {'Y': np.abs(self.inputs['X'])} + self.outputs = {'Out': np.abs(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestCeil(OpTest): @@ -158,13 +158,13 @@ class TestCeil(OpTest): self.op_type = "ceil" x = np.random.uniform(-1, 1, [4, 4]).astype("float32") self.inputs = {'X': x} - self.outputs = {'Y': np.ceil(self.inputs['X'])} + self.outputs = {'Out': np.ceil(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestFloor(OpTest): @@ -173,13 +173,13 @@ class TestFloor(OpTest): x = np.random.uniform(-1, 1, [4, 4]).astype("float32") self.inputs = {'X': x} # numpy floor need +1 - self.outputs = {'Y': np.floor(self.inputs['X']) + 1.0} + self.outputs = {'Out': np.floor(self.inputs['X']) + 1.0} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestRound(OpTest): @@ -187,13 +187,13 @@ class TestRound(OpTest): self.op_type = "round" x = np.random.uniform(-1, 1, [4, 4]).astype("float32") self.inputs = {'X': x} - self.outputs = {'Y': np.round(self.inputs['X'])} + self.outputs = {'Out': np.round(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestRelu(OpTest): @@ -203,13 +203,13 @@ class TestRelu(OpTest): # The same reason with TestAbs x[np.abs(x) < 0.005] = 0.02 self.inputs = {'X': x} - self.outputs = {'Y': np.maximum(self.inputs['X'], 0)} + self.outputs = {'Out': np.maximum(self.inputs['X'], 0)} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', 
max_relative_error=0.007) class TestBRelu(OpTest): @@ -227,13 +227,13 @@ class TestBRelu(OpTest): t = np.copy(x) t[t < t_min] = t_min t[t > t_max] = t_max - self.outputs = {'Y': t} + self.outputs = {'Out': t} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 'Out', max_relative_error=0.02) class TestRelu6(OpTest): @@ -248,14 +248,14 @@ class TestRelu6(OpTest): self.inputs = {'X': x} self.attrs = {'threshold': threshold} self.outputs = { - 'Y': np.minimum(np.maximum(self.inputs['X'], 0), threshold) + 'Out': np.minimum(np.maximum(self.inputs['X'], 0), threshold) } def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 'Out', max_relative_error=0.02) class TestSoftRelu(OpTest): @@ -271,13 +271,13 @@ class TestSoftRelu(OpTest): t = np.copy(x) t[t < -threshold] = -threshold t[t > threshold] = threshold - self.outputs = {'Y': np.log((np.exp(t) + 1))} + self.outputs = {'Out': np.log((np.exp(t) + 1))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 'Out', max_relative_error=0.02) class TestELU(OpTest): @@ -290,27 +290,27 @@ class TestELU(OpTest): self.inputs = {'X': x} self.attrs = {'alpha': alpha} self.outputs = { - 'Y': np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1)) + 'Out': np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1)) } def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 'Out', max_relative_error=0.02) class TestReciprocal(OpTest): def setUp(self): self.op_type = "reciprocal" self.inputs = {'X': np.random.uniform(1, 2, [11, 17]).astype("float32")} - self.outputs = {'Y': np.reciprocal(self.inputs['X'])} + self.outputs = {'Out': np.reciprocal(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.01) + self.check_grad(['X'], 'Out', max_relative_error=0.01) class TestLog(OpTest): @@ -319,13 +319,13 @@ class TestLog(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.log(self.inputs['X'])} + self.outputs = {'Out': np.log(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSquare(OpTest): @@ -334,13 +334,13 @@ class TestSquare(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.square(self.inputs['X'])} + self.outputs = {'Out': np.square(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestPow(OpTest): @@ -348,13 +348,13 @@ class TestPow(OpTest): self.op_type = "pow" self.inputs = {'X': np.random.uniform(1, 2, [11, 17]).astype("float32")} self.attrs = {'factor': 3.0} - self.outputs = {'Y': np.power(self.inputs['X'], 3)} + self.outputs = {'Out': np.power(self.inputs['X'], 3)} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 
'Out', max_relative_error=0.02) class TestSTanh(OpTest): @@ -366,13 +366,13 @@ class TestSTanh(OpTest): scale_a = 2.0 / 3.0 scale_b = 1.7159 self.attrs = {'scale_a': scale_a, 'scale_b': scale_b} - self.outputs = {'Y': scale_b * np.tanh(self.inputs['X'] * scale_a)} + self.outputs = {'Out': scale_b * np.tanh(self.inputs['X'] * scale_a)} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSoftplus(OpTest): @@ -381,13 +381,13 @@ class TestSoftplus(OpTest): self.inputs = { 'X': np.random.uniform(-1, 1, [11, 17]).astype("float64") } - self.outputs = {'Y': np.log(1 + np.exp(self.inputs['X']))} + self.outputs = {'Out': np.log(1 + np.exp(self.inputs['X']))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSoftsign(OpTest): @@ -397,14 +397,14 @@ class TestSoftsign(OpTest): 'X': np.random.uniform(-1, 1, [11, 17]).astype("float32") } self.outputs = { - 'Y': np.divide(self.inputs['X'], 1 + np.abs(self.inputs['X'])) + 'Out': np.divide(self.inputs['X'], 1 + np.abs(self.inputs['X'])) } def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestThresholdedRelu(OpTest): @@ -419,13 +419,13 @@ class TestThresholdedRelu(OpTest): self.inputs = {'X': X} self.attrs = {'threshold': threshold} - self.outputs = {'Y': (X > threshold) * X} + self.outputs = {'Out': (X > threshold) * X} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=self.relative_error) + self.check_grad(['X'], 'Out', max_relative_error=self.relative_error) class TestHardSigmoid(OpTest): @@ -447,13 +447,13 @@ class TestHardSigmoid(OpTest): upper_threshold - 0.2 temp = X * slope + offset - self.outputs = {'Y': np.maximum(0.0, np.minimum(1.0, temp))} + self.outputs = {'Out': np.maximum(0.0, np.minimum(1.0, temp))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.002) + self.check_grad(['X'], 'Out', max_relative_error=0.002) class TestSwish(OpTest): @@ -462,13 +462,13 @@ class TestSwish(OpTest): X = np.random.uniform(0.1, 1, [11, 17]).astype("float32") self.inputs = {'X': X} self.attrs = {'beta': 2.3} - self.outputs = {'Y': X * expit(self.attrs['beta'] * X)} + self.outputs = {'Out': X * expit(self.attrs['beta'] * X)} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.008) + self.check_grad(['X'], 'Out', max_relative_error=0.008) if __name__ == "__main__": diff --git a/python/paddle/v2/fluid/tests/test_net.py b/python/paddle/v2/fluid/tests/test_net.py index 318df08a9e73ac95cab73c34182bc6220ef6c681..d9fe55a8af5c750c5c926e875ddbb645f8abb1a0 100644 --- a/python/paddle/v2/fluid/tests/test_net.py +++ b/python/paddle/v2/fluid/tests/test_net.py @@ -7,7 +7,7 @@ def fc(X, W, Y): ret_v = core.Net.create() ret_v.append_op(Operator("mul", X="X", Y="W", Out="pre_activation")) - ret_v.append_op(Operator("sigmoid", X="pre_activation", Y=Y)) + ret_v.append_op(Operator("sigmoid", X="pre_activation", Out=Y)) ret_v.complete_add_op(True) return ret_v @@ -30,7 +30,7 @@ Op(plain_net), inputs:{all[W, X, Y]}, 
outputs:{all[Out, fc.out, pre_activation]}
     Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
         Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
             Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}.
-            Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Y[fc.out]}.
+            Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Out[fc.out]}.
 '''
         self.assertEqual(expected, "\n" + str(net))
diff --git a/python/paddle/v2/fluid/tests/test_softmax_op.py b/python/paddle/v2/fluid/tests/test_softmax_op.py
index b41c810d9a6269c934a434b085748a86deccb475..136fc0283afd6acf1de4baae5e681789662295ce 100644
--- a/python/paddle/v2/fluid/tests/test_softmax_op.py
+++ b/python/paddle/v2/fluid/tests/test_softmax_op.py
@@ -17,14 +17,14 @@ class TestSoftmaxOp(OpTest):
             'X': np.random.uniform(0.1, 1, [10, 10]).astype("float32")
         }
         self.outputs = {
-            'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
+            'Out': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
         }
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Y')
+        self.check_grad(['X'], 'Out')
 
 
 if __name__ == "__main__":
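Editorial note: most of the operator-level changes above are mechanical substitutions of the old pool API (DeviceContextPool::Get() followed by Borrow(place)) with the renamed one (DeviceContextPool::Instance() followed by Get(place), with Init(places) doing the one-time construction); device_context.h additionally gains a typed GetByPlace<Place>() helper backed by DefaultDeviceContextType. The stand-alone sketch below mimics that calling convention with mock types of our own (MockPlace, MockDeviceContext, MockPool are hypothetical names, not Paddle classes), purely to illustrate the singleton-plus-lookup pattern the patch converges on:

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-ins for platform::Place / platform::DeviceContext.
struct MockPlace {
  std::string name;  // e.g. "CPUPlace" or "CUDAPlace(0)"
  bool operator<(const MockPlace& o) const { return name < o.name; }
};
struct MockDeviceContext {
  MockPlace place;
};

// Minimal pool mirroring the renamed API: Init() builds the singleton once,
// Instance() returns it, Get(place) hands back one context per place.
class MockPool {
 public:
  static MockPool& Init(const std::vector<MockPlace>& places) {
    if (pool == nullptr) pool = new MockPool(places);
    return *pool;
  }
  static MockPool& Instance() {
    assert(pool != nullptr && "call Init() first");
    return *pool;
  }
  const MockDeviceContext* Get(const MockPlace& place) const {
    auto it = contexts_.find(place);
    return it == contexts_.end() ? nullptr : &it->second;
  }

 private:
  explicit MockPool(const std::vector<MockPlace>& places) {
    for (const auto& p : places) contexts_[p] = MockDeviceContext{p};
  }
  static MockPool* pool;
  std::map<MockPlace, MockDeviceContext> contexts_;
};
MockPool* MockPool::pool = nullptr;

int main() {
  MockPool::Init({{"CPUPlace"}, {"CUDAPlace(0)"}});
  auto& pool = MockPool::Instance();   // was: DeviceContextPool::Get()
  auto* ctx = pool.Get({"CPUPlace"});  // was: pool.Borrow(place)
  assert(ctx != nullptr && ctx->place.name == "CPUPlace");
  return 0;
}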