Commit 5f3dd266 authored by Yang Yu

Merge branch 'feature/is_nan' into feature/check_nan_executor

@@ -252,6 +252,11 @@ first_seq
 .. autoclass:: paddle.v2.layer.first_seq
     :noindex:
 
+sub_seq
+---------
+.. autoclass:: paddle.v2.layer.sub_seq
+    :noindex:
+
 concat
 ------
 .. autoclass:: paddle.v2.layer.concat
...
@@ -68,12 +68,6 @@ scale
     :noindex:
 
-reshape
----------
-.. autofunction:: paddle.v2.fluid.layers.reshape
-    :noindex:
-
 transpose
 ---------
 .. autofunction:: paddle.v2.fluid.layers.transpose
...
@@ -79,7 +79,7 @@ class Optimizer(object):
     def minimize(self, loss, parameter_list):
         """Add operations to minimize `loss` by updating `parameter_list`.
 
-        This method combines interface `append_backward_ops()` and
+        This method combines interface `append_backward()` and
         `create_optimization_pass()` into one.
         """
         params_grads = self.create_backward_pass(loss, parameter_list)
...
@@ -5,7 +5,11 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3)
 cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
 nv_test(dim_test SRCS dim_test.cu DEPS ddim)
-cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
+if (WITH_GPU)
+  nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS ddim place paddle_memory device_context)
+else()
+  cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS ddim place paddle_memory device_context)
+endif ()
 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
 cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor)
@@ -37,7 +41,7 @@ cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)
 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
-cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
+nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
 py_proto_compile(framework_py_proto SRCS framework.proto)
 # Generate an empty __init__.py to make framework_py_proto as a valid python module.
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/framework/data_transform.h"
+#include "paddle/framework/lod_tensor.h"
 
 namespace paddle {
 namespace framework {
...
@@ -27,7 +27,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-using DataTransformFN =
+using DataTransformFn =
     std::function<void(const std::vector<platform::DeviceContext*> ctx,
                        const Variable& in, Variable* out)>;
 using KernelTypePair = std::pair<OpKernelType, OpKernelType>;
@@ -47,7 +47,7 @@ struct KernelTypePairHash {
 };
 
 using DataTransformMap =
-    std::unordered_map<KernelTypePair, DataTransformFN, KernelTypePairHash>;
+    std::unordered_map<KernelTypePair, DataTransformFn, KernelTypePairHash>;
 
 class DataTransformFnMap {
  public:
@@ -58,25 +58,25 @@ class DataTransformFnMap {
   }
 
   void Insert(const OpKernelType& left, const OpKernelType& right,
-              const DataTransformFN& data_tranform_fn) {
+              const DataTransformFn& data_tranform_fn) {
     Insert(std::make_pair(left, right), data_tranform_fn);
   }
 
   void Insert(const KernelTypePair& kernel_type_pair,
-              const DataTransformFN& data_tranform_fn) {
+              const DataTransformFn& data_tranform_fn) {
     PADDLE_ENFORCE(!Has(kernel_type_pair),
                    "KernelTypePair %s has been registered", "");
     map_.insert({kernel_type_pair, data_tranform_fn});
   }
 
-  const DataTransformFN& Get(const KernelTypePair& key_pair) const {
+  const DataTransformFn& Get(const KernelTypePair& key_pair) const {
     auto data_transformer = GetNullable(key_pair);
     PADDLE_ENFORCE_NOT_NULL(data_transformer,
-                            "DataTransformFN should not be NULL");
+                            "DataTransformFn should not be NULL");
     return *data_transformer;
   }
 
-  const DataTransformFN* GetNullable(const KernelTypePair& key_pair) const {
+  const DataTransformFn* GetNullable(const KernelTypePair& key_pair) const {
     auto it = map_.find(key_pair);
     if (it == map_.end()) {
       return nullptr;
...
@@ -11,36 +11,61 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include <array>
+#include <vector>
 
-#include "paddle/framework/data_transform.h"
 #include <gtest/gtest.h>
+
+#include "paddle/framework/data_transform.h"
 
 namespace paddle {
 namespace framework {
 using namespace platform;
 
+/**
+ * @brief cross validation of different kernel type transform
+ * We use four bit map represent different combination.
+ * If the field has multiple possible value, only choose two of them.
+ * For DataType, only test the FP32(float), FP64(double).
+ * e.g. 0000 -> FP32, CPUPlace, kNHWC, kPlain
+ *      1111 -> FP64, GPUPlace, kNCHW, kMKLDNN
+ */
+std::array<proto::DataType, 2> kDataType = {
+    {proto::DataType::FP32, proto::DataType::FP64}};
+
+std::array<Place, 2> kPlace = {{CPUPlace(), CUDAPlace(0)}};
+
+std::array<DataLayout, 2> kDataLayout = {
+    {DataLayout::kNHWC, DataLayout::kNCHW}};
+
+std::array<LibraryType, 2> kLibraryType = {
+    {LibraryType::kPlain, LibraryType::kMKLDNN}};
+
+OpKernelType GenFromBit(const std::vector<bool> bits) {
+  return OpKernelType(kDataType[bits[0]], kPlace[bits[1]], kDataLayout[bits[2]],
+                      kLibraryType[bits[3]]);
+}
+
 int test_value = 0;
 
-OpKernelType kernel_type_1(proto::DataType::FP32, CPUPlace(), DataLayout::kNCHW,
-                           LibraryType::kCUDNN);
-OpKernelType kernel_type_2(proto::DataType::FP32, CUDAPlace(0),
-                           DataLayout::kNCHW, LibraryType::kCUDNN);
-OpKernelType kernel_type_3(proto::DataType::FP16, CUDAPlace(0),
-                           DataLayout::kNCHW, LibraryType::kCUDNN);
+auto kernel0 = GenFromBit({0, 0, 0, 0});
+auto kernel1 = GenFromBit({0, 0, 0, 1});
+auto kernel2 = GenFromBit({0, 0, 1, 0});
+auto kernel3 = GenFromBit({0, 0, 1, 1});
 
-void type1_to_type2(std::vector<platform::DeviceContext*> ctx,
-                    const Variable& in, Variable* out) {
+void TransDataType_t(std::vector<platform::DeviceContext*> ctx,
+                     const Variable& in, Variable* out) {
   test_value++;
 }
 
-void type2_to_type3(std::vector<platform::DeviceContext*> ctx,
-                    const Variable& in, Variable* out) {
+void TransDataLayout_t(std::vector<platform::DeviceContext*> ctx,
+                       const Variable& in, Variable* out) {
   test_value--;
 }
 
-void type1_to_type3(std::vector<platform::DeviceContext*> ctx,
-                    const Variable& in, Variable* out) {
+void TransLibraryType_t(std::vector<platform::DeviceContext*> ctx,
+                        const Variable& in, Variable* out) {
   test_value += 2;
 }
@@ -49,30 +74,25 @@ void type1_to_type3(std::vector<platform::DeviceContext*> ctx,
 namespace frw = paddle::framework;
 
-REGISTER_DATA_TRANSFORM_FN(frw::kernel_type_1, frw::kernel_type_2,
-                           frw::type1_to_type2);
-REGISTER_DATA_TRANSFORM_FN(frw::kernel_type_2, frw::kernel_type_3,
-                           frw::type2_to_type3);
-REGISTER_DATA_TRANSFORM_FN(frw::kernel_type_1, frw::kernel_type_3,
-                           frw::type1_to_type3);
+REGISTER_DATA_TRANSFORM_FN(frw::kernel0, frw::kernel1, frw::TransDataType_t);
+REGISTER_DATA_TRANSFORM_FN(frw::kernel1, frw::kernel2, frw::TransDataLayout_t);
+REGISTER_DATA_TRANSFORM_FN(frw::kernel0, frw::kernel2, frw::TransLibraryType_t);
 
 TEST(DataTransform, Register) {
   using namespace paddle::framework;
   using namespace paddle::platform;
 
   auto& instance = DataTransformFnMap::Instance();
+  ASSERT_EQ(instance.Map().size(), 3UL);
   std::vector<DeviceContext*> ctx;
   paddle::framework::Variable in;
   paddle::framework::Variable out;
 
-  instance.Get(std::make_pair(frw::kernel_type_1, frw::kernel_type_2))(ctx, in,
-                                                                       &out);
+  instance.Get(std::make_pair(frw::kernel0, frw::kernel1))(ctx, in, &out);
   ASSERT_EQ(test_value, 1);
-  instance.Get(std::make_pair(frw::kernel_type_2, frw::kernel_type_3))(ctx, in,
-                                                                       &out);
+
+  instance.Get(std::make_pair(frw::kernel1, frw::kernel2))(ctx, in, &out);
   ASSERT_EQ(test_value, 0);
-  instance.Get(std::make_pair(frw::kernel_type_1, frw::kernel_type_3))(ctx, in,
-                                                                       &out);
+
+  instance.Get(std::make_pair(frw::kernel0, frw::kernel2))(ctx, in, &out);
   ASSERT_EQ(test_value, 2);
 }
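As a reading aid for the bit map above: each bit indexes one of the two candidate values of its field, so `kernel3 = GenFromBit({0, 0, 1, 1})` is equivalent to the spelled-out construction below (an illustrative sketch, using only names defined in this test file).

    // What kernel3 = GenFromBit({0, 0, 1, 1}) expands to, field by field:
    OpKernelType kernel3_expanded(proto::DataType::FP32,  // bits[0] == 0
                                  CPUPlace(),             // bits[1] == 0
                                  DataLayout::kNCHW,      // bits[2] == 1
                                  LibraryType::kMKLDNN);  // bits[3] == 1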
@@ -123,7 +123,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
       }
     }
   }
-  if (create_local_scope) {
+  if (create_vars && create_local_scope) {
     scope->DeleteScope(local_scope);
   }
 }
...
@@ -20,7 +20,11 @@ namespace framework {
 // For more details about the design of LibraryType, Please refer to
 // https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md#library
 
-enum class LibraryType { kPlain = 0, kMKLDNN = 1, kCUDNN = 2 };
+enum class LibraryType {
+  kPlain = 0,
+  kMKLDNN = 1,
+  kCUDNN = 2,
+};
 
 inline std::string LibraryTypeToString(const LibraryType& library_type) {
   switch (library_type) {
@@ -31,7 +35,26 @@ inline std::string LibraryTypeToString(const LibraryType& library_type) {
     case LibraryType::kCUDNN:
       return "CUDNN";
     default:
-      PADDLE_THROW("unknown LibraryType %d", library_type);
+      PADDLE_THROW("unknown LibraryType %d", static_cast<int>(library_type));
+  }
+}
+
+inline LibraryType StringToLibraryType(const char* ctype) {
+  std::string s(ctype);
+  if (s == std::string("PLAIN")) {
+    return LibraryType::kPlain;
+  } else if (s == std::string("MKLDNN")) {
+    return LibraryType::kMKLDNN;
+  } else if (s == std::string("CUDNN")) {
+    return LibraryType::kCUDNN;
+    // To be compatible with register macro.
+    // CPU, CUDA, PLAIN are same library type.
+  } else if (s == std::string("CPU")) {
+    return LibraryType::kPlain;
+  } else if (s == std::string("CUDA")) {
+    return LibraryType::kPlain;
+  } else {
+    PADDLE_THROW("Unknown LibraryType %s", s.c_str());
   }
 }
...
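A quick sanity check of the new mapping (a hypothetical test, not part of this commit; it uses only the two helpers above). Note that the device tokens "CPU" and "CUDA" deliberately collapse to kPlain so the existing register macros stay valid.

    #include <gtest/gtest.h>

    #include "paddle/framework/library_type.h"

    // Hypothetical test (editorial): exercises the new string conversion.
    TEST(LibraryType, StringConversion) {
      using paddle::framework::LibraryType;
      using paddle::framework::LibraryTypeToString;
      using paddle::framework::StringToLibraryType;

      EXPECT_EQ(StringToLibraryType("MKLDNN"), LibraryType::kMKLDNN);
      EXPECT_EQ(StringToLibraryType("CUDNN"), LibraryType::kCUDNN);
      // Register-macro aliases: device names map to the plain library.
      EXPECT_EQ(StringToLibraryType("CPU"), LibraryType::kPlain);
      EXPECT_EQ(StringToLibraryType("CUDA"), LibraryType::kPlain);
      // The round trip only holds for canonical names.
      EXPECT_EQ(LibraryTypeToString(LibraryType::kCUDNN), "CUDNN");
    }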
@@ -88,6 +88,14 @@ OpDesc::OpDesc(const std::string &type, const VariableNameMap &inputs,
   need_update_ = true;
 }
 
+void OpDesc::CopyFrom(const OpDesc &op_desc) {
+  desc_.set_type(op_desc.Type());
+  inputs_ = op_desc.inputs_;
+  outputs_ = op_desc.outputs_;
+  attrs_ = op_desc.attrs_;
+  need_update_ = true;
+}
+
 OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog)
     : desc_(desc), need_update_(false) {
   // restore inputs_
...
@@ -35,6 +35,8 @@ class OpDesc {
   OpDesc(const proto::OpDesc &desc, ProgramDesc *prog);
 
+  void CopyFrom(const OpDesc &op_desc);
+
   proto::OpDesc *Proto();
 
   std::string Type() const { return desc_.type(); }
...
@@ -68,6 +68,8 @@ struct OpKernelType {
            data_type_ == o.data_type_ && data_layout_ == o.data_layout_ &&
            library_type_ == o.library_type_;
   }
+
+  bool operator!=(const OpKernelType& o) const { return !(*this == o); }
 };
 
 inline std::ostream& operator<<(std::ostream& os,
@@ -78,5 +80,11 @@ inline std::ostream& operator<<(std::ostream& os,
   return os;
 }
 
+inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
+  std::ostringstream stream;
+  stream << kernel_key;
+  return stream.str();
+}
+
 }  // namespace framework
 }  // namespace paddle
@@ -26,10 +26,8 @@ TEST(OpKernelType, ToString) {
   OpKernelType op_kernel_type(DataType::FP32, CPUPlace(), DataLayout::kNCHW,
                               LibraryType::kCUDNN);
 
-  std::ostringstream stream;
-  stream << op_kernel_type;
   ASSERT_EQ(
-      stream.str(),
+      paddle::framework::KernelTypeToString(op_kernel_type),
       "data_type[5]:data_layout[NCHW]:place[CPUPlace]:library_type[CUDNN]");
 }
@@ -48,4 +46,4 @@ TEST(OpKernelType, Hash) {
   OpKernelType::Hash hasher;
   ASSERT_NE(hasher(op_kernel_type_1), hasher(op_kernel_type_2));
-}
\ No newline at end of file
+}
@@ -79,30 +79,31 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
   using KERNEL_TYPE =
       typename std::tuple_element<I, std::tuple<KernelTypes...>>::type;
 
-  void operator()(const char* op_type) const {
+  void operator()(const char* op_type, const char* library_type) const {
     using T = typename KERNEL_TYPE::ELEMENT_TYPE;
-    OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType());
+    OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(),
+                     DataLayout::kAnyLayout, StringToLibraryType(library_type));
     OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE);
 
     constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value;
     OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelTypes...>
         func;
-    func(op_type);
+    func(op_type, library_type);
   }
 };
 
 template <typename PlaceType, size_t I, typename... KernelType>
 struct OpKernelRegistrarFunctor<PlaceType, true, I, KernelType...> {
-  void operator()(const char* op_type) const {}
+  void operator()(const char* op_type, const char* library_type) const {}
 };
 
 // User can register many kernel in one place. The data type could be different.
 template <typename PlaceType, typename... KernelType>
 class OpKernelRegistrar : public Registrar {
  public:
-  explicit OpKernelRegistrar(const char* op_type) {
+  explicit OpKernelRegistrar(const char* op_type, const char* library_type) {
     OpKernelRegistrarFunctor<PlaceType, false, 0, KernelType...> func;
-    func(op_type);
+    func(op_type, library_type);
   }
 };
@@ -181,7 +182,8 @@ class OpKernelRegistrar : public Registrar {
       __reg_op_kernel_##op_type##_##DEVICE_TYPE##__,                      \
       "REGISTER_OP_KERNEL must be called in global namespace");           \
   static ::paddle::framework::OpKernelRegistrar<place_class, __VA_ARGS__> \
-      __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type);      \
+      __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type,       \
+                                                          #DEVICE_TYPE);  \
   int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() {                \
     __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch();          \
     return 0;                                                             \
...
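To trace how the device token reaches the kernel key: REGISTER_OP_KERNEL now stringizes DEVICE_TYPE and hands it to the registrar, which converts it with StringToLibraryType. A rough sketch of the expansion for the conv2d/CUDNN registration that appears later in this commit (comments are editorial, not from the source):

    // Rough expansion sketch (editorial):
    //   REGISTER_OP_KERNEL(conv2d, CUDNN, paddle::platform::CUDAPlace,
    //                      paddle::operators::CudnnConvOpKernel<float>,
    //                      paddle::operators::CudnnConvOpKernel<double>);
    // becomes, after the macro change, approximately:
    static ::paddle::framework::OpKernelRegistrar<
        paddle::platform::CUDAPlace, paddle::operators::CudnnConvOpKernel<float>,
        paddle::operators::CudnnConvOpKernel<double>>
        __op_kernel_registrar_conv2d_CUDNN__("conv2d", "CUDNN");
    // Inside the registrar, StringToLibraryType("CUDNN") yields
    // LibraryType::kCUDNN, so each kernel is keyed roughly as
    // OpKernelType(data_type, CUDAPlace, kAnyLayout, kCUDNN). With
    // REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL the token is "CPU" or
    // "CUDA", both of which map to LibraryType::kPlain.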
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
 #include "paddle/framework/op_registry.h"
 
 #include <gtest/gtest.h>
@@ -182,3 +196,71 @@ TEST(OperatorRegistrar, Test) {
   using namespace paddle::framework;
   OperatorRegistrar<CosineOpComplete, CosineOpProtoAndCheckerMaker> reg("cos");
 }
+
+namespace paddle {
+namespace framework {
+
+class OpKernelTestMaker : public OpProtoAndCheckerMaker {
+ public:
+  OpKernelTestMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddComment("NoGradOp, same input output. no Grad");
+  }
+};
+
+class OpWithKernelTest : public OperatorWithKernel {
+ public:
+  using OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(InferShapeContext* ctx) const override {}
+
+  framework::OpKernelType GetActualKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(proto::DataType::FP32,
+                                   ctx.device_context());
+  }
+};
+
+template <typename DeviceContext, typename T>
+class OpKernelTest : public paddle::framework::OpKernel<T> {
+ public:
+  void Compute(const paddle::framework::ExecutionContext& ctx) const {}
+};
+
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_OP_WITHOUT_GRADIENT(op_with_kernel,
+                             paddle::framework::OpWithKernelTest,
+                             paddle::framework::OpKernelTestMaker);
+REGISTER_OP_CPU_KERNEL(
+    op_with_kernel,
+    paddle::framework::OpKernelTest<paddle::platform::CPUDeviceContext, float>);
+
+REGISTER_OP_CUDA_KERNEL(op_with_kernel,
+                        paddle::framework::OpKernelTest<
+                            paddle::platform::CUDADeviceContext, float>);
+
+TEST(OperatorRegistrar, CPU) {
+  paddle::framework::proto::OpDesc op_desc;
+  paddle::platform::CPUPlace cpu_place;
+  paddle::framework::Scope scope;
+
+  op_desc.set_type("op_with_kernel");
+  auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
+
+  op->Run(scope, cpu_place);
+}
+
+#ifdef PADDLE_WITH_CUDA
+TEST(OperatorRegistrar, CUDA) {
+  paddle::framework::proto::OpDesc op_desc;
+  paddle::platform::CUDAPlace cuda_place(0);
+  paddle::framework::Scope scope;
+
+  op_desc.set_type("op_with_kernel");
+  auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
+
+  op->Run(scope, cuda_place);
+}
+#endif
@@ -413,37 +413,51 @@ void OperatorWithKernel::Run(const Scope& scope,
     }
   }
 
   if (actual_kernel_key == expected_kernel_key) {
-    kernel_iter->second->Compute(ctx);
+    PADDLE_ENFORCE_EQ(actual_kernel_key.place_, expected_kernel_key.place_,
+                      "Currently, model parallelism is only supported between "
+                      "CPU and other devices. For example, multi-GPU model "
+                      "parallelism will fail.");
   } else {
-    Scope& op_scope = scope.NewScope();
-    auto input_vars = this->InputVars();
-    for (auto var_name : input_vars) {
-      op_scope.Var(var_name);
-    }
-
-    // TODO(qijun) get appropriate DeviceContext from DeviceContext pool
-    platform::DeviceContext* trans_dev_ctx = nullptr;
-    std::vector<platform::DeviceContext*> trans_dev_ctx_vec{trans_dev_ctx};
+    const DataTransformFn* trans_fun =
+        DataTransformFnMap::Instance().GetNullable(
+            std::make_pair(actual_kernel_key, expected_kernel_key));
+    if (trans_fun) {
+      auto input_vars = this->InputVars();
+      // TODO(qijun) filter the input vars that do not need to be transformed
 
-    // TODO(qijun) get appropriate DataTransformFN from global map
-    framework::DataTransformFN trans_fun = nullptr;
+      // filter vars that have been transformed
+      std::vector<std::string> need_trans;
+      for (auto var_name : input_vars) {
+        auto var_name_trans =
+            var_name + framework::KernelTypeToString(expected_kernel_key);
+        if (!scope.FindVar(var_name_trans)) {
+          const_cast<Scope&>(scope).Var(var_name_trans);
+          need_trans.push_back(var_name);
+        }
+      }
 
-    // Wait for transform starting
-    dev_ctx->Wait();
+      if (!need_trans.empty()) {
+        // TODO(qijun) get appropriate DeviceContext from DeviceContext pool
+        platform::DeviceContext* trans_dev_ctx = nullptr;
+        std::vector<platform::DeviceContext*> trans_dev_ctx_vec{trans_dev_ctx};
 
-    for (auto var_name : input_vars) {
-      trans_fun(trans_dev_ctx_vec, *(scope.FindVar(var_name)),
-                op_scope.FindVar(var_name));
-    }
-    // Wait for data transform finishing
-    for (auto ctx : trans_dev_ctx_vec) {
-      ctx->Wait();
+        // Wait for transform starting
+        dev_ctx->Wait();
+
+        for (auto var_name : need_trans) {
+          (*trans_fun)(trans_dev_ctx_vec, *(scope.FindVar(var_name)),
+                       scope.FindVar(var_name + framework::KernelTypeToString(
+                                                    expected_kernel_key)));
+        }
+        // Wait for data transform finishing
+        for (auto ctx : trans_dev_ctx_vec) {
+          ctx->Wait();
+        }
+      }
     }
-
-    // Create a new ExecutionContext
-    ExecutionContext op_ctx(*this, op_scope, *dev_ctx);
-    kernel_iter->second->Compute(op_ctx);
   }
+
+  kernel_iter->second->Compute(ctx);
 }
 
 OpKernelType OperatorWithKernel::GetActualKernelType(
...
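The caching scheme above keys each transformed input by appending the printable form of the expected kernel type to the variable name. A minimal illustration (editorial sketch with hypothetical names, assuming expected_kernel_key as in Run above):

    // Illustrative sketch (not from the commit): how the cache key for a
    // transformed input is formed inside OperatorWithKernel::Run.
    std::string TransformedVarName(
        const std::string& var_name,  // e.g. "X"
        const paddle::framework::OpKernelType& expected_kernel_key) {
      // e.g. "X" + "data_type[5]:data_layout[NCHW]:place[CPUPlace]:library_type[CUDNN]"
      return var_name +
             paddle::framework::KernelTypeToString(expected_kernel_key);
    }
    // The first run creates this variable in the scope and records "X" in
    // need_trans; later runs find it via scope.FindVar(...) and skip the
    // transform, so each input is converted at most once per kernel signature.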
@@ -178,7 +178,7 @@ class Tensor {
   DDim dims_;
 
   /**
-   * @brief the layout of memory block, default is NCHW.
+   * @brief the layout of memory block, default is NHWC.
    *
    * @note the memory allocation order, describe how weight/data is stored
    *       For example, in 4-D Tensor(rank=4), there are three commonly
...
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/framework/tensor_util.h"
+
+namespace paddle {
+namespace framework {
+
+template <typename Predicate, typename DevCtx>
+struct AnyDTypeVisitor {
+  Predicate predicate_;
+  const Tensor& tensor_;
+  const DevCtx& ctx_;
+  Tensor* out_;
+
+  AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
+                  Tensor* out)
+      : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}
+
+  template <typename T>
+  void operator()() const {
+    auto t = EigenVector<T>::Flatten(tensor_);
+    auto o = EigenScalar<bool>::From(*out_);
+    o.device(*ctx_.eigen_device()) = predicate_(t).any();
+  }
+};
+
+template <typename Predicate, typename DevCtx>
+inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
+                    const DevCtx& ctx, framework::Tensor* out) {
+  VisitDataType(ToDataType(tensor.type()), AnyDTypeVisitor<Predicate, DevCtx>(
+                                               predicate, tensor, ctx, out));
+}
+
+template <typename Predicate>
+struct AnyVisitor : public boost::static_visitor<bool> {
+  const framework::Tensor& tensor_;
+  Predicate predicate_;
+
+  AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
+      : tensor_(tensor), predicate_(std::move(predicate)) {}
+
+  template <typename Place>
+  bool operator()(const Place& place) const {
+    framework::Tensor out;
+    out.Resize({1});
+    out.mutable_data<bool>(place);
+    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
+    AnyImpl(predicate_, tensor_, *ctx, &out);
+    return this->GetResult(out, place);
+  }
+
+  bool GetResult(const framework::Tensor& out,
+                 const platform::CUDAPlace& gpu) const {
+    platform::CPUPlace cpu;
+    framework::Tensor tmp;
+    tmp.Resize({1});
+    tmp.mutable_data<bool>(cpu);
+    platform::DeviceContextPool::Instance().Get(gpu)->Wait();
+    CopyFrom(out, cpu, &tmp);
+    platform::DeviceContextPool::Instance().Get(gpu)->Wait();
+    return GetResult(tmp, cpu);
+  }
+
+  bool GetResult(const framework::Tensor& out,
+                 const platform::CPUPlace& cpu) const {
+    return *out.data<bool>();
+  }
+};
+
+template <typename Predicate>
+inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
+  AnyVisitor<Predicate> visitor(tensor, predicate);
+  auto place = tensor.place();
+  return platform::VisitPlace(place, visitor);
+}
+
+struct HasNANPredicate {
+  template <typename T>
+  auto operator()(const T& eigen_vec) const
+      -> decltype(std::declval<T>().isnan()) {
+    return eigen_vec.isnan();
+  }
+};
+
+bool HasNAN(const framework::Tensor& tensor) {
+  HasNANPredicate predicate;
+  return Any(tensor, predicate);
+}
+
+struct HasInfPredicate {
+  template <typename T>
+  auto operator()(const T& eigen_vec) const
+      -> decltype(std::declval<T>().isinf()) {
+    return eigen_vec.isinf();
+  }
+};
+
+bool HasInf(const framework::Tensor& tensor) {
+  HasInfPredicate predicate;
+  return Any(tensor, predicate);
+}
+
+}  // namespace framework
+}  // namespace paddle
\ No newline at end of file
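In line with the feature/check_nan_executor branch name, these helpers give the executor a way to validate tensors at runtime. A minimal usage sketch (a hypothetical CPU-only test, not part of this commit):

    #include <gtest/gtest.h>
    #include <limits>

    #include "paddle/framework/tensor_util.h"

    // Hypothetical usage (editorial): flag NaN/Inf values in a CPU tensor.
    TEST(TensorUtil, CheckNanInf) {
      paddle::framework::Tensor t;
      t.Resize({3});
      float* buf = t.mutable_data<float>(paddle::platform::CPUPlace());
      buf[0] = 0.0f;
      buf[1] = std::numeric_limits<float>::quiet_NaN();
      buf[2] = std::numeric_limits<float>::infinity();

      EXPECT_TRUE(paddle::framework::HasNAN(t));
      EXPECT_TRUE(paddle::framework::HasInf(t));
    }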
@@ -208,100 +208,8 @@ inline void CopyToVector(const Tensor& src, std::vector<T>* dst) {
             src_ptr, size);
 }
 
-template <typename Predicate, typename DevCtx>
-struct AnyDTypeVisitor {
-  Predicate predicate_;
-  const Tensor& tensor_;
-  const DevCtx& ctx_;
-  Tensor* out_;
-
-  AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
-                  Tensor* out)
-      : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}
-
-  template <typename T>
-  void operator()() const {
-    auto t = EigenVector<T>::Flatten(tensor_);
-    auto o = EigenScalar<bool>::From(*out_);
-    o.device(*ctx_.eigen_device()) = predicate_(t).any();
-  }
-};
-
-template <typename Predicate, typename DevCtx>
-inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
-                    const DevCtx& ctx, framework::Tensor* out) {
-  VisitDataType(ToDataType(tensor.type()), AnyDTypeVisitor<Predicate, DevCtx>(
-                                               predicate, tensor, ctx, out));
-}
-
-template <typename Predicate>
-struct AnyVisitor : public boost::static_visitor<bool> {
-  const framework::Tensor& tensor_;
-  Predicate predicate_;
-
-  AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
-      : tensor_(tensor), predicate_(std::move(predicate)) {}
-
-  template <typename Place>
-  bool operator()(const Place& place) const {
-    framework::Tensor out;
-    out.Resize({1});
-    out.mutable_data<bool>(place);
-    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
-    AnyImpl(predicate_, tensor_, *ctx, &out);
-    return this->GetResult(out, place);
-  }
-
-  bool GetResult(const framework::Tensor& out,
-                 const platform::CUDAPlace& gpu) const {
-    platform::CPUPlace cpu;
-    framework::Tensor tmp;
-    tmp.Resize({1});
-    tmp.mutable_data<bool>(cpu);
-    platform::DeviceContextPool::Instance().Get(gpu)->Wait();
-    CopyFrom(out, cpu, &tmp);
-    platform::DeviceContextPool::Instance().Get(gpu)->Wait();
-    return GetResult(tmp, cpu);
-  }
-
-  bool GetResult(const framework::Tensor& out,
-                 const platform::CPUPlace& cpu) const {
-    return *out.data<bool>();
-  }
-};
-
-template <typename Predicate>
-inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
-  AnyVisitor<Predicate> visitor(tensor, predicate);
-  auto place = tensor.place();
-  return platform::VisitPlace(place, visitor);
-}
-
-struct HasNANPredicate {
-  template <typename T>
-  auto operator()(const T& eigen_vec) const
-      -> decltype(std::declval<T>().isnan()) {
-    return eigen_vec.isnan();
-  }
-};
-
-inline bool HasNAN(const framework::Tensor& tensor) {
-  HasNANPredicate predicate;
-  return Any(tensor, predicate);
-}
-
-struct HasInfPredicate {
-  template <typename T>
-  auto operator()(const T& eigen_vec) const
-      -> decltype(std::declval<T>().isinf()) {
-    return eigen_vec.isinf();
-  }
-};
-
-inline bool HasInf(const framework::Tensor& tensor) {
-  HasInfPredicate predicate;
-  return Any(tensor, predicate);
-}
+extern bool HasNAN(const framework::Tensor& tensor);
+extern bool HasInf(const framework::Tensor& tensor);
 
 }  // namespace framework
 }  // namespace paddle
@@ -74,7 +74,7 @@ const proto::TensorDesc &VarDesc::tensor_desc() const {
     case proto::VarDesc::LOD_TENSOR_ARRAY:
       return desc_.tensor_array().tensor();
     default:
-      PADDLE_THROW("Unexpected branch.");
+      PADDLE_THROW("The type of var '%s' is unsupported.", this->Name());
   }
 }
...
 file(GLOB GENERAL_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc")
 string(REPLACE ".cc" "" GENERAL_OPS "${GENERAL_OPS}")
+set(DEPS_OPS "")
 set(pybind_file ${PADDLE_SOURCE_DIR}/paddle/pybind/pybind.h)
 file(WRITE ${pybind_file} "// Generated by the paddle/operator/CMakeLists.txt. DO NOT EDIT!\n\n")
 function(op_library TARGET)
@@ -48,6 +49,11 @@ function(op_library TARGET)
     message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file")
   endif()
 
+  list(LENGTH op_library_DEPS op_library_DEPS_len)
+  if (${op_library_DEPS_len} GREATER 0)
+    set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE)
+  endif()
+
   if (WITH_GPU)
     nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
       ${op_common_deps})
@@ -181,55 +187,26 @@ endfunction()
 add_subdirectory(math)
 add_subdirectory(nccl)
 
-set(DEPS_OPS
-  cond_op
-  cross_entropy_op
-  recurrent_op
-  softmax_with_cross_entropy_op
-  softmax_op
-  sequence_softmax_op
-  sum_op
-  pool_op
-  maxout_op
-  unpool_op
-  pool_with_index_op
-  conv_op
-  conv_transpose_op
-  nccl_op
-  sequence_conv_op
-  sequence_pool_op
-  lod_rank_table_op
-  lod_tensor_to_array_op
-  array_to_lod_tensor_op
-  max_sequence_len_op
-  lstm_op
-  tensor_array_read_write_op
-  gru_op
-  adagrad_op
-  sgd_op
-  save_op
-  load_op
-  send_op
-  recv_op)
+if(WITH_GPU)
+  op_library(nccl_op DEPS nccl_common)
+else()
+  set(DEPS_OPS ${DEPS_OPS} nccl_op)
+endif()
 
 if(WITH_DISTRIBUTE)
   add_subdirectory(detail)
-  op_library(send_op SRCS send_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf)
-  set_source_files_properties(
-    send_op.cc
-    PROPERTIES
-    COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
-
-  op_library(recv_op SRCS recv_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf)
-  set_source_files_properties(
-    recv_op.cc
-    PROPERTIES
-    COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
-
-  cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor)
+  set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf)
+  set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
+  op_library(send_op DEPS ${DISTRIBUTE_DEPS})
+  set_source_files_properties(send_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+  op_library(recv_op DEPS ${DISTRIBUTE_DEPS})
+  set_source_files_properties(recv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+  cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor)
+else()
+  set(DEPS_OPS ${DEPS_OPS} send_op recv_op)
 endif()
 
-op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op)
+op_library(cond_op DEPS framework_proto tensor net_op)
 op_library(cross_entropy_op DEPS cross_entropy)
 op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
 op_library(softmax_op DEPS softmax)
@@ -242,21 +219,16 @@ op_library(pool_op DEPS pooling)
 op_library(maxout_op DEPS maxouting)
 op_library(unpool_op DEPS unpooling)
 op_library(pool_with_index_op DEPS pooling)
-op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table)
-op_library(lod_tensor_to_array_op SRCS lod_tensor_to_array_op.cc DEPS lod_rank_table_op)
-op_library(array_to_lod_tensor_op SRCS array_to_lod_tensor_op.cc DEPS lod_rank_table_op)
-op_library(max_sequence_len_op SRCS max_sequence_len_op.cc DEPS lod_rank_table)
-op_library(tensor_array_read_write_op SRCS tensor_array_read_write_op.cc)
-if(WITH_GPU)
-  op_library(nccl_op DEPS nccl_common)
-endif()
+op_library(lod_rank_table_op DEPS lod_rank_table)
+op_library(lod_tensor_to_array_op DEPS lod_rank_table_op)
+op_library(array_to_lod_tensor_op DEPS lod_rank_table_op)
+op_library(max_sequence_len_op DEPS lod_rank_table)
 op_library(sequence_conv_op DEPS context_project)
 op_library(sequence_pool_op DEPS sequence_pooling)
 op_library(lstm_op DEPS sequence2batch lstm_compute)
 op_library(conv_transpose_op DEPS vol2col)
 op_library(gru_op DEPS sequence2batch gru_compute)
-op_library(recurrent_op SRCS recurrent_op.cc DEPS executor)
+op_library(recurrent_op DEPS executor)
 # FIXME(typhoonzero): save/load depends lodtensor serialization functions
 op_library(save_op DEPS lod_tensor)
 op_library(load_op DEPS lod_tensor)
@@ -269,13 +241,12 @@ endforeach()
 set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
 
 cc_test(gather_test SRCS gather_test.cc DEPS tensor)
 cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
 cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
 cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
 if(WITH_GPU)
   cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context)
 endif()
 cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
@@ -50,10 +50,6 @@ class BatchNormOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), "");
     PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), "");
 
-    const float epsilon = ctx->Attrs().Get<float>("epsilon");
-    PADDLE_ENFORCE_GE(epsilon, 0.0, "epsilon should be larger than 0");
-    PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should not be too large");
-
     // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
     PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
                       "Mean and MeanOut should share the same memory");
@@ -91,7 +87,12 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddAttr<bool>("is_test", "").SetDefault(false);
     AddAttr<float>("momentum", "").SetDefault(0.9);
-    AddAttr<float>("epsilon", "").SetDefault(1e-5);
+    AddAttr<float>("epsilon", "")
+        .SetDefault(1e-5)
+        .AddCustomChecker([](const float &epsilon) {
+          PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f,
+                         "'epsilon' should be between 0.0 and 0.001.");
+        });
     AddAttr<std::string>("data_layout", "").SetDefault("NCHW");
     AddInput("X", "The input tensor");
     AddInput("Scale",
...
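The epsilon validation moves from InferShape, which runs on every shape-inference pass, into an AddCustomChecker attached to the attribute itself, so the range is enforced when the op's attributes are set. A standalone analogue of that checker pattern (illustrative only, not Paddle's actual OpAttrChecker):

    #include <functional>
    #include <stdexcept>
    #include <utility>
    #include <vector>

    // Standalone analogue (editorial): validators are collected when the
    // attribute is declared and run whenever a value is assigned.
    struct FloatAttr {
      std::vector<std::function<void(const float&)>> checkers;

      FloatAttr& AddCustomChecker(std::function<void(const float&)> fn) {
        checkers.push_back(std::move(fn));
        return *this;
      }

      void Set(float value) const {
        for (const auto& check : checkers) check(value);  // throws on violation
      }
    };

    // Usage mirroring the batch_norm change:
    //   FloatAttr epsilon;
    //   epsilon.AddCustomChecker([](const float& e) {
    //     if (!(e >= 0.0f && e <= 0.001f))
    //       throw std::invalid_argument(
    //           "'epsilon' should be between 0.0 and 0.001.");
    //   });
    //   epsilon.Set(1e-5f);  // passes
    //   epsilon.Set(0.5f);   // throws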
@@ -315,6 +315,10 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 
+REGISTER_OP_KERNEL(conv2d, CUDNN, paddle::platform::CUDAPlace,
+                   paddle::operators::CudnnConvOpKernel<float>,
+                   paddle::operators::CudnnConvOpKernel<double>);
+
 REGISTER_OP_CUDA_KERNEL(conv2d_cudnn,
                         paddle::operators::CudnnConvOpKernel<float>,
                         paddle::operators::CudnnConvOpKernel<double>);
...
@@ -114,15 +114,15 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
   CrossEntropyOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
-             "(Tensor, default Tensor<float>), a 2-D tensor with shape N x D, "
-             "where N is the batch size and D is the number of classes. "
+             "(Tensor, default Tensor<float>), a 2-D tensor with shape [N x D],"
+             " where N is the batch size and D is the number of classes. "
              "This input is a probability computed by the previous operator, "
             "which is almost always the result of a softmax operator.");
     AddInput("Label",
             "(Tensor), the ground truth which is a 2-D tensor. When "
             "soft_label is set to false, Label is a Tensor<int64> with shape "
             "[N x 1]. When soft_label is set to true, Label is a "
-             "Tensor<float/double> with shape [N x K].");
+             "Tensor<float/double> with shape [N x D].");
     AddOutput("Y",
               "(Tensor, default Tensor<float>), a 2-D tensor with shape "
               "[N x 1]. The cross entropy loss.");
...
@@ -25,8 +25,6 @@ class DropoutOp : public framework::OperatorWithKernel {
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
-    PADDLE_ENFORCE_GE(ctx->Attrs().Get<float>("dropout_prob"), 0);
-    PADDLE_ENFORCE_LE(ctx->Attrs().Get<float>("dropout_prob"), 1);
 
     auto x_dims = ctx->GetInputDim("X");
     ctx->SetOutputDim("Out", x_dims);
@@ -47,7 +45,11 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
 
     AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
-        .SetDefault(.5f);
+        .SetDefault(.5f)
+        .AddCustomChecker([](const float& drop_p) {
+          PADDLE_ENFORCE(drop_p >= 0.0f && drop_p <= 1.0f,
+                         "'dropout_prob' must be between 0.0 and 1.0.");
+        });
     AddAttr<bool>("is_test", "True if in test phase.").SetDefault(false);
     AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
@@ -78,8 +80,6 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                    "Input(Out@GRAD) must not be null.");
-    PADDLE_ENFORCE_GE(ctx->Attrs().Get<float>("dropout_prob"), 0);
-    PADDLE_ENFORCE_LE(ctx->Attrs().Get<float>("dropout_prob"), 1);
 
     auto x_dims = ctx->GetInputDim("X");
     auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
     PADDLE_ENFORCE_EQ(x_dims, out_dims,
...
@@ -30,16 +30,15 @@ struct MaskGenerator {
   __host__ __device__ MaskGenerator(AttrType dropout_prob, int seed)
       : dropout_prob(dropout_prob), seed(seed) {}
 
-  __host__ __device__ T operator()(const unsigned int n) const {
+  inline __host__ __device__ T operator()(const unsigned int n) const {
     thrust::minstd_rand rng;
     rng.seed(seed);
     thrust::uniform_real_distribution<AttrType> dist(0, 1);
     rng.discard(n);
     if (dist(rng) < dropout_prob) {
       return static_cast<T>(0);
-    } else {
-      return static_cast<T>(1);
     }
+    return static_cast<T>(1);
   }
 };
...
@@ -302,8 +302,29 @@ void set_constant(const platform::DeviceContext& context,
 #endif
 }
 
+template <typename T>
+struct RowwiseAdd<platform::CPUDeviceContext, T> {
+  void operator()(const platform::CPUDeviceContext& context,
+                  const framework::Tensor& input,
+                  const framework::Tensor& vector, framework::Tensor* output) {
+    auto in_dims = input.dims();
+    auto size = input.numel() / in_dims[0];
+    PADDLE_ENFORCE_EQ(vector.numel(), size);
+    PADDLE_ENFORCE_EQ(output->dims(), in_dims);
+
+    auto in = framework::EigenMatrix<T>::From(input);
+    auto vec = framework::EigenVector<T>::Flatten(vector);
+    auto out = framework::EigenMatrix<T>::From(*output);
+    for (int64_t i = 0; i < in_dims[0]; ++i) {
+      out.chip(i, 0) = in.chip(i, 0) + vec;
+    }
+  }
+};
+
 template struct RowwiseAdd<platform::CPUDeviceContext, float>;
 template struct RowwiseAdd<platform::CPUDeviceContext, double>;
+
 template struct ColwiseSum<platform::CPUDeviceContext, float>;
 template struct ColwiseSum<platform::CPUDeviceContext, double>;
...
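Both specializations implement out(i, :) = in(i, :) + vec, with the CPU path iterating Eigen chips row by row. A dependency-free reference of the same arithmetic on flat arrays (names are illustrative):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Plain-C++ reference for RowwiseAdd: add `vec` (length `width`) to
    // every row of the row-major `rows x width` matrix `in`.
    std::vector<float> RowwiseAddRef(const std::vector<float>& in,
                                     const std::vector<float>& vec,
                                     std::size_t rows, std::size_t width) {
      assert(in.size() == rows * width);
      assert(vec.size() == width);
      std::vector<float> out(in.size());
      for (std::size_t i = 0; i < rows; ++i) {
        for (std::size_t j = 0; j < width; ++j) {
          out[i * width + j] = in[i * width + j] + vec[j];
        }
      }
      return out;
    }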
@@ -273,6 +273,35 @@ void set_constant_with_place<platform::CUDAPlace>(
                       TensorSetConstantGPU(context, tensor, value));
 }
 
+template <typename T>
+__global__ void RowwiseAddKernel(const T* a, const T* b, T* c, int width,
+                                 int num) {
+  T tmp = 1.0 / width;
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < num;
+       i += blockDim.x * gridDim.x) {
+    int h = i * tmp;
+    int w = i - h * width;
+    c[i] = a[i] + b[w];
+  }
+}
+
+template <typename T>
+struct RowwiseAdd<platform::CUDADeviceContext, T> {
+  void operator()(const platform::CUDADeviceContext& context,
+                  const framework::Tensor& input,
+                  const framework::Tensor& vector, framework::Tensor* output) {
+    auto in_dims = input.dims();
+    auto size = input.numel() / in_dims[0];
+    PADDLE_ENFORCE_EQ(vector.numel(), size);
+    PADDLE_ENFORCE_EQ(output->dims(), in_dims);
+    int blocks = 512;
+    int grids = (input.numel() + blocks - 1) / blocks;
+    RowwiseAddKernel<T><<<grids, blocks, 0, context.stream()>>>(
+        input.data<T>(), vector.data<T>(), output->data<T>(),
+        static_cast<int>(in_dims[1]), static_cast<int>(input.numel()));
+  }
+};
+
 template struct RowwiseAdd<platform::CUDADeviceContext, float>;
 template struct RowwiseAdd<platform::CUDADeviceContext, double>;
 template struct ColwiseSum<platform::CUDADeviceContext, float>;
...
@@ -45,25 +45,6 @@ void Transpose<DeviceContext, T, Rank>::operator()(
   eigen_out.device(*dev) = eigen_in.shuffle(permute);
 }
 
-template <typename DeviceContext, typename T>
-void RowwiseAdd<DeviceContext, T>::operator()(const DeviceContext& context,
-                                              const framework::Tensor& input,
-                                              const framework::Tensor& vector,
-                                              framework::Tensor* output) {
-  auto in_dims = input.dims();
-  auto size = input.numel() / in_dims[0];
-  PADDLE_ENFORCE_EQ(vector.numel(), size);
-  PADDLE_ENFORCE_EQ(output->dims(), in_dims);
-
-  auto in = framework::EigenMatrix<T>::From(input);
-  auto vec = framework::EigenMatrix<T>::From(vector);
-  auto out = framework::EigenMatrix<T>::From(*output);
-  Eigen::array<int, 2> shape({{1, static_cast<int>(size)}});
-  Eigen::array<int, 2> bcast({{static_cast<int>(in_dims[0]), 1}});
-  out.device(*context.eigen_device()) =
-      in + vec.reshape(shape).broadcast(bcast);
-}
-
 template <typename DeviceContext, typename T>
 void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
                                               const framework::Tensor& input,
...
@@ -79,7 +79,7 @@ class SendOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 Recv operator
 
-This operator will recv tensor from send_op
+This operator will send tensor to recv_op.
 )DOC");
     AddAttr<std::vector<std::string>>("endpoints",
                                       "(string vector, default 127.0.0.1:6164)"
...
@@ -100,7 +100,7 @@ class CUDADeviceContext : public DeviceContext {
 
 template <>
 struct DefaultDeviceContextType<platform::CUDAPlace> {
-  using T = CUDADeviceContext;
+  using TYPE = CUDADeviceContext;
 };
 
 class CUDNNDeviceContext : public CUDADeviceContext {
...
@@ -171,12 +171,23 @@ void BindBlockDesc(py::module &m) {
            std::string name = byte_name;
            return self.HasVar(name);
          })
+      .def("has_var_recursive",
+           [](BlockDesc &self, py::bytes byte_name) {
+             std::string name = byte_name;
+             return self.HasVarRecursive(name);
+           })
       .def("find_var",
            [](BlockDesc &self, py::bytes byte_name) {
              std::string name = byte_name;
              return self.FindVar(name);
            },
            py::return_value_policy::reference)
+      .def("find_var_recursive",
+           [](BlockDesc &self, py::bytes byte_name) {
+             std::string name = byte_name;
+             return self.FindVarRecursive(name);
+           },
+           py::return_value_policy::reference)
       .def("all_vars", &BlockDesc::AllVars, py::return_value_policy::reference)
       .def("op_size", &BlockDesc::OpSize)
       .def("op", &BlockDesc::Op, py::return_value_policy::reference)
@@ -204,7 +215,7 @@ void BindVarDsec(py::module &m) {
       .def("set_shape", &VarDesc::SetShape)
       .def("set_dtype", &VarDesc::SetDataType)
       .def("shape", &VarDesc::Shape, py::return_value_policy::reference)
-      .def("dtype", &VarDesc::GetDataType)
+      .def("dtype", &VarDesc::GetDataType, py::return_value_policy::reference)
       .def("lod_level", &VarDesc::GetLodLevel)
       .def("set_lod_level", &VarDesc::SetLoDLevel)
       .def("type", &VarDesc::GetType)
@@ -236,14 +247,22 @@ void BindOpDesc(py::module &m) {
       .value("BLOCK", proto::AttrType::BLOCK);
 
   py::class_<OpDesc> op_desc(m, "OpDesc", "");
-  op_desc.def("type", &OpDesc::Type)
+  op_desc
+      .def("__init__", [](OpDesc &self) { new (&self) OpDesc(); },
+           py::return_value_policy::reference)
+      .def("copy_from", &OpDesc::CopyFrom)
+      .def("type", &OpDesc::Type)
       .def("set_type", &OpDesc::SetType)
       .def("input", &OpDesc::Input)
       .def("input_names", &OpDesc::InputNames)
+      .def("set_input", &OpDesc::SetInput)
       .def("output", &OpDesc::Output)
      .def("output_names", &OpDesc::OutputNames)
-      .def("set_input", &OpDesc::SetInput)
      .def("set_output", &OpDesc::SetOutput)
+      .def("input_arg_names", &OpDesc::InputArgumentNames)
+      .def("output_arg_names", &OpDesc::OutputArgumentNames)
+      .def("rename_input", &OpDesc::RenameInput)
+      .def("rename_output", &OpDesc::RenameOutput)
      .def("has_attr", &OpDesc::HasAttr)
      .def("attr_type", &OpDesc::GetAttrType)
      .def("attr_names", &OpDesc::AttrNames)
...
...@@ -269,23 +269,22 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -269,23 +269,22 @@ All parameter, weight, gradient are variables in Paddle.
} }
return ret_values; return ret_values;
}); });
m.def("get_grad_op_descs", m.def(
[](const OpDesc &op_desc, "get_grad_op_desc", [](const OpDesc &op_desc,
const std::unordered_set<std::string> &no_grad_set, const std::unordered_set<std::string> &no_grad_set,
std::unordered_map<std::string, std::string> &grad_to_var, const std::vector<BlockDesc *> &grad_sub_block) {
const std::vector<BlockDesc *> &grad_sub_block) { std::unordered_map<std::string, std::string> grad_to_var;
std::vector<std::unique_ptr<OpDesc>> grad_op_descs = std::vector<std::unique_ptr<OpDesc>> grad_op_descs =
framework::OpInfoMap::Instance() framework::OpInfoMap::Instance()
.Get(op_desc.Type()) .Get(op_desc.Type())
.GradOpMaker()(op_desc, no_grad_set, &grad_to_var, .GradOpMaker()(op_desc, no_grad_set, &grad_to_var,
grad_sub_block); grad_sub_block);
std::vector<OpDesc *> grad_op_desc_ptrs(grad_op_descs.size()); std::vector<OpDesc *> grad_op_desc_ptrs(grad_op_descs.size());
std::transform( std::transform(grad_op_descs.begin(), grad_op_descs.end(),
grad_op_descs.begin(), grad_op_descs.end(), grad_op_desc_ptrs.begin(),
grad_op_desc_ptrs.begin(), [](std::unique_ptr<OpDesc> &p) { return p.release(); });
[](std::unique_ptr<OpDesc> &p) { return p.release(); }); return std::make_pair(grad_op_desc_ptrs, grad_to_var);
return grad_op_desc_ptrs; });
});
m.def("prune", [](const ProgramDesc &origin, m.def("prune", [](const ProgramDesc &origin,
const std::vector<std::array<size_t, 2>> &targets) { const std::vector<std::array<size_t, 2>> &targets) {
ProgramDesc prog_with_targets(origin); ProgramDesc prog_with_targets(origin);
...@@ -301,6 +300,8 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -301,6 +300,8 @@ All parameter, weight, gradient are variables in Paddle.
InferenceOptimize(*(origin.Proto()), &pruned_desc); InferenceOptimize(*(origin.Proto()), &pruned_desc);
return new ProgramDesc(pruned_desc); return new ProgramDesc(pruned_desc);
}); });
m.def("empty_var_name", []() { return framework::kEmptyVarName; });
m.def("grad_var_suffix", []() { return framework::kGradVarSuffix; });
m.def_submodule( m.def_submodule(
"var_names", "var_names",
"The module will return special predefined variable name in Paddle") "The module will return special predefined variable name in Paddle")
......
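As a sketch of the revised calling convention: `get_grad_op_desc` now builds `grad_to_var` internally and returns it paired with the grad op descs, so a Python caller unpacks a tuple (the `fwd_op` handle below is hypothetical):
.. code-block:: python
    import paddle.v2.fluid.core as core
    # grad_to_var is built inside the binding and comes back as the
    # second element of a pair instead of being an in/out parameter.
    grad_op_descs, grad_to_var = core.get_grad_op_desc(
        fwd_op.desc,  # OpDesc of a forward op (hypothetical handle)
        set(),        # no_grad_set
        [])           # grad sub-blocks, only needed for ops with sub-blocks
    print(core.grad_var_suffix())   # e.g. "@GRAD"
    print(core.empty_var_name())    # the sentinel name for an empty variable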
from paddle.v2.fluid import framework as framework from paddle.v2.fluid import framework as framework
from . import core
import collections
__all__ = ['append_backward_ops'] __all__ = ['append_backward']
def append_backward_ops(loss, parameter_list=None, no_grad_set=None): def _rename_arg_(op_desc_list, old_name, new_name, begin_idx=None,
end_idx=None):
if begin_idx is None:
begin_idx = 0
if end_idx is None:
end_idx = len(op_desc_list)
for i in range(begin_idx, end_idx):
op_desc = op_desc_list[i]
if isinstance(op_desc, tuple):
op_desc = op_desc[0]
op_desc.rename_input(old_name, new_name)
op_desc.rename_output(old_name, new_name)
def _create_op_desc_(op_type, inputs, outputs, attrs):
op_desc = core.OpDesc()
op_desc.set_type(op_type)
for para, args in inputs.iteritems():
op_desc.set_input(para, args)
for para, args in outputs.iteritems():
op_desc.set_output(para, args)
for name, val in attrs.iteritems():
if isinstance(val, framework.Block):
op_desc.set_block_attr(name, val.desc)
else:
op_desc.set_attr(name, val)
return op_desc
def _infer_var_data_type_(var_name, block):
grad_var = block.desc.find_var(var_name.encode("ascii"))
fwd_name = _strip_grad_suffix_(var_name.encode("ascii"))
if block.desc.has_var_recursive(fwd_name):
fwd_var = block.desc.find_var_recursive(fwd_name.encode("ascii"))
grad_var.set_dtype(fwd_var.dtype())
else:
grad_var.set_dtype(core.DataType.FP32)
def _all_in_set_(cands, s):
for c in cands:
if not c in s:
return False
return True
def _strip_grad_suffix_(name):
pos = name.find(core.grad_var_suffix())
return name[:pos] if pos != -1 else name
def _append_grad_suffix_(name):
return name + core.grad_var_suffix()
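# A quick sketch of the two suffix helpers above, assuming
# core.grad_var_suffix() == "@GRAD":
#
#   _append_grad_suffix_("fc_0.w")        # -> "fc_0.w@GRAD"
#   _strip_grad_suffix_("fc_0.w@GRAD")    # -> "fc_0.w"
#
# _strip_grad_suffix_ cuts at the first occurrence, so renamed gradients
# such as "fc_0.w@GRAD@RENAME@0" also map back to "fc_0.w"; names without
# the suffix pass through unchanged.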
def _addup_repetitive_outputs_(op_descs):
# In the backward pass, a variable may be the output of more than one op.
# In this case, the variable should be the accumulation of all those outputs.
# We insert `sum_op`s to implement the accumulation.
pending_sum_ops = []
var_rename_count = collections.defaultdict(int)
renamed_vars = collections.defaultdict(list)
for idx, op_desc in enumerate(op_descs):
for var_name in op_desc.input_arg_names():
if len(renamed_vars[var_name]) > 1:
pending_sum_ops.append(
(_create_op_desc_("sum", {"X": renamed_vars[var_name]},
{"Out": [var_name]}, {}), idx))
renamed_vars[var_name] = [var_name]
for var_name in op_desc.output_arg_names():
if var_name == core.empty_var_name(
) or var_name in op_desc.input_arg_names():
# empty variable or inplace op
continue
if len(renamed_vars[var_name]) == 0:
# it's the first time we get the variable
renamed_vars[var_name] = [var_name]
else:
if len(renamed_vars[var_name]) == 1:
new_name = var_name + "@RENAME@" + \
str(var_rename_count[var_name])
var_rename_count[var_name] += 1
# rename original var_name
renamed_vars[var_name][0] = new_name
_rename_arg_(op_descs, var_name, new_name, 0, idx)
_rename_arg_(pending_sum_ops, var_name, new_name)
new_name = var_name + "@RENAME@" + \
str(var_rename_count[var_name])
var_rename_count[var_name] += 1
op_desc.rename_output(var_name, new_name)
renamed_vars[var_name].append(new_name)
for var_name, inputs in renamed_vars.iteritems():
if len(inputs) > 1:
pending_sum_ops.append((_create_op_desc_(
"sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs)))
# sum_op descs are sorted according to their insert position
for p in reversed(pending_sum_ops):
op_descs.insert(p[1], p[0])
return op_descs
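# A schematic (hypothetical names) of the renaming scheme above: when two
# backward ops both write x@GRAD, the second would silently overwrite the
# first, so each writer gets a unique @RENAME@ alias and a trailing sum_op
# accumulates the aliases back into x@GRAD:
#
#   before:  op_a -> {"Out": ["x@GRAD"]}
#            op_b -> {"Out": ["x@GRAD"]}
#   after:   op_a -> {"Out": ["x@GRAD@RENAME@0"]}
#            op_b -> {"Out": ["x@GRAD@RENAME@1"]}
#            sum  -> {"X": ["x@GRAD@RENAME@0", "x@GRAD@RENAME@1"]},
#                    {"Out": ["x@GRAD"]}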
def _remove_no_grad_branch_(op_descs, no_grad_set):
# Remove ops whose outputs are all in no_grad_set
op_descs = filter(
lambda op_desc: not _all_in_set_(op_desc.output_arg_names(), no_grad_set),
op_descs)
# Insert fill_zeros_like_op
to_insert = []
for idx, op_desc in enumerate(op_descs):
for arg in op_desc.input_arg_names():
if core.grad_var_suffix() in arg and arg in no_grad_set:
to_insert.append((_create_op_desc_("fill_zeros_like", {
"X": [_strip_grad_suffix_(arg)]
}, {"Y": [arg]}, {}), idx))
map(lambda p: op_descs.insert(p[1], p[0]), reversed(to_insert))
return op_descs
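# A schematic (hypothetical names) of the fill_zeros_like insertion above:
# y is in no_grad_set, so the ops producing y@GRAD were dropped; if a
# surviving grad op still reads y@GRAD, a zero tensor shaped like y is
# materialized right before that op:
#
#   fill_zeros_like: {"X": ["y"]} -> {"Y": ["y@GRAD"]}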
def _append_backward_ops_(target,
block,
target_block,
no_grad_dict,
grad_to_var,
callback=None):
grad_op_descs = []
program = block.program
for op in reversed(block.ops):
grad_sub_block_list = []
# If the op has its own sub-block, deal with the sub-block first
if op.has_attr("sub_block"):
sub_block = program.block(op.block_attr("sub_block"))
grad_sub_block = program.create_block(parent_idx=sub_block.idx)
_append_backward_ops_(target, sub_block, grad_sub_block,
no_grad_dict, grad_to_var, callback)
grad_sub_block_list.append(grad_sub_block.desc)
grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
op.desc, no_grad_dict[block.idx], grad_sub_block_list)
grad_op_descs.extend(grad_op_desc)
grad_to_var.update(op_grad_to_var)
grad_op_descs = _addup_repetitive_outputs_(grad_op_descs)
grad_op_descs = _remove_no_grad_branch_(grad_op_descs,
no_grad_dict[block.idx])
if target_block.idx == 0:
grad_op_descs.insert(
0,
_create_op_desc_("fill_constant", {}, {
"Out": [_append_grad_suffix_(target.name)]
}, {"shape": [1],
"value": 1.0,
"dtype": target.dtype}))
# append op_desc in grad_op_descs to target_block
for op_desc in grad_op_descs:
new_op_desc = target_block.desc.append_op()
new_op_desc.copy_from(op_desc)
def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
for op_idx in range(start_op_idx, block.desc.op_size()):
op_desc = block.desc.op(op_idx)
if op_desc.has_attr("sub_block"):
sub_block = block.program.block(op_desc.block_attr("sub_block"))
_append_backward_vars_(sub_block, 0, grad_to_var, grad_info_map)
new_vars = set()
# create new gradient variables
for grad_var_name in op_desc.output_arg_names():
grad_var_name = grad_var_name.encode("ascii")
if block.desc.has_var_recursive(
grad_var_name) or grad_var_name == core.empty_var_name():
continue
block.desc.var(grad_var_name)
new_vars.add(grad_var_name)
if not grad_to_var.has_key(grad_var_name):
continue
grad_info_map[grad_to_var[grad_var_name]] = (grad_var_name, block)
# infer_shape and infer_type
op_desc.infer_var_type(block.desc)
op_desc.infer_shape(block.desc)
for arg in op_desc.output_arg_names():
if arg in new_vars:
_infer_var_data_type_(arg, block)
def append_backward(loss, parameter_list=None, no_grad_set=None):
""" """
Create and add gradient Operators in BlockDesc to compute Create and add gradient Operators in BlockDesc to compute
gradients of `loss` for parameters in parameter_list gradients of `loss` for parameters in parameter_list
:param loss: a variable generated by the cost function. :param loss: a variable generated by the cost function.
:type loss: Variable :type loss: Variable
:param no_grad_set: variables that should not create gradients :param no_grad_dict: variables that should not create gradients
:type no_grad_set: set :type no_grad_dict: set
:param parameter_list: parameters that need to compute gradients and :param parameter_list: parameters that need to compute gradients and
be updated to optimize the loss. be updated to optimize the loss.
:type: list :type: list
...@@ -20,35 +212,53 @@ def append_backward_ops(loss, parameter_list=None, no_grad_set=None): ...@@ -20,35 +212,53 @@ def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
""" """
assert isinstance(loss, framework.Variable) assert isinstance(loss, framework.Variable)
program = loss.block.program
no_grad_dict = dict()
if no_grad_set is None: if no_grad_set is None:
program = loss.block.program
assert isinstance(program, framework.Program) assert isinstance(program, framework.Program)
no_grad_set = list()
for block in program.blocks: for block in program.blocks:
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
block_no_grad_set = set()
for var in block.vars.itervalues(): for var in block.vars.itervalues():
assert isinstance(var, framework.Variable) assert isinstance(var, framework.Variable)
if var.stop_gradient: if var.stop_gradient:
no_grad_set.append(var.name) block_no_grad_set.add(_append_grad_suffix_(var.name))
no_grad_set = set(no_grad_set) no_grad_dict[block.idx] = block_no_grad_set
elif isinstance(no_grad_set, set):
no_grad_dict = {0: no_grad_set}
else:
raise ValueError("'no_grad_set' should be a set or None.")
grad_info_map = dict()
root_block = program.block(0)
fwd_op_num = root_block.desc.op_size()
current_block_idx = program.current_block_idx
grad_to_var = dict()
_append_backward_ops_(loss, root_block, root_block, no_grad_dict,
grad_to_var)
_append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map)
program.current_block_idx = current_block_idx
program.sync_with_cpp()
param_grad_map = loss.block.program.append_backward(loss, no_grad_set)
if parameter_list is not None: if parameter_list is not None:
parameters = parameter_list parameters = parameter_list
else: else:
params = loss.block.program.global_block().all_parameters() params = program.global_block().all_parameters()
parameters = [param.name for param in params] parameters = [param.name for param in params]
params_and_grads = [] params_and_grads = []
for param in parameters: for param in parameters:
if param not in param_grad_map: if param not in grad_info_map:
raise ValueError("param %s is not in map" % param) raise ValueError("param %s is not in map" % param)
grad_info = param_grad_map[param] grad_info = grad_info_map[param]
grad_block = loss.block.program.block(grad_info[1]) grad_block = grad_info[1]
if not grad_block.has_var(grad_info[0]): if not grad_block.has_var(grad_info[0]):
raise ValueError("grad block[{0}] did not have grad var {1}".format( raise ValueError("grad block[{0}] did not have grad var {1}".format(
grad_info[1], grad_info[0])) grad_info[1], grad_info[0]))
# Get the param var from the global block # Get the param var from the global block
param_var = loss.block.program.global_block().var(param) param_var = program.global_block().var(param)
grad_var = grad_block.var(grad_info[0]) grad_var = grad_block.var(grad_info[0])
if loss.block.has_var(grad_info[0]): if loss.block.has_var(grad_info[0]):
params_and_grads.append((param_var, grad_var)) params_and_grads.append((param_var, grad_var))
......
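End to end, the renamed entry point is used the same way as before; a minimal sketch (layer names follow the fluid API of this era, the network itself is hypothetical):
.. code-block:: python
    import paddle.v2.fluid as fluid
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(x=cost)
    # Returns a list of (parameter, gradient) variable pairs; the gradient
    # ops and variables are appended to avg_cost's program in place.
    params_grads = fluid.backward.append_backward(loss=avg_cost)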
...@@ -95,7 +95,9 @@ class DistributeTranspiler: ...@@ -95,7 +95,9 @@ class DistributeTranspiler:
""" """
if program is None: if program is None:
program = default_main_program() program = default_main_program()
self.program = program
self.trainers = trainers self.trainers = trainers
self.optimize_ops = optimize_ops
self._optimize_distributed( self._optimize_distributed(
optimize_ops, optimize_ops,
program, program,
...@@ -156,9 +158,10 @@ class DistributeTranspiler: ...@@ -156,9 +158,10 @@ class DistributeTranspiler:
attrs={"endpoints": pserver_endpoints, attrs={"endpoints": pserver_endpoints,
"epmap": epmap}) "epmap": epmap})
def get_trainer_program(optimize_ops, program): def get_trainer_program(self):
# remove optimize ops and add a send op to main_program # remove optimize ops and add a send op to main_program
program.global_block().delete_ops(optimize_ops) self.program.global_block().delete_ops(self.optimize_ops)
return self.program
def _create_var_for_trainers(self, block, var, trainers): def _create_var_for_trainers(self, block, var, trainers):
var_list = [] var_list = []
...@@ -210,7 +213,6 @@ class DistributeTranspiler: ...@@ -210,7 +213,6 @@ class DistributeTranspiler:
if opt_op.inputs.has_key("Grad"): if opt_op.inputs.has_key("Grad"):
if opt_op.inputs["Grad"].name in grad_var_names: if opt_op.inputs["Grad"].name in grad_var_names:
print "appending ", opt_op.type, opt_op.inputs
optimize_sub_program.global_block().append_op( optimize_sub_program.global_block().append_op(
type=opt_op.type, type=opt_op.type,
inputs=opt_op.inputs, inputs=opt_op.inputs,
......
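Since `transpile()` now stashes `program` and `optimize_ops` on the transpiler, `get_trainer_program()` is called without arguments; a sketch following the book example later in this commit:
.. code-block:: python
    import os
    import paddle.v2.fluid as fluid
    t = fluid.DistributeTranspiler()
    t.transpile(optimize_ops, params_grads,  # produced by the optimizer
                pservers=os.getenv("PSERVERS"), trainers=2)
    # No arguments: the method deletes the stored optimize ops from the
    # stored program and returns that program.
    trainer_prog = t.get_trainer_program()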
...@@ -663,7 +663,7 @@ class Block(object): ...@@ -663,7 +663,7 @@ class Block(object):
end = list(self.ops).index(ops[-1]) end = list(self.ops).index(ops[-1])
except Exception, e: except Exception, e:
raise e raise e
self.desc.remove_op(start, end) self.desc.remove_op(start, end + 1)
def prepend_op(self, *args, **kwargs): def prepend_op(self, *args, **kwargs):
op_desc = self.desc.prepend_op() op_desc = self.desc.prepend_op()
...@@ -846,9 +846,11 @@ class Program(object): ...@@ -846,9 +846,11 @@ class Program(object):
self.sync_with_cpp() self.sync_with_cpp()
return param_to_grad_info return param_to_grad_info
def create_block(self): def create_block(self, parent_idx=None):
new_block_idx = len(self.blocks) new_block_idx = len(self.blocks)
self.desc.append_block(self.current_block().desc) parent = self.current_block() if parent_idx is None else self.block(
parent_idx)
self.desc.append_block(parent.desc)
self.current_block_idx = new_block_idx self.current_block_idx = new_block_idx
self.blocks.append(Block(self, self.current_block_idx)) self.blocks.append(Block(self, self.current_block_idx))
return self.current_block() return self.current_block()
......
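A small sketch of the new `parent_idx` parameter; the backward pass uses it so a gradient sub-block is parented under the forward sub-block it mirrors, rather than under whatever block happens to be current:
.. code-block:: python
    import paddle.v2.fluid as fluid
    prog = fluid.Program()
    b1 = prog.create_block()              # parent defaults to the current block
    b2 = prog.create_block(parent_idx=0)  # explicitly parented under block 0,
                                          # even though b1 is now current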
...@@ -270,6 +270,7 @@ def gru_unit(input, ...@@ -270,6 +270,7 @@ def gru_unit(input,
attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype) attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
# create bias # create bias
if bias is None: if bias is None:
bias_size = [1, 3 * size] bias_size = [1, 3 * size]
bias = helper.create_parameter( bias = helper.create_parameter(
...@@ -358,7 +359,59 @@ def cos_sim(X, Y, **kwargs): ...@@ -358,7 +359,59 @@ def cos_sim(X, Y, **kwargs):
def cross_entropy(input, label, **kwargs): def cross_entropy(input, label, **kwargs):
""" """
This function computes cross_entropy using the input and label. **Cross Entropy Layer**
This layer computes the cross entropy between `input` and `label`. It supports
both standard cross-entropy and soft-label cross-entropy loss computation.
1) One-hot cross-entropy:
`soft_label = False`, `Label[i, 0]` indicates the class index for sample i:
.. math::
Y[i] = -\log(X[i, Label[i]])
2) Soft-label cross-entropy:
`soft_label = True`, `Label[i, j]` indicates the soft label of class j
for sample i:
.. math::
Y[i] = \sum_j{-Label[i, j] * \log(X[i, j])}
Please make sure that in this case the summation of each row of `label`
equals one.
3) One-hot cross-entropy with vectorized `label`:
As a special case of 2), when each row of `label` has only one
non-zero element, which is equal to 1, soft-label cross-entropy degenerates
to a one-hot cross-entropy with one-hot label representation.
Args:
input (Variable|list): a 2-D tensor with shape [N x D], where N is the
batch size and D is the number of classes. This input is a probability
computed by the previous operator, which is almost always the result
of a softmax operator.
label (Variable|list): the ground truth which is a 2-D tensor. When
`soft_label` is set to `False`, `label` is a tensor<int64> with shape
[N x 1]. When `soft_label` is set to `True`, `label` is a
tensor<float/double> with shape [N x D].
soft_label (bool, via `**kwargs`): a flag indicating whether to interpret
the given labels as soft labels. Default: `False`.
Returns:
A 2-D tensor with shape [N x 1], the cross entropy loss.
Raises:
`ValueError`: 1) the 1st dimension of `input` and `label` are not equal; 2) when \
`soft_label == True`, and the 2nd dimension of `input` and `label` are not \
equal; 3) when `soft_label == False`, and the 2nd dimension of `label` is not 1.
Examples:
.. code-block:: python
predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label)
""" """
helper = LayerHelper('cross_entropy', **kwargs) helper = LayerHelper('cross_entropy', **kwargs)
out = helper.create_tmp_variable(dtype=input.dtype) out = helper.create_tmp_variable(dtype=input.dtype)
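A quick numeric check of the two formulas in the docstring above (plain numpy, values hypothetical): with a one-hot row, the soft-label form reduces to the one-hot form.
.. code-block:: python
    import numpy as np
    X = np.array([[0.2, 0.7, 0.1]])      # N=1, D=3, e.g. a softmax output
    label = np.array([[1]])              # hard label: class index 1
    # 1) one-hot: Y[i] = -log(X[i, Label[i]])
    y_hard = -np.log(X[0, label[0, 0]])  # ~0.3567
    # 2) soft-label: Y[i] = -sum_j Label[i, j] * log(X[i, j])
    soft = np.array([[0.0, 1.0, 0.0]])
    y_soft = -(soft * np.log(X)).sum(axis=1)[0]  # same value, ~0.3567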
...@@ -514,14 +567,83 @@ def conv2d(input, ...@@ -514,14 +567,83 @@ def conv2d(input,
groups=None, groups=None,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
act=None, act=None):
name=None):
""" """
This function creates the op for a 2-dimensional Convolution. **Convolution2D Layer**
This is performed using the parameters of filters(size, dimensionality etc)
, stride and other configurations for a Convolution operation. The convolution2D layer calculates the output based on the input, filter
This function can also append an activation on top of the and strides, paddings, dilations, groups parameters. Input(Input) and Output(Output)
conv-2d output, if mentioned in the input parameters. are in NCHW format, where N is batch size, C is the number of channels, H is the height
of the feature, and W is the width of the feature.
For details of the convolution layer, please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
If a bias attribute and activation type are provided, bias is added to the output of the convolution,
and the corresponding activation function is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCHW format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
Input:
Input shape: $(N, C_{in}, H_{in}, W_{in})$
Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
Output:
Output shape: $(N, C_{out}, H_{out}, W_{out})$
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args:
input(Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filters. It is the same as the number
of output image channels.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise,
stride_H = stride_W = stride. Default: stride = 1.
padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise,
padding_H = padding_W = padding. Default: padding = 0.
groups(int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1
param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
act(str): Activation type. Default: None
Returns:
Variable: The tensor variable storing the convolution and \
non-linearity activation result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch.
Examples:
.. code-block:: python
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
""" """
if stride is None: if stride is None:
......
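Plugging the docstring's own example into the H_out/W_out formula (dilation taken as 1 here, an assumption for this sketch): a 32x32 input with a 3x3 filter, stride 1, and no padding yields a 30x30 output.
.. code-block:: python
    def conv_out_size(in_size, padding, dilation, filter_size, stride):
        # H_out = (H_in + 2*padding - (dilation*(H_f - 1) + 1)) / stride + 1
        return (in_size + 2 * padding -
                (dilation * (filter_size - 1) + 1)) // stride + 1
    assert conv_out_size(32, 0, 1, 3, 1) == 30
    # so the docstring example produces an output of shape (N, 2, 30, 30)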
from collections import defaultdict from collections import defaultdict
import framework import framework
from backward import append_backward_ops from backward import append_backward
from framework import unique_name, program_guard from framework import unique_name, program_guard
from initializer import Constant from initializer import Constant
from layer_helper import LayerHelper from layer_helper import LayerHelper
...@@ -194,10 +194,10 @@ class Optimizer(object): ...@@ -194,10 +194,10 @@ class Optimizer(object):
no_grad_set=None): no_grad_set=None):
"""Add operations to minimize `loss` by updating `parameter_list`. """Add operations to minimize `loss` by updating `parameter_list`.
This method combines interface `append_backward_ops()` and This method combines interface `append_backward()` and
`create_optimization_pass()` into one. `create_optimization_pass()` into one.
""" """
params_grads = append_backward_ops(loss, parameter_list, no_grad_set) params_grads = append_backward(loss, parameter_list, no_grad_set)
params_grads = append_gradient_clip_ops(params_grads) params_grads = append_gradient_clip_ops(params_grads)
......
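Callers see no signature change; `minimize` simply routes through the renamed `append_backward`. A sketch, assuming `minimize` returns both the op and gradient lists as the distributed example below suggests:
.. code-block:: python
    sgd = fluid.optimizer.SGD(learning_rate=0.01)
    # avg_cost is a scalar loss variable built earlier (hypothetical here)
    optimize_ops, params_grads = sgd.minimize(avg_cost)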
...@@ -38,35 +38,43 @@ train_reader = paddle.batch( ...@@ -38,35 +38,43 @@ train_reader = paddle.batch(
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
t = fluid.DistributeTranspiler() t = fluid.DistributeTranspiler()
# list of all parameter server endpoints, used for splitting parameters
pserver_endpoints = os.getenv("PSERVERS") pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv("TRAINING_ROLE", training_role = os.getenv("TRAINING_ROLE",
"TRAINER") # get the training role: trainer/pserver "TRAINER") # get the training role: trainer/pserver
t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=1) t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)
if training_role == "PSERVER": if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(pserver_endpoints, optimize_ops) if not current_endpoint:
print("need env SERVER_ENDPOINT")
exit(1)
pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
exe.run(pserver_prog) exe.run(pserver_prog)
elif training_role == "TRAINER": elif training_role == "TRAINER":
trainer_prog = t.get_trainer_program()
feeder = fluid.DataFeeder(feed_list=[images, label], place=place) feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
accuracy.reset(exe) accuracy.reset(exe)
batch_id = 0
for data in train_reader(): for data in train_reader():
loss, acc = exe.run(fluid.default_main_program(), loss, acc = exe.run(trainer_prog,
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[avg_cost] + accuracy.metrics) fetch_list=[avg_cost] + accuracy.metrics)
pass_acc = accuracy.eval(exe) pass_acc = accuracy.eval(exe)
# print loss, acc if batch_id % 100 == 0:
if loss < 10.0 and pass_acc > 0.9: print("batch_id %d, loss: %f, acc: %f" %
# if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good. (batch_id, loss, pass_acc))
exit(0) batch_id += 1
pass_acc = accuracy.eval(exe) pass_acc = accuracy.eval(exe)
print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc)) print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else: else:
print("environment var TRAINER_ROLE should be TRAINER os PSERVER") print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
exit(1)
...@@ -4,7 +4,7 @@ import random ...@@ -4,7 +4,7 @@ import random
import itertools import itertools
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import collections import collections
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
from paddle.v2.fluid.op import Operator from paddle.v2.fluid.op import Operator
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import Program, OpProtoHolder from paddle.v2.fluid.framework import Program, OpProtoHolder
...@@ -491,7 +491,7 @@ class OpTest(unittest.TestCase): ...@@ -491,7 +491,7 @@ class OpTest(unittest.TestCase):
op_loss.desc.infer_var_type(block.desc) op_loss.desc.infer_var_type(block.desc)
op_loss.desc.infer_shape(block.desc) op_loss.desc.infer_shape(block.desc)
param_grad_list = append_backward_ops( param_grad_list = append_backward(
loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set) loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)
feed_dict = { feed_dict = {
......
...@@ -2,7 +2,7 @@ import unittest ...@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
from paddle.v2.fluid.framework import default_main_program from paddle.v2.fluid.framework import default_main_program
import numpy import numpy
...@@ -64,7 +64,7 @@ class TestArrayReadWrite(unittest.TestCase): ...@@ -64,7 +64,7 @@ class TestArrayReadWrite(unittest.TestCase):
total_sum = layers.sums(input=[a_sum, x_sum]) total_sum = layers.sums(input=[a_sum, x_sum])
total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0) total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)
append_backward_ops(total_sum_scaled) append_backward(total_sum_scaled)
g_vars = map(default_main_program().global_block().var, g_vars = map(default_main_program().global_block().var,
[each_x.name + "@GRAD" for each_x in x]) [each_x.name + "@GRAD" for each_x in x])
......
...@@ -3,7 +3,7 @@ import paddle.v2.fluid.layers as layers ...@@ -3,7 +3,7 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import default_startup_program, default_main_program from paddle.v2.fluid.framework import default_startup_program, default_main_program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
import numpy import numpy
...@@ -26,7 +26,7 @@ class ConditionalBlock(unittest.TestCase): ...@@ -26,7 +26,7 @@ class ConditionalBlock(unittest.TestCase):
outs = exe.run(feed={'X': x}, fetch_list=[out])[0] outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
print outs print outs
loss = layers.mean(x=out) loss = layers.mean(x=out)
append_backward_ops(loss=loss) append_backward(loss=loss)
outs = exe.run( outs = exe.run(
feed={'X': x}, feed={'X': x},
fetch_list=[ fetch_list=[
......
...@@ -4,7 +4,7 @@ import numpy ...@@ -4,7 +4,7 @@ import numpy
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program, program_guard from paddle.v2.fluid.framework import Program, program_guard
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
class TestCPULoDTensorArrayOps(unittest.TestCase): class TestCPULoDTensorArrayOps(unittest.TestCase):
...@@ -170,7 +170,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): ...@@ -170,7 +170,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
mean = layers.mean(x=result) mean = layers.mean(x=result)
append_backward_ops(mean) append_backward(mean)
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place) tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
......
...@@ -2,7 +2,7 @@ import unittest ...@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
class TestOptimizer(unittest.TestCase): class TestOptimizer(unittest.TestCase):
...@@ -102,7 +102,7 @@ class TestMomentumOptimizer(unittest.TestCase): ...@@ -102,7 +102,7 @@ class TestMomentumOptimizer(unittest.TestCase):
dtype="float32", shape=[1], lod_level=0, name="mean.out") dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op( block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer.create_optimization_pass(
...@@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase): ...@@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
momentum_optimizer = self.MockMomentum( momentum_optimizer = self.MockMomentum(
learning_rate=learning_rate, momentum=0.2, use_nesterov=True) learning_rate=learning_rate, momentum=0.2, use_nesterov=True)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer.create_optimization_pass(
...@@ -209,7 +209,7 @@ class TestAdagradOptimizer(unittest.TestCase): ...@@ -209,7 +209,7 @@ class TestAdagradOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
adagrad_optimizer = self.MockAdagrad( adagrad_optimizer = self.MockAdagrad(
learning_rate=learning_rate, epsilon=1.0e-6) learning_rate=learning_rate, epsilon=1.0e-6)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out, opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out,
...@@ -269,7 +269,7 @@ class TestAdamOptimizer(unittest.TestCase): ...@@ -269,7 +269,7 @@ class TestAdamOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
adam_optimizer = self.MockAdam( adam_optimizer = self.MockAdam(
learning_rate=learning_rate, beta1=0.9, beta2=0.999) learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0) self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out, opts = adam_optimizer.create_optimization_pass(params_grads, mul_out,
...@@ -331,7 +331,7 @@ class TestAdamaxOptimizer(unittest.TestCase): ...@@ -331,7 +331,7 @@ class TestAdamaxOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
adamax_optimizer = self.MockAdamax( adamax_optimizer = self.MockAdamax(
learning_rate=learning_rate, beta1=0.9, beta2=0.999) learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out, opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
...@@ -390,7 +390,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): ...@@ -390,7 +390,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
decayed_adagrad_optimizer = self.MockDecayedAdagrad( decayed_adagrad_optimizer = self.MockDecayedAdagrad(
learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6) learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
opts = decayed_adagrad_optimizer.create_optimization_pass( opts = decayed_adagrad_optimizer.create_optimization_pass(
......
...@@ -3,7 +3,7 @@ import unittest ...@@ -3,7 +3,7 @@ import unittest
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program, grad_var_name from paddle.v2.fluid.framework import Program, grad_var_name
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
...@@ -177,7 +177,7 @@ class RecurrentOpTest1(unittest.TestCase): ...@@ -177,7 +177,7 @@ class RecurrentOpTest1(unittest.TestCase):
def test_backward(self): def test_backward(self):
self.check_forward() self.check_forward()
append_backward_ops(self.output) append_backward(self.output)
ana_grad = [np.array(x) for x in self.backward()] ana_grad = [np.array(x) for x in self.backward()]
......
...@@ -3,7 +3,7 @@ import unittest ...@@ -3,7 +3,7 @@ import unittest
import paddle.v2.fluid.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.regularizer as regularizer import paddle.v2.fluid.regularizer as regularizer
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
class TestL2DecayRegularizer(unittest.TestCase): class TestL2DecayRegularizer(unittest.TestCase):
...@@ -33,7 +33,7 @@ class TestL2DecayRegularizer(unittest.TestCase): ...@@ -33,7 +33,7 @@ class TestL2DecayRegularizer(unittest.TestCase):
dtype="float32", shape=[1], lod_level=0, name="mean.out") dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op( block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
count_ops = len(block.ops) count_ops = len(block.ops)
params_grads = optimizer.append_regularization_ops(params_grads) params_grads = optimizer.append_regularization_ops(params_grads)
...@@ -70,7 +70,7 @@ class TestL1DecayRegularizer(unittest.TestCase): ...@@ -70,7 +70,7 @@ class TestL1DecayRegularizer(unittest.TestCase):
dtype="float32", shape=[1], lod_level=0, name="mean.out") dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op( block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
count_ops = len(block.ops) count_ops = len(block.ops)
params_grads = optimizer.append_regularization_ops(params_grads) params_grads = optimizer.append_regularization_ops(params_grads)
......
...@@ -12,7 +12,7 @@ class TestReorderLoDTensor(unittest.TestCase): ...@@ -12,7 +12,7 @@ class TestReorderLoDTensor(unittest.TestCase):
new_dat = fluid.layers.reorder_lod_tensor_by_rank( new_dat = fluid.layers.reorder_lod_tensor_by_rank(
x=dat, rank_table=table) x=dat, rank_table=table)
loss = fluid.layers.mean(x=new_dat) loss = fluid.layers.mean(x=new_dat)
fluid.backward.append_backward_ops(loss=loss) fluid.backward.append_backward(loss=loss)
cpu = fluid.CPUPlace() cpu = fluid.CPUPlace()
exe = fluid.Executor(cpu) exe = fluid.Executor(cpu)
......
...@@ -2,7 +2,7 @@ import unittest ...@@ -2,7 +2,7 @@ import unittest
from paddle.v2.fluid.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
......
...@@ -2,7 +2,7 @@ import unittest ...@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
from paddle.v2.fluid.framework import default_main_program from paddle.v2.fluid.framework import default_main_program
import numpy import numpy
...@@ -35,7 +35,7 @@ class TestShrinkRNNMemory(unittest.TestCase): ...@@ -35,7 +35,7 @@ class TestShrinkRNNMemory(unittest.TestCase):
self.assertTrue(numpy.allclose(tensor_np[0:1], outs[2])) self.assertTrue(numpy.allclose(tensor_np[0:1], outs[2]))
mem3_mean = layers.mean(x=mem3) mem3_mean = layers.mean(x=mem3)
append_backward_ops(loss=mem3_mean) append_backward(loss=mem3_mean)
x_grad = exe.run( x_grad = exe.run(
feed={'x': tensor}, feed={'x': tensor},
fetch_list=[main_program.global_block().var('x@GRAD')])[0] fetch_list=[main_program.global_block().var('x@GRAD')])[0]
......
...@@ -4,7 +4,7 @@ import numpy as np ...@@ -4,7 +4,7 @@ import numpy as np
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program, program_guard from paddle.v2.fluid.framework import Program, program_guard
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
class TestCPULoDTensorArrayOps(unittest.TestCase): class TestCPULoDTensorArrayOps(unittest.TestCase):
...@@ -133,7 +133,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): ...@@ -133,7 +133,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
in_true=out_true, in_false=out_false, mask=y, x=x, level=level) in_true=out_true, in_false=out_false, mask=y, x=x, level=level)
mean = layers.mean(x=out) mean = layers.mean(x=out)
append_backward_ops(mean) append_backward(mean)
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place) tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
......
...@@ -2,7 +2,7 @@ import unittest ...@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
import numpy import numpy
...@@ -46,7 +46,7 @@ class TestWhileOp(unittest.TestCase): ...@@ -46,7 +46,7 @@ class TestWhileOp(unittest.TestCase):
sum_result = layers.array_read(array=mem_array, i=i) sum_result = layers.array_read(array=mem_array, i=i)
loss = layers.mean(x=sum_result) loss = layers.mean(x=sum_result)
append_backward_ops(loss) append_backward(loss)
cpu = core.CPUPlace() cpu = core.CPUPlace()
exe = Executor(cpu) exe = Executor(cpu)
......