提交 fb7d8d88 编写于 作者: T tensor-tang

Merge remote-tracking branch 'upstream/develop' into remove-flag

...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include <list> #include <list>
#include <memory>
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h" #include "paddle/operators/recurrent_op.h"
...@@ -43,11 +45,11 @@ static bool AllInSet( ...@@ -43,11 +45,11 @@ static bool AllInSet(
return all_in_set; return all_in_set;
} }
static std::shared_ptr<OperatorBase> NOP() { static std::unique_ptr<OperatorBase> NOP() {
auto net_op = std::make_shared<operators::NetOp>(); auto net_op = new operators::NetOp();
net_op->SetType("@NOP@"); net_op->SetType("@NOP@");
net_op->CompleteAddOp(); net_op->CompleteAddOp();
return net_op; return std::unique_ptr<OperatorBase>(net_op);
} }
// Get backward operator from a forward operator, a recursive implementation. // Get backward operator from a forward operator, a recursive implementation.
...@@ -62,11 +64,7 @@ static std::shared_ptr<OperatorBase> NOP() { ...@@ -62,11 +64,7 @@ static std::shared_ptr<OperatorBase> NOP() {
// operator, in a complex situation, it may be a NetOp. // operator, in a complex situation, it may be a NetOp.
// //
// See Backward.h for details // See Backward.h for details
static std::shared_ptr<OperatorBase> BackwardRecursive( static std::unique_ptr<OperatorBase> BackwardRecursive(
const OperatorBase& forwardOp,
std::unordered_set<std::string>& no_grad_names, size_t& uniq_id);
std::shared_ptr<OperatorBase> BackwardRecursive(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
std::unordered_set<std::string>& no_grad_names, size_t& uniq_id) { std::unordered_set<std::string>& no_grad_names, size_t& uniq_id) {
// If all input gradients of forwarding operator do not need to calculate, // If all input gradients of forwarding operator do not need to calculate,
...@@ -91,7 +89,7 @@ std::shared_ptr<OperatorBase> BackwardRecursive( ...@@ -91,7 +89,7 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
} }
// Returned gradient network // Returned gradient network
auto net = std::make_shared<operators::NetOp>(); auto net = std::unique_ptr<operators::NetOp>(new operators::NetOp());
if (forwardOp.IsNetOp()) { if (forwardOp.IsNetOp()) {
// Because forwardOp is a net op, it can be static_cast. // Because forwardOp is a net op, it can be static_cast.
...@@ -105,14 +103,14 @@ std::shared_ptr<OperatorBase> BackwardRecursive( ...@@ -105,14 +103,14 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
// traverse forwardNet in reverse and collect all duplicate outputs. // traverse forwardNet in reverse and collect all duplicate outputs.
for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
++it, ++local_op_id) { ++it, ++local_op_id) {
auto fwd = *it; auto& fwd = *it;
auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id);
net->AddOp(bwd);
ForEachVarName(bwd->Outputs(), ForEachVarName(bwd->Outputs(),
[&dup_output_ops, local_op_id](const std::string& out) { [&dup_output_ops, local_op_id](const std::string& out) {
dup_output_ops[out].emplace_back(local_op_id); dup_output_ops[out].emplace_back(local_op_id);
return false; return false;
}); });
net->AddOp(std::move(bwd));
} }
// Get unique ID for this method. // Get unique ID for this method.
auto uid = uniq_id++; auto uid = uniq_id++;
...@@ -122,7 +120,7 @@ std::shared_ptr<OperatorBase> BackwardRecursive( ...@@ -122,7 +120,7 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
// to handle this case. For each duplicate output, rename it to an alias // to handle this case. For each duplicate output, rename it to an alias
// (original name with an offset), append an `add` op for its operator, // (original name with an offset), append an `add` op for its operator,
// and finally sum all the alias variables to the final output variable y. // and finally sum all the alias variables to the final output variable y.
using Pos = std::pair<size_t, std::shared_ptr<OperatorBase>>; using Pos = std::pair<size_t, std::unique_ptr<OperatorBase>>;
std::list<Pos> insert_position; std::list<Pos> insert_position;
for (auto& dup_output_op : dup_output_ops) { for (auto& dup_output_op : dup_output_ops) {
const std::string& name = dup_output_op.first; const std::string& name = dup_output_op.first;
...@@ -150,13 +148,13 @@ std::shared_ptr<OperatorBase> BackwardRecursive( ...@@ -150,13 +148,13 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
[](const Pos& l, const Pos& r) { return l.first > r.first; }); [](const Pos& l, const Pos& r) { return l.first > r.first; });
for (auto& pos : insert_position) { for (auto& pos : insert_position) {
net->InsertOp(pos.first + 1, pos.second); net->InsertOp(pos.first + 1, std::move(pos.second));
} }
} else { } else {
std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp); std::unique_ptr<OperatorBase> grad_op(OpRegistry::CreateGradOp(forwardOp));
ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op](
grad_op](const std::string& grad_input) { const std::string& grad_input) {
if (no_grad_names.count(grad_input)) { if (no_grad_names.count(grad_input)) {
// +1 for \0 // +1 for \0
std::string prefix = grad_input.substr( std::string prefix = grad_input.substr(
...@@ -190,23 +188,23 @@ std::shared_ptr<OperatorBase> BackwardRecursive( ...@@ -190,23 +188,23 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
const auto& stepnet_op = const auto& stepnet_op =
*static_cast<const OperatorBase*>(&rnnop.stepnet()); *static_cast<const OperatorBase*>(&rnnop.stepnet());
// create stepnet's gradient op // create stepnet's gradient op
auto grad_stepnet = BackwardRecursive(stepnet_op, no_grad_names, uniq_id);
rnn_grad_op->set_stepnet( rnn_grad_op->set_stepnet(
std::static_pointer_cast<operators::NetOp>(grad_stepnet)); BackwardRecursive(stepnet_op, no_grad_names, uniq_id));
} }
if (net->ops_.empty()) { // Current no aux op is added to network if (net->ops_.empty()) { // Current no aux op is added to network
return grad_op; return grad_op;
} }
net->AddOp(grad_op); net->AddOp(std::move(grad_op));
} }
net->SetType("@GENERATED_BACKWARD@"); net->SetType("@GENERATED_BACKWARD@");
net->CompleteAddOp(); net->CompleteAddOp();
return net; return std::unique_ptr<OperatorBase>(
} // namespace framework static_cast<OperatorBase*>(net.release()));
}
// See header for comments // See header for comments
std::shared_ptr<OperatorBase> Backward( std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars) { const std::unordered_set<std::string>& no_grad_vars) {
std::unordered_set<std::string> no_grad_names; std::unordered_set<std::string> no_grad_names;
......
...@@ -20,7 +20,7 @@ namespace framework { ...@@ -20,7 +20,7 @@ namespace framework {
// Create the backward operator from a forward operator. // Create the backward operator from a forward operator.
// TODO(yuyang18): Add more API reference comment. // TODO(yuyang18): Add more API reference comment.
extern std::shared_ptr<OperatorBase> Backward( extern std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars); const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework } // namespace framework
......
...@@ -180,8 +180,7 @@ TEST(Backward, simple_op_not_need_grad) { ...@@ -180,8 +180,7 @@ TEST(Backward, simple_op_not_need_grad) {
auto no_input_gop = f::Backward(*fwd, {"x", "b"}); auto no_input_gop = f::Backward(*fwd, {"x", "b"});
ASSERT_NE(no_input_gop, nullptr); ASSERT_NE(no_input_gop, nullptr);
ASSERT_TRUE(no_input_gop->IsNetOp()); ASSERT_TRUE(no_input_gop->IsNetOp());
ASSERT_EQ(0UL, ASSERT_EQ(0UL, static_cast<ops::NetOp *>(no_input_gop.get())->ops_.size());
std::static_pointer_cast<ops::NetOp>(no_input_gop)->ops_.size());
} }
TEST(Backward, net_fc_backward_normal) { TEST(Backward, net_fc_backward_normal) {
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
std::shared_ptr<OperatorBase> OpRegistry::CreateOp(const std::string& type, std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const std::string& type,
const VarNameMap& inputs, const VarNameMap& inputs,
const VarNameMap& outputs, const VarNameMap& outputs,
AttributeMap attrs) { AttributeMap attrs) {
...@@ -28,10 +28,10 @@ std::shared_ptr<OperatorBase> OpRegistry::CreateOp(const std::string& type, ...@@ -28,10 +28,10 @@ std::shared_ptr<OperatorBase> OpRegistry::CreateOp(const std::string& type,
"Operator '%s' has not been registered.", type); "Operator '%s' has not been registered.", type);
it->second.checker_->Check(attrs); it->second.checker_->Check(attrs);
auto op = it->second.creator_(type, inputs, outputs, attrs); auto op = it->second.creator_(type, inputs, outputs, attrs);
return std::shared_ptr<OperatorBase>(op); return std::unique_ptr<OperatorBase>(op);
} }
std::shared_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) { std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) {
VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs; AttributeMap attrs;
...@@ -55,10 +55,9 @@ OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap( ...@@ -55,10 +55,9 @@ OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap(
return ret_val; return ret_val;
} }
std::shared_ptr<OperatorBase> OpRegistry::CreateGradOp(const OperatorBase& op) { std::unique_ptr<OperatorBase> OpRegistry::CreateGradOp(const OperatorBase& op) {
PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops");
std::shared_ptr<OperatorBase> grad_op(BuildGradOp(&op)); return std::unique_ptr<OperatorBase>(BuildGradOp(&op));
return grad_op;
} }
} // namespace framework } // namespace framework
......
...@@ -77,17 +77,17 @@ class OpRegistry { ...@@ -77,17 +77,17 @@ class OpRegistry {
} }
} }
static std::shared_ptr<OperatorBase> CreateOp(const std::string& type, static std::unique_ptr<OperatorBase> CreateOp(const std::string& type,
const VarNameMap& inputs, const VarNameMap& inputs,
const VarNameMap& outputs, const VarNameMap& outputs,
AttributeMap attrs); AttributeMap attrs);
static std::shared_ptr<OperatorBase> CreateOp(const OpDesc& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
static VarNameMap ConvertOpDescVarsToVarNameMap( static VarNameMap ConvertOpDescVarsToVarNameMap(
const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars); const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars);
static std::shared_ptr<OperatorBase> CreateGradOp(const OperatorBase& op); static std::unique_ptr<OperatorBase> CreateGradOp(const OperatorBase& op);
static std::unordered_map<std::string, const OpInfo>& op_info_map() { static std::unordered_map<std::string, const OpInfo>& op_info_map() {
static std::unordered_map<std::string, const OpInfo> op_info_map_; static std::unordered_map<std::string, const OpInfo> op_info_map_;
......
...@@ -76,8 +76,7 @@ TEST(OpRegistry, CreateOp) { ...@@ -76,8 +76,7 @@ TEST(OpRegistry, CreateOp) {
attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_type(paddle::framework::AttrType::FLOAT);
attr->set_f(scale); attr->set_f(scale);
std::shared_ptr<paddle::framework::OperatorBase> op = auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::Scope scope; paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx; paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx); op->Run(scope, dev_ctx);
...@@ -118,8 +117,7 @@ TEST(OpRegistry, DefaultValue) { ...@@ -118,8 +117,7 @@ TEST(OpRegistry, DefaultValue) {
ASSERT_TRUE(op_desc.IsInitialized()); ASSERT_TRUE(op_desc.IsInitialized());
std::shared_ptr<paddle::framework::OperatorBase> op = auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::Scope scope; paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx; paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx); op->Run(scope, dev_ctx);
......
...@@ -48,29 +48,6 @@ namespace framework { ...@@ -48,29 +48,6 @@ namespace framework {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
template <typename ClassType>
void ExposeOperator(ClassType &m) {
m.def("infer_shape", &ClassType::type::InferShape)
.def("run", &ClassType::type::Run)
.def("type",
[](const typename ClassType::type &op) -> std::string {
return op.Type();
})
.def("outputs",
[](const typename ClassType::type &op)
-> std::map<std::string, std::vector<std::string>> {
return op.Outputs();
})
.def("inputs",
[](const typename ClassType::type &op) { return op.Inputs(); })
.def("__str__", &ClassType::type::DebugString)
.def("no_intermediate_outputs",
[](const typename ClassType::type &op) {
return op.OutputVars(false);
})
.def("support_gpu", &ClassType::type::SupportGPU);
}
static size_t UniqueIntegerGenerator() { static size_t UniqueIntegerGenerator() {
static std::atomic<size_t> generator; static std::atomic<size_t> generator;
return generator.fetch_add(1); return generator.fetch_add(1);
...@@ -207,75 +184,69 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -207,75 +184,69 @@ All parameter, weight, gradient are variables in Paddle.
.def(py::init<>()) .def(py::init<>())
.def("__str__", string::to_string<const platform::CPUPlace &>); .def("__str__", string::to_string<const platform::CPUPlace &>);
py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base( py::class_<OperatorBase>(m, "Operator")
m, "Operator"); .def_static("create",
[](py::bytes protobin) {
operator_base.def_static("create", [](py::bytes protobin) { OpDesc desc;
OpDesc desc; PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), "Cannot parse user input to OpDesc");
"Cannot parse user input to OpDesc"); PADDLE_ENFORCE(desc.IsInitialized(),
PADDLE_ENFORCE(desc.IsInitialized(), "User OpDesc is not initialized, reason %s",
"User OpDesc is not initialized, reason %s", desc.InitializationErrorString());
desc.InitializationErrorString()); return OpRegistry::CreateOp(desc);
return OpRegistry::CreateOp(desc); })
}); .def("backward",
[](const OperatorBase &forwardOp,
operator_base.def("backward", const std::unordered_set<std::string> &no_grad_vars) {
[](const OperatorBase &forwardOp, return Backward(forwardOp, no_grad_vars).release();
const std::unordered_set<std::string> &no_grad_vars) {
return Backward(forwardOp, no_grad_vars);
});
ExposeOperator(operator_base);
py::class_<operators::NetOp, std::shared_ptr<operators::NetOp>> net(m, "Net");
net.def_static("create",
[]() -> std::shared_ptr<operators::NetOp> {
auto retv = std::make_shared<operators::NetOp>();
retv->SetType("plain_net");
return retv;
})
.def("add_op", &operators::NetOp::AddOp)
.def("add_op",
[](operators::NetOp &self,
const std::shared_ptr<operators::NetOp> &net) -> void {
self.AddOp(std::static_pointer_cast<OperatorBase>(net));
})
.def("add_op",
[](operators::NetOp &self,
const std::shared_ptr<operators::RecurrentOp> &rnn) -> void {
self.AddOp(std::static_pointer_cast<OperatorBase>(rnn));
}) })
.def("infer_shape", &OperatorBase::InferShape)
.def("run", &OperatorBase::Run)
.def("type",
[](const OperatorBase &op) -> std::string { return op.Type(); })
.def("outputs",
[](const OperatorBase &op)
-> std::map<std::string, std::vector<std::string>> {
return op.Outputs();
})
.def("inputs", [](const OperatorBase &op) { return op.Inputs(); })
.def("__str__", &OperatorBase::DebugString)
.def("no_intermediate_outputs",
[](const OperatorBase &op) { return op.OutputVars(false); })
.def("support_gpu", &OperatorBase::SupportGPU);
py::class_<operators::NetOp, OperatorBase>(m, "Net")
.def_static("create",
[]() -> operators::NetOp * {
auto *retv = new operators::NetOp;
retv->SetType("plain_net");
return retv;
})
.def("add_op", [](operators::NetOp &self,
const OperatorBase &op) { self.AddOp(op); })
.def("complete_add_op", &operators::NetOp::CompleteAddOp) .def("complete_add_op", &operators::NetOp::CompleteAddOp)
.def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) { .def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) {
self->CompleteAddOp(); self->CompleteAddOp();
}); });
ExposeOperator(net);
// recurrent_op // recurrent_op
py::class_<operators::RecurrentOp, std::shared_ptr<operators::RecurrentOp>> py::class_<operators::RecurrentOp, OperatorBase>(m, "RecurrentOp")
rnn(m, "RecurrentOp"); .def_static(
"create",
rnn.def_static( [](py::bytes protobin) -> operators::RecurrentOp * {
"create", OpDesc desc;
[](py::bytes protobin) -> std::shared_ptr<operators::RecurrentOp> { PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
OpDesc desc; "Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), PADDLE_ENFORCE(desc.IsInitialized(),
"Cannot parse user input to OpDesc"); "User OpDesc is not initialized, reason %s",
PADDLE_ENFORCE(desc.IsInitialized(), desc.InitializationErrorString());
"User OpDesc is not initialized, reason %s", auto rnn_op = OpRegistry::CreateOp(desc);
desc.InitializationErrorString()); return static_cast<operators::RecurrentOp *>(rnn_op.release());
auto rnn_op = OpRegistry::CreateOp(desc); })
return std::dynamic_pointer_cast<operators::RecurrentOp>(rnn_op); .def("set_stepnet", [](operators::RecurrentOp &self,
}) const operators::NetOp &net) -> void {
.def("set_stepnet", self.set_stepnet(net.Clone());
[](operators::RecurrentOp &self, });
const std::shared_ptr<operators::NetOp> &net) -> void {
self.set_stepnet(net);
});
ExposeOperator(rnn);
m.def("unique_integer", UniqueIntegerGenerator); m.def("unique_integer", UniqueIntegerGenerator);
......
...@@ -27,7 +27,7 @@ limitations under the License. */ ...@@ -27,7 +27,7 @@ limitations under the License. */
// between host and device. Allocating too much would reduce the amount // between host and device. Allocating too much would reduce the amount
// of memory available to the system for paging. So, by default, we // of memory available to the system for paging. So, by default, we
// should set use_pinned_memory to false. // should set use_pinned_memory to false.
DEFINE_bool(use_pinned_memory, false, "If set, allocate cpu pinned memory."); DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
......
...@@ -13,22 +13,33 @@ See the License for the specific language governing permissions and ...@@ -13,22 +13,33 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include <algorithm> // for transform
#include <cstring> // for memcpy
#include <memory> // for unique_ptr
#include <mutex> // for call_once
#include "paddle/memory/detail/buddy_allocator.h" #include "paddle/memory/detail/buddy_allocator.h"
#include "paddle/memory/detail/system_allocator.h" #include "paddle/memory/detail/system_allocator.h"
#include <cstring> // for memcpy
namespace paddle { namespace paddle {
namespace memory { namespace memory {
detail::BuddyAllocator* GetCPUBuddyAllocator() { using BuddyAllocator = detail::BuddyAllocator;
static detail::BuddyAllocator* a = nullptr;
if (a == nullptr) { std::once_flag cpu_allocator_flag;
a = new detail::BuddyAllocator(new detail::CPUAllocator, std::once_flag gpu_allocator_flag;
platform::CpuMinChunkSize(),
platform::CpuMaxChunkSize()); BuddyAllocator* GetCPUBuddyAllocator() {
} static std::unique_ptr<BuddyAllocator> a{nullptr};
return a;
std::call_once(cpu_allocator_flag, [&]() {
a.reset(new BuddyAllocator(new detail::CPUAllocator,
platform::CpuMinChunkSize(),
platform::CpuMaxChunkSize()));
});
return a.get();
} }
template <> template <>
...@@ -48,20 +59,31 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) { ...@@ -48,20 +59,31 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
static detail::BuddyAllocator** as = NULL; using BuddyAllocVec = std::vector<BuddyAllocator*>;
if (as == NULL) { static std::unique_ptr<BuddyAllocVec, void (*)(BuddyAllocVec * p)> as{
new BuddyAllocVec, [](BuddyAllocVec* p) {
std::for_each(p->begin(), p->end(),
[](BuddyAllocator* p) { delete p; });
}};
// GPU buddy allocators
auto& allocators = *as.get();
// GPU buddy allocator initialization
std::call_once(gpu_allocator_flag, [&]() {
int gpu_num = platform::GetDeviceCount(); int gpu_num = platform::GetDeviceCount();
as = new detail::BuddyAllocator*[gpu_num]; allocators.reserve(gpu_num);
for (int gpu = 0; gpu < gpu_num; gpu++) { for (int gpu = 0; gpu < gpu_num; gpu++) {
platform::SetDeviceId(gpu); platform::SetDeviceId(gpu);
as[gpu] = new detail::BuddyAllocator(new detail::GPUAllocator, allocators.emplace_back(new BuddyAllocator(new detail::GPUAllocator,
platform::GpuMinChunkSize(), platform::GpuMinChunkSize(),
platform::GpuMaxChunkSize()); platform::GpuMaxChunkSize()));
} }
} });
platform::SetDeviceId(gpu_id); platform::SetDeviceId(gpu_id);
return as[gpu_id]; return allocators[gpu_id];
} }
template <> template <>
......
...@@ -45,4 +45,8 @@ TEST(Gather, GatherData) { ...@@ -45,4 +45,8 @@ TEST(Gather, GatherData) {
for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4);
for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4);
delete src;
delete index;
delete output;
} }
...@@ -55,9 +55,10 @@ class MeanGradKernel : public framework::OpKernel { ...@@ -55,9 +55,10 @@ class MeanGradKernel : public framework::OpKernel {
IG->mutable_data<T>(context.GetPlace()); IG->mutable_data<T>(context.GetPlace());
T ig_size = (T)framework::product(IG->dims()); T ig_size = (T)framework::product(IG->dims());
Eigen::DSizes<int, 1> bcast(ig_size);
EigenVector<T>::Flatten(*IG).device(context.GetEigenDevice<Place>()) = EigenVector<T>::Flatten(*IG).device(context.GetEigenDevice<Place>()) =
EigenScalar<T>::From(*OG) / ig_size; (EigenVector<T>::From(*OG) / ig_size).broadcast(bcast);
} }
}; };
......
...@@ -41,15 +41,13 @@ class NetOp : public framework::OperatorBase { ...@@ -41,15 +41,13 @@ class NetOp : public framework::OperatorBase {
NetOp(const std::string& type, const VarNameMap& inputs, NetOp(const std::string& type, const VarNameMap& inputs,
const VarNameMap& outputs, const framework::AttributeMap& attrs); const VarNameMap& outputs, const framework::AttributeMap& attrs);
NetOp(const NetOp& o) NetOp(const NetOp& o) : framework::OperatorBase(o.type_, {}, {}, o.attrs_) {
: framework::OperatorBase(
static_cast<const framework::OperatorBase&>(o)) {
this->ops_.reserve(o.ops_.size()); this->ops_.reserve(o.ops_.size());
std::transform(o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_), std::transform(
[](const std::shared_ptr<OperatorBase>& op) o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_),
-> std::shared_ptr<OperatorBase> { [](const std::unique_ptr<framework::OperatorBase>& op) {
return std::shared_ptr<OperatorBase>(op->Clone()); return std::unique_ptr<framework::OperatorBase>(op->Clone());
}); });
this->CompleteAddOp(); this->CompleteAddOp();
} }
...@@ -86,21 +84,27 @@ class NetOp : public framework::OperatorBase { ...@@ -86,21 +84,27 @@ class NetOp : public framework::OperatorBase {
return true; return true;
} }
void AddOp(const framework::OperatorBase& op) { AddOp(op.Clone()); }
/** /**
* @brief Add an operator by ptr * @brief Add an operator by ptr
*/ */
void AddOp(const std::shared_ptr<OperatorBase>& op) { void AddOp(std::unique_ptr<framework::OperatorBase> op) {
PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
ops_.push_back(op); ops_.push_back(std::move(op));
} }
void InsertOp(size_t pos, const std::shared_ptr<OperatorBase>& op) { void InsertOp(size_t pos, std::unique_ptr<framework::OperatorBase> op) {
PADDLE_ENFORCE(!add_op_done_, PADDLE_ENFORCE(!add_op_done_,
"Cannot InsertOp when this network is sealed"); "Cannot InsertOp when this network is sealed");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range"); PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range");
ops_.insert(ops_.begin() + pos, op); ops_.insert(ops_.begin() + pos, std::move(op));
}
void InsertOp(size_t pos, const framework::OperatorBase& op) {
InsertOp(pos, op.Clone());
} }
void CompleteAddOp(bool calculate = true); void CompleteAddOp(bool calculate = true);
...@@ -112,7 +116,7 @@ class NetOp : public framework::OperatorBase { ...@@ -112,7 +116,7 @@ class NetOp : public framework::OperatorBase {
std::unique_ptr<framework::OperatorBase> Clone() const override; std::unique_ptr<framework::OperatorBase> Clone() const override;
std::vector<std::shared_ptr<OperatorBase>> ops_; std::vector<std::unique_ptr<framework::OperatorBase>> ops_;
private: private:
bool add_op_done_{false}; bool add_op_done_{false};
......
...@@ -38,15 +38,12 @@ TEST(OpKernel, all) { ...@@ -38,15 +38,12 @@ TEST(OpKernel, all) {
auto net = std::make_shared<NetOp>(); auto net = std::make_shared<NetOp>();
ASSERT_NE(net, nullptr); ASSERT_NE(net, nullptr);
auto op1 = std::shared_ptr<TestOp>( net->AddOp(std::unique_ptr<TestOp>(
new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}},
{{"Out", {"y"}}}, {})); {{"Out", {"y"}}}, {})));
net->AddOp(op1); net->AddOp(std::unique_ptr<TestOp>(
auto op2 = std::shared_ptr<TestOp>(
new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}}, new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}},
{{"Out", {"z"}}}, {})); {{"Out", {"z"}}}, {})));
net->AddOp(op2);
net->CompleteAddOp(); net->CompleteAddOp();
AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"},
...@@ -61,21 +58,21 @@ TEST(OpKernel, all) { ...@@ -61,21 +58,21 @@ TEST(OpKernel, all) {
TEST(NetOp, insert_op) { TEST(NetOp, insert_op) {
NetOp net; NetOp net;
auto op1 = std::shared_ptr<framework::NOP>( auto op1 = std::unique_ptr<framework::NOP>(
new framework::NOP("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, new framework::NOP("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}},
{{"Out", {"y"}}}, {})); {{"Out", {"y"}}}, {}));
net.AddOp(op1); net.AddOp(*op1);
net.InsertOp(0, op1); net.InsertOp(0, *op1);
ASSERT_EQ(2UL, net.ops_.size()); ASSERT_EQ(2UL, net.ops_.size());
net.InsertOp(2, op1); net.InsertOp(2, std::move(op1));
ASSERT_EQ(3UL, net.ops_.size()); ASSERT_EQ(3UL, net.ops_.size());
} }
TEST(NetOp, Clone) { TEST(NetOp, Clone) {
NetOp net; NetOp net;
net.AddOp( net.AddOp(
std::shared_ptr<framework::NOP>(new framework::NOP{"empty", {}, {}, {}})); std::unique_ptr<framework::NOP>(new framework::NOP{"empty", {}, {}, {}}));
net.AddOp(std::shared_ptr<framework::NOP>( net.AddOp(std::unique_ptr<framework::NOP>(
new framework::NOP{"empty2", {}, {}, {}})); new framework::NOP{"empty2", {}, {}, {}}));
net.CompleteAddOp(true); net.CompleteAddOp(true);
auto new_net_op = net.Clone(); auto new_net_op = net.Clone();
......
...@@ -34,7 +34,8 @@ class RecurrentAlgorithm { ...@@ -34,7 +34,8 @@ class RecurrentAlgorithm {
void Run(const framework::Scope& scope, void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const; const platform::DeviceContext& dev_ctx) const;
void Init(rnn::Argument* arg, std::shared_ptr<NetOp>* stepnet) { void Init(rnn::Argument* arg,
std::unique_ptr<framework::OperatorBase>* stepnet) {
PADDLE_ENFORCE_NOT_NULL(stepnet, "stepnet should be set before."); PADDLE_ENFORCE_NOT_NULL(stepnet, "stepnet should be set before.");
arg_ = arg; arg_ = arg;
stepnet_ = stepnet; stepnet_ = stepnet;
...@@ -63,7 +64,7 @@ class RecurrentAlgorithm { ...@@ -63,7 +64,7 @@ class RecurrentAlgorithm {
void InitMemories(framework::Scope* step_scopes, bool infer_shape_mode) const; void InitMemories(framework::Scope* step_scopes, bool infer_shape_mode) const;
private: private:
std::shared_ptr<NetOp>* stepnet_; std::unique_ptr<framework::OperatorBase>* stepnet_;
rnn::Argument* arg_; rnn::Argument* arg_;
mutable size_t seq_len_; mutable size_t seq_len_;
}; };
...@@ -80,7 +81,8 @@ class RecurrentGradientAlgorithm { ...@@ -80,7 +81,8 @@ class RecurrentGradientAlgorithm {
* operator. * operator.
*/ */
public: public:
void Init(rnn::Argument* arg, std::shared_ptr<NetOp>* stepnet) { void Init(rnn::Argument* arg,
std::unique_ptr<framework::OperatorBase>* stepnet) {
PADDLE_ENFORCE_NOT_NULL(stepnet, "stepnet should be set before."); PADDLE_ENFORCE_NOT_NULL(stepnet, "stepnet should be set before.");
arg_ = std::move(arg); arg_ = std::move(arg);
stepnet_ = stepnet; stepnet_ = stepnet;
...@@ -107,7 +109,7 @@ class RecurrentGradientAlgorithm { ...@@ -107,7 +109,7 @@ class RecurrentGradientAlgorithm {
private: private:
rnn::Argument* arg_; rnn::Argument* arg_;
mutable size_t seq_len_; mutable size_t seq_len_;
std::shared_ptr<NetOp>* stepnet_; std::unique_ptr<framework::OperatorBase>* stepnet_;
}; };
class RecurrentOp : public framework::OperatorBase { class RecurrentOp : public framework::OperatorBase {
...@@ -133,15 +135,17 @@ class RecurrentOp : public framework::OperatorBase { ...@@ -133,15 +135,17 @@ class RecurrentOp : public framework::OperatorBase {
alg_.Run(scope, dev_ctx); alg_.Run(scope, dev_ctx);
} }
void set_stepnet(std::shared_ptr<NetOp> net) { stepnet_ = net; } void set_stepnet(std::unique_ptr<OperatorBase> net) {
const NetOp& stepnet() const { return *stepnet_; } stepnet_ = std::move(net);
}
const OperatorBase& stepnet() const { return *stepnet_; }
static const rnn::ArgumentName kArgName; static const rnn::ArgumentName kArgName;
private: private:
RecurrentAlgorithm alg_; RecurrentAlgorithm alg_;
rnn::Argument arg_; rnn::Argument arg_;
std::shared_ptr<NetOp> stepnet_; std::unique_ptr<OperatorBase> stepnet_;
}; };
class RecurrentGradientOp : public framework::OperatorBase { class RecurrentGradientOp : public framework::OperatorBase {
...@@ -171,12 +175,14 @@ class RecurrentGradientOp : public framework::OperatorBase { ...@@ -171,12 +175,14 @@ class RecurrentGradientOp : public framework::OperatorBase {
static const rnn::ArgumentName kArgName; static const rnn::ArgumentName kArgName;
void set_stepnet(const std::shared_ptr<NetOp>& net) { stepnet_ = net; } void set_stepnet(std::unique_ptr<OperatorBase> net) {
const NetOp& stepnet() const { return *stepnet_; } stepnet_ = std::move(net);
}
const OperatorBase& stepnet() const { return *stepnet_; }
private: private:
RecurrentGradientAlgorithm alg_; RecurrentGradientAlgorithm alg_;
std::shared_ptr<NetOp> stepnet_; std::unique_ptr<OperatorBase> stepnet_;
rnn::Argument arg_; rnn::Argument arg_;
}; };
......
...@@ -49,4 +49,8 @@ TEST(scatter, ScatterUpdate) { ...@@ -49,4 +49,8 @@ TEST(scatter, ScatterUpdate) {
EXPECT_EQ(output->data<float>()[i], float(i - 4)); EXPECT_EQ(output->data<float>()[i], float(i - 4));
for (size_t i = 8; i < 16; ++i) EXPECT_EQ(p_output[i], float(0)); for (size_t i = 8; i < 16; ++i) EXPECT_EQ(p_output[i], float(0));
for (size_t i = 8; i < 16; ++i) EXPECT_EQ(output->data<float>()[i], float(0)); for (size_t i = 8; i < 16; ++i) EXPECT_EQ(output->data<float>()[i], float(0));
delete src;
delete index;
delete output;
} }
...@@ -44,7 +44,8 @@ class SigmoidOpGrad : public framework::OperatorWithKernel { ...@@ -44,7 +44,8 @@ class SigmoidOpGrad : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const framework::InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims()); ctx.Output<Tensor>(framework::GradVarName("X"))
->Resize(ctx.Input<Tensor>("Y")->dims());
} }
}; };
......
...@@ -37,7 +37,7 @@ class SigmoidKernel : public framework::OpKernel { ...@@ -37,7 +37,7 @@ class SigmoidKernel : public framework::OpKernel {
auto Y = EigenVector<T>::Flatten(*output); auto Y = EigenVector<T>::Flatten(*output);
auto place = context.GetEigenDevice<Place>(); auto place = context.GetEigenDevice<Place>();
Y.device(place) = 1.0 / (1.0 + (-1.0 * X).exp()); Y.device(place) = 1. / (1. + (-X).exp());
} }
}; };
......
...@@ -146,7 +146,8 @@ RUN apt-get update &&\ ...@@ -146,7 +146,8 @@ RUN apt-get update &&\
pip install /*.whl; apt-get install -f -y && \ pip install /*.whl; apt-get install -f -y && \
apt-get clean -y && \ apt-get clean -y && \
rm -f /*.whl && \ rm -f /*.whl && \
paddle version paddle version && \
ldconfig
${DOCKERFILE_CUDNN_DSO} ${DOCKERFILE_CUDNN_DSO}
${DOCKERFILE_GPU_ENV} ${DOCKERFILE_GPU_ENV}
ADD go/cmd/pserver/pserver /usr/bin/ ADD go/cmd/pserver/pserver /usr/bin/
......
...@@ -25,3 +25,4 @@ py_test(test_operator SRCS test_operator.py) ...@@ -25,3 +25,4 @@ py_test(test_operator SRCS test_operator.py)
# py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) # py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py)
py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_uniform_random_op SRCS test_uniform_random_op.py)
py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py)
py_test(test_gradient_checker SRCS test_gradient_checker.py)
import unittest import unittest
import numpy import numpy
import itertools
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator from paddle.v2.framework.op import Operator
...@@ -8,6 +9,7 @@ __all__ = ['get_numeric_gradient'] ...@@ -8,6 +9,7 @@ __all__ = ['get_numeric_gradient']
def create_op(op_type): def create_op(op_type):
# TODO need to set attrs
kwargs = dict() kwargs = dict()
for in_name in Operator.get_op_input_names(op_type): for in_name in Operator.get_op_input_names(op_type):
kwargs[in_name] = in_name kwargs[in_name] = in_name
...@@ -66,7 +68,6 @@ def get_numeric_gradient(op, ...@@ -66,7 +68,6 @@ def get_numeric_gradient(op,
local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace( local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace(
)) ))
# TODO(yuyang18): Only CPU is support now.
cpu_ctx = core.DeviceContext.create(core.CPUPlace()) cpu_ctx = core.DeviceContext.create(core.CPUPlace())
def get_output(): def get_output():
...@@ -109,12 +110,110 @@ def get_numeric_gradient(op, ...@@ -109,12 +110,110 @@ def get_numeric_gradient(op,
class GradientChecker(unittest.TestCase): class GradientChecker(unittest.TestCase):
def assert_is_close(self, numeric_grads, scope, max_relative_error, def __get_gradient(self, forward_op, backward_op, input_value, grad_names,
msg_prefix): place):
for name in numeric_grads: """Get the input gradients after running forward and backward operators
b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) on the given places.
a = numeric_grads[name]
:param forward_op: forward operator
:type forward_op: Operator
:param backward_op: backward operator
:type backward_op: Operator
:param input_value: input values.
:type input_value: dict{string:numpy.array}
:param grad_names: the names of returned input gradients.
:type grad_names: a list of string
:param place: the device type.
:type place: CPUPlace or GPUPlace
:return: the input gradients of given grad_names.
:rtype: a list of numpy.array
"""
scope = core.Scope()
ctx = core.DeviceContext.create(place)
inputs = forward_op.inputs()
in_names = [item for k in inputs for item in inputs[k]]
outputs = forward_op.outputs()
out_names = [item for k in outputs for item in outputs[k]]
# create input var and set value
for name, value in input_value.iteritems():
if name not in in_names:
raise ValueError(name + "does not exist in Op's inputs.")
var = scope.new_var(name).get_tensor()
var.set_dims(value.shape)
var.set(value, place)
# run forward op
for out_name in out_names:
scope.new_var(out_name)
forward_op.infer_shape(scope)
forward_op.run(scope, ctx)
# set output var's shape
# set output grad to ones
for name in out_names:
out_tensor = scope.find_var(name).get_tensor()
grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
grad_tensor.set_dims(out_tensor.shape())
data = numpy.ones(out_tensor.shape(), dtype=numpy.float32)
grad_tensor.set(data, place)
# run backward op
for name in backward_op.outputs():
scope.new_var(name)
backward_op.infer_shape(scope)
backward_op.run(scope, ctx)
outs = [
numpy.array(scope.find_var(name).get_tensor())
for name in grad_names
]
return outs
def compare_grad(self, forward_op, input_value):
""" Compare the input gradients between CPU and GPU for the given forward
operator.
:param forward_op: forward operator
:type forward_op: Operator
:param input_value: input values.
:type input_value: dict{string:numpy.array}
:raises: AssertionError, if any gradient value differs between CPU and GPU.
"""
backward_op = core.Operator.backward(forward_op, set())
# return if not compiled with GPU or if the backward op has no GPU kernel
if not (core.is_compile_gpu() and backward_op.support_gpu()):
return
outputs = backward_op.outputs()
out_names = [item for k in outputs for item in outputs[k]]
cpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
out_names, core.CPUPlace())
gpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
out_names, core.GPUPlace(0))
for c_grad, g_grad, name in itertools.izip(cpu_grads, gpu_grads,
out_names):
self.assertTrue(
numpy.allclose(
c_grad, g_grad, atol=1e-4),
"output name: " + name + " has diff")
def __assert_is_close(self, numeric_grads, analytic_grads, names,
max_relative_error, msg_prefix):
"""Use relative error for the comparison.
:param numeric_grads: the numerical gradients.
:type numeric_grads: a list of numpy.array
:param analytic_grads: the analytical gradients.
:type analytic_grads: a list of numpy.array
:param names: the names of gradients, used to print for debug.
:type names: a list of string
:param msg_prefix: string info, used to print for debug.
:type msg_prefix: string
"""
for a, b, name in itertools.izip(numeric_grads, analytic_grads, names):
abs_a = numpy.abs(a) abs_a = numpy.abs(a)
# if abs_a is nearly zero, then use abs error for a, not relative # if abs_a is nearly zero, then use abs error for a, not relative
# error. # error.
...@@ -159,106 +258,26 @@ class GradientChecker(unittest.TestCase): ...@@ -159,106 +258,26 @@ class GradientChecker(unittest.TestCase):
inputs = forward_op.inputs() inputs = forward_op.inputs()
in_names = [item for k in inputs for item in inputs[k]] in_names = [item for k in inputs for item in inputs[k]]
outputs = forward_op.outputs()
out_names = [item for k in outputs for item in outputs[k]]
for no_grad in no_grad_set: for no_grad in no_grad_set:
if no_grad not in in_names: if no_grad not in in_names:
raise ValueError("no_grad should be in in_names") raise ValueError("no_grad should be in in_names")
backward_op = core.Operator.backward(forward_op, no_grad_set) backward_op = core.Operator.backward(forward_op, no_grad_set)
bwd_outputs = backward_op.outputs()
bwd_out_names = [item for k in bwd_outputs for item in bwd_outputs[k]]
places = [core.CPUPlace()] places = [core.CPUPlace()]
if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu():
places.append(core.GPUPlace(0)) places.append(core.GPUPlace(0))
numeric_grad = dict() # get numerical gradients
# get numeric gradient numeric_grads = [
for check_name in inputs_to_check: get_numeric_gradient(forward_op, input_vars, output_name, name)
numeric_grad[check_name] = \ for name in inputs_to_check
get_numeric_gradient(forward_op, input_vars, output_name, ]
check_name)
# get operator gradient according to different device check_names = [grad_var_name(name) for name in inputs_to_check]
for place in places: for place in places:
scope = core.Scope() # get analytical gradients according to different device
ctx = core.DeviceContext.create(place) analytic_grads = self.__get_gradient(forward_op, backward_op,
input_vars, check_names, place)
# create input var and set value self.__assert_is_close(numeric_grads, analytic_grads, check_names,
for name, value in input_vars.iteritems(): max_relative_error,
if name not in in_names: "Gradient Check On %s" % str(place))
raise ValueError(name + " not in op.inputs_")
var = scope.new_var(name).get_tensor()
var.set_dims(value.shape)
var.set(value, place)
# create output var
for out_name in out_names:
scope.new_var(out_name).get_tensor()
# infer the shape of output var and compute/set value of output var
forward_op.infer_shape(scope)
forward_op.run(scope, ctx)
# create output grad var
# set shape as the output var
# set value of this grad to ones
for name in out_names:
out_tensor = scope.find_var(name).get_tensor()
grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
grad_tensor.set_dims(out_tensor.shape())
data = 1.0 * numpy.ones(out_tensor.shape())
grad_tensor.set(data, place)
# create input grad var
for name in bwd_out_names:
scope.new_var(name).get_tensor()
# infer the shape of input gradient var and compute/set it's value
# with backward op
backward_op.infer_shape(scope)
backward_op.run(scope, ctx)
self.assert_is_close(numeric_grad, scope, max_relative_error,
"Gradient Check On %s" % str(place))
if __name__ == '__main__':
class GetNumericGradientTest(unittest.TestCase):
def test_add_op(self):
add_op = Operator('add_two', X="X", Y="Y", Out="Z")
x = numpy.random.random((10, 1)).astype("float32")
y = numpy.random.random((10, 1)).astype("float32")
arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)
def test_softmax_op(self):
def stable_softmax(x):
"""Compute the softmax of vector x in a numerically stable way."""
shiftx = x - numpy.max(x)
exps = numpy.exp(shiftx)
return exps / numpy.sum(exps)
def label_softmax_grad(Y, dY):
dX = Y * 0.0
for i in range(Y.shape[0]):
d = numpy.dot(Y[i, :], dY[i, :])
dX[i, :] = Y[i, :] * (dY[i, :] - d)
return dX
softmax_op = Operator("softmax", X="X", Y="Y")
X = numpy.random.random((2, 2)).astype("float32")
Y = numpy.apply_along_axis(stable_softmax, 1, X)
dY = numpy.ones(Y.shape)
dX = label_softmax_grad(Y, dY)
arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2)
unittest.main()
import unittest
import numpy
from paddle.v2.framework.op import Operator
from gradient_checker import GradientChecker
from gradient_checker import get_numeric_gradient
class GetNumericGradientTest(unittest.TestCase):
    """Sanity checks for get_numeric_gradient on operators whose expected
    gradient is computed independently inside each test."""

    def test_add_op(self):
        """The numeric gradient of 'add_two' w.r.t. X is expected to
        average 1.0."""
        add_op = Operator('add_two', X="X", Y="Y", Out="Z")
        x = numpy.random.random((10, 1)).astype("float32")
        y = numpy.random.random((10, 1)).astype("float32")

        arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
        self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-4)

    def test_softmax_op(self):
        """The numeric gradient of 'softmax' must match the reference
        gradient computed with numpy for an all-ones output gradient."""

        def stable_softmax(x):
            """Compute the softmax of vector x in a numerically stable way."""
            shiftx = x - numpy.max(x)
            exps = numpy.exp(shiftx)
            return exps / numpy.sum(exps)

        def label_softmax_grad(Y, dY):
            """Row-wise reference gradient: dX[i] = Y[i] * (dY[i] - <Y[i], dY[i]>)."""
            dX = Y * 0.0
            for i in range(Y.shape[0]):
                d = numpy.dot(Y[i, :], dY[i, :])
                dX[i, :] = Y[i, :] * (dY[i, :] - d)
            return dX

        softmax_op = Operator("softmax", X="X", Y="Y")

        X = numpy.random.random((2, 2)).astype("float32")
        Y = numpy.apply_along_axis(stable_softmax, 1, X)
        dY = numpy.ones(Y.shape)
        dX = label_softmax_grad(Y, dY)

        arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
        # `decimal` is a number of decimal places, not a tolerance: the check
        # is abs(desired - actual) < 1.5 * 10**(-decimal). The former value
        # 1e-2 produced a bound of about 1.47, which made the assertion
        # vacuous; 2 gives the intended 1.5e-2 bound.
        numpy.testing.assert_almost_equal(arr, dX, decimal=2)
if __name__ == '__main__':
unittest.main()
import unittest import unittest
from op_test_util import OpTestMeta from op_test_util import OpTestMeta
from gradient_checker import GradientChecker, create_op
import numpy as np import numpy as np
...@@ -12,5 +13,12 @@ class TestMeanOp(unittest.TestCase): ...@@ -12,5 +13,12 @@ class TestMeanOp(unittest.TestCase):
self.outputs = {'Out': np.mean(self.inputs['X'])} self.outputs = {'Out': np.mean(self.inputs['X'])}
class MeanGradOpTest(GradientChecker):
    """Gradient check for the operator created from the "mean" op type."""

    def test_normal(self):
        op = create_op("mean")
        inputs = {"X": np.random.random((10, 10)).astype("float32")}
        # Build the set from a list: set("X") iterates over the characters of
        # the string and is only correct for one-character names.
        self.check_grad(op, inputs, set(["X"]), "Out")
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
import unittest import unittest
from op_test_util import OpTestMeta
import numpy as np import numpy as np
from op_test_util import OpTestMeta
from gradient_checker import GradientChecker, create_op
class TestSigmoidOp(unittest.TestCase): class TestSigmoidOp(unittest.TestCase):
...@@ -8,12 +9,20 @@ class TestSigmoidOp(unittest.TestCase): ...@@ -8,12 +9,20 @@ class TestSigmoidOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "sigmoid" self.type = "sigmoid"
self.inputs = {'X': np.random.random((32, 100)).astype("float32")} self.inputs = {'X': np.random.random((15, 31)).astype("float32")}
self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))} self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))}
#class TestSigmoidGradOp(unittest.TestCase): class TestSigmoidGradOp(GradientChecker):
#TODO(qingqing) add unit test def test_grad(self):
op = create_op("sigmoid")
inputs = {"X": np.random.uniform(0.1, 1, [11, 17]).astype("float32")}
# compare gpu and cpu results for backward op.
# this test will be skipped if only compiling CPU version.
self.compare_grad(op, inputs)
# check gradients
self.check_grad(op, inputs, set("X"), "Y", max_relative_error=0.007)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册