diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 8fd2906107c490eee129fc10262df28bfa67800b..c8b85caaca27134f36593940c33378b661a0c111 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -27,6 +27,18 @@ namespace paddle { namespace framework { +static std::unordered_set* g_ctrl_flow_ops_ = nullptr; +// Control Flow operators's backward is significantly different from +// computational operators. Hack Code here. +// We should design a better way to backward CtrlFlowOps. +static std::unordered_set& CtrlFlowOps() { + if (g_ctrl_flow_ops_ == nullptr) { + g_ctrl_flow_ops_ = + new std::unordered_set{"increment", "lod_rank_table"}; + } + return *g_ctrl_flow_ops_; +} + static inline std::unique_ptr CreateGradOp( const OperatorBase& op, const std::unordered_set& no_grad_set, std::unordered_map* grad_to_var) { @@ -288,12 +300,24 @@ static void CreateGradVarInBlock( for (size_t op_index = grad_op_start_index; op_index < ops.size(); ++op_index) { std::unordered_set new_vars; + auto& ctrl_flow_ops = CtrlFlowOps(); ForEachVarName(ops[op_index]->Outputs(), [&](const std::string& grad_var_name) { - if (block_desc->HasVar(grad_var_name)) { + if (ctrl_flow_ops.find(ops[op_index]->Type()) != + ctrl_flow_ops.end()) { + if (block_desc->HasVarRecursive(grad_var_name)) { + return false; + } + } else { + if (block_desc->HasVar(grad_var_name)) { + return false; + } + } + if (grad_var_name == framework::kEmptyVarName) { return false; } auto var = block_desc->Var(grad_var_name); + VLOG(10) << "Creating Variable " << grad_var_name; new_vars.insert(var->Name()); auto it = param_name_map.find(grad_var_name); if (it == param_name_map.end()) { @@ -333,14 +357,25 @@ std::vector> MakeOpGrad( // All input gradients of forwarding operator do not need to calculate. const std::vector& inputs = op_desc->InputArgumentNames(); if (AllGradInSet(inputs, *no_grad_vars)) { + VLOG(10) << "Drop operator " << op_desc->Type(); return grad_op_descs; // empty vector } + // All output gradients of forwarding operator do not need to calculate. const std::vector& outputs = op_desc->OutputArgumentNames(); + if (AllGradInSet(outputs, *no_grad_vars)) { - for (const std::string& name : inputs) { - no_grad_vars->insert(GradVarName(name)); + VLOG(10) << "Drop operator " << op_desc->Type(); + // FIXME: Hack code here + auto& ctrl_flow_ops = CtrlFlowOps(); + if (ctrl_flow_ops.find(op_desc->Type()) == ctrl_flow_ops.end()) { + // Only computational op need drop input's gradient. + for (const std::string& name : inputs) { + no_grad_vars->insert(GradVarName(name)); + VLOG(10) << " Also drop " << GradVarName(name); + } } + return grad_op_descs; // empty vector } diff --git a/paddle/framework/block_desc.cc b/paddle/framework/block_desc.cc index 11764810e1d40e5e6eb3cd0d8e9b4b63a79855b4..6a7a07d5cf471a32822cdccf5c616d8748fd1bd7 100644 --- a/paddle/framework/block_desc.cc +++ b/paddle/framework/block_desc.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/framework/block_desc.h" +#include "paddle/framework/operator.h" #include "paddle/framework/program_desc.h" namespace paddle { @@ -42,6 +43,8 @@ bool BlockDescBind::HasVar(const std::string &name) const { } VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const { + if (name == kEmptyVarName) return nullptr; + auto it = vars_.find(name); if (it == vars_.end()) { return Parent() == kNoneBlockIndex ? nullptr diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 2ffb5b7dbb27b561092856eac0de23d0c3788f75..83aa927c293676c3800ed945c175e4f3dc5629d6 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -97,6 +97,10 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id, if (create_local_scope) { local_scope = &scope->NewScope(); for (auto& var : block.AllVars()) { + if (var->Name() == framework::kEmptyVarName) { + continue; + } + if (var->Persistable()) { auto* ptr = scope->Var(var->Name()); CreateTensor(ptr, var->GetType()); diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index 02a825324328fa5cfd3a4d23a8c64488cc88aeec..2281d93df90d0829cede63eaaefade5d5bac776b 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -466,7 +466,12 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const { auto var = block_.FindVarRecursive(name); PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name); try { - return framework::make_ddim(var->Shape()); + auto shape = var->Shape(); + if (shape.empty()) { + return framework::make_ddim({0UL}); + } else { + return framework::make_ddim(var->Shape()); + } } catch (...) { VLOG(5) << "GetDim of variable " << name << " error"; std::rethrow_exception(std::current_exception()); diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 9ad6272c99dd6a85520ae44c1331ac232bc6a9a2..656736e23846c8de50553a608c54a0bdd3272cb1 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -36,12 +36,9 @@ Scope& Scope::NewScope() const { } Variable* Scope::Var(const std::string& name) { - auto iter = vars_.find(name); - if (iter != vars_.end()) { - VLOG(3) << "Get existing variable " << name; - return iter->second; - } - Variable* v = new Variable(); + auto* v = FindVarLocally(name); + if (v != nullptr) return v; + v = new Variable(); vars_[name] = v; VLOG(3) << "Create variable " << name; v->name_ = &(vars_.find(name)->first); @@ -57,8 +54,10 @@ Variable* Scope::Var(std::string* name) { } Variable* Scope::FindVar(const std::string& name) const { - auto it = vars_.find(name); - if (it != vars_.end()) return it->second; + auto var = FindVarLocally(name); + if (var != nullptr) { + return var; + } return (parent_ == nullptr) ? nullptr : parent_->FindVar(name); } @@ -116,6 +115,11 @@ std::string Scope::Rename(const std::string& origin_name) const { Rename(origin_name, var_name); return var_name; } +Variable* Scope::FindVarLocally(const std::string& name) const { + auto it = vars_.find(name); + if (it != vars_.end()) return it->second; + return nullptr; +} } // namespace framework } // namespace paddle diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index c2aafb6ad825f9bd9ffef754923a15afdeaa8e5c..56e815db54b6385c4e4d87f456ed5d59113ca77b 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -76,6 +76,8 @@ class Scope { std::string Rename(const std::string& origin_name) const; private: + Variable* FindVarLocally(const std::string& name) const; + // Call Scope::NewScope for a sub-scope. explicit Scope(Scope const* parent) : parent_(parent) {} diff --git a/paddle/framework/shape_inference.cc b/paddle/framework/shape_inference.cc index 2298507471c54c5b7751beff900466737eea36d4..7dac1cfd5ee0c320c67bc0b2448417d258d6862b 100644 --- a/paddle/framework/shape_inference.cc +++ b/paddle/framework/shape_inference.cc @@ -12,6 +12,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/framework/shape_inference.h" +#include "grad_op_desc_maker.h" +#include "paddle/framework/operator.h" namespace paddle { namespace framework { @@ -49,6 +51,9 @@ void InferShapeContext::SetDims(const std::vector &names, size_t length = names.size(); PADDLE_ENFORCE_EQ(length, dims.size()); for (size_t i = 0; i < length; ++i) { + if (names[i] == framework::kEmptyVarName) { + continue; + } SetDim(names[i], dims[i]); } } diff --git a/paddle/operators/increment_op.cc b/paddle/operators/increment_op.cc index 35efb12932f1d61fdb511b4ee2cdab3891507c61..54911267e36dfdbc62d533f40f0b754e7d2cb7bf 100644 --- a/paddle/operators/increment_op.cc +++ b/paddle/operators/increment_op.cc @@ -61,6 +61,8 @@ class IncrementOp : public framework::OperatorBase { out.Resize(x.dims()); out.mutable_data(x.place(), x.type()); float value = Attr("step"); + VLOG(10) << Output("Out") << " increase " << Input("X") << " with " + << value; framework::VisitDataType(framework::ToDataType(out.type()), IncrementFunctor(x, &out, value)); } diff --git a/paddle/operators/lod_tensor_to_array_op.cc b/paddle/operators/lod_tensor_to_array_op.cc index 010c79d4e153463d4b2e48e5fd798d3bc4febaf1..b970bf31773f4c6feb0010bd40ba906b388ec310 100644 --- a/paddle/operators/lod_tensor_to_array_op.cc +++ b/paddle/operators/lod_tensor_to_array_op.cc @@ -14,6 +14,7 @@ #include "paddle/framework/lod_rank_table.h" #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/detail/safe_ref.h" namespace paddle { namespace operators { @@ -32,15 +33,20 @@ class LoDTensorToArrayOp : public framework::OperatorBase { : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { - auto &x = scope.FindVar(Input("X"))->Get(); - auto &rank_table = - scope.FindVar(Input("RankTable"))->Get(); - auto &out = - *scope.FindVar(Output("Out"))->GetMutable(); - + auto &x = detail::Ref(scope.FindVar(Input("X")), "Cannot find input %s", + Input("X")) + .Get(); + auto &rank_table = detail::Ref(scope.FindVar(Input("RankTable"))) + .Get(); + auto &out = *detail::Ref(scope.FindVar(Output("Out"))) + .GetMutable(); auto &items = rank_table.items(); auto max_seq_len = items[0].length; auto rank_level = rank_table.level(); + + PADDLE_ENFORCE_LT(rank_level, x.lod().size(), + "Input should be a LOD tensor, and size is at least %d", + rank_level + 1); out.resize(max_seq_len); std::vector> copy_ranges(max_seq_len); @@ -55,16 +61,13 @@ class LoDTensorToArrayOp : public framework::OperatorBase { size_t start_idx = x.lod()[rank_level][item.index] + t; auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset( x.lod(), start_idx, start_idx + 1, rank_level + 1); - auto &lod_length = lod_and_offset.first; framework::AppendLoD(&lod, lod_length); - size_t start_offset = lod_and_offset.second.first; size_t end_offset = lod_and_offset.second.second; copy_ranges[t].emplace_back(CopyRange{start_offset, end_offset}); } } - for (size_t i = 0; i < max_seq_len; ++i) { auto &ranges = copy_ranges[i]; size_t height = std::accumulate( diff --git a/paddle/operators/multiplex_op.cc b/paddle/operators/multiplex_op.cc index f8527dfab3f3c42f430c433a11351f12b8dfae8b..8e7f544e0d3c5f4afc2c63dd8335fadd4753d83d 100644 --- a/paddle/operators/multiplex_op.cc +++ b/paddle/operators/multiplex_op.cc @@ -99,13 +99,7 @@ class MultiplexGradOp : public framework::OperatorWithKernel { "Output(X@Grad) should not be null."); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), "Input(Out@GRAD) should not be null."); - std::vector d_ins; - auto ins = ctx->GetInputsDim("X"); - // No need to compute gradient for Input(Ids) - for (size_t i = 0; i < ins.size(); i++) { - d_ins.push_back(ins[i]); - } - ctx->SetOutputsDim(framework::GradVarName("X"), d_ins); + ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); } protected: diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index c976e22c7740ad11279ab5ee75e4d130be8fa0c5..8b60b9c9122956e96441b67c752786bddd2d71fc 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -599,7 +599,9 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { std::vector output{kOutputs}; for (auto &s : input) { PADDLE_ENFORCE(ctx->HasInputs(s)); - PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(s))); + PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(s)), + "Cannot find the gradient variable %s", + framework::GradVarName(s)); } for (auto &s : output) { PADDLE_ENFORCE(ctx->HasInputs(s)); diff --git a/paddle/operators/sequence_pool_op.cc b/paddle/operators/sequence_pool_op.cc index 2a000ac60b176737277605c3ac812ea65a0e03fc..a2f42570370f50c65fc595d0adb758e10b9aa00e 100644 --- a/paddle/operators/sequence_pool_op.cc +++ b/paddle/operators/sequence_pool_op.cc @@ -104,6 +104,7 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(og_dims[i], x_dims[i], "The dimension mismatch."); } ctx->SetOutputDim(framework::GradVarName("X"), x_dims); + ctx->ShareLoD("X", framework::GradVarName("X")); } protected: diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index ddc210c26e69566fef9baa20f49ba1052e993b3f..744b2fe3f297ad84d4e2d524ecf180c130c85658 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -37,10 +37,16 @@ class SumOp : public framework::OperatorWithKernel { size_t N = x_dims.size(); PADDLE_ENFORCE_GT(N, 1, "Input tensors count should > 1."); - auto in_dim = x_dims[0]; - for (size_t i = 1; i < N; i++) { - auto dim = x_dims[i]; - PADDLE_ENFORCE_EQ(in_dim, dim, "Input tensors must have same shape"); + framework::DDim in_dim({0}); + for (auto& x_dim : x_dims) { + if (framework::product(x_dim) == 0) { + continue; + } + if (framework::product(in_dim) == 0) { + in_dim = x_dim; + } else { + PADDLE_ENFORCE_EQ(in_dim, x_dim, "Input tensors must have same shape"); + } } ctx->SetOutputDim("Out", in_dim); ctx->ShareLoD("X", /*->*/ "Out"); @@ -51,9 +57,23 @@ class SumOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { auto x_vars = ctx.MultiInputVar("X"); if (x_vars[0]->IsType()) { - return framework::OpKernelType( - framework::ToDataType(x_vars[0]->Get().type()), - ctx.device_context()); + int dtype = -1; + for (auto& x_var : x_vars) { + auto& lod_tensor = x_var->Get(); + if (lod_tensor.numel() == 0) { + continue; + } + if (dtype == -1) { + dtype = framework::ToDataType(lod_tensor.type()); + } else { + PADDLE_ENFORCE_EQ(dtype, framework::ToDataType(lod_tensor.type())); + } + } + PADDLE_ENFORCE_NE(dtype, -1, + "Sum operator should have at least one tensor"); + + return framework::OpKernelType(static_cast(dtype), + ctx.device_context()); } else if (x_vars[0]->IsType()) { return framework::OpKernelType( framework::ToDataType( diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index a1eb3b014edc65b7ed604c8b7f17d72f7e460f70..ed6c80ce60da44de1a749ce075cfbca7d53032e0 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -53,6 +53,9 @@ class SumKernel : public framework::OpKernel { for (int i = in_place ? 1 : 0; i < N; i++) { if (in_vars[i]->IsType()) { auto &in_t = in_vars[i]->Get(); + if (in_t.numel() == 0) { + continue; + } auto in = EigenVector::Flatten(in_t); result.device(place) = result + in; } else if (in_vars[i]->IsType()) { diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index efde850143ce188300667b21e4b539b1d150d9ae..4eb8b60f471902ef594c3742e6652cea24e7fd77 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -27,7 +27,7 @@ class WriteToArrayOp : public ArrayOp { void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { auto *x = scope.FindVar(Input("X")); - PADDLE_ENFORCE(x != nullptr, "X must be set"); + if (x == nullptr) return; auto &x_tensor = x->Get(); size_t offset = GetOffset(scope, dev_ctx); auto *out = @@ -76,7 +76,9 @@ class WriteToArrayInferShape : public framework::InferShapeBase { PADDLE_ENFORCE(context->HasInput("I"), "Must set the subscript index"); PADDLE_ENFORCE_EQ(framework::product(context->GetInputDim("I")), 1, "The number of element of subscript index must be 1"); - PADDLE_ENFORCE(context->HasInput("X"), NotHasXError()); + if (!context->HasInput("X")) { + return; + } PADDLE_ENFORCE(context->HasOutput("Out"), NotHasOutError()); context->SetOutputDim("Out", context->GetInputDim("X")); } @@ -99,9 +101,10 @@ class WriteToArrayInferVarType : public framework::VarTypeInference { auto &out = detail::Ref(block->FindRecursiveOrCreateVar(out_name), "Cannot found %s", out_name); out.SetType(framework::VarDesc::LOD_TENSOR_ARRAY); - auto &x = - detail::Ref(block->FindVarRecursive(x_name), "Cannot found %s", x_name); - out.SetDataType(x.GetDataType()); + auto *x = block->FindVarRecursive(x_name); + if (x != nullptr) { + out.SetDataType(x->GetDataType()); + } } }; @@ -121,10 +124,13 @@ class ReadFromArrayOp : public ArrayOp { PADDLE_ENFORCE(out != nullptr, "Out must be set"); auto *out_tensor = out->GetMutable(); size_t offset = GetOffset(scope, dev_ctx); - PADDLE_ENFORCE_LT(offset, x_array.size()); - framework::CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx, - out_tensor); - out_tensor->set_lod(x_array[offset].lod()); + if (offset < x_array.size()) { + framework::CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx, + out_tensor); + out_tensor->set_lod(x_array[offset].lod()); + } else { + VLOG(10) << "offset " << offset << " >= " << x_array.size(); + } } }; diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index 59460f6c879cf2c14fd27e33ecb1ba45b21da485..9b3f21cf94db338dee1173a76393dc66c3cbddaf 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -98,8 +98,6 @@ class WhileGradOp : public framework::OperatorBase { void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { - // PADDLE_ENFORCE(...) - framework::Executor executor(dev_ctx); auto *block = Attr(kStepBlock); auto *program = block->Program(); @@ -124,8 +122,12 @@ class WhileGradOp : public framework::OperatorBase { auto inside_og_name = inside_og_names[i]; VLOG(10) << "Linking outside " << outside_og_name << " --> inside " << inside_og_name; - auto &og_outside = detail::Ref(scope.FindVar(outside_og_name)); - auto &og_inside = detail::Ref(cur_scope.Var(inside_og_name)); + auto &og_outside = + detail::Ref(scope.FindVar(outside_og_name), + "Cannot find Outside Gradient %s", outside_og_name); + auto &og_inside = + detail::Ref(cur_scope.Var(inside_og_name), + "Cannot find inside gradient %s", inside_og_name); if (og_outside.Type().hash_code() == typeid(framework::LoDTensor).hash_code()) { auto &outside_tensor = og_outside.Get(); @@ -160,7 +162,7 @@ class WhileGradOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size()); for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) { if (pg_names[param_id] == framework::kEmptyVarName) { - continue; // iterator doesn't have gradient + continue; // parameter doesn't have gradient } auto inside_grad_name = framework::GradVarName(p_names[param_id]); @@ -190,7 +192,6 @@ class WhileGradOp : public framework::OperatorBase { } } - // sum gradient auto new_inside_name = cur_scope.Rename(inside_grad_name); auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, @@ -207,18 +208,35 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - virtual std::unique_ptr Apply() const { + std::unique_ptr Apply() const override { auto *grad = new framework::OpDescBind(); grad->SetType("while_grad"); grad->SetInput(kParameters, Input(kParameters)); - grad->SetOutput( - framework::GradVarName(kParameters), - InputGrad(kParameters, /*do not drop empty gradient*/ false)); + + // Not all of IGs will be generated by inner gradient operators of while op. + // Ignore IGs that is not generated by the inside block. + auto igs = InputGrad(kParameters, /*do not drop empty gradient*/ false); + std::unordered_set all_outs; + for (size_t i = 0; i < grad_block_[0]->OpSize(); ++i) { + for (auto &oname : grad_block_[0]->Op(i)->OutputArgumentNames()) { + all_outs.insert(oname); + } + } + for (auto &each_ig : igs) { + if (all_outs.find(each_ig) == all_outs.end()) { + VLOG(10) << "Ignore " << each_ig; + each_ig = framework::kEmptyVarName; + } + } + + grad->SetOutput(framework::GradVarName(kParameters), igs); + grad->SetInput(kOutputs, Output(kOutputs)); // OG should be re-calculated by step blocks, since many outputs of while op // do not need to calculate gradients. std::unordered_set block_ins; + auto *fwd_block = this->grad_block_[0]->ParentBlock(); { for (auto &p : Input(kParameters)) { block_ins.insert(p); @@ -233,6 +251,13 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { if (block_ins.find(input_name) != block_ins.end()) { continue; } + + // If the input of Op is generated by the forward block, do not make it + // as input again. + if (fwd_block->FindVar(input_name) != nullptr) { + continue; + } + extra_inputs.insert(input_name); } diff --git a/python/paddle/v2/fluid/data_feeder.py b/python/paddle/v2/fluid/data_feeder.py index 3dee0b5b73e6405073a55560b97fc34409deb9f7..30a542af212926c93381aade426e25f2117e4662 100644 --- a/python/paddle/v2/fluid/data_feeder.py +++ b/python/paddle/v2/fluid/data_feeder.py @@ -1,5 +1,4 @@ from __future__ import print_function - import core import numpy import six.moves as six diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index 5568619fe65f6ea9d7d159c9052fedba6d444117..99d0ac4a1bb9436a7eb27fb90687a593accc94af 100644 --- a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -10,7 +10,7 @@ from param_attr import ParamAttr __all__ = [ 'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat', 'StaticRNN', 'cast', 'sequence_conv', 'sequence_pool', 'sums', 'cos_sim', - 'batch_norm', 'accuracy', 'split_lod_tensor' + 'batch_norm', 'accuracy', 'split_lod_tensor', 'While' ] @@ -1439,7 +1439,7 @@ def increment(x, value=1.0, in_place=True, main_program=None): type='increment', inputs={'X': [x]}, outputs={'Out': [out]}, - attrs={'step': value}) + attrs={'step': float(value)}) return out diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index 934e024742fd00bf05cc0d7caaaa870c18a68074..719e3b2563449ddd8df6207ba01968aa0dcd3322 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -197,8 +197,7 @@ class Optimizer(object): This method combines interface `append_backward_ops()` and `create_optimization_pass()` into one. """ - params_grads = append_backward_ops(loss, parameter_list, no_grad_set or - set()) + params_grads = append_backward_ops(loss, parameter_list, no_grad_set) # Add regularization if any params_grads = append_regularization_ops(params_grads) optimize_ops = self.create_optimization_pass(params_grads, loss, diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py index b2479320330bde5771c3d4a8e2923b5ab1eecf2e..80f859967979ec07536d652d4ea620fd4ddb2daa 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py @@ -8,7 +8,8 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): name="words", shape=[seq_len * batch_size, 1], append_batch_size=False, - dtype="int64") + dtype="int64", + lod_level=1) label = fluid.layers.data( name="label", shape=[batch_size, 1], @@ -21,6 +22,7 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): c_pre_init = fluid.layers.fill_constant( dtype=emb.dtype, shape=[batch_size, emb_dim], value=0.0) + c_pre_init.stop_gradient = False layer_1_out = fluid.layers.lstm( emb, c_pre_init=c_pre_init, hidden_dim=emb_dim) layer_1_out = fluid.layers.transpose(x=layer_1_out, axis=[1, 0, 2]) diff --git a/python/paddle/v2/fluid/tests/test_dyn_rnn.py b/python/paddle/v2/fluid/tests/test_dyn_rnn.py new file mode 100644 index 0000000000000000000000000000000000000000..271e39a0e0f05938ae32322c50410b56d94a8dc5 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_dyn_rnn.py @@ -0,0 +1,87 @@ +import paddle.v2.fluid as fluid +import paddle.v2 as paddle +import unittest +import numpy + + +class TestDynRNN(unittest.TestCase): + def setUp(self): + self.word_dict = paddle.dataset.imdb.word_dict() + self.BATCH_SIZE = 100 + self.train_data = paddle.batch( + paddle.dataset.imdb.train(self.word_dict), + batch_size=self.BATCH_SIZE) + + def test_plain_while_op(self): + main_program = fluid.Program() + startup_program = fluid.Program() + + with fluid.program_guard(main_program, startup_program): + sentence = fluid.layers.data( + name='word', shape=[1], dtype='int64', lod_level=1) + sent_emb = fluid.layers.embedding( + input=sentence, size=[len(self.word_dict), 32], dtype='float32') + + label = fluid.layers.data(name='label', shape=[1], dtype='float32') + + rank_table = fluid.layers.lod_rank_table(x=sent_emb) + + sent_emb_array = fluid.layers.lod_tensor_to_array( + x=sent_emb, table=rank_table) + + seq_len = fluid.layers.max_sequence_len(rank_table=rank_table) + i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) + i.stop_gradient = False + + boot_mem = fluid.layers.fill_constant_batch_size_like( + input=fluid.layers.array_read( + array=sent_emb_array, i=i), + value=0, + shape=[-1, 100], + dtype='float32') + boot_mem.stop_gradient = False + + mem_array = fluid.layers.array_write(x=boot_mem, i=i) + + cond = fluid.layers.less_than(x=i, y=seq_len) + cond.stop_gradient = False + while_op = fluid.layers.While(cond=cond) + out = fluid.layers.create_array(dtype='float32') + + with while_op.block(): + mem = fluid.layers.array_read(array=mem_array, i=i) + ipt = fluid.layers.array_read(array=sent_emb_array, i=i) + + mem = fluid.layers.shrink_memory(x=mem, i=i, table=rank_table) + + hidden = fluid.layers.fc(input=[mem, ipt], size=100, act='tanh') + fluid.layers.array_write(x=hidden, i=i, array=out) + fluid.layers.increment(x=i, in_place=True) + fluid.layers.array_write(x=hidden, i=i, array=mem_array) + fluid.layers.less_than(x=i, y=seq_len, cond=cond) + + all_timesteps = fluid.layers.array_to_lod_tensor( + x=out, table=rank_table) + last = fluid.layers.sequence_pool( + input=all_timesteps, pool_type='last') + logits = fluid.layers.fc(input=last, size=1, act=None) + loss = fluid.layers.sigmoid_cross_entropy_with_logits( + x=logits, label=label) + loss = fluid.layers.mean(x=loss) + sgd = fluid.optimizer.SGD(1e-4) + sgd.minimize(loss=loss) + cpu = fluid.CPUPlace() + exe = fluid.Executor(cpu) + exe.run(startup_program) + feeder = fluid.DataFeeder(feed_list=[sentence, label], place=cpu) + + data = next(self.train_data()) + val = exe.run(main_program, feed=feeder.feed(data), + fetch_list=[loss])[0] + self.assertEqual((1, ), val.shape) + print(val) + self.assertFalse(numpy.isnan(val)) + + +if __name__ == '__main__': + unittest.main()