diff --git a/.gitignore b/.gitignore
index 351b8204100dfd71e94cb3efa2e946b44b9e4285..1512c1438e9e0b0b7b6e0c273a24b273cb652b04 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,3 +28,4 @@ cmake_install.cmake
 paddle/.timestamp
 python/paddlepaddle.egg-info/
 paddle/pybind/pybind.h
+python/paddle/v2/framework/tests/tmp/*
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 0d1617424ecffdcdaaccba6cbd761b2563f6b073..f69a3cfbf8b195cceb15fc2ce81d8bb9d28e5830 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -15,7 +15,7 @@ nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
 
 cc_test(variable_test SRCS variable_test.cc)
 
-cc_library(scope SRCS scope.cc)
+cc_library(scope SRCS scope.cc DEPS glog)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
 
diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h
index 9b8fe17d6eb8e95c6453a230015f59b84a76095d..e3e96441bbf51729f2ba69c9257e6961b1de0d5c 100644
--- a/paddle/framework/op_desc.h
+++ b/paddle/framework/op_desc.h
@@ -107,6 +107,8 @@ class OpDescBind {
 
   void InferVarType(BlockDescBind *block) const;
 
+  void MarkAsTarget() { desc_.set_is_target(true); }
+
   void Flush();
 
  private:
diff --git a/paddle/framework/program_desc.cc b/paddle/framework/program_desc.cc
index 82f16a7c8b9de2b46dcae4288d999bc5c644aede..4af8d94563ad0ecf6fcc6fe0575b0f69006a9a2d 100644
--- a/paddle/framework/program_desc.cc
+++ b/paddle/framework/program_desc.cc
@@ -49,6 +49,13 @@ ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) {
   }
 }
 
+ProgramDescBind::ProgramDescBind(const ProgramDesc &desc) {
+  desc_ = desc;
+  for (auto &block_desc : *desc_.mutable_blocks()) {
+    blocks_.emplace_back(new BlockDescBind(this, &block_desc));
+  }
+}
+
 ProgramDescBind::ProgramDescBind(const std::string &binary_str) {
   PADDLE_ENFORCE(desc_.ParseFromString(binary_str),
                  "Fail to parse program_desc from binary string.");
diff --git a/paddle/framework/program_desc.h b/paddle/framework/program_desc.h
index b6e76515a5af0f1ff663442faebc50e1c5cc2520..ce1721472d9046f50b7fc88253fa3f2dbaaf51a8 100644
--- a/paddle/framework/program_desc.h
+++ b/paddle/framework/program_desc.h
@@ -29,6 +29,8 @@ class ProgramDescBind {
  public:
   ProgramDescBind();
 
+  explicit ProgramDescBind(const ProgramDesc &desc);
+
   ProgramDescBind(const ProgramDescBind &o);
 
   explicit ProgramDescBind(const std::string &binary_str);
 
diff --git a/paddle/framework/prune.cc b/paddle/framework/prune.cc
index 95833692925af4477fe575d6bd908a2ce7653c1b..bf3066983cdcf44ae84f236ac72486e5d4fd5b92 100644
--- a/paddle/framework/prune.cc
+++ b/paddle/framework/prune.cc
@@ -46,7 +46,7 @@ bool IsTarget(const OpDesc& op_desc) {
   return false;
 }
 
-void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
+void prune_impl(const ProgramDesc& input, ProgramDesc* output, int block_id) {
   // TODO(tonyyang-svail):
   //    - will change to use multiple blocks for RNN op and Cond Op
 
@@ -91,8 +91,8 @@ void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
   // we reverse the should_run vector
   std::reverse(should_run.begin(), should_run.end());
 
-  output = input;
-  auto* op_field = output.mutable_blocks(block_id)->mutable_ops();
+  *output = input;
+  auto* op_field = output->mutable_blocks(block_id)->mutable_ops();
   op_field->Clear();
   for (size_t i = 0; i < should_run.size(); ++i) {
     if (should_run[i]) {
@@ -101,7 +101,8 @@
   }
 }
 
-void Prune(const ProgramDesc& input, ProgramDesc& output) {
+// TODO(fengjiayi): Prune() could be done in place to avoid unnecessary copies
+void Prune(const ProgramDesc& input, ProgramDesc* output) {
   prune_impl(input, output, 0);
 }
diff --git a/paddle/framework/prune.h b/paddle/framework/prune.h
index 9414ac64f9491c07aabb216a4c81dfe6e78e8043..8cfb16343aa44dcc8a3349b01adecce33f1c2b5b 100644
--- a/paddle/framework/prune.h
+++ b/paddle/framework/prune.h
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-void Prune(const ProgramDesc& input, ProgramDesc& output);
+void Prune(const ProgramDesc& input, ProgramDesc* output);
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/prune_test.cc b/paddle/framework/prune_test.cc
index 3ab4b43d9256af5880083b00df446c451e3f598b..cadd114fbc3de897a13504e665ce464e83d312ff 100644
--- a/paddle/framework/prune_test.cc
+++ b/paddle/framework/prune_test.cc
@@ -59,11 +59,11 @@ TEST(Prune, one_operator) {
   f::ProgramDesc *pdesc = program.Proto();
   f::ProgramDesc pruned;
 
-  Prune(*pdesc, pruned);
+  Prune(*pdesc, &pruned);
   PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 0);
 
   pdesc->mutable_blocks(0)->mutable_ops(0)->set_is_target(true);
-  Prune(*pdesc, pruned);
+  Prune(*pdesc, &pruned);
   PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 1);
 }
 
@@ -81,7 +81,7 @@ TEST(Prune, forward) {
   for (int i = 0; i < pdesc->blocks(0).ops_size(); ++i) {
     f::ProgramDesc pruned;
     pdesc->mutable_blocks(0)->mutable_ops(i)->set_is_target(true);
-    Prune(*pdesc, pruned);
+    Prune(*pdesc, &pruned);
     PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), i + 1);
   }
 }
@@ -100,7 +100,7 @@ TEST(Prune, multi_input_op) {
   pdesc->mutable_blocks(0)->mutable_ops(3)->set_is_target(true);
 
   f::ProgramDesc pruned;
-  Prune(*pdesc, pruned);
+  Prune(*pdesc, &pruned);
   PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 4);
 }
 
@@ -116,7 +116,7 @@ TEST(Prune, multi_output_op) {
   pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
 
   f::ProgramDesc pruned;
-  Prune(*pdesc, pruned);
+  Prune(*pdesc, &pruned);
   PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 2);
 }
 
@@ -133,6 +133,6 @@ TEST(Prune, multi_target) {
   pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
 
   f::ProgramDesc pruned;
-  Prune(*pdesc, pruned);
+  Prune(*pdesc, &pruned);
   PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 3);
 }
diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc
index 19e25fba05f2f1c959da32c950320d3a44d5109d..14cc530448379eb6d4bf0435f607494aa01ef5b5 100644
--- a/paddle/framework/scope.cc
+++ b/paddle/framework/scope.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <memory>  // for unique_ptr
 #include <mutex>   // for call_once
+#include "glog/logging.h"
 #include "paddle/string/printf.h"
 
 namespace paddle {
@@ -23,7 +24,10 @@ namespace framework {
 
 Scope::~Scope() {
   DropKids();
-  for (auto& kv : vars_) delete kv.second;
+  for (auto& kv : vars_) {
+    VLOG(3) << "Destroy variable " << kv.first;
+    delete kv.second;
+  }
 }
 
 Scope& Scope::NewScope() const {
@@ -38,6 +42,7 @@ Variable* Scope::Var(const std::string& name) {
   }
   Variable* v = new Variable();
   vars_[name] = v;
+  VLOG(3) << "Create variable " << name << " on scope";
   v->name_ = &(vars_.find(name)->first);
   return v;
 }
diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt
index 9cc4233e43267472d405c3e4e617f0782e1430ea..aed5275dbf9be707cc6e19e729133ba8eab58195 100644
--- a/paddle/memory/CMakeLists.txt
+++ b/paddle/memory/CMakeLists.txt
@@ -1,6 +1,6 @@
 add_subdirectory(detail)
 
-cc_library(memory SRCS memory.cc)
+cc_library(memory SRCS memory.cc DEPS place)
 cc_library(memcpy SRCS memcpy.cc)
 
 cc_library(paddle_memory
diff --git a/paddle/memory/detail/meta_cache.cc b/paddle/memory/detail/meta_cache.cc
index 30ff80e7bac0b595fe60aeab0a3c59f4e23eae2d..f0721c3b94b74eed3a02e4bc744c24b97ac170a9 100644
--- a/paddle/memory/detail/meta_cache.cc
+++ b/paddle/memory/detail/meta_cache.cc
@@ -13,6 +13,7 @@ limitations under the License. */
 
 #include "paddle/memory/detail/meta_cache.h"
+#include "glog/logging.h"
 #include "paddle/memory/detail/memory_block.h"
 #include "paddle/platform/assert.h"
 
@@ -28,7 +29,9 @@ Metadata MetadataCache::load(const MemoryBlock* block) {
     PADDLE_ASSERT(existing_metadata->second.check_guards());
     return existing_metadata->second;
   } else {
-    PADDLE_ASSERT(reinterpret_cast<const Metadata*>(block)->check_guards());
+    auto* meta = reinterpret_cast<const Metadata*>(block);
+    VLOG(3) << "Load MetaData type=" << meta->type;
+    PADDLE_ASSERT(meta->check_guards());
     return *reinterpret_cast<const Metadata*>(block);
   }
 }
diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc
index 8e561528f0e7e6ff524fc51b4776efc4e5bd28cd..0b648642f90a09db7452cce97eb04cedfcf55f4f 100644
--- a/paddle/memory/memory.cc
+++ b/paddle/memory/memory.cc
@@ -39,11 +39,15 @@ BuddyAllocator* GetCPUBuddyAllocator() {
 
 template <>
 void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
-  return GetCPUBuddyAllocator()->Alloc(size);
+  VLOG(3) << "Allocate " << size << " bytes on " << platform::Place(place);
+  void* p = GetCPUBuddyAllocator()->Alloc(size);
+  VLOG(3) << "  pointer=" << p;
+  return p;
 }
 
 template <>
 void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
+  VLOG(3) << "Free pointer=" << p << " on " << platform::Place(place);
   GetCPUBuddyAllocator()->Free(p);
 }
 
diff --git a/paddle/operators/batch_norm_op.cu b/paddle/operators/batch_norm_op.cu
index 6ba6ee12ec7b0a5dc2ffcdfd7519377c8f32fef8..726d1ea1b8d7ced93f94bb0e5bb4df9e43b0ac7b 100644
--- a/paddle/operators/batch_norm_op.cu
+++ b/paddle/operators/batch_norm_op.cu
@@ -117,9 +117,6 @@ class BatchNormKernel<platform::GPUPlace, T> : public framework::OpKernel<T> {
     math::SetConstant<platform::GPUPlace, T> functor;
     functor(ctx.device_context(), saved_mean, 0);
     functor(ctx.device_context(), saved_variance, 0);
-    // FIXME(qiao) should not set zero self
-    functor(ctx.device_context(), mean_out, 0);
-    functor(ctx.device_context(), variance_out, 0);
 
     auto handle = ctx.cuda_device_context().cudnn_handle();
 
@@ -211,8 +208,15 @@ class BatchNormGradKernel<platform::GPUPlace, T>
     mode_ = CUDNN_BATCHNORM_SPATIAL;
 #endif
 
-    std::vector<int> dims = {N, C, H, W, D};
-    std::vector<int> strides = {H * W * C * D, 1, W * D * C, D * C, C};
+    std::vector<int> dims;
+    std::vector<int> strides;
+    if (tensor_format == TensorFormat::NCHW) {
+      dims = {N, C, H, W, D};
+      strides = {C * H * W * D, H * W * D, W * D, D, 1};
+    } else {
+      dims = {N, C, H, W, D};
+      strides = {H * W * C * D, 1, W * D * C, D * C, C};
+    }
 
     CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
         data_desc_, CudnnDataType<T>::type,
         x_dims.size() > 3 ? x_dims.size() : 4, dims.data(), strides.data()));
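For reference, the NCHW branch above uses the element strides of a dense N-C-H-W-D tensor (the innermost axis varies fastest), while the NHWC branch keeps the channel axis at stride 1; the old code always assumed NHWC. A quick numpy sanity check of the NCHW case (illustrative only, not part of the patch):

    import numpy as np

    # element strides of a contiguous (N, C, H, W, D) array
    x = np.zeros((2, 3, 4, 5, 1), dtype=np.float32)
    n, c, h, w, d = x.shape
    elem_strides = [s // x.itemsize for s in x.strides]
    assert elem_strides == [c * h * w * d, h * w * d, w * d, d, 1]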
diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h
index ead30e8e90b25165664b690491895ae68c8fc0ab..07bf61df45bf51c8648180ffc9eb97306865fab6 100644
--- a/paddle/operators/sequence_pool_op.h
+++ b/paddle/operators/sequence_pool_op.h
@@ -144,11 +144,11 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
         Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
             in_t_map(in_t.data<T>(), h, w);
         int row_id;
-        Eigen::array<int, 2> extents = {1, 1};
+        Eigen::array<int, 2> extents{{1, 1}};
         for (int col_id = 0; col_id < w; col_id++) {
           in_t_map.col(col_id).maxCoeff(&row_id);
-          Eigen::array<int, 2> in_offsets = {row_id, col_id};
-          Eigen::array<int, 2> out_offsets = {0, col_id};
+          Eigen::array<int, 2> in_offsets{{row_id, col_id}};
+          Eigen::array<int, 2> out_offsets{{0, col_id}};
           in_g_e.slice(in_offsets, extents).device(place) =
               out_g_e.slice(out_offsets, extents);
         }
diff --git a/paddle/operators/sign_op.cc b/paddle/operators/sign_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1b2f879d6d305e4e77be41683d8249904337a6f8
--- /dev/null
+++ b/paddle/operators/sign_op.cc
@@ -0,0 +1,70 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/operators/sign_op.h"
+
+namespace paddle {
+namespace operators {
+
+class SignOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of SignOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of SignOp should not be null.");
+    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
+    ctx->ShareLoD("X", /*->*/ "Out");
+  }
+};
+
+template <typename AttrType>
+class SignOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SignOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "(Tensor) Input tensor of sign operator.");
+    AddOutput("Out", "(Tensor) Output tensor of sign operator.");
+    AddComment(R"DOC(Sign operator
+
+The equation is: Out = X.sign()
+)DOC");
+  }
+};
+
+class SignGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+  std::unique_ptr<framework::OpDescBind> Apply() const override {
+    auto *grad_op = new framework::OpDescBind();
+    grad_op->SetType("scale");
+    grad_op->SetInput("X", OutputGrad("Out"));
+    grad_op->SetOutput("Out", InputGrad("X"));
+    grad_op->SetAttr("scale", 0.0f);
+    return std::unique_ptr<framework::OpDescBind>(grad_op);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(sign, ops::SignOp, ops::SignOpMaker<float>,
+                  ops::SignGradMaker);
+REGISTER_OP_CPU_KERNEL(sign,
+                       ops::SignKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/sign_op.cu b/paddle/operators/sign_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..4d0638cb97d84bf650fb23e4d2a201adc51a4b68
--- /dev/null
+++ b/paddle/operators/sign_op.cu
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/operators/sign_op.h"
+
+REGISTER_OP_GPU_KERNEL(
+    sign, paddle::operators::SignKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/sign_op.h b/paddle/operators/sign_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..ab5cd4bac019d602c63ea51629fb85fa7e206841
--- /dev/null
+++ b/paddle/operators/sign_op.h
@@ -0,0 +1,38 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+template <typename Place, typename T>
+class SignKernel : public framework::OpKernel<T> {
+ public:
+  virtual void Compute(const framework::ExecutionContext& context) const {
+    auto* out = context.Output<framework::Tensor>("Out");
+    auto* in = context.Input<framework::Tensor>("X");
+    out->mutable_data<T>(in->place());
+
+    auto eigen_out = framework::EigenVector<T>::Flatten(*out);
+    auto eigen_in = framework::EigenVector<T>::Flatten(*in);
+    auto& place = context.GetEigenDevice<Place>();
+    eigen_out.device(place) = eigen_in.sign();
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
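The kernel above computes Out = sign(X) elementwise, and the gradient maker in sign_op.cc lowers the backward pass to a `scale` op with scale 0, since sign() is flat almost everywhere. A numpy sketch of the same semantics (illustrative, not the operator's actual code path):

    import numpy as np

    x = np.array([-3.2, 0.0, 7.1], dtype=np.float32)
    out = np.sign(x)    # [-1., 0., 1.]

    d_out = np.ones_like(x)
    d_x = 0.0 * d_out   # the derivative of sign() is zero wherever it is defined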
diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt
index d7cd738828a10b431370c92026b89d62add1275e..a9bcc474387513a8ca019bc9382b88c93e08ff8d 100644
--- a/paddle/pybind/CMakeLists.txt
+++ b/paddle/pybind/CMakeLists.txt
@@ -1,7 +1,7 @@
 if(WITH_PYTHON)
   cc_library(paddle_pybind SHARED
     SRCS pybind.cc exception.cc protobuf.cc
-    DEPS pybind python backward proto_desc tensor_array paddle_memory executor
+    DEPS pybind python backward proto_desc tensor_array paddle_memory executor prune
         ${GLOB_OP_LIB})
 endif(WITH_PYTHON)
diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc
index 145b4f63c235fa97dc03ba615f74f53473574064..14adfa1f35225ca5bf0c093dcf75d1c21af69676 100644
--- a/paddle/pybind/protobuf.cc
+++ b/paddle/pybind/protobuf.cc
@@ -141,6 +141,13 @@ void BindProgramDesc(py::module &m) {
             desc->SerializeToString(&res),
             "Serialize ProgramDesc Error. This could be a bug of Paddle.");
         return res;
+      })
+      .def("parse_from_string",
+           [](ProgramDescBind &program_desc, const std::string &data) {
+             ProgramDesc *desc = program_desc.Proto();
+             PADDLE_ENFORCE(desc->ParseFromString(data),
+                            "Fail to parse ProgramDesc from string. This could "
+                            "be a bug of Paddle.");
       });
 }
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index b6e44fdbad6e2817e3077901f58177adc4bb0c71..e9c1d40de185768048159c10ee278ffde12335f7 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/framework/feed_fetch_method.h"
 #include "paddle/framework/framework.pb.h"
 #include "paddle/framework/lod_tensor.h"
+#include "paddle/framework/prune.h"
 #include "paddle/framework/selected_rows.h"
 #include "paddle/framework/tensor_array.h"
 #include "paddle/operators/cond_op.h"
@@ -237,6 +238,16 @@ All parameter, weight, gradient are variables in Paddle.
         }
         return ret_values;
       });
+  m.def("prune", [](const ProgramDescBind &origin,
+                    const std::vector<std::array<size_t, 2>> &targets) {
+    ProgramDescBind prog_with_targets(origin);
+    for (const auto &t : targets) {
+      prog_with_targets.Block(t[0])->Op(t[1])->MarkAsTarget();
+    }
+    ProgramDesc pruned_desc;
+    Prune(*prog_with_targets.Proto(), &pruned_desc);
+    return new ProgramDescBind(pruned_desc);
+  });
   m.def_submodule(
       "var_names",
       "The module will return special predefined variable name in Paddle")
diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py
index 7c95b1b9c29b16ecdf75ae1aad0eae5e913fd102..348c393913b3d73f9c9c16580d19a19551f2a57b 100644
--- a/python/paddle/v2/framework/framework.py
+++ b/python/paddle/v2/framework/framework.py
@@ -251,6 +251,8 @@ class Operator(object):
                 self.desc.set_output(out_proto.name, out_argu_names)
 
         if attrs is not None:
+            if not isinstance(attrs, dict):
+                raise TypeError("'attrs' should be a dict.")
             for attr in proto.attrs:
                 attr_name = attr.name
                 if (not attr_name in attrs) or (attrs[attr_name] is None):
@@ -291,6 +293,14 @@ class Operator(object):
     def output_names(self):
         return self.desc.output_names()
 
+    @property
+    def idx(self):
+        for i, op in enumerate(self.block.ops):
+            if op == self:
+                return i
+        raise ValueError(
+            "Can't find op itself in its block. It could be a bug of Paddle.")
+
     def has_attr(self, name):
         return self.desc.has_attr(name)
 
@@ -440,10 +450,31 @@ class Program(object):
         p.sync_with_cpp()
         return p
 
+    def prune(self, targets):
+        if not isinstance(targets, list):
+            targets = [targets]
+        targets_idx = []
+        for t in targets:
+            if not isinstance(t, Operator):
+                if isinstance(t, Variable):
+                    t = t.op
+                else:
+                    raise ValueError(
+                        "All targets of prune() can only be Variable or Operator."
+                    )
+
+            targets_idx.append([t.block.idx, t.idx])
+        res = Program()
+        res.desc = core.prune(self.desc, targets_idx)
+        res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())]
+        res.sync_with_cpp()
+        return res
+
     @staticmethod
     def parse_from_string(binary_str):
         p = Program()
         p.desc = core.ProgramDesc(binary_str)
+        p.blocks = [Block(p, i) for i in xrange(p.desc.num_blocks())]
         p.sync_with_cpp()
         return p
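`Program.prune()` above maps each target back to the op that produced it, marks that op as a target, and asks the C++ `Prune()` pass for the transitive closure of ops the targets depend on. A hedged usage sketch, mirroring how the unit tests in this patch build programs (the names are illustrative):

    import paddle.v2.framework.layers as layers
    from paddle.v2.framework.framework import Program

    init_program = Program()
    program = Program()
    x = layers.data(name='x', shape=[2], data_type='float32',
                    program=program, init_program=init_program)
    y = layers.fc(input=x, size=1, program=program, init_program=init_program)

    # keep only the ops needed to compute y; everything else is dropped
    inference_program = program.prune(y)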
diff --git a/python/paddle/v2/framework/io.py b/python/paddle/v2/framework/io.py
index 7a2ac0e9ebf18d5c06df12869b73beb451a68177..f3ba719bde086f696a27b806228a8c97466a681e 100644
--- a/python/paddle/v2/framework/io.py
+++ b/python/paddle/v2/framework/io.py
@@ -1,11 +1,12 @@
 import os
+import cPickle as pickle
 
 from paddle.v2.framework.framework import Program, Parameter, g_program, \
     Variable
 
 __all__ = [
     'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
-    'load_persistables'
+    'load_persistables', 'save_inference_model', 'load_inference_model'
 ]
 
 
@@ -31,7 +32,7 @@ def _clone_var_in_block_(block, var):
 def save_vars(executor, dirname, program=None, vars=None, predicate=None):
     """
     Save variables to directory by executor.
-    
+
     :param executor: executor that save variable
     :param dirname: directory path
     :param program: program. If vars is None, then filter all variables in this
@@ -92,7 +93,7 @@ def save_persistables(executor, dirname, program=None):
 def load_vars(executor, dirname, program=None, vars=None, predicate=None):
     """
     Load variables from directory by executor.
-    
+
     :param executor: executor that save variable
     :param dirname: directory path
     :param program: program. If vars is None, then filter all variables in this
@@ -124,6 +125,7 @@ def load_vars(executor, dirname, program=None, vars=None, predicate=None):
             inputs={},
             outputs={"Out": [new_var]},
             attrs={'file_path': os.path.join(dirname, new_var.name)})
+
     executor.run(load_prog)
 
 
@@ -141,3 +143,88 @@ def load_persistables(executor, dirname, program=None):
     """
     load_vars(
         executor, dirname=dirname, program=program, predicate=is_persistable)
+
+
+def save_inference_model(dirname,
+                         feeded_var_names,
+                         target_vars,
+                         executor,
+                         program=None):
+    """
+    Build a model especially for inference,
+    and save it to directory by the executor.
+
+    :param dirname: directory path
+    :param feeded_var_names: Names of variables that need to be fed data during inference
+    :param target_vars: Variables from which we can get inference results.
+    :param executor: executor that save inference model
+    :param program: original program, which will be pruned to build the inference model.
+                    Default g_program.
+
+    :return: None
+    """
+    if program is None:
+        program = g_program
+    if not isinstance(target_vars, list):
+        target_vars = [target_vars]
+
+    if not os.path.isdir(dirname):
+        os.makedirs(dirname)
+
+    pruned_program = program.prune(target_vars)
+    fetch_var_names = [v.name for v in target_vars]
+
+    model_file_name = dirname + "/__model__"
+    with open(model_file_name, "w") as f:
+        pickle.dump({
+            "program_desc_str": pruned_program.desc.serialize_to_string(),
+            "feed_var_names": feeded_var_names,
+            "fetch_var_names": fetch_var_names
+        }, f, -1)
+
+    save_params(executor, dirname, program)
+ """ + if not os.path.isdir(dirname): + raise ValueError("There is no directory named '%s'", dirname) + + model_file_name = dirname + "/__model__" + model = pickle.load(open(model_file_name, "r")) + program_desc_str = model["program_desc_str"] + feed_var_names = model["feed_var_names"] + fetch_var_names = model["fetch_var_names"] + program = Program.parse_from_string(program_desc_str) + load_persistables_if_exist(executor, dirname, program) + fetch_vars = [program.global_block().var(name) for name in fetch_var_names] + + return [program, feed_var_names, fetch_vars] diff --git a/python/paddle/v2/framework/layer_helper.py b/python/paddle/v2/framework/layer_helper.py index 6142b1f93c3f84b7af03af5d5aeea70417a22839..1f72c9bc7b0ceda1dd954703fcc10c77a3e5ed25 100644 --- a/python/paddle/v2/framework/layer_helper.py +++ b/python/paddle/v2/framework/layer_helper.py @@ -131,12 +131,14 @@ class LayerHelper(object): return dtype def create_parameter(self, attr, shape, dtype, suffix='w'): - if attr['name'] is None: - attr['name'] = unique_name(".".join([self.name, suffix])) + # Deepcopy the attr so that parameters can be shared in program + attr_copy = copy.deepcopy(attr) + if attr_copy['name'] is None: + attr_copy['name'] = unique_name(".".join([self.name, suffix])) self.init_program.global_block().create_parameter( - dtype=dtype, shape=shape, **attr) + dtype=dtype, shape=shape, **attr_copy) return self.program.global_block().create_parameter( - name=attr['name'], dtype=dtype, shape=shape) + name=attr_copy['name'], dtype=dtype, shape=shape) def create_tmp_variable(self, dtype): return self.program.current_block().create_var( diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index e9d8bbab8662ed9e9db1320c89d6db03360d3983..4c608f96bdf0ca715fc89c0752e891f8c2b80d87 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -18,7 +18,8 @@ class Optimizer(object): but need to use one of it's implementation. """ - def __init__(self): + def __init__(self, global_step=None): + self._global_step = global_step # Dictionary of accumulators. Some optimizer subclasses need to # allocate and manage extra variables associated with the parameters # to train. These variables are called accumulators. @@ -109,6 +110,26 @@ class Optimizer(object): format(name, param.name)) return self._accumulators[name][param.name] + def _increment_global_step(self, block): + """Increment the global step by 1 after every iteration + + Args: + block: the block in which the loss variable is present + + Returns: + list with global_step increment op as its only element + """ + assert isinstance(block, framework.Block) + assert self._global_step is not None + # create the increment op + increment_op = block.append_op( + type="increment", + inputs={"X": self._global_step}, + outputs={"Out": self._global_step}, + attrs={"step": 1.0}) + + return increment_op + def create_optimization_pass(self, parameters_and_grads, loss): """Add optimization operators to update gradients to variables. @@ -152,6 +173,8 @@ class Optimizer(object): if finish_ops is not None: return_ops += finish_ops + if self._global_step is not None: + return_ops.append(self._increment_global_step(loss.block)) return return_ops def minimize(self, loss, parameter_list=None, no_grad_set=None): @@ -172,9 +195,9 @@ class SGDOptimizer(Optimizer): """ Simple SGD optimizer without any state. 
""" - def __init__(self, learning_rate): + def __init__(self, learning_rate, global_step=None): assert learning_rate is not None - super(SGDOptimizer, self).__init__() + super(SGDOptimizer, self).__init__(global_step) self.type = "sgd" self._learning_rate = learning_rate @@ -215,10 +238,14 @@ class MomentumOptimizer(Optimizer): """ _velocity_acc_str = "velocity" - def __init__(self, learning_rate, momentum, use_nesterov=False): + def __init__(self, + learning_rate, + momentum, + use_nesterov=False, + global_step=None): assert learning_rate is not None assert momentum is not None - super(MomentumOptimizer, self).__init__() + super(MomentumOptimizer, self).__init__(global_step) self.type = "momentum" self._learning_rate = learning_rate self._momentum = momentum @@ -275,10 +302,10 @@ class AdagradOptimizer(Optimizer): """ _moment_acc_str = "moment" - def __init__(self, learning_rate, epsilon=1.0e-6): + def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None): assert learning_rate is not None assert epsilon is not None - super(AdagradOptimizer, self).__init__() + super(AdagradOptimizer, self).__init__(global_step) self.type = "adagrad" self._learning_rate = learning_rate self._epsilon = epsilon @@ -337,12 +364,13 @@ class AdamOptimizer(Optimizer): learning_rate=0.001, beta1=0.9, beta2=0.999, - epsilon=1e-8): + epsilon=1e-8, + global_step=None): assert learning_rate is not None assert beta1 is not None assert beta2 is not None assert epsilon is not None - super(AdamOptimizer, self).__init__() + super(AdamOptimizer, self).__init__(global_step) self.type = "adam" self._learning_rate = learning_rate self._beta1 = beta1 @@ -458,7 +486,8 @@ class AdamaxOptimizer(Optimizer): learning_rate=0.001, beta1=0.9, beta2=0.999, - epsilon=1e-8): + epsilon=1e-8, + global_step=None): assert learning_rate is not None assert beta1 is not None assert beta2 is not None diff --git a/python/paddle/v2/framework/regularizer.py b/python/paddle/v2/framework/regularizer.py index cc7ebbe97e530c1f491360e66ac4f7dc2bb3d8f2..5111ac5566feb7d334ff4cd8e70daa0cfbd6e552 100644 --- a/python/paddle/v2/framework/regularizer.py +++ b/python/paddle/v2/framework/regularizer.py @@ -1,6 +1,8 @@ import paddle.v2.framework.framework as framework -__all__ = ['append_regularization_ops', 'L2DecayRegularizer'] +__all__ = [ + 'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer' +] def append_regularization_ops(parameters_and_grads): @@ -97,3 +99,43 @@ class L2DecayRegularizer(WeightDecayRegularizer): attrs={"scale": self._regularization_coeff}) return decay + + +class L1DecayRegularizer(WeightDecayRegularizer): + """Implements the L1 Weight Decay Regularization + """ + + def __init__(self, regularization_coeff=0.0): + assert regularization_coeff is not None + super(L1DecayRegularizer, self).__init__() + self._regularization_coeff = regularization_coeff + + def __call__(self, param, block): + """Add L1 weight decay ops to network + + Adds L1 weight decay ops. 
diff --git a/python/paddle/v2/framework/regularizer.py b/python/paddle/v2/framework/regularizer.py
index cc7ebbe97e530c1f491360e66ac4f7dc2bb3d8f2..5111ac5566feb7d334ff4cd8e70daa0cfbd6e552 100644
--- a/python/paddle/v2/framework/regularizer.py
+++ b/python/paddle/v2/framework/regularizer.py
@@ -1,6 +1,8 @@
 import paddle.v2.framework.framework as framework
 
-__all__ = ['append_regularization_ops', 'L2DecayRegularizer']
+__all__ = [
+    'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer'
+]
 
 
 def append_regularization_ops(parameters_and_grads):
@@ -97,3 +99,43 @@ class L2DecayRegularizer(WeightDecayRegularizer):
             attrs={"scale": self._regularization_coeff})
 
         return decay
+
+
+class L1DecayRegularizer(WeightDecayRegularizer):
+    """Implements the L1 Weight Decay Regularization
+    """
+
+    def __init__(self, regularization_coeff=0.0):
+        assert regularization_coeff is not None
+        super(L1DecayRegularizer, self).__init__()
+        self._regularization_coeff = regularization_coeff
+
+    def __call__(self, param, block):
+        """Add L1 weight decay ops to network
+
+        Adds L1 weight decay ops.
+        L1WeightDecay = reg_coeff * sign(parameter)
+
+        Args:
+            param: parameter variable for which regularization is applied
+            block: block in which variable is to be created
+
+        Returns:
+            new variable for weight decay
+        """
+        assert isinstance(param, framework.Parameter)
+        assert isinstance(block, framework.Block)
+        decay = block.create_var(
+            dtype="float32", shape=param.shape, lod_level=param.lod_level)
+        # Append sign op
+        block.append_op(
+            type='sign', inputs={"X": param}, outputs={"Out": decay})
+
+        # Append scale op to the output of sign op
+        block.append_op(
+            type='scale',
+            inputs={"X": decay},
+            outputs={"Out": decay},
+            attrs={"scale": self._regularization_coeff})
+
+        return decay
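L1DecayRegularizer emits a `sign` op followed by a `scale` op, so the decay term is reg_coeff * sign(parameter). It is attached to a layer the same way the L2 variant is used in test_recognize_digits_mlp.py below; a hedged sketch (the surrounding `layers`, `program`, `init_program`, and `image` come from that test):

    from paddle.v2.framework.regularizer import L1DecayRegularizer

    param_attr = {
        'name': None,
        'init_attr': {'type': 'uniform_random', 'min': -1.0, 'max': 1.0},
        'regularization': L1DecayRegularizer(regularization_coeff=0.0005)
    }
    hidden = layers.fc(input=image, size=128, act='relu',
                       program=program, init_program=init_program,
                       param_attr=param_attr)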
diff --git a/python/paddle/v2/framework/tests/test_batch_norm_op.py b/python/paddle/v2/framework/tests/test_batch_norm_op.py
index b275521ac12f4b5d05cddea0aa70f67f9eb641f1..dee339f43c2ee33fc8a691e0915bddf2c1679285 100644
--- a/python/paddle/v2/framework/tests/test_batch_norm_op.py
+++ b/python/paddle/v2/framework/tests/test_batch_norm_op.py
@@ -21,16 +21,36 @@ def get_backward_op(scope, op, no_grad_set):
 
 
 def _reference_training(x, scale, offset, epsilon, data_format):
-    if data_format != "NHWC":
-        raise ValueError("data_format must be NHWC, got %s." % data_format)
-    x_square = x * x
-    x_square_sum = np.sum(x_square, (0, 1, 2))
-    x_sum = np.sum(x, axis=(0, 1, 2))
-    element_count = np.size(x) / int(np.shape(x)[-1])
-    mean = x_sum / element_count
-    var = x_square_sum / element_count - mean * mean
-    normalized = (x - mean) / np.sqrt(var + epsilon)
-    return (normalized * scale + offset), mean, var
+    if data_format == "NCHW":
+        n, c, h, w = x.shape
+        x_square = x * x
+        x_square_sum = np.sum(x_square, (0, 2, 3))
+        x_sum = np.sum(x, axis=(0, 2, 3))
+        element_count = np.size(x) / int(np.shape(x)[1])
+        mean = x_sum / element_count
+        var = x_square_sum / element_count - mean * mean
+        mean_tile = np.reshape(mean, (1, c, 1, 1))
+        mean_tile = np.tile(mean_tile, (n, 1, h, w))
+        var_tile = np.reshape(var, (1, c, 1, 1))
+        var_tile = np.tile(var_tile, (n, 1, h, w))
+        normalized = (x - mean_tile) / np.sqrt(var_tile + epsilon)
+        scale_tile = np.reshape(scale, (1, c, 1, 1))
+        scale_tile = np.tile(scale_tile, (n, 1, h, w))
+        offset_tile = np.reshape(offset, (1, c, 1, 1))
+        offset_tile = np.tile(offset_tile, (n, 1, h, w))
+        y = normalized * scale_tile + offset_tile
+        return y, mean, var
+    elif data_format == "NHWC":
+        x_square = x * x
+        x_square_sum = np.sum(x_square, (0, 1, 2))
+        x_sum = np.sum(x, axis=(0, 1, 2))
+        element_count = np.size(x) / int(np.shape(x)[-1])
+        mean = x_sum / element_count
+        var = x_square_sum / element_count - mean * mean
+        normalized = (x - mean) / np.sqrt(var + epsilon)
+        return (normalized * scale + offset), mean, var
+    else:
+        raise ValueError("Unknown data order.")
 
 
 def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
@@ -43,8 +63,13 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
     # grad_x =
     #   1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) -
     #   (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
-    if data_format != "NHWC":
-        raise ValueError("data_format must be NHWC, got %s." % data_format)
+
+    # transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
+    if data_format == "NCHW":
+        x = np.transpose(x, (0, 2, 3, 1))
+        grad_y = np.transpose(grad_y, (0, 2, 3, 1))
+
     grad_x = scale * (grad_y - np.mean(
         grad_y, axis=(0, 1, 2)) - (x - mean) * np.mean(
             grad_y * (x - mean), axis=(0, 1, 2)) /
@@ -52,6 +77,12 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
     grad_scale = np.sum(grad_y * (x - mean) / np.sqrt(var + epsilon),
                         axis=(0, 1, 2))
     grad_offset = np.sum(grad_y, axis=(0, 1, 2))
+
+    # transfer back to N, C, H, W
+    if data_format == "NCHW":
+        grad_x = np.transpose(grad_x, (0, 3, 1, 2))
+        x = np.transpose(x, (0, 3, 1, 2))
+        grad_y = np.transpose(grad_y, (0, 3, 1, 2))
     return grad_x, grad_scale, grad_offset
 
 
@@ -65,61 +96,135 @@ def create_or_get_tensor(scope, var_name, var, place):
     return tensor
 
 
-def set_output_grad(scope, outputs, place):
-    def __set_tensor__(name):
+def set_output_grad(scope, outputs, place, feed_dict=None):
+    def __set_tensor__(name, data=None):
         out_tensor = scope.find_var(name).get_tensor()
         grad_tensor = scope.var(grad_var_name(name)).get_tensor()
         out_dtype = out_tensor.dtype()
-        if out_dtype == core.DataType.FP64:
-            data = np.ones(out_tensor.shape(), dtype=np.float64)
-        elif out_dtype == core.DataType.FP32:
-            data = np.ones(out_tensor.shape(), dtype=np.float32)
-        else:
-            raise ValueError("Not supported data type " + str(out_dtype))
-
+        if data is None:
+            if out_dtype == core.DataType.FP64:
+                data = np.ones(out_tensor.shape(), dtype=np.float64)
+            elif out_dtype == core.DataType.FP32:
+                data = np.ones(out_tensor.shape(), dtype=np.float32)
+            else:
+                raise ValueError("Not supported data type " + str(out_dtype))
        grad_tensor.set(data, place)
 
     for output in outputs:
-        __set_tensor__(output)
+        data = None
+        if feed_dict is not None and output in feed_dict:
+            data = feed_dict[output]
+        __set_tensor__(output, data)
 
 
 class TestBatchNormOp(OpTest):
     def __assert_close(self, tensor, np_array, msg, atol=1e-4):
         self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg)
 
-    def test_forward_backward(self):
-        # attr
+    def test_python(self):
         data_format = "NHWC"
         epsilon = 0.00001
         momentum = 0.9
 
-        channel_num = 2
-        x_shape = [2, 3, 4, channel_num]
-        scale_shape = [channel_num]
+        # N, H, W, C: 2, 3, 4, 2
+        n, h, w, c = 2, 3, 4, 2
+        x_shape = [n, h, w, c]
+        scale_shape = [c]
 
-        # input
         x_val = np.random.random_sample(x_shape).astype(np.float32)
         scale_val = np.random.random_sample(scale_shape).astype(np.float32)
         bias_val = np.random.random_sample(scale_shape).astype(np.float32)
 
         mean = np.zeros(scale_shape).astype(np.float32)
-        variance = np.zeros(scale_shape).astype(np.float32)
+        variance = np.ones(scale_shape).astype(np.float32)
 
         # run forward
         y_out, saved_mean, var_ref = _reference_training(
-            x_val, scale_val, bias_val, epsilon, data_format)
+            x_val, scale_val, bias_val, epsilon, "NHWC")
+
+        # update moving mean and variance
+        mean_out = saved_mean * (1. - momentum) + momentum * mean
+        variance_out = var_ref * (1. - momentum) + momentum * variance
+        saved_variance = 1. / np.sqrt(var_ref + epsilon)
+
+        # running N, C, H, W case
+        # should produce the same results
+        x_shape2 = [n, c, h, w]
+        x_val2 = np.transpose(x_val, (0, 3, 1, 2))
+        y_out2, saved_mean2, var_ref2 = _reference_training(
+            x_val2, scale_val, bias_val, epsilon, "NCHW")
+
+        self.__assert_close(saved_mean, saved_mean2, "batch mean")
+        self.__assert_close(var_ref, var_ref2, "batch variance")
+
+        # transfer (N, C, H, W) back to (N, H, W, C)
+        y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1))
+        self.__assert_close(y_out, y_out2_trans, "y_out")
+        print 'python: NHWC, NCHW, forward checking passed'
+
+        # test backward now
+        # NHWC
+        self.y_grad = np.random.random_sample(x_shape).astype(np.float32)
+        y_grad = self.y_grad
+        # y_grad = np.ones(x_shape).astype(np.float32)
+        x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
+            x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, "NHWC")
 
-        # run backward
-        mean_out = saved_mean * (1 - momentum)
-        variance_out = var_ref * (1 - momentum)
-        saved_variance = 1 / np.sqrt(var_ref + epsilon)
+        # NCHW
+        y_grad2 = np.transpose(y_grad, (0, 3, 1, 2))
+        # y_grad2 = np.ones(x_shape2).astype(np.float32)
+        x_grad_ref2, scale_grad_ref2, bias_grad_ref2 = _reference_grad(
+            x_val2, y_grad2, scale_val, saved_mean2, var_ref2, epsilon, "NCHW")
 
-        # for gradient test
-        y_grad = np.ones(x_shape).astype(np.float32)
-        x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
-            x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, data_format)
+        self.__assert_close(scale_grad_ref, scale_grad_ref2, "scale gradient")
+        self.__assert_close(bias_grad_ref, bias_grad_ref2, "bias gradient")
+
+        x_grad_transpose = np.transpose(x_grad_ref2, (0, 2, 3, 1))
+        self.__assert_close(x_grad_ref, x_grad_transpose, "x gradient")
+        print 'python: NHWC, NCHW, backward checking passed'
+
+    def test_forward_backward(self):
+        def test_with_place(place, tensor_format):
+            # attr
+            epsilon = 0.00001
+            momentum = 0.9
+
+            # N, H, W, C: 2, 3, 4, 2
+            n, h, w, c = 2, 3, 4, 2
+
+            if data_format == "NHWC":
+                x_shape = [n, h, w, c]
+            elif data_format == "NCHW":
+                x_shape = [n, c, h, w]
+            else:
+                raise ValueError("Unknown data format.")
+            scale_shape = [c]
+
+            x_val = np.random.random_sample(x_shape).astype(np.float32)
+            scale_val = np.random.random_sample(scale_shape).astype(np.float32)
+            bias_val = np.random.random_sample(scale_shape).astype(np.float32)
+
+            mean = np.zeros(scale_shape).astype(np.float32)
+            variance = np.ones(scale_shape).astype(np.float32)
+
+            # run forward
+            y_out, saved_mean, var_ref = _reference_training(
+                x_val, scale_val, bias_val, epsilon, data_format)
+
+            # update moving mean and variance
+            mean_out = saved_mean * (1. - momentum) + momentum * mean
+            variance_out = var_ref * (1. - momentum) + momentum * variance
+            saved_variance = 1. / np.sqrt(var_ref + epsilon)
+
+            # for gradient test
+            # y_grad = np.ones(x_shape).astype(np.float32)
+            y_grad = np.zeros(x_shape).astype(np.float32)
+            y_grad[0, 0, 0, 0] = 1.
+            # y_grad = np.random.random_sample(x_shape).astype(np.float32)
+            x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
+                x_val, y_grad, scale_val, saved_mean, var_ref, epsilon,
+                data_format)
 
-        def test_with_place(place):
             scope = core.Scope()
 
             # create input
@@ -157,7 +262,7 @@ class TestBatchNormOp(OpTest):
                 SavedVariance="saved_variance",
                 # attrs
                 is_test=False,
-                tensor_format=data_format,
+                tensor_format=tensor_format,
                 momentum=momentum,
                 epsilon=epsilon)
 
@@ -170,20 +275,21 @@ class TestBatchNormOp(OpTest):
             self.__assert_close(saved_variance_tensor, saved_variance,
                                 "saved_variance")
             self.__assert_close(mean_out_tensor, mean_out, "mean_out")
-            # FIXME(qiao) figure out why with cuDNN variance_out have a higher error rate
             if isinstance(place, core.GPUPlace):
                 atol = 5e-2
             else:
                 atol = 1e-4
             self.__assert_close(variance_out_tensor, variance_out,
                                 "variance_out", atol)
+            print "op test forward passed: ", str(place), tensor_format
 
             # run backward
             batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set())
             set_output_grad(
                 scope,
                 ["y_out", "mean", "variance", "saved_mean", "saved_variance"],
-                place)
+                place,
+                feed_dict={"y_out": y_grad})
             batch_norm_op_grad.run(scope, ctx)
 
             x_grad_tensor = create_or_get_tensor(scope,
@@ -200,12 +306,14 @@ class TestBatchNormOp(OpTest):
             self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad")
             self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad")
             self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad")
+            print "op test backward passed: ", str(place), tensor_format
 
         places = [core.CPUPlace()]
         if core.is_compile_gpu() and core.op_support_gpu("batch_norm"):
             places.append(core.GPUPlace(0))
         for place in places:
-            test_with_place(place)
+            for data_format in ["NCHW", "NHWC"]:
+                test_with_place(place, data_format)
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/framework/tests/test_inference_model_io.py b/python/paddle/v2/framework/tests/test_inference_model_io.py
new file mode 100644
index 0000000000000000000000000000000000000000..4487ab989f3c5da92e086c1fd395c3d776dce9a9
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_inference_model_io.py
@@ -0,0 +1,95 @@
+import paddle.v2 as paddle
+import paddle.v2.framework.layers as layers
+import paddle.v2.framework.core as core
+import paddle.v2.framework.optimizer as optimizer
+
+from paddle.v2.framework.framework import Program, g_program
+from paddle.v2.framework.io import save_inference_model, load_inference_model
+import paddle.v2.framework.executor as executor
+import unittest
+import numpy as np
+
+
+class TestBook(unittest.TestCase):
+    def test_fit_line_inference_model(self):
+        MODEL_DIR = "./tmp/inference_model"
+
+        init_program = Program()
+        program = Program()
+        x = layers.data(
+            name='x',
+            shape=[2],
+            data_type='float32',
+            program=program,
+            init_program=init_program)
+        y = layers.data(
+            name='y',
+            shape=[1],
+            data_type='float32',
+            program=program,
+            init_program=init_program)
+
+        y_predict = layers.fc(input=x,
+                              size=1,
+                              act=None,
+                              program=program,
+                              init_program=init_program)
+
+        cost = layers.square_error_cost(
+            input=y_predict,
+            label=y,
+            program=program,
+            init_program=init_program)
+        avg_cost = layers.mean(
+            x=cost, program=program, init_program=init_program)
+
+        sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
+        opts = sgd_optimizer.minimize(avg_cost)
+
+        place = core.CPUPlace()
+        exe = executor.Executor(place)
+
+        exe.run(init_program, feed={}, fetch_list=[])
+
+        for i in xrange(100):
+            x_data = np.array(
+                [[1, 1], [1, 2], [3, 4], [5, 2]]).astype("float32")
+            y_data = np.array([[-2], [-3], [-7], [-7]]).astype("float32")
+
+            tensor_x = core.LoDTensor()
+            tensor_x.set(x_data, place)
+            tensor_y = core.LoDTensor()
+            tensor_y.set(y_data, place)
+            exe.run(program,
+                    feed={'x': tensor_x,
+                          'y': tensor_y},
+                    fetch_list=[avg_cost])
+
+        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
+        outs = exe.run(program,
+                       feed={'x': tensor_x,
+                             'y': tensor_y},
+                       fetch_list=[avg_cost])
+        expected = np.array(outs[0])
+
+        reload(executor)  # reload to build a new scope
+        exe = executor.Executor(place)
+
+        [infer_prog, feed_var_names, fetch_vars] = load_inference_model(
+            MODEL_DIR, exe)
+
+        outs = exe.run(
+            infer_prog,
+            feed={feed_var_names[0]: tensor_x,
+                  feed_var_names[1]: tensor_y},
+            fetch_list=fetch_vars)
+        actual = np.array(outs[0])
+
+        self.assertEqual(feed_var_names, ["x", "y"])
+        self.assertEqual(len(fetch_vars), 1)
+        self.assertEqual(str(fetch_vars[0]), str(avg_cost))
+        self.assertEqual(expected, actual)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_mnist.py b/python/paddle/v2/framework/tests/test_mnist.py
deleted file mode 100644
index c8d54b7c94b7815fa79e5a11f4e159657dc2a6cb..0000000000000000000000000000000000000000
--- a/python/paddle/v2/framework/tests/test_mnist.py
+++ /dev/null
@@ -1,257 +0,0 @@
-import paddle.v2.framework.core as core
-from paddle.v2.framework.op import Operator
-import numpy
-import paddle.v2 as paddle
-exit(
-    0
-)  # FIXME(yuyang18): InferShape has been removed, this unittest should be changed until compile time is ready
-
-BATCH_SIZE = 100
-
-scope = core.Scope()
-place = core.CPUPlace()
-# if you want to test GPU training, you can use gpu place
-# place = core.GPUPlace(0)
-dev_ctx = core.DeviceContext.create(place)
-
-init_net = core.Net.create()
-forward_net = core.Net.create()
-backward_net = None
-optimize_net = core.Net.create()
-
-
-def atomic_id():
-    id = 0
-    while True:
-        yield id
-        id += 1
-
-
-uniq_id = atomic_id().next
-
-
-def data_layer(name, dims):
-    var = scope.var(name)
-    tensor = var.get_tensor()
-    tensor.set_dims(dims)  # 1 is batch size holder.
-    return name
-
-
-def feed_data(name, data):
-    assert isinstance(data, numpy.ndarray)
-    tensor = scope.find_var(name).get_tensor()
-    tensor.set_dims(data.shape)
-    if data.dtype == numpy.dtype("int32"):
-        tensor.alloc_int(place)
-    elif data.dtype == numpy.dtype("float32"):
-        tensor.alloc_float(place)
-    else:
-        raise ValueError("data type not supported")
-    tensor.set(data, place)
-
-
-def grad_var_name(var_name):
-    return var_name + "@GRAD"
-
-
-def sgd_optimizer(net, param_name, learning_rate=0.005):
-    grad_name = grad_var_name(param_name)
-    optimize_op = Operator(
-        "sgd",
-        param=param_name,
-        grad=grad_name,
-        param_out=param_name,
-        learning_rate=learning_rate)
-    net.append_op(optimize_op)
-
-
-# should use operator and add these to the init_network
-def init_param(net, param_name, dims):
-    scope.var(param_name)
-    op = Operator(
-        "uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10)
-    op.infer_shape(scope)
-    net.append_op(op)
-
-
-# fc_layer
-def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
-    """
-    The fully connected layer.
-
-    :param input: The name of input variable.
-    :type input: str
-    :param size: The size of fully connected layer.
-    :param act: The name of activation.
-    :param param: The attribute of learnable parameter which can be used to
-                  modify initialization mean and std of the parameter.
-    :param bias: The attribute of bias. If set False, this layer does not have
-                 a bias.
-    :param name: The name of this layer. If it is not set explictly, a name
-                 will be generated automatically.
-    :return: The name of the output variable.
-    """
-
-    if name is None:
-        name = "fc_%d" % uniq_id()
-    if not isinstance(name, str):
-        raise ValueError("The name of a layer should be a string.")
-
-    input_dims = scope.find_var(input).get_tensor().get_dims()
-
-    w_name = param or name + ".w"
-    init_param(net=init_net, param_name=w_name, dims=[input_dims[1], size])
-    sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01)
-
-    pre_activation = name + ".mul.out"
-    scope.var(pre_activation)
-    mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation)
-    net.append_op(mul_op)
-
-    # create bias variable if needed
-    if bias:
-        bias_name = name + ".b"
-        init_param(net=init_net, param_name=bias_name, dims=[size])
-        sgd_optimizer(
-            net=optimize_net, param_name=bias_name, learning_rate=0.001)
-        bias_out = name + ".rowwise_add.out"
-        scope.var(bias_out)
-        rowwise_append_op = Operator(
-            "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out)
-        net.append_op(rowwise_append_op)
-        pre_activation = bias_out
-
-    activation_op = Operator(act, X=pre_activation, Y=name)
-    net.append_op(activation_op)
-    scope.var(name)
-    net.infer_shape(scope)
-    return name
-
-
-def cross_entropy_layer(net, input, label):
-    cost_name = "cross_entropy_%d" % uniq_id()
-    cross_entropy_op = Operator(
-        "cross_entropy", X=input, Label=label, Y=cost_name)
-    net.append_op(cross_entropy_op)
-    scope.var(cost_name)
-    net.infer_shape(scope)
-    return cost_name
-
-
-def create_backward_net(forward_net):
-    net = core.Operator.backward(forward_net, set())
-    for input in net.inputs()["all"]:
-        var = scope.var(input)
-        var.get_tensor()
-    for output in net.outputs()["all"]:
-        var = scope.var(output)
-        var.get_tensor()
-    return net
-
-
-def debug_print_op(op):
-    print("===============" + op.type() + "==============")
-    print("***inputs:***")
-    for input in op.inputs()["all"]:
-        print input, scope.find_var(input).get_tensor().get_dims()
-    print("\n***outputs:***")
-    for output in op.outputs()["all"]:
-        print output, scope.find_var(output).get_tensor().get_dims()
-    print("")
-    print("")
-
-
-def set_cost(cost):
-    cost_shape = numpy.array(scope.find_var(cost).get_tensor()).shape
-    cost_grad = \
-        scope.find_var(grad_var_name(cost)).get_tensor()
-    cost_grad.set_dims(cost_shape)
-    cost_grad.alloc_float(place)
-    cost_grad.set(numpy.ones(cost_shape).astype("float32"), place)
-
-
-def get_cost_mean(cost):
-    cost_data = numpy.array(scope.find_var(cost).get_tensor())
-    return cost_data.sum() / len(cost_data)
-
-
-def error_rate(predict, label):
-    predict_var = numpy.array(scope.find_var(predict).get_tensor()).argmax(
-        axis=1)
-    label = numpy.array(scope.find_var(label).get_tensor())
-    error_num = numpy.sum(predict_var != label)
-    return error_num / float(len(label))
-
-
-images = data_layer(name="pixel", dims=[BATCH_SIZE, 784])
-labels = data_layer(name="label", dims=[BATCH_SIZE, 1])
-fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid")
-fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid")
-predict = fc_layer(net=forward_net, input=fc2, size=10, act="softmax")
-cost = cross_entropy_layer(net=forward_net, input=predict, label=labels)
-
-init_net.complete_add_op(True)
-forward_net.complete_add_op(True)
-backward_net = create_backward_net(forward_net)
-optimize_net.complete_add_op(True)
-
-print(init_net)
-print(forward_net)
-print(backward_net)
-print(optimize_net)
-
-debug_print_op(forward_net)
-debug_print_op(backward_net)
-debug_print_op(optimize_net)
-
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
-        paddle.dataset.mnist.train(), buf_size=8192),
-    batch_size=BATCH_SIZE)
-
-
-def test(cost_name):
-    test_reader = paddle.batch(
-        paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
-    cost = []
-    error = []
-    for data in test_reader():
-        image_data = numpy.array(map(lambda x: x[0], data)).astype("float32")
-        label_data = numpy.array(map(lambda x: x[1], data)).astype("int32")
-        label_data = numpy.expand_dims(label_data, axis=1)
-        feed_data(images, image_data)
-        feed_data(labels, label_data)
-
-        forward_net.infer_shape(scope)
-        forward_net.run(scope, dev_ctx)
-        cost.append(get_cost_mean(cost_name))
-        error.append(error_rate(predict, "label"))
-    print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str(
-        sum(error) / float(len(error))))
-
-
-PASS_NUM = 1
-
-init_net.run(scope, dev_ctx)
-for pass_id in range(PASS_NUM):
-    batch_id = 0
-
-    for data in train_reader():
-        image_data = numpy.array(map(lambda x: x[0], data)).astype("float32")
-        label_data = numpy.array(map(lambda x: x[1], data)).astype("int32")
-        label_data = numpy.expand_dims(label_data, axis=1)
-        feed_data(images, image_data)
-        feed_data(labels, label_data)
-
-        forward_net.infer_shape(scope)
-        forward_net.run(scope, dev_ctx)
-        set_cost(cost)
-        backward_net.infer_shape(scope)
-        backward_net.run(scope, dev_ctx)
-
-        optimize_net.run(scope, dev_ctx)
-        if batch_id % 100 == 0:
-            print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]")
-            test(cost)
-
-        batch_id = batch_id + 1
diff --git a/python/paddle/v2/framework/tests/test_operator_desc.py b/python/paddle/v2/framework/tests/test_operator_desc.py
index af4e980b8ed6db6cb9b76de49d8dc0860f07ec80..7355f72455ca4f821c9520d97162e3e0050383af 100644
--- a/python/paddle/v2/framework/tests/test_operator_desc.py
+++ b/python/paddle/v2/framework/tests/test_operator_desc.py
@@ -1,5 +1,5 @@
 import unittest
-from paddle.v2.framework.framework import Variable, g_program
+from paddle.v2.framework.framework import Variable, Program, g_program
 import paddle.v2.framework.core as core
 
 
@@ -21,7 +21,8 @@ class TestOperator(unittest.TestCase):
             "Operator \"no_such_op\" has not been registered.")
 
     def test_op_desc_creation(self):
-        block = g_program.current_block()
+        program = Program()
+        block = program.current_block()
         mul_x = block.create_var(
             dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
         mul_y = block.create_var(
@@ -50,10 +51,12 @@ class TestOperator(unittest.TestCase):
         self.assertEqual(mul_op.has_attr("y_num_col_dims"), True)
         self.assertEqual(mul_op.attr_type("y_num_col_dims"), core.AttrType.INT)
         self.assertEqual(mul_op.attr("y_num_col_dims"), 1)
+        self.assertEqual(mul_op.idx, 0)
         self.assertEqual(mul_out.op, mul_op)
 
     def test_mult_input(self):
-        block = g_program.current_block()
+        program = Program()
+        block = program.current_block()
         sum_x1 = block.create_var(
             dtype="int", shape=[3, 4], lod_level=0, name="sum.x1")
         sum_x2 = block.create_var(
@@ -71,6 +74,7 @@ class TestOperator(unittest.TestCase):
         self.assertEqual(sum_op.input("X"), ["sum.x1", "sum.x2", "sum.x3"])
         self.assertEqual(sum_op.output_names, ["Out"])
         self.assertEqual(sum_op.output("Out"), ["sum.out"])
+        self.assertEqual(sum_op.idx, 0)
         self.assertEqual(sum_out.op, sum_op)
 
diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py
index 6dfd94e8c8c96d87037faa028a3d2a537a90c9c7..45396c9bec9ccf0668b048b2b4855d7a665ebea5 100644
--- a/python/paddle/v2/framework/tests/test_optimizer.py
+++ b/python/paddle/v2/framework/tests/test_optimizer.py
@@ -27,6 +27,32 @@ class TestOptimizer(unittest.TestCase):
         sgd_op = opts[0]
         self.assertEqual(sgd_op.type, "sgd")
 
+    def test_sgd_optimizer_with_global_step(self):
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        global_step = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="step")
+        sgd_optimizer = optimizer.SGDOptimizer(
+            learning_rate=0.01, global_step=global_step)
+        opts = sgd_optimizer.minimize(mul_out)
+        self.assertEqual(len(opts), 2)
+        sgd_op = opts[0]
+        self.assertEqual(sgd_op.type, "sgd")
+        increment_op = opts[1]
+        self.assertEqual(increment_op.type, "increment")
+
 
 class TestMomentumOptimizer(unittest.TestCase):
     class MockMomentum(optimizer.MomentumOptimizer):
diff --git a/python/paddle/v2/framework/tests/test_program.py b/python/paddle/v2/framework/tests/test_program.py
index 9eb308bd44860d8f3d495f93333fc91ecc924376..be020573b7dcd9f8dcd0f99d654dc8b2106abb2b 100644
--- a/python/paddle/v2/framework/tests/test_program.py
+++ b/python/paddle/v2/framework/tests/test_program.py
@@ -99,6 +99,8 @@ class TestProgram(unittest.TestCase):
             outputs={"Out": add_out},
             attrs={"x_num_col_dims": 1})
 
+        self.assertEqual(mul_op.idx, 0)
+        self.assertEqual(add_op.idx, 1)
         param_to_grad = prog.append_backward(add_out, set())
 
         def grad_name(name):
diff --git a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
index a985d1f3d38fcaa8372a70edd519b873d47f554a..44a768d5e2274c08c3b90b29b564549c11fb1105 100644
--- a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
@@ -5,9 +5,11 @@ import paddle.v2.framework.optimizer as optimizer
 
 from paddle.v2.framework.framework import Program, g_program
 from paddle.v2.framework.executor import Executor
+from paddle.v2.framework.regularizer import L2DecayRegularizer
 
 import numpy as np
 
+BATCH_SIZE = 128
 init_program = Program()
 program = Program()
 image = layers.data(
@@ -17,22 +19,35 @@ image = layers.data(
     program=program,
     init_program=init_program)
 
+param_attr = {
+    'name': None,
+    'init_attr': {
+        'type': 'uniform_random',
+        'min': -1.0,
+        'max': 1.0
+    },
+    'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE)
+}
+
 hidden1 = layers.fc(input=image,
                     size=128,
                     act='relu',
                     program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 hidden2 = layers.fc(input=hidden1,
                     size=64,
                     act='relu',
                     program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 
 predict = layers.fc(input=hidden2,
                     size=10,
                     act='softmax',
                     program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 
 label = layers.data(
     name='y',
@@ -48,8 +63,6 @@ avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
 sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
 opts = sgd_optimizer.minimize(avg_cost)
 
-BATCH_SIZE = 128
-
 train_reader = paddle.batch(
     paddle.reader.shuffle(
         paddle.dataset.mnist.train(), buf_size=8192),
diff --git a/python/paddle/v2/framework/tests/test_regularizer.py b/python/paddle/v2/framework/tests/test_regularizer.py
index 06a892ada19743b444908061a98ef9d721ffaf8e..b21dceb584bdc660e48598a600f57cb6095b3802 100644
--- a/python/paddle/v2/framework/tests/test_regularizer.py
+++ b/python/paddle/v2/framework/tests/test_regularizer.py
@@ -39,5 +39,39 @@ class TestL2DecayRegularizer(unittest.TestCase):
         self.assertEqual(block.ops[-2].type, 'scale')
 
 
+class TestL1DecayRegularizer(unittest.TestCase):
+    def test_l1decay_regularizer(self):
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32",
+            shape=[5, 10],
+            lod_level=0,
+            name="mul.x",
+            regularizer=regularizer.L1DecayRegularizer(0.5))
+        self.assertTrue(mul_x.regularizer is not None)
+        self.assertTrue(
+            isinstance(mul_x.regularizer, regularizer.L1DecayRegularizer))
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        params_grads = append_backward_ops(mul_out)
+        self.assertEqual(len(params_grads), 1)
+        count_ops = len(block.ops)
+        params_grads = optimizer.append_regularization_ops(params_grads)
+        self.assertEqual(len(params_grads), 1)
+        self.assertEqual(len(block.ops), count_ops + 3)
+        self.assertEqual(block.ops[-1].type, 'elementwise_add')
+        self.assertEqual(block.ops[-2].type, 'scale')
+        self.assertEqual(block.ops[-3].type, 'sign')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_sign_op.py b/python/paddle/v2/framework/tests/test_sign_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6b59bcfd8ba71e54d4c3a2b7a3dac1f2a346265
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_sign_op.py
@@ -0,0 +1,22 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestSignOp(OpTest):
+    def setUp(self):
+        self.op_type = "sign"
+        self.inputs = {
+            'X': np.random.uniform(-10, 10, (10, 10)).astype("float32")
+        }
+        self.outputs = {'Out': np.sign(self.inputs['X'])}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
+if __name__ == "__main__":
+    unittest.main()