diff --git a/benchmark/fluid/Dockerfile b/benchmark/fluid/Dockerfile
index b9eaca5ee6b487bb37bb954b3c606c3096d37aeb..707fadb1fae97cefe8a41715cd57d71754abda41 100644
--- a/benchmark/fluid/Dockerfile
+++ b/benchmark/fluid/Dockerfile
@@ -1,11 +1,18 @@
 FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
+
+# Setting UBUNTU_MIRROR can speed up apt-get.
+# ARG UBUNTU_MIRROR
+# RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
+
 RUN apt-get update && apt-get install -y python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop python-opencv
 RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/libnccl.so
-RUN pip install -U pip
-RUN pip install -U kubernetes paddlepaddle

 # IMPORTANT:
 # Add "ENV http_proxy=http://ip:port" if your download is slow, and don't forget to unset it at runtime.
+# example: unset http_proxy && unset https_proxy && python fluid_benchmark.py ...
+
+RUN pip install -U pip
+RUN pip install -U kubernetes paddlepaddle

 RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()\npaddle.dataset.flowers.fetch()" | python'
 RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.mnist.train()\npaddle.dataset.mnist.test()\npaddle.dataset.imdb.fetch()" | python'
@@ -14,9 +21,11 @@ RUN pip uninstall -y paddlepaddle && mkdir /workspace

 ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
 ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
+RUN chmod +x /usr/bin/paddle_k8s

 ADD *.whl /
-RUN pip install /*.whl && rm -f /*.whl && chmod +x /usr/bin/paddle_k8s
+RUN pip install /*.whl && rm -f /*.whl

 ENV LD_LIBRARY_PATH=/usr/local/lib
-ADD fluid_benchmark.py recordio_converter.py models/ /workspace/
+ADD fluid_benchmark.py recordio_converter.py args.py run.sh run_fluid_benchmark.sh /workspace/
+ADD models/ /workspace/models/
diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py
index aa70783ecd68be543b2d5aabee96a5b09bd72e6a..ece1102dce987cda994ff086b07f756498ce26e6 100644
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args):
         return train_program, fluid.default_startup_program()
     else:
         raise ValueError(
-            'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
+            'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
         )
@@ -264,8 +264,6 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
                 break
             else:
                 loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
-            if args.update_method == "pserver":
-                exe.bcast_params()
             if args.use_reader_op:
                 num_samples += args.batch_size * args.gpus
             else:
@@ -301,9 +299,18 @@ def print_train_time(start_time, end_time, num_samples):
           (num_samples, train_elapsed, examples_per_sec))


+def print_paddle_envs():
+    print('----------- Configuration envs -----------')
+    for k in os.environ:
+        if "PADDLE_" in k:
+            print("ENV %s:%s" % (k, os.environ[k]))
+    print('------------------------------------------------')
+
+
 def main():
     args = parse_args()
     print_arguments(args)
+    print_paddle_envs()

     # the unique trainer id, starting from 0, needed by trainer
     # only
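The benchmark and Kubernetes tooling in this patch standardize cluster configuration on `PADDLE_`-prefixed environment variables, which `print_paddle_envs` above dumps at startup. A minimal sketch of a trainer-side reader for these variables, assuming only the names introduced in this diff (`read_cluster_env` itself is a hypothetical helper, not part of the patch):

```python
import os


def read_cluster_env():
    # Names below are the PADDLE_-prefixed variables introduced by this patch.
    role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
    if role not in ("TRAINER", "PSERVER", "WORKER"):
        raise ValueError(
            "PADDLE_TRAINING_ROLE environment variable must be either "
            "TRAINER or PSERVER")
    return {
        "role": role,
        "trainers": int(os.getenv("PADDLE_TRAINERS", "1")),
        "trainer_id": int(os.getenv("PADDLE_TRAINER_ID", "0")),
        "pserver_port": os.getenv("PADDLE_PSERVER_PORT", "6174"),
    }
```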
diff --git a/benchmark/fluid/kube_gen_job.py b/benchmark/fluid/kube_gen_job.py
index 9da8a69af1d7b671b2648b1b3702776c1c0650b0..dfe8b5cdd58456902fa8ec355e9837dface3f7be 100644
--- a/benchmark/fluid/kube_gen_job.py
+++ b/benchmark/fluid/kube_gen_job.py
@@ -17,6 +17,7 @@ import copy
 import argparse
 import random
 import os
+import copy
 from kube_templates import pserver, trainer, envs
@@ -108,10 +109,9 @@ def gen_job():
     tn_container["ports"][0]["containerPort"] = spreadport

     envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
-    envs.append({"name": "TRAINERS", "value": str(args.trainers)})
-    envs.append({"name": "PSERVERS", "value": str(args.pservers)})
+    envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)})
+    envs.append({"name": "PADDLE_PSERVERS", "value": str(args.pservers)})
     envs.append({"name": "ENTRY", "value": args.entry})
-    envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
     envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
     # NOTE: these directories below are cluster specific, please modify
     # these settings before you run on your own cluster.
@@ -166,17 +166,23 @@ def gen_job():
     tn["spec"]["template"]["spec"]["volumes"] = volumes
     tn_container["volumeMounts"] = volumeMounts

-    ps_container["env"] = envs
-    ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"})
+    ps_container["env"] = copy.deepcopy(envs)
+    ps_container["env"].append({
+        "name": "PADDLE_TRAINING_ROLE",
+        "value": "PSERVER"
+    })
     tn_container["env"] = envs
     if args.disttype == "pserver":
         tn_container["env"].append({
-            "name": "TRAINING_ROLE",
+            "name": "PADDLE_TRAINING_ROLE",
            "value": "TRAINER"
        })
     elif args.disttype == "nccl2" or args.disttype == "local":
        # NCCL2 has no training role; set to plain WORKER
-        tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"})
+        tn_container["env"].append({
+            "name": "PADDLE_TRAINING_ROLE",
+            "value": "WORKER"
+        })

     os.mkdir(args.jobname)
     if args.disttype == "pserver":
diff --git a/doc/fluid/api/gen_doc.sh b/doc/fluid/api/gen_doc.sh
index acc8b4aa3fb258e5beef2d1e54919d429cf7ea6f..9ce6a9a7c329055a755cdb0a40c8c1c2af09a61c 100755
--- a/doc/fluid/api/gen_doc.sh
+++ b/doc/fluid/api/gen_doc.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler metric > layers.rst

-for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
+for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer transpiler
 do
   python gen_doc.py ${module} > ${module}.rst
 done
diff --git a/doc/fluid/api/transpiler.rst b/doc/fluid/api/transpiler.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b3535b449eb0e5ac6563256ddac3bf4a27fd8ce6
--- /dev/null
+++ b/doc/fluid/api/transpiler.rst
@@ -0,0 +1,46 @@
+..  THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+    !DO NOT EDIT THIS FILE MANUALLY!
+
+==========
+transpiler
+==========
+
+DistributeTranspiler
+--------------------
+
+.. autoclass:: paddle.fluid.transpiler.DistributeTranspiler
+    :members:
+    :noindex:
+
+InferenceTranspiler
+-------------------
+
+.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler
+    :members:
+    :noindex:
+
+memory_optimize
+---------------
+
+.. autofunction:: paddle.fluid.transpiler.memory_optimize
+    :noindex:
+
+release_memory
+--------------
+
+.. autofunction:: paddle.fluid.transpiler.release_memory
+    :noindex:
+
+HashName
+--------
+
+.. autoclass:: paddle.fluid.transpiler.HashName
+    :members:
+    :noindex:
+
+RoundRobin
+----------
+
+.. autoclass:: paddle.fluid.transpiler.RoundRobin
+    :members:
+    :noindex:
diff --git a/doc/fluid/howto/cluster/fluid_cluster_train_cn.md b/doc/fluid/howto/cluster/fluid_cluster_train_cn.md
index b99b90056b0a2e51f2668a6d27d94857bdc09c37..55326940ce7c7dbaa5bf19f1950f470527ddf4f0 100644
--- a/doc/fluid/howto/cluster/fluid_cluster_train_cn.md
+++ b/doc/fluid/howto/cluster/fluid_cluster_train_cn.md
@@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book

 Step 2: start the Parameter Server:
 ```bash
-PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.2 TRAINERS=2 POD_IP=192.168.1.2 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=PSERVER python test_fit_a_line.py
+PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.2 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.2 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=PSERVER python test_fit_a_line.py
 ```
 After running the command, wait for the prompt ```Server listening on 192.168.1.2:6174```, which indicates that the Parameter Server has started successfully.

 Step 3: start the Trainer:
 ```bash
-PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.3 TRAINERS=2 POD_IP=192.168.1.3 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=TRAINER python test_fit_a_line.py
+PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.3 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.3 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=TRAINER python test_fit_a_line.py
 ```
 Since we configured two Trainers, another Trainer needs to be started on a second compute node.
diff --git a/doc/fluid/howto/cluster/fluid_recordio.md b/doc/fluid/howto/cluster/fluid_recordio.md
index 55ce63ec193948424cd0b87f13d56b9cf6154dfc..92859e8f622d0c155128821c54252113c5016989 100644
--- a/doc/fluid/howto/cluster/fluid_recordio.md
+++ b/doc/fluid/howto/cluster/fluid_recordio.md
@@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id):
         ret_list.append(f)
     return ret_list

-trainers = int(os.getenv("TRAINERS"))
-trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
+trainers = int(os.getenv("PADDLE_TRAINERS"))
+trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
 data_file = fluid.layers.io.open_files(
     filenames=gen_train_list("./mnist-[0-9]*.recordio", 2, 0),
     thread_num=1,
diff --git a/doc/fluid/howto/inference/build_and_install_lib_cn.rst b/doc/fluid/howto/inference/build_and_install_lib_cn.rst
index c8d9992fcc92c25f8c14f71c79bde9f79fd92b1f..84005b54e07cf810649370d2c1f6b6c522434bf6 100644
--- a/doc/fluid/howto/inference/build_and_install_lib_cn.rst
+++ b/doc/fluid/howto/inference/build_and_install_lib_cn.rst
@@ -13,6 +13,7 @@
 cpu_noavx_openblas      `fluid.tgz `_
 cuda8.0_cudnn5_avx_mkl  `fluid.tgz `_
 cuda8.0_cudnn7_avx_mkl  `fluid.tgz `_
+cuda9.0_cudnn7_avx_mkl  `fluid.tgz `_
 ====================== ========================================

 Build from source (从源码编译)
diff --git a/paddle/contrib/inference/demo/simple_on_word2vec.cc b/paddle/contrib/inference/demo/simple_on_word2vec.cc
index 192a6414260ce06048b8c765402d89882cabc51b..2a4bfc87069b9fd8ece58dde210a6cb8344da536 100644
--- a/paddle/contrib/inference/demo/simple_on_word2vec.cc
+++ b/paddle/contrib/inference/demo/simple_on_word2vec.cc
@@ -40,10 +40,9 @@ void Main(bool use_gpu) {
     //# 2. Prepare input.
     int64_t data[4] = {1, 2, 3, 4};
-    PaddleBuf buf{.data = data, .length = sizeof(data)};
     PaddleTensor tensor{.name = "",
                         .shape = std::vector<int>({4, 1}),
-                        .data = buf,
+                        .data = PaddleBuf(data, sizeof(data)),
                         .dtype = PaddleDType::INT64};

     // For simplicity, we set all the slots with the same data.
@@ -55,14 +54,12 @@ void Main(bool use_gpu) {
     //# 4. Get output.
     ASSERT_EQ(outputs.size(), 1UL);
-    LOG(INFO) << "output buffer size: " << outputs.front().data.length;
-    const size_t num_elements = outputs.front().data.length / sizeof(float);
+    LOG(INFO) << "output buffer size: " << outputs.front().data.length();
+    const size_t num_elements = outputs.front().data.length() / sizeof(float);
     // The outputs' buffers are in CPU memory.
     for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
-      LOG(INFO) << static_cast<float*>(outputs.front().data.data)[i];
+      LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
     }
-    // TODO(Superjomn): this is should be free automatically
-    free(outputs[0].data.data);
   }
 }
@@ -86,10 +83,9 @@ void MainThreads(int num_threads, bool use_gpu) {
       for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
         // 2. Dummy Input Data
         int64_t data[4] = {1, 2, 3, 4};
-        PaddleBuf buf{.data = data, .length = sizeof(data)};
         PaddleTensor tensor{.name = "",
                             .shape = std::vector<int>({4, 1}),
-                            .data = buf,
+                            .data = PaddleBuf(data, sizeof(data)),
                             .dtype = PaddleDType::INT64};
         std::vector<PaddleTensor> inputs(4, tensor);
         std::vector<PaddleTensor> outputs;
@@ -99,13 +95,13 @@ void MainThreads(int num_threads, bool use_gpu) {
         // 4. Get output.
         ASSERT_EQ(outputs.size(), 1UL);
         LOG(INFO) << "TID: " << tid << ", "
-                  << "output buffer size: " << outputs.front().data.length;
-        const size_t num_elements = outputs.front().data.length / sizeof(float);
+                  << "output buffer size: " << outputs.front().data.length();
+        const size_t num_elements =
+            outputs.front().data.length() / sizeof(float);
         // The outputs' buffers are in CPU memory.
         for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
-          LOG(INFO) << static_cast<float*>(outputs.front().data.data)[i];
+          LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
         }
-        free(outputs[0].data.data);
       }
     });
   }
diff --git a/paddle/contrib/inference/paddle_inference_api.cc b/paddle/contrib/inference/paddle_inference_api.cc
index d67e1e7667800d6dd00cb8915b0d6dc7c664970b..dc2842ae0eeb5592b6d4571b70df162886aee7a2 100644
--- a/paddle/contrib/inference/paddle_inference_api.cc
+++ b/paddle/contrib/inference/paddle_inference_api.cc
@@ -13,3 +13,53 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/contrib/inference/paddle_inference_api.h"
+
+namespace paddle {
+
+PaddleBuf::PaddleBuf(PaddleBuf&& other)
+    : data_(other.data_),
+      length_(other.length_),
+      memory_owned_(other.memory_owned_) {
+  other.memory_owned_ = false;
+  other.data_ = nullptr;
+  other.length_ = 0;
+}
+
+PaddleBuf::PaddleBuf(const PaddleBuf& other) { *this = other; }
+
+PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
+  // only the buffer with external memory can be copied
+  assert(!other.memory_owned_);
+  data_ = other.data_;
+  length_ = other.length_;
+  memory_owned_ = other.memory_owned_;
+  return *this;
+}
+
+void PaddleBuf::Resize(size_t length) {
+  // Only owned memory can be resized; external memory can't be changed.
+  if (length_ == length) return;
+  assert(memory_owned_);
+  Free();
+  data_ = new char[length];
+  length_ = length;
+  memory_owned_ = true;
+}
+
+void PaddleBuf::Reset(void* data, size_t length) {
+  Free();
+  memory_owned_ = false;
+  data_ = data;
+  length_ = length;
+}
+
+void PaddleBuf::Free() {
+  if (memory_owned_ && data_) {
+    assert(length_ > 0);
+    delete[] static_cast<char*>(data_);
+    data_ = nullptr;
+    length_ = 0;
+  }
+}
+
+}  // namespace paddle
\ No newline at end of file
diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h
index 77e2d77b6b7fe3eeed865c8de0818d059cfa6c6e..bd4530fcf9518cb3bf06179d8f60a1dde38ff7dd 100644
--- a/paddle/contrib/inference/paddle_inference_api.h
+++ b/paddle/contrib/inference/paddle_inference_api.h
@@ -21,6 +21,7 @@ limitations under the License. */

 #pragma once

+#include <cassert>
 #include <memory>
 #include <string>
 #include <vector>
@@ -32,12 +33,38 @@ enum PaddleDType {
   INT64,
 };

-struct PaddleBuf {
-  void* data;     // pointer to the data memory.
-  size_t length;  // number of memory bytes.
+class PaddleBuf {
+ public:
+  PaddleBuf() = default;
+  PaddleBuf(PaddleBuf&& other);
+  // Copy only available when memory is managed externally.
+  explicit PaddleBuf(const PaddleBuf&);
+  PaddleBuf& operator=(const PaddleBuf&);
+  // Do not own the memory.
+  PaddleBuf(void* data, size_t length)
+      : data_(data), length_(length), memory_owned_{false} {}
+  // Own memory.
+  PaddleBuf(size_t length)
+      : data_(new char[length]), length_(length), memory_owned_(true) {}
+  // Resize to `length` bytes.
+  void Resize(size_t length);
+  // Reset to external memory.
+  void Reset(void* data, size_t length);
+  bool empty() const { return length_ == 0; }
+  void* data() const { return data_; }
+  size_t length() const { return length_; }
+
+  ~PaddleBuf() { Free(); }
+
+ private:
+  void Free();
+  void* data_{nullptr};  // pointer to the data memory.
+  size_t length_{0};     // number of memory bytes.
+  bool memory_owned_{true};
 };

 struct PaddleTensor {
+  PaddleTensor() = default;
   std::string name;  // variable name.
   std::vector<int> shape;
   // TODO(Superjomn) for LoD support, add a vector<vector<size_t>> field if needed.
@@ -67,8 +94,9 @@ class PaddlePredictor {
   // Predict a record.
   // The caller should be responsible for allocating and releasing the memory of
-  // `inputs`. `inputs` should be alive until Run returns. caller should be
-  // responsible for releasing the memory of `output_data`.
+  // `inputs`. `inputs` should be available until Run returns. Caller should be
+  // responsible for the output tensor's buffer, either allocated or passed from
+  // outside.
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
                    std::vector<PaddleTensor>* output_data) = 0;
diff --git a/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc b/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
index 5bafc58fa53f7d99de571f66b6224f0f2de66e32..ba2d30314715a57c5ab85e5ae1d8ac0512bbc74f 100644
--- a/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
+++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
@@ -48,7 +48,7 @@ bool PaddleInferenceAnakinPredictor::Run(
     auto d_tensor_in_p = executor_.get_in(input.name);
     float *d_data_p = d_tensor_in_p->mutable_data();
     if (cudaMemcpy(d_data_p,
-                   static_cast<float *>(input.data.data),
+                   static_cast<float *>(input.data.data()),
                    d_tensor_in_p->valid_size() * sizeof(float),
                    cudaMemcpyHostToDevice) != 0) {
       LOG(ERROR) << "copy data from CPU to GPU error";
@@ -65,8 +65,11 @@ bool PaddleInferenceAnakinPredictor::Run(
   for (auto &output : *output_data) {
     auto *tensor = executor_.get_out(output.name);
     output.shape = tensor->shape();
+    if (output.data.length() < tensor->valid_size() * sizeof(float)) {
+      output.data.Resize(tensor->valid_size() * sizeof(float));
+    }
     // Copy data from GPU -> CPU
-    if (cudaMemcpy(output.data.data,
+    if (cudaMemcpy(output.data.data(),
                    tensor->mutable_data(),
                    tensor->valid_size() * sizeof(float),
                    cudaMemcpyDeviceToHost) != 0) {
diff --git a/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc b/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
index 1d41a5c73e75723f8614d810eae09ed8cdc8cf2b..f92e9d4190412f5847e353ef1dc0324cad668c9a 100644
--- a/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
+++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
@@ -37,28 +37,26 @@ TEST(inference, anakin) {

   float data[1 * 3 * 224 * 224] = {1.0f};

-  PaddleBuf buf{.data = data, .length = sizeof(data)};
   PaddleTensor tensor{.name = "input_0",
                       .shape = std::vector<int>({1, 3, 224, 224}),
-                      .data = buf,
+                      .data = PaddleBuf(data, sizeof(data)),
                       .dtype = PaddleDType::FLOAT32};

   // For simplicity, we set all the slots with the same data.
-  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
+  std::vector<PaddleTensor> paddle_tensor_feeds;
+  paddle_tensor_feeds.emplace_back(std::move(tensor));

-  float data_out[1000];
-
-  PaddleBuf buf_out{.data = data_out, .length = sizeof(data)};
   PaddleTensor tensor_out{.name = "prob_out",
                           .shape = std::vector<int>({1000, 1}),
-                          .data = buf_out,
+                          .data = PaddleBuf(),
                           .dtype = PaddleDType::FLOAT32};

-  std::vector<PaddleTensor> outputs(1, tensor_out);
+  std::vector<PaddleTensor> outputs;
+  outputs.emplace_back(std::move(tensor_out));

   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));

-  float* data_o = static_cast<float*>(outputs[0].data.data);
+  float* data_o = static_cast<float*>(outputs[0].data.data());
   for (size_t j = 0; j < 1000; ++j) {
     LOG(INFO) << "output[" << j << "]: " << data_o[j];
   }
diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc
index bda2981a14482e2c4a29773d37b074506cc344b1..d9129a704bc289ce1d416474537fc9234a07e5b8 100644
--- a/paddle/contrib/inference/paddle_inference_api_impl.cc
+++ b/paddle/contrib/inference/paddle_inference_api_impl.cc
@@ -178,8 +178,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
     std::memcpy(static_cast<void *>(input_ptr),
-                inputs[i].data.data,
-                inputs[i].data.length);
+                inputs[i].data.data(),
+                inputs[i].data.length());
     feeds->push_back(input);
   }
   return true;
@@ -241,10 +241,11 @@ bool NativePaddlePredictor::GetFetch(
     }
     outputs->at(i).shape = shape;
-    outputs->at(i).data.length = sizeof(float) * data.size();
-    outputs->at(i).data.data = malloc(outputs->at(i).data.length);
-    std::memcpy(
-        outputs->at(i).data.data, data.data(), outputs->at(i).data.length);
+    auto &buffer = outputs->at(i).data;
+    if (buffer.empty() || buffer.length() < sizeof(float) * data.size()) {
+      buffer.Resize(sizeof(float) * data.size());
+    }
+    std::memcpy(buffer.data(), data.data(), buffer.length());
     outputs->at(i).dtype = PaddleDType::FLOAT32;
     // TODO(panyx0718): support other types? fill tensor name? avoid a copy.
   }
diff --git a/paddle/contrib/inference/test_paddle_inference_api_impl.cc b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
index 5d843010e02b09087e6b328428e80fb40eb5bb97..88c4e665a3daed0ed34b23b75d360acbd586401f 100644
--- a/paddle/contrib/inference/test_paddle_inference_api_impl.cc
+++ b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
@@ -27,13 +27,12 @@ namespace paddle {

 PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
   PaddleTensor pt;
-  pt.data.data = t->data<void>();

   if (t->type() == typeid(int64_t)) {
-    pt.data.length = t->numel() * sizeof(int64_t);
+    pt.data.Reset(t->data<void>(), t->numel() * sizeof(int64_t));
     pt.dtype = PaddleDType::INT64;
   } else if (t->type() == typeid(float)) {
-    pt.data.length = t->numel() * sizeof(float);
+    pt.data.Reset(t->data<void>(), t->numel() * sizeof(float));
     pt.dtype = PaddleDType::FLOAT32;
   } else {
     LOG(FATAL) << "unsupported type.";
@@ -79,8 +78,8 @@ void MainWord2Vec(bool use_gpu) {
   std::vector<PaddleTensor> outputs;
   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
   ASSERT_EQ(outputs.size(), 1UL);
-  size_t len = outputs[0].data.length;
-  float* data = static_cast<float*>(outputs[0].data.data);
+  size_t len = outputs[0].data.length();
+  float* data = static_cast<float*>(outputs[0].data.data());
   for (size_t j = 0; j < len / sizeof(float); ++j) {
     ASSERT_LT(data[j], 1.0);
     ASSERT_GT(data[j], -1.0);
@@ -103,8 +102,6 @@ void MainWord2Vec(bool use_gpu) {
     EXPECT_LT(lod_data[i] - data[i], 1e-3);
     EXPECT_GT(lod_data[i] - data[i], -1e-3);
   }
-
-  free(outputs[0].data.data);
 }

 void MainImageClassification(bool use_gpu) {
@@ -143,13 +140,12 @@ void MainImageClassification(bool use_gpu) {
   std::vector<PaddleTensor> outputs;
   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
   ASSERT_EQ(outputs.size(), 1UL);
-  size_t len = outputs[0].data.length;
-  float* data = static_cast<float*>(outputs[0].data.data);
+  size_t len = outputs[0].data.length();
+  float* data = static_cast<float*>(outputs[0].data.data());
   float* lod_data = output1.data<float>();
   for (size_t j = 0; j < len / sizeof(float); ++j) {
     EXPECT_NEAR(lod_data[j], data[j], 1e-3);
   }
-  free(data);
 }

 void MainThreadsWord2Vec(bool use_gpu) {
@@ -192,8 +188,8 @@ void MainThreadsWord2Vec(bool use_gpu) {

       // check outputs range
       ASSERT_EQ(local_outputs.size(), 1UL);
-      const size_t len = local_outputs[0].data.length;
-      float* data = static_cast<float*>(local_outputs[0].data.data);
+      const size_t len = local_outputs[0].data.length();
+      float* data = static_cast<float*>(local_outputs[0].data.data());
       for (size_t j = 0; j < len / sizeof(float); ++j) {
         ASSERT_LT(data[j], 1.0);
         ASSERT_GT(data[j], -1.0);
@@ -205,7 +201,6 @@ void MainThreadsWord2Vec(bool use_gpu) {
       for (int i = 0; i < refs[tid].numel(); ++i) {
         EXPECT_NEAR(ref_data[i], data[i], 1e-3);
       }
-      free(data);
     });
   }
   for (int i = 0; i < num_jobs; ++i) {
@@ -251,14 +246,13 @@ void MainThreadsImageClassification(bool use_gpu) {

       // check outputs correctness
       ASSERT_EQ(local_outputs.size(), 1UL);
-      const size_t len = local_outputs[0].data.length;
-      float* data = static_cast<float*>(local_outputs[0].data.data);
+      const size_t len = local_outputs[0].data.length();
+      float* data = static_cast<float*>(local_outputs[0].data.data());
       float* ref_data = refs[tid].data<float>();
       EXPECT_EQ(refs[tid].numel(), len / sizeof(float));
       for (int i = 0; i < refs[tid].numel(); ++i) {
         EXPECT_NEAR(ref_data[i], data[i], 1e-3);
       }
-      free(data);
     });
   }
   for (int i = 0; i < num_jobs; ++i) {
diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
index b30a9806eb19ee12d2a70afe3ca806224b0f75d6..f1eccb351ef68697c748814efb2987041b0da8d9 100644
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -321,7 +321,8 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
 }

 void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
-                                  bool create_local_scope, bool create_vars) {
+                                  bool create_local_scope, bool create_vars,
+                                  bool keep_kids) {
   Scope* local_scope = scope;
   if (create_vars) {
     if (create_local_scope) {
@@ -344,12 +345,20 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
     }
   }
   platform::DeviceContextPool::Instance().Get(place_)->Wait();
-  if (create_vars && create_local_scope) {
+  if (local_scope != scope) {
     scope->DeleteScope(local_scope);
   } else {
-    // Delete the local scopes created in operators.
-    scope->DropKids();
+    if (!keep_kids) {
+      // By default, we should delete all kid scopes after running the executor,
+      // because some operators, such as while_op, may create local scopes when
+      // running. But when while_op also creates a local executor to run its sub
+      // block, the sub scopes it created should not be dropped immediately:
+      // while_grad_op will use some variables created during the while_op run,
+      // so we need to keep the kids and wait for the outer executor to drop them.
+      scope->DropKids();
+    }
   }
+
   if (FLAGS_benchmark) {
     VLOG(2) << "-------------------------------------------------------";
     VLOG(2) << "Memory used after deleting local scope: "
diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h
index 67a0761dac2a9adcdd0ce2b218c4aa505d688d56..3aa5ffef69cd29681f248e915575c5715ad0d3fa 100644
--- a/paddle/fluid/framework/executor.h
+++ b/paddle/fluid/framework/executor.h
@@ -78,7 +78,7 @@ class Executor {

   void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                           bool create_local_scope = true,
-                          bool create_vars = true);
+                          bool create_vars = true, bool keep_kids = false);

   void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                           std::map<std::string, const LoDTensor*>* feed_targets,
diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
index b75df33b71311acd0e626e5a13c18469b19ef136..c7f40d43c922a328febd343cea7240fcb09f3d02 100644
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
@@ -27,7 +27,7 @@ void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
   SubGraphFuse(graph, node_inside_subgraph_teller_);
 }

-}  // analysis
-}  // inference
+}  // namespace analysis
+}  // namespace inference

-}  // paddle
+}  // namespace paddle
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index b6b498a616c22898bcf99ada6f150a4b5a9bd54c..286b03d7b7d11a50f33f0190c1a5b9097ed0f4a2 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -143,7 +143,7 @@ $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
 __attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC(
 TanhShrink Activation Operator.

-$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
+$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$

 )DOC";
@@ -385,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 STanh Activation Operator.

-$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
+$$out = b * \\frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$

 )DOC");
   }
diff --git a/paddle/fluid/operators/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/batch_norm_mkldnn_op.cc
index 8206cc9890160da756efb13c991020f09b20126a..cc158e57f7140c84f02bc7e091d8eac0d2b672e1 100644
--- a/paddle/fluid/operators/batch_norm_mkldnn_op.cc
+++ b/paddle/fluid/operators/batch_norm_mkldnn_op.cc
@@ -21,8 +21,6 @@ namespace operators {

 using batch_norm_bwd = mkldnn::batch_normalization_backward;
 using batch_norm_fwd = mkldnn::batch_normalization_forward;
-using framework::DataLayout;
-using framework::Tensor;
 using mkldnn::memory;
 using mkldnn::primitive;
 using mkldnn::reorder;
@@ -31,18 +29,6 @@ using paddle::platform::MKLDNNDeviceContext;
 using paddle::platform::MKLDNNMemDesc;
 using platform::to_void_cast;

-template <typename T>
-using EigenArrayMap =
-    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
-template <typename T>
-using ConstEigenArrayMap =
-    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
-template <typename T>
-using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;
-template <typename T>
-using ConstEigenVectorArrayMap =
-    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;
-
 namespace {
 template <typename T>
 struct bn_type_traits {
diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc
index 625ca2d7c4c70d1098b0fb28380d8d1eb24cb338..52b0bf85c07fee380f9e7ba1c703b56367628644 100644
--- a/paddle/fluid/operators/batch_norm_op.cc
+++ b/paddle/fluid/operators/batch_norm_op.cc
@@ -22,22 +22,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-using Tensor = framework::Tensor;
-using LoDTensor = framework::LoDTensor;
-using DataLayout = framework::DataLayout;
-
-template <typename T>
-using EigenArrayMap =
-    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
-template <typename T>
-using ConstEigenArrayMap =
-    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
-template <typename T>
-using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;
-template <typename T>
-using ConstEigenVectorArrayMap =
-    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;
-
 class BatchNormOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h
index 9e5fc41598f29336074335f3624a2300ad018d09..5e3d630d6889e445c5e84fa836d2d81bb7266779 100644
--- a/paddle/fluid/operators/batch_norm_op.h
+++ b/paddle/fluid/operators/batch_norm_op.h
@@ -19,6 +19,22 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

+using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+using DataLayout = framework::DataLayout;
+
+template <typename T>
+using EigenArrayMap =
+    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
+template <typename T>
+using ConstEigenArrayMap =
+    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
+template <typename T>
+using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;
+template <typename T>
+using ConstEigenVectorArrayMap =
+    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;
+
 template <typename DeviceContext, typename T>
 class BatchNormKernel : public framework::OpKernel<T> {
  public:
diff --git a/paddle/fluid/operators/bilinear_interp_op.cc b/paddle/fluid/operators/bilinear_interp_op.cc
index 2572e813d656353a2187c29da89266733a32f3ce..2dc3399da183fbcf7664066f6f7ce12db3dc6d5e 100644
--- a/paddle/fluid/operators/bilinear_interp_op.cc
+++ b/paddle/fluid/operators/bilinear_interp_op.cc
@@ -110,6 +110,7 @@ REGISTER_OPERATOR(bilinear_interp, ops::BilinearInterpOp,
                   ops::BilinearInterpOpMaker,
                   paddle::framework::DefaultGradOpDescMaker<true>);
 REGISTER_OPERATOR(bilinear_interp_grad, ops::BilinearInterpOpGrad);
-REGISTER_OP_CPU_KERNEL(bilinear_interp, ops::BilinearInterpKernel<float>);
+REGISTER_OP_CPU_KERNEL(bilinear_interp, ops::BilinearInterpKernel<float>,
+                       ops::BilinearInterpKernel<uint8_t>);
 REGISTER_OP_CPU_KERNEL(bilinear_interp_grad,
                        ops::BilinearInterpGradKernel<float>);
diff --git a/paddle/fluid/operators/bilinear_interp_op.h b/paddle/fluid/operators/bilinear_interp_op.h
index 8b03cd5a0635584a45782fe5a4823c37fe4fa8e8..70847cb8c1abe2e94bc844ab8117d1f23fea533b 100644
--- a/paddle/fluid/operators/bilinear_interp_op.h
+++ b/paddle/fluid/operators/bilinear_interp_op.h
@@ -46,8 +46,10 @@ class BilinearInterpKernel : public framework::OpKernel<T> {
     int in_chw = channels * in_hw;
     int out_chw = channels * out_hw;

-    T ratio_h = (out_h > 1) ? static_cast<T>(in_h - 1) / (out_h - 1) : 0.f;
-    T ratio_w = (out_w > 1) ? static_cast<T>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h =
+        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w =
+        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;

     if (in_h == out_h && in_w == out_w) {
       memcpy(output, input, input_t->numel() * sizeof(T));
@@ -56,24 +58,24 @@ class BilinearInterpKernel : public framework::OpKernel<T> {
       for (int i = 0; i < out_h; ++i) {  // loop for images
         int h = ratio_h * i;
         int hid = (h < in_h - 1) ? 1 : 0;
-        T h1lambda = ratio_h * i - h;
-        T h2lambda = 1 - h1lambda;
+        float h1lambda = ratio_h * i - h;
+        float h2lambda = 1.f - h1lambda;

         for (int j = 0; j < out_w; ++j) {
           int w = ratio_w * j;
           int wid = (w < in_w - 1) ? 1 : 0;
-          T w1lambda = ratio_w * j - w;
-          T w2lambda = 1 - w1lambda;
+          float w1lambda = ratio_w * j - w;
+          float w2lambda = 1.f - w1lambda;
           // calculate four position for bilinear interpolation
           const T* in_pos = &input[k * in_chw + h * in_w + w];
           T* out_pos = &output[k * out_chw + i * out_w + j];

           for (int c = 0; c < channels; ++c) {  // loop for channels
             // bilinear interpolation
-            out_pos[0] =
+            out_pos[0] = static_cast<T>(
                 h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[wid]) +
                 h1lambda * (w2lambda * in_pos[hid * in_w] +
-                            w1lambda * in_pos[hid * in_w + wid]);
+                            w1lambda * in_pos[hid * in_w + wid]));
             in_pos += in_hw;
             out_pos += out_hw;
           }
@@ -117,8 +119,10 @@ class BilinearInterpGradKernel : public framework::OpKernel<T> {
     int in_chw = channels * in_hw;
     int out_chw = channels * out_hw;

-    T ratio_h = (out_h > 1) ? static_cast<T>(in_h - 1) / (out_h - 1) : 0.f;
-    T ratio_w = (out_w > 1) ? static_cast<T>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h =
+        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w =
+        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;

     if (in_h == out_h && in_w == out_w) {
       memcpy(d_input, d_output, d_input_t->numel() * sizeof(T));
@@ -127,22 +131,24 @@ class BilinearInterpGradKernel : public framework::OpKernel<T> {
       for (int i = 0; i < out_h; ++i) {  // loop for images
         int h = ratio_h * i;
         int hid = (h < in_h - 1) ? 1 : 0;
-        T h1lambda = ratio_h * i - h;
-        T h2lambda = 1 - h1lambda;
+        float h1lambda = ratio_h * i - h;
+        float h2lambda = 1 - h1lambda;

         for (int j = 0; j < out_w; ++j) {
           int w = ratio_w * j;
           int wid = (w < in_w - 1) ? 1 : 0;
-          T w1lambda = ratio_w * j - w;
-          T w2lambda = 1 - w1lambda;
+          float w1lambda = ratio_w * j - w;
+          float w2lambda = 1 - w1lambda;
           T* in_pos = &d_input[k * in_chw + h * in_w + w];
           const T* out_pos = &d_output[k * out_chw + i * out_w + j];

           for (int c = 0; c < channels; ++c) {  // loop for channels
-            in_pos[0] += h2lambda * w2lambda * out_pos[0];
-            in_pos[wid] += h2lambda * w1lambda * out_pos[0];
-            in_pos[hid * in_w] += h1lambda * w2lambda * out_pos[0];
-            in_pos[hid * in_w + wid] += h1lambda * w1lambda * out_pos[0];
+            in_pos[0] += static_cast<T>(h2lambda * w2lambda * out_pos[0]);
+            in_pos[wid] += static_cast<T>(h2lambda * w1lambda * out_pos[0]);
+            in_pos[hid * in_w] +=
+                static_cast<T>(h1lambda * w2lambda * out_pos[0]);
+            in_pos[hid * in_w + wid] +=
+                static_cast<T>(h1lambda * w1lambda * out_pos[0]);
             in_pos += in_hw;
             out_pos += out_hw;
           }
diff --git a/paddle/fluid/operators/logical_op.cc b/paddle/fluid/operators/logical_op.cc
index db109f5cd053d84718ac85bd4693ecece12ce172..26970db8d2af62bb06fce4eb1a1f21fd41617bd1 100644
--- a/paddle/fluid/operators/logical_op.cc
+++ b/paddle/fluid/operators/logical_op.cc
@@ -146,6 +146,6 @@ REGISTER_UNARY_LOGICAL_OP(logical_not, "$$Out = !X$$");
 REGISTER_UNARY_LOGICAL_KERNEL(logical_not, CPU,
                               paddle::operators::LogicalNotFunctor);
 REGISTER_BINARY_LOGICAL_OP(logical_xor,
-                           "$$Out = (X || Y) \\, \\&\\& \\, !(X \\&\\& Y)$$");
+                           "$$Out = (X || Y) \\&\\& !(X \\&\\& Y)$$");
 REGISTER_BINARY_LOGICAL_KERNEL(logical_xor, CPU,
                               paddle::operators::LogicalXorFunctor);
diff --git a/paddle/fluid/operators/math/concat.cu b/paddle/fluid/operators/math/concat.cu
index 6205f3cd85c7201e923eacad4319e2c6fda689fe..5863d74fca21de8b77bc208fb95d8fd52562f7a7 100644
--- a/paddle/fluid/operators/math/concat.cu
+++ b/paddle/fluid/operators/math/concat.cu
@@ -209,7 +209,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {

     outputs_cols[0] = 0;
     for (int i = 0; i < o_num; ++i) {
-      int t_col = outputs->at(i)->numel() / out_row;
+      int t_col = ref_inputs.at(i)->numel() / out_row;
       if (sameShape) {
         if (t_col != out0_col) sameShape = false;
       }
diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc
index d39154c6f88d6d17c1719eb9a5b048211f4bb52b..c3387be6daa3bd34a6e3410ced23fce5d65f2cf7 100644
--- a/paddle/fluid/operators/math/math_function.cc
+++ b/paddle/fluid/operators/math/math_function.cc
@@ -30,6 +30,7 @@ template struct SetConstant<platform::CPUDeviceContext, float>;
 template struct SetConstant<platform::CPUDeviceContext, int>;
 template struct SetConstant<platform::CPUDeviceContext, int64_t>;
 template struct SetConstant<platform::CPUDeviceContext, bool>;
+template struct SetConstant<platform::CPUDeviceContext, uint8_t>;

 #define DEFINE_CPU_TRANS(RANK) \
   template struct Transpose
diff --git a/paddle/fluid/operators/tensorrt_engine_op.cc b/paddle/fluid/operators/tensorrt_engine_op.cc
--- a/paddle/fluid/operators/tensorrt_engine_op.cc
+++ b/paddle/fluid/operators/tensorrt_engine_op.cc
@@ ... @@
+#include <string>
+
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/utils/singleton.h"
+#include "paddle/fluid/operators/tensorrt_engine_op.h"

 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h
index 8455d24ddf47382b235edda10cb9b2e8934c5f06..295d6ba0395b68cabab3bd4117cedd912df48f5d 100644
--- a/paddle/fluid/operators/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt_engine_op.h
@@ -16,10 +16,12 @@

 #ifdef PADDLE_WITH_CUDA

+#include <string>
+#include <vector>
+
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
-#include "paddle/fluid/inference/tensorrt/engine.h"

 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt_engine_op_test.cc
index 3a2fef48052ae3943abad14bf87c14ca79251c94..358e2d151bb8f990503ea8a51ba5f81e0a1dc816 100644
--- a/paddle/fluid/operators/tensorrt_engine_op_test.cc
+++ b/paddle/fluid/operators/tensorrt_engine_op_test.cc
@@ -179,7 +179,6 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
                        const std::string& z_name, bool x_created,
                        const shape_t& x_shape, const shape_t& y_shape,
                        const shape_t& z_shape) {
-    LOG(INFO) << "create fc op";
     auto* fc = block_desc.AppendOp();
     fc->SetType("mul");
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 74036bcb3114df8fc4613bd9f4dc327463397dba..dc02c6632e2a5265daf0c2f9949bdb94beec4232 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -159,6 +159,11 @@ PYBIND11_PLUGIN(core) {
             new (&instance) LoDTensor(new_offset_lod);
           })
       .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
+      // We implement offset-based LoD in C++, while the Python API uses
+      // length-based LoD. So we renamed set_lod to
+      // set_recursive_sequence_lengths to avoid misuse.
+      // The discussion is here:
+      // https://github.com/PaddlePaddle/Paddle/issues/10855
       .def("set_lod",
            [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
              // the input lod is offset-based level-of-detail info
@@ -199,6 +204,7 @@ PYBIND11_PLUGIN(core) {
             std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
             return new_lod;
           })
+      // See the comments above for set_lod.
      .def("recursive_sequence_lengths",
           [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
             // output the length-based lod info
diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h
index 93b09ed6922b32a5531224acc470daf0d97f95bd..6da3846ac69980daac4f0fb7401b2573c21c89bf 100644
--- a/paddle/fluid/pybind/tensor_py.h
+++ b/paddle/fluid/pybind/tensor_py.h
@@ -97,7 +97,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
 inline pybind11::buffer_info CastToPyBuffer(const framework::Tensor &tensor) {
   auto buffer_info =
       details::CastToPyBufferImpl<true, 0, float, int, double, int64_t, bool,
-                                  uint8_t>()(tensor);
+                                  uint8_t, platform::float16>()(tensor);
   return buffer_info;
 }
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 5af5bc9c4731317075b3912a4749a0b358bdd56e..45af83708ea63fc1b6aa86f1e8423bb44b7388a6 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -44,7 +44,7 @@ import metrics
 import transpiler
 from param_attr import ParamAttr, WeightNormParamAttr
 from data_feeder import DataFeeder
-from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace
+from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
 from transpiler import DistributeTranspiler, InferenceTranspiler, \
     memory_optimize, release_memory
 from concurrency import (Go, make_channel, channel_send, channel_recv,
@@ -83,6 +83,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \
     'profiler',
     'unique_name',
     'recordio_writer',
+    'Scope',
 ]
diff --git a/python/paddle/fluid/average.py b/python/paddle/fluid/average.py
index 6abe8233b07c484494848c566e9898600a7d8f5c..358e24df31bb517604481bb48b9180e579f8460d 100644
--- a/python/paddle/fluid/average.py
+++ b/python/paddle/fluid/average.py
@@ -36,6 +36,25 @@ def _is_number_or_matrix_(var):

 class WeightedAverage(object):
+    """
+    Calculate the weighted average.
+
+    The averaging is done entirely in Python. It does not change Paddle's
+    Program, nor does it modify the NN model's configuration. It is purely
+    a wrapper around Python functions.
+
+    Examples:
+        .. code-block:: python
+
+            avg = fluid.average.WeightedAverage()
+            avg.add(value=2.0, weight=1)
+            avg.add(value=4.0, weight=2)
+            avg.eval()
+
+            # The result is 3.333333333.
+            # For (2.0 * 1 + 4.0 * 2) / (1 + 2) = 3.333333333
+    """
+
     def __init__(self):
         warnings.warn(
             "The %s is deprecated, please use fluid.metrics.Accuracy instead." %
diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index 19c9b2fad4446de282f6f579914f63e0737ae604..4faa06303170488d0de2fda4c1461cfe2d623d35 100644
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -147,7 +147,7 @@ def _addup_repetitive_outputs_(op_descs):
             else:
                 if len(renamed_vars[var_name]) == 1:
                     new_name = var_name + "@RENAME@" + \
-                        str(var_rename_count[var_name])
+                               str(var_rename_count[var_name])
                     var_rename_count[var_name] += 1
                     # rename original var_name
                     renamed_vars[var_name][0] = new_name
@@ -155,7 +155,7 @@ def _addup_repetitive_outputs_(op_descs):
                     _rename_arg_(pending_sum_ops, var_name, new_name)

                 new_name = var_name + "@RENAME@" + \
-                    str(var_rename_count[var_name])
+                           str(var_rename_count[var_name])
                 var_rename_count[var_name] += 1
                 op_desc.rename_output(var_name, new_name)
                 renamed_vars[var_name].append(new_name)
@@ -435,18 +435,65 @@ def _get_stop_gradients_(program):

 def append_backward(loss, parameter_list=None, no_grad_set=None,
                     callbacks=None):
     """
-    Append backward part to main_program
+    Append backward part to main_program.

-    Args:
-        loss(Variable): The variable generated by cost function.
-        parameter_list(list[string]): Parameters that need to be updated by
-            optimizer. If None, it means all parameters need to be updated.
-        no_grad_set(set): Variables that have no gradients in Block 0.
-            All variables with `step_gradient=True` from all blocks will be
-            automatically added.
+    A complete neural network training is made up of forward and backward
+    propagation. However, when we configure a network, we only need to
+    specify its forward part. The backward part is generated automatically
+    according to the forward part by this function.

-    Return:
-        (list[(Variable,Variable)]): list of (parameter, gradient) pair.
+    In most cases, users do not need to invoke this function manually. It
+    will be automatically invoked by the optimizer's `minimize` function.
+
+    Args:
+        loss(Variable): The loss variable of the network.
+        parameter_list(list[string]|None): Names of parameters that need
+                                           to be updated by optimizers.
+                                           If it is None, all parameters
+                                           will be updated.
+                                           Default: None
+        no_grad_set(set|None): Variables in Block 0 whose gradients
+                               should be ignored. All variables with
+                               `step_gradient=True` from all blocks will
+                               be automatically added into this set.
+                               Default: None
+        callbacks(list[callable object]|None): The callbacks are used for
+                                               doing some custom jobs during
+                                               backward part building. All
+                                               callable objects in it will
+                                               be invoked once each time a
+                                               new gradient operator is added
+                                               into the program. The callable
+                                               object must have two input
+                                               parameters: 'block' and
+                                               'context'. The 'block' is the
+                                               block to which the new gradient
+                                               operator will be added. The
+                                               'context' is a map, whose keys
+                                               are gradient variable names and
+                                               values are corresponding
+                                               original variables. In addition,
+                                               the 'context' has another
+                                               special key-value pair: the key
+                                               is the string
+                                               '__current_op_desc__' and the
+                                               value is the op_desc of the
+                                               gradient operator that has just
+                                               triggered the callable object.
+
+    Returns:
+        list[(Variable,Variable)]: A list of (parameter, gradient) pairs,
+                                   where each pair holds a parameter and its
+                                   corresponding gradient variable.
+
+    Raises:
+        AssertionError: If `loss` is not an instance of Variable.
+
+    Examples:
+        .. code-block:: python
+
+            # network configuration code
+            # ...
+            avg_loss = fluid.layers.mean(loss)
+            param_grad_list = fluid.backward.append_backward(loss=avg_loss)
     """
     assert isinstance(loss, framework.Variable)
diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py
index ac396002018d5952bee4aa79ff4aaa5463e2e9e1..c859778b3757f638ac531620f241e684522add57 100644
--- a/python/paddle/fluid/data_feeder.py
+++ b/python/paddle/fluid/data_feeder.py
@@ -29,6 +29,13 @@ class DataToLoDTensorConverter(object):
         self.place = place
         self.lod_level = lod_level
         self.shape = shape
+        negative_count = 0
+        for s in self.shape:
+            if s < 0:
+                negative_count += 1
+            if negative_count > 1:
+                self.shape = None
+                break
         if dtype == core.VarDesc.VarType.FP32:
             self.dtype = 'float32'
         elif dtype == core.VarDesc.VarType.INT64:
@@ -61,7 +68,9 @@ class DataToLoDTensorConverter(object):
             self._feed_impl_(each_data, lod[1:], lod_level - 1)

     def done(self):
-        arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape)
+        arr = numpy.array(self.data, dtype=self.dtype)
+        if self.shape:
+            arr = arr.reshape(self.shape)
         t = core.LoDTensor()
         t.set(arr, self.place)
         if self.lod_level > 0:
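The `DataToLoDTensorConverter` change above only reshapes when at most one dimension is unknown. A minimal sketch of that rule in isolation (`to_lod_array` is a hypothetical helper, not part of the patch):

```python
import numpy

def to_lod_array(data, shape, dtype='float32'):
    # Mirror the converter's rule: reshape only when at most one
    # dimension is negative (unknown); otherwise keep the flat array.
    if sum(1 for s in shape if s < 0) > 1:
        shape = None
    arr = numpy.array(data, dtype=dtype)
    return arr.reshape(shape) if shape else arr

# A batch-sized first dimension (-1) is still inferred by numpy:
assert to_lod_array([1, 2, 3, 4], [-1, 2]).shape == (2, 2)
```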
+ """ converter = [] for lod_level, shape, dtype in six.zip( self.feed_lod_level, self.feed_shapes, self.feed_dtypes): @@ -121,6 +195,20 @@ class DataFeeder(object): return ret_dict def feed_parallel(self, iterable, num_places=None): + """ + Takes multiple mini-batches. Each mini-batch will be feed on each + device in advance. + + Args: + iterable(list|tuple): the input data. + num_places(int): the number of devices. Default None. + + Returns: + dict: the result of conversion. + + Notes: + The number of devices and number of mini-batches must be same. + """ if isinstance(self.place, core.CUDAPlace): places = [ core.CUDAPlace(i) @@ -159,6 +247,24 @@ class DataFeeder(object): multi_devices, num_places=None, drop_last=True): + """ + Converter the input data into a data that returned by reader into + multiple mini-batches. Each mini-batch will be feed on each device. + + Args: + reader(fun): the input data. + multi_devices(bool): the number of places. Default None. + num_places(int): the number of places. Default None. + drop_last(bool): the number of places. Default None. + + Returns: + dict: the result of conversion. + + Raises: + ValueError: If drop_last is False and the data batch which cannot + fit for devices. + """ + def __reader_creator__(): if not multi_devices: for item in reader(): diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 33d8f709412b25d29c6618272500dd7b953d6645..159b0ca39eed547e4f3448e7ebf4807299d465b2 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -25,6 +25,13 @@ g_scope = core.Scope() def global_scope(): + """ + Get the global/default scope instance. There are a lot of APIs use + :code:`global_scope` as its default value, e.g., :code:`Executor.run` + + Returns: + Scope: The global/default scope instance. + """ return g_scope @@ -37,6 +44,19 @@ def switch_scope(scope): @contextlib.contextmanager def scope_guard(scope): + """ + Change the global/default scope instance by Python `with` statement. All + variable in runtime will assigned to the new scope. + + Examples: + >>> import paddle.fluid as fluid + >>> new_scope = fluid.Scope() + >>> with fluid.scope_guard(new_scope): + >>> ... + + Args: + scope: The new global/default scope. + """ ex = switch_scope(scope) yield switch_scope(ex) @@ -135,14 +155,18 @@ def has_fetch_operators(block, fetch_targets, fetch_holder_name): def fetch_var(name, scope=None, return_numpy=True): """ - Fetch the value of the variable with the given name from the given scope + Fetch the value of the variable with the given name from the + given scope. + Args: name(str): name of the variable. Typically, only persistable variables can be found in the scope used for running the program. scope(core.Scope|None): scope object. It should be the scope where you pass to Executor.run() when running your program. - If None, global_scope() will be used. - return_numpy(bool): whether convert the tensor to numpy.ndarray + If None, global_scope() will be used. Default None. + return_numpy(bool): whether convert the tensor to numpy.ndarray. + Default True. 
+ Returns: LodTensor|numpy.ndarray """ diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 42d3c9c153de2671f67bcb6d8f14e677413011ab..db21b1f3c03c40d79084b0dbb57d22f6d41fa270 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -30,8 +30,6 @@ __all__ = [ 'default_startup_program', 'default_main_program', 'program_guard', - 'switch_startup_program', - 'switch_main_program', 'get_var', ] @@ -43,7 +41,8 @@ ZERO_VAR_SUFFIX = core.kZeroVarSuffix() def grad_var_name(var_name): """ - return gradient name for a certain var name + Returns: + str: gradient name for a certain var name """ return var_name + GRAD_VAR_SUFFIX @@ -51,10 +50,12 @@ def grad_var_name(var_name): def convert_np_dtype_to_dtype_(np_dtype): """ Convert the data type in numpy to the data type in Paddle + Args: - np_dtype(np.dtype): the data type in numpy + np_dtype(np.dtype): the data type in numpy. - Returns(core.VarDesc.VarType): the data type in Paddle + Returns: + core.VarDesc.VarType: the data type in Paddle. """ dtype = np.dtype(np_dtype) @@ -120,37 +121,53 @@ def _debug_string_(proto, throw_on_error=True): class Variable(object): """ - Python variable. Every input and output of an operator is a variable. Every - variable belongs to a block. The variable has a name and two variables in - different blocks could have the same name. + In Fluid, every input and output of an operator is a variable. In most + cases, variables are used for holding different kinds of data or training + labels. A variable belongs to a block. All variable has its own name and + two variables in different blocks could have the same name. - There are many kinds of variables. Please reference the framework.proto for - details. + There are many kinds of variables. Each kind of them has its own attributes + and usages. Please reference the framework.proto for details. - Notes: The constructor of Variable should not be invoked directly. Please - use `Block.create_var` to create a variable. - - >>> cur_program = Program() - >>> cur_block = cur_program.current_block() - >>> new_variable = cur_block.create_var( - >>> name="X", shape=[-1, 23, 48], dtype='float32') + Most of a Variable's member variables can be setted to be None. It mean + it is not available or will be specified later. Args: - block(Block): The associated block. It will be passed by - `Block.create_var` automatically. + block(Block): The block that the variable belongs to. type(core.VarDesc.VarType): Variable type. Please reference the framework.proto for details. - shape(tuple|list|None): The shape of variable. -1 means the batch size. + name(str|None): The name of the variable. If setted None, it will be + generated automatically. Default: None + shape(tuple|list|None): The shape of the variable. -1 means the batch size. Some kinds of variable do not contain shape, just set it to None. - dtype(np.dtype|core.VarDesc.VarType|str): The data type of variable. - lod_level(int): The level of lod tensor. 0 means it is not a time + Default: None + dtype(np.dtype|core.VarDesc.VarType|str|None): The data type of variable. + Default: None + lod_level (int|None): The level of lod tensor. 0 means it is not a time series data. - capacity(int): The capacity of Channel variable. Ignored - for other types. - persistable(bool): True if the variable should be saved as check point. - Defaults to False. - stop_gradient(bool): True if the variable will stop to calculate - gradients when backward. Defaults to False. 
+ Default: None + capacity (int|None): The capacity of Channel variable. Ignored for other + types. Default: None + persistable (bool|None): True if the variable is persistable. A persistable + variable will not be deleted after an iteration ending. Defaults: None. + error_clip (BaseErrorClipAttr|None): The error clip attributes of the + corresponding gradient variable. Default: None + stop_gradient (bool): True if the variable will stop to calculate its + gradients when backward. Default: False. + is_data (bool): True if the variable is an input data. Default: False + + Notes: + The constructor of Variable should not be invoked directly. Please + use `Block.create_var` to create a variable. + + Examples: + .. code-block:: python + + cur_program = Program() + cur_block = cur_program.current_block() + new_variable = cur_block.create_var(name="X", + shape=[-1, 23, 48], + dtype='float32') """ def __init__(self, @@ -253,13 +270,14 @@ class Variable(object): Get debug string. Args: - throw_on_error(bool): True if raise an exception when self is not - intialized. + throw_on_error(bool): True if raise an exception when self is + not initialized. with_details(bool): more details about variables and parameters - (e.g. trainable, optimize_attr, ...) will be printed when with_details is True - - Returns(str): The debug string. + (e.g. trainable, optimize_attr, ...) will be printed when + with_details is True. Default False; + Returns: + str: The debug string. """ assert isinstance(throw_on_error, bool) and isinstance(with_details, bool) @@ -276,6 +294,15 @@ class Variable(object): __repr__ = __str__ def set_desc(self, input): + """ + Set the variable description. + + Args: + input(core.VarDesc): The new VarDesc. + + Returns: + None + """ self.desc = input @property @@ -312,6 +339,15 @@ class Variable(object): return self.desc.type() def set_error_clip(self, error_clip): + """ + Set the error_clip. + + Args: + error_clip(BaseErrorClipAttr) : The new error_clip. + + Returns: + None + """ self.error_clip = error_clip @@ -319,8 +355,8 @@ def get_all_op_protos(): """ Get all registered op proto from PaddlePaddle C++ end. - Returns(list): list of OpProto - + Returns: + list: list of OpProto. """ protostrs = core.get_all_op_protos() ret_values = [] @@ -373,9 +409,45 @@ class OpProtoHolder(object): class Operator(object): """ - Python Operator class. The operator represents the build in instructions in a - Block. Users can use the build in instructions to describe their neural - network. + In Fluid, all the operation are represented by Operator, and Operator + is regarded as a build in an instruction of a Block. Users can use the + build in instructions to describe their neural network. + + Args: + block(Block): The block has the current operator. + desc(core.OpDesc): The protobuf description of Operator. + type(str): The type of operator. Default None. + inputs(dict): The input of this Operator. it is a dictionary, for every + element, key is the input parameter name, and value is a list of + variables. Default None. + outputs(dict): The output of this Operator. it is a dictionary, for + every element, key is the input parameter name, and value is a list + of variables. Default None. + attrs(dict): The attributes of this Operator. it is a dictionary, for + every element, key is attribute name, and value is the attribute value. + The attribute type should be as same as the type registered in C++ side. + Default None. + + Returns: + Operator: The initialized Operator. 
+
+    Raises:
+        ValueError: If the passed input, output or attrs doesn't match the
+            specification of the Operator registered on the C++ side.
+
+    Notes:
+        The constructor of operator should not be invoked directly. Use
+        Block.append_op or Block.prepend_op instead.
+
+    Examples:
+        .. code-block:: python
+
+            cur_program = Program()
+            cur_block = cur_program.current_block()
+            # var1 += var2 + var3
+            cur_block.append_op(type="sum",
+                                inputs={"X": [var1, var2, var3]},
+                                outputs={"Out": [var1]})
     """
     OP_WITHOUT_KERNEL_SET = {
         'feed', 'fetch', 'save', 'load', 'recurrent', 'go',
@@ -392,31 +464,7 @@ class Operator(object):
                  inputs=None,
                  outputs=None,
                  attrs=None):
-        """
-        Constructor.

-        Notes: The constructor of operator should not be invoked directly. Use
-        Block.append_op or Block.prepend_op instead.
-
-        >>> cur_program = Program()
-        >>> cur_block = cur_program.current_block()
-        >>> # var1 += var2 + var3
-        >>> cur_block.append_op(type="sum",
-        >>>                     inputs={"X": [var1, var2, var3]},
-        >>>                     outputs={"Out": [var1]})
-
-        Args:
-            block(Block): The block has the current operator.
-            desc(core.OpDesc): The protobuf description.
-            type(str): The type of operator.
-            inputs(dict): The input dictionary. Key is the input parameter name.
-                Value is a list of variables.
-            outputs(dict): The output dictionary which has the same format with
-                inputs.
-            attrs(dict): The attributes dictionary. Key is attribute name. Value
-                is the attribute value. The attribute type should be as same as
-                the type registered in C++
-        """
         self.block = block
         self.desc = desc
         self.attrs = attrs
@@ -529,12 +577,14 @@ class Operator(object):

     def to_string(self, throw_on_error):
         """
-        To debug string.
+        Get debug string.
+
         Args:
-            throw_on_error(bool): raise exception when self is not initialized
-                when throw_on_error is True
+            throw_on_error(bool): Whether to raise exception if self is not
+                initialized.

-        Returns(str): The debug string.
+        Returns:
+            str: The debug string.

         """
         protostr = self.desc.serialize_to_string()
@@ -552,29 +602,45 @@ class Operator(object):

     def input(self, name):
         """
-        Get input arguments by the input parameter name
-        Args:
-            name(str): The input parameter name
+        Get the input arguments according to the input parameter name.

-        Returns(list): return the list of argument names associated with the
-            specific parameter name.
+        Args:
+            name(str): The input parameter name.

+        Returns:
+            list: the list of argument names associated with \
+                the specific parameter name.
         """
         return self.desc.input(name)

     def rename_input(self, old_name, new_name):
+        """
+        Rename the `old_name` to `new_name`.
+
+        Args:
+            old_name(str): The old name of the Operator's input.
+            new_name(str): The new name of the Operator's input.
+
+        Returns:
+            None
+        """
         self.desc.rename_input(old_name, new_name)

     def rename_output(self, old_name, new_name):
+        """
+        Rename the `old_name` to `new_name`.
+
+        Args:
+            old_name(str): The old name of the Operator's output.
+            new_name(str): The new name of the Operator's output.
+
+        Returns:
+            None
+        """
         self.desc.rename_output(old_name, new_name)

     @property
     def input_names(self):
-        """
-        Get all input parameter names
-        Returns(list): return a list of input parameter names
-
-        """
         return self.desc.input_names()

     @property
@@ -587,33 +653,23 @@ class Operator(object):

     def output(self, name):
         """
-        Get output arguments by the output parameter name
-        Args:
-            name(str): The output parameter name
+        Get output arguments by the output parameter name.
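To make the accessor docstrings above concrete, here is a small hedged sketch that builds one operator and reads it back through `input_names`, `input` and `output` (the `scale` op and its `scale` attribute are standard Fluid operators; treat the exact names as assumptions):

.. code-block:: python

    import paddle.fluid as fluid

    prog = fluid.Program()
    block = prog.current_block()
    x = block.create_var(name="x", shape=[1], dtype='float32')
    y = block.create_var(name="y", shape=[1], dtype='float32')
    op = block.append_op(type="scale", inputs={"X": x},
                         outputs={"Out": y}, attrs={"scale": 2.0})

    print(op.type)           # 'scale'
    print(op.input_names)    # ['X']
    print(op.input("X"))     # ['x'] -- argument names, not Variable objects
    print(op.output("Out"))  # ['y']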
-        Returns(list): return the list of argument names associated with the
-            specific parameter name.
+        Args:
+            name(str): The output parameter name.

+        Returns:
+            list: the list of argument names associated with \
+                the specific parameter name.
         """
         return self.desc.output(name)

     @property
     def output_names(self):
-        """
-        Get all output parameter names
-        Returns(list): return a list of output parameter names
-
-        """
         return self.desc.output_names()

     @property
     def idx(self):
-        """
-        Return the array index of current operator.
-        Returns(int): The array index in block.ops array
-        Raises:
-            ValueError: when the operator is not found.
-        """
         for i, op in enumerate(self.block.ops):
             if op == self:
                 return i
@@ -622,27 +678,40 @@ class Operator(object):

     def has_attr(self, name):
         """
-        operator has the attribute with name or not.
+        Whether this Operator has the attribute with the given name or not.
+
         Args:
-            name(str): the attribute name
+            name(str): the attribute name.

-        Returns(bool): True if has this attribute.
+        Returns:
+            bool: True if has this attribute.

         """
         return self.desc.has_attr(name)

     def attr_type(self, name):
         """
-        Get the type of attribute by attribute name
-        Args:
-            name(str): the attribute name
+        Get the type of attribute by attribute's name.

-        Returns(core.AttrType): the attribute type
+        Args:
+            name(str): the attribute name.

+        Returns:
+            core.AttrType: the attribute type.
         """
         return self.desc.attr_type(name)

     def set_attr(self, name, val):
+        """
+        Set the value of attribute by attribute's name.
+
+        Args:
+            name(str): the attribute name.
+            val(bool|int|str|float|list): the value of the attribute.
+
+        Raises:
+            ValueError: If the type of value doesn't match with desc.attr_type(name).
+        """
         self.attrs[name] = val
         if isinstance(val, Block):
             self.desc.set_block_attr(name, val.desc)
@@ -654,40 +723,39 @@ class Operator(object):

     @property
     def attr_names(self):
-        """
-        Get all attribute names
-        Returns(list): The list of attribute name
-
-        """
         return self.desc.attr_names()

     def attr(self, name):
         """
-        Get attribute by name
+        Get the attribute by name.
+
         Args:
-            name(str): the attribute name
+            name(str): the attribute name.

-        Returns(bool|int|str|float|list): The attribute value. The return value
+        Returns:
+            bool|int|str|float|list: The attribute value. The return value
             can be any valid attribute type.
-
         """
         return self.desc.attr(name)

     def block_attr(self, name):
         """
-        Get the block attribute by name
-        Args:
-            name(str): the attribute name
+        Get the block attribute by name.

-        Returns(int): the block index
+        Args:
+            name(str): the attribute name.

+        Returns:
+            int: the block index.
         """
         return self.desc.block_attr(name)

     def all_attrs(self):
         """
-        Get the attribute dict
-        Returns(dict): The Operator's attribute dict
+        Get the attribute dict.
+
+        Returns:
+            dict: The Operator's attribute dict.
         """
         attr_names = self.attr_names
         attr_map = {}
@@ -700,6 +768,35 @@ class Operator(object):


 class Block(object):
+    """
+    In Fluid, a Program consists of multiple Blocks, and a Block stores
+    VarDescs and OpDescs. In a specific Block, a VarDesc has a unique name.
+    One block could have some child blocks, and a child block's name scope
+    should inherit its parent's, so that an OpDesc in a child block can
+    reference a VarDesc that is stored in the parent block.
+    Please reference the framework.proto for details.
+
+    Args:
+        program(Program): The Program that the Block belongs to.
+        idx(int): The block's id in the Program.
+
+    Notes:
+        The constructor of Block should not be invoked directly.
Please
+        use `Program.create_block()` to create a block.
+
+    Examples:
+        .. code-block:: python
+
+            cur_program = Program()
+            cur_block = cur_program.current_block()
+            var = cur_block.create_var(name="X",
+                                       shape=[-1, 23, 48],
+                                       dtype='float32')
+            cur_block.append_op(type="abs",
+                                inputs={"X": [var]},
+                                outputs={"Out": [var]})
+    """
+
     def __init__(self, program, idx):
         self.desc = program.desc.block(idx)
         self.vars = collections.OrderedDict()  # var_name --> var
@@ -712,15 +809,17 @@ class Block(object):

     def to_string(self, throw_on_error, with_details=False):
         """
-        To debug string.
+        Get debug string.
+
         Args:
             throw_on_error(bool): raise exception when self is not initialized
-                when throw_on_error is True
+                when throw_on_error is True.
             with_details(bool): more details about variables and parameters
-                (e.g. trainable, optimize_attr, ...) will be printed when with_details is True
-
-        Returns(str): The debug string.
+                (e.g. trainable, optimize_attr, ...) will be printed when
+                with_details is True. Default: False.

+        Returns:
+            str: The debug string.
         """
         assert isinstance(throw_on_error, bool) and isinstance(with_details,
                                                                bool)
@@ -752,6 +851,15 @@ class Block(object):
         return self.desc.get_forward_block_idx()

     def set_forward_block_idx(self, idx):
+        """
+        Set the forward block index.
+
+        Args:
+            idx(int): the block index.
+
+        Returns:
+            None
+        """
         self.desc.set_forward_block_idx(idx)

     @property
@@ -759,6 +867,19 @@ class Block(object):
         return self.desc.id

     def var(self, name):
+        """
+        Get a Variable by name from this block.
+
+        Args:
+            name(str): the Variable's name.
+
+        Raises:
+            ValueError: If the input's type is not str, or this block
+                doesn't have a Variable with the given name.
+
+        Returns:
+            Variable: the Variable with the given name.
+        """
         if not isinstance(name, basestring):
             raise TypeError(
                 "var require string as parameter, but get %s instead." %
@@ -769,6 +890,19 @@ class Block(object):
         return v

     def var_recursive(self, name):
+        """
+        Get a Variable by name from this block recursively.
+
+        Args:
+            name(str): the Variable's name.
+
+        Raises:
+            ValueError: If neither this block nor any of its ancestor blocks
+                has a Variable with the given name.
+
+        Returns:
+            Variable: the Variable with the given name.
+        """
         frontier = list()
         visited = set()
@@ -815,6 +949,18 @@ class Block(object):
     def rename_var(self, name, new_name):
         """
         Rename variable in vars and ops' inputs and outputs
+
+        Args:
+            name(str): the name that needs to be renamed.
+            new_name(str): the new name to rename to.
+
+        Raises:
+            ValueError: If this block doesn't have a variable with the given
+                name, or the type of the variable with the given name is
+                neither Parameter nor Variable.
+
+        Returns:
+            Variable: the renamed Variable.
         """
         if not self.has_var(name):
             raise ValueError("var %s is not in current block" % name)
@@ -878,12 +1024,27 @@ class Block(object):
         return param

     def append_op(self, *args, **kwargs):
+        """
+        Appends a new Operator according to the given arguments.
+
+        Returns:
+            Operator: the appended Operator.
+        """
         op_desc = self.desc.append_op()
         op = Operator(block=self, desc=op_desc, *args, **kwargs)
         self.ops.append(op)
         return op

     def insert_op(self, index, *args, **kwargs):
+        """
+        Insert an Operator according to the given arguments.
+
+        Args:
+            index(int): the index at which to insert the operator.
+
+        Returns:
+            Operator: the inserted Operator.
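A hedged sketch of the Block mutation methods documented in this hunk (`var`, `append_op`, `insert_op`); the `assign` op is used only because it needs no extra attributes:

.. code-block:: python

    import paddle.fluid as fluid

    prog = fluid.Program()
    block = prog.current_block()
    a = block.create_var(name="a", shape=[1], dtype='float32')
    b = block.create_var(name="b", shape=[1], dtype='float32')

    block.append_op(type="assign", inputs={"X": a}, outputs={"Out": b})
    # insert_op places an operator at an explicit index.
    block.insert_op(0, type="assign", inputs={"X": b}, outputs={"Out": a})

    print(block.var("a").name)            # look-up by name
    print([op.type for op in block.ops])  # ['assign', 'assign']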
+ """ self.sync_with_cpp() op_desc = self.desc.insert_op(index) op = Operator(block=self, desc=op_desc, *args, **kwargs) @@ -891,11 +1052,30 @@ class Block(object): return op def remove_op(self, index): + """ + Remove the specific position operator. + + Args: + index(int): the position that the operator to insert. + + Returns: + None + """ self.sync_with_cpp() self.desc.remove_op(index, index + 1) del self.ops[index] def slice_ops(self, start, end): + """ + Return the Operator between start and end. + + Args: + start(int): the start position. + end(int): the end position. + + Returns: + list: the Operators between start and end. + """ return self.ops[start:end] def prepend_op(self, *args, **kwargs): @@ -906,9 +1086,8 @@ class Block(object): def sync_with_cpp(self): """ - Sync from the desc on the c++ end. - - This method is used to synchronize the c++ desc instance generated by backward. + Sync from the desc on the c++ end. This method is used to synchronize + the c++ desc instance generated by backward. """ # sync variables from cpp for var in self.desc.all_vars(): @@ -973,9 +1152,14 @@ class Block(object): def copy_param_info_from(self, other): """ - Copy the information of parameters from the other block + Copy the information of parameters from the other block. + Args: - other(Block): the other block + other(Block): the other block. + + Raises: + ValueError: If type of input is not Block, or the `other` and this + block is not in the same topology. Returns: None @@ -1007,11 +1191,12 @@ class Block(object): def clone_variable(self, var): """ Clone a variable into current block. + Args: var: the variable to be cloned. Returns: - The new variable cloned from 'var' in current block. + Variable: the new variable cloned from 'var' in current block. """ assert isinstance(var, Variable) ret_var = None @@ -1051,23 +1236,18 @@ class Program(object): Notes: we have default_startup_program and default_main_program by default, a pair of them will shared the parameters. The default_startup_program only run once to initialize parameters, - default_main_program run in every minibatch and adjust the weights. - - Args: - None + default_main_program run in every mini batch and adjust the weights. Returns: - Python Program + A empty program. Examples: - .. code-block:: python - - main_program = Program() - startup_program = Program() - with fluid.program_guard(main_program=main_program, startup_program=startup_program): - fluid.layers.data(name="x", shape=[-1, 784], dtype='float32') - fluid.layers.data(name="y", shape=[-1, 1], dtype='int32') - fluid.layers.fc(name="fc", shape=[10], dtype='float32', act="relu") + >>> main_program = fluid.Program() + >>> startup_program = fluid.Program() + >>> with fluid.program_guard(main_program=main_program, startup_program=startup_program): + >>> fluid.layers.data(name="x", shape=[-1, 784], dtype='float32') + >>> fluid.layers.data(name="y", shape=[-1, 1], dtype='int32') + >>> fluid.layers.fc(name="fc", shape=[10], dtype='float32', act="relu") """ @@ -1081,6 +1261,19 @@ class Program(object): @property def op_role(self): + """ + The operator role. In a enum {Forward, Backward, Optimize}. + + Notes: this is a low level API. It is used only for ParallelExecutor to + duplicate or schedule operator to devices. + + For example, the forward operator should be executed on every device. + The backward operator should be executed on every device and the + parameter gradient of backward (use :code:`op_role_var` to get this + variable) operator should be merged to one device. 
The optimization
+        operators should be executed on only one device and broadcast the
+        optimization result, i.e., the new parameter, to every other device.
+        """
         return self._current_role

     @op_role.setter
@@ -1089,6 +1282,13 @@ class Program(object):

     @property
     def op_role_var(self):
+        """
+        The auxiliary variables for :code:`op_role` property.
+
+        See Also: :code:`Program.op_role`'s documentation for details.
+
+        Notes: This is a very low-level API. Users should not use it directly.
+        """
         return self._op_role_var

     @op_role_var.setter
@@ -1097,6 +1297,21 @@ class Program(object):

     @contextlib.contextmanager
     def optimized_guard(self, var):
+        """
+        A with guard to set :code:`Optimization` :code:`OpRole` and
+        :code:`OpRoleVar` automatically.
+
+        Notes: This is a very low level API. Users should not use it directly.
+
+        Args:
+            var(Variable|str): The variable (name) to be optimized.
+
+        Examples:
+
+            >>> p, g = backward(...)
+            >>> with program.optimized_guard(p):
+            >>>     p = p - 0.001 * g
+        """
         OpRole = core.op_proto_and_checker_maker.OpRole
         self._current_role = OpRole.Optimize
         self._op_role_var = [var.name if isinstance(var, Variable) else var]
@@ -1105,18 +1320,35 @@ class Program(object):
         self._current_role = OpRole.Forward

     def __str__(self):
+        """
+        Get the protobuf debug string of this Program.
+
+        Returns:
+            str: The protobuf debug string.
+
+        Raises:
+            ValueError: If any of the required fields is not set.
+        """
         return self.to_string(True)

     def to_string(self, throw_on_error, with_details=False):
         """
         To debug string.
+
         Args:
-            throw_on_error(bool): raise exception when self is not initialized
-                when throw_on_error is True
-            with_details(bool): more details about variables and parameters
-                (e.g. trainable, optimize_attr, ...) will be printed when with_details is True
+            throw_on_error(bool): raise ValueError when any of the required
+                fields is not set.

-        Returns(str): The debug string.
+            with_details(bool): True if more details about variables and
+                parameters, e.g., :code:`trainable`, :code:`optimize_attr`,
+                need to be printed.
+
+        Returns:
+            str: The debug string.
+
+        Raises:
+            ValueError: If any of the required fields is not set and
+                throw_on_error is True.

         """
         assert isinstance(throw_on_error, bool) and isinstance(with_details,
@@ -1132,25 +1364,89 @@ class Program(object):
         return res_str

     def get_desc(self):
+        """
+        Get the C++ side of `ProgramDesc` object pointer. The C++ object is
+        exposed by :code:`pybind`.
+
+        Notes: This is a very low level API. Users should not use this API
+        directly.
+        """
         return self.desc

     def clone(self, for_test=False):
-        """Clone the Program object
-        Args:
-            for_test(bool): indicate whether clone for test.
+        """
+        Create a new, duplicated program.

-        Set for_test to False when we want to clone the program for training.
-        Set for_test to True when we want to clone the program for testing.
+        Some operators, e.g., :code:`batch_norm`, behave differently between
+        training and testing. They have an attribute, :code:`is_test`, to
+        control this behaviour. This method will change the :code:`is_test`
+        attribute of them to :code:`True` when :code:`for_test=True`.
+
+        * Set for_test to False when we want to clone the program for training.
+        * Set for_test to True when we want to clone the program for testing.
+
+        Notes: This API DOES NOT prune any operator. Please call
+        :code:`clone(for_test=True)` before backward and optimization.

         Args:
-            for_test(bool): Some operators, such as batch_norm and drop_out ops,
-                behave differently in training and testing.
If for_test is True,
-                the is_test attributes in these operators will be set to True for
-                testing purposes, otherwise, they remain unchanged.
+            for_test(bool): True to change the :code:`is_test` attribute of
+                operators to :code:`True`.

         Returns:
-            Program: The cloned Program object.
-
+            Program: The new, duplicated Program object.
+
+        Examples:
+
+            1. To clone a test program, the sample code is:
+
+            >>> import paddle.fluid as fluid
+            >>> train_program = fluid.Program()
+            >>> startup_program = fluid.Program()
+            >>> with fluid.program_guard(train_program, startup_program):
+            >>>     img = fluid.layers.data(name='image', shape=[784])
+            >>>     hidden = fluid.layers.fc(input=img, size=200, act='relu')
+            >>>     hidden = fluid.layers.dropout(hidden, dropout_prob=0.5)
+            >>>     loss = fluid.layers.cross_entropy(
+            >>>         input=fluid.layers.fc(hidden, size=10, act='softmax'),
+            >>>         label=fluid.layers.data(name='label', shape=[1], dtype='int64'))
+            >>>
+            >>> test_program = train_program.clone(for_test=True)
+            >>>
+            >>> sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+            >>> with fluid.program_guard(train_program, startup_program):
+            >>>     sgd.minimize(loss)
+
+            2. The :code:`clone` method can be avoided if you create the program
+            for training and the program for testing individually.
+
+            >>> import paddle.fluid as fluid
+            >>>
+            >>> def network(is_test):
+            >>>     img = fluid.layers.data(name='image', shape=[784])
+            >>>     hidden = fluid.layers.fc(input=img, size=200, act='relu')
+            >>>     hidden = fluid.layers.dropout(hidden, dropout_prob=0.5, is_test=is_test)
+            >>>     loss = fluid.layers.cross_entropy(
+            >>>         input=fluid.layers.fc(hidden, size=10, act='softmax'),
+            >>>         label=fluid.layers.data(name='label', shape=[1], dtype='int64'))
+            >>>     return loss
+            >>>
+            >>> train_program = fluid.Program()
+            >>> startup_program = fluid.Program()
+            >>> test_program = fluid.Program()
+            >>>
+            >>> with fluid.program_guard(train_program, startup_program):
+            >>>     with fluid.unique_name.guard():
+            >>>         loss = network(is_test=False)
+            >>>         sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+            >>>         sgd.minimize(loss)
+            >>>
+            >>> # the test startup program is not used.
+            >>> with fluid.program_guard(test_program, fluid.Program()):
+            >>>     with fluid.unique_name.guard():
+            >>>         loss = network(is_test=True)
+
+            The two code snippets above will generate the same programs.
         """
         if for_test:
             p = self.inference_optimize()
@@ -1165,6 +1461,21 @@ class Program(object):
         return p

     def prune(self, targets):
+        """
+        Prune operators and variables which are not needed to generate
+        :code:`targets`.
+
+        Notes: This is a very low level API. Users should not use this API
+        directly. This API is in flux and not stable.
+
+        Args:
+            targets(list|Variable|Operator): A list of variables or operators
+                that need to be pruned
+
+        Returns:
+            Program: A new, pruned program.
+
+        """
         if not isinstance(targets, list):
             targets = [targets]
         targets_idx = []
@@ -1199,6 +1510,17 @@ class Program(object):
         return res

     def inference_optimize(self):
+        """
+        This method will create a new program and change the :code:`is_test`
+        attribute of operators to :code:`True`. All the :code:`Parameter`
+        information will be lost.
+
+        Notes: This API is a very low level API. Use
+        :code:`Program.clone(for_test=True)` instead.
+
+        Returns:
+            Program: The new program.
+        """
         # this is an alternative implement before
         # core.inference_optimize being fixed.
         res = Program()
@@ -1215,6 +1537,18 @@ class Program(object):

     @staticmethod
     def parse_from_string(binary_str):
+        """
+        Deserialize a program desc from protobuf binary string.
+
+        Notes: All information about parameters will be lost after serialization
+        and deserialization.
+
+        Args:
+            binary_str(str): The binary protobuf string.
+
+        Returns:
+            Program: A deserialized program desc.
+        """
         p = Program()
         p.desc = core.ProgramDesc(binary_str)
         p.blocks = [Block(p, i) for i in xrange(p.desc.num_blocks())]
@@ -1223,10 +1557,19 @@ class Program(object):

     @property
     def random_seed(self):
+        """
+        The default random seed for random operators in Program. Zero means get
+        the random seed from random device.
+
+        Notes: It must be set before the operators have been added.
+        """
         return self._seed

     @property
     def num_blocks(self):
+        """
+        The number of blocks in this program.
+        """
         return self.desc.num_blocks()

     @random_seed.setter
@@ -1239,15 +1582,40 @@ class Program(object):
         return str(self)

     def global_block(self):
+        """
+        Get the first block of this program.
+        """
         return self.blocks[0]

     def block(self, index):
+        """
+        Get the :code:`index` block of this program.
+
+        Args:
+            index(int): The index of the block to get.
+
+        Returns:
+            Block: The :code:`index` block.
+        """
         return self.blocks[index]

     def current_block(self):
+        """
+        Get the current block. The :code:`current` block is the block to append
+        operators.
+        """
         return self.blocks[self.current_block_idx]

     def create_block(self, parent_idx=None):
+        """
+        Create a new block with the :code:`parent_idx` and change the current
+        block to the new block.
+
+        Args:
+            parent_idx(int): The parent block index.
+
+        Returns:
+            Block: The new block.
+        """
         new_block_idx = len(self.blocks)
         parent = self.current_block() if parent_idx is None else self.block(
             parent_idx)
@@ -1257,9 +1625,24 @@ class Program(object):
         return self.current_block()

     def rollback(self):
+        """
+        Exit a code block, i.e., roll back to the parent block.
+
+        Returns:
+            None
+        """
         self.current_block_idx = self.current_block().parent_idx

     def sync_with_cpp(self):
+        """
+        Synchronize Python instance to its binding C++ object instance.
+        If the program is modified in C++ space, this method should be invoked.
+
+        Notes: This is a very low level API. Users should not invoke it
+        directly.
+
+        Returns:
+            None
+        """
         for block_idx in range(len(self.blocks), self.desc.num_blocks()):
             self.blocks.append(Block(self, block_idx))
         for block in self.blocks:
@@ -1269,6 +1652,9 @@ class Program(object):
         """
         Copy the information of parameters from other program.

+        Notes: This is a very low level API. Users should not invoke it
+        directly.
+
         Args:
             other(Program): Other program

@@ -1288,6 +1674,9 @@ class Program(object):
         """
         Copy the information of data variables from other program.

+        Notes: This is a very low level API. Users should not invoke it
+        directly.
+
         Args:
             other(Program): Other program

@@ -1306,12 +1695,41 @@ class Program(object):
             self.global_block().var(var.name).is_data = True

     def list_vars(self):
+        """
+        Get all variables from this Program. An iterable object is returned.
+
+        Returns:
+            iterable: The generator will yield every variable in this program.
+        """
         for each_block in self.blocks:
             for each_var in each_block.vars.itervalues():
                 yield each_var


 class Parameter(Variable):
+    """
+    Parameter is derived from Variable. A parameter is a persistable
+    Variable, and will be updated by optimizers after each iteration.
+    The training of a neural network is essentially the updating of
+    its parameters.
+
+    Relative to a general Variable, a Parameter has several member
+    variables of its own:
+
+    Args:
+        trainable(bool): True if the parameter needs to be updated after
+            each iteration.
+        optimize_attr(map): Parameter attributes related to optimization.
+            Currently, it only contains 'learning_rate'.
+            Default: {'learning_rate': 1.0}
+        regularizer(WeightDecayRegularizer): The Regularizer which will
+            be applied on the parameter. Default: None
+        gradient_clip_attr(BaseGradientClipAttr): The gradient clip strategy
+            which will be applied on the parameter. Default: None
+        do_model_average(bool): True if the model average strategy will
+            be applied on this parameter.
+    """
+
     def __init__(self, block, shape, dtype, **kwargs):
         if shape is None or dtype is None:
             raise ValueError("Parameter must set shape and dtype")
@@ -1374,8 +1792,15 @@ _startup_program_ = Program()

 def default_startup_program():
     """
-    Get default startup program. In startup program, Paddle will initialize
-    parameters, initialize nccl handle, etc.
+    Get default/global startup program.
+
+    The layer functions in :code:`fluid.layers` will create parameters, readers,
+    and NCCL handles as global variables. The :code:`startup_program` will
+    initialize them by the operators in the startup program. The layer functions
+    will append these initialization operators into the startup program.
+
+    This method will return the :code:`default` or the :code:`current` startup
+    program. Users can use :code:`fluid.program_guard` to switch program.

     Returns:
         Program: startup program
@@ -1385,7 +1810,15 @@ def default_startup_program():

 def default_main_program():
     """
-    Get default main program. The main program is used for training or testing.
+    Get default/global main program. The main program is used for training or
+    testing.
+
+    All layer functions in :code:`fluid.layers` will append operators and
+    variables to the :code:`default_main_program`.
+
+    The :code:`default_main_program` is the default program in a lot of APIs.
+    For example, the :code:`Executor.run()` will execute the
+    :code:`default_main_program` when the program is not specified.

     Returns:
         Program: main program
@@ -1427,20 +1860,34 @@ def switch_startup_program(program):
 @contextlib.contextmanager
 def program_guard(main_program, startup_program=None):
     """
-    Switch program with `with` statement
+    Change the global main program and startup program with the `with`
+    statement. Layer functions in the Python `with` block will append
+    operators and variables to the new main program.

     Examples:
-        >>> with program_guard(Program()):
-        >>>     data = fluid.layers.data(...)
-        >>>     hidden = fluid.layers.fc(...)
+
+        >>> import paddle.fluid as fluid
+        >>> main_program = fluid.Program()
+        >>> startup_program = fluid.Program()
+        >>> with fluid.program_guard(main_program, startup_program):
+        >>>     data = fluid.layers.data(...)
+        >>>     hidden = fluid.layers.fc(...)
+
+    Notes: A temporary :code:`Program` can be used if the user does not need
+    to construct either the startup program or the main program.
+
+    Examples:
+
+        >>> import paddle.fluid as fluid
+        >>> main_program = fluid.Program()
+        >>> # does not care about startup program. Just pass a temporary value.
+        >>> with fluid.program_guard(main_program, fluid.Program()):
+        >>>     data = ...

     Args:
-        main_program(Program): New main program inside `with` statement
+        main_program(Program): New main program inside `with` statement.
         startup_program(Program): New startup program inside `with` statement.
             None means do not change startup program.
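Putting `parse_from_string` together with the serialization call that the `to_string` methods in this file already use gives a simple round-trip sketch (hedged: parameter information is lost in the round-trip, as the docstring above warns):

.. code-block:: python

    import paddle.fluid as fluid
    from paddle.fluid.framework import Program

    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        img = fluid.layers.data(name='image', shape=[784])
        hidden = fluid.layers.fc(input=img, size=200, act='relu')

    # Serialize the underlying ProgramDesc and rebuild a Program from it.
    binary_str = main_program.desc.serialize_to_string()
    restored = Program.parse_from_string(binary_str)
    print(restored.num_blocks)  # same block count as the original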
-
-    Returns:
-        None
     """
     if not isinstance(main_program, Program):
         raise TypeError("main_program should be Program")
@@ -1457,7 +1904,8 @@ def program_guard(main_program, startup_program=None):

 def get_var(name, program=None):
     """
-    Get a variable by name from the global block of a program
+    Get a variable by name from the global block of a program.
+
     Args:
         name(str): name of the variable
         program(Program|None): program object.
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index 6323c9899e0080b436a52f852c647466b8f94bc1..6e527572f1ca77be9fe069654db00d16ad5c21ef 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -30,20 +30,42 @@ __all__ = [


 def is_parameter(var):
-    """Check whether the variable is a Parameter.
-
-    This function checks whether the input variable is a Parameter.
+    """
+    Check whether the given variable is an instance of Parameter.

     Args:
-        var : The input variable.
+        var(Variable): The variable to be checked.

     Returns:
-        boolean result whether the variable is a Parameter.
+        bool: True if the given `var` is an instance of Parameter,
+        False if not.
+
+    Examples:
+        .. code-block:: python
+
+            param = fluid.default_main_program().global_block().var('fc.w')
+            res = fluid.io.is_parameter(param)
     """
     return isinstance(var, Parameter)


 def is_persistable(var):
+    """
+    Check whether the given variable is persistable.
+
+    Args:
+        var(Variable): The variable to be checked.
+
+    Returns:
+        bool: True if the given `var` is persistable,
+        False if not.
+
+    Examples:
+        .. code-block:: python
+
+            param = fluid.default_main_program().global_block().var('fc.w')
+            res = fluid.io.is_persistable(param)
+    """
     if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
             var.desc.type() == core.VarDesc.VarType.FETCH_LIST:
         return False
@@ -68,20 +90,69 @@ def save_vars(executor,
              predicate=None,
              filename=None):
     """
-    Save variables to directory by executor.
+    Save variables to the given directory by executor.
+
+    There are two ways to specify the variables to be saved: the first way is
+    to list variables in a list and assign it to `vars`; the second way is to
+    assign an existing program to `main_program`, in which case all variables
+    in the program will be saved. The first way has a higher priority. In other
+    words, if `vars` is assigned, the `main_program` and the `predicate` will
+    be ignored.

-    :param executor: executor that save variable
-    :param dirname: directory path
-    :param main_program: program. If vars is None, then filter all variables in this
-    program which fit `predicate`. Default default_main_program.
-    :param predicate: The Predicate describes a callable that returns a variable
-    as a bool. If it returns true, the corresponding input variable will be saved.
-    :param vars: variables need to be saved. If vars is specified, program & predicate
-    will be ignored
-    :param filename: The name of a single file that all vars are saved to.
-    If it is None, save variables to separate files.
+    The `dirname` is used to specify the folder where the variables are saved.
+    If you prefer to save variables in separate files in the folder `dirname`,
+    set `filename` to None; if you prefer to save all variables in a single
+    file, use `filename` to specify it.

-    :return: None
+    Args:
+        executor(Executor): The executor to run for saving variables.
+        dirname(str): The directory path.
+        main_program(Program|None): The program whose variables will be saved.
+                                    If it is None, the default main program will
+                                    be used automatically.
+                                    Default: None
+        vars(list[Variable]|None): The list that contains all variables to save.
+                                   It has a higher priority than the `main_program`.
+                                   Default: None
+        predicate(function|None): If it is not None, only variables in the
+                                  `main_program` that make predicate(variable)==True
+                                  will be saved. It only works when we are using the
+                                  `main_program` to specify variables (in other words,
+                                  `vars` is None).
+                                  Default: None
+        filename(str|None): The file to save all variables to. If you prefer to
+                            save variables separately, set it to None.
+                            Default: None
+
+    Returns:
+        None
+
+    Raises:
+        TypeError: If `main_program` is neither an instance of Program nor None.
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+
+            # The first usage: using `main_program` to specify variables
+            def name_has_fc(var):
+                res = "fc" in var.name
+                return res
+
+            prog = fluid.default_main_program()
+            fluid.io.save_vars(executor=exe, dirname=param_path, main_program=prog,
+                               vars=None)
+            # All variables in `main_program` whose name includes "fc" will be saved.
+            # And variables are going to be saved separately.
+
+
+            # The second usage: using `vars` to specify variables
+            var_list = [var_a, var_b, var_c]
+            fluid.io.save_vars(executor=exe, dirname=param_path, vars=var_list,
+                               filename="vars_file")
+            # var_a, var_b and var_c will be saved. And they are going to be
+            # saved in the same file named 'vars_file' in the path "./my_paddle_model".
     """
     if vars is None:
         if main_program is None:
@@ -129,7 +200,42 @@ def save_vars(executor,

 def save_params(executor, dirname, main_program=None, filename=None):
     """
-    Save all parameters to directory with executor.
+    This function filters out all parameters from the given `main_program`
+    and then saves them to the folder `dirname` or the file `filename`.
+
+    Use the `dirname` to specify the saving folder. If you would like to
+    save parameters in separate files, set `filename` to None; if you would
+    like to save all parameters in a single file, use `filename` to specify
+    the file name.
+
+    NOTICE: Some variables are not Parameters but are necessary for
+    training. So you can NOT save and continue your training just by
+    `save_params()` and `load_params()`. Please use `save_persistables()`
+    and `load_persistables()` instead.
+
+    Args:
+        executor(Executor): The executor to run for saving parameters.
+        dirname(str): The saving directory path.
+        main_program(Program|None): The program whose parameters will be
+                                    saved. If it is None, the default
+                                    main program will be used automatically.
+                                    Default: None
+        filename(str|None): The file to save all parameters. If you prefer
+                            to save parameters in different files, set it
+                            to None.
+                            Default: None
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            fluid.io.save_params(executor=exe, dirname=param_path,
+                                 main_program=None)
     """
     save_vars(
         executor,
@@ -142,7 +248,37 @@ def save_params(executor, dirname, main_program=None, filename=None):

 def save_persistables(executor, dirname, main_program=None, filename=None):
     """
-    Save all persistables to directory with executor.
+    This function filters out all variables with `persistable==True` from the
+    given `main_program` and then saves these variables to the folder `dirname`
+    or file `filename`.
+
+    The `dirname` is used to specify the folder where persistable variables
+    are going to be saved.
If you would like to save variables in separate
+    files, set `filename` to None; if you would like to save all variables in a
+    single file, use `filename` to specify the file name.
+
+    Args:
+        executor(Executor): The executor to run for saving persistable variables.
+        dirname(str): The directory path.
+        main_program(Program|None): The program whose persistable variables will
+                                    be saved. If it is None, the default main
+                                    program will be used automatically.
+                                    Default: None
+        filename(str|None): The file to save all variables to. If you prefer to
+                            save variables in different files, set it to None.
+                            Default: None
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            fluid.io.save_persistables(executor=exe, dirname=param_path,
+                                       main_program=None)
     """
     save_vars(
         executor,
@@ -160,20 +296,69 @@ def load_vars(executor,
              predicate=None,
              filename=None):
     """
-    Load variables from directory by executor.
+    Load variables from the given directory by executor.
+
+    There are two ways to specify the variables to be loaded: the first way is
+    to list variables in a list and assign it to `vars`; the second way is to
+    assign an existing program to `main_program`, in which case all variables
+    in the program will be loaded. The first way has a higher priority. In
+    other words, if `vars` is assigned, the `main_program` and the `predicate`
+    will be ignored.
+
+    The `dirname` is used to specify the folder from which to load variables.
+    If variables were saved in separate files in the folder `dirname`,
+    set `filename` to None; if all variables were saved in a single file,
+    use `filename` to specify it.

-    :param executor: executor that load variable
-    :param dirname: directory path
-    :param main_program: program. If vars is None, then filter all variables in this
-    program which fit `predicate`. Default default_main_program().
-    :param predicate: The Predicate describes a callable that returns a variable
-    as a bool. If it returns true, the corresponding input variable will be loaded.
-    :param vars: variables need to be loaded. If vars is specified, program &
-    predicate will be ignored
-    :param filename: The name of the single file that all vars are loaded from.
-    If it is None, load variables from separate files.
+    Args:
+        executor(Executor): The executor to run for loading variables.
+        dirname(str): The directory path.
+        main_program(Program|None): The program whose variables will be loaded.
+                                    If it is None, the default main program will
+                                    be used automatically.
+                                    Default: None
+        vars(list[Variable]|None): The list that contains all variables to load.
+                                   It has a higher priority than the `main_program`.
+                                   Default: None
+        predicate(function|None): If it is not None, only variables in the
+                                  `main_program` that make predicate(variable)==True
+                                  will be loaded. It only works when we are using the
+                                  `main_program` to specify variables (in other words,
+                                  `vars` is None).
+                                  Default: None
+        filename(str|None): The file which saved all required variables. If variables
+                            were saved in different files, set it to None.
+                            Default: None
+
+    Returns:
+        None
+
+    Raises:
+        TypeError: If `main_program` is neither an instance of Program nor None.
+
+    Examples:
+        .. code-block:: python
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+
+            # The first usage: using `main_program` to specify variables
+            def name_has_fc(var):
+                res = "fc" in var.name
+                return res

-    :return: None
+            prog = fluid.default_main_program()
+            fluid.io.load_vars(executor=exe, dirname=param_path, main_program=prog,
+                               vars=None)
+            # All variables in `main_program` whose name includes "fc" will be loaded.
+            # And all the variables are supposed to have been saved in different files.
+
+
+            # The second usage: using `vars` to specify variables
+            var_list = [var_a, var_b, var_c]
+            fluid.io.load_vars(executor=exe, dirname=param_path, vars=var_list,
+                               filename="vars_file")
+            # var_a, var_b and var_c will be loaded. And they are supposed to have
+            # been saved in the same file named 'vars_file' in the path "./my_paddle_model".
     """
     if vars is None:
         if main_program is None:
@@ -221,7 +406,42 @@ def load_vars(executor,

 def load_params(executor, dirname, main_program=None, filename=None):
     """
-    load all parameters from directory by executor.
+    This function filters out all parameters from the given `main_program`
+    and then tries to load these parameters from the folder `dirname` or
+    the file `filename`.
+
+    Use the `dirname` to specify the folder where parameters were saved. If
+    parameters were saved in separate files in the folder `dirname`, set
+    `filename` to None; if all parameters were saved in a single file, use
+    `filename` to specify the file name.
+
+    NOTICE: Some variables are not Parameters but are necessary for
+    training. So you can NOT save and continue your training just by
+    `save_params()` and `load_params()`. Please use `save_persistables()`
+    and `load_persistables()` instead.
+
+    Args:
+        executor(Executor): The executor to run for loading parameters.
+        dirname(str): The directory path.
+        main_program(Program|None): The program whose parameters will be
+                                    loaded. If it is None, the default
+                                    main program will be used automatically.
+                                    Default: None
+        filename(str|None): The file which saved all parameters. If parameters
+                            were saved in different files, set it to None.
+                            Default: None
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            fluid.io.load_params(executor=exe, dirname=param_path,
+                                 main_program=None)
     """
     load_vars(
         executor,
@@ -233,7 +453,37 @@ def load_params(executor, dirname, main_program=None, filename=None):

 def load_persistables(executor, dirname, main_program=None, filename=None):
     """
-    load all persistables from directory by executor.
+    This function filters out all variables with `persistable==True` from the
+    given `main_program` and then tries to load these variables from the folder
+    `dirname` or the file `filename`.
+
+    Use the `dirname` to specify the folder where persistable variables were
+    saved. If variables were saved in separate files, set `filename` to None;
+    if all variables were saved in a single file, use `filename` to specify
+    the file name.
+
+    Args:
+        executor(Executor): The executor to run for loading persistable variables.
+        dirname(str): The directory path.
+        main_program(Program|None): The program whose persistable variables will
+                                    be loaded. If it is None, the default main
+                                    program will be used automatically.
+                                    Default: None
+        filename(str|None): The file which saved all variables. If variables were
+                            saved in different files, set it to None.
+                            Default: None
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            fluid.io.load_persistables(executor=exe, dirname=param_path,
+                                       main_program=None)
     """
     load_vars(
         executor,
@@ -306,22 +556,48 @@ def save_inference_model(dirname,
                          model_filename=None,
                          params_filename=None):
     """
-    Build a model especially for inference,
-    and save it to directory by the executor.
+    Prune the given `main_program` to build a new program especially for
+    inference, and then save it and all related parameters to the given
+    `dirname` by the `executor`.
+
+    Args:
+        dirname(str): The directory path to save the inference model.
+        feeded_var_names(list[str]): Names of variables that need to be fed
+                                     data during inference.
+        target_vars(list[Variable]): Variables from which we can get inference
+                                     results.
+        executor(Executor): The executor that saves the inference model.
+        main_program(Program|None): The original program, which will be pruned
+                                    to build the inference model. If it is set
+                                    to None, the default main program will be
+                                    used.
+                                    Default: None.
+        model_filename(str|None): The name of file to save the inference program
+                                  itself. If it is set to None, a default
+                                  filename `__model__` will be used.
+        params_filename(str|None): The name of file to save all related
+                                   parameters. If it is set to None, parameters
+                                   will be saved in separate files.

-    :param dirname: directory path
-    :param feeded_var_names: Names of variables that need to be feeded data during inference
-    :param target_vars: Variables from which we can get inference results.
-    :param executor: executor that save inference model
-    :param main_program: original program, which will be pruned to build the inference model.
-    Default default_main_program().
-    :param model_filename: The name of file to save inference program.
-    If not specified, default filename `__model__` will be used.
-    :param params_filename: The name of file to save parameters.
-    It is used for the case that all parameters are saved in a single binary file.
-    If not specified, parameters are considered saved in separate files.
+    Returns:
+        None
+
+    Raises:
+        ValueError: If `feed_var_names` is not a list of basestring.
+        ValueError: If `target_vars` is not a list of Variable.
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            path = "./infer_model"
+            fluid.io.save_inference_model(dirname=path, feeded_var_names=['img'],
+                                          target_vars=[predict_var], executor=exe)
+
+            # In this example, the function will prune the default main program
+            # to make it suitable for inferring the `predict_var`. The pruned
+            # inference program is going to be saved in the "./infer_model/__model__"
+            # and parameters are going to be saved in separate files under folder
+            # "./infer_model".

-    :return: None
     """
     if isinstance(feeded_var_names, basestring):
         feeded_var_names = [feeded_var_names]
@@ -382,18 +658,49 @@ def load_inference_model(dirname,
     """
     Load inference model from a directory

-    :param dirname: directory path
-    :param executor: executor that load inference model
-    :param model_filename: The name of file to load inference program.
-    If not specified, default filename `__model__` will be used.
-    :param params_filename: The name of file to load parameters.
-    It is used for the case that all parameters are saved in a single binary file.
-    If not specified, parameters are considered saved in separate files.
+    Args:
+        dirname(str): The directory path.
+        executor(Executor): The executor to run for loading inference model.
+        model_filename(str|None): The name of file to load inference program.
+                                  If it is None, the default filename
+                                  '__model__' will be used.
+                                  Default: None
+        params_filename(str|None): The name of file to load all parameters.
+                                   It is only used for the case that all
+                                   parameters were saved in a single binary
+                                   file. If parameters were saved in separate
+                                   files, set it to None.
+
+    Returns:
+        tuple: The return of this function is a tuple with three elements:
+        (program, feed_target_names, fetch_targets). The `program` is a
+        Program, it's the program for inference. The `feed_target_names` is
+        a list of str, it contains names of variables that need to feed
+        data in the inference program. The `fetch_targets` is a list of
+        Variable. It contains variables from which we can get inference
+        results.
+
+    Raises:
+        ValueError: If `dirname` is not an existing directory.
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            path = "./infer_model"
+            [inference_program, feed_target_names, fetch_targets] =
+                fluid.io.load_inference_model(dirname=path, executor=exe)
+            results = exe.run(inference_program,
+                          feed={feed_target_names[0]: tensor_img},
+                          fetch_list=fetch_targets)
+
+            # In this example, the inference program was saved in the
+            # "./infer_model/__model__" and parameters were saved in
+            # separate files in "./infer_model".
+            # After getting inference program, feed target names and
+            # fetch targets, we can use an Executor to run the inference
+            # program to get the inference result.

-    :return: [program, feed_target_names, fetch_targets]
-             program: program especially for inference.
-             feed_target_names: Names of variables that need to feed data
-             fetch_targets: Variables from which we can get inference results.
     """
     if not os.path.isdir(dirname):
         raise ValueError("There is no directory named '%s'", dirname)
@@ -424,12 +731,25 @@ def load_inference_model(dirname,

 def get_parameter_value(para, executor):
     """
-    Get the LoDTensor for the parameter
+    Get the LoDTensor value of the given parameter.
+
+    Args:
+        para(Parameter): The parameter to get value from.
+        executor(Executor): The executor to run for retrieving the value.
+
+    Returns:
+        numpy.array: The given parameter's values.
+
+    Raises:
+        AssertionError: If the `para` is not an instance of Parameter.

-    :param executor: executor for retrieving the value
-    :param para: the given parameter
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param = fluid.default_main_program().global_block().var('fc.w')
+            p = fluid.io.get_parameter_value(param, exe)

-    :return: the LoDTensor for the parameter
     """
     assert is_parameter(para)

@@ -441,14 +761,30 @@ def get_parameter_value(para, executor):

 def get_parameter_value_by_name(name, executor, program=None):
     """
-    Get the LoDTensor for paramter with the given name
+    Get the LoDTensor value of a certain parameter by its name.
+
+    Args:
+        name(str): The parameter's name.
+        executor(Executor): The executor to run for retrieving the value.
+        program(Program|None): The program where to find the parameter.
+                               If it is set to None, the function will
+                               try to find the parameter in the default
+                               main program.

-    :param executor: executor for retrieving the value
-    :param name: the name of the parameter
-    :param program: the program where the variable is found
-    Default default_main_program().
+    Returns:
+        numpy.array: The parameter's values.
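The two parameter-reading helpers documented here should agree with each other. A hedged sketch (it assumes a network has already been built so that a parameter named 'fc.w' exists; 'fc.w' is the same placeholder name the docstring examples use):

.. code-block:: python

    import numpy
    import paddle.fluid as fluid

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # Fetch the same parameter both by object and by name.
    param = fluid.default_main_program().global_block().var('fc.w')
    v1 = fluid.io.get_parameter_value(param, exe)
    v2 = fluid.io.get_parameter_value_by_name('fc.w', exe)
    assert numpy.array_equal(v1, v2)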
-
+    Raises:
+        TypeError: If given `name` is not an instance of basestring.
+        TypeError: If the parameter with the given name doesn't exist.
+        AssertionError: If there is a variable named `name` in the
+                        given program but it is not a Parameter.
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            p = fluid.io.get_parameter_value('fc.w', exe)
     """
     if program is None:
         program = default_main_program()
@@ -470,16 +806,58 @@ def save_checkpoint(executor,
                    main_program=None,
                    max_num_checkpoints=3):
     """
-    Save Checkpoint will save persistable LodTensor variables from main_program in checkpoint directory,
-    the directory named by serial number from 0 to (n -1), save_checkpoint use LRU strategy
-    to keep numbers of checkpoint directory, the numbers of checkpoint directory are max_num_checkpoints at most,
-    The interval between two saved checkpoints must greater than save_interval_secs.
+    This function filters out all checkpoint variables from the given
+    main_program and then saves these variables to the `checkpoint_dir`
+    directory.
+
+    In the training process, we generally save a checkpoint in each
+    iteration. So there might be a lot of checkpoints in the
+    `checkpoint_dir`. To avoid them taking too much disk space, the
+    `max_num_checkpoints` argument is introduced to limit the total number
+    of checkpoints. If the number of existing checkpoints is greater than
+    the `max_num_checkpoints`, the oldest ones will be deleted.
+
+    A variable is a checkpoint variable and will be saved if it meets
+    all of the following conditions:
+        1. It is persistable.
+        2. Its type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
+        3. Its name contains no "@GRAD" nor ".trainer_" nor ".block".

-    :param executor executor for save the value
-    :param checkpoint_dir the checkpoint directory
-    :param trainer_id currect trainer id, if id is equal to 0, the trainer is chief
-    :param main_program will save all variables in program
-    :param max_num_checkpoints will keep numbers of checkpoint serials not bigger than max_num_checkpoints
+    Args:
+        executor(Executor): The executor to run for saving the checkpoint.
+        checkpoint_dir(str): The folder where checkpoints are saved.
+        trainer_id(int): current trainer id, if id is equal to 0, the trainer
+                         is chief.
+        trainer_args(dict|None): Current training arguments, such as 'epoch_id'
+                                 and 'step_id'.
+                                 Default: None
+        main_program(Program|None): The program whose checkpoint variables will
+                                    be saved. If it is None, the default main
+                                    program will be used.
+        max_num_checkpoints(int): The maximum total number of existing
+                                  checkpoints.
+                                  Default: 3
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: If `checkpoint_dir` is None.
+        AssertionError: If `trainer_args` is not a dict.
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            path = "./checkpoints"
+            prog = fluid.default_main_program()
+            trainer_args = {"epoch_id": 200,
+                            "step_id": 20} # just an example
+            fluid.io.save_checkpoint(executor=exe,
+                                     checkpoint_dir=path,
+                                     trainer_id=0,
+                                     trainer_args=trainer_args,
+                                     main_program=prog,
+                                     max_num_checkpoints=3)
     """
     if checkpoint_dir is None:
         raise ValueError("'checkpoint_dir' should not be None")
@@ -503,13 +881,50 @@ def save_checkpoint(executor,

 def load_checkpoint(executor, checkpoint_dir, serial, main_program):
     """
-    Load checkpoint from a directory by executor,
-    it will find the most recent saved checkpoint file and load it auto.
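Composed together, the checkpoint helpers in this file form a save/resume loop. A hedged sketch (it assumes `get_latest_checkpoint_serial`, defined later in this diff, is reachable via `fluid.io`, and that -1 means no checkpoint exists yet, as its implementation indicates):

.. code-block:: python

    import paddle.fluid as fluid

    exe = fluid.Executor(fluid.CPUPlace())
    prog = fluid.default_main_program()
    ckpt_dir = "./checkpoints"

    # Resume from the newest complete checkpoint, if one exists.
    serial = fluid.io.get_latest_checkpoint_serial(ckpt_dir)
    if serial >= 0:
        fluid.io.load_checkpoint(executor=exe, checkpoint_dir=ckpt_dir,
                                 serial=serial, main_program=prog)

    for step in range(100):
        # ... run one training iteration here ...
        if step % 10 == 0:
            fluid.io.save_checkpoint(executor=exe, checkpoint_dir=ckpt_dir,
                                     trainer_id=0,
                                     trainer_args={"epoch_id": 0,
                                                   "step_id": step},
                                     main_program=prog,
                                     max_num_checkpoints=3)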
+    This function filters out all checkpoint variables from the given
+    main_program and then tries to load these variables from the
+    `checkpoint_dir` directory.
+
+    In the training process, we generally save a checkpoint in each
+    iteration. So there may be more than one checkpoint in the
+    `checkpoint_dir` (each checkpoint has its own sub-folder); use
+    `serial` to specify which serial of checkpoint you would like to
+    load.
+
+    A variable is a checkpoint variable and will be loaded if it meets
+    all of the following conditions:
+        1. It is persistable.
+        2. Its type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
+        3. Its name contains no "@GRAD" nor ".trainer_" nor ".block".
+
+    Args:
+        executor(Executor): The executor to run for loading the checkpoint.
+        checkpoint_dir(str): The folder where all checkpoints are.
+        serial(int): The serial of the checkpoint you would like to load.
+        main_program(Program): The program whose checkpoint variables will
+                               be loaded.

-    :param executor executor for load the value
-    :param checkpoint_dir the checkpoint directory
-    :param serial the serial folder in checkpoint directory will be load
-    :param main_program will load all variables in program
+    Returns:
+        None
+
+    Raises:
+        ValueError: If `checkpoint_dir` is None.
+        ValueError: If `serial` is None or `serial` is less than 0.
+        ValueError: If `main_program` is None.
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            path = "./checkpoints"
+            prog = fluid.default_main_program()
+            fluid.io.load_checkpoint(executor=exe, checkpoint_dir=path,
+                                     serial=9, main_program=prog)
+
+            # In this example, the `load_checkpoint` function
+            # will first filter out all checkpoint variables in the default
+            # main program, and then try to load these variables from the
+            # folder "./checkpoints/checkpoint_9/__model__".
     """

     if checkpoint_dir is None:
@@ -528,10 +943,10 @@ def load_checkpoint(executor, checkpoint_dir, serial, main_program):
 def clean_checkpoint(checkpoint_dir, delete_dir=False):
     """
     clean the checkpoint dir, when the train exits normally, the trainer will call clean_checkpoint to delete checkpoint directory saved before.
-    delete_dir only works when the directory is empty, otherwise, OSError is raised.
+    delete_dir only works when the directory is empty, otherwise, OSError is raised.

-    :param checkpoint_dir
-    :param delete_dir
+    :param checkpoint_dir
+    :param delete_dir
     """

     if checkpoint_dir is None:
@@ -547,13 +962,40 @@ def load_persist_vars_without_grad(executor,
                                   program,
                                   has_model_dir=False):
     """
-    load_persist_vars_without_grad will load variables from a directory by an executor,
-    the variable named end with "@GRAD" will not be loaded.
+    This function filters out all checkpoint variables from the given
+    program and then tries to load these variables from the given directory.
+
+    A variable is a checkpoint variable if it meets all of the following
+    conditions:
+        1. It is persistable.
+        2. Its type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
+        3. Its name contains no "@GRAD" nor ".trainer_" nor ".block".

-    :param executor executor for load the value
-    :param dirname the checkpoint directory
-    :param program will load all variables in program
-    :param has_model_dir if has_model_dir is True, will load variables from sub directory named __model__
+    Args:
+        executor(Executor): The executor to run for loading variables.
+        dirname(str): The directory path.
+        program(Program): The program whose checkpoint variables will
+                          be loaded.
+        has_model_dir(bool): if True, the function loads variables
+                             from a sub-directory named '__model__'.
+                             Default: False
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            fluid.io.load_persist_vars_without_grad(executor=exe,
+                    dirname=param_path, program=prog, has_model_dir=True)
+
+            # In this example, the `load_persist_vars_without_grad` function
+            # will first filter out all checkpoint variables in the default
+            # main program, and then try to load these variables from the
+            # folder "./my_paddle_model/__model__".
     """

     if has_model_dir:
@@ -569,12 +1011,38 @@ def load_persist_vars_without_grad(executor,

 def save_persist_vars_without_grad(executor, dirname, program):
     """
-    save_persist_vars_without_grad will save variables to a directory by an executor,
-    the variable named end with "@GRAD" will not be saved.
+    This function filters out all checkpoint variables from the given
+    program and then saves these variables to a sub-folder '__model__' of
+    the given directory.
+
+    A variable is a checkpoint variable if it meets all of the following
+    conditions:
+        1. It is persistable.
+        2. Its type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
+        3. Its name contains no "@GRAD" nor ".trainer_" nor ".block".
+
+    Args:
+        executor(Executor): The executor to run for saving variables.
+        dirname(str): The directory path.
+        program(Program): The program whose checkpoint variables will
+                          be saved.
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            fluid.io.save_persist_vars_without_grad(executor=exe,
+                    dirname=param_path, program=prog)

-    :param executor executor for load the value
-    :param dirname the checkpoint directory
-    :param program will load all variables in program
+            # In this example, the `save_persist_vars_without_grad` function
+            # will first filter out all checkpoint variables in the default
+            # main program, and then save these variables to the folder
+            # "./my_paddle_model/__model__".
     """
     cur_dir = _get_model_dir(dirname)
     save_vars(
@@ -620,7 +1088,7 @@ def _is_checkpoint_var(var):
     the checkpoint will not save or load all the variables.
     var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded.

-    :param var
+    :param var
     """
     if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
             var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
@@ -701,7 +1169,7 @@ def _write_success(dirname):
     """
     write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct.

-    :param dirname
+    :param dirname
     """
     success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME)
     with open(success_file, 'a') as f:
@@ -713,7 +1181,7 @@ def get_latest_checkpoint_serial(checkpoint_dir):
     """
     get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory

-    :param checkpoint_dir
+    :param checkpoint_dir
     """
     if not checkpoint_dir:
         return -1
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 581770feea98230ce6161bd11dc43f79cecd0048..849474dc58461ac3772f439da7bf5d57592daa8c 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -185,12 +185,14 @@ def Print(input,
     Returns:
         Variable: Output tensor, same data with input tensor.

+    Examples:
+
         .. code-block:: python

-           value = some_layer(...)
- Print(value, summarize=10, - message="The content of some_layer: ") + value = some_layer(...) + Print(value, summarize=10, + message="The content of some_layer: ") ''' helper = LayerHelper('print', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) @@ -1201,6 +1203,31 @@ class ConditionalBlockGuard(BlockGuard): class ConditionalBlock(object): + ''' + **ConditionalBlock** + + ConditionalBlock is an operator that binds a block to a specific condition; + if the condition matches, the corresponding block will be executed. + + Args: + inputs (Variable): bool conditions. + is_scalar_condition (bool): whether the branch is controlled by a scalar. + name(str): name of this ConditionalBlock. + + Examples: + .. code-block:: python + + cond = layers.less_than(x=label, y=limit) + true_image, false_image = layers.split_lod_tensor( + input=image, mask=cond) + true_cond = layers.ConditionalBlock([true_image]) + false_cond = layers.ConditionalBlock([false_image]) + + with true_cond.block(): + ... + with false_cond.block(): + ... + ''' + def __init__(self, inputs, is_scalar_condition=False, name=None): for each_input in inputs: if not isinstance(each_input, Variable): diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index c84c79424e615bfe8631eb4e0fc6b70afbf8a162..2979ff3057a78ac3074cbb43b7a32966212073f6 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -2678,18 +2678,35 @@ def sequence_expand(x, y, ref_level=-1, name=None): def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): ''' + **beam search** + + This function implements the beam search algorithm. + Beam search is a classical algorithm for selecting candidate words + in a machine translation task. + + Refer to `Beam search <https://en.wikipedia.org/wiki/Beam_search>`_ + for more details. + Args: - pre_ids (Variable): ${pre_ids_comment} - ids (Variable): ${ids_comment} - scores (Variable): ${scores_comment} - beam_size (int): ${beam_size_comment} - end_id (int): ${end_id_comment} - level (int): ${level_comment} + pre_ids (Variable): ids in the previous step. + ids (Variable): a LoDTensor of shape [None, k]. + scores (Variable): a LoDTensor that has the same shape and LoD as `ids`. + beam_size (int): beam size for beam search. + end_id (int): the token id which indicates the end of a sequence. + level (int): the level of LoDTensor. Returns: - tuple: a tuple of beam_search output variables: selected_ids, selected_scores + tuple: a tuple of beam_search output variables: `selected_ids`, `selected_scores` + + Examples: + .. code-block:: python + + # current_score is a Tensor of shape (num_batch_size, embed_size), which + # consists of the scores of the candidate words. + topk_scores, topk_indices = pd.topk(current_score, k=50) + selected_ids, selected_scores = pd.beam_search( + pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) ''' helper = LayerHelper('beam_search', **locals()) score_type = scores.dtype diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py index 61be39c25912604f842ef8a9a6ec5f0d1cf70257..c417ab393fca88d476d2f1fe83d12f99271d6883 100644 --- a/python/paddle/fluid/lod_tensor.py +++ b/python/paddle/fluid/lod_tensor.py @@ -19,33 +19,41 @@ __all__ = ['create_lod_tensor', 'create_random_int_lodtensor'] def create_lod_tensor(data, lod, place): - """Create a lod tensor from a numpy array, a list, or an existing lod tensor. + """ + Create a lod tensor from a numpy array, a list, or an existing lod tensor. Create a lod tensor by doing the following: + 1. Check that the length-based input lod is valid. + 2.
Convert the length-based lod to an offset-based LoD. - 3. Copy the data from a numpy array, a list or a existing lod tensor to + + 3. Copy the data from a numpy array, a list or an existing lod tensor to CPU or GPU device (based on input place). + 4. Set the level of detail (LoD) using the offset-based LoD. - Use example: - Suppose we want LoDTensor to hold data for sequences of word, where each word is - represented by an integer. If we want to create a LoDTensor to represent two - sentences, one of 2 words, and one of 3 words. + Examples: - Then 'data' can be a numpy array of integers with shape (5, 1). - 'lod' will be [[2, 3]], indicating the length(# of words) in each sentence. - This length-based input lod [[2, 3]] will be converted to offset-based lod [[0, 2, 5]] - inside the function call. + Suppose we want a LoDTensor to hold data for sequences of words, where each + word is represented by an integer, and we want to create a LoDTensor to + represent two sentences, one of 2 words and one of 3 words. - Please refer to - github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/lod_tensor.md - for more details regarding LoD. + Then :code:`data` can be a numpy array of integers with shape (5, 1). + :code:`lod` will be [[2, 3]], indicating the length (# of words) of each + sentence. This length-based input lod [[2, 3]] will be converted to an + offset-based lod [[0, 2, 5]] inside the function call. + + Please refer to :ref:`api_guide_low_level_lod_tensor` for more details + regarding LoD. Args: - data: a numpy array or a LoDTensor or a list holding the data to be copied. - lod: a list of lists indicating the length-based LoD info specified by the user. - place: CPU or GPU place indicating where the data in the new LoDTensor will be stored. + data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a + list holding the data to be copied. + lod(list): a list of lists indicating the length-based LoD info + specified by the user. + place(Place): CPU or GPU place indicating where the data in the new + LoDTensor will be stored. Returns: A fluid LoDTensor object with tensor data and lod info. @@ -77,31 +85,38 @@ def create_lod_tensor(data, lod, place): def create_random_int_lodtensor(lod, base_shape, place, low, high): - """Create a LoDTensor containing random integers. + """ + Create a LoDTensor containing random integers. - This function is frequently used in the book examples. So we revised it based on - the new create_lod_tensor API and put it here in the lod_tensor module to simplify - the code. + This function is frequently used in the book examples. So we revised it + based on the new create_lod_tensor API and put it here in the lod_tensor + module to simplify the code. The function does the following: - 1. Calculate the overall shape of the LoDTensor based on the length-based 'lod' input - and the shape of the basic element in 'base_shape'. + + 1. Calculate the overall shape of the LoDTensor based on the length-based + :code:`lod` input and the shape of the basic element in + :code:`base_shape`. + 2. Create a numpy array of this shape. + 3. Create the LoDTensor using the create_lod_tensor API. - Suppose we want LoDTensor to hold data for sequences of word, where each word is - represented by an integer. If we want to create a LoDTensor to represent two - sentences, one of 2 words, and one of 3 words. Then 'base_shape' is [1], input - length-based 'lod' is [[2, 3]]. Then the overall shape of the LoDTensor would be - [5, 1], holding 5 words for two sentences.
+ Suppose we want a LoDTensor to hold data for sequences of words, where each + word is represented by an integer. If we want to create a LoDTensor to + represent two sentences, one of 2 words and one of 3 words, then + 'base_shape' is [1] and the input length-based 'lod' is [[2, 3]]. The overall + shape of the LoDTensor would then be [5, 1], holding 5 words for two sentences. Args: - data: a numpy array or a LoDTensor holding the data to be copied. - lod: a list of lists indicating the length-based LoD info specified by the user. - base_shape: the shape of the basic element to be held by the LoDTensor. - place: CPU or GPU place indicating where the data in the new LoDTensor will be stored. - low: the lower bound of the random integers. - high: the upper bound of the random integers. + lod(list): a list of lists indicating the length-based LoD info + specified by the user. + base_shape(list): the shape of the basic element to be held by the + LoDTensor. + place(Place): CPU or GPU place indicating where the data in the new + LoDTensor will be stored. + low(int): the lower bound of the random integers. + high(int): the upper bound of the random integers. Returns: A fluid LoDTensor object with tensor data and lod info. diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index bb9c6fdc60089fc2b43573a6421a6f9781d2d4a8..572475b483ff0341a97a91b6c5309fcf337dacbe 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -325,14 +325,14 @@ class Auc(MetricBase): """ def __init__(self, name, curve='ROC', num_thresholds=200): - super(MetricBase, self).__init__(name, curve, num_thresholds) + super(Auc, self).__init__(name=name) self._curve = curve self._num_thresholds = num_thresholds self._epsilon = 1e-6 - self.tp_list = np.ndarray((num_thresholds, )) - self.fn_list = np.ndarray((num_thresholds, )) - self.tn_list = np.ndarray((num_thresholds, )) - self.fp_list = np.ndarray((num_thresholds, )) + self.tp_list = np.zeros((num_thresholds, )) + self.fn_list = np.zeros((num_thresholds, )) + self.tn_list = np.zeros((num_thresholds, )) + self.fp_list = np.zeros((num_thresholds, )) def update(self, labels, predictions, axis=1): if not _is_numpy_(labels): @@ -350,12 +350,12 @@ class Auc(MetricBase): tp, fn, tn, fp = 0, 0, 0, 0 for i, lbl in enumerate(labels): if lbl: - if predictions[i, 0] >= thresh: + if predictions[i, 1] >= thresh: tp += 1 else: fn += 1 else: - if predictions[i, 0] >= thresh: + if predictions[i, 1] >= thresh: fp += 1 else: tn += 1 diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index bbedf6fde0872fd32d81c103bf5fe61449b7f57b..9b3f2aebee73e56ee820dc8ff4c9cfabd1456aaa 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -26,16 +26,87 @@ def simple_img_conv_pool(input, filter_size, pool_size, pool_stride, - act, - param_attr=None, + pool_padding=0, pool_type='max', + global_pooling=False, + conv_stride=1, + conv_padding=0, + conv_dilation=1, + conv_groups=1, + param_attr=None, + bias_attr=None, + act=None, use_cudnn=True, use_mkldnn=False): + """ + The simple_img_conv_pool is composed of one Convolution2d and one Pool2d. + + Args: + input (Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of filters. It is the same as the number + of output feature channels. + filter_size (int|list|tuple): The filter size. If filter_size is a list or + tuple, it must contain two integers, (filter_size_H, filter_size_W). Otherwise, + the filter_size_H = filter_size_W = filter_size.
+ pool_size (int|list|tuple): The pooling size of Pool2d layer. If pool_size + is a list or tuple, it must contain two integers, (pool_size_H, pool_size_W). + Otherwise, the pool_size_H = pool_size_W = pool_size. + pool_stride (int|list|tuple): The pooling stride of Pool2d layer. If pool_stride + is a list or tuple, it must contain two integers, (pooling_stride_H, pooling_stride_W). + Otherwise, the pooling_stride_H = pooling_stride_W = pool_stride. + pool_padding (int|list|tuple): The padding of Pool2d layer. If pool_padding is a list or + tuple, it must contain two integers, (pool_padding_H, pool_padding_W). + Otherwise, the pool_padding_H = pool_padding_W = pool_padding. Default 0. + pool_type (str): Pooling type can be :math:`max` for max-pooling and :math:`avg` for + average-pooling. Default :math:`max`. + global_pooling (bool): Whether to use global pooling. If global_pooling = true, + pool_size and pool_padding will be ignored. Default: False + conv_stride (int|list|tuple): The stride size of the Conv2d Layer. If stride is a + list or tuple, it must contain two integers, (conv_stride_H, conv_stride_W). Otherwise, + the conv_stride_H = conv_stride_W = conv_stride. Default: conv_stride = 1. + conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is + a list or tuple, it must contain two integers, (conv_padding_H, conv_padding_W). + Otherwise, the conv_padding_H = conv_padding_W = conv_padding. Default: conv_padding = 0. + conv_dilation (int|list|tuple): The dilation size of the Conv2d Layer. If dilation is + a list or tuple, it must contain two integers, (conv_dilation_H, conv_dilation_W). + Otherwise, the conv_dilation_H = conv_dilation_W = conv_dilation. Default: conv_dilation = 1. + conv_groups (int): The number of groups of the Conv2d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: groups=1 + param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None + bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None + act (str): Activation type for Conv2d. Default: None + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled + with mkldnn library. Default: False + + Return: + Variable: The result of input after Convolution2d and Pool2d. + + Examples: + ..
code-block:: python + + img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') + conv_pool = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") + """ conv_out = layers.conv2d( input=input, num_filters=num_filters, filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, param_attr=param_attr, + bias_attr=bias_attr, act=act, use_cudnn=use_cudnn, use_mkldnn=use_mkldnn) @@ -45,6 +116,8 @@ def simple_img_conv_pool(input, pool_size=pool_size, pool_type=pool_type, pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, use_cudnn=use_cudnn, use_mkldnn=use_mkldnn) return pool_out @@ -60,11 +133,65 @@ def img_conv_group(input, conv_with_batchnorm=False, conv_batchnorm_drop_rate=0.0, pool_stride=1, - pool_type=None, + pool_type="max", use_cudnn=True, use_mkldnn=False): """ - Image Convolution Group, Used for vgg net. + The Image Convolution Group is composed of Convolution2d, BatchNorm, DropOut, + and Pool2d. According to the input arguments, img_conv_group will perform a + series of computations on the input using Convolution2d, BatchNorm and DropOut, + and pass the last result to Pool2d. + + Args: + input (Variable): The input image with [N, C, H, W] format. + conv_num_filter(list|tuple): Indicates the number of filters of each Conv2d + Layer in this group. + pool_size (int|list|tuple): The pooling size of Pool2d Layer. If pool_size + is a list or tuple, it must contain two integers, (pool_size_H, pool_size_W). + Otherwise, the pool_size_H = pool_size_W = pool_size. + conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is + a list or tuple, its length must be equal to the length of conv_num_filter. + Otherwise the conv_padding of all Conv2d Layers is the same. Default 1. + conv_filter_size (int|list|tuple): The filter size. If filter_size is a list or + tuple, its length must be equal to the length of conv_num_filter. + Otherwise the conv_filter_size of all Conv2d Layers is the same. Default 3. + conv_act (str): Activation type for Conv2d Layer that is not followed by BatchNorm. + Default: None. + param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None + conv_with_batchnorm (bool|list): Indicates whether to use BatchNorm after Conv2d Layer. + If conv_with_batchnorm is a list, its length must be equal to the length of + conv_num_filter. Otherwise, conv_with_batchnorm indicates whether all the + Conv2d Layers are followed by BatchNorm. Default False. + conv_batchnorm_drop_rate (float|list): Indicates the drop_rate of Dropout Layer + after BatchNorm. If conv_batchnorm_drop_rate is a list, its length must be + equal to the length of conv_num_filter. Otherwise, the drop_rate of all Dropout + Layers is conv_batchnorm_drop_rate. Default 0.0. + pool_stride (int|list|tuple): The pooling stride of Pool2d layer. If pool_stride + is a list or tuple, it must contain two integers, (pooling_stride_H, + pooling_stride_W). Otherwise, the pooling_stride_H = pooling_stride_W = pool_stride. + Default 1. + pool_type (str): Pooling type can be :math:`max` for max-pooling and :math:`avg` for + average-pooling. Default :math:`max`. + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled + with mkldnn library.
Default: False + + Return: + Variable: The final result after serial computation using Convolution2d, + BatchNorm, DropOut, and Pool2d. + + Examples: + .. code-block:: python + + img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') + conv_pool = fluid.nets.img_conv_group(input=img, + conv_padding=1, + conv_num_filter=[3, 3], + conv_filter_size=3, + conv_act="relu", + pool_size=2, + pool_stride=2) """ tmp = input assert isinstance(conv_num_filter, list) or \ @@ -74,6 +201,7 @@ def img_conv_group(input, if not hasattr(obj, '__len__'): return [obj] * len(conv_num_filter) else: + assert len(obj) == len(conv_num_filter) return obj conv_padding = __extend_list__(conv_padding) @@ -119,6 +247,39 @@ def sequence_conv_pool(input, param_attr=None, act="sigmoid", pool_type="max"): + """ + The sequence_conv_pool is composed of Sequence Convolution and Pooling. + + Args: + input (Variable): The input of sequence_conv, which supports variable-length + input sequences. The underlying data of input is a matrix with shape + (T, N), where T is the total number of time steps in this mini-batch and N is + the input_hidden_size. + num_filters(int): The number of filters. + filter_size (int): The filter size. + param_attr (ParamAttr): The parameters to the Sequence_conv Layer. Default: None. + act (str): Activation type for Sequence_conv Layer. Default: "sigmoid". + pool_type (str): Pooling type can be :math:`max` for max-pooling, :math:`average` for + average-pooling, :math:`sum` for sum-pooling, :math:`sqrt` for sqrt-pooling. + Default :math:`max`. + + Return: + Variable: The final result after Sequence Convolution and Pooling. + + Examples: + .. code-block:: python + + input_dim = len(word_dict) + emb_dim = 128 + hid_dim = 512 + data = fluid.layers.data(name="words", shape=[1], dtype="int64", lod_level=1) + emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True) + seq_conv = fluid.nets.sequence_conv_pool(input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + """ conv_out = layers.sequence_conv( input=input, num_filters=num_filters, @@ -132,9 +293,9 @@ def sequence_conv_pool(input, def glu(input, dim=-1): """ - The gated linear unit composed by split, sigmoid activation and elementwise - multiplication. Specifically, Split the input into two equal sized parts - :math:`a` and :math:`b` along the given dimension and then compute as + The Gated Linear Units (GLU) is composed of split, sigmoid activation and element-wise + multiplication. Specifically, it splits the input into two equal-sized parts, + :math:`a` and :math:`b`, along the given dimension and then computes the following: .. math:: @@ -147,16 +308,16 @@ def glu(input, dim=-1): Args: input (Variable): The input variable which is a Tensor or LoDTensor. dim (int): The dimension along which to split. If :math:`dim < 0`, the - dimension to split along is :math:`rank(input) + dim`. + dimension to split along is :math:`rank(input) + dim`. Default -1. Returns: - Variable: The Tensor variable with half the size of input. + Variable: Variable with half the size of input. Examples: .. code-block:: python - # x is a Tensor variable with shape [3, 6, 9] - fluid.nets.glu(input=x, dim=1) # shape of output: [3, 3, 9] + data = fluid.layers.data(name="words", shape=[3, 6, 9], dtype="float32") + output = fluid.nets.glu(input=data, dim=1) # shape of output: [3, 3, 9] """ a, b = layers.split(input, num_or_sections=2, dim=dim) @@ -189,40 +350,48 @@ def scaled_dot_product_attention(queries, `_.
Args: - queries (Variable): The input variable which should be a 3-D Tensor. keys (Variable): The input variable which should be a 3-D Tensor. values (Variable): The input variable which should be a 3-D Tensor. num_heads (int): Head number to compute the scaled dot product - attention. Default value is 1. + attention. Default: 1. dropout_rate (float): The dropout rate to drop the attention weight. - Default value is 0. + Default: 0.0. Returns: - - Variable: A 3-D Tensor computed by multi-head scaled dot product \ - attention. + Variable: A 3-D Tensor computed by multi-head scaled dot product\ + attention. Raises: - ValueError: If input queries, keys, values are not 3-D Tensors. - NOTE: + NOTES: 1. When num_heads > 1, three linear projections are learned respectively - to map input queries, keys and values into queries', keys' and values'. - queries', keys' and values' have the same shapes with queries, keys - and values. - - 1. When num_heads == 1, scaled_dot_product_attention has no learnable - parameters. + to map input queries, keys and values into queries', keys' and values'. + queries', keys' and values' have the same shapes as queries, keys + and values. + 2. When num_heads == 1, scaled_dot_product_attention has no learnable + parameters. Examples: .. code-block:: python - # Suppose q, k, v are Tensors with the following shape: - # q: [3, 5, 9], k: [3, 6, 9], v: [3, 6, 10] - - contexts = fluid.nets.scaled_dot_product_attention(q, k, v) + queries = fluid.layers.data(name="queries", + shape=[3, 5, 9], + dtype="float32", + append_batch_size=False) + queries.stop_gradient = False + keys = fluid.layers.data(name="keys", + shape=[3, 6, 9], + dtype="float32", + append_batch_size=False) + keys.stop_gradient = False + values = fluid.layers.data(name="values", + shape=[3, 6, 10], + dtype="float32", + append_batch_size=False) + values.stop_gradient = False + contexts = fluid.nets.scaled_dot_product_attention(queries, keys, values) contexts.shape # [3, 5, 10] """ if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 0fdc9a035292b3390cece6c5821a60b1b281e54d..25cc1355d5a53e44b7f45c1f7d80673abcf567ec 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -27,6 +27,40 @@ BuildStrategy = core.ParallelExecutor.BuildStrategy class ParallelExecutor(object): + """ + ParallelExecutor can run a program in parallel. + + Args: + use_cuda (bool): Whether to use CUDA or not. + loss_name (str): The loss name, which must be set during training. Default None. + main_program (Program): The program that needs to run; if not provided, + then default_main_program will be used. Default None. + share_vars_from(ParallelExecutor): If provided, it will share variables + from the specified ParallelExecutor. Default None. + num_trainers(int): If greater than 1, NCCL will be initialized with + multiple ranks of nodes; each node should have the same number of GPUs. + Distributed training will then be enabled. Default 1. + trainer_id(int): Must be used together with num_trainers. trainer_id is the + "rank" of the current node and starts from 0. Default 0. + + Returns: + ParallelExecutor: The initialized ParallelExecutor object. + + Raises: + TypeError: If share_vars_from is provided, but not a ParallelExecutor object. + + Examples: + ..
code-block:: python + + train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name) + test_exe = fluid.ParallelExecutor(use_cuda=True, + main_program=test_program, + share_vars_from=train_exe) + + train_loss, = train_exe.run([loss.name], feed=feed_dict) + test_loss, = test_exe.run([loss.name], feed=feed_dict) + """ + def __init__(self, use_cuda, loss_name=None, @@ -37,42 +71,6 @@ class ParallelExecutor(object): num_trainers=1, trainer_id=0, **kwargs): - """ - ParallelExecutor can run program in parallel. - - Args: - use_cuda(bool): Whether to use CUDA or not. - loss_name(str, default None): The loss name must set in training. - main_program(Program, default None): The program that need to run, - if not provided, then default_main_program will be used. - share_vars_from(ParallelExecutor, default None): If provied, - it will share variables from the specified ParallelExecutor. - num_trainers(int, default 1): If greater than 1, NCCL will be - initialized with multpile rank of nodes, each node should have - same number of GPUs. Distributed training will be enabled then. - trainer_id(int, default 0): Must use together with num_trainers. - trainer_id is the "rank" of current node starts from 0. - - Returns: - A ParallelExecutor object. - - Raises: - TypeError: If share_vars_from is provided, but not ParallelExecutor - object. - - Examples: - .. code-block:: python - - train_exe = fluid.ParallelExecutor( - use_cuda=True, loss_name=loss.name) - test_exe = fluid.ParallelExecutor( - use_cuda=True, - main_program=test_program, - share_vars_from=train_exe) - - train_loss, = train_exe.run([loss.name], feed=feed_dict) - test_loss, = test_exe.run([loss.name], feed=feed_dict) - """ if len(kwargs) != 0: err_msg = "" for key in kwargs: @@ -131,10 +129,16 @@ class ParallelExecutor(object): main = main_program main = main if main else framework.default_main_program() scope = executor.global_scope() + # FIXME(Yancey1989): it's a temporary approach to determine whether this is a + # distributed training program; if so, call self.bcast_params() at the end of each mini-batch. + self.is_dist = True if "recv" in [ + op.type for op in main.global_block().ops + ] else False if share_vars_from and not isinstance(share_vars_from, ParallelExecutor): raise TypeError("share_vars_from must be ParallelExecutor.") + local_scopes = share_vars_from.executor.local_scopes( ) if share_vars_from else [] @@ -166,12 +170,14 @@ class ParallelExecutor(object): element in the list will be copied to each device directly. For example, if the feed is a dict: + >>> exe = ParallelExecutor() >>> # the image will be splitted into devices. If there is two devices >>> # each device will process an image with shape (24, 1, 28, 28) >>> exe.run(feed={'image': numpy.random.random(size=(48, 1, 28, 28))}) For example, if the feed is a list: + >>> exe = ParallelExecutor() >>> # each device will process each element in the list. >>> # the 1st device will process an image with shape (48, 1, 28, 28) @@ -182,18 +188,40 @@ class ParallelExecutor(object): >>> {"image": numpy.random.random(size=(32, 1, 28, 28))}, >>> ]) - Args: fetch_list(list): The fetched variable names feed(list|dict|None): The feed variables. If the feed is a dict, tensors in that dict will be splitted into each devices. If the feed is a list, each element of the list will be copied - to each device. + to each device. Default None. feed_dict: Alias for feed parameter, for backward compatibility. - This parameter is deprecated. + This parameter has been deprecated. Default None.
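+ + As a rough numpy sketch of the dict-feed split described above (this is only an illustration, assuming exactly two active devices; shapes match the example batch above): + + >>> import numpy + >>> batch = numpy.random.random(size=(48, 1, 28, 28)) + >>> halves = numpy.split(batch, 2) # each half has shape (24, 1, 28, 28)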
+ + Returns: + List: The fetched result list. + + Raises: + ValueError: If the feed is a list, but its length is not equal to the + length of active places, or if any of its elements is not a dict. + + NOTES: + 1. If the feed's type is dict, the number of samples fed to + ParallelExecutor must be bigger than the number of active places. + Otherwise, an exception will be thrown from the C++ side. Special + attention should be paid to checking whether the last batch of the + dataset is bigger than the number of active places. + 2. If there is more than one active place, the fetched result for each + variable is a list, and each element of this list is the copy of the + variable on the respective active place. - Returns: fetched result list. + Examples: + .. code-block:: python + + pe = fluid.ParallelExecutor(use_cuda=use_cuda, + loss_name=avg_cost.name, + main_program=fluid.default_main_program()) + loss = pe.run(feed=feeder.feed(cur_batch), + fetch_list=[avg_cost.name]) """ if feed is None and feed_dict is not None: feed = feed_dict @@ -238,9 +266,17 @@ class ParallelExecutor(object): fetch_var_name = '@FETCHED_VAR_NAME@' self.executor.run(fetch_list, fetch_var_name) arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array() + + if self.is_dist: + self.bcast_params() + return [arr[i] for i in range(len(arr))] def bcast_params(self): + """ + Broadcast the parameters to other devices. It is used during + distributed training. + """ self.executor.bcast_params(set(self.persistable_vars)) @property diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index 1c6970441bccdc1c1221503256c30c83502bd123..0a42b9fca8dba7a11b414990be6c04c93158864f 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -22,6 +22,35 @@ __all__ = [ class ParamAttr(object): + """ + Parameter attributes object. To fine-tune the network training process, users + can set a parameter's attributes to control training details, such as the + learning rate, regularization, trainability, model averaging and the method + used to initialize the parameter. + + Args: + name(str): The parameter's name. Default None. + initializer(Initializer): The method to initialize this parameter. Default None. + learning_rate(float): The parameter's learning rate. The effective learning + rate when optimizing is :math:`global\_lr * parameter\_lr * scheduler\_factor`. + Default 1.0. + regularizer(WeightDecayRegularizer): Regularization factor. Default None. + trainable(bool): Whether this parameter is trainable. Default True. + gradient_clip(BaseGradientClipAttr): The method to clip this parameter's + gradient. Default None. + do_model_average(bool): Whether this parameter should do model averaging. + Default False. + + Examples: + .. code-block:: python + + w_param_attrs = fluid.ParamAttr(name="fc_weight", + learning_rate=0.5, + regularizer=fluid.L2Decay(1.0), + trainable=True) + y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs) + """ + def __init__(self, name=None, initializer=None, @@ -29,7 +58,7 @@ class ParamAttr(object): regularizer=None, trainable=True, gradient_clip=None, - do_model_average=None): + do_model_average=False): self.name = name self.initializer = initializer self.learning_rate = learning_rate @@ -39,6 +68,16 @@ class ParamAttr(object): self.model_average = do_model_average def set_default_initializer(self, initializer): + """ + Set the default initializer; the initializer should be Constant, + Uniform, Normal, Xavier or MSRA. + + Args: + initializer(Initializer): the initializer to set.
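+ + A minimal usage sketch (the attribute name and the Xavier choice here are illustrative): + + >>> import paddle.fluid as fluid + >>> attr = fluid.ParamAttr(name="fc_w") + >>> attr.set_default_initializer(fluid.initializer.Xavier())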
+ + Returns: + None + """ if initializer is None: if self.initializer is None: raise ValueError("ParamAttr.initializer is not set") @@ -50,13 +89,45 @@ class ParamAttr(object): self.initializer = initializer def set_default_param_initializer(self): + """ + Set the default initializer for the parameter with Xavier. + + Args: + None. + + Returns: + None. + """ self.set_default_initializer(Xavier()) def set_default_bias_initializer(self): + """ + Set the default initializer for the bias with Constant(0.0). + + Args: + None. + + Returns: + None. + """ self.set_default_initializer(Constant(0.0)) @staticmethod def to_attr(arg): + """ + Create ParamAttr[s]. + + Args: + arg: Arguments to initialize ParamAttr[s]. arg's type can be + str, Initializer, float, WeightDecayRegularizer, BaseGradientClipAttr, + bool, ParamAttr, or a list of the above types. + + Returns: + ParamAttr[s]: ParamAttr[s] initialized with arg. + + Raises: + TypeError: If arg cannot be cast to a ParamAttr. + """ if arg is None: return ParamAttr() elif isinstance(arg, list) or isinstance(arg, tuple): @@ -75,6 +146,15 @@ class ParamAttr(object): raise TypeError("{0} cast to ParamAttr".format(type(arg))) def to_kwargs(self, with_initializer=False): + """ + Returns the attributes of this parameter. + + Args: + with_initializer(bool): Whether to add the initializer attr. + + Returns: + Parameter attributes(map): The attributes of this parameter. + """ kwargs = { 'name': self.name, 'optimize_attr': { @@ -92,9 +172,27 @@ class ParamAttr(object): class WeightNormParamAttr(ParamAttr): """ - Used for weight normalization. Any field in ParamAttr can also be set here. - Besides, an extra field dim can be set to indicate the dimension except - which to normalize. + Used for weight normalization. Weight Norm is a reparameterization of the weight vectors + in a neural network that decouples the length of those weight vectors from + their direction. Weight Norm has been implemented as discussed in this + paper: `Weight Normalization: A Simple Reparameterization to Accelerate + Training of Deep Neural Networks + <https://arxiv.org/abs/1602.07868>`_. + + Args: + dim(int): The dimension except which the weight is normalized. Default None. + kwargs: Any field in ParamAttr. Default None. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32") + fc = fluid.layers.fc(input=data, + size=1000, + param_attr=WeightNormParamAttr( + dim=None, + name='weight_norm_param')) + """ # List to record the parameters reparameterized by weight normalization. # If these parameters are treated as Variable rather than Parameter, diff --git a/python/paddle/fluid/recordio_writer.py b/python/paddle/fluid/recordio_writer.py index 8d48e9abef0fb9861284c6302b30efb0e3994989..bd57772713057f12b876942de58ee43527e94834 100644 --- a/python/paddle/fluid/recordio_writer.py +++ b/python/paddle/fluid/recordio_writer.py @@ -36,6 +36,45 @@ def convert_reader_to_recordio_file( compressor=core.RecordIOWriter.Compressor.Snappy, max_num_records=1000, feed_order=None): + """ + Convert a Python Reader to a recordio file. + + Please see :ref:`api_guide_python_reader` and :ref:`api_guide_reader_op` for + details.
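+ + The :code:`reader_creator` below can be any batched reader; a hand-rolled sketch (with made-up random data and illustrative shapes) would be: + + >>> import numpy + >>> + >>> def random_reader_creator(): + >>> for _ in range(10): # 10 batches + >>> # one batch: a list of 32 (image, label) samples + >>> yield [(numpy.random.random(784).astype("float32"), 0)] * 32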
+ + Examples: + + >>> import paddle.fluid as fluid + >>> import paddle.dataset.mnist as mnist + >>> import paddle + >>> + >>> tmp_program = fluid.Program() + >>> with fluid.program_guard(tmp_program): + >>> img = fluid.layers.data(name='img', shape=[784]) + >>> label = fluid.layers.data(name='label', shape=[1], dtype='int64') + >>> feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace()) + >>> # mnist.recordio will be generated in current directory + >>> fluid.recordio_writer.convert_reader_to_recordio_file( + >>> filename="mnist.recordio", + >>> reader_creator=paddle.batch(mnist.train(), batch_size=32), + >>> feeder=feeder) + + Args: + filename(str): The recordio filename. + reader_creator(callable): The Python Reader Creator. See + :ref:`api_guide_python_reader`. + feeder(DataFeeder): The DataFeeder instance. Used to convert the output of + :code:`reader_creator` to :code:`lod_tensor`. + compressor: Must be in fluid.core.RecordIOWriter.Compressor.Snappy or + fluid.core.RecordIOWriter.Compressor.NoCompress. Use :code:`Snappy` + by default. + max_num_records(int): Maximum number of records in one chunk. Each record + is one return value of the reader function. + feed_order(list): The order of variable names that the reader returns. + + Returns: + int: the number of records saved. + """ if feed_order is None: feed_order = feeder.feed_names counter = 0 @@ -58,6 +97,17 @@ def convert_reader_to_recordio_files( compressor=core.RecordIOWriter.Compressor.Snappy, max_num_records=1000, feed_order=None): + """ + Convert a Python Reader to multiple recordio files. + + This API is basically the same as :code:`convert_reader_to_recordio_file`, + except that it creates many recordio files. Each file contains at + most :code:`batch_per_file` records. + + Please refer to + :ref:`api_fluid_recordio_writer_convert_reader_to_recordio_file` for more + details. + """ if feed_order is None: feed_order = feeder.feed_names f_name, f_ext = os.path.splitext(filename) diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index c6687e8ad7fcc45c82d6dcb2256e9055a81cc61c..5d9a47c9ba3db07f240b42732536f1ea37627a11 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -194,16 +194,16 @@ def train(word_dict, if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index b1a6b524d33cae97c8982ffb8f780b1b07761a09..74f96f456a8dc917b715d0f4308bb5ea41947f0b 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -69,16 +69,16 @@ def train(use_cuda, save_dirname, is_local): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index 0f3a4c9242a81a3c1fb90268245715a8e59a207a..a2fb186b86c9706ac1aff0de49defbfb06e2eb0f 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -178,16 +178,16 @@ def train(net_type, use_cuda, save_dirname, is_local): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index 99d51ae0076178aca50e36c2c187257a8ba1cbf2..e214ced0b5593c60ebd4a69edff1e961bcb4a72a 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -209,16 +209,16 @@ def train(use_cuda, save_dirname=None, is_local=True): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index 23e5900f127a7a3253c551f8f7fbceba08382209..372d6ec8223f69b69663137a646ba591108c40b7 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -200,16 +200,16 @@ def train_main(use_cuda, is_sparse, is_local=True): if is_local: train_loop(framework.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 25bcb8a64103b845adbe2017120ce8d945faf6dd..5f5c8544bbdb87421f129b201a0ebaf4cb8602a1 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -151,16 +151,16 @@ def train(nn_type, if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 65d6552acc9b3d31a97a45290e4613a633fffa3c..937d8dd5b065f0c1fdfc052b0342b572e3fbd7ac 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -220,16 +220,16 @@ def train(use_cuda, save_dirname, is_local=True): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index 3118d88701e5f64ae50f7ee774ea8174aa7758eb..75bed06bd7a9b311ff9466589d6ecab2c37471ce 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -125,16 +125,16 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py index 87c11e7880e73b911f21dda77c1cc2b4850b3591..b04f25ef874cc6204211a4f5f5991a0ec8c473dd 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py @@ -15,6 +15,7 @@ import unittest import numpy as np from op_test import OpTest +import paddle.fluid.core as core def bilinear_interp_np(input, out_h, out_w, out_size): @@ -45,9 +46,9 @@ def bilinear_interp_np(input, out_h, out_w, out_size): out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] + w1lambda*input[:, :, h, w+wid]) + \ - h1lambda*(w2lambda*input[:, :, h+hid, w] + - w1lambda*input[:, :, h+hid, w+wid]) - return out.astype("float32") + h1lambda*(w2lambda*input[:, :, h+hid, w] + + w1lambda*input[:, :, h+hid, w+wid]) + return out.astype(input.dtype) class TestBilinearInterpOp(OpTest): @@ -122,5 +123,44 @@ class TestCase6(TestBilinearInterpOp): self.out_size = np.array([65, 129]).astype("int32") +class TestBilinearInterpOpUint8(OpTest): + def setUp(self): + self.out_size = None + self.init_test_case() + self.op_type = "bilinear_interp" + input_np = np.random.randint( + low=0, high=256, size=self.input_shape).astype("uint8") + output_np = bilinear_interp_np(input_np, self.out_h, self.out_w, + self.out_size) + self.inputs = {'X': input_np} + if self.out_size is not None: + self.inputs['OutSize'] = self.out_size + self.attrs = {'out_h': self.out_h, 'out_w': self.out_w} + self.outputs = {'Out': output_np} + + def test_check_output(self): + self.check_output_with_place(place=core.CPUPlace(), atol=1) + + def 
init_test_case(self): + self.input_shape = [1, 3, 9, 6] + self.out_h = 10 + self.out_w = 9 + + +class TestCase1Uint8(TestBilinearInterpOpUint8): + def init_test_case(self): + self.input_shape = [2, 3, 128, 64] + self.out_h = 120 + self.out_w = 50 + + +class TestCase2Uint8(TestBilinearInterpOpUint8): + def init_test_case(self): + self.input_shape = [4, 1, 7, 8] + self.out_h = 5 + self.out_w = 13 + self.out_size = np.array([6, 15]).astype("int32") + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index efc28d899304b01a3085891f3ae9396d57c589a1..45ab889beaa1355d0e1e2922aedf0340f70809ba 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -33,23 +33,59 @@ __all__ = [ class BeginEpochEvent(object): + """ + The beginning of a training epoch. + + Args: + epoch_id(int): The current epoch ID. + """ + def __init__(self, epoch_id): self.epoch = epoch_id class EndEpochEvent(object): + """ + The end of a training epoch. + + Args: + epoch_id(int): The current epoch ID. + """ + def __init__(self, epoch_id): self.epoch = epoch_id class BeginStepEvent(object): + """ + The beginning of a training step. + + Args: + epoch_id(int): The current epoch ID. + step_id(int): The current step ID. + """ + def __init__(self, epoch_id, step_id): self.epoch = epoch_id self.step = step_id self.fetch_metrics = True + """ + If fetch_metrics is True, the metrics will be fetched at the + EndStepEvent. Default is True. + """ class EndStepEvent(object): + """ + The end of a training step. + + Args: + epoch_id(int): The current epoch ID. + step_id(int): The current step ID. + metrics(list): A list of fetched tensors. The order of this list is the + same as that of the :code:`train_func` return values. + """ + def __init__(self, epoch_id, step_id, metrics): self.epoch = epoch_id self.step = step_id @@ -57,6 +93,27 @@ class EndStepEvent(object): class CheckpointConfig(object): + """ + Parameter object for :code:`fluid.io.save_checkpoint` and + :code:`fluid.Trainer`. Used to configure how to save checkpoints. + + Args: + checkpoint_dir(str): Directory path to save checkpoints. Default is the + current directory. + + max_num_checkpoints(int): The max number of local checkpoints. + epoch_interval(int): The number of epochs between two checkpoints. + step_interval(int): The number of steps between two checkpoints. + + Examples: + >>> config = fluid.CheckpointConfig("./checkpoints") + >>> trainer = fluid.Trainer(train_func=train_program, + >>> place=place, + >>> optimizer_func=optimizer_func, + >>> checkpoint_config=config) + >>> trainer.train(...) + """ + def __init__(self, checkpoint_dir=None, max_num_checkpoints=3, @@ -113,11 +170,62 @@ def check_and_get_place(place): class Trainer(object): """ + A trainer wraps MultiGPU/MultiNode training loops and can be used to train a + simple neural network easily. + + This API takes a :code:`train_func`. A :code:`train_func` is a function that + returns loss as its first return value. The rest of the return values can be + fetched from EndStepEvent.metrics. + + This API also takes an :code:`optimizer_func` that will return an optimizer + instance.
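+ + A minimal sketch of the :code:`train_func` contract (the names below are illustrative; the loss must come first, and every returned variable is fetched into :code:`EndStepEvent.metrics` in the same order): + + >>> def train_func_with_acc(): + >>> img = fluid.layers.data(name='image', shape=[784]) + >>> label = fluid.layers.data(name='label', shape=[1], dtype='int64') + >>> prediction = fluid.layers.fc(input=img, size=10, act='softmax') + >>> loss = fluid.layers.mean(fluid.layers.cross_entropy(prediction, label)) + >>> acc = fluid.layers.accuracy(input=prediction, label=label) + >>> return loss, acc # metrics[0] is the loss, metrics[1] is acc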
+ + For example, to train an MLP on the MNIST dataset, the sample program is + + >>> import paddle.fluid as fluid + >>> + >>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10): + >>> hidden = image + >>> for layer_size in layer_sizes: + >>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation) + >>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax") + >>> + >>> def train_mnist_mlp(): + >>> img = fluid.layers.data(name='image', shape=[784]) + >>> label = fluid.layers.data(name='label', shape=[1], dtype='int64') + >>> prediction = mlp(img) + >>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label)) + >>> + >>> def optimizer(): + >>> return fluid.optimizer.Adam() + >>> + >>> trainer = Trainer(train_func=train_mnist_mlp, + >>> optimizer_func=optimizer, + >>> place=fluid.CUDAPlace(0), + >>> parallel=True) + >>> + >>> def train_callback(event): + >>> if isinstance(event, fluid.EndStepEvent): + >>> print "Epoch ID", event.epoch, "Step ID",\ + >>> event.step, "AvgLoss", event.metrics[0] + >>> elif isinstance(event, fluid.EndEpochEvent): + >>> trainer.save_params("./model_{0}".format(event.epoch)) + >>> + >>> trainer.train(num_epochs=100, event_handler=train_callback) + + For more examples, please see :ref:`api_guide_high_level_api`. + Args: - train_func(callable): A function which will return loss. The loss must be a scalar. + train_func(callable): A function which will return loss. The loss must be + a scalar tensor. optimizer_func(callable): A function that returns an Optimizer object. - place: The device place of this trainer. + place(CUDAPlace|CPUPlace): The device place of this trainer. If + :code:`parallel=True`, all CUDA Places will be used if :code:`place` + is a :code:`CUDAPlace`. + parallel(bool): True to use multiple devices. + checkpoint_config(CheckpointConfig): Configuration about how to save + checkpoints. """ def __init__(self, @@ -129,9 +237,6 @@ class Trainer(object): checkpoint_config=None): self.__stop = False self.parallel = parallel - # 1. we need to generate a framework.Program by calling - # program_func. Reference: fluid.program_guard in - # test_word2vec.py # config for checkpoint # only chief worker will save variables @@ -145,6 +250,10 @@ class Trainer(object): self.scope = core.Scope() + # 1. we need to generate a framework.Program by calling + # program_func. Reference: fluid.program_guard in + # test_word2vec.py + self.startup_program = framework.Program() self.train_program = framework.Program() @@ -277,17 +386,18 @@ class Trainer(object): def train(self, num_epochs, event_handler, reader=None, feed_order=None): """ - Train the model. + Start the train loop to train the model. Args: - num_epochs: The number of epoch. An epoch will process all data in reader - event_handler: The event handler. A function with type (ev:Event)->void - reader: - feed_order: Feeding order of reader. None will following the defining + num_epochs(int): The number of epochs. An epoch will process all data in the reader + event_handler(callable): The event handler. A function with type (ev:Event)->void + reader(callable): A reader creator object. See also + :ref:`api_guide_python_reader`. + feed_order(list): Feeding order of reader. None will follow the defining order in program Returns: - + None """ training_role = os.getenv("PADDLE_TRAINING_ROLE", "") if training_role == "PSERVER": @@ -307,16 +417,24 @@ class Trainer(object): Test the model on given test data Args: - reader: The reader that yields test data.
- feed_order: Feeding order of reader. None will following the defining - order in program + reader(callable): The reader that yields test data. + feed_order(list): Feeding order of reader. None will follow the + defining order in the program """ return self._test_by_executor(reader, feed_order, self.train_func_outputs) def save_params(self, param_path): - # reference: save_persistables in io.py + """ + Save all parameters into :code:`param_path`. + + Args: + param_path(str): The path to save parameters. + + Returns: + None + """ with self._prog_and_scope_guard(): exe = executor.Executor(self.place) io.save_persistables(exe, dirname=param_path) diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index e94abfc006c6fbf85bfa3d0286d77c1d010eedef..dc0ec6b8c6c1445c43f1678757ed451ccd571570 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -12,14 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Transpile the program to distributed data-parallelism programs. -The main_program will be transformed to use a remote parameter server -to do parameter optimization. And the optimization graph will be put -into a parameter server program. - -Use different methods to split trainable variables to different -parameter servers. - Steps to transpile trainer: 1. split variable to multiple blocks, aligned by product(dim[1:]) (width). 2. rename splited grad variables to add trainer_id suffix ".trainer_%d". @@ -117,129 +109,41 @@ def slice_variable(var_list, slice_count, min_block_size=8192): return blocks -class DistributeTranspiler: - def _has_distributed_lookup_table(self): - # process lookup_table_op - # 1. check all lookup_table_op is distributed - # 2. check all lookup_table_op share the same table. - distributed_lookup_table_ops = [] - # support only one distributed_lookup_table now - self.table_name = None - for op in self.origin_program.global_block().ops: - if op.type == LOOKUP_TABLE_TYPE: - if op.attrs['is_distributed'] is True: - if self.table_name is None: - self.table_name = op.input("W")[0] - if self.table_name != op.input("W")[0]: - raise RuntimeError("all distributed lookup_table_ops" - " should have only one table") - distributed_lookup_table_ops.append(op) - else: - if self.table_name is not None: - assert op.input("W")[0] != self.table_name - - return len(distributed_lookup_table_ops) > 0 - - def _update_dist_lookup_table_vars(self, param_list, grad_list, - params_grads): - # TODO(wuyi): put find a way to put dist lookup table stuff all together.
- # update self.table_param_grad and self.trainer_side_table_grad_list - program = self.origin_program - if self.has_distributed_lookup_table: - param_list = [ - param for param in param_list if param.name != self.table_name - ] - grad_list = [ - grad for grad in grad_list - if grad.name != grad_var_name(self.table_name) - ] - self.table_param_grad = [ - param_grad for param_grad in params_grads - if param_grad[0].name == self.table_name - ][0] - table_grad_var = self.table_param_grad[1] - if self.sync_mode: - self.trainer_side_table_grad_list = [ - program.global_block().create_var( - name="%s.trainer_%d.pserver_%d" % - (table_grad_var.name, self.trainer_id, index), - type=table_grad_var.type, - shape=table_grad_var.shape, - dtype=table_grad_var.dtype) - for index in range(len(self.pserver_endpoints)) - ] - else: - self.trainer_side_table_grad_list = [ - program.global_block().create_var( - name="%s.pserver_%d" % (table_grad_var.name, index), - type=table_grad_var.type, - shape=table_grad_var.shape, - dtype=table_grad_var.dtype) - for index in range(len(self.pserver_endpoints)) - ] - return param_list, grad_list - - def _init_splited_vars(self, slice_var_up): - # update these mappings for further transpile: - # 1. param_var_mapping: param var name -> [splited params vars] - # 2. grad_var_mapping: grad var name -> [splited grads vars] - # 3. grad_param_mapping: grad.blockx -> param.blockx - # 4. param_grad_ep_mapping: ep -> {"params": [], "grads": []} - - param_list = [] - grad_list = [] - param_grad_set = set() - for p, g in self.params_grads: - # skip parameter marked not trainable - if type(p) == Parameter and p.trainable == False: - continue - if p.name not in param_grad_set: - param_list.append(p) - param_grad_set.add(p.name) - if g.name not in param_grad_set: - grad_list.append(g) - param_grad_set.add(g.name) - - param_list, grad_list = self._update_dist_lookup_table_vars( - param_list, grad_list, self.params_grads) - - if slice_var_up: - # when we slice var up into blocks, we will slice the var according to - # pserver services' count. A pserver may have two or more listening ports. - grad_blocks = slice_variable(grad_list, len(self.pserver_endpoints)) - param_blocks = slice_variable(param_list, - len(self.pserver_endpoints)) - else: - # when we do NOT slice var up into blocks, we will always slice params - # grads into one block. - grad_blocks = slice_variable(grad_list, 1) - param_blocks = slice_variable(param_list, 1) - assert (len(grad_blocks) == len(param_blocks)) - - # origin_varname -> [splited_var] - self.param_var_mapping = self._create_vars_from_blocklist( - self.origin_program, param_blocks) - self.grad_var_mapping = self._create_vars_from_blocklist( - self.origin_program, - grad_blocks, - add_trainer_suffix=self.trainer_num > 1) - self.grad_param_mapping = dict() - for g, p in zip(grad_blocks, param_blocks): - g_name, g_bid, _ = g.split(":") - p_name, p_bid, _ = p.split(":") - self.grad_param_mapping[self.grad_var_mapping[g_name][int(g_bid)]] = \ - self.param_var_mapping[p_name][int(p_bid)] - - # create mapping of endpoint -> split var to create pserver side program - self.param_grad_ep_mapping = dict() - [ - self.param_grad_ep_mapping.update({ - ep: { - "params": [], - "grads": [] - } - }) for ep in self.pserver_endpoints - ] +class DistributeTranspiler(object): + """ + **DistributeTranspiler** + + Convert the fluid program to distributed data-parallelism programs. + + The main_program will be transformed to use a remote parameter server + to do parameter optimization. 
And the optimization graph will be put
+    into a parameter server program.
+
+    Examples:
+        .. code-block:: python
+
+            # Define your model before this code.
+            port = os.getenv("PADDLE_PSERVER_PORT", "6174")
+            pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
+            eplist = []
+            for ip in pserver_ips.split(","):
+                eplist.append(':'.join([ip, port]))
+            pserver_endpoints = ",".join(eplist)
+            trainers = int(os.getenv("PADDLE_TRAINERS"))
+            current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
+            trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
+            role = os.getenv("PADDLE_TRAINING_ROLE")
+
+            t = distribute_transpiler.DistributeTranspiler()
+            t.transpile(
+                trainer_id, pservers=pserver_endpoints, trainers=trainers)
+            if role == "PSERVER":
+                pserver_program = t.get_pserver_program(current_endpoint)
+                pserver_startup_program = t.get_startup_program(current_endpoint,
+                                                                pserver_program)
+            elif role == "TRAINER":
+                trainer_program = t.get_trainer_program()
+    """

     def transpile(self,
                   trainer_id,
@@ -250,15 +154,20 @@ class DistributeTranspiler:
                   split_method=RoundRobin,
                   sync_mode=True):
         """
+        Run the transpiler.
+
         Args:
-            trainer_id(int): one unique id for each trainer in a job.
-            program(Program): program to transpile, default is default_main_program
-            pservers(string): parameter server endpoints like "m1:6174,m2:6174"
-            trainers(int): total number of workers/trainers in the job
-            split_method(PSDispatcher): A function to determin how to split variables
-                to different servers equally.
-            sync_mode(boolean): if sync_mode is set True, it means that dist transpiler
-                will transpile the program into sync_mode pserver and trainer program.
+            trainer_id (int): id for the current trainer worker. If you have
+                n workers, the id ranges from 0 to n-1.
+            program (Program|None): program to transpile,
+                default is fluid.default_main_program().
+            pservers (str): comma separated ip:port string for the pserver
+                list.
+            trainers (int): number of trainers in the distributed job.
+            slice_var_up (bool): whether to slice Tensors into blocks when
+                distributing them to pservers, default is True.
+            split_method (PSDispatcher): RoundRobin or HashName can be used;
+                choose the one that best balances the load across pservers.
+            sync_mode (bool): whether to do synchronous training, default is True.
         """
         assert (split_method.__bases__[0] == PSDispatcher)
         if program is None:
@@ -385,6 +294,12 @@ class DistributeTranspiler:
             self._split_table_grad_and_add_send_vars(program, pserver_endpoints)

     def get_trainer_program(self):
+        """
+        Get transpiled trainer side program.
+
+        Returns:
+            Program: trainer side program.
+        """
         # remove optimize ops and add a send op to main_program
         delete_ops(self.origin_program.global_block(), self.optimize_ops)
         # FIXME(typhoonzero): serialize once will fix error occurs when clone.
@@ -393,17 +308,19 @@ class DistributeTranspiler:

     def get_pserver_program(self, endpoint):
         """
-        Get pserver side program using the endpoint.
-        TODO(panyx0718): Revisit this assumption. what if #blocks > #pservers.
-        NOTE: assume blocks of the same variable is not distributed
-        on the same pserver, only change param/grad varnames for
-        trainers to fetch.
+        Get parameter server side program.
+
         Args:
-            endpoint(string): the endpoint for the current pserver instance.
-
-        Returns(Program): the pserver program
-
+            endpoint (str): current parameter server endpoint.
+
+        Returns:
+            Program: the program for current parameter server to run.
         """
+        # TODO(panyx0718): Revisit this assumption. what if #blocks > #pservers.
+ # NOTE: assume blocks of the same variable is not distributed + # on the same pserver, only change param/grad varnames for + # trainers to fetch. + # step1 pserver_program = Program() # step2: Create vars to receive vars at parameter servers. @@ -481,7 +398,7 @@ class DistributeTranspiler: def __clone_lr_op_sub_block__(op, program, new_block, skip_sub_blks): if not op.has_attr('sub_block'): - return -1 + return origin_block_desc = op.attr('sub_block') origin_block = self.origin_program.block(origin_block_desc.id) @@ -587,11 +504,14 @@ class DistributeTranspiler: Get startup program for current parameter server. Modify operator input variables if there are variables that were split to several blocks. - Args: - endpoint(string): the endpoint for the current pserver instance. - pserver_program(Program): the program for pserver to execute. - Returns(Program): the startup program for pserver + Args: + endpoint (str): current pserver endpoint. + pserver_program (Program): call get_pserver_program first and + pass the result here. + + Returns: + Program: parameter server side startup program. """ s_prog = Program() orig_s_prog = default_startup_program() @@ -643,6 +563,129 @@ class DistributeTranspiler: # ====================== private transpiler functions ===================== + def _has_distributed_lookup_table(self): + # process lookup_table_op + # 1. check all lookup_table_op is distributed + # 2. check all lookup_table_op share the same table. + distributed_lookup_table_ops = [] + # support only one distributed_lookup_table now + self.table_name = None + for op in self.origin_program.global_block().ops: + if op.type == LOOKUP_TABLE_TYPE: + if op.attrs['is_distributed'] is True: + if self.table_name is None: + self.table_name = op.input("W")[0] + if self.table_name != op.input("W")[0]: + raise RuntimeError("all distributed lookup_table_ops" + " should have only one table") + distributed_lookup_table_ops.append(op) + else: + if self.table_name is not None: + assert op.input("W")[0] != self.table_name + + return len(distributed_lookup_table_ops) > 0 + + def _update_dist_lookup_table_vars(self, param_list, grad_list, + params_grads): + # TODO(wuyi): put find a way to put dist lookup table stuff all together. + # update self.table_param_grad and self.trainer_side_table_grad_list + program = self.origin_program + if self.has_distributed_lookup_table: + param_list = [ + param for param in param_list if param.name != self.table_name + ] + grad_list = [ + grad for grad in grad_list + if grad.name != grad_var_name(self.table_name) + ] + self.table_param_grad = [ + param_grad for param_grad in params_grads + if param_grad[0].name == self.table_name + ][0] + table_grad_var = self.table_param_grad[1] + if self.sync_mode: + self.trainer_side_table_grad_list = [ + program.global_block().create_var( + name="%s.trainer_%d.pserver_%d" % + (table_grad_var.name, self.trainer_id, index), + type=table_grad_var.type, + shape=table_grad_var.shape, + dtype=table_grad_var.dtype) + for index in range(len(self.pserver_endpoints)) + ] + else: + self.trainer_side_table_grad_list = [ + program.global_block().create_var( + name="%s.pserver_%d" % (table_grad_var.name, index), + type=table_grad_var.type, + shape=table_grad_var.shape, + dtype=table_grad_var.dtype) + for index in range(len(self.pserver_endpoints)) + ] + return param_list, grad_list + + def _init_splited_vars(self, slice_var_up): + # update these mappings for further transpile: + # 1. param_var_mapping: param var name -> [splited params vars] + # 2. 
grad_var_mapping: grad var name -> [splited grads vars]
+        # 3. grad_param_mapping: grad.blockx -> param.blockx
+        # 4. param_grad_ep_mapping: ep -> {"params": [], "grads": []}
+
+        param_list = []
+        grad_list = []
+        param_grad_set = set()
+        for p, g in self.params_grads:
+            # skip parameter marked not trainable
+            if type(p) == Parameter and p.trainable == False:
+                continue
+            if p.name not in param_grad_set:
+                param_list.append(p)
+                param_grad_set.add(p.name)
+            if g.name not in param_grad_set:
+                grad_list.append(g)
+                param_grad_set.add(g.name)
+
+        param_list, grad_list = self._update_dist_lookup_table_vars(
+            param_list, grad_list, self.params_grads)
+
+        if slice_var_up:
+            # when we slice var up into blocks, we will slice the var according to
+            # pserver services' count. A pserver may have two or more listening ports.
+            grad_blocks = slice_variable(grad_list, len(self.pserver_endpoints))
+            param_blocks = slice_variable(param_list,
+                                          len(self.pserver_endpoints))
+        else:
+            # when we do NOT slice var up into blocks, we will always slice params
+            # grads into one block.
+            grad_blocks = slice_variable(grad_list, 1)
+            param_blocks = slice_variable(param_list, 1)
+        assert (len(grad_blocks) == len(param_blocks))
+
+        # origin_varname -> [splited_var]
+        self.param_var_mapping = self._create_vars_from_blocklist(
+            self.origin_program, param_blocks)
+        self.grad_var_mapping = self._create_vars_from_blocklist(
+            self.origin_program,
+            grad_blocks,
+            add_trainer_suffix=self.trainer_num > 1)
+        self.grad_param_mapping = dict()
+        for g, p in zip(grad_blocks, param_blocks):
+            g_name, g_bid, _ = g.split(":")
+            p_name, p_bid, _ = p.split(":")
+            self.grad_param_mapping[self.grad_var_mapping[g_name][int(g_bid)]] = \
+                self.param_var_mapping[p_name][int(p_bid)]
+
+        # create mapping of endpoint -> split var to create pserver side program
+        self.param_grad_ep_mapping = dict()
+        [
+            self.param_grad_ep_mapping.update({
+                ep: {
+                    "params": [],
+                    "grads": []
+                }
+            }) for ep in self.pserver_endpoints
+        ]
+
     # transpiler function for dis lookup_table
     def _replace_lookup_table_op_with_prefetch(self, program,
                                                pserver_endpoints):
diff --git a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
index 8bfb554845d9b128f000d6c90cf626416a198eef..999ef43ca0feacbddff5f9db59589ce7097fe77e 100644
--- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
+++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
@@ -383,6 +383,16 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):


 def release_memory(input_program, skip_opt_set=None):
+    """
+    Modify the input program and insert :code:`delete_op` to drop unused
+    variables early. The modification will be performed inplace.
+
+    Notes: This is an experimental API and could be removed in the next few
+    releases. Users should not use this API.
+
+    Args:
+        input_program(Program): The program to insert :code:`delete_op` into.
+    """
     cfgs = _get_cfgs(input_program)
     for cfg in cfgs:
         cfg.release_memory(skip_opt_set=skip_opt_set)
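
Since the docstring above labels release_memory experimental, here is only a minimal sketch of how it would be invoked; the program setup around the call is assumed, not part of this patch:

    import paddle.fluid as fluid
    from paddle.fluid.transpiler import release_memory

    main_program = fluid.default_main_program()
    # ... define the network inside main_program here ...

    # Inserts delete_op so variables that are no longer used get dropped
    # early, lowering peak memory at the cost of extra ops.
    release_memory(main_program)
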
diff --git a/python/paddle/fluid/transpiler/ps_dispatcher.py b/python/paddle/fluid/transpiler/ps_dispatcher.py
index d6a68677527deb09ace0e3a23cbc093d6d7b4349..dcffadd531719431f27feb464ed58a65c04770ee 100644
--- a/python/paddle/fluid/transpiler/ps_dispatcher.py
+++ b/python/paddle/fluid/transpiler/ps_dispatcher.py
@@ -33,15 +33,21 @@ class PSDispatcher(object):

     def dispatch(self, varlist):
         """
-        :param varlist: a list of Variables
-        :return: a map of pserver endpoint -> varname
+        Args:
+            varlist(list): a list of Variables
+        Returns:
+            a map of pserver endpoint -> varname
         """
         AssertionError("Interface has not been implemented.")


 class HashName(PSDispatcher):
     """
-    Hash variable names to several endpoints
+    Hash variable names to several endpoints using Python's
+    built-in :code:`hash()` function.
+
+    Args:
+        pserver_endpoints (list): list of endpoints (ip:port).
     """

     def __init__(self, pserver_endpoints):
@@ -61,7 +67,11 @@ class HashName(PSDispatcher):

 class RoundRobin(PSDispatcher):
     """
-    Distribute variables to serveral endpoints.
+    Distribute variables to several endpoints using the
+    RoundRobin method.
+
+    Args:
+        pserver_endpoints (list): list of endpoints (ip:port).
     """

     def __init__(self, pserver_endpoints):
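
Both dispatchers plug into DistributeTranspiler.transpile() through split_method, which expects the class itself rather than an instance (transpile() asserts on split_method.__bases__). A sketch, with made-up endpoint addresses:

    from paddle.fluid.transpiler import DistributeTranspiler, RoundRobin

    t = DistributeTranspiler()
    t.transpile(
        trainer_id=0,
        pservers="192.168.0.1:6174,192.168.0.2:6174",
        trainers=2,
        split_method=RoundRobin)  # or HashName for hash-based placement
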
diff --git a/python/paddle/fluid/unique_name.py b/python/paddle/fluid/unique_name.py
index 33c53113ae7e8ed9aeada31f2aed6990b6fea110..776619cd36722e338a9fdd5e13bceeaf3724de2c 100644
--- a/python/paddle/fluid/unique_name.py
+++ b/python/paddle/fluid/unique_name.py
@@ -16,7 +16,7 @@ import collections
 import contextlib
 import sys

-__all__ = ['generate', 'switch', 'guard', 'UniqueNameGenerator']
+__all__ = ['generate', 'switch', 'guard']


 class UniqueNameGenerator(object):
diff --git a/tools/codestyle/docstring_checker.py b/tools/codestyle/docstring_checker.py
index 54a690462699651d3e14f9b24383df01a9740336..8d4b24a0cf6b743b72dca58fd885f927560964bf 100644
--- a/tools/codestyle/docstring_checker.py
+++ b/tools/codestyle/docstring_checker.py
@@ -291,6 +291,8 @@ class DocstringChecker(BaseChecker):
             True if successful otherwise False.
         """

+        if node.name.startswith("__") or node.name.startswith("_"):
+            return True
         find = False
         for t in node.body:
             if not isinstance(t, astroid.Return):
@@ -316,6 +318,8 @@ class DocstringChecker(BaseChecker):
         Returns:
             True if successful otherwise False.
         """
+        if node.name.startswith("__") or node.name.startswith("_"):
+            return True
         args = []
         for arg in node.args.get_children():
             if (not isinstance(arg, astroid.AssignName)) \
diff --git a/tools/print_signatures.py b/tools/print_signatures.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e7ffd44c7b0ba2270069bc4467dc377a58b2417
--- /dev/null
+++ b/tools/print_signatures.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Print all signatures of a python module in alphabetical order.
+
+Usage:
+    ./print_signatures.py "paddle.fluid" > signature.txt
+"""
+import importlib
+import inspect
+import collections
+import sys
+import pydoc
+
+member_dict = collections.OrderedDict()
+
+
+def visit_member(parent_name, member):
+    cur_name = ".".join([parent_name, member.__name__])
+    if inspect.isclass(member):
+        for name, value in inspect.getmembers(member):
+            if hasattr(value, '__name__') and (not name.startswith("_") or
+                                               name == "__init__"):
+                visit_member(cur_name, value)
+    elif callable(member):
+        try:
+            member_dict[cur_name] = inspect.getargspec(member)
+        except TypeError:  # special for PyBind method
+            member_dict[cur_name] = " ".join([
+                line.strip() for line in pydoc.render_doc(member).split('\n')
+                if "->" in line
+            ])
+
+    else:
+        raise RuntimeError("Unsupported member type for signature generation: {0}".
+                           format(str(type(member))))
+
+
+def visit_all_module(mod):
+    for member_name in (
+            name
+            for name in (mod.__all__ if hasattr(mod, "__all__") else dir(mod))
+            if not name.startswith("_")):
+        instance = getattr(mod, member_name, None)
+        if instance is None:
+            continue
+        if inspect.ismodule(instance):
+            visit_all_module(instance)
+        else:
+            visit_member(mod.__name__, instance)
+
+
+visit_all_module(importlib.import_module(sys.argv[1]))
+
+for name in member_dict:
+    print name, member_dict[name]
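
One plausible way to use the dump is to diff it against a checked-in reference so that accidental public API changes are caught early; the reference file name and the workflow below are assumptions, not part of this patch:

    # Compare the current signatures of paddle.fluid against a stored
    # reference dump; any difference signals a public API change.
    import subprocess

    expected = open("signature.reference").read()
    actual = subprocess.check_output(
        ["python", "tools/print_signatures.py", "paddle.fluid"])
    if expected != actual:
        print "paddle.fluid public API changed; update signature.reference"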