diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake
index 137f11da7f2f1c46eebf6590d93402786ef543c9..c70d83b3f4bb24740ed67b4e2f98a3ced26d1648 100644
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -15,9 +15,9 @@ include(ExternalProject)
 set(BOOST_PROJECT "extern_boost")
-set(BOOST_VER "1.66.0")
-set(BOOST_TAR "boost_1_66_0")
-set(BOOST_URL "https://dl.bintray.com/boostorg/release/${BOOST_VER}/source/${BOOST_TAR}.tar.gz")
+set(BOOST_VER "1.41.0")
+set(BOOST_TAR "boost_1_41_0")
+set(BOOST_URL "http://sourceforge.net/projects/boost/files/boost/${BOOST_VER}/${BOOST_TAR}.tar.gz")
 set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
 set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
 set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst
index f738bf15641d9fca0bfb0c10821de778ceee0d79..231ec2d4ba102a5d31c47cbc7a5d484ef17a7f3a 100644
--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -18,6 +18,11 @@ dynamic_lstm
 .. autofunction:: paddle.v2.fluid.layers.dynamic_lstm
     :noindex:
 
+dynamic_lstmp
+-------------
+.. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp
+    :noindex:
+
 dynamic_gru
 -----------
 .. autofunction:: paddle.v2.fluid.layers.dynamic_gru
diff --git a/doc/design/support_new_device.md b/doc/design/support_new_device.md
index 4c5f10e2ecb9ec09b78926ca27552741d02d7cc9..8983df900460127fc130043c52373dab505363ba 100644
--- a/doc/design/support_new_device.md
+++ b/doc/design/support_new_device.md
@@ -2,9 +2,9 @@
 ## Background
 
-Deep learning has a high demand for computing resources. New high-performance devices and computing libraries are appearing very frequently. Deep learning frameworks have to integrate these high-performance devices and computing libraries flexibly and efficiently.
+Deep learning has a high demand for computing resources. New high-performance devices and computing libraries are appearing very frequently. Deep learning frameworks have to integrate these high-performance devices and computing libraries in a flexible and efficient manner.
 
-On one hand, hardware and computing libraries usually do not have a one-to-one correspondence. For example,Intel CPUs support Eigen and MKL computing libraries while Nvidia GPUs support Eigen and cuDNN computing libraries. We have to implement operator specific kernels for each computing library.
+On one hand, hardware and computing libraries usually do not have a one-to-one correspondence. For example, Intel CPUs support Eigen and MKL computing libraries while Nvidia GPUs support Eigen and cuDNN computing libraries. We have to implement operator specific kernels for each computing library.
 
 On the other hand, users usually do not want to care about the low-level hardware and computing libraries when writing a neural network configuration. In Fluid, `Layer` is exposed in `Python`, and `Operator` is exposed in `C++`. Both `Layer` and `Operator` are hardware independent.
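As a rough illustration of the background above (an editor's sketch, not part of this patch): "one kernel per (device, library) pair" boils down to a registry keyed on device and library. The names `Device`, `Library`, `KernelKey`, and `ConvKernels` here are simplified stand-ins; Fluid's real mechanism, `OpKernelType` plus the registration macros, appears later in this design doc.

```cpp
#include <map>
#include <tuple>

// Hypothetical simplified kernel key; the real framework also keys on
// data type and layout (see paddle::framework::OpKernelType).
enum class Device { kCPU, kGPU };
enum class Library { kPlain, kMKLDNN, kCUDNN };

struct KernelKey {
  Device device;
  Library library;
  bool operator<(const KernelKey& o) const {
    return std::tie(device, library) < std::tie(o.device, o.library);
  }
};

// A hardware-independent Operator dispatches through a registry like this,
// so Intel CPUs can run Eigen/MKL kernels and Nvidia GPUs Eigen/cuDNN ones.
using Kernel = void (*)();
std::map<KernelKey, Kernel>& ConvKernels() {
  static std::map<KernelKey, Kernel> kernels;
  return kernels;
}
```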
@@ -17,7 +17,7 @@ For a general overview of fluid, please refer to the [overview doc](https://gith
 
 There are mainly three parts that we have to consider while integrating a new device/library:
 
-- Place and DeviceContext: indicates the device id and manages hardware resources
+- Place and DeviceContext: indicate the device id and manage hardware resources
 
 - Memory and Tensor: malloc/free data on certain device
 
@@ -25,10 +25,10 @@ There are mainly three parts that we have to consider while integrating a new de
 
 ### Place and DeviceContext
 
-Please remind that device and computing library are not one-to-one corresponding. A device can have a lot of computing libraries and a computing library can also support several devices.
+Please note that device and computing library are not one-to-one corresponding. A device can have a lot of computing libraries and a computing library can also support several devices.
 
 #### Place
-Fluid uses class [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L55) to represent the device memory where data is located. If we add another device, we have to add corresponding `DevicePlace`.
+Fluid uses class [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L55) to represent the device memory where data is located. If we add another device, we have to add the corresponding `DevicePlace`.
 
 ```
         |   CPUPlace
@@ -144,7 +144,7 @@ class Tensor {
 };
 ```
 
-`Placeholder` is used to delay memory allocation; that is, we can first define a tensor, using `Resize` to configurate its shape, and then call `mutable_data` to allocate the actual memory.
+`Placeholder` is used to delay memory allocation; that is, we can first define a tensor, using `Resize` to configure its shape, and then call `mutable_data` to allocate the actual memory.
 
 ```cpp
 paddle::framework::Tensor t;
@@ -163,7 +163,7 @@ Fluid implements computing units based on different DeviceContexts. Some computi
 
 Let's take [MaxOutFunctor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/math/maxouting.h#L27) as an example:
 
-The interface is defined in header file.
+The interface is defined in the header file.
 
 ```
 template <typename DeviceContext, typename T>
@@ -174,7 +174,7 @@ class MaxOutFunctor {
 };
 ```
 
-CPU implemention is in .cc file
+CPU implementation is in .cc file
 
 ```
 template <typename DeviceContext, typename T>
@@ -188,7 +188,7 @@ class MaxOutFunctor {
 };
 ```
 
-CUDA implemention is in .cu file
+CUDA implementation is in .cu file
 
 ```
 template <typename DeviceContext, typename T>
@@ -203,9 +203,9 @@ class MaxOutFunctor {
 ```
 
-We get computing handle from a concrete DeviceContext, and make compution on tensors.
+We first obtain the computing handle from a concrete DeviceContext and then compute on tensors.
 
-The implemention of `OpKernel` is similar to math functors, the extra thing we need to do is to register the OpKernel in a global map.
+The implementation of `OpKernel` is similar to that of math functors; the extra thing we need to do is to register the OpKernel in a global map.
 
 Fluid provides different register interfaces in op_registry.h
 
@@ -231,7 +231,7 @@ REGISTER_OP_CUDA_KERNEL(
 
 ## Advanced topics: How to switch between different Device/Library
 
-Generally, we will impelement OpKernel for all Device/Library of an Operator. We can easily train a Convolutional Neural Network in GPU. However, some OpKernel is not sutibale on a specific Device. For example, crf operator can only run on CPU, whereas most other operators can run at GPU. To achieve high performance in such circumstance, we have to switch between different Device/Library.
+Generally, we will implement OpKernel for all Device/Library of an Operator. We can easily train a Convolutional Neural Network on GPU. However, some OpKernels are not suitable for a specific Device. For example, the crf operator can only run on CPU, whereas most other operators can run on GPU. To achieve high performance in such circumstances, we have to switch between different Device/Library.
 
 For more details, please refer to following docs:
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 280496984251919a8b4b6c52684f950a80b78356..8c28709a68bec4fca5acaf2ec74b6d02402a6139 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -26,7 +26,7 @@ nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
 
 cc_test(variable_test SRCS variable_test.cc)
 
-cc_library(threadpool SRCS threadpool.cc)
+cc_library(threadpool SRCS threadpool.cc DEPS enforce)
 cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
 
 cc_library(scope SRCS scope.cc DEPS glog threadpool)
@@ -98,3 +98,5 @@ if(NOT WITH_C_API AND WITH_FLUID)
   install(FILES ${CMAKE_CURRENT_BINARY_DIR}/framework.pb.h DESTINATION include/paddle/framework)
   install(FILES details/cow_ptr.h details/op_registry.h DESTINATION include/paddle/framework/details)
 endif()
+
+cc_test(channel_test SRCS channel_test.cc)
diff --git a/paddle/framework/channel.h b/paddle/framework/channel.h
new file mode 100644
index 0000000000000000000000000000000000000000..70ecccc1a1078374f3190b3956103ed8000c4fc5
--- /dev/null
+++ b/paddle/framework/channel.h
@@ -0,0 +1,64 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <stddef.h>  // for size_t
+
+namespace paddle {
+namespace framework {
+
+// Channel is the abstract class of buffered and un-buffered channels.
+template <typename T>
+class Channel {
+ public:
+  virtual void Send(T*) = 0;
+  virtual void Receive(T*) = 0;
+  virtual size_t Cap() = 0;
+
+  // Don't delete channels; instead, call Channel::Close.
+ protected:
+  virtual ~Channel() {}
+};
+
+// Forward declaration of channel implementations.
+namespace details {
+template <typename T>
+class Buffered;
+template <typename T>
+class UnBuffered;
+}  // namespace details
+
+template <typename T>
+Channel<T>* MakeChannel(size_t buffer_size) {
+  if (buffer_size > 0) {
+    return new details::Buffered<T>(buffer_size);
+  }
+  return new details::UnBuffered<T>();
+}
+
+template <typename T>
+void CloseChannel(Channel<T>* ch) {
+  if (ch->Cap() > 0) {
+    delete dynamic_cast<details::Buffered<T>*>(ch);
+  } else {
+    delete dynamic_cast<details::UnBuffered<T>*>(ch);
+  }
+}
+
+}  // namespace framework
+}  // namespace paddle
+
+#include "paddle/framework/details/buffered_channel.h"
+#include "paddle/framework/details/unbuffered_channel.h"
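As a rough usage illustration (not part of this patch) — a producer/consumer pair over the buffered variant added above. The element type `int`, the capacity, and the thread bodies are assumptions made for the sketch:

```cpp
#include <thread>
#include "paddle/framework/channel.h"

void ChannelDemo() {
  using paddle::framework::Channel;
  // Capacity 4 selects details::Buffered<int>; capacity 0 would select
  // details::UnBuffered<int>.
  Channel<int>* ch = paddle::framework::MakeChannel<int>(4);
  std::thread producer([ch] {
    for (int i = 0; i < 10; ++i) {
      ch->Send(&i);  // blocks while the buffer already holds cap_ items
    }
  });
  std::thread consumer([ch] {
    int v;
    for (int i = 0; i < 10; ++i) {
      ch->Receive(&v);  // blocks while the buffer is empty
    }
  });
  producer.join();
  consumer.join();
  paddle::framework::CloseChannel(ch);  // per the header: close, don't delete
}
```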
diff --git a/paddle/framework/channel_test.cc b/paddle/framework/channel_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9efc0172658c800d14102531332dbef68fa392f4
--- /dev/null
+++ b/paddle/framework/channel_test.cc
@@ -0,0 +1,26 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/framework/channel.h"
+
+#include "gtest/gtest.h"
+
+TEST(Channel, MakeAndClose) {
+  using paddle::framework::Channel;
+  using paddle::framework::MakeChannel;
+  using paddle::framework::CloseChannel;
+
+  Channel<int>* ch = MakeChannel<int>(10);
+  CloseChannel(ch);
+}
diff --git a/paddle/framework/data_type.h b/paddle/framework/data_type.h
index 6a372ac32e48131eed28e2d42125feb5b92a11c7..98eb3e857d1943e71f1d41f24ecbedbe09e85b7b 100644
--- a/paddle/framework/data_type.h
+++ b/paddle/framework/data_type.h
@@ -79,5 +79,33 @@ inline void VisitDataType(proto::DataType type, Visitor visitor) {
   }
 }
 
+inline std::string DataTypeToString(const proto::DataType type) {
+  using namespace paddle::framework::proto;
+  switch (type) {
+    case DataType::FP16:
+      return "float16";
+    case DataType::FP32:
+      return "float32";
+    case DataType::FP64:
+      return "float64";
+    case DataType::INT16:
+      return "int16";
+    case DataType::INT32:
+      return "int32";
+    case DataType::INT64:
+      return "int64";
+    case DataType::BOOL:
+      return "bool";
+    default:
+      PADDLE_THROW("Unsupported type %d", type);
+  }
+}
+
+inline std::ostream& operator<<(std::ostream& out,
+                                const proto::DataType& type) {
+  out << DataTypeToString(type);
+  return out;
+}
+
 }  // namespace framework
 }  // namespace paddle
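A quick sketch (not part of the patch) of what the new helpers enable — this is also why the op_kernel_type_test expectation below changes from `data_type[5]` to `data_type[float32]`:

```cpp
#include <iostream>
#include "paddle/framework/data_type.h"

void PrintDataType() {
  namespace fw = paddle::framework;
  fw::proto::DataType t = fw::proto::DataType::FP32;
  // operator<< forwards to DataTypeToString, so this prints "float32"
  // instead of the raw enum value 5.
  std::cout << "data_type[" << t << "]" << std::endl;
}
```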
diff --git a/paddle/framework/details/buffered_channel.h b/paddle/framework/details/buffered_channel.h
new file mode 100644
index 0000000000000000000000000000000000000000..572e29d44a3baec84a029d87f9b0874784aa761b
--- /dev/null
+++ b/paddle/framework/details/buffered_channel.h
@@ -0,0 +1,82 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <condition_variable>
+#include <deque>
+#include <mutex>
+
+#include "paddle/framework/channel.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+template <typename T>
+class Buffered : public paddle::framework::Channel<T> {
+  friend Channel<T>* paddle::framework::MakeChannel<T>(size_t);
+  friend void paddle::framework::CloseChannel<T>(Channel<T>*);
+
+ public:
+  virtual void Send(T*);
+  virtual void Receive(T*);
+  virtual size_t Cap() { return cap_; }
+
+ private:
+  size_t cap_;
+  std::mutex mu_;
+  std::condition_variable empty_cond_var_;
+  std::condition_variable full_cond_var_;
+  std::deque<T> channel_;
+
+  Buffered(size_t cap) : cap_(cap) {}
+  virtual ~Buffered();
+
+  void NotifyAllSenders(std::unique_lock<std::mutex>*);
+};
+
+template <typename T>
+void Buffered<T>::Send(T* item) {
+  std::unique_lock<std::mutex> lock(mu_);
+  full_cond_var_.wait(lock, [this]() { return channel_.size() < cap_; });
+  channel_.push_back(std::move(*item));
+  lock.unlock();
+  empty_cond_var_.notify_one();
+}
+
+template <typename T>
+void Buffered<T>::Receive(T* item) {
+  std::unique_lock<std::mutex> lock(mu_);
+  empty_cond_var_.wait(lock, [this]() { return !channel_.empty(); });
+  *item = std::move(channel_.front());
+  channel_.pop_front();
+  NotifyAllSenders(&lock);
+}
+
+template <typename T>
+Buffered<T>::~Buffered() {
+  std::unique_lock<std::mutex> lock(mu_);
+  channel_.clear();
+  NotifyAllSenders(&lock);
+}
+
+template <typename T>
+void Buffered<T>::NotifyAllSenders(std::unique_lock<std::mutex>* lock) {
+  lock->unlock();
+  full_cond_var_.notify_one();
+}
+
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/framework/details/unbuffered_channel.h b/paddle/framework/details/unbuffered_channel.h
new file mode 100644
index 0000000000000000000000000000000000000000..7ecced1fba88fea781fc342091bc71e5aa496d3a
--- /dev/null
+++ b/paddle/framework/details/unbuffered_channel.h
@@ -0,0 +1,52 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <condition_variable>
+#include <deque>
+#include <mutex>
+
+#include "paddle/framework/channel.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+template <typename T>
+class UnBuffered : public paddle::framework::Channel<T> {
+  friend Channel<T>* paddle::framework::MakeChannel<T>(size_t);
+  friend void paddle::framework::CloseChannel<T>(Channel<T>*);
+
+ public:
+  virtual void Send(T*);
+  virtual void Receive(T*);
+  virtual size_t Cap() { return 0; }
+
+ private:
+  UnBuffered() {}
+  virtual ~UnBuffered();
+};
+
+template <typename T>
+void UnBuffered<T>::Send(T* channel_element) {}
+
+template <typename T>
+void UnBuffered<T>::Receive(T*) {}
+
+template <typename T>
+UnBuffered<T>::~UnBuffered() {}
+
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index 50a70d723eb5605fccb2e40fe30fd3aac7f72340..cbf3ec75265fa74aaacffee684b7b7d5f73b7c02 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -170,8 +170,6 @@ static bool has_feed_operators(
           feed_targets.find(feed_target_name) != feed_targets.end(),
           "Feed operator output name '%s' cannot be found in 'feed_targets'",
           feed_target_name);
-    } else {
-      break;
     }
   }
@@ -270,8 +268,6 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
       int idx = boost::get<int>(op->GetAttr("col"));
       SetFeedVariable(scope, *feed_targets[feed_target_name], feed_holder_name,
                       idx);
-    } else {
-      break;
     }
   }
diff --git a/paddle/framework/op_kernel_type_test.cc b/paddle/framework/op_kernel_type_test.cc
index 649afeee8a846b0579545f2edff77e9dbe3b4dd8..cb23bbde01493d1a3b5845e77d6160a75f409c7a 100644
--- a/paddle/framework/op_kernel_type_test.cc
+++ b/paddle/framework/op_kernel_type_test.cc
@@ -26,9 +26,9 @@ TEST(OpKernelType, ToString) {
   OpKernelType op_kernel_type(DataType::FP32, CPUPlace(), DataLayout::kNCHW,
                               LibraryType::kCUDNN);
 
-  ASSERT_EQ(
-      paddle::framework::KernelTypeToString(op_kernel_type),
-      "data_type[5]:data_layout[NCHW]:place[CPUPlace]:library_type[CUDNN]");
+  ASSERT_EQ(paddle::framework::KernelTypeToString(op_kernel_type),
+            "data_type[float32]:data_layout[NCHW]:place[CPUPlace]:library_type["
+            "CUDNN]");
 }
 
 TEST(OpKernelType, Hash) {
diff --git a/paddle/framework/program_desc.cc b/paddle/framework/program_desc.cc
index b5d9e5e385c1ba57169ef885824fc23b0f130692..e59e392dfd16e82c01ded8ca40099c2b71bdabcf 100644
--- a/paddle/framework/program_desc.cc
+++ b/paddle/framework/program_desc.cc
@@ -18,6 +18,9 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
+const std::string kFeedOpType = "feed";
+const std::string kFetchOpType = "fetch";
+
 BlockDesc *ProgramDesc::AppendBlock(const BlockDesc &parent) {
   auto *b = desc_.add_blocks();
   b->set_parent_idx(parent.ID());
@@ -64,5 +67,27 @@ ProgramDesc::ProgramDesc(const std::string &binary_str) {
   }
 }
 
+const std::vector<std::string> ProgramDesc::GetFeedVarNames() {
+  BlockDesc *global_block = blocks_[0].get();
+  std::vector<std::string> feed_var_names;
+  for (auto *op : global_block->AllOps()) {
+    if (op->Type() == "feed") {
+      feed_var_names.insert(feed_var_names.begin(), op->Output("Out")[0]);
+    }
+  }
+  return feed_var_names;
+}
+
+const std::vector<std::string> ProgramDesc::GetFetchVarNames() {
+  BlockDesc *global_block = blocks_[0].get();
+  std::vector<std::string> fetch_var_names;
+  for (auto *op : global_block->AllOps()) {
+    if (op->Type() == "fetch") {
+      fetch_var_names.push_back(op->Input("X")[0]);
+    }
+  }
+  return fetch_var_names;
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/program_desc.h b/paddle/framework/program_desc.h
index 15a962bb696d6172acd1a83cf9bb1ffd0846d449..2c3883275a23c62a3d085467424c48505661fba3 100644
--- a/paddle/framework/program_desc.h
+++ b/paddle/framework/program_desc.h
@@ -45,6 +45,10 @@ class ProgramDesc {
 
   proto::ProgramDesc *Proto();
 
+  const std::vector<std::string> GetFeedVarNames();
+
+  const std::vector<std::string> GetFetchVarNames();
+
  private:
   proto::ProgramDesc desc_;
diff --git a/paddle/framework/prune.cc b/paddle/framework/prune.cc
index 25eb813ffb96e9b1e13299421ead9f85c02da59f..bff8e0bceaca9749101b2c45edddba526d565624 100644
--- a/paddle/framework/prune.cc
+++ b/paddle/framework/prune.cc
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <algorithm>
 #include <set>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
@@ -102,6 +103,32 @@ void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output,
       *op_field->Add() = input.blocks(block_id).ops(i);
     }
   }
+
+  // remove the VarDescs in BlockDesc that are not referenced in
+  // the pruned OpDescs
+  std::unordered_map<std::string, proto::VarDesc> var_map;
+  auto* var_field = output->mutable_blocks(block_id)->mutable_vars();
+  for (const auto& var : *var_field) {
+    var_map[var.name()] = var;
+  }
+
+  var_field->Clear();
+  for (const auto& op : *op_field) {
+    // add VarDescs of all input arguments for each OpDesc
+    auto& input_field = op.inputs();
+    for (auto& input_var : input_field) {
+      for (auto& arg : input_var.arguments()) {
+        *var_field->Add() = var_map[arg];
+      }
+    }
+    // add VarDescs of all output arguments for each OpDesc
+    auto& output_field = op.outputs();
+    for (auto& output_var : output_field) {
+      for (auto& arg : output_var.arguments()) {
+        *var_field->Add() = var_map[arg];
+      }
+    }
+  }
 }
 
 // TODO(fengjiayi): Prune() could be inplaced to avoid unnecessary copies
diff --git a/paddle/framework/threadpool.cc b/paddle/framework/threadpool.cc
index 109a7e7dc440d91e8223f2c0924f489f54a06f64..b7d7c00bcf9d9770f58284023ca2defcda299d64 100644
--- a/paddle/framework/threadpool.cc
+++ b/paddle/framework/threadpool.cc
@@ -1,24 +1,95 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+       http://www.apache.org/licenses/LICENSE-2.0
 
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
 
 #include "paddle/framework/threadpool.h"
 
+#include "paddle/platform/enforce.h"
+
 namespace paddle {
 namespace framework {
 
-std::unique_ptr<ThreadPool> ThreadPool::threadpool(nullptr);
-std::once_flag ThreadPool::init_flag;
+std::unique_ptr<ThreadPool> ThreadPool::threadpool_(nullptr);
+std::once_flag ThreadPool::init_flag_;
+
+ThreadPool* ThreadPool::GetInstance() {
+  std::call_once(init_flag_, &ThreadPool::Init);
+  return threadpool_.get();
+}
+
+void ThreadPool::Init() {
+  if (threadpool_.get() == nullptr) {
+    // TODO(Yancey1989): specify the max threads number
+    int num_threads = std::thread::hardware_concurrency();
+    PADDLE_ENFORCE_GT(num_threads, 0);
+    threadpool_.reset(new ThreadPool(num_threads));
+  }
+}
+
+ThreadPool::ThreadPool(int num_threads)
+    : total_threads_(num_threads), idle_threads_(num_threads), running_(true) {
+  threads_.resize(num_threads);
+  for (auto& thread : threads_) {
+    // TODO(Yancey1989): binding the thread on the specify CPU number
+    thread.reset(new std::thread(std::bind(&ThreadPool::TaskLoop, this)));
+  }
+}
+
+ThreadPool::~ThreadPool() {
+  {
+    // notify all threads to stop running
+    running_ = false;
+    scheduled_.notify_all();
+  }
+
+  for (auto& t : threads_) {
+    t->join();
+    t.reset(nullptr);
+  }
+}
+
+void ThreadPool::Wait() {
+  std::unique_lock<std::mutex> lock(mutex_);
+  completed_.wait(lock, [=] { return Done() == true; });
+}
+
+void ThreadPool::TaskLoop() {
+  while (running_) {
+    std::unique_lock<std::mutex> lock(mutex_);
+    scheduled_.wait(lock, [=] { return !tasks_.empty() || !running_; });
+
+    if (!running_) {
+      break;
+    }
+    // pop a task from the task queue
+    auto task = std::move(tasks_.front());
+    tasks_.pop();
+
+    --idle_threads_;
+    lock.unlock();
+
+    // run the task
+    task();
+
+    {
+      std::unique_lock<std::mutex> lock(mutex_);
+      ++idle_threads_;
+      if (Done()) {
+        completed_.notify_all();
+      }
+    }
+  }
+}
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/threadpool.h b/paddle/framework/threadpool.h
index 3ac345851c38557f82698786dd3bc8e1202a4256..4e9b58679d9e7c84adf76b6245b397c7a8872483 100644
--- a/paddle/framework/threadpool.h
+++ b/paddle/framework/threadpool.h
@@ -20,52 +20,36 @@ limitations under the License. */
 #include <mutex>
 #include <queue>
 #include <thread>
+#include <vector>
 
-#include "paddle/platform/enforce.h"
+#include "paddle/platform/macros.h"  // for DISABLE_COPY_AND_ASSIGN
 
 namespace paddle {
 namespace framework {
 
+// ThreadPool maintains a queue of tasks, and runs them using a fixed
+// number of threads.
 class ThreadPool {
  public:
   typedef std::packaged_task<void()> Task;
 
-  /**
-   * @brief Get a instance of threadpool, the thread number will
-   * be specified as the number of hardware thread contexts
-   */
-  static ThreadPool* GetInstance() {
-    std::call_once(init_flag, &ThreadPool::Init);
-    return threadpool.get();
-  }
+  // Returns the singleton of ThreadPool.
+  static ThreadPool* GetInstance();
 
-  ~ThreadPool() {
-    {
-      // notify all threads to stop running
-      running_ = false;
-      scheduled_.notify_all();
-    }
-
-    for (auto& t : threads_) {
-      t->join();
-      t.reset(nullptr);
-    }
-  }
+  ~ThreadPool();
 
-  int GetNumThreads() const { return num_threads_; }
+  // Returns the number of threads created by the constructor.
+  size_t Threads() const { return total_threads_; }
 
-  int GetAvailable() {
+  // Returns the number of currently idle threads.
+  size_t IdleThreads() {
     std::unique_lock<std::mutex> lock(mutex_);
-    return available_;
+    return idle_threads_;
   }
 
-  /**
-   * @brief Push a function to the queue, and will be scheduled and
-   * executed if a thread is available.
-   * @param[in] Task, will be pushed to the task queue.
-   * @return std::future, we could wait for the task finished by
-   * f.wait().
-   */
+  // Run pushes a function to the task queue and returns a std::future
+  // object. To wait for the completion of the task, call
+  // std::future::wait().
   template <typename Callback>
   std::future<void> Run(Callback fn) {
     std::unique_lock<std::mutex> lock(mutex_);
@@ -77,84 +61,40 @@ class ThreadPool {
     return f;
   }
 
-  /**
-   * @brief Wait until all the tasks are completed.
-   */
-  void Wait() {
-    std::unique_lock<std::mutex> lock(mutex_);
-    completed_.wait(lock, [=] { return Done() == true; });
-  }
+  // Wait until all the tasks are completed.
+  void Wait();
 
  private:
   DISABLE_COPY_AND_ASSIGN(ThreadPool);
 
-  explicit ThreadPool(int num_threads)
-      : num_threads_(num_threads), available_(num_threads), running_(true) {
-    threads_.resize(num_threads);
-    for (auto& thread : threads_) {
-      // TODO(Yancey1989): binding the thread on the specify CPU number
-      thread.reset(new std::thread(std::bind(&ThreadPool::TaskLoop, this)));
-    }
-  }
+  explicit ThreadPool(int num_threads);
 
-  /**
-   * @brief If the task queue is empty and avaialbe
-   * is equal to the number of threads, means that
-   * all tasks are completed.
-   *
-   * Note: this function is not thread-safe.
-   *
-   * @return true if all tasks are completed.
-   */
-  bool Done() { return tasks_.empty() && available_ == num_threads_; }
-
-  void TaskLoop() {
-    while (running_) {
-      std::unique_lock<std::mutex> lock(mutex_);
-      scheduled_.wait(lock, [=] { return !tasks_.empty() || !running_; });
-
-      if (!running_) {
-        break;
-      }
-      // pop a task from the task queue
-      auto task = std::move(tasks_.front());
-      tasks_.pop();
-
-      --available_;
-      lock.unlock();
-
-      // run the task
-      task();
-
-      {
-        std::unique_lock<std::mutex> lock(mutex_);
-        ++available_;
-        if (Done()) {
-          completed_.notify_all();
-        }
-      }
-    }
-  }
+  // If the task queue is empty and idle_threads_ is equal to the number
+  // of threads, it means that all tasks are completed.  Note: this
+  // function is not thread-safe.  Returns true if all tasks are
+  // completed.
+  // Note: don't delete the data member total_threads_ and use
+  // threads_.size() instead; because you'd need to lock the mutex
+  // before accessing threads_.
+  bool Done() { return tasks_.empty() && idle_threads_ == total_threads_; }
 
-  static void Init() {
-    if (threadpool.get() == nullptr) {
-      // TODO(Yancey1989): specify the max threads number
-      int num_threads = std::thread::hardware_concurrency();
-      PADDLE_ENFORCE_GT(num_threads, 0);
-      threadpool.reset(new ThreadPool(num_threads));
-    }
-  }
+  // The constructor starts threads to run TaskLoop, which retrieves
+  // and runs tasks from the queue.
+  void TaskLoop();
+
+  // Init is called by GetInstance.
+  static void Init();
 
  private:
-  static std::unique_ptr<ThreadPool> threadpool;
-  static std::once_flag init_flag;
+  static std::unique_ptr<ThreadPool> threadpool_;
+  static std::once_flag init_flag_;
 
-  int num_threads_;
-  int available_;
-  bool running_;
-  std::queue<Task> tasks_;
   std::vector<std::unique_ptr<std::thread>> threads_;
+  const size_t total_threads_;
+  size_t idle_threads_;
+
+  std::queue<Task> tasks_;
   std::mutex mutex_;
+  bool running_;
   std::condition_variable scheduled_;
   std::condition_variable completed_;
 };
diff --git a/paddle/framework/threadpool_test.cc b/paddle/framework/threadpool_test.cc
index 50b6238cd8786be9d8cf2d5f821daadea12bd208..3fbfe7efc867144dbd0dd2613c824c6a3c41b7d8 100644
--- a/paddle/framework/threadpool_test.cc
+++ b/paddle/framework/threadpool_test.cc
@@ -22,11 +22,7 @@ namespace framework = paddle::framework;
 void do_sum(framework::ThreadPool* pool, std::atomic<int>& sum, int cnt) {
   std::vector<std::future<void>> fs;
   for (int i = 0; i < cnt; ++i) {
-    auto f = pool->Run([&sum]() { sum.fetch_add(1); });
-    fs.push_back(std::move(f));
-  }
-  for (auto& f : fs) {
-    f.wait();
+    fs.push_back(framework::Async([&sum]() { sum.fetch_add(1); }));
   }
 }
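For orientation (a sketch, not part of the patch): the reworked API boils down to `Run` returning a waitable future and `Wait` draining the queue. `framework::Async`, used by the updated test above, is assumed here to forward to the singleton's `Run`:

```cpp
#include <future>
#include "paddle/framework/threadpool.h"

void ThreadPoolDemo() {
  namespace fw = paddle::framework;
  // Singleton, sized by std::thread::hardware_concurrency() in Init().
  fw::ThreadPool* pool = fw::ThreadPool::GetInstance();

  std::future<void> f = pool->Run([] { /* one unit of work */ });
  f.wait();      // wait for this task only

  pool->Run([] { /* more work */ });
  pool->Wait();  // block until the queue is empty and all threads are idle
}
```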
diff --git a/paddle/inference/CMakeLists.txt b/paddle/inference/CMakeLists.txt
index fedf9e4cb87d97a32b3b16da5ee8f3c16a848caf..0288266c08f3ddfc5337bf2847cf65267491105b 100644
--- a/paddle/inference/CMakeLists.txt
+++ b/paddle/inference/CMakeLists.txt
@@ -1,14 +1,14 @@
 set(FLUID_CORE_MODULES proto_desc paddle_memory executor prune init)
 
 cc_library(paddle_fluid_api
-    SRCS inference.cc
+    SRCS io.cc
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
 
 # Merge all modules into a single static library
 cc_library(paddle_fluid DEPS paddle_fluid_api ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
 
 # Create shared library
-add_library(paddle_fluid_shared SHARED inference.cc)
+add_library(paddle_fluid_shared SHARED io.cc)
 
 target_circle_link_libraries(paddle_fluid_shared
   ARCHIVE_START
@@ -20,7 +20,7 @@ SET_TARGET_PROPERTIES(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
 
 # install library & headers
 if(NOT WITH_C_API AND WITH_FLUID)
-  install(FILES inference.h DESTINATION include/paddle/inference)
+  install(FILES io.h DESTINATION include/paddle/inference)
   install(TARGETS paddle_fluid_shared DESTINATION lib)
 endif()
diff --git a/paddle/inference/inference.h b/paddle/inference/inference.h
deleted file mode 100644
index 60caa41c702655518c2cbd62641b6db8c4626d9e..0000000000000000000000000000000000000000
--- a/paddle/inference/inference.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "paddle/framework/block_desc.h"
-#include "paddle/framework/executor.h"
-#include "paddle/framework/lod_tensor.h"
-#include "paddle/framework/program_desc.h"
-#include "paddle/framework/scope.h"
-
-namespace paddle {
-
-class InferenceEngine {
-public:
-  InferenceEngine() : program_(nullptr), load_program_(nullptr) {}
-  ~InferenceEngine() {
-    delete program_;
-    delete load_program_;
-  }
-
-  framework::ProgramDesc* LoadInferenceModel(framework::Executor& exe,
-                                             framework::Scope* scope,
-                                             const std::string& dirname);
-
-  const std::vector<std::string>& GetFeedVarNames() const {
-    return feed_var_names_;
-  }
-
-  const std::vector<std::string>& GetFetchVarNames() const {
-    return fetch_var_names_;
-  }
-
-private:
-  bool IsParameter(const framework::VarDesc* var);
-  void GenerateLoadProgram(const std::string& dirname);
-
-private:
-  framework::ProgramDesc* program_;
-  framework::ProgramDesc* load_program_;
-  std::vector<std::string> feed_var_names_;
-  std::vector<std::string> fetch_var_names_;
-};
-
-} // namespace paddle
diff --git a/paddle/inference/inference.cc b/paddle/inference/io.cc
similarity index 60%
rename from paddle/inference/inference.cc
rename to paddle/inference/io.cc
index 51d43a63eed1e4c5350111f2ff6f451f5499edf4..d1842ec938f23192ba8308d4f6a82aa92a3bb95c 100644
--- a/paddle/inference/inference.cc
+++ b/paddle/inference/io.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,51 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "inference.h"
+#include "paddle/inference/io.h"
+
 #include <fstream>
 
 namespace paddle {
+namespace inference {
 
-framework::ProgramDesc* InferenceEngine::LoadInferenceModel(
-    framework::Executor& exe,
-    framework::Scope* scope,
-    const std::string& dirname) {
-  std::string model_filename = dirname + "/__model__";
-  LOG(INFO) << "loading model from " << model_filename;
-  std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary);
-  std::string program_desc_str;
-  inputfs.seekg(0, std::ios::end);
-  program_desc_str.resize(inputfs.tellg());
-  inputfs.seekg(0, std::ios::beg);
-  LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
-  inputfs.read(&program_desc_str[0], program_desc_str.size());
-  inputfs.close();
-
-  program_ = new framework::ProgramDesc(program_desc_str);
-  GenerateLoadProgram(dirname);
-  exe.Run(*load_program_, scope, 0, true, true);
-
-  framework::BlockDesc* global_block = program_->MutableBlock(0);
-  feed_var_names_.clear();
-  fetch_var_names_.clear();
-  for (auto* op : global_block->AllOps()) {
-    if (op->Type() == "feed") {
-      feed_var_names_.insert(feed_var_names_.begin(), op->Output("Out")[0]);
-    } else if (op->Type() == "fetch") {
-      fetch_var_names_.push_back(op->Input("X")[0]);
-    }
-  }
-
-  return program_;
-}
+const std::string kFeedOpType = "feed";
 
-bool InferenceEngine::IsParameter(const framework::VarDesc* var) {
+bool IsParameter(const framework::VarDesc* var,
+                 const framework::ProgramDesc* main_program) {
   if (var->Persistable()) {
     // There are many unreachable variables in the program
-    for (size_t i = 0; i < program_->Size(); ++i) {
-      const framework::BlockDesc& block = program_->Block(i);
+    for (size_t i = 0; i < main_program->Size(); ++i) {
+      const framework::BlockDesc& block = main_program->Block(i);
       for (auto* op : block.AllOps()) {
-        if (op->Type() == "feed") {
+        if (op->Type() == kFeedOpType) {
           continue;
         }
         for (auto input_argument_name : op->InputArgumentNames()) {
@@ -70,13 +41,16 @@ bool InferenceEngine::IsParameter(const framework::VarDesc* var) {
   return false;
 }
 
-void InferenceEngine::GenerateLoadProgram(const std::string& dirname) {
-  framework::BlockDesc* global_block = program_->MutableBlock(0);
+void LoadPersistables(framework::Executor& executor,
+                      framework::Scope& scope,
+                      const std::string& dirname,
+                      framework::ProgramDesc* main_program) {
+  framework::BlockDesc* global_block = main_program->MutableBlock(0);
 
-  load_program_ = new framework::ProgramDesc();
-  framework::BlockDesc* load_block = load_program_->MutableBlock(0);
+  framework::ProgramDesc* load_program = new framework::ProgramDesc();
+  framework::BlockDesc* load_block = load_program->MutableBlock(0);
   for (auto* var : global_block->AllVars()) {
-    if (IsParameter(var)) {
+    if (IsParameter(var, main_program)) {
       VLOG(3) << "parameter's name: " << var->Name();
 
       framework::VarDesc* new_var = load_block->Var(var->Name());
@@ -94,5 +68,30 @@ void InferenceEngine::GenerateLoadProgram(const std::string& dirname) {
       op->CheckAttrs();
     }
   }
+  executor.Run(*load_program, &scope, 0, true, true);
+  delete load_program;
 }
+
+framework::ProgramDesc* Load(framework::Executor& executor,
+                             framework::Scope& scope,
+                             const std::string& dirname) {
+  std::string model_filename = dirname + "/__model__";
+  LOG(INFO) << "loading model from " << model_filename;
+  std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary);
+  std::string program_desc_str;
+  inputfs.seekg(0, std::ios::end);
+  program_desc_str.resize(inputfs.tellg());
+  inputfs.seekg(0, std::ios::beg);
+  LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
+  inputfs.read(&program_desc_str[0], program_desc_str.size());
+  inputfs.close();
+
+  framework::ProgramDesc* main_program =
+      new framework::ProgramDesc(program_desc_str);
+
+  LoadPersistables(executor, scope, dirname, main_program);
+  return main_program;
+}
+
+}  // namespace inference
 }  // namespace paddle
diff --git a/paddle/inference/io.h b/paddle/inference/io.h
new file mode 100644
index 0000000000000000000000000000000000000000..400f5af8c53146cfc9d91ae15c77eb4364c5e3c8
--- /dev/null
+++ b/paddle/inference/io.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "paddle/framework/block_desc.h"
+#include "paddle/framework/executor.h"
+#include "paddle/framework/program_desc.h"
+#include "paddle/framework/scope.h"
+#include "paddle/framework/var_desc.h"
+
+namespace paddle {
+namespace inference {
+
+bool IsParameter(const framework::VarDesc* var,
+                 const framework::ProgramDesc* main_program);
+
+void LoadPersistables(framework::Executor& executor,
+                      framework::Scope& scope,
+                      const std::string& dirname,
+                      framework::ProgramDesc* main_program);
+
+framework::ProgramDesc* Load(framework::Executor& executor,
+                             framework::Scope& scope,
+                             const std::string& dirname);
+
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/inference/tests/book/test_inference_recognize_digits.cc b/paddle/inference/tests/book/test_inference_recognize_digits.cc
index 45fbfe27a78a5de56d185d59aa924626025fce21..d8e4c4d7eee9fc20a184332833b0008e9ed9f716 100644
--- a/paddle/inference/tests/book/test_inference_recognize_digits.cc
+++ b/paddle/inference/tests/book/test_inference_recognize_digits.cc
@@ -16,8 +16,8 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include <time.h>
 #include "gflags/gflags.h"
-#include "paddle/framework/init.h"
-#include "paddle/inference/inference.h"
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/inference/io.h"
 
 DEFINE_string(dirname, "", "Directory of the inference model.");
 
@@ -31,14 +31,13 @@ void TestInference(const std::string& dirname,
   auto* scope = new paddle::framework::Scope();
 
   // 2. Initialize the inference_program and load all parameters from file
-  paddle::InferenceEngine* engine = new paddle::InferenceEngine();
-  paddle::framework::ProgramDesc* inference_program =
-      engine->LoadInferenceModel(executor, scope, dirname);
+  auto* inference_program = paddle::inference::Load(executor, *scope, dirname);
 
   // 3. Get the feed_var_names and fetch_var_names
-  const std::vector<std::string>& feed_target_names = engine->GetFeedVarNames();
+  const std::vector<std::string>& feed_target_names =
+      inference_program->GetFeedVarNames();
   const std::vector<std::string>& fetch_target_names =
-      engine->GetFetchVarNames();
+      inference_program->GetFetchVarNames();
 
   // 4. Prepare inputs
   std::map<std::string, paddle::framework::LoDTensor*> feed_targets;
@@ -56,8 +55,8 @@
   // 6. Run the inference program
   executor.Run(*inference_program, scope, feed_targets, fetch_targets);
 
+  delete inference_program;
   delete scope;
-  delete engine;
 }
 
 TEST(inference, recognize_digits) {
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 15f7cb6b560590f55e276fde4900d2e3c0045fb8..48cf5816cce4bb5ee8e66e72c5b1acea8535ab10 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -147,6 +147,7 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
 op_library(sequence_conv_op DEPS context_project)
 op_library(sequence_pool_op DEPS sequence_pooling)
 op_library(lstm_op DEPS sequence2batch lstm_compute)
+op_library(lstmp_op DEPS sequence2batch lstm_compute)
 op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(recurrent_op DEPS executor)
 op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale math_function)
diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h
index 88c3d1c597a853abdee7753a5110be4a1726e905..c0809abc05104c1e8c1f42331c0530724dd1472f 100644
--- a/paddle/operators/activation_op.h
+++ b/paddle/operators/activation_op.h
@@ -323,7 +323,7 @@ template <typename T>
 struct FloorFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out>
   void operator()(Device d, X x, Out out) const {
-    out.device(d) = x.ceil();
+    out.device(d) = x.floor();
   }
 };
diff --git a/paddle/operators/detail/grpc_client.cc b/paddle/operators/detail/grpc_client.cc
index d699dabf2fb982f267c4869180efaf0e600eb46c..9b5f7afc6a48f13ff999f635efeb9e7bf0a76fb5 100644
--- a/paddle/operators/detail/grpc_client.cc
+++ b/paddle/operators/detail/grpc_client.cc
@@ -97,12 +97,27 @@ bool RPCClient::AsyncGetVariable(const std::string& ep,
   return true;
 }
 
+bool RPCClient::AsyncSendBatchBarrier(const std::string& ep, int64_t time_out) {
+  const auto ch = GetChannel(ep);
+
+  BatchBarrierProcessor* s = new BatchBarrierProcessor(ch);
+  s->Prepare(time_out);
+
+  sendrecv::VariableMessage req;
+  req.set_varname(BATCH_BARRIER_MESSAGE);
+  auto rpc = s->stub_->AsyncSendVariable(s->context_.get(), req, &cq_);
+  rpc->Finish(&s->reply_, &s->status_, (void*)s);
+  req_count_++;
+
+  return true;
+}
+
 bool RPCClient::Wait() {
   if (req_count_ <= 0) {
     return true;
   }
-
-  std::vector<bool> a(req_count_);
+  const size_t kReqCnt = req_count_;
+  bool a[kReqCnt];
   std::vector<std::future<void>> waits(req_count_);
 
   for (int i = 0; i < req_count_; i++) {
diff --git a/paddle/operators/detail/grpc_client.h b/paddle/operators/detail/grpc_client.h
index a62e70a2533ae52d84d010504b19fed5aeb15dc0..f9499f6dc70c541c214e0b659f10b2ed1e8e8581 100644
--- a/paddle/operators/detail/grpc_client.h
+++ b/paddle/operators/detail/grpc_client.h
@@ -71,6 +71,15 @@ class ClientBase {
     context_->set_deadline(deadline);
   }
 
+  virtual void Prepare(int64_t time_out) {
+    context_.reset(new grpc::ClientContext());
+
+    std::chrono::system_clock::time_point deadline =
+        std::chrono::system_clock::now() + std::chrono::milliseconds(time_out);
+
+    context_->set_deadline(deadline);
+  }
+
   virtual void Process() = 0;
 
   std::unique_ptr<sendrecv::SendRecvService::Stub> stub_;
@@ -117,6 +126,17 @@ class GetProcessor : public ClientBase<sendrecv::VariableMessage> {
   RequestGetCallBack response_call_back_ = ProcGetResponse;
 };
 
+class BatchBarrierProcessor : public ClientBase<sendrecv::VoidMessage> {
+ public:
+  explicit BatchBarrierProcessor(std::shared_ptr<grpc::Channel> ch)
+      : ClientBase<sendrecv::VoidMessage>(ch) {}
+
+  virtual ~BatchBarrierProcessor() {}
+
+  virtual void Process() {}
+  sendrecv::VoidMessage reply_;
+};
+
 class RPCClient {
  public:
   bool AsyncSendVariable(const std::string& ep,
@@ -130,6 +150,10 @@ class RPCClient {
                         const framework::Scope& scope,
                         const std::string& var_name,
                         int64_t time_out = 600 * 1000);
+
+  bool AsyncSendBatchBarrier(const std::string& ep,
+                             int64_t time_out = 600 * 1000);
+
   bool Wait();
 
  private:
diff --git a/paddle/operators/detail/grpc_server.cc b/paddle/operators/detail/grpc_server.cc
index 3ddcd839bdd23547216465dfaf44a3cd8285fe6d..4f94e1315fbd2810a05354f7c3fc54ea30967e8a 100644
--- a/paddle/operators/detail/grpc_server.cc
+++ b/paddle/operators/detail/grpc_server.cc
@@ -132,6 +132,7 @@ void AsyncGRPCServer::RunSyncUpdate() {
   cq_send_ = builder.AddCompletionQueue();
   cq_get_ = builder.AddCompletionQueue();
+
   server_ = builder.BuildAndStart();
   LOG(INFO) << "Server listening on " << address_ << std::endl;
 
@@ -141,11 +142,11 @@ void AsyncGRPCServer::RunSyncUpdate() {
       std::bind(&AsyncGRPCServer::TryToRegisterNewGetOne, this);
 
   t_send_.reset(
-      new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this, false,
+      new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this,
                                 cq_send_.get(), "cq_send", send_register)));
 
   t_get_.reset(
-      new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this, true,
+      new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this,
                                 cq_get_.get(), "cq_get", get_register)));
 
   // wait server
@@ -174,7 +175,7 @@ void AsyncGRPCServer::TryToRegisterNewSendOne() {
   }
   RequestSend* send = new RequestSend(&service_, cq_send_.get(),
                                       &var_recv_queue_);
-  VLOG(4) << "create RequestSend status:" << send->Status();
+  VLOG(4) << "Create RequestSend status:" << send->Status();
 }
 
 void AsyncGRPCServer::TryToRegisterNewGetOne() {
@@ -184,11 +185,11 @@ void AsyncGRPCServer::TryToRegisterNewGetOne() {
   }
   RequestGet* get = new RequestGet(&service_, cq_get_.get(), scope_, dev_ctx_,
                                    &var_get_queue_);
-  VLOG(4) << "create Requestget status:" << get->Status();
+  VLOG(4) << "Create RequestGet status:" << get->Status();
 }
 
-// FIXME(typhoonzero): remove wait argument and change cq_name to enum.
+// FIXME(typhoonzero): change cq_name to enum.
+void AsyncGRPCServer::HandleRequest(grpc::ServerCompletionQueue* cq,
+                                    std::string cq_name,
+                                    std::function<void()> TryToRegisterNewOne) {
   TryToRegisterNewOne();
diff --git a/paddle/operators/detail/grpc_server.h b/paddle/operators/detail/grpc_server.h
index 1ca9086c744c558fd05fb4fc1a7280729afbec28..3f8b9d93176148619d6820f6a365d9da2e73b10d 100644
--- a/paddle/operators/detail/grpc_server.h
+++ b/paddle/operators/detail/grpc_server.h
@@ -57,8 +57,7 @@ class AsyncGRPCServer final : public sendrecv::SendRecvService::Service {
   void ShutDown();
 
  protected:
-  void HandleRequest(bool wait, grpc::ServerCompletionQueue *cq,
-                     std::string cq_name,
+  void HandleRequest(grpc::ServerCompletionQueue *cq, std::string cq_name,
                      std::function<void()> TryToRegisterNewOne);
   void TryToRegisterNewSendOne();
   void TryToRegisterNewGetOne();
diff --git a/paddle/operators/detail/sendrecvop_utils.h b/paddle/operators/detail/sendrecvop_utils.h
index bc6581afab93c626c7c2439d699c6c2d858df9fa..8e66f7299c7b4d30bc5a6fe6a18b7cb3ae3827a5 100644
--- a/paddle/operators/detail/sendrecvop_utils.h
+++ b/paddle/operators/detail/sendrecvop_utils.h
@@ -30,6 +30,9 @@ namespace paddle {
 namespace operators {
 namespace detail {
 
+#define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV"
+#define BATCH_BARRIER_MESSAGE "BATCH_BARRIER@RECV"
+
 void SerializeToMessage(const std::string& name,
                         const framework::Variable* var,
                         const platform::DeviceContext& ctx,
                         sendrecv::VariableMessage* msg);
diff --git a/paddle/operators/gru_op.cc b/paddle/operators/gru_op.cc
index 76f2adefede3b4bc4035f86f8f8663eed29343ae..fb901b639492a179925ff852f9030fc6674d1f63 100644
--- a/paddle/operators/gru_op.cc
+++ b/paddle/operators/gru_op.cc
@@ -135,14 +135,14 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 GRU Operator implements part calculations of the complete GRU as following:
 
-\f[
-update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
-output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
+$$
+update\_gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
+reset\_gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
+output\_candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
 output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
-\f]
+$$
 
-@note To implement the complete GRU, fully-connected operator must be used
+@note To implement the complete GRU, fully-connected operator must be used
 before to feed xu, xr and xc as the Input of GRU operator.
 )DOC");
   }
diff --git a/paddle/operators/one_hot_op.cc b/paddle/operators/one_hot_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e78b7468de4ea5f29378c2dc5905fdd36fb0ae2f
--- /dev/null
+++ b/paddle/operators/one_hot_op.cc
@@ -0,0 +1,95 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/operators/one_hot_op.h"
+#include "paddle/framework/framework.pb.h"
+
+namespace paddle {
+namespace operators {
+
+class OneHotOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of OneHotOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of OneHotOp should not be null.");
+
+    auto x_dims = ctx->GetInputDim("X");
+    PADDLE_ENFORCE_GE(x_dims.size(), 2,
+                      "Rank of Input(X) should be at least 2.");
+    PADDLE_ENFORCE_GE(x_dims[x_dims.size() - 1], 1U,
+                      "Last dimension of Input(X) should be 1.");
+
+    int depth = ctx->Attrs().Get<int>("depth");
+
+    PADDLE_ENFORCE_GT(depth, 0, "Should provide a positive depth (%d).", depth);
+
+    framework::DDim out_dims(x_dims);
+    out_dims[out_dims.size() - 1] = depth;
+    ctx->SetOutputDim("Out", out_dims);
+    ctx->ShareLoD("X", /* --> */ "Out");
+  }
+};
+
+class OneHotOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  OneHotOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X",
+             "(LoDTensor, LoDTensor<int>) Input variable with rank at least 2. "
+             "The last dimension of X should be 1. Each value of X is an index "
+             "to indicate the position.");
+    AddOutput("Out",
+              "(Tensor, Tensor<float>) Output tensor with same rank as X. "
+              "The tensor consists of one-hot representations of values in X.");
+    AddAttr<int>("depth",
+                 "A positive integer to specify the length of one-hot vector.");
+    AddAttr<int>("dtype",
+                 "An integer to specify the data type of one-hot "
+                 "vector. The default value is FP32.")
+        .SetDefault(paddle::framework::proto::DataType::FP32);
+    AddComment(R"DOC(
+One Hot Operator. This operator creates the one-hot representations for input
+index values. The following example will help to explain the function of this
+operator:
+
+X is a LoDTensor:
+  X.lod = [[0, 1, 4]]
+  X.shape = [4, 1]
+  X.data = [[1], [1], [3], [0]]
+
+set depth = 4
+
+Out is a LoDTensor:
+  Out.lod = [[0, 1, 4]]
+  Out.shape = [4, 4]
+  Out.data = [[0., 1., 0., 0.],
+              [0., 1., 0., 0.],
+              [0., 0., 0., 1.],
+              [1., 0., 0., 0.]]
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(one_hot, ops::OneHotOp, ops::OneHotOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    one_hot, ops::OneHotKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::OneHotKernel<paddle::platform::CPUDeviceContext, int64_t>);
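The kernel's addressing is easiest to see in a standalone sketch (plain C++, not part of the patch; assumes valid indices and a row-major [numel, depth] output):

```cpp
#include <vector>

// Row i of the output gets a single 1.0 at column in[i] -- the same
// `p_out_data + i * depth + p_in_data[i]` addressing the kernels use.
std::vector<float> OneHot(const std::vector<int>& in, int depth) {
  std::vector<float> out(in.size() * depth, 0.0f);
  for (size_t i = 0; i < in.size(); ++i) {
    out[i * depth + in[i]] = 1.0f;  // requires 0 <= in[i] < depth
  }
  return out;
}
// OneHot({1, 1, 3, 0}, 4) reproduces Out.data from the doc string above.
```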
diff --git a/paddle/operators/one_hot_op.cu b/paddle/operators/one_hot_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..16f6d9433eabd7be157ed57362a0d55d86c6ee92
--- /dev/null
+++ b/paddle/operators/one_hot_op.cu
@@ -0,0 +1,80 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/operators/one_hot_op.h"
+#include "paddle/platform/cuda_helper.h"
+#include "paddle/platform/gpu_info.h"
+
+namespace paddle {
+namespace operators {
+using platform::PADDLE_CUDA_NUM_THREADS;
+
+template <typename InT, typename OutT>
+__global__ void FillOutputKernel(const InT* p_in_data, OutT* p_out_data,
+                                 const int64_t numel, const int depth) {
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx < numel) {
+    *(p_out_data + (idx * depth) + p_in_data[idx]) = 1.0;
+  }
+}
+
+template <typename DeviceContext, typename InT>
+struct OneHotOpCUDAFunctor {
+  const framework::LoDTensor* in_;
+  framework::LoDTensor* out_;
+  const DeviceContext& ctx_;
+  int depth_;
+
+  OneHotOpCUDAFunctor(const framework::LoDTensor* in, framework::LoDTensor* out,
+                      int depth, const DeviceContext& ctx)
+      : in_(in), out_(out), depth_(depth), ctx_(ctx) {}
+
+  template <typename OutT>
+  void operator()() const {
+    auto* p_in_data = in_->data<InT>();
+    auto numel = in_->numel();
+    auto* p_out_data = out_->mutable_data<OutT>(ctx_.GetPlace());
+    auto stream = ctx_.stream();
+    math::set_constant(ctx_, out_, 0.0);
+
+    FillOutputKernel<<<(numel + PADDLE_CUDA_NUM_THREADS - 1) /
+                           PADDLE_CUDA_NUM_THREADS,
+                       PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
+        p_in_data, p_out_data, numel, depth_);
+  }
+};
+
+using LoDTensor = framework::LoDTensor;
+template <typename DeviceContext, typename T>
+class OneHotCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* in = context.Input<LoDTensor>("X");
+    auto* out = context.Output<LoDTensor>("Out");
+    int depth = context.Attr<int>("depth");
+
+    framework::VisitDataType(
+        static_cast<framework::proto::DataType>(context.Attr<int>("dtype")),
+        OneHotOpCUDAFunctor<DeviceContext, T>(
+            in, out, depth, context.template device_context<DeviceContext>()));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    one_hot, ops::OneHotCUDAKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::OneHotCUDAKernel<paddle::platform::CUDADeviceContext, int64_t>);
diff --git a/paddle/operators/one_hot_op.h b/paddle/operators/one_hot_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..12031ede2c3cd042a3d25003b714652b4d0d4453
--- /dev/null
+++ b/paddle/operators/one_hot_op.h
@@ -0,0 +1,68 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename DeviceContext, typename InT>
+struct OneHotOpFunctor {
+  const framework::LoDTensor* in_;
+  framework::LoDTensor* out_;
+  int depth_;
+  const DeviceContext& ctx_;
+
+  OneHotOpFunctor(const framework::LoDTensor* in, framework::LoDTensor* out,
+                  int depth, const DeviceContext& ctx)
+      : in_(in), out_(out), depth_(depth), ctx_(ctx) {}
+
+  template <typename OutT>
+  void operator()() const {
+    auto* p_in_data = in_->data<InT>();
+    auto numel = in_->numel();
+    auto* p_out_data = out_->mutable_data<OutT>(ctx_.GetPlace());
+    math::set_constant(ctx_, out_, 0.0);
+
+    for (int i = 0; i < numel; ++i) {
+      PADDLE_ENFORCE_GE(p_in_data[i], 0,
+                        "Illegal index value, should be at least 0.");
+      PADDLE_ENFORCE_LT(p_in_data[i], depth_,
+                        "Illegal index value, should be less than depth (%d).",
+                        depth_);
+      *(p_out_data + i * depth_ + p_in_data[i]) = 1.0;
+    }
+  }
+};
+
+using LoDTensor = framework::LoDTensor;
+template <typename DeviceContext, typename T>
+class OneHotKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* in = context.Input<LoDTensor>("X");
+    auto* out = context.Output<LoDTensor>("Out");
+    int depth = context.Attr<int>("depth");
+
+    framework::VisitDataType(
+        static_cast<framework::proto::DataType>(context.Attr<int>("dtype")),
+        OneHotOpFunctor<DeviceContext, T>(
+            in, out, depth, context.template device_context<DeviceContext>()));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc
index 593c35879ae2b3680b93ac5d8443110e61cb99fe..49e1eb3402482e7ff12d9b2b640f7271a80cf6d9 100644
--- a/paddle/operators/recv_op.cc
+++ b/paddle/operators/recv_op.cc
@@ -29,8 +29,6 @@ limitations under the License. */
 #include "paddle/operators/detail/simple_block_queue.h"
 #include "paddle/string/printf.h"
 
-#define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV"
-
 namespace paddle {
 namespace operators {
 
@@ -95,7 +93,6 @@ class RecvOp : public framework::OperatorBase {
     auto param_list = Attr<std::vector<std::string>>("ParamList");
     auto grad_list = Attr<std::vector<std::string>>("GradList");
    auto fan_in = Attr<int>("Fanin");
-    size_t param_count = param_list.size();
 
     auto *block = Attr<framework::BlockDesc *>(kOptimizeBlock);
     auto *program = block->Program();
@@ -103,38 +100,50 @@ class RecvOp : public framework::OperatorBase {
     // TODO(typhoonzero): change this to a while_op for every cluster-batch.
     bool exit_flag = false;
-    size_t barrier_size = param_count * fan_in;
     while (!exit_flag) {
       // Get from multiple trainers, we don't care about the order in which
       // the gradients arrives, just add suffix 0~n and merge the gradient.
       rpc_service_->SetCond(0);
-      for (size_t i = 0; i < barrier_size; ++i) {
+      size_t recv_var_cnt = 0;
+      int batch_barrier = 0;
+      while (batch_barrier != fan_in) {
         const detail::MessageWithName &v = rpc_service_->Get();
         auto grad_var_name = v.first;
         if (grad_var_name == LISTEN_TERMINATE_MESSAGE) {
           LOG(INFO) << "received terminate message and exit";
           exit_flag = true;
           break;
-        }
-        auto it = std::find(grad_list.begin(), grad_list.end(), grad_var_name);
-        std::string param_var_name;
-        if (it != grad_list.end()) {
-          param_var_name = param_list[it - grad_list.begin()];
+        } else if (grad_var_name == BATCH_BARRIER_MESSAGE) {
+          VLOG(3) << "recv batch barrier message";
+          batch_barrier++;
+          continue;
         } else {
-          LOG(ERROR) << "grad has no paired param:" << grad_var_name;
-        }
-        VLOG(3) << "received grad: " << grad_var_name
-                << " updating param: " << param_var_name;
-        if (fan_in > 1) {
-          grad_var_name = this->GetGradVarNameForTrainer(grad_var_name);
-        }
-        auto *var = recv_scope.FindVar(grad_var_name);
-        if (var == nullptr) {
-          LOG(ERROR) << "Can not find server side var: " << grad_var_name;
-          PADDLE_THROW("Can not find server side var");
+          // receive a variable
+          recv_var_cnt++;
+          auto it =
+              std::find(grad_list.begin(), grad_list.end(), grad_var_name);
+          std::string param_var_name;
+          if (it != grad_list.end()) {
+            param_var_name = param_list[it - grad_list.begin()];
+          } else {
+            LOG(ERROR) << "grad has no paired param:" << grad_var_name;
+          }
+          VLOG(3) << "received grad: " << grad_var_name
+                  << " updating param: " << param_var_name;
+
+          if (fan_in > 1) {
+            grad_var_name = this->GetGradVarNameForTrainer(grad_var_name);
+          }
+          auto *var = recv_scope.FindVar(grad_var_name);
+          if (var == nullptr) {
+            LOG(ERROR) << "Can not find server side var: " << grad_var_name;
+            PADDLE_THROW("Can not find server side var");
+          }
+          detail::DeserializeFromMessage(v.second, dev_ctx, var);
         }
-        detail::DeserializeFromMessage(v.second, dev_ctx, var);
       }
+      VLOG(3) << "recv " << recv_var_cnt << " parameters for one barrier.";
+      // TODO(Yancey1989): merge SelectedRows variables here
       if (exit_flag) {
         break;
       }
@@ -146,7 +155,7 @@ class RecvOp : public framework::OperatorBase {
         LOG(ERROR) << "run sub program error " << e.what();
       }
       rpc_service_->SetCond(1);
-      rpc_service_->WaitClientGet(barrier_size);
+      rpc_service_->WaitClientGet(recv_var_cnt);
       grads_counter_.clear();
     }  // while(true)
   }
@@ -161,7 +170,6 @@ class RecvOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   RecvOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("RX", "(Tensor) Input tensor to be optimized").AsDuplicable();
     AddComment(R"DOC(
 Recv operator
diff --git a/paddle/operators/reduce_op.cc b/paddle/operators/reduce_op.cc
index 4a06babeda00f2420df80f81f876a0047a3285ef..84f24a909597915f0eebb6c9cad37510cbe93e7b 100644
--- a/paddle/operators/reduce_op.cc
+++ b/paddle/operators/reduce_op.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
*/ #include "paddle/operators/reduce_op.h" -#include "paddle/operators/net_op.h" namespace paddle { namespace operators { @@ -38,10 +37,14 @@ class ReduceOp : public framework::OperatorWithKernel { dim, x_rank, "The dim should be in the range [-rank(input), rank(input))."); bool reduce_all = ctx->Attrs().Get("reduce_all"); + bool keep_dim = ctx->Attrs().Get("keep_dim"); if (reduce_all) { - ctx->SetOutputDim("Out", {1}); + if (keep_dim) + ctx->SetOutputDim( + "Out", framework::make_ddim(std::vector(x_rank, 1))); + else + ctx->SetOutputDim("Out", {1}); } else { - bool keep_dim = ctx->Attrs().Get("keep_dim"); auto dims_vector = vectorize(x_dims); if (keep_dim || x_rank == 1) { dims_vector[dim] = 1; diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index 5aa66c20eaf77959089100f8dcee55f2bc83a71a..bb719dc2a8a577bc042a2a70f7169b7d70f83684 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -37,17 +37,25 @@ class SendOp : public framework::OperatorBase { auto ins = Inputs("X"); auto outs = Outputs("Out"); std::vector epmap = Attr>("epmap"); + std::vector endpoints = + Attr>("endpoints"); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& ctx = *pool.Get(place); for (size_t i = 0; i < ins.size(); i++) { - VLOG(3) << "sending " << ins[i]; + VLOG(3) << "sending " << ins[i] << " to " << epmap[i]; client_.AsyncSendVariable(epmap[i], ctx, scope, ins[i]); } PADDLE_ENFORCE(client_.Wait()); + for (auto& ep : endpoints) { + VLOG(3) << "batch barrier, ep: " << ep; + client_.AsyncSendBatchBarrier(ep); + } + PADDLE_ENFORCE(client_.Wait()); + for (size_t i = 0; i < outs.size(); i++) { - VLOG(3) << "getting " << outs[i]; + VLOG(3) << "getting " << outs[i] << " from " << epmap[i]; client_.AsyncGetVariable(epmap[i], ctx, scope, outs[i]); } diff --git a/paddle/operators/sequence_reshape_op.cc b/paddle/operators/sequence_reshape_op.cc index 57cca13105537d88fe942b850cae10650d3096e2..d89a46a712c9c84a142e1e347219ed171556d761 100644 --- a/paddle/operators/sequence_reshape_op.cc +++ b/paddle/operators/sequence_reshape_op.cc @@ -30,8 +30,13 @@ class SequenceReshapeOp : public framework::OperatorWithKernel { auto x_numel = product(x_dims); PADDLE_ENFORCE_EQ(x_dims.size(), 2U, "Rank of Input(X) should be 2."); int new_dim = ctx->Attrs().Get("new_dim"); - ctx->SetOutputDim("Out", - {x_numel / new_dim, static_cast(new_dim)}); + if (ctx->IsRuntime()) { + ctx->SetOutputDim("Out", + {x_numel / new_dim, static_cast(new_dim)}); + } else { + // when compiling, the batch size is undetermined, just set to -1 + ctx->SetOutputDim("Out", {-1, static_cast(new_dim)}); + } } }; diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index e70d04d9017e9e36bbd55d6a28889d9ba7fb2a13..fbae37b2ca063e32cb12ded0da901d93438bc9a2 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -32,7 +32,7 @@ function cmake_gen() { cat <= 0 + parent_block = prog.block(parent_idx) + return parent_block + + def complete_op(self): + main_program = self.helper.main_program + current_block = main_program.current_block() + parent_block = self.parent_block() + + params, grads = self.get_params_and_grads() + param_names = [p.name for p in params] + grad_names = [g.name for g in grads] + parent_block.append_op( + type='recv', + inputs={}, + outputs={}, + attrs={ + 'endpoint': self.endpoint, + 'Fanin': self.fan_in, + 'ParamList': param_names, + 'GradList': grad_names, + 'OptimizeBlock': current_block + }) + + +def Send(endpoints, send_vars, 
+         get_vars):
+    """
+    Send layer
+
+    Args:
+        endpoints: comma separated IP:PORT pairs in the order
+                   of send_vars to send
+        send_vars: vars to send
+        get_vars: vars to get from server after send completes.
+
+    Send variables to the server side, and get vars from the server
+    side when the server has finished running the server side program.
+    """
+    assert (type(send_vars) == list)
+    assert (type(get_vars) == list)
+
+    epmap = endpoints.split(",")
+    endpoints = list(set(epmap))
+
+    helper = LayerHelper("Send", **locals())
+    helper.append_op(
+        type="send",
+        inputs={"X": send_vars},
+        outputs={"Out": get_vars},
+        attrs={"endpoints": endpoints,
+               "epmap": epmap})
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index bae33f6a156b094d6e9cccc9cabb42880f157ac8..d11dccfd22124d58d8634c01a00527c373b92f00 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -26,6 +26,7 @@ __all__ = [
     'fc',
     'embedding',
     'dynamic_lstm',
+    'dynamic_lstmp',
     'dynamic_gru',
     'gru_unit',
     'linear_chain_crf',
@@ -256,7 +257,8 @@ def dynamic_lstm(input,
                  gate_activation='sigmoid',
                  cell_activation='tanh',
                  candidate_activation='tanh',
-                 dtype='float32'):
+                 dtype='float32',
+                 name=None):
     """
     **Dynamic LSTM Layer**
@@ -282,7 +284,7 @@ def dynamic_lstm(input,
     W_{fc}, W_{oc}` are diagonal weight matrices for peephole connections. In
     our implementation, we use vectors to represent these diagonal weight
     matrices. The :math:`b` terms denote bias vectors (:math:`b_i` is the input
-    gate bias vector), :math:`\sigma` is the non-line activations, such as
+    gate bias vector), :math:`\sigma` is the non-linear activations, such as
     logistic sigmoid function, and :math:`i, f, o` and :math:`c` are the input
     gate, forget gate, output gate, and cell activation vectors, respectively,
     all of which have the same size as the cell output activation vector :math:`h`.
@@ -308,25 +310,25 @@ def dynamic_lstm(input,
                      (T X 4D), where T is the total time steps in this
                      mini-batch, D is the hidden size.
         size(int): 4 * hidden size.
-        param_attr(ParamAttr): The parameter attribute for the learnable
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
                                hidden-hidden weights.
-                               - The shape is (D x 4D), where D is the hidden
-                                 size.
                                - Weights = {:math:`W_{ch}, W_{ih}, \
                                             W_{fh}, W_{oh}`}
-        bias_attr(ParamAttr): The bias attribute for the learnable bias
+                               - The shape is (D x 4D), where D is the hidden
+                                 size.
+        bias_attr(ParamAttr|None): The bias attribute for the learnable bias
                               weights, which contains two parts, input-hidden
                               bias weights and peephole connections weights if
                               setting `use_peepholes` to `True`.
 
                               1. `use_peepholes = False`
-                                - The shape is (1 x 4D).
                                 - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                                - The shape is (1 x 4D).
                               2. `use_peepholes = True`
-                                - The shape is (1 x 7D).
                                 - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
                                              W_{fc}, W_{oc}`}.
+                                - The shape is (1 x 7D).
         use_peepholes(bool): Whether to enable diagonal/peephole connections,
                              default `True`.
         is_reverse(bool): Whether to compute reversed LSTM, default `False`.
@@ -339,6 +341,8 @@ def dynamic_lstm(input,
                                  Choices = ["sigmoid", "tanh", "relu",
                                  "identity"], default "tanh".
         dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
+        name(str|None): A name for this layer (optional). If set to None, the
+                        layer will be named automatically.
 
     Returns:
         tuple: The hidden state, and cell state of LSTM. The shape of both \
@@ -353,6 +357,7 @@ def dynamic_lstm(input,
         forward, _ = fluid.layers.dynamic_lstm(
             input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
     """
+
     helper = LayerHelper('lstm', **locals())
     size = size / 4
     weight = helper.create_parameter(
@@ -389,6 +394,192 @@ def dynamic_lstm(input,
     return hidden, cell
 
 
+def dynamic_lstmp(input,
+                  size,
+                  proj_size,
+                  param_attr=None,
+                  bias_attr=None,
+                  use_peepholes=True,
+                  is_reverse=False,
+                  gate_activation='sigmoid',
+                  cell_activation='tanh',
+                  candidate_activation='tanh',
+                  proj_activation='tanh',
+                  dtype='float32',
+                  name=None):
+    """
+    **Dynamic LSTMP Layer**
+
+    LSTMP (LSTM with recurrent projection) layer has a separate projection
+    layer after the LSTM layer, projecting the original hidden state to a
+    lower-dimensional one, which is proposed to reduce the total number of
+    parameters and, furthermore, the computational complexity of the LSTM,
+    especially for the case where the size of the output units is relatively
+    large (https://research.google.com/pubs/archive/43905.pdf).
+
+    The formula is as follows:
+
+    .. math::
+
+        i_t & = \sigma(W_{ix}x_{t} + W_{ir}r_{t-1} + W_{ic}c_{t-1} + b_i)
+
+        f_t & = \sigma(W_{fx}x_{t} + W_{fr}r_{t-1} + W_{fc}c_{t-1} + b_f)
+
+        \\tilde{c_t} & = act_g(W_{cx}x_t + W_{cr}r_{t-1} + b_c)
+
+        o_t & = \sigma(W_{ox}x_{t} + W_{or}r_{t-1} + W_{oc}c_t + b_o)
+
+        c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
+
+        h_t & = o_t \odot act_h(c_t)
+
+        r_t & = \overline{act_h}(W_{rh}h_t)
+
+    In the above formula:
+
+    * :math:`W`: Denotes weight matrices (e.g. :math:`W_{xi}` is \
+          the matrix of weights from the input gate to the input).
+    * :math:`W_{ic}`, :math:`W_{fc}`, :math:`W_{oc}`: Diagonal weight \
+          matrices for peephole connections. In our implementation, \
+          we use vectors to represent these diagonal weight matrices.
+    * :math:`b`: Denotes bias vectors (e.g. :math:`b_i` is the input gate \
+          bias vector).
+    * :math:`\sigma`: The activation, such as logistic sigmoid function.
+    * :math:`i, f, o` and :math:`c`: The input gate, forget gate, output \
+          gate, and cell activation vectors, respectively, all of which have \
+          the same size as the cell output activation vector :math:`h`.
+    * :math:`h`: The hidden state.
+    * :math:`r`: The recurrent projection of the hidden state.
+    * :math:`\\tilde{c_t}`: The candidate hidden state, whose \
+          computation is based on the current input and previous hidden state.
+    * :math:`\odot`: The element-wise product of the vectors.
+    * :math:`act_g` and :math:`act_h`: The cell input and cell output \
+          activation functions; `tanh` is usually used for them.
+    * :math:`\overline{act_h}`: The activation function for the projection \
+          output, usually using `identity` or the same as :math:`act_h`.
+
+    Set `use_peepholes` to `False` to disable peephole connection. The formula
+    is omitted here, please refer to the paper
+    http://www.bioinf.jku.at/publications/older/2604.pdf for details.
+
+    Note that these :math:`W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}`
+    operations on the input :math:`x_{t}` are NOT included in this operator.
+    Users can choose to use a fully-connected layer before the LSTMP layer.
+
+    Args:
+        input(Variable): The input of dynamic_lstmp layer, which supports
+                         variable-time length input sequence. The underlying
+                         tensor in this Variable is a matrix with shape
+                         (T X 4D), where T is the total time steps in this
+                         mini-batch, D is the hidden size.
+        size(int): 4 * hidden size.
+        proj_size(int): The size of projection output.
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
+                               hidden-hidden weight and projection weight.
+
+                               - Hidden-hidden weight = {:math:`W_{ch}, W_{ih}, \
+                                                W_{fh}, W_{oh}`}.
+                               - The shape of hidden-hidden weight is (P x 4D),
+                                 where P is the projection size and D the hidden
+                                 size.
+                               - Projection weight = {:math:`W_{rh}`}.
+                               - The shape of projection weight is (D x P).
+        bias_attr(ParamAttr|None): The bias attribute for the learnable bias
+                              weights, which contains two parts, input-hidden
+                              bias weights and peephole connections weights if
+                              setting `use_peepholes` to `True`.
+
+                              1. `use_peepholes = False`
+                                - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                                - The shape is (1 x 4D).
+                              2. `use_peepholes = True`
+                                - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
+                                             W_{fc}, W_{oc}`}.
+                                - The shape is (1 x 7D).
+        use_peepholes(bool): Whether to enable diagonal/peephole connections,
+                             default `True`.
+        is_reverse(bool): Whether to compute reversed LSTM, default `False`.
+        gate_activation(str): The activation for input gate, forget gate and
+                              output gate. Choices = ["sigmoid", "tanh", "relu",
+                              "identity"], default "sigmoid".
+        cell_activation(str): The activation for cell output. Choices = ["sigmoid",
+                              "tanh", "relu", "identity"], default "tanh".
+        candidate_activation(str): The activation for candidate hidden state.
+                              Choices = ["sigmoid", "tanh", "relu", "identity"],
+                              default "tanh".
+        proj_activation(str): The activation for projection output.
+                              Choices = ["sigmoid", "tanh", "relu", "identity"],
+                              default "tanh".
+        dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
+        name(str|None): A name for this layer (optional). If set to None, the
+                        layer will be named automatically.
+
+    Returns:
+        tuple: The projection of the hidden state and the cell state of LSTMP. \
+               The shape of the projection is (T x P) and that of the cell \
+               state is (T x D); both share the same LoD as the `input`.
+
+    Examples:
+        .. code-block:: python
+
+            hidden_dim, proj_dim = 512, 256
+            fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
+                                     act=None, bias_attr=None)
+            proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
+                                                     size=hidden_dim * 4,
+                                                     proj_size=proj_dim,
+                                                     use_peepholes=False,
+                                                     is_reverse=True,
+                                                     cell_activation="tanh",
+                                                     proj_activation="tanh")
+    """
+
+    helper = LayerHelper('lstmp', **locals())
+    size = size / 4
+    weight = helper.create_parameter(
+        attr=helper.param_attr, shape=[proj_size, 4 * size], dtype=dtype)
+    proj_weight = helper.create_parameter(
+        attr=helper.param_attr, shape=[size, proj_size], dtype=dtype)
+    bias_size = [1, 7 * size]
+    if not use_peepholes:
+        bias_size[1] = 4 * size
+    bias = helper.create_parameter(
+        attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
+
+    projection = helper.create_tmp_variable(dtype)
+    cell = helper.create_tmp_variable(dtype)
+    ordered_proj0 = helper.create_tmp_variable(dtype)
+    batch_hidden = helper.create_tmp_variable(dtype)
+    batch_gate = helper.create_tmp_variable(dtype)
+    batch_cell_pre_act = helper.create_tmp_variable(dtype)
+
+    helper.append_op(
+        type='lstmp',
+        inputs={
+            'Input': input,
+            'Weight': weight,
+            'ProjWeight': proj_weight,
+            'Bias': bias
+        },
+        outputs={
+            'Projection': projection,
+            'Cell': cell,
+            'OrderedP0': ordered_proj0,
+            'BatchHidden': batch_hidden,
+            'BatchGate': batch_gate,
+            'BatchCellPreAct': batch_cell_pre_act
+        },
+        attrs={
+            'use_peepholes': use_peepholes,
+            'is_reverse': is_reverse,
+            'gate_activation': gate_activation,
+            'cell_activation': cell_activation,
+            'candidate_activation': candidate_activation,
+            'proj_activation': proj_activation
+        })
+    return projection, cell
+
+
 def dynamic_gru(input,
                 size,
                 param_attr=None,
diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py
index dcca8b6c547d10864ff4cd0af1c217d89e3b522f..fc566b8a2480ce9256d610b4731405cd6d89b7e4 100644
--- a/python/paddle/v2/fluid/param_attr.py
+++ b/python/paddle/v2/fluid/param_attr.py
@@ -15,7 +15,10 @@
 from initializer import Initializer, Xavier, Constant
 from regularizer import WeightDecayRegularizer
 
-__all__ = ['ParamAttr']
+__all__ = [
+    'ParamAttr',
+    'WeightNormParamAttr',
+]
 
 
 class ParamAttr(object):
@@ -82,3 +85,20 @@ class ParamAttr(object):
         if with_initializer:
             kwargs['initializer'] = self.initializer
         return kwargs
+
+
+class WeightNormParamAttr(ParamAttr):
+    """
+    Used for weight normalization. Any field in ParamAttr can also be set here.
+    Besides, an extra field `dim` can be set to indicate the dimension that is
+    excluded when computing the norm.
+    """
+    # List to record the parameters reparameterized by weight normalization.
+    # If these parameters are treated as Variable rather than Parameter,
+    # this list can be used to discriminate these parameters and help to
+    # serialize these parameters for inference.
+    params_with_weight_norm = []
+
+    def __init__(self, dim=None, **kwargs):
+        super(WeightNormParamAttr, self).__init__(**kwargs)
+        self.dim = dim
diff --git a/python/paddle/v2/fluid/regularizer.py b/python/paddle/v2/fluid/regularizer.py
index c2f28eecfda71e305d96c5a6b62c4f5f0fbf3fa6..0273da647afb6e95a136b5ecd0975347d9a378ff 100644
--- a/python/paddle/v2/fluid/regularizer.py
+++ b/python/paddle/v2/fluid/regularizer.py
@@ -87,6 +87,11 @@ class WeightDecayRegularizer(object):
         """
         raise NotImplementedError()
 
+    def __str__(self):
+        """Debug string
+        """
+        raise NotImplementedError()
+
 
 class L2DecayRegularizer(WeightDecayRegularizer):
     """Implements the L2 Weight Decay Regularization
@@ -123,6 +128,9 @@ class L2DecayRegularizer(WeightDecayRegularizer):
 
         return decay
 
+    def __str__(self):
+        return "L2Decay, regularization_coeff=%f" % self._regularization_coeff
+
 
 class L1DecayRegularizer(WeightDecayRegularizer):
     """Implements the L1 Weight Decay Regularization
@@ -163,6 +171,9 @@ class L1DecayRegularizer(WeightDecayRegularizer):
 
         return decay
 
+    def __str__(self):
+        return "L1Decay, regularization_coeff=%f" % self._regularization_coeff
+
 
 # We shorten the class name, since users will use the regularizer with the
 # package name. The sample code:
diff --git a/python/paddle/v2/fluid/tests/CMakeLists.txt b/python/paddle/v2/fluid/tests/CMakeLists.txt
index 83053160820a70bb5e54f721c0d7b881c5765004..628ce60b406d880d961d705a6abd2b5236fb1c8c 100644
--- a/python/paddle/v2/fluid/tests/CMakeLists.txt
+++ b/python/paddle/v2/fluid/tests/CMakeLists.txt
@@ -1,5 +1,10 @@
 file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
 string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
+
+if(NOT WITH_DISTRIBUTE)
+    list(REMOVE_ITEM TEST_OPS test_recv_op)
+endif(NOT WITH_DISTRIBUTE)
+
 foreach(src ${TEST_OPS})
     py_test(${src} SRCS ${src}.py)
 endforeach()
diff --git a/python/paddle/v2/fluid/tests/book/test_rnn_encoder_decoder.py b/python/paddle/v2/fluid/tests/book/test_rnn_encoder_decoder.py
index 3fd3dbaf779c25aac29b6a3b085d1a08a7ccd5b0..fdc60861760163d2ebad3b050e551929321baafd 100644
--- a/python/paddle/v2/fluid/tests/book/test_rnn_encoder_decoder.py
+++ b/python/paddle/v2/fluid/tests/book/test_rnn_encoder_decoder.py
@@ -49,7 +49,11 @@ def bi_lstm_encoder(input_seq, hidden_size):
         size=hidden_size * 4,
         is_reverse=True,
         use_peepholes=USE_PEEPHOLES)
-    return forward, backward
+
+    forward_last = fluid.layers.sequence_last_step(input=forward)
+    backward_first = fluid.layers.sequence_first_step(input=backward)
+
+    return forward_last, backward_first
 
 
 # FIXME(peterzhang2029): Replace this function with the lstm_unit_op.
@@ -115,16 +119,13 @@ def seq_to_seq_net(): size=[source_dict_dim, embedding_dim], dtype='float32') - src_forward, src_backward = bi_lstm_encoder( + src_forward_last, src_backward_first = bi_lstm_encoder( input_seq=src_embedding, hidden_size=encoder_size) - src_forward_last = fluid.layers.sequence_last_step(input=src_forward) - src_backward_first = fluid.layers.sequence_first_step(input=src_backward) - encoded_vector = fluid.layers.concat( input=[src_forward_last, src_backward_first], axis=1) - decoder_boot = fluid.layers.fc(input=encoded_vector, + decoder_boot = fluid.layers.fc(input=src_backward_first, size=decoder_size, bias_attr=False, act='tanh') diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py index 218dea31e10757d901c5524567f13501b64dbea5..298ecfc386b3ae093cf714a41f5072759cb2cf2e 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py @@ -1,21 +1,19 @@ -#Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
 from __future__ import print_function
 
 import sys
 
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
 import os
@@ -106,10 +104,10 @@ if len(sys.argv) >= 2:
     net_type = sys.argv[1]
 
 if net_type == "vgg":
-    print("train vgg net")
+    print("training vgg net")
     net = vgg16_bn_drop(images)
 elif net_type == "resnet":
-    print("train resnet")
+    print("training resnet")
     net = resnet_cifar10(images, 32)
 else:
     raise ValueError("%s network is not supported" % net_type)
@@ -129,6 +127,7 @@ train_reader = paddle.batch(
     batch_size=BATCH_SIZE)
 
 place = fluid.CPUPlace()
+feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
 exe = fluid.Executor(place)
 
 t = fluid.DistributeTranspiler()
@@ -146,17 +145,14 @@ if training_role == "PSERVER":
     if not current_endpoint:
         print("need env SERVER_ENDPOINT")
         exit(1)
-    print("start pserver at:", current_endpoint)
     pserver_prog = t.get_pserver_program(current_endpoint)
     pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
     exe.run(pserver_startup)
    exe.run(pserver_prog)
-    print("pserver run end")
 elif training_role == "TRAINER":
-    print("start trainer")
     trainer_prog = t.get_trainer_program()
-    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
     exe.run(fluid.default_startup_program())
+
    for pass_id in range(PASS_NUM):
         accuracy.reset(exe)
         for data in train_reader():
@@ -164,9 +160,10 @@ elif training_role == "TRAINER":
                 feed=feeder.feed(data),
                 fetch_list=[avg_cost] + accuracy.metrics)
             pass_acc = accuracy.eval(exe)
-            print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
-                pass_acc))
-            # this model is slow, so if we can train two mini batch, we think it works properly.
+            print("pass_id:" + str(pass_id) + " loss:" + str(loss) +
+                  " pass_acc:" + str(pass_acc))
+            # this model is slow, so if we can train two mini batches,
+            # we think it works properly.
     print("trainer run end")
 else:
     print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_dynamic_lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..bff376a0e2ee0fbb0d869e0dddf4460ed5dc4ac6
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_dynamic_lstm.py
@@ -0,0 +1,135 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
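The new test below follows the same transpiler pattern as the image-classification script above: transpile once, then branch on `TRAINING_ROLE`. For orientation, a condensed sketch of that dispatch, assuming a `DistributeTranspiler` instance `t` on which `transpile()` has already been called; the endpoint value is a placeholder:

```python
import os
import paddle.v2.fluid as fluid

def dispatch(t, exe, run_trainer):
    # `t` is an already-transpiled DistributeTranspiler;
    # `run_trainer` consumes the trainer-side program.
    role = os.getenv("TRAINING_ROLE", "TRAINER")
    if role == "PSERVER":
        ep = os.getenv("SERVER_ENDPOINT")  # e.g. "127.0.0.1:6174"
        pserver_prog = t.get_pserver_program(ep)
        exe.run(t.get_startup_program(ep, pserver_prog))
        exe.run(pserver_prog)  # blocks, serving parameter updates
    else:
        exe.run(fluid.default_startup_program())
        run_trainer(t.get_trainer_program())
```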
+
+import numpy as np
+import os
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+
+
+def stacked_lstm_net(data,
+                     label,
+                     input_dim,
+                     class_dim=2,
+                     emb_dim=128,
+                     hid_dim=512,
+                     stacked_num=3):
+    assert stacked_num % 2 == 1
+
+    emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim])
+    # add bias attr
+
+    # TODO(qijun) linear act
+    fc1 = fluid.layers.fc(input=emb, size=hid_dim)
+    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)
+
+    inputs = [fc1, lstm1]
+
+    for i in range(2, stacked_num + 1):
+        fc = fluid.layers.fc(input=inputs, size=hid_dim)
+        lstm, cell = fluid.layers.dynamic_lstm(
+            input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
+        inputs = [fc, lstm]
+
+    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
+    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
+
+    prediction = fluid.layers.fc(input=[fc_last, lstm_last],
+                                 size=class_dim,
+                                 act='softmax')
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002)
+    optimize_ops, params_grads = adam_optimizer.minimize(avg_cost)
+    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
+    return avg_cost, accuracy, accuracy.metrics[0], optimize_ops, params_grads
+
+
+def to_lodtensor(data, place):
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = np.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+    res.set_lod([lod])
+    return res
+
+
+def main():
+    BATCH_SIZE = 100
+    PASS_NUM = 5
+
+    word_dict = paddle.dataset.imdb.word_dict()
+    print("loaded word dict successfully")
+    dict_dim = len(word_dict)
+    class_dim = 2
+
+    data = fluid.layers.data(
+        name="words", shape=[1], dtype="int64", lod_level=1)
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    cost, accuracy, acc_out, optimize_ops, params_grads = stacked_lstm_net(
+        data, label, input_dim=dict_dim, class_dim=class_dim)
+
+    train_data = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.imdb.train(word_dict), buf_size=1000),
+        batch_size=BATCH_SIZE)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
+
+    t = fluid.DistributeTranspiler()
+    # all parameter server endpoints list for splitting parameters
+    pserver_endpoints = os.getenv("PSERVERS")
+    # server endpoint for current node
+    current_endpoint = os.getenv("SERVER_ENDPOINT")
+    # run as trainer or parameter server
+    training_role = os.getenv(
+        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
+    t.transpile(
+        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)
+
+    if training_role == "PSERVER":
+        if not current_endpoint:
+            print("need env SERVER_ENDPOINT")
+            exit(1)
+        pserver_prog = t.get_pserver_program(current_endpoint)
+        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
+        exe.run(pserver_startup)
+        exe.run(pserver_prog)
+    elif training_role == "TRAINER":
+        exe.run(fluid.default_startup_program())
+        trainer_prog = t.get_trainer_program()
+        for pass_id in xrange(PASS_NUM):
+            accuracy.reset(exe)
+            for data in train_data():
+                cost_val, acc_val = exe.run(trainer_prog,
+                                            feed=feeder.feed(data),
+                                            fetch_list=[cost, acc_out])
+                pass_acc = accuracy.eval(exe)
+                print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
+                      " pass_acc=" + str(pass_acc))
+                if cost_val < 1.0 and acc_val > 0.8:
+                    exit(0)
+    else:
+        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/paddle/v2/fluid/tests/test_activation_op.py b/python/paddle/v2/fluid/tests/test_activation_op.py
index 18605e60652a1614571a91918a012f0c08c8f1b3..1de5d446b8eaf57d3718dde7540c929996ee3432 100644
--- a/python/paddle/v2/fluid/tests/test_activation_op.py
+++ b/python/paddle/v2/fluid/tests/test_activation_op.py
@@ -186,8 +186,7 @@ class TestFloor(OpTest):
         self.op_type = "floor"
         x = np.random.uniform(-1, 1, [4, 4]).astype("float32")
         self.inputs = {'X': x}
-        # numpy floor need +1
-        self.outputs = {'Out': np.floor(self.inputs['X']) + 1.0}
+        self.outputs = {'Out': np.floor(self.inputs['X'])}
 
     def test_check_output(self):
         self.check_output()
diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py
index 4e863625422c93c77ad4fb65be35580943d1cf54..3f54e28defb76d3430a82e791578e20b84833f16 100644
--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -202,6 +202,18 @@ class TestBook(unittest.TestCase):
                 x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell))
         print(str(program))
 
+    def test_dynamic_lstmp(self):
+        program = Program()
+        with program_guard(program):
+            hidden_dim, proj_dim = 16, 8
+            seq_data = layers.data(
+                name='seq_data', shape=[10, 10], dtype='float32', lod_level=1)
+            fc_out = layers.fc(input=seq_data, size=4 * hidden_dim)
+            self.assertIsNotNone(
+                layers.dynamic_lstmp(
+                    input=fc_out, size=4 * hidden_dim, proj_size=proj_dim))
+        print(str(program))
+
     def test_sequence_softmax(self):
         program = Program()
         with program_guard(program):
diff --git a/python/paddle/v2/fluid/tests/test_one_hot_op.py b/python/paddle/v2/fluid/tests/test_one_hot_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..e51ea27d14d0637021f8902fa935beb318658018
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_one_hot_op.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
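The one-hot tests below feed the operator a LoD-carrying input. As a small aside, the LoD used there is an offset list; a sketch of how it partitions the flat input of 11 indices into sequences (values taken from the tests themselves):

```python
x_lod = [[0, 4, 5, 8, 11]]        # offsets into the flat index tensor
offsets = x_lod[0]
lengths = [offsets[i + 1] - offsets[i] for i in range(len(offsets) - 1)]
assert lengths == [4, 1, 3, 3]    # four sequences recovered from the offsets
```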
+ +import unittest +import numpy as np +import math +from op_test import OpTest +import paddle.v2.fluid as fluid +import paddle.v2.fluid.core as core +import paddle.v2.fluid.framework as framework +from paddle.v2.fluid.framework import Program, program_guard + + +class TestOneHotOp(OpTest): + def setUp(self): + self.op_type = 'one_hot' + depth = 10 + dimension = 12 + x_lod = [[0, 4, 5, 8, 11]] + x = [np.random.randint(0, depth - 1) for i in xrange(x_lod[0][-1])] + x = np.array(x).astype('int').reshape([x_lod[0][-1], 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in xrange(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth, 'dtype': int(core.DataType.FP32)} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output() + + +class TestOneHotOp_default_dtype(OpTest): + def setUp(self): + self.op_type = 'one_hot' + depth = 10 + dimension = 12 + x_lod = [[0, 4, 5, 8, 11]] + x = [np.random.randint(0, depth - 1) for i in xrange(x_lod[0][-1])] + x = np.array(x).astype('int').reshape([x_lod[0][-1], 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in xrange(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output() + + +class TestOneHotOp_exception(OpTest): + def setUp(self): + self.op_type = 'one_hot' + self.depth = 10 + self.place = core.CPUPlace() + self.dimension = 12 + self.x = core.LoDTensor() + x_lod = [[0, 4, 5, 8, 11]] + data = [np.random.randint(11, 20) for i in xrange(x_lod[0][-1])] + data = np.array(data).astype('int').reshape([x_lod[0][-1], 1]) + self.x.set(data, self.place) + self.x.set_lod(x_lod) + + def test_check_output(self): + program = Program() + with program_guard(program): + x = fluid.layers.data( + name='x', shape=[self.dimension], dtype='float32', lod_level=1) + block = program.current_block() + one_hot_out = block.create_var( + name="one_hot_out", + type=core.VarDesc.VarType.LOD_TENSOR, + dtype='float32') + block.append_op( + type='one_hot', + inputs={'X': x}, + attrs={'depth': self.depth}, + outputs={'Out': one_hot_out}) + exe = fluid.Executor(self.place) + + def run(): + exe.run(feed={'x': self.x}, + fetch_list=[one_hot_out], + return_numpy=False) + + self.assertRaises(core.EnforceNotMet, run) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_recv_op.py b/python/paddle/v2/fluid/tests/test_recv_op.py new file mode 100644 index 0000000000000000000000000000000000000000..5c4cec028d354b99d6203281ec4c727d7e3eceac --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_recv_op.py @@ -0,0 +1,68 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
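The test below exercises the send/recv pair whose batch-barrier handshake was added in `send_op.cc`/`recv_op.cc` above. A rough Python sketch of the server-side loop; the message constant's value and the queue interface are assumptions here, not the actual gRPC API:

```python
BATCH_BARRIER_MESSAGE = "BATCH_BARRIER@RECV"  # assumed value of the constant

def server_loop(queue, fan_in):
    # Collect gradients until every one of `fan_in` trainers has sent a
    # barrier message; mirrors the while-loop added to RecvOp::Run above.
    recv_var_cnt, batch_barrier = 0, 0
    while batch_barrier != fan_in:
        name, value = queue.get()   # blocking, like rpc_service_->Get()
        if name == BATCH_BARRIER_MESSAGE:
            batch_barrier += 1      # one trainer finished this batch
        else:
            recv_var_cnt += 1       # a gradient arrived; deserialize/merge it
    return recv_var_cnt
```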
+
+import unittest
+
+import paddle.v2.fluid as fluid
+import paddle.v2.fluid.layers as layers
+import numpy
+from multiprocessing import Process
+import os, sys
+
+
+class TestRecvOp(unittest.TestCase):
+    def test_send(self):
+        # Run init_serv in a subprocess
+        place = fluid.CPUPlace()
+        p = Process(target=self.init_serv, args=(place, ))
+        p.daemon = True
+        p.start()
+        self.init_client(place)
+        # FIXME(typhoonzero): find a way to gracefully shut down the server.
+        os.system("kill -9 %d" % p.pid)
+        p.join()
+
+    def init_serv(self, place):
+        main = fluid.Program()
+        with fluid.program_guard(main):
+            x = layers.data(
+                shape=[32, 32],
+                dtype='float32',
+                name="X",
+                append_batch_size=False)
+            fluid.initializer.Constant(value=1.0)(x, main.global_block())
+            serv = layers.ListenAndServ("127.0.0.1:6174", optimizer_mode=False)
+            with serv.do():
+                o = layers.scale(x=x, scale=10.0)
+                main.global_block().create_var(
+                    name=o.name, persistable=False, dtype=o.dtype,
+                    shape=o.shape)
+        exe = fluid.Executor(place)
+        exe.run(main)
+
+    def init_client(self, place):
+        main = fluid.Program()
+        with fluid.program_guard(main):
+            x = layers.data(
+                shape=[32, 32],
+                dtype='float32',
+                name='X',
+                append_batch_size=False)
+            fluid.initializer.Constant(value=1.0)(x, main.global_block())
+            layers.Send("127.0.0.1:6174", [x], [x])
+        exe = fluid.Executor(place)
+        exe.run(main)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_weight_normalization.py b/python/paddle/v2/fluid/tests/test_weight_normalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..80ad8285d8a3c2ced814cc3588a814c14ec60855
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_weight_normalization.py
@@ -0,0 +1,121 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
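The test below checks the weight-norm reparameterization w = g * v / ||v||. A minimal NumPy sketch mirroring its `weight_normalize()` reference, with illustrative shapes:

```python
import numpy

v = numpy.ones((10, 5))                             # direction parameter
g = numpy.linalg.norm(v, axis=None, keepdims=True)  # scalar gain (dim=None)
w = g * v / numpy.linalg.norm(v, axis=None, keepdims=True)
assert numpy.allclose(w, v)  # with g initialized to ||v||, w reproduces v
```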
+ +import unittest +import numpy +import collections +import paddle.v2.fluid as fluid +import paddle.v2.fluid.core as core +from paddle.v2.fluid.initializer import ConstantInitializer +from paddle.v2.fluid.param_attr import WeightNormParamAttr + + +class TestWeightNormalization(unittest.TestCase): + batch_size = 3 + hidden_size = 5 + data_desc = (['x', [10], 0], ) + + @classmethod + def setUpClass(cls): + cls.set_program() + + @classmethod + def set_program(cls): + data = fluid.layers.data( + name=cls.data_desc[0][0], shape=cls.data_desc[0][1]) + out = fluid.layers.fc(input=data, + size=cls.hidden_size, + param_attr=WeightNormParamAttr( + dim=None, + name='weight_norm_param', + initializer=ConstantInitializer(1.0)), + bias_attr=False, + act=None) + loss = fluid.layers.reduce_sum(out) + fluid.backward.append_backward(loss=loss) + cls.fetch_list = [ + 'weight_norm_param_g', 'weight_norm_param_v', + 'weight_norm_param_g@GRAD' + ] + + def run_program(self): + outputs = [] + places = [core.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + for place in places: + self.set_inputs(place) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + output = exe.run(fluid.default_main_program(), + feed=self.inputs, + fetch_list=self.fetch_list, + return_numpy=False) + outputs.append(output) + self.actual_outputs = outputs + + def set_data(self): + self.data = collections.OrderedDict() + for desc in self.data_desc: + data_name = desc[0] + data_shape = desc[1] + data_lod_level = desc[2] + data_lod = [] + for i in range(data_lod_level): + lod_level_i = numpy.random.randint( + low=1, + high=5, + size=self.batch_size if i == 0 else lod_level_i[-1]) + lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist() + data_lod.append(lod_level_i) + data_value = numpy.random.random( + size=[data_lod[-1][-1] if data_lod else self.batch_size + ] + data_shape).astype('float32') + self.data[data_name] = (data_value, data_lod) + + def set_inputs(self, place): + self.inputs = {} + for desc in self.data_desc: + tensor = fluid.Tensor() + tensor.set(self.data[desc[0]][0], place) + if self.data[desc[0]][1]: + tensor.set_lod(self.data[desc[0]][1]) + self.inputs[desc[0]] = tensor + + def weight_normalize(self): + v = numpy.ones((self.data[self.data_desc[0][0]][0].shape[-1], + self.hidden_size)) + g = numpy.linalg.norm(v, axis=None, keepdims=True) + w = g * v / numpy.linalg.norm(v, axis=None, keepdims=True) + x = self.data[self.data_desc[0][0]][0] + out = numpy.dot(x, w) + g_grad = (numpy.dot(x.T, numpy.ones_like(out)) * (v / numpy.linalg.norm( + v, axis=None, keepdims=True))).sum(axis=None, keepdims=True) + return g, v, g_grad + + def test_weight_normalization(self): + self.set_data() + self.run_program() + expect_output = self.weight_normalize() + for actual_output in self.actual_outputs: + [ + self.assertTrue( + numpy.allclose( + numpy.array(actual), expect, atol=0.001)) + for expect, actual in zip(expect_output, actual_output) + ] + + +if __name__ == '__main__': + unittest.main()
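For completeness, a hedged usage sketch for the new `WeightNormParamAttr`, mirroring how the test above wires it into `fc`; sizes and the parameter name are illustrative:

```python
import paddle.v2.fluid as fluid
from paddle.v2.fluid.param_attr import WeightNormParamAttr

data = fluid.layers.data(name='x', shape=[10])
out = fluid.layers.fc(input=data,
                      size=5,
                      param_attr=WeightNormParamAttr(
                          dim=None, name='weight_norm_param'),
                      bias_attr=False)
```

With `dim=None`, the norm is taken over the whole weight matrix, which is exactly the case the test's `weight_normalize()` reference computes.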