diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt index 98c2f68a6c39ed12795bad4a905558917c0275a4..87173fc42a46c8218fbf0beb4ebf7760f69b7c24 100644 --- a/paddle/contrib/inference/CMakeLists.txt +++ b/paddle/contrib/inference/CMakeLists.txt @@ -45,6 +45,10 @@ endfunction(inference_api_test) cc_library(paddle_inference_api SRCS paddle_inference_api.cc paddle_inference_api_impl.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) +if(NOT APPLE) + set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_api.sym") + set_target_properties(paddle_inference_api PROPERTIES LINK_FLAGS "${LINK_FLAGS}") +endif() # Here the shared library doesn't depend on other fluid libraries, or double free will occur. cc_library(paddle_inference_api_shared SHARED @@ -53,8 +57,19 @@ add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB set_target_properties(paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api) if(NOT APPLE) - set(LINK_FLAGS "-fPIC -fvisibility=hidden") + set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_api.map") set_target_properties(paddle_inference_api_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}") + FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake + "execute_process(COMMAND bash -c \"${CMAKE_CURRENT_SOURCE_DIR}/check_symbol.sh" + " ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_inference_api.so\" RESULT_VARIABLE symbol_res)\n" + "if(NOT \"\${symbol_res}\" STREQUAL \"0\")\n" + " message(FATAL_ERROR \"Check symbol failed.\")\n" + "endif()\n") + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol" + COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake" + DEPENDS paddle_inference_api_shared) + add_custom_target(check_symbol ALL DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol") endif() cc_test(test_paddle_inference_api diff --git a/paddle/contrib/inference/check_symbol.sh b/paddle/contrib/inference/check_symbol.sh new file mode 100755 index 0000000000000000000000000000000000000000..6547ca1413649968e8a0be146915e07192a99898 --- /dev/null +++ b/paddle/contrib/inference/check_symbol.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +lib=$1 +if [ $# -ne 1 ]; then echo "No input library"; exit -1 ; fi + +num_paddle_syms=$(nm -D --defined-only ${lib} | grep paddle | wc -l) +num_google_syms=$(nm -D --defined-only ${lib} | grep google | wc -l) + +if [ $num_paddle_syms -le 0 ]; then echo "Have no paddle symbols"; exit -1 ; fi +if [ $num_google_syms -ge 1 ]; then echo "Have some google symbols"; exit -1 ; fi + +exit 0 diff --git a/paddle/contrib/inference/demo/CMakeLists.txt b/paddle/contrib/inference/demo/CMakeLists.txt index ecece6fe3471ad7b89c84c3e2b67af4ae9eb3c36..2d501bf0085b1bd4c39ee1a6dfaaa9622fd72ce1 100644 --- a/paddle/contrib/inference/demo/CMakeLists.txt +++ b/paddle/contrib/inference/demo/CMakeLists.txt @@ -13,8 +13,6 @@ # limitations under the License. # -inference_api_test(simple_on_word2vec ARGS test_word2vec) - option(WITH_INFERENCE_DEMO "Compile with Inference demo" OFF) if(NOT WITH_INFERENCE_DEMO) return() diff --git a/paddle/contrib/inference/demo_ci/CMakeLists.txt b/paddle/contrib/inference/demo_ci/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..789bff7f23cd89bfaeba180efa95972cef6fc58c --- /dev/null +++ b/paddle/contrib/inference/demo_ci/CMakeLists.txt @@ -0,0 +1,77 @@ +cmake_minimum_required(VERSION 3.0) + +project(cpp_inference_demo CXX C) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + +if(NOT DEFINED PADDLE_LIB) + message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") +endif() +if(NOT DEFINED DEMO_NAME) + message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name") +endif() + +option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) +option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON) + +if(WITH_GPU) + set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library") +endif() + +include_directories("${PADDLE_LIB}") +include_directories("${PADDLE_LIB}/third_party/install/protobuf/include") +include_directories("${PADDLE_LIB}/third_party/install/glog/include") +include_directories("${PADDLE_LIB}/third_party/install/gflags/include") +include_directories("${PADDLE_LIB}/third_party/install/snappy/include") +include_directories("${PADDLE_LIB}/third_party/install/snappystream/include") +include_directories("${PADDLE_LIB}/third_party/install/zlib/include") + +include_directories("${PADDLE_LIB}/third_party/boost") +include_directories("${PADDLE_LIB}/third_party/eigen3") + +link_directories("${PADDLE_LIB}/third_party/install/snappy/lib") +link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib") +link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib") +link_directories("${PADDLE_LIB}/third_party/install/glog/lib") +link_directories("${PADDLE_LIB}/third_party/install/gflags/lib") +link_directories("${PADDLE_LIB}/third_party/install/zlib/lib") + +add_executable(${DEMO_NAME} ${DEMO_NAME}.cc) + +if(WITH_MKL) + include_directories("${PADDLE_LIB}/third_party/install/mklml/include") + set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel.so + ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5.so) + set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn") + if(EXISTS ${MKLDNN_PATH}) + include_directories("${MKLDNN_PATH}/include") + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) + endif() +else() + set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas.a) +endif() + +if(WITH_STATIC_LIB) + set(DEPS + "-Wl,--whole-archive" + ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.a + "-Wl,--no-whole-archive" + ${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.a) +else() + # Note: libpaddle_inference_api.so must put before libpaddle_fluid.so + set(DEPS + ${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.so + ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.so) +endif() +set(EXTERNAL_LIB "-lrt -ldl -lpthread") + +set(DEPS ${DEPS} + ${MATH_LIB} ${MKLDNN_LIB} + glog gflags protobuf snappystream snappy z + ${EXTERNAL_LIB}) +if(WITH_GPU) + set(DEPS ${DEPS} ${CUDA_LIB}/libcudart.so) +endif() + +target_link_libraries(${DEMO_NAME} ${DEPS}) diff --git a/paddle/contrib/inference/demo_ci/run.sh b/paddle/contrib/inference/demo_ci/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..e3a7269af795b05c296423cb2dc92b753397c6b3 --- /dev/null +++ b/paddle/contrib/inference/demo_ci/run.sh @@ -0,0 +1,34 @@ +set -x +PADDLE_ROOT=$1 +WITH_MKL=$2 +WITH_GPU=$3 +if [ $3 == "ON" ]; then + use_gpu_list='true false' +else + use_gpu_list='false' +fi + +mkdir -p build +cd build + +for WITH_STATIC_LIB in false; do + rm -rf * + cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \ + -DWITH_MKL=$WITH_MKL \ + -DDEMO_NAME=simple_on_word2vec \ + -DWITH_GPU=$WITH_GPU \ + -DWITH_STATIC_LIB=$WITH_STATIC_LIB + make + for use_gpu in $use_gpu_list; do + ./simple_on_word2vec \ + --dirname=${PADDLE_ROOT}/build/python/paddle/fluid/tests/book/word2vec.inference.model \ + --use_gpu=$use_gpu + done +done +if [ $? -eq 0 ]; then + exit 0 +else + echo "inference demo runs fail." + exit 1 +fi +set +x diff --git a/paddle/contrib/inference/demo/simple_on_word2vec.cc b/paddle/contrib/inference/demo_ci/simple_on_word2vec.cc similarity index 68% rename from paddle/contrib/inference/demo/simple_on_word2vec.cc rename to paddle/contrib/inference/demo_ci/simple_on_word2vec.cc index c253014642f39a042430992548a285cc7078a959..9713837f86d40383da946af1681e1945c84336b0 100644 --- a/paddle/contrib/inference/demo/simple_on_word2vec.cc +++ b/paddle/contrib/inference/demo_ci/simple_on_word2vec.cc @@ -16,21 +16,27 @@ limitations under the License. */ * This file contains a simple demo for how to take a model for inference. */ +#include #include -#include #include #include -#include "paddle/contrib/inference/paddle_inference_api.h" +#include "contrib/inference/paddle_inference_api.h" +#include "paddle/fluid/platform/enforce.h" + +DEFINE_string(dirname, "", "Directory of the inference model."); +DEFINE_bool(use_gpu, false, "Whether use gpu."); namespace paddle { namespace demo { -DEFINE_string(dirname, "", "Directory of the inference model."); - void Main(bool use_gpu) { //# 1. Create PaddlePredictor with a config. NativeConfig config; - config.model_dir = FLAGS_dirname + "word2vec.inference.model"; + if (FLAGS_dirname.empty()) { + LOG(INFO) << "Usage: ./simple_on_word2vec --dirname=path/to/your/model"; + exit(1); + } + config.model_dir = FLAGS_dirname; config.use_gpu = use_gpu; config.fraction_of_gpu_memory = 0.15; config.device = 0; @@ -54,12 +60,16 @@ void Main(bool use_gpu) { CHECK(predictor->Run(slots, &outputs)); //# 4. Get output. - ASSERT_EQ(outputs.size(), 1UL); - LOG(INFO) << "output buffer size: " << outputs.front().data.length(); + PADDLE_ENFORCE(outputs.size(), 1UL); + // Check the output buffer size and result of each tid. + PADDLE_ENFORCE(outputs.front().data.length(), 33168UL); + float result[5] = { + 0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706}; const size_t num_elements = outputs.front().data.length() / sizeof(float); // The outputs' buffers are in CPU memory. for (size_t i = 0; i < std::min(5UL, num_elements); i++) { - LOG(INFO) << static_cast(outputs.front().data.data())[i]; + PADDLE_ENFORCE(static_cast(outputs.front().data.data())[i], + result[i]); } } } @@ -68,7 +78,7 @@ void MainThreads(int num_threads, bool use_gpu) { // Multi-threads only support on CPU // 0. Create PaddlePredictor with a config. NativeConfig config; - config.model_dir = FLAGS_dirname + "word2vec.inference.model"; + config.model_dir = FLAGS_dirname; config.use_gpu = use_gpu; config.fraction_of_gpu_memory = 0.15; config.device = 0; @@ -94,14 +104,17 @@ void MainThreads(int num_threads, bool use_gpu) { CHECK(predictor->Run(inputs, &outputs)); // 4. Get output. - ASSERT_EQ(outputs.size(), 1UL); - LOG(INFO) << "TID: " << tid << ", " - << "output buffer size: " << outputs.front().data.length(); + PADDLE_ENFORCE(outputs.size(), 1UL); + // Check the output buffer size and result of each tid. + PADDLE_ENFORCE(outputs.front().data.length(), 33168UL); + float result[5] = { + 0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706}; const size_t num_elements = outputs.front().data.length() / sizeof(float); // The outputs' buffers are in CPU memory. for (size_t i = 0; i < std::min(5UL, num_elements); i++) { - LOG(INFO) << static_cast(outputs.front().data.data())[i]; + PADDLE_ENFORCE(static_cast(outputs.front().data.data())[i], + result[i]); } } }); @@ -111,15 +124,18 @@ void MainThreads(int num_threads, bool use_gpu) { } } -TEST(demo, word2vec_cpu) { Main(false /*use_gpu*/); } -TEST(demo_multi_threads, word2vec_cpu_1) { MainThreads(1, false /*use_gpu*/); } -TEST(demo_multi_threads, word2vec_cpu_4) { MainThreads(4, false /*use_gpu*/); } - -#ifdef PADDLE_WITH_CUDA -TEST(demo, word2vec_gpu) { Main(true /*use_gpu*/); } -TEST(demo_multi_threads, word2vec_gpu_1) { MainThreads(1, true /*use_gpu*/); } -TEST(demo_multi_threads, word2vec_gpu_4) { MainThreads(4, true /*use_gpu*/); } -#endif - } // namespace demo } // namespace paddle + +int main(int argc, char** argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + paddle::demo::Main(false /* use_gpu*/); + paddle::demo::MainThreads(1, false /* use_gpu*/); + paddle::demo::MainThreads(4, false /* use_gpu*/); + if (FLAGS_use_gpu) { + paddle::demo::Main(true /*use_gpu*/); + paddle::demo::MainThreads(1, true /*use_gpu*/); + paddle::demo::MainThreads(4, true /*use_gpu*/); + } + return 0; +} diff --git a/paddle/contrib/inference/paddle_inference_api.map b/paddle/contrib/inference/paddle_inference_api.map new file mode 100644 index 0000000000000000000000000000000000000000..5203784dc1fcb672eb6a26d9dfd3ffbe02e08038 --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api.map @@ -0,0 +1,6 @@ +{ + global: + *paddle*; + local: + *; +}; diff --git a/paddle/contrib/inference/paddle_inference_api.sym b/paddle/contrib/inference/paddle_inference_api.sym new file mode 100644 index 0000000000000000000000000000000000000000..ef2a04d788aa86b7f6a61c4af479d70d1137f374 --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api.sym @@ -0,0 +1 @@ +*paddle* diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index eb4e7ec52f907f9403e21ec2734d61824f51a58b..1d80bab90f513139f807b57258177c6b2ac53ac0 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h" +#include #include #include #include "paddle/fluid/framework/executor.h" @@ -53,8 +54,14 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( } } } + std::vector fetch_data; + std::exception_ptr eptr; + try { + fetch_data = underlying_executor_->Run(fetch_tensors); + } catch (...) { + eptr = std::current_exception(); + } - auto fetch_data = underlying_executor_->Run(fetch_tensors); drop_scope_counter_ += 1; if (!fetch_tensors.empty() || drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) { @@ -69,7 +76,11 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( scope->DeleteScope(local_scope); } } - return fetch_data; + if (eptr) { + std::rethrow_exception(eptr); + } else { + return fetch_data; + } } } // namespace details } // namespace framework diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 99b10254a7961bf7b27b256acaece573a71c4115..07097c7e75c6ce638549716cd6523f387cdefd92 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -78,6 +78,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( set.clear(); }; + // Clean run context + run_op_futures_.clear(); + exception_.reset(); + // Step 3. Execution while (!pending_vars.empty()) { // 1. Run All Ready ops @@ -96,16 +100,19 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( auto cur_ready_vars = ready_vars.PopAll(1, &timeout); if (timeout) { - std::lock_guard l(exception_mu_); + std::unique_lock l(exception_mu_); if (exception_) { + l.unlock(); + for (auto &run_op_future : run_op_futures_) { + run_op_future.wait(); + } + l.lock(); std::exception *exp = exception_.get(); if (dynamic_cast(exp)) { auto e = *static_cast(exp); - exception_.reset(); throw e; } else if (dynamic_cast(exp)) { auto e = *static_cast(exp); - exception_.reset(); throw e; } else { LOG(FATAL) << "Unknown exception."; @@ -222,7 +229,7 @@ void ThreadedSSAGraphExecutor::RunOp( } }; if (pool_) { - pool_->enqueue(op_run); + run_op_futures_.emplace_back(pool_->enqueue(op_run)); } else { op_run(); } diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index c69e0487e2e503a0d445300aa2fd6bb9c30b06c9..09973b7a72881464ad9e7776d4aad3d2261a118d 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include #include @@ -77,6 +78,8 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { private: ExecutionStrategy strategy_; + // use std::list because clear(), push_back, and for_each are O(1) + std::list> run_op_futures_; }; } // namespace details diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 1895aea7f98cb1ad12b2ce16545339252349ea37..b1c33c3415f49f9b1160655034350087432d0cb0 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -13,6 +13,12 @@ endif() # Create static library cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api) +if(NOT APPLE) + # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac. + set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym") + set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}") +endif() + # Create shared library cc_library(paddle_fluid_shared SHARED SRCS io.cc diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc index d09bf3ed161703b0cf273522921e157c7360a0bc..bd24e8a7d9c20b8cd9c4e41a76ffc33a004a9a69 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph.cc @@ -90,6 +90,20 @@ std::string DataFlowGraph::DotString() const { return dot.Build(); } +std::string DataFlowGraph::HumanReadableInfo(bool show_values, + bool show_functions) const { + std::stringstream values, functions; + for (auto &n : nodes.nodes()) { + if (show_values && n->IsValue()) { + values << n->repr() << "\n"; + } + if (show_functions && n->IsFunction()) { + functions << n->repr() << "\n"; + } + } + return "Values:\n" + values.str() + "\n\n" + "Functions:\n" + functions.str(); +} + // // NodesBFSIterator // @@ -146,7 +160,7 @@ bool GraphTraits::NodesBFSIterator::operator==( if ((!queue_.empty()) && (!other.queue_.empty())) { return queue_.front() == other.queue_.front() && visited_.size() == other.visited_.size(); // here need to check the - // equality of queue and + // equality of queue and // visited. Just a light but week implementation. } return false; @@ -208,6 +222,76 @@ Node *GraphTraits::NodesDFSIterator::operator->() { return stack_.top(); } +GraphTraits::NodesTSIterator::NodesTSIterator( + const std::vector &source) { + PADDLE_ENFORCE(!source.empty(), + "Start points of topological sorting should not be empty!"); + std::unordered_set visited; + std::unordered_set to_visit{source.begin(), source.end()}; + + std::vector inlink_visited; + while (!to_visit.empty()) { + std::vector queue(to_visit.begin(), to_visit.end()); + for (auto *p : queue) { + inlink_visited.clear(); + + std::copy_if(p->inlinks.begin(), p->inlinks.end(), + std::back_inserter(inlink_visited), + [&](Node *x) { return visited.count(x); }); + + if (inlink_visited.size() == p->inlinks.size()) { + sorted_.push_back(p); + for (auto *_ : p->outlinks) { + if (!visited.count(_)) { + to_visit.insert(_); + } + } + + to_visit.erase(p); + visited.insert(p); + } + } + } +} + +GraphTraits::NodesTSIterator::NodesTSIterator( + const paddle::inference::analysis::GraphTraits< + DataFlowGraph>::NodesTSIterator &other) + : sorted_(other.sorted_), cursor_(other.cursor_) {} + +Node &GraphTraits::NodesTSIterator::operator*() { + PADDLE_ENFORCE_LT(cursor_, sorted_.size()); + return *sorted_[cursor_]; +} + +paddle::inference::analysis::GraphTraits::NodesTSIterator + &GraphTraits::NodesTSIterator::operator++() { + if (++cursor_ >= sorted_.size()) { + sorted_.clear(); + cursor_ = 0; + } + return *this; +} +paddle::inference::analysis::GraphTraits::NodesTSIterator & +GraphTraits::NodesTSIterator::operator=( + const paddle::inference::analysis::GraphTraits< + DataFlowGraph>::NodesTSIterator &other) { + cursor_ = other.cursor_; + sorted_ = other.sorted_; + return *this; +} + +bool GraphTraits::NodesTSIterator::operator==( + const paddle::inference::analysis::GraphTraits< + DataFlowGraph>::NodesTSIterator &other) { + return sorted_ == other.sorted_ && cursor_ == other.cursor_; +} + +Node *GraphTraits::NodesTSIterator::operator->() { + PADDLE_ENFORCE_LT(cursor_, sorted_.size()); + return sorted_[cursor_]; +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h index a4fefc83e0c551d52bec87299bcbc966e7a2dbf7..5dd914d1971bfb5bcc0b1db41d73e2b67120bc06 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.h +++ b/paddle/fluid/inference/analysis/data_flow_graph.h @@ -48,6 +48,9 @@ struct DataFlowGraph { // Output a DOT graph file for debug. std::string DotString() const; + std::string HumanReadableInfo(bool show_values = true, + bool show_functions = true) const; + private: // Remove duplicate edges and so on. void Clean(); @@ -107,6 +110,32 @@ struct GraphTraits { std::unordered_set visited_; }; + // Topological sorting iterator on nodes. + struct NodesTSIterator + : public std::iterator { + NodesTSIterator() = default; + explicit NodesTSIterator(const std::vector &source); + NodesTSIterator(NodesTSIterator &&other) + : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) { + other.cursor_ = 0; + } + NodesTSIterator(const NodesTSIterator &other); + + Node &operator*(); + NodesTSIterator &operator++(); + // TODO(Superjomn) current implementation just compare the first + // element, need to compare the graph and all the elements in the queue and + // set. + NodesTSIterator &operator=(const NodesTSIterator &other); + bool operator==(const NodesTSIterator &other); + bool operator!=(const NodesTSIterator &other) { return !(*this == other); } + Node *operator->(); + + private: + std::vector sorted_; + int cursor_{0}; + }; + explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {} // default use BFS to visit the nodes. @@ -119,17 +148,24 @@ struct GraphTraits { iterator_range nodes_in_DFS() { return iterator_range(nodes_dfs_begin(), nodes_dfs_end()); } + iterator_range nodes_in_TS() { + return iterator_range(nodes_ts_begin(), nodes_ts_end()); + } private: NodesBFSIterator nodes_bfs_begin() { return NodesBFSIterator(graph_->inputs); } NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); } + NodesDFSIterator nodes_dfs_begin() { return NodesDFSIterator(graph_->inputs); } NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); } + NodesTSIterator nodes_ts_begin() { return NodesTSIterator(graph_->inputs); } + NodesTSIterator nodes_ts_end() { return NodesTSIterator(); } + private: DataFlowGraph *graph_; }; diff --git a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc index 9d7cceeb65888b8ba3fdf39e88fc2877abd82d11..7912f8d7f17ae3c79e8f73f36b7095fd52c9ac86 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc @@ -24,11 +24,11 @@ TEST(DataFlowGraph, BFS) { auto dfg = ProgramDescToDFG(desc); dfg.Build(); - for (auto* in : dfg.inputs) { + for (auto *in : dfg.inputs) { LOG(INFO) << "inputs: " << in->name() << " " << static_cast(in->type()); } - for (auto* out : dfg.outputs) { + for (auto *out : dfg.outputs) { LOG(INFO) << "outputs: " << out->name() << " " << static_cast(out->type()); } @@ -57,6 +57,71 @@ TEST(DataFlowGraph, DFS) { ASSERT_EQ(count, dfg.nodes.size()); } +// Topological sorting. +/* + * Graph topology + * inputs: 0, 1, 2 + * 0 -> 4 + * 0 -> 5 + * 1 -> 6 + * 2 -> 7 + * 4 -> 5 + * 4 -> 7 + * 4 -> 3 + * 7 -> 3 + */ +TEST(DataFlowGraph, TS) { + DataFlowGraph graph; + + for (int i = 0; i < 8; i++) { + auto *node = graph.nodes.Create(Node::Type::kValue); + node->SetName("node-" + std::to_string(i)); + } + + auto add_link = [&](int i, int j) { + Node *source = graph.nodes.GetMutable(i); + Node *target = graph.nodes.GetMutable(j); + target->inlinks.push_back(source); + source->outlinks.push_back(target); + }; + + graph.inputs.push_back(graph.nodes.GetMutable(0)); + graph.inputs.push_back(graph.nodes.GetMutable(1)); + graph.inputs.push_back(graph.nodes.GetMutable(2)); + + add_link(0, 4); + add_link(0, 5); + add_link(1, 6); + add_link(2, 7); + add_link(4, 5); + add_link(4, 7); + add_link(4, 3); + add_link(7, 3); + + auto its = GraphTraits(&graph).nodes_in_TS(); + std::vector sorted_ids; + for (auto it = its.begin(); it != its.end(); ++it) { + LOG(INFO) << it->name(); + sorted_ids.push_back(it->id()); + } + + // Assert a occurs prior to b in the sorted_ids. + auto assert_positive_sequence_pair = [&](int a, int b) { + auto a_offset = std::find(sorted_ids.begin(), sorted_ids.end(), a); + auto b_offset = std::find(sorted_ids.begin(), sorted_ids.end(), b); + ASSERT_LT(a_offset, b_offset); + }; + + assert_positive_sequence_pair(2, 7); + assert_positive_sequence_pair(7, 3); + assert_positive_sequence_pair(4, 3); + assert_positive_sequence_pair(0, 4); + assert_positive_sequence_pair(0, 5); + assert_positive_sequence_pair(1, 6); + assert_positive_sequence_pair(4, 5); + assert_positive_sequence_pair(4, 7); +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/paddle_fluid.sym b/paddle/fluid/inference/paddle_fluid.sym new file mode 100644 index 0000000000000000000000000000000000000000..ef2a04d788aa86b7f6a61c4af479d70d1137f374 --- /dev/null +++ b/paddle/fluid/inference/paddle_fluid.sym @@ -0,0 +1 @@ +*paddle* diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index ab1d2143330fb8cbfd535758a83bc71de939c4e0..bc07bbe67e3dafc08aeb62cd75629966c216ce6e 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -265,6 +265,8 @@ op_library(recurrent_op DEPS executor) op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale) op_library(cos_sim_op DEPS cos_sim_functor) op_library(parallel_do_op DEPS executor) +op_library(unsqueeze_op DEPS reshape_op) +op_library(squeeze_op DEPS reshape_op) if (WITH_GPU) op_library(conv_op DEPS vol2col depthwise_conv im2col) diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc index 6b06913d1c83f4534238ac3dd22ac4035c0f0fbf..5bfa1aaa696d5cbe8bdcb94d708746259952740f 100644 --- a/paddle/fluid/operators/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/conv_mkldnn_op.cc @@ -29,6 +29,79 @@ using mkldnn::stream; using platform::to_void_cast; using platform::GetMKLDNNFormat; +class ConvMKLDNNHandler : public platform::MKLDNNHandler { + public: + ConvMKLDNNHandler( + std::shared_ptr conv_pd, + const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine, + const std::string& base_key) + : platform::MKLDNNHandler(dev_ctx, engine, base_key) { + conv_pd_ = conv_pd; + } + + std::shared_ptr AcquireDstMemoryFromPrimitive(void* ptr) { + return this->AcquireMemoryFromPrimitive(conv_pd_->dst_primitive_desc(), ptr, + "@dst_mem_p"); + } + + std::shared_ptr AcquireSrcMemoryFromPrimitive( + const std::shared_ptr user_memory_p, + std::vector& pipeline) { + auto src_pd = conv_pd_->src_primitive_desc(); + auto user_pd = user_memory_p->get_primitive_desc(); + return this->AcquireMemory(src_pd, user_pd, user_memory_p, "@src_mem_p", + pipeline); + } + + std::shared_ptr AcquireWeightsMemoryFromPrimitive( + const std::shared_ptr user_weights_memory_p, + std::vector& pipeline) { + auto user_weights_pd = user_weights_memory_p->get_primitive_desc(); + auto weights_pd = conv_pd_->weights_primitive_desc(); + return this->AcquireMemory(weights_pd, user_weights_pd, + user_weights_memory_p, "@weights_mem_p", + pipeline); + } + + std::shared_ptr AcquireConvolution( + std::shared_ptr src_memory_p, + std::shared_ptr weights_memory_p, + std::shared_ptr dst_memory_p) { + auto prim_key = key_ + "@conv_p"; + auto prim_desc_key = key_ + "@conv_pd"; + auto conv_p = std::static_pointer_cast( + dev_ctx_.GetBlob(prim_key)); + PADDLE_ENFORCE((conv_p != nullptr) || (is_reusing_ == false), + "Fail to find convolution primitive in device context"); + if (conv_p == nullptr) { + conv_p = std::make_shared( + *conv_pd_, *(src_memory_p), *(weights_memory_p.get()), + *(dst_memory_p.get())); + + dev_ctx_.SetBlob(prim_key, conv_p); + } else { + is_reusing_ = true; + } + return conv_p; + } + + // Generate keys for storing/retriving primitives for this operator + // TODO(jczaja): Make hashing function more optimial + static std::string GetHash(memory::dims& input_dims, + memory::dims& weights_dims, + std::vector& strides, + std::vector& paddings, + std::vector& dilations, int groups, + const std::string& suffix) { + return dims2str(input_dims) + dims2str(weights_dims) + dims2str(strides) + + dims2str(paddings) + dims2str(dilations) + std::to_string(groups) + + suffix; + } + + private: + std::shared_ptr conv_pd_; +}; + template class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { public: @@ -36,10 +109,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace."); - // Get unique name for index - const std::string key = ctx.op().Output("Output"); - const std::string key_conv_pd = key + "@conv_pd"; - auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); @@ -80,68 +149,62 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { paddle::framework::vectorize2int(filter->dims()); std::vector dst_tz = paddle::framework::vectorize2int(output->dims()); - // create mkldnn memory from input tensors (data/weights) - auto user_src_memory = memory( - {{{src_tz}, memory::data_type::f32, input->format()}, mkldnn_engine}, - to_void_cast(input_data)); - auto user_weights_memory = - memory({{{weights_tz}, memory::data_type::f32, filter->format()}, - mkldnn_engine}, - to_void_cast(filter_data)); + // Get unique name for storing MKLDNN primitives + const std::string key = ConvMKLDNNHandler::GetHash( + src_tz, weights_tz, strides, paddings, dilations, groups, + ctx.op().Output("Output")); + const std::string key_conv_pd = key + "@conv_pd"; + + std::vector pipeline; + + auto user_src_md = platform::MKLDNNMemDesc( + {src_tz}, platform::MKLDNNGetDataType(), input->format()); + auto user_weights_md = platform::MKLDNNMemDesc( + {weights_tz}, platform::MKLDNNGetDataType(), filter->format()); /* create memory descriptor for convolution without specified format * ('any') which lets a primitive (convolution in this case) choose * the memory format preferred for best performance */ - auto src_md = platform::MKLDNNMemDesc(src_tz, memory::data_type::f32, - memory::format::any); + auto src_md = platform::MKLDNNMemDesc( + src_tz, platform::MKLDNNGetDataType(), memory::format::any); auto weights_md = platform::MKLDNNMemDesc( - weights_tz, memory::data_type::f32, memory::format::any); - auto dst_md = platform::MKLDNNMemDesc(dst_tz, memory::data_type::f32, - memory::format::any); + weights_tz, platform::MKLDNNGetDataType(), memory::format::any); + auto dst_md = platform::MKLDNNMemDesc( + dst_tz, platform::MKLDNNGetDataType(), memory::format::any); // create a conv primitive descriptor and save it for usage in backward std::shared_ptr conv_pd = ConvFwdPrimitiveDesc( src_md, weights_md, dst_md, strides, paddings, mkldnn_engine); + // Save conv_pd/src_memory/weights_memory for backward pass + dev_ctx.SetBlob(key_conv_pd, conv_pd); - // create reorder primitive if the input format is not the preferred one - auto src_memory = user_src_memory; - primitive reorder_src; - bool is_src_reordered = false; - if (memory::primitive_desc(conv_pd->src_primitive_desc()) != - user_src_memory.get_primitive_desc()) { - src_memory = memory(conv_pd->src_primitive_desc()); - reorder_src = reorder(user_src_memory, src_memory); - is_src_reordered = true; - } - auto weights_memory = user_weights_memory; - primitive reorder_weights; - bool is_weights_reordered = false; - if (memory::primitive_desc(conv_pd->weights_primitive_desc()) != - user_weights_memory.get_primitive_desc()) { - weights_memory = memory(conv_pd->weights_primitive_desc()); - reorder_weights = reorder(user_weights_memory, weights_memory); - is_weights_reordered = true; - } + ConvMKLDNNHandler handler(conv_pd, dev_ctx, mkldnn_engine, key); - // create memory primitive for conv dst - auto dst_memory = memory(conv_pd->dst_primitive_desc(), output_data); + // create mkldnn memory from input tensors (data/weights) + auto user_src_memory_p = + handler.AcquireSrcMemory(user_src_md, to_void_cast(input_data)); + auto user_weights_memory_p = handler.AcquireWeightsMemory( + user_weights_md, to_void_cast(filter_data)); + + // create reorder primitive if the input format is not the preferred one + auto src_memory_p = + handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline); + auto weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive( + user_weights_memory_p, pipeline); + auto dst_memory_p = + handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); // create convolution op primitive - auto conv_prim = conv_fwd(*conv_pd, src_memory, weights_memory, dst_memory); + auto conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p, + dst_memory_p); // push primitive to stream and wait until it's executed - std::vector pipeline; - if (is_src_reordered) pipeline.push_back(reorder_src); - if (is_weights_reordered) pipeline.push_back(reorder_weights); - pipeline.push_back(conv_prim); + pipeline.push_back(*conv_p); stream(stream::kind::eager).submit(pipeline).wait(); - // Save conv_pd/src_memory/weights_memory for backward pass - dev_ctx.SetBlob(key_conv_pd, conv_pd); - output->set_layout(DataLayout::kMKLDNN); - output->set_format(GetMKLDNNFormat(dst_memory)); + output->set_format(GetMKLDNNFormat(*dst_memory_p)); } private: @@ -197,13 +260,10 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { if (!input_grad && !filter_grad) return; - // Get an unique name from "argument" name of "Output" variable - // This name will be used as key when saving info into device context - const std::string key = ctx.op().Input("Output"); - const std::string key_conv_pd = key + "@conv_pd"; - std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); + std::vector dilations = ctx.Attr>("dilations"); + int groups = ctx.Attr("groups"); const T* input_data = input->data(); const T* filter_data = filter->data(); @@ -223,6 +283,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { paddle::framework::vectorize2int(filter->dims()); std::vector dst_tz = paddle::framework::vectorize2int(output->dims()); + // Get an unique name from "argument" name of "Output" variable + // This name will be used as key when saving info into device context + const std::string key = + ConvMKLDNNHandler::GetHash(src_tz, weights_tz, strides, paddings, + dilations, groups, ctx.op().Input("Output")); + + const std::string key_conv_pd = key + "@conv_pd"; + // create mkldnn memory from input tensors (input/weights/output_grad) auto user_src_memory = memory( {{{src_tz}, memory::data_type::f32, input->format()}, mkldnn_engine}, diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index 3b0c9b2886504ee381b2b33e06a4552602725e57..9a1643d5b35c067ba9064286bab32019fb34fbe8 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -86,8 +86,9 @@ class RpnTargetAssignKernel : public framework::OpKernel { std::minstd_rand engine, std::vector* inds) const { std::uniform_real_distribution uniform(0, 1); - if (inds->size() > num) { - for (int i = num; i < inds->size(); ++i) { + const int64_t size = static_cast(inds->size()); + if (size > num) { + for (int64_t i = num; i < size; ++i) { int rng_ind = std::floor(uniform(engine) * i); if (rng_ind < num) std::iter_swap(inds->begin() + rng_ind + offset, diff --git a/paddle/fluid/operators/im2sequence_op.cc b/paddle/fluid/operators/im2sequence_op.cc index 0669661d225c664010fce97f0a526b62988b92c5..c8c7f36536a76ea103ef6f5689c0fbdb76102688 100644 --- a/paddle/fluid/operators/im2sequence_op.cc +++ b/paddle/fluid/operators/im2sequence_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/im2sequence_op.h" +#include #include namespace paddle { @@ -28,20 +29,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel { "Input(X) of Im2SequenceOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of Im2SequenceOp op should not be null."); - auto in_dim = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(in_dim.size(), 4, "Input(X) format must be 4D tensor, eg., NCHW."); - - auto kernels = ctx->Attrs().Get>("kernels"); - auto strides = ctx->Attrs().Get>("strides"); - auto paddings = ctx->Attrs().Get>("paddings"); - int batch_size = in_dim[0]; int img_channels = in_dim[1]; int img_height = in_dim[2]; int img_width = in_dim[3]; + auto kernels = ctx->Attrs().Get>("kernels"); + auto strides = ctx->Attrs().Get>("strides"); + auto paddings = ctx->Attrs().Get>("paddings"); + int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], paddings[2], strides[0]); int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], @@ -61,6 +61,10 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker { "C: channels" "H: height" "W: width"); + AddInput("Y", + "(Tensor) The input tensor of image real size(H, W)." + "2-D with shape [batchsize, 2]") + .AsDispensable(); AddOutput("Out", "(LodTensor) The output data of im2sequence op,"); AddAttr>("kernels", "(vector), the " @@ -73,6 +77,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker { "(vector default:{0, 0, 0, 0}), the " "paddings(up_pad, left_pad, down_pad, right_pad)") .SetDefault({0, 0, 0, 0}); + AddAttr>("out_stride", + "the attribute is valid only when input(Y)" + "is not NULL.this attribute represents the" + "scaling of the pic through the CNN" + "(vector dedault:{1,1}),the out_stride" + " (out_stride_height, out_stride_width)") + .SetDefault({1, 1}); AddComment(R"DOC( This op uses kernels to scan images and converts these images to sequences. After expanding, The number of time steps are output_height * output_width @@ -123,7 +134,7 @@ output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.] [ 7. 1. 7. 9. 2. 1. 3. 5.] [ 5. 7. 2. 4. 1. 3. 9. 0.] [ 7. 9. 4. 8. 3. 5. 0. 8.]] -output.dims = {8, 9} +output.dims = {8, 8} output.lod = [[0, 4, 8]] )DOC"); diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h index d792c68f784d8ffec0eb303a6ab9b59c9f121fa7..5bfb91db1887909c65de5f2e5321a8e6be6cf5ac 100644 --- a/paddle/fluid/operators/im2sequence_op.h +++ b/paddle/fluid/operators/im2sequence_op.h @@ -13,6 +13,7 @@ limitations under the License. */ #pragma once +#include #include #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" @@ -39,50 +40,106 @@ class Im2SequenceKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { const Tensor* in = ctx.Input("X"); LoDTensor* out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - // TODO(wanghaoshuang): Add layout checker after 'set_layout' - // being available for python API - // PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW, - // "Input(X) layout must be NCHW"); auto in_dim = in->dims(); int batch_size = in_dim[0]; int img_channels = in_dim[1]; int img_height = in_dim[2]; int img_width = in_dim[3]; - auto kernels = ctx.Attr>("kernels"); auto strides = ctx.Attr>("strides"); auto paddings = ctx.Attr>("paddings"); - int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], - paddings[2], strides[0]); - int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], - paddings[3], strides[1]); - - const std::vector dilations({1, 1}); - - auto out_dims = out->dims(); - out->Resize({batch_size, out->numel() / batch_size}); - for (int i = 0; i < batch_size; i++) { - const Tensor src = - in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); - Tensor dst = out->Slice(i, i + 1).Resize( - {output_height, output_width, img_channels, kernels[0], kernels[1]}); - - math::Im2ColFunctor f; - auto& dev_ctx = ctx.template device_context(); - f(dev_ctx, src, dilations, strides, paddings, &dst); - } - out->Resize(out_dims); - - // set lod information - // TODO(wanghaoshuang): Move this to InferShape - framework::LoD lod(1); - lod[0].reserve(batch_size + 1); - for (int i = 0, offset = 0; i < batch_size + 1; ++i) { + if (ctx.HasInput("Y") && batch_size > 1) { + const Tensor* imgrealsize = ctx.Input("Y"); + auto out_stride = ctx.Attr>("out_stride"); + Tensor cpu_shape_tensor; + TensorCopySync(*imgrealsize, platform::CPUPlace(), &cpu_shape_tensor); + std::vector imgreal_h; + std::vector imgreal_w; + std::vector output_height; + std::vector output_width; + int result = 0; + for (int i = 0; i < batch_size; i++) { + int tmp_real_h = static_cast((cpu_shape_tensor.data())[2 * i]); + int tmp_real_w = + static_cast((cpu_shape_tensor.data())[2 * i + 1]); + if (tmp_real_h % out_stride[0] == 0) { + tmp_real_h = tmp_real_h / out_stride[0]; + } else { + tmp_real_h = tmp_real_h / out_stride[0] + 1; + } + if (tmp_real_w % out_stride[1] == 0) { + tmp_real_w = tmp_real_w / out_stride[1]; + } else { + tmp_real_w = tmp_real_w / out_stride[1] + 1; + } + imgreal_h.push_back(tmp_real_h); + imgreal_w.push_back(tmp_real_w); + output_height.push_back(Im2SeqOutputSize( + imgreal_h[i], kernels[0], paddings[0], paddings[2], strides[0])); + output_width.push_back(Im2SeqOutputSize( + imgreal_w[i], kernels[1], paddings[1], paddings[3], strides[1])); + result += output_height[i] * output_width[i]; + } + + out->mutable_data({result, img_channels * kernels[0] * kernels[1]}, + ctx.GetPlace()); + + const std::vector dilations({1, 1}); + int offset_out = 0; + for (int i = 0; i < batch_size; i++) { + const Tensor src = + in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); + Tensor dst = out->Slice(offset_out, + offset_out + output_height[i] * output_width[i]) + .Resize({output_height[i], output_width[i], + img_channels, kernels[0], kernels[1]}); + offset_out += output_height[i] * output_width[i]; + + math::Im2ColFunctor f; + auto& dev_ctx = ctx.template device_context(); + f(dev_ctx, src, dilations, strides, paddings, &dst); + } + framework::LoD lod(1); + lod[0].reserve(batch_size + 1); + int offset = 0; + lod[0].push_back(offset); + for (int i = 0; i < batch_size; ++i) { + offset += output_height[i] * output_width[i]; + lod[0].push_back(offset); + } + out->set_lod(lod); + } else { + out->mutable_data(ctx.GetPlace()); + int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], + paddings[2], strides[0]); + int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], + paddings[3], strides[1]); + + const std::vector dilations({1, 1}); + auto out_dims = out->dims(); + out->Resize({batch_size, out->numel() / batch_size}); + for (int i = 0; i < batch_size; i++) { + const Tensor src = + in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); + Tensor dst = + out->Slice(i, i + 1).Resize({output_height, output_width, + img_channels, kernels[0], kernels[1]}); + + math::Im2ColFunctor f; + auto& dev_ctx = ctx.template device_context(); + f(dev_ctx, src, dilations, strides, paddings, &dst); + } + out->Resize(out_dims); + framework::LoD lod(1); + lod[0].reserve(batch_size + 1); + int offset = 0; lod[0].push_back(offset); - offset += output_height * output_width; + for (int i = 0; i < batch_size; ++i) { + offset += output_height * output_width; + lod[0].push_back(offset); + } + out->set_lod(lod); } - out->set_lod(lod); } }; diff --git a/paddle/fluid/operators/math/im2col.cc b/paddle/fluid/operators/math/im2col.cc index 336d6febc2ce3a55e82ed613bbc1081101f822f0..a50b9ace39249f4f899a46e171bbdced033b46bc 100644 --- a/paddle/fluid/operators/math/im2col.cc +++ b/paddle/fluid/operators/math/im2col.cc @@ -43,21 +43,6 @@ class Im2ColFunctordims()[3]; int col_width = col->dims()[4]; - PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - - ((dilation[0] * (filter_height - 1) + 1))) / - stride[0] + - 1, - col_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - - ((dilation[1] * (filter_width - 1) + 1))) / - stride[1] + - 1, - col_width, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - int channels_col = im_channels * filter_height * filter_width; const T* im_data = im.data(); @@ -178,17 +163,6 @@ class Im2ColFunctordims()[0]; int col_width = col->dims()[1]; - PADDLE_ENFORCE_EQ( - (im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1, - col_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ( - (im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1, - col_width, - "col_width and padding(padding_left, padding_right) are " - "inconsistent."); - const T* im_data = im.data(); T* col_data = col->data(); diff --git a/paddle/fluid/operators/math/im2col.cu b/paddle/fluid/operators/math/im2col.cu index eecb233d22cea06da016b2671fd606b70eddf5a5..4897767f4d88d9e079f05c921153923c4eb354b0 100644 --- a/paddle/fluid/operators/math/im2col.cu +++ b/paddle/fluid/operators/math/im2col.cu @@ -77,21 +77,6 @@ class Im2ColFunctordims()[3]; int col_width = col->dims()[4]; - PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - - (dilation[0] * (filter_height - 1) + 1)) / - stride[0] + - 1, - col_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - - (dilation[1] * (filter_width - 1) + 1)) / - stride[1] + - 1, - col_width, - "col_width and padding(padding_left, padding_right) are " - "inconsistent."); - int num_outputs = im_channels * col_height * col_width; int blocks = (num_outputs + 1024 - 1) / 1024; int block_x = 512; @@ -274,21 +259,6 @@ class Im2ColFunctordims()[0]; int col_width = col->dims()[1]; - PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - - (dilation[0] * (filter_height - 1) + 1)) / - stride[0] + - 1, - col_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - - (dilation[1] * (filter_width - 1) + 1)) / - stride[1] + - 1, - col_width, - "col_width and padding(padding_left, padding_right) are " - "inconsistent."); - int block_dim_x = 0; int block_dim_y = 0; if (filter_height <= 4 && filter_width <= 4) { diff --git a/paddle/fluid/operators/reader/create_batch_reader_op.cc b/paddle/fluid/operators/reader/create_batch_reader_op.cc index 1dbafd23e92732bdaf0d263a01e267227786d839..e17c2ffd39eea31fe85933eda144ab97cf8c3dd8 100644 --- a/paddle/fluid/operators/reader/create_batch_reader_op.cc +++ b/paddle/fluid/operators/reader/create_batch_reader_op.cc @@ -23,7 +23,7 @@ class BatchReader : public framework::DecoratedReader { BatchReader(const std::shared_ptr& reader, int batch_size, bool discard_leftover) : DecoratedReader(reader), - batch_size_(batch_size), + batch_size_(static_cast(batch_size)), discard_leftover_(discard_leftover) { buffer_.reserve(batch_size_); } @@ -31,7 +31,7 @@ class BatchReader : public framework::DecoratedReader { void ReadNextImpl(std::vector* out) override; private: - int batch_size_; + size_t batch_size_; bool discard_leftover_; std::vector> buffer_; }; @@ -78,7 +78,7 @@ class CreateBatchReaderOpMaker : public DecoratedReaderMakerBase { void BatchReader::ReadNextImpl(std::vector* out) { buffer_.clear(); buffer_.reserve(batch_size_); - for (int i = 0; i < batch_size_; ++i) { + for (size_t i = 0; i < batch_size_; ++i) { buffer_.push_back(std::vector()); reader_->ReadNext(&buffer_.back()); if (buffer_.back().empty()) { @@ -95,9 +95,9 @@ void BatchReader::ReadNextImpl(std::vector* out) { // if buffer_ is empty, the 'out' will return as an empty vector. return; } - int out_num = buffer_[0].size(); + size_t out_num = buffer_[0].size(); out->reserve(out_num); - for (int j = 0; j < out_num; ++j) { + for (size_t j = 0; j < out_num; ++j) { // Merge shape and check date type std::type_index batch_type = buffer_[0][j].type(); framework::DDim batch_shape = buffer_[0][j].dims(); diff --git a/paddle/fluid/operators/squeeze_op.cc b/paddle/fluid/operators/squeeze_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..6c507baf3a0ab0a557d29a53700685753616193b --- /dev/null +++ b/paddle/fluid/operators/squeeze_op.cc @@ -0,0 +1,202 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +class SqueezeOpInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of SqueezeOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of SqueezeOp should not be null."); + + const auto &x_dims = ctx->GetInputDim("X"); + // Check input tensor dims (<6) Eigen limit. + PADDLE_ENFORCE(x_dims.size() <= 6, + "Invalid dimnesions, the rank of Input(X) " + "should be in the range of [1, 6] (Eigen limit)."); + + const auto &axes = ctx->Attrs().Get>("axes"); + for (int a : axes) { + PADDLE_ENFORCE_LT(a, x_dims.size(), + "The squeeze axis should be less than input " + "tensor's rank."); + } + + auto out_dims = GetOutputShape(axes, x_dims); + ctx->SetOutputDim("Out", out_dims); + if (x_dims[0] == out_dims[0]) { + // Only pass LoD when the first dimension of output and Input(X) + // are the same. + ctx->ShareLoD("X", "Out"); + } + } + + static framework::DDim GetOutputShape(const std::vector squeeze_dims, + const framework::DDim &in_dims) { + size_t num_squeeze_dims = squeeze_dims.size(); + int cnt_squeezed_dims = 0; + bool should_squeeze[9] = {false}; + + // Determines number of dimensions of output tensor after squeeze. + // Mark and count the dimensions need to be squeezed + if (num_squeeze_dims == 0) { + for (int idx = 0; idx < in_dims.size(); ++idx) { + if (in_dims[idx] == 1) { + should_squeeze[idx] = true; + ++cnt_squeezed_dims; + } + } + } else { + for (size_t idx = 0; idx < num_squeeze_dims; ++idx) { + int current = squeeze_dims[idx] < 0 ? squeeze_dims[idx] + in_dims.size() + : squeeze_dims[idx]; + // Check current index, the upper limit has beed checked in line 36. + PADDLE_ENFORCE(current >= 0, + "Invalid axis, the negative axis is out of range."); + PADDLE_ENFORCE(in_dims[current] == 1, + "Invalid axis index, the axis that will be squeezed " + "should be equal to 1."); + + if (!(should_squeeze[current])) { + ++cnt_squeezed_dims; + } + should_squeeze[current] = true; + } + } + + // Make output dimensions + std::vector output_shape(in_dims.size() - cnt_squeezed_dims, 0); + for (int in_idx = 0, out_idx = 0; in_idx < in_dims.size(); ++in_idx) { + if (!should_squeeze[in_idx]) { + output_shape[out_idx++] = in_dims[in_idx]; + } + } + + return framework::make_ddim(output_shape); + } +}; + +class SqueezeOp : public framework::OperatorBase { + public: + using OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { + auto &axes = Attr>("axes"); + auto x_dims = scope.FindVar(Input("X"))->Get().dims(); + auto out_dims = SqueezeOpInferShape::GetOutputShape(axes, x_dims); + + framework::AttributeMap attrs; + attrs["shape"] = framework::vectorize2int(out_dims); + attrs["inplace"] = Attr("inplace"); + // Invoke Reshape Op + auto reshape_op = framework::OpRegistry::CreateOp( + "reshape", {{"X", {Input("X")}}, {"Shape", {}}}, + {{"Out", {Output("Out")}}}, attrs); + reshape_op->Run(scope, place); + } +}; + +class SqueezeOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor). The input tensor of squeeze operator."); + AddOutput("Out", "(Tensor). The output tensor of squeeze operator."); + AddAttr>("axes", + "(std::vector). List of integers," + " indicating the dimensions to squeeze.") + .SetDefault({}); + AddAttr("inplace", + "(default: false) Squeeze the source tensor's shape without " + "memory copy. When Attr(inplace) is set true, the output " + "tensor shares memory with Input(X), otherwise, a new output " + "tensor is created, and its data are copied from Input(x).") + .SetDefault(false); + AddComment(R"DOC( + Squeeze Operator. + + Remove single-dimensional entries from the shape of a tensor. + Takes a parameter axes with a list of axes to squeeze. + If axes is not provided, all the single dimensions will be removed from the shape. + If an axis is selected with shape entry not equal to one, an error is raised. + + Examples: + Case 1: + Given + X.shape = (1, 3, 1, 5) + and + axes = [0] + we get: + Out.shape = (3, 1, 5) + + Case 2: + Given + X.shape = (1, 3, 1, 5) + and + axes = [] + we get: + Out.shape = (3, 5) + )DOC"); + } +}; + +class SqueezeGradInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + context->SetOutputDim(framework::GradVarName("X"), + context->GetInputDim("X")); + context->ShareLoD("X", framework::GradVarName("X")); + } +}; + +class SqueezeGradOp : public framework::OperatorBase { + public: + using OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { + auto dx_name = Output(framework::GradVarName("X")); + auto dout_name = Input(framework::GradVarName("Out")); + auto x_dims = scope.FindVar(Input("X"))->Get().dims(); + framework::AttributeMap attrs; + attrs["shape"] = framework::vectorize2int(x_dims); + attrs["inplace"] = Attr("inplace"); + + auto reshape_op = framework::OpRegistry::CreateOp( + "reshape", {{"X", {dout_name}}, {"Shape", {}}}, {{"Out", {dx_name}}}, + attrs); + reshape_op->Run(scope, place); + } +}; + +} // namespace operators +} // namespace paddle + +// Tell linker to use reshape op +USE_OP(reshape); + +namespace ops = paddle::operators; +REGISTER_OPERATOR(squeeze, ops::SqueezeOp, ops::SqueezeOpMaker, + ops::SqueezeOpInferShape, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(squeeze_grad, ops::SqueezeGradOp, ops::SqueezeGradInferShape); diff --git a/paddle/fluid/operators/unsqueeze_op.cc b/paddle/fluid/operators/unsqueeze_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..f2a15fdf572e0de30f9949dda5020e130b0c5585 --- /dev/null +++ b/paddle/fluid/operators/unsqueeze_op.cc @@ -0,0 +1,191 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +class UnsqueezeOpInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of UnsqueezeOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of UnsqueezeOp should not be null."); + + const auto &axes = ctx->Attrs().Get>("axes"); + const auto &x_dims = ctx->GetInputDim("X"); + // Validity Check: input tensor dims (<6). + PADDLE_ENFORCE(x_dims.size() <= 6, + "Invalid dimensions, the rank of Input(X) " + "should be in the range of [1, 6] (Eigen limit)"); + auto out_dims = GetOutputShape(axes, x_dims); + ctx->SetOutputDim("Out", out_dims); + if (x_dims[0] == out_dims[0]) { + // Only pass LoD when the first dimension of output and Input(X) + // are the same. + ctx->ShareLoD("X", "Out"); + } + } + + static framework::DDim GetOutputShape(const std::vector unsqz_dims, + const framework::DDim &in_dims) { + int output_size = in_dims.size() + static_cast(unsqz_dims.size()); + int cur_output_size = in_dims.size(); + std::vector output_shape(output_size, 0); + + // Validity Check: rank range. + PADDLE_ENFORCE(output_size <= 6, + "The output tensor's rank should be less than 6."); + + for (int axis : unsqz_dims) { + int cur = axis < 0 ? axis + cur_output_size + 1 : axis; + // Vaildity Check: the axis bound + PADDLE_ENFORCE( + cur >= 0 && cur <= cur_output_size, + "The unsqueeze dims must be within range of current rank."); + // Move old axis, and insert new axis + for (int i = cur_output_size; i >= cur; --i) { + if (output_shape[i] == 1) { + // Move axis + output_shape[i + 1] = 1; + output_shape[i] = 0; + } + } + output_shape[cur] = 1; + // Add the output size. + cur_output_size++; + } + + // Make output shape + for (int in_idx = 0, out_idx = 0; out_idx < output_size; ++out_idx) { + if (output_shape[out_idx] == 0) { + output_shape[out_idx] = in_dims[in_idx++]; + } + } + + return framework::make_ddim(output_shape); + } +}; + +class UnsqueezeOp : public framework::OperatorBase { + public: + using OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { + auto &axes = Attr>("axes"); + auto x_dims = scope.FindVar(Input("X"))->Get().dims(); + auto out_dims = UnsqueezeOpInferShape::GetOutputShape(axes, x_dims); + + framework::AttributeMap attrs; + attrs["shape"] = framework::vectorize2int(out_dims); + attrs["inplace"] = Attr("inplace"); + // Invoke Reshape op. + auto reshape_op = framework::OpRegistry::CreateOp( + "reshape", {{"X", {Input("X")}}, {"Shape", {}}}, + {{"Out", {Output("Out")}}}, attrs); + reshape_op->Run(scope, place); + } +}; + +class UnsqueezeOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor). The input tensor of unsqueeze operator."); + AddOutput("Out", "(Tensor). The output tensor of unsqueeze operator."); + AddAttr>("axes", + "(std::vector). List of integers," + " indicating the dimensions to be inserted") + .AddCustomChecker([](const std::vector &axes) { + PADDLE_ENFORCE(!axes.empty(), + "Invalid axes, The unsqueeze axes is empty."); + // Validity Check: axes dims (<6). + PADDLE_ENFORCE(static_cast(axes.size()) < 6, + "Invalid dimensions, dynamic dimensions should be " + "within [1, 6] dimensions (Eigen limit)."); + // Validity Check: the range of unsqueeze aixs. + for (int axis : axes) { + PADDLE_ENFORCE(axis < 6, + "Invalid dimensions, input axis should be" + " within [1, 6] dimensions (Eigen limit)."); + } + }); + AddAttr( + "inplace", + "(default: false) Unsqueeze the source tensor's shape without " + "memory copy. When Attr(inplace) is set true, the output " + "tensor shares memory with Input(X), otherwise, a new output " + "tensor is created, and its data are copied from Input(x).") + .SetDefault(false); + AddComment(R"DOC( + Unsqueeze Operator. + + Insert single-dimensional entries to the shape of a tensor. + Takes one required argument axes, a list of dimensions that will be inserted. + Dimension indices in axes are as seen in the output tensor. + + For example: + Given a tensor such that tensor with shape [3, 4, 5], + then Unsqueeze(tensor, axes=[0, 4]) has shape [1, 3, 4, 5, 1] + )DOC"); + } +}; + +class UnsqueezeGradInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override { + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + ctx->ShareLoD("X", framework::GradVarName("X")); + } +}; + +class UnsqueezeGradOp : public framework::OperatorBase { + public: + using OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { + auto dx_name = Output(framework::GradVarName("X")); + auto dout_name = Input(framework::GradVarName("Out")); + auto x_dims = scope.FindVar(Input("X"))->Get().dims(); + + framework::AttributeMap attrs; + attrs["shape"] = framework::vectorize2int(x_dims); + attrs["inplace"] = Attr("inplace"); + + auto reshape_op = framework::OpRegistry::CreateOp( + "reshape", {{"X", {dout_name}}, {"Shape", {}}}, {{"Out", {dx_name}}}, + attrs); + reshape_op->Run(scope, place); + } +}; + +} // namespace operators +} // namespace paddle + +// Tell linker to use reshape op. +USE_OP(reshape); + +namespace ops = paddle::operators; +REGISTER_OPERATOR(unsqueeze, ops::UnsqueezeOp, ops::UnsqueezeOpMaker, + ops::UnsqueezeOpInferShape, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(unsqueeze_grad, ops::UnsqueezeGradOp, + ops::UnsqueezeGradInferShape); diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 33fec2c1073819d88d85a8872227adcb9df3e8f4..a8f93e6848a1db1f5aa0ee266a076af2b5d0c964 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -222,15 +222,16 @@ class MKLDNNHandler { static std::string GetHash(mkldnn::memory::dims& operand_dims, // NOLINT const std::string& suffix) { - auto dims2str = [](const mkldnn::memory::dims& operand_dims) { - std::string dstr = ""; - for (size_t i = 0; i < operand_dims.size(); ++i) { - dstr += std::to_string(operand_dims[i]) + "-"; - } - return dstr; - }; - return dims2str(operand_dims) + suffix; + }; + + protected: + static std::string dims2str(const mkldnn::memory::dims& operand_dims) { + std::string dstr = ""; + for (size_t i = 0; i < operand_dims.size(); ++i) { + dstr += std::to_string(operand_dims[i]) + "-"; + } + return dstr; } protected: diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 0c523b6f176345c0407b8541c04fb8c3b27f7c60..96ab5d457be2da5ed0a6b9d93b5510d7399bf20e 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -66,6 +66,14 @@ bool IsCompiledWithCUDA() { #endif } +bool IsCompiledWithDIST() { +#ifdef PADDLE_WITH_DIST + return true; +#else + return false; +#endif +} + PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of PaddlePaddle"); @@ -508,6 +516,7 @@ All parameter, weight, gradient are variables in Paddle. [](bool init_p2p) { framework::InitDevices(init_p2p); }); m.def("is_compiled_with_cuda", IsCompiledWithCUDA); + m.def("is_compiled_with_dist", IsCompiledWithDIST); #ifdef PADDLE_WITH_CUDA m.def("is_float16_supported", [](const platform::CUDAPlace &place) -> bool { // Only GPUs with Compute Capability >= 53 support float16 diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index bb63497f4c9be3ecb8d38b8fe36808665844d180..5bac108151ab066b586f4baf3280a7995446b849 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -518,11 +518,23 @@ function gen_fluid_inference_lib() { EOF make -j `nproc` inference_lib_dist cd ${PADDLE_ROOT}/build - mv fluid_install_dir fluid + cp -r fluid_install_dir fluid tar -cf fluid.tgz fluid fi } +function test_fluid_inference_lib() { + if [ ${WITH_C_API:-OFF} == "OFF" ] ; then + cat <> sys.stderr, err_msg + return func(*args, **kwargs) + + wrapper.__doc__ += "\n " + wrapper.__doc__ += err_msg + return wrapper + + return decorator diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 4faa06303170488d0de2fda4c1461cfe2d623d35..ddcde04716d21df1f18e7202936f470d3d58a661 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -18,10 +18,7 @@ import collections import copy import unique_name -__all__ = [ - 'append_backward', - 'calc_gradient', -] +__all__ = ['append_backward'] def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): @@ -123,7 +120,8 @@ def _append_grad_suffix_(name): def _addup_repetitive_outputs_(op_descs): """ In backward part, an variable may be the output of more than one ops. - In this case, the variable should be the accumulation of all the outputs. + And one op may yield its multiple outputs to the same variable. + In these cases, the variable should be the accumulation of all the outputs. `sum_op`s are added to implement the accumulate. """ pending_sum_ops = [] @@ -136,29 +134,46 @@ def _addup_repetitive_outputs_(op_descs): "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]}, {"use_mkldnn": False}), idx)) renamed_vars[var_name] = [var_name] - for var_name in op_desc.output_arg_names(): - if var_name == core.empty_var_name( - ) or var_name in op_desc.input_arg_names(): - # empty variable or inplace op - continue - if len(renamed_vars[var_name]) == 0: - # it's the first time we get the variable - renamed_vars[var_name] = [var_name] - else: - if len(renamed_vars[var_name]) == 1: + for param_idx, param_name in enumerate(op_desc.output_names()): + arg_names = op_desc.output(param_name) + for arg_idx, var_name in enumerate(arg_names): + if var_name == core.empty_var_name( + ) or var_name in op_desc.input_arg_names(): + # empty variable or inplace op + continue + if len(renamed_vars[var_name]) == 0: + # it's the first time we get the variable + renamed_vars[var_name] = [var_name] + else: + if len(renamed_vars[var_name]) == 1: + new_name = var_name + "@RENAME@" + \ + str(var_rename_count[var_name]) + var_rename_count[var_name] += 1 + # rename original var_name + renamed_vars[var_name][0] = new_name + _rename_arg_(op_descs, var_name, new_name, 0, idx) + _rename_arg_(pending_sum_ops, var_name, new_name) + + for p in op_desc.output_names()[:param_idx]: + p_arg_names = op_desc.output(p) + if var_name in p_arg_names: + op_desc.set_output(p, [ + new_name if x == var_name else x + for x in p_arg_names + ]) + + arg_names = [ + new_name if x == var_name else x + for x in arg_names[:arg_idx] + ] + arg_names[arg_idx:] + new_name = var_name + "@RENAME@" + \ str(var_rename_count[var_name]) var_rename_count[var_name] += 1 - # rename original var_name - renamed_vars[var_name][0] = new_name - _rename_arg_(op_descs, var_name, new_name, 0, idx) - _rename_arg_(pending_sum_ops, var_name, new_name) - - new_name = var_name + "@RENAME@" + \ - str(var_rename_count[var_name]) - var_rename_count[var_name] += 1 - op_desc.rename_output(var_name, new_name) - renamed_vars[var_name].append(new_name) + arg_names[arg_idx] = new_name + op_desc.set_output(param_name, arg_names) + renamed_vars[var_name].append(new_name) + for var_name, inputs in renamed_vars.iteritems(): if len(inputs) > 1: pending_sum_ops.append( diff --git a/python/paddle/fluid/layers/device.py b/python/paddle/fluid/layers/device.py index e0c1aab230aeed7fb858e91e7da7eae58032ee16..384d302a709eeec220864b9e8c9210ed028470f6 100644 --- a/python/paddle/fluid/layers/device.py +++ b/python/paddle/fluid/layers/device.py @@ -18,10 +18,12 @@ All util layers. from layer_function_generator import autodoc from ..framework import unique_name from ..layer_helper import LayerHelper +from ..annotations import deprecated -__all__ = ['get_places'] +__all__ = [] +@deprecated(since='0.15.0', instead="ParallelExecutor") @autodoc() def get_places(device_count=None, device_type=None): helper = LayerHelper('get_places', **locals()) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index bcf520d5a4e3bbe1d949d08f42199dd8c5cdc947..07b806f544497ccabe4dde9a370e90da372e6cba 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1,4 +1,18 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright (c ) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -3900,7 +3914,13 @@ def transpose(x, perm, name=None): return out -def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): +def im2sequence(input, + filter_size=1, + stride=1, + padding=0, + input_image_size=None, + out_stride=1, + name=None): """ Extracts image patches from the input tensor to form a tensor of shape {input.batch_size * output_height * output_width, filter_size_H * @@ -3937,6 +3957,15 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): padding_up = padding_down = padding_left = padding_right = padding Default: padding = 0. + input_image_size(Variable): the input contains image real size.It's dim + is [batchsize, 2]. It is dispensable.It is just for batch inference. + + out_stride(int|tuple): The scaling of image through CNN. It is + dispensable. It is valid only when input_image_size is not null. + If out_stride is tuple, it must contain two intergers, + (out_stride_H, out_stride_W). Otherwise, + the out_stride_H = out_stride_W = out_stride. + name (int): The name of this layer. It is optional. Returns: @@ -3987,7 +4016,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): [ 5. 7. 2. 4. 1. 3. 9. 0.] [ 7. 9. 4. 8. 3. 5. 0. 8.]] - output.dims = {8, 9} + output.dims = {8, 8} output.lod = [[4, 4]] @@ -4009,18 +4038,17 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): if len(padding) == 2: padding.append(padding[0]) padding.append(padding[1]) - + inputs = {"X": input} + attrs = {"kernels": filter_size, "strides": stride, "padding": padding} + if input_image_size: + if isinstance(out_stride, int): + out_stride = [out_stride, out_stride] + inputs["Y"] = input_image_size + attrs["out_stride"] = out_stride helper = LayerHelper('im2sequence', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) helper.append_op( - type='im2sequence', - inputs={'X': input}, - outputs={'Out': out}, - attrs={ - 'kernels': filter_size, - 'strides': stride, - 'paddings': padding, - }) + type='im2sequence', inputs=inputs, outputs={'Out': out}, attrs=attrs) return out diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 75ee40fa9ca94cdd84ee7acbb62d6e652ac7fa33..e2acf6d41a0085e6f741e46063b47d2ff1e769cb 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -29,7 +29,7 @@ __all__ = [ 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', 'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer', - 'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer', 'RMSPropOptimizer' + 'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'RMSPropOptimizer' ] @@ -67,7 +67,7 @@ class Optimizer(object): self._LARS_weight_decay = LARS_weight_decay def _create_global_learning_rate(self): - lr = self.global_learning_rate() + lr = self._global_learning_rate() if isinstance(lr, framework.Variable): return @@ -86,7 +86,7 @@ class Optimizer(object): dtype='float32' if self._dtype == None else self._dtype, persistable=True) - def global_learning_rate(self, program=None): + def _global_learning_rate(self, program=None): """ get global decayed learning rate :return: @@ -110,9 +110,9 @@ class Optimizer(object): return param_lr else: if param_lr == 1.0: - return self.global_learning_rate() + return self._global_learning_rate() else: - return self.global_learning_rate() * param_lr + return self._global_learning_rate() * param_lr def _create_accumulators(self, block, parameters): """Create all accumulators needed by the parameters @@ -185,10 +185,10 @@ class Optimizer(object): format(name, param.name)) return self._accumulators[name][param.name] - def create_optimization_pass(self, - parameters_and_grads, - loss, - startup_program=None): + def _create_optimization_pass(self, + parameters_and_grads, + loss, + startup_program=None): """Add optimization operators to update gradients to variables. Args: @@ -221,7 +221,7 @@ class Optimizer(object): self._create_global_learning_rate() if self._LARS_weight_decay > 0.0: layers.append_LARS(parameters_and_grads, - self.global_learning_rate(), + self._global_learning_rate(), self._LARS_weight_decay) optimize_ops = [] @@ -262,8 +262,8 @@ class Optimizer(object): params_grads = append_regularization_ops(params_grads, self.regularization) - optimize_ops = self.create_optimization_pass(params_grads, loss, - startup_program) + optimize_ops = self._create_optimization_pass(params_grads, loss, + startup_program) return optimize_ops, params_grads diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index 1df7b99aad6094a8b8ddfe783b9de35cef61c524..95002aa7f9bb639828b47eb1e86e4ef954fb85ff 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function - +from paddle.fluid.layers.device import get_places import unittest import paddle.fluid as fluid import paddle @@ -144,7 +144,7 @@ def train(word_dict, cost, acc_out, prediction = net_method( data, label, input_dim=dict_dim, class_dim=class_dim) else: - places = fluid.layers.get_places() + places = get_places() pd = fluid.layers.ParallelDo(places) with pd.do(): cost, acc, _ = net_method( diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 5f5c8544bbdb87421f129b201a0ebaf4cb8602a1..49f549fa184037a64aa846f0d1d0e1b57db1f2ef 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function -import argparse -import paddle.fluid as fluid -import paddle -import sys -import numpy -import unittest + import math -import sys import os +import sys +import unittest + +import numpy + +import paddle +import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places BATCH_SIZE = 64 @@ -76,7 +78,7 @@ def train(nn_type, net_conf = conv_net if parallel: - places = fluid.layers.get_places() + places = get_places() pd = fluid.layers.ParallelDo(places) with pd.do(): img_ = pd.read_input(img) diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index 49bd72c7a53c0ae740bdbabe15b1d37340699d41..80e0692bc640efc280c43bd5b929847ad29207c4 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -14,6 +14,7 @@ import paddle import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places import unittest import os import numpy as np @@ -80,7 +81,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): avg_cost, predict_word = __network__( [first_word, second_word, third_word, forth_word, next_word]) else: - places = fluid.layers.get_places() + places = get_places() pd = fluid.layers.ParallelDo(places) with pd.do(): avg_cost, predict_word = __network__( diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py index be347cd5315668dde0454d7959dbf9bcfa465b5f..bec9f8594ff7c1aff8ae5ed55c9623754d9ea091 100644 --- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py +++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import paddle -import paddle.fluid as fluid import math import sys +import paddle +import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places + # need to fix random seed and training data to compare the loss # value accurately calculated by the default and the memory optimization # version. @@ -34,7 +35,7 @@ if fluid.core.is_compiled_with_cuda(): use_nccl = False place = fluid.CUDAPlace(0) -places = fluid.layers.get_places(device_count=0, device_type=device_type) +places = get_places(device_count=0, device_type=device_type) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) with pd.do(): x_ = pd.read_input(x) diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py index 06e676cd83e77549afd679e730426c590cc046bf..7f2a9e6971ed933463216e38498d48ab132a1a37 100644 --- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py @@ -16,8 +16,6 @@ import unittest import paddle.fluid as fluid import paddle.fluid.layers as layers -import paddle.fluid.framework as framework -import paddle.fluid.optimizer as optimizer from paddle.fluid.backward import calc_gradient diff --git a/python/paddle/fluid/tests/unittests/test_get_places_op.py b/python/paddle/fluid/tests/unittests/test_get_places_op.py index 6dab1e22f0c50ab011d6b8e8944097600cf3fecc..964423e2d2638224244b4ca774d8eee08f3ec989 100644 --- a/python/paddle/fluid/tests/unittests/test_get_places_op.py +++ b/python/paddle/fluid/tests/unittests/test_get_places_op.py @@ -13,6 +13,7 @@ # limitations under the License. import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places import decorators import unittest @@ -20,7 +21,7 @@ import unittest class TestGetPlaces(unittest.TestCase): @decorators.prog_scope() def test_get_places(self): - places = fluid.layers.get_places() + places = get_places() cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) exe.run(fluid.default_main_program()) diff --git a/python/paddle/fluid/tests/unittests/test_im2sequence_op.py b/python/paddle/fluid/tests/unittests/test_im2sequence_op.py index 4946475f11a4fc0ccaffeec6821d3976ea7c6560..13bc5768740ece00bbe285a0b47d82bb8a42d2c7 100644 --- a/python/paddle/fluid/tests/unittests/test_im2sequence_op.py +++ b/python/paddle/fluid/tests/unittests/test_im2sequence_op.py @@ -16,23 +16,48 @@ import numpy as np from op_test import OpTest -def get_output_shape(attrs, in_shape): +def get_output_shape(attrs, in_shape, img_real_size): + batchsize = in_shape[0] img_height = in_shape[2] img_width = in_shape[3] + paddings = np.array(attrs['paddings']).astype("int32") + kernels = np.array(attrs['kernels']).astype("int32") + strides = np.array(attrs['strides']).astype("int32") + output_height = np.zeros((1, batchsize)).astype("int32") + output_width = np.zeros((1, batchsize)).astype("int32") + if len(img_real_size): + out_stride = np.array(attrs['out_stride']).astype("int32") + imgreal_h = 0 + imgreal_w = 0 + for index in range(batchsize): + if img_real_size[index, 0] % out_stride[0] == 0: + imgreal_h = img_real_size[index, 0] / out_stride[0] + else: + imgreal_h = img_real_size[index, 0] / out_stride[0] + 1 + if img_real_size[index, 0] % out_stride[1] == 0: + imgreal_w = img_real_size[index, 1] / out_stride[1] + else: + imgreal_w = img_real_size[index, 0] / out_stride[1] + 1 + output_height[0,index] = \ + 1 + \ + (imgreal_h + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \ + strides[0] - paddings = attrs['paddings'] - kernels = attrs['kernels'] - strides = attrs['strides'] + output_width[0,index] = \ + 1 + \ + (imgreal_w + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \ + strides[1] + else: + for index in range(batchsize): + output_height[0,index] = \ + 1 + \ + (img_height + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \ + strides[0] - output_height = \ - 1 + \ - (img_height + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \ - strides[0] - - output_width = \ - 1 + \ - (img_width + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \ - strides[1] + output_width[0,index] = \ + 1 + \ + (img_width + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \ + strides[1] return output_height, output_width @@ -75,22 +100,25 @@ def im2col(attrs, im, col): im_row_offset][im_col_offset] -def Im2Sequence(inputs, attrs): - output_height, output_width = get_output_shape(attrs, inputs.shape) +def Im2Sequence(inputs, img_real_size, attrs): + output_height, output_width = get_output_shape(attrs, inputs.shape, + img_real_size) img_channels = inputs.shape[1] batch_size = inputs.shape[0] - out = np.zeros([ - batch_size, output_height, output_width, img_channels, - attrs['kernels'][0], attrs['kernels'][1] - ]).astype("float32") - - for i in range(len(inputs)): - im2col(attrs, inputs[i], out[i]) - - out = out.reshape([ - batch_size * output_height * output_width, - img_channels * attrs['kernels'][0] * attrs['kernels'][1] - ]) + out = [] + for index in range(batch_size): + tmp = np.zeros([ + output_height[0, index], output_width[0, index], img_channels, + attrs['kernels'][0], attrs['kernels'][1] + ]).astype("float32") + out.append(tmp) + for index in range(len(inputs)): + im2col(attrs, inputs[index], out[index]) + out[index] = out[index].reshape([ + output_height[0, index] * output_width[0, index], + img_channels * attrs['kernels'][0] * attrs['kernels'][1] + ]) + out = np.concatenate(out, axis=0) return out @@ -103,7 +131,7 @@ class TestBlockExpandOp(OpTest): self.attrs = { 'kernels': [2, 2], 'strides': [1, 1], - 'paddings': [1, 1, 1, 1] + 'paddings': [1, 1, 1, 1], } def setUp(self): @@ -113,7 +141,8 @@ class TestBlockExpandOp(OpTest): self.batch_size, self.img_channels, self.img_height, self.img_width ]).astype("float32") - out = Im2Sequence(x, self.attrs) + real_size = np.array([]).astype("float32") + out = Im2Sequence(x, real_size, self.attrs) self.inputs = {'X': x} self.outputs = {'Out': out} @@ -133,20 +162,20 @@ class TestBlockExpandOpCase2(TestBlockExpandOp): self.attrs = { 'kernels': [2, 1], 'strides': [2, 1], - 'paddings': [2, 1, 2, 1] + 'paddings': [2, 1, 2, 1], } class TestBlockExpandOpCase3(TestBlockExpandOp): def config(self): - self.batch_size = 3 + self.batch_size = 2 self.img_channels = 1 self.img_height = 4 self.img_width = 5 self.attrs = { 'kernels': [2, 1], 'strides': [2, 1], - 'paddings': [2, 0, 2, 0] + 'paddings': [2, 0, 2, 0], } @@ -159,9 +188,94 @@ class TestBlockExpandOpCase4(TestBlockExpandOp): self.attrs = { 'kernels': [2, 2], 'strides': [1, 1], - 'paddings': [0, 0, 0, 0] + 'paddings': [0, 0, 0, 0], + } + + +class TestBlockExpandOpCase5(OpTest): + def config(self): + self.batch_size = 1 + self.img_channels = 3 + self.img_height = 4 + self.img_width = 5 + self.attrs = { + 'kernels': [2, 1], + 'strides': [2, 1], + 'paddings': [2, 1, 2, 1], + 'out_stride': [2, 2], + } + + def setUp(self): + self.config() + self.op_type = "im2sequence" + x = np.random.uniform(0.1, 1, [ + self.batch_size, self.img_channels, self.img_height, self.img_width + ]).astype("float32") + real_size = np.array([[8, 10], [5, 8]]).astype("float32") + out = np.array(Im2Sequence(x, real_size, self.attrs)) + self.inputs = {'X': x, 'Y': real_size} #l ?? + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + +class TestBlockExpandOpCase6(OpTest): + def config(self): + self.batch_size = 3 + self.img_channels = 1 + self.img_height = 4 + self.img_width = 5 + self.attrs = { + 'kernels': [2, 1], + 'strides': [1, 1], + 'paddings': [0, 0, 0, 0], + 'out_stride': [1, 1], + } + + def setUp(self): + self.config() + self.op_type = "im2sequence" + x = np.random.uniform(0.1, 1, [ + self.batch_size, self.img_channels, self.img_height, self.img_width + ]).astype("float32") + real_size = np.array([[8, 10], [5, 8], [5, 8]]).astype("float32") + out = np.array(Im2Sequence(x, real_size, self.attrs)) + self.inputs = {'X': x, 'Y': real_size} #l ?? + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + +class TestBlockExpandOpCase7(OpTest): + def config(self): + self.batch_size = 2 + self.img_channels = 2 + self.img_height = 3 + self.img_width = 3 + self.attrs = { + 'kernels': [2, 2], + 'strides': [1, 1], + 'paddings': [1, 0, 1, 0], + 'out_stride': [2, 2], } + def setUp(self): + self.config() + self.op_type = "im2sequence" + x = np.random.uniform(0.1, 1, [ + self.batch_size, self.img_channels, self.img_height, self.img_width + ]).astype("float32") + real_size = np.array([[6, 6], [4, 4]]).astype("float32") + out = np.array(Im2Sequence(x, real_size, self.attrs)) + self.inputs = {'X': x, 'Y': real_size} + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + if __name__ == '__main__': unittest.main() +#set shiftwidth=4 set expandtab set tabstop=4 diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 842d34c07e94a79e3351347e2528ecc478cc56dc..82418f34ccb7e665a041079a19880c7bb34b0b0f 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import paddle.fluid.layers as layers +from paddle.fluid.layers.device import get_places import paddle.fluid.nets as nets from paddle.fluid.framework import Program, program_guard, default_main_program from paddle.fluid.param_attr import ParamAttr @@ -238,7 +239,7 @@ class TestBook(unittest.TestCase): def test_get_places(self): program = Program() with program_guard(program): - x = layers.get_places(device_count=4) + x = get_places(device_count=4) self.assertIsNotNone(x) print(str(program)) @@ -251,12 +252,16 @@ class TestBook(unittest.TestCase): print(str(program)) def test_im2sequence(self): - print("test_im2sequence") program = Program() with program_guard(program): x = layers.data(name='x', shape=[3, 128, 128], dtype='float32') + y = layers.data(name='y', shape=[], dtype='float32') output = layers.im2sequence( - input=x, stride=[1, 1], filter_size=[2, 2]) + input=x, + input_image_size=y, + stride=[1, 1], + filter_size=[2, 2], + out_stride=[1, 1]) self.assertIsNotNone(output) print(str(program)) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 7286c7c450108c4b5ad7136041bc4e989894a2ba..43385691bb3960004b5b69a1c55e41dd4252fa71 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -97,7 +97,7 @@ class TestMomentumOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) - opts = momentum_optimizer.create_optimization_pass( + opts = momentum_optimizer._create_optimization_pass( params_grads, mul_out, init_program) self.assertEqual(len(opts), 3) sgd_op = opts[-1] @@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) - opts = momentum_optimizer.create_optimization_pass( + opts = momentum_optimizer._create_optimization_pass( params_grads, mul_out, init_program) self.assertEqual(len(opts), 3) sgd_op = opts[-1] @@ -214,8 +214,8 @@ class TestAdagradOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) - opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out, - init_program) + opts = adagrad_optimizer._create_optimization_pass( + params_grads, mul_out, init_program) self.assertEqual(len(opts), 3) self.assertEqual([op.type for op in opts], ["fill_constant", "elementwise_mul", "adagrad"]) @@ -278,8 +278,8 @@ class TestAdamOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adam_optimizer.get_accumulators()), 0) - opts = adam_optimizer.create_optimization_pass(params_grads, mul_out, - init_program) + opts = adam_optimizer._create_optimization_pass(params_grads, mul_out, + init_program) self.assertEqual(len(opts), 5) self.assertEqual( [op.type for op in opts], @@ -345,8 +345,8 @@ class TestAdamaxOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) - opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out, - init_program) + opts = adamax_optimizer._create_optimization_pass(params_grads, mul_out, + init_program) self.assertEqual(len(opts), 4) self.assertEqual( [op.type for op in opts], @@ -409,7 +409,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0) - opts = decayed_adagrad_optimizer.create_optimization_pass( + opts = decayed_adagrad_optimizer._create_optimization_pass( params_grads, mul_out, init_program) self.assertEqual(len(opts), 3) self.assertEqual( @@ -475,8 +475,8 @@ class TestFtrlOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0) - opts = ftrl_optimizer.create_optimization_pass(params_grads, mul_out, - init_program) + opts = ftrl_optimizer._create_optimization_pass(params_grads, mul_out, + init_program) self.assertEqual(len(opts), 3) self.assertEqual([op.type for op in opts], ["fill_constant", "elementwise_mul", "ftrl"]) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_op.py b/python/paddle/fluid/tests/unittests/test_parallel_op.py index 9ba5f988f317a515b77c0b428da236626419a2c3..9ec05e02973138e3ec233ef07f98afd598ec86b1 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_op.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_op.py @@ -15,6 +15,7 @@ import unittest import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places import paddle.fluid.profiler as profiler import numpy @@ -115,7 +116,7 @@ class BaseParallelForTest(unittest.TestCase): if use_parallel: thread_num = fluid.core.get_cuda_device_count( ) if use_gpu else 8 - places = fluid.layers.get_places(thread_num) + places = get_places(thread_num) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) data = next(generator) diff --git a/python/paddle/fluid/tests/unittests/test_squeeze_op.py b/python/paddle/fluid/tests/unittests/test_squeeze_op.py new file mode 100644 index 0000000000000000000000000000000000000000..bca6af2fd5dfadbc48cf1a76cfa6ffd4f1fdfdef --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_squeeze_op.py @@ -0,0 +1,114 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np + +from op_test import OpTest + + +# Correct: General. +class TestSqueezeOp(OpTest): + def setUp(self): + self.op_type = "squeeze" + self.init_test_case() + self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")} + self.init_attrs() + self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + def init_test_case(self): + self.ori_shape = (1, 3, 1, 5) + self.axes = (0, 2) + self.new_shape = (3, 5) + + def init_attrs(self): + self.attrs = {"axes": self.axes, "inplace": False} + + +# Correct: There is mins axis. +class TestSqueezeOp1(TestSqueezeOp): + def init_test_case(self): + self.ori_shape = (1, 3, 1, 5) + self.axes = (0, -2) + self.new_shape = (3, 5) + + +# Correct: No axes input. +class TestSqueezeOp2(TestSqueezeOp): + def init_test_case(self): + self.ori_shape = (1, 3, 1, 5) + self.axes = () + self.new_shape = (3, 5) + + +# Correct: Just part of axes be squeezed. +class TestSqueezeOp3(TestSqueezeOp): + def init_test_case(self): + self.ori_shape = (3, 1, 5, 1, 4, 1) + self.axes = (1, -1) + self.new_shape = (3, 5, 1, 4) + + +# Correct: Inplace. +class TestSqueezeOpInplace1(TestSqueezeOp): + def init_test_case(self): + self.ori_shape = (1, 3, 1, 5) + self.axes = (0, 2) + self.new_shape = (3, 5) + + def init_attrs(self): + self.attrs = {"axes": self.axes, "inplace": True} + + +# Correct: Inplace. There is mins axis. +class TestSqueezeOpInplace2(TestSqueezeOp): + def inti_test_case(self): + self.ori_shape = (1, 3, 1, 5) + self.axes = (0, -2) + self.new_shape = (3, 5) + + def init_attrs(self): + self.attrs = {"axes": self.axes, "inplace": True} + + +# Correct: Inplace. No axes input. +class TestSqueezeOpInplace3(TestSqueezeOp): + def init_test_case(self): + self.ori_shape = (1, 3, 1, 5) + self.axes = () + self.new_shape = (3, 5) + + def init_attrs(self): + self.attrs = {"axes": self.axes, "inplace": True} + + +# Correct: Inpalce. Just part of axes be squeezed. +class TestSqueezeOpInplace4(TestSqueezeOp): + def init_test_case(self): + self.ori_shape = (3, 1, 5, 1, 4, 1) + self.axes = (1, -1) + self.new_shape = (3, 5, 1, 4) + + def init_attrs(self): + self.attrs = {"axes": self.axes, "inplace": True} + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py new file mode 100644 index 0000000000000000000000000000000000000000..7a4aa0a40b5eb494f6027e800ca6b466bbe1c302 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py @@ -0,0 +1,111 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np + +from op_test import OpTest + + +# Correct: General. +class TestUnsqueezeOp(OpTest): + def setUp(self): + self.init_test_case() + self.op_type = "unsqueeze" + self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")} + self.init_attrs() + self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + def init_test_case(self): + self.ori_shape = (3, 5) + self.axes = (1, 2) + self.new_shape = (3, 1, 1, 5) + + def init_attrs(self): + self.attrs = {"axes": self.axes, "inplace": False} + + +# Correct: Single input index. +class TestUnsqueezeOp1(TestUnsqueezeOp): + def init_test_case(self): + self.ori_shape = (3, 5) + self.axes = (-1, ) + self.new_shape = (3, 5, 1) + + +# Correct: Mixed input axis. +class TestUnsqueezeOp2(TestUnsqueezeOp): + def init_test_case(self): + self.ori_shape = (3, 5) + self.axes = (0, -1) + self.new_shape = (1, 3, 5, 1) + + +# Correct: There is duplicated axis. +class TestUnsqueezeOp3(TestUnsqueezeOp): + def init_test_case(self): + self.ori_shape = (3, 2, 5) + self.axes = (0, 3, 3) + self.new_shape = (1, 3, 2, 1, 1, 5) + + +# Correct: Reversed axes. +class TestUnsqueezeOp4(TestUnsqueezeOp): + def init_test_case(self): + self.ori_shape = (3, 2, 5) + self.axes = (3, 1, 1) + self.new_shape = (3, 1, 1, 2, 5, 1) + + +# Correct: Inplace. +class TestUnsqueezeOpInplace1(TestUnsqueezeOp): + def init_test_case(self): + self.ori_shape = (3, 5) + self.axes = (0, 2) + self.new_shape = (1, 3, 1, 5) + + def init_attrs(self): + self.attrs = {"axes": self.axes, "inplace": True} + + +# Correct: Inplace. There is mins index. +class TestUnsqueezeOpInplace2(TestUnsqueezeOp): + def init_test_case(self): + self.ori_shape = (3, 5) + self.axes = (0, -2) + self.new_shape = (1, 3, 1, 5) + + def init_attrs(self): + self.attrs = {"axes": self.axes, "inplace": True} + + +# Correct: Inplace. There is duplicated axis. +class TestUnsqueezeOpInplace3(TestUnsqueezeOp): + def init_test_case(self): + self.ori_shape = (3, 2, 5) + self.axes = (0, 3, 3) + self.new_shape = (1, 3, 2, 1, 1, 5) + + def init_attrs(self): + self.attrs = {"axes": self.axes, "inplace": True} + + +if __name__ == "__main__": + unittest.main() diff --git a/python/setup.py.in b/python/setup.py.in index d620b5418208e0deebe98a62c7ea2963cee465b7..3e81ee7b17e52ed559464023cf7dfe373e5b090f 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -181,6 +181,14 @@ else: command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so" if os.system(command) != 0: raise Exception("patch core.so failed, command: %s" % command) +if '${WITH_FLUID_ONLY}'== 'OFF': + # change rpath of _swig_paddle.so. + if "@APPLE@" == "1": + command = "install_name_tool -id \"@loader_path/../paddle/libs/\" ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so" + else: + command = "patchelf --set-rpath '$ORIGIN/../paddle/libs/' ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so" + if os.system(command) != 0: + raise Exception("patch _swig_paddle.so failed, command: %s" % command) setup(name='${PACKAGE_NAME}', version='${PADDLE_VERSION}',