提交 dbaaca78 编写于 作者: M minqiyang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into port_py3

...@@ -45,6 +45,10 @@ endfunction(inference_api_test) ...@@ -45,6 +45,10 @@ endfunction(inference_api_test)
cc_library(paddle_inference_api cc_library(paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
if(NOT APPLE)
set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_api.sym")
set_target_properties(paddle_inference_api PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
endif()
# Here the shared library doesn't depend on other fluid libraries, or double free will occur. # Here the shared library doesn't depend on other fluid libraries, or double free will occur.
cc_library(paddle_inference_api_shared SHARED cc_library(paddle_inference_api_shared SHARED
...@@ -53,8 +57,19 @@ add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB ...@@ -53,8 +57,19 @@ add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB
set_target_properties(paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api) set_target_properties(paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api)
if(NOT APPLE) if(NOT APPLE)
set(LINK_FLAGS "-fPIC -fvisibility=hidden") set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_api.map")
set_target_properties(paddle_inference_api_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}") set_target_properties(paddle_inference_api_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake
"execute_process(COMMAND bash -c \"${CMAKE_CURRENT_SOURCE_DIR}/check_symbol.sh"
" ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_inference_api.so\" RESULT_VARIABLE symbol_res)\n"
"if(NOT \"\${symbol_res}\" STREQUAL \"0\")\n"
" message(FATAL_ERROR \"Check symbol failed.\")\n"
"endif()\n")
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol"
COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake"
DEPENDS paddle_inference_api_shared)
add_custom_target(check_symbol ALL DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol")
endif() endif()
cc_test(test_paddle_inference_api cc_test(test_paddle_inference_api
......
#!/bin/bash
lib=$1
if [ $# -ne 1 ]; then echo "No input library"; exit -1 ; fi
num_paddle_syms=$(nm -D --defined-only ${lib} | grep paddle | wc -l)
num_google_syms=$(nm -D --defined-only ${lib} | grep google | wc -l)
if [ $num_paddle_syms -le 0 ]; then echo "Have no paddle symbols"; exit -1 ; fi
if [ $num_google_syms -ge 1 ]; then echo "Have some google symbols"; exit -1 ; fi
exit 0
...@@ -13,8 +13,6 @@ ...@@ -13,8 +13,6 @@
# limitations under the License. # limitations under the License.
# #
inference_api_test(simple_on_word2vec ARGS test_word2vec)
option(WITH_INFERENCE_DEMO "Compile with Inference demo" OFF) option(WITH_INFERENCE_DEMO "Compile with Inference demo" OFF)
if(NOT WITH_INFERENCE_DEMO) if(NOT WITH_INFERENCE_DEMO)
return() return()
......
cmake_minimum_required(VERSION 3.0)
project(cpp_inference_demo CXX C)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if(NOT DEFINED PADDLE_LIB)
message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
endif()
if(NOT DEFINED DEMO_NAME)
message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
endif()
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
if(WITH_GPU)
set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
endif()
include_directories("${PADDLE_LIB}")
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
include_directories("${PADDLE_LIB}/third_party/install/glog/include")
include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
include_directories("${PADDLE_LIB}/third_party/install/snappy/include")
include_directories("${PADDLE_LIB}/third_party/install/snappystream/include")
include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
include_directories("${PADDLE_LIB}/third_party/boost")
include_directories("${PADDLE_LIB}/third_party/eigen3")
link_directories("${PADDLE_LIB}/third_party/install/snappy/lib")
link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib")
link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories("${PADDLE_LIB}/third_party/install/zlib/lib")
add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
if(WITH_MKL)
include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel.so
${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5.so)
set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn")
if(EXISTS ${MKLDNN_PATH})
include_directories("${MKLDNN_PATH}/include")
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
endif()
else()
set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas.a)
endif()
if(WITH_STATIC_LIB)
set(DEPS
"-Wl,--whole-archive"
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.a
"-Wl,--no-whole-archive"
${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.a)
else()
# Note: libpaddle_inference_api.so must put before libpaddle_fluid.so
set(DEPS
${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.so
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.so)
endif()
set(EXTERNAL_LIB "-lrt -ldl -lpthread")
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
glog gflags protobuf snappystream snappy z
${EXTERNAL_LIB})
if(WITH_GPU)
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart.so)
endif()
target_link_libraries(${DEMO_NAME} ${DEPS})
set -x
PADDLE_ROOT=$1
WITH_MKL=$2
WITH_GPU=$3
if [ $3 == "ON" ]; then
use_gpu_list='true false'
else
use_gpu_list='false'
fi
mkdir -p build
cd build
for WITH_STATIC_LIB in false; do
rm -rf *
cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
-DWITH_MKL=$WITH_MKL \
-DDEMO_NAME=simple_on_word2vec \
-DWITH_GPU=$WITH_GPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB
make
for use_gpu in $use_gpu_list; do
./simple_on_word2vec \
--dirname=${PADDLE_ROOT}/build/python/paddle/fluid/tests/book/word2vec.inference.model \
--use_gpu=$use_gpu
done
done
if [ $? -eq 0 ]; then
exit 0
else
echo "inference demo runs fail."
exit 1
fi
set +x
...@@ -16,21 +16,27 @@ limitations under the License. */ ...@@ -16,21 +16,27 @@ limitations under the License. */
* This file contains a simple demo for how to take a model for inference. * This file contains a simple demo for how to take a model for inference.
*/ */
#include <gflags/gflags.h>
#include <glog/logging.h> #include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory> #include <memory>
#include <thread> #include <thread>
#include "paddle/contrib/inference/paddle_inference_api.h" #include "contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
DEFINE_bool(use_gpu, false, "Whether use gpu.");
namespace paddle { namespace paddle {
namespace demo { namespace demo {
DEFINE_string(dirname, "", "Directory of the inference model.");
void Main(bool use_gpu) { void Main(bool use_gpu) {
//# 1. Create PaddlePredictor with a config. //# 1. Create PaddlePredictor with a config.
NativeConfig config; NativeConfig config;
config.model_dir = FLAGS_dirname + "word2vec.inference.model"; if (FLAGS_dirname.empty()) {
LOG(INFO) << "Usage: ./simple_on_word2vec --dirname=path/to/your/model";
exit(1);
}
config.model_dir = FLAGS_dirname;
config.use_gpu = use_gpu; config.use_gpu = use_gpu;
config.fraction_of_gpu_memory = 0.15; config.fraction_of_gpu_memory = 0.15;
config.device = 0; config.device = 0;
...@@ -54,12 +60,16 @@ void Main(bool use_gpu) { ...@@ -54,12 +60,16 @@ void Main(bool use_gpu) {
CHECK(predictor->Run(slots, &outputs)); CHECK(predictor->Run(slots, &outputs));
//# 4. Get output. //# 4. Get output.
ASSERT_EQ(outputs.size(), 1UL); PADDLE_ENFORCE(outputs.size(), 1UL);
LOG(INFO) << "output buffer size: " << outputs.front().data.length(); // Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
float result[5] = {
0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float); const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory. // The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) { for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i]; PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
} }
} }
} }
...@@ -68,7 +78,7 @@ void MainThreads(int num_threads, bool use_gpu) { ...@@ -68,7 +78,7 @@ void MainThreads(int num_threads, bool use_gpu) {
// Multi-threads only support on CPU // Multi-threads only support on CPU
// 0. Create PaddlePredictor with a config. // 0. Create PaddlePredictor with a config.
NativeConfig config; NativeConfig config;
config.model_dir = FLAGS_dirname + "word2vec.inference.model"; config.model_dir = FLAGS_dirname;
config.use_gpu = use_gpu; config.use_gpu = use_gpu;
config.fraction_of_gpu_memory = 0.15; config.fraction_of_gpu_memory = 0.15;
config.device = 0; config.device = 0;
...@@ -94,14 +104,17 @@ void MainThreads(int num_threads, bool use_gpu) { ...@@ -94,14 +104,17 @@ void MainThreads(int num_threads, bool use_gpu) {
CHECK(predictor->Run(inputs, &outputs)); CHECK(predictor->Run(inputs, &outputs));
// 4. Get output. // 4. Get output.
ASSERT_EQ(outputs.size(), 1UL); PADDLE_ENFORCE(outputs.size(), 1UL);
LOG(INFO) << "TID: " << tid << ", " // Check the output buffer size and result of each tid.
<< "output buffer size: " << outputs.front().data.length(); PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
float result[5] = {
0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706};
const size_t num_elements = const size_t num_elements =
outputs.front().data.length() / sizeof(float); outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory. // The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) { for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i]; PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
} }
} }
}); });
...@@ -111,15 +124,18 @@ void MainThreads(int num_threads, bool use_gpu) { ...@@ -111,15 +124,18 @@ void MainThreads(int num_threads, bool use_gpu) {
} }
} }
TEST(demo, word2vec_cpu) { Main(false /*use_gpu*/); }
TEST(demo_multi_threads, word2vec_cpu_1) { MainThreads(1, false /*use_gpu*/); }
TEST(demo_multi_threads, word2vec_cpu_4) { MainThreads(4, false /*use_gpu*/); }
#ifdef PADDLE_WITH_CUDA
TEST(demo, word2vec_gpu) { Main(true /*use_gpu*/); }
TEST(demo_multi_threads, word2vec_gpu_1) { MainThreads(1, true /*use_gpu*/); }
TEST(demo_multi_threads, word2vec_gpu_4) { MainThreads(4, true /*use_gpu*/); }
#endif
} // namespace demo } // namespace demo
} // namespace paddle } // namespace paddle
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
paddle::demo::Main(false /* use_gpu*/);
paddle::demo::MainThreads(1, false /* use_gpu*/);
paddle::demo::MainThreads(4, false /* use_gpu*/);
if (FLAGS_use_gpu) {
paddle::demo::Main(true /*use_gpu*/);
paddle::demo::MainThreads(1, true /*use_gpu*/);
paddle::demo::MainThreads(4, true /*use_gpu*/);
}
return 0;
}
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h" #include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include <stdexcept>
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
...@@ -53,8 +54,14 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( ...@@ -53,8 +54,14 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
} }
} }
} }
std::vector<framework::LoDTensor> fetch_data;
std::exception_ptr eptr;
try {
fetch_data = underlying_executor_->Run(fetch_tensors);
} catch (...) {
eptr = std::current_exception();
}
auto fetch_data = underlying_executor_->Run(fetch_tensors);
drop_scope_counter_ += 1; drop_scope_counter_ += 1;
if (!fetch_tensors.empty() || if (!fetch_tensors.empty() ||
drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) { drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) {
...@@ -69,7 +76,11 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( ...@@ -69,7 +76,11 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
scope->DeleteScope(local_scope); scope->DeleteScope(local_scope);
} }
} }
if (eptr) {
std::rethrow_exception(eptr);
} else {
return fetch_data; return fetch_data;
}
} }
} // namespace details } // namespace details
} // namespace framework } // namespace framework
......
...@@ -78,6 +78,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( ...@@ -78,6 +78,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
set.clear(); set.clear();
}; };
// Clean run context
run_op_futures_.clear();
exception_.reset();
// Step 3. Execution // Step 3. Execution
while (!pending_vars.empty()) { while (!pending_vars.empty()) {
// 1. Run All Ready ops // 1. Run All Ready ops
...@@ -96,16 +100,19 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( ...@@ -96,16 +100,19 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
auto cur_ready_vars = ready_vars.PopAll(1, &timeout); auto cur_ready_vars = ready_vars.PopAll(1, &timeout);
if (timeout) { if (timeout) {
std::lock_guard<std::mutex> l(exception_mu_); std::unique_lock<std::mutex> l(exception_mu_);
if (exception_) { if (exception_) {
l.unlock();
for (auto &run_op_future : run_op_futures_) {
run_op_future.wait();
}
l.lock();
std::exception *exp = exception_.get(); std::exception *exp = exception_.get();
if (dynamic_cast<platform::EOFException *>(exp)) { if (dynamic_cast<platform::EOFException *>(exp)) {
auto e = *static_cast<platform::EOFException *>(exp); auto e = *static_cast<platform::EOFException *>(exp);
exception_.reset();
throw e; throw e;
} else if (dynamic_cast<platform::EnforceNotMet *>(exp)) { } else if (dynamic_cast<platform::EnforceNotMet *>(exp)) {
auto e = *static_cast<platform::EnforceNotMet *>(exp); auto e = *static_cast<platform::EnforceNotMet *>(exp);
exception_.reset();
throw e; throw e;
} else { } else {
LOG(FATAL) << "Unknown exception."; LOG(FATAL) << "Unknown exception.";
...@@ -222,7 +229,7 @@ void ThreadedSSAGraphExecutor::RunOp( ...@@ -222,7 +229,7 @@ void ThreadedSSAGraphExecutor::RunOp(
} }
}; };
if (pool_) { if (pool_) {
pool_->enqueue(op_run); run_op_futures_.emplace_back(pool_->enqueue(op_run));
} else { } else {
op_run(); op_run();
} }
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#pragma once #pragma once
#include <deque> #include <deque>
#include <list>
#include <string> #include <string>
#include <unordered_set> #include <unordered_set>
#include <utility> #include <utility>
...@@ -77,6 +78,8 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { ...@@ -77,6 +78,8 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
private: private:
ExecutionStrategy strategy_; ExecutionStrategy strategy_;
// use std::list because clear(), push_back, and for_each are O(1)
std::list<std::future<void>> run_op_futures_;
}; };
} // namespace details } // namespace details
......
...@@ -13,6 +13,12 @@ endif() ...@@ -13,6 +13,12 @@ endif()
# Create static library # Create static library
cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api) cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api)
if(NOT APPLE)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym")
set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
endif()
# Create shared library # Create shared library
cc_library(paddle_fluid_shared SHARED cc_library(paddle_fluid_shared SHARED
SRCS io.cc SRCS io.cc
......
...@@ -90,6 +90,20 @@ std::string DataFlowGraph::DotString() const { ...@@ -90,6 +90,20 @@ std::string DataFlowGraph::DotString() const {
return dot.Build(); return dot.Build();
} }
std::string DataFlowGraph::HumanReadableInfo(bool show_values,
bool show_functions) const {
std::stringstream values, functions;
for (auto &n : nodes.nodes()) {
if (show_values && n->IsValue()) {
values << n->repr() << "\n";
}
if (show_functions && n->IsFunction()) {
functions << n->repr() << "\n";
}
}
return "Values:\n" + values.str() + "\n\n" + "Functions:\n" + functions.str();
}
// //
// NodesBFSIterator // NodesBFSIterator
// //
...@@ -208,6 +222,76 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() { ...@@ -208,6 +222,76 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
return stack_.top(); return stack_.top();
} }
GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
const std::vector<Node *> &source) {
PADDLE_ENFORCE(!source.empty(),
"Start points of topological sorting should not be empty!");
std::unordered_set<Node *> visited;
std::unordered_set<Node *> to_visit{source.begin(), source.end()};
std::vector<Node *> inlink_visited;
while (!to_visit.empty()) {
std::vector<Node *> queue(to_visit.begin(), to_visit.end());
for (auto *p : queue) {
inlink_visited.clear();
std::copy_if(p->inlinks.begin(), p->inlinks.end(),
std::back_inserter(inlink_visited),
[&](Node *x) { return visited.count(x); });
if (inlink_visited.size() == p->inlinks.size()) {
sorted_.push_back(p);
for (auto *_ : p->outlinks) {
if (!visited.count(_)) {
to_visit.insert(_);
}
}
to_visit.erase(p);
visited.insert(p);
}
}
}
}
GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
const paddle::inference::analysis::GraphTraits<
DataFlowGraph>::NodesTSIterator &other)
: sorted_(other.sorted_), cursor_(other.cursor_) {}
Node &GraphTraits<DataFlowGraph>::NodesTSIterator::operator*() {
PADDLE_ENFORCE_LT(cursor_, sorted_.size());
return *sorted_[cursor_];
}
paddle::inference::analysis::GraphTraits<DataFlowGraph>::NodesTSIterator
&GraphTraits<DataFlowGraph>::NodesTSIterator::operator++() {
if (++cursor_ >= sorted_.size()) {
sorted_.clear();
cursor_ = 0;
}
return *this;
}
paddle::inference::analysis::GraphTraits<DataFlowGraph>::NodesTSIterator &
GraphTraits<DataFlowGraph>::NodesTSIterator::operator=(
const paddle::inference::analysis::GraphTraits<
DataFlowGraph>::NodesTSIterator &other) {
cursor_ = other.cursor_;
sorted_ = other.sorted_;
return *this;
}
bool GraphTraits<DataFlowGraph>::NodesTSIterator::operator==(
const paddle::inference::analysis::GraphTraits<
DataFlowGraph>::NodesTSIterator &other) {
return sorted_ == other.sorted_ && cursor_ == other.cursor_;
}
Node *GraphTraits<DataFlowGraph>::NodesTSIterator::operator->() {
PADDLE_ENFORCE_LT(cursor_, sorted_.size());
return sorted_[cursor_];
}
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
...@@ -48,6 +48,9 @@ struct DataFlowGraph { ...@@ -48,6 +48,9 @@ struct DataFlowGraph {
// Output a DOT graph file for debug. // Output a DOT graph file for debug.
std::string DotString() const; std::string DotString() const;
std::string HumanReadableInfo(bool show_values = true,
bool show_functions = true) const;
private: private:
// Remove duplicate edges and so on. // Remove duplicate edges and so on.
void Clean(); void Clean();
...@@ -107,6 +110,32 @@ struct GraphTraits<DataFlowGraph> { ...@@ -107,6 +110,32 @@ struct GraphTraits<DataFlowGraph> {
std::unordered_set<Node *> visited_; std::unordered_set<Node *> visited_;
}; };
// Topological sorting iterator on nodes.
struct NodesTSIterator
: public std::iterator<std::forward_iterator_tag, Node *> {
NodesTSIterator() = default;
explicit NodesTSIterator(const std::vector<Node *> &source);
NodesTSIterator(NodesTSIterator &&other)
: sorted_(std::move(other.sorted_)), cursor_(other.cursor_) {
other.cursor_ = 0;
}
NodesTSIterator(const NodesTSIterator &other);
Node &operator*();
NodesTSIterator &operator++();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesTSIterator &operator=(const NodesTSIterator &other);
bool operator==(const NodesTSIterator &other);
bool operator!=(const NodesTSIterator &other) { return !(*this == other); }
Node *operator->();
private:
std::vector<Node *> sorted_;
int cursor_{0};
};
explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {} explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {}
// default use BFS to visit the nodes. // default use BFS to visit the nodes.
...@@ -119,17 +148,24 @@ struct GraphTraits<DataFlowGraph> { ...@@ -119,17 +148,24 @@ struct GraphTraits<DataFlowGraph> {
iterator_range<NodesDFSIterator> nodes_in_DFS() { iterator_range<NodesDFSIterator> nodes_in_DFS() {
return iterator_range<NodesDFSIterator>(nodes_dfs_begin(), nodes_dfs_end()); return iterator_range<NodesDFSIterator>(nodes_dfs_begin(), nodes_dfs_end());
} }
iterator_range<NodesTSIterator> nodes_in_TS() {
return iterator_range<NodesTSIterator>(nodes_ts_begin(), nodes_ts_end());
}
private: private:
NodesBFSIterator nodes_bfs_begin() { NodesBFSIterator nodes_bfs_begin() {
return NodesBFSIterator(graph_->inputs); return NodesBFSIterator(graph_->inputs);
} }
NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); } NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); }
NodesDFSIterator nodes_dfs_begin() { NodesDFSIterator nodes_dfs_begin() {
return NodesDFSIterator(graph_->inputs); return NodesDFSIterator(graph_->inputs);
} }
NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); } NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); }
NodesTSIterator nodes_ts_begin() { return NodesTSIterator(graph_->inputs); }
NodesTSIterator nodes_ts_end() { return NodesTSIterator(); }
private: private:
DataFlowGraph *graph_; DataFlowGraph *graph_;
}; };
......
...@@ -24,11 +24,11 @@ TEST(DataFlowGraph, BFS) { ...@@ -24,11 +24,11 @@ TEST(DataFlowGraph, BFS) {
auto dfg = ProgramDescToDFG(desc); auto dfg = ProgramDescToDFG(desc);
dfg.Build(); dfg.Build();
for (auto* in : dfg.inputs) { for (auto *in : dfg.inputs) {
LOG(INFO) << "inputs: " << in->name() << " " LOG(INFO) << "inputs: " << in->name() << " "
<< static_cast<int>(in->type()); << static_cast<int>(in->type());
} }
for (auto* out : dfg.outputs) { for (auto *out : dfg.outputs) {
LOG(INFO) << "outputs: " << out->name() << " " LOG(INFO) << "outputs: " << out->name() << " "
<< static_cast<int>(out->type()); << static_cast<int>(out->type());
} }
...@@ -57,6 +57,71 @@ TEST(DataFlowGraph, DFS) { ...@@ -57,6 +57,71 @@ TEST(DataFlowGraph, DFS) {
ASSERT_EQ(count, dfg.nodes.size()); ASSERT_EQ(count, dfg.nodes.size());
} }
// Topological sorting.
/*
* Graph topology
* inputs: 0, 1, 2
* 0 -> 4
* 0 -> 5
* 1 -> 6
* 2 -> 7
* 4 -> 5
* 4 -> 7
* 4 -> 3
* 7 -> 3
*/
TEST(DataFlowGraph, TS) {
DataFlowGraph graph;
for (int i = 0; i < 8; i++) {
auto *node = graph.nodes.Create(Node::Type::kValue);
node->SetName("node-" + std::to_string(i));
}
auto add_link = [&](int i, int j) {
Node *source = graph.nodes.GetMutable(i);
Node *target = graph.nodes.GetMutable(j);
target->inlinks.push_back(source);
source->outlinks.push_back(target);
};
graph.inputs.push_back(graph.nodes.GetMutable(0));
graph.inputs.push_back(graph.nodes.GetMutable(1));
graph.inputs.push_back(graph.nodes.GetMutable(2));
add_link(0, 4);
add_link(0, 5);
add_link(1, 6);
add_link(2, 7);
add_link(4, 5);
add_link(4, 7);
add_link(4, 3);
add_link(7, 3);
auto its = GraphTraits<DataFlowGraph>(&graph).nodes_in_TS();
std::vector<int> sorted_ids;
for (auto it = its.begin(); it != its.end(); ++it) {
LOG(INFO) << it->name();
sorted_ids.push_back(it->id());
}
// Assert a occurs prior to b in the sorted_ids.
auto assert_positive_sequence_pair = [&](int a, int b) {
auto a_offset = std::find(sorted_ids.begin(), sorted_ids.end(), a);
auto b_offset = std::find(sorted_ids.begin(), sorted_ids.end(), b);
ASSERT_LT(a_offset, b_offset);
};
assert_positive_sequence_pair(2, 7);
assert_positive_sequence_pair(7, 3);
assert_positive_sequence_pair(4, 3);
assert_positive_sequence_pair(0, 4);
assert_positive_sequence_pair(0, 5);
assert_positive_sequence_pair(1, 6);
assert_positive_sequence_pair(4, 5);
assert_positive_sequence_pair(4, 7);
}
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
...@@ -265,6 +265,8 @@ op_library(recurrent_op DEPS executor) ...@@ -265,6 +265,8 @@ op_library(recurrent_op DEPS executor)
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale) op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
op_library(cos_sim_op DEPS cos_sim_functor) op_library(cos_sim_op DEPS cos_sim_functor)
op_library(parallel_do_op DEPS executor) op_library(parallel_do_op DEPS executor)
op_library(unsqueeze_op DEPS reshape_op)
op_library(squeeze_op DEPS reshape_op)
if (WITH_GPU) if (WITH_GPU)
op_library(conv_op DEPS vol2col depthwise_conv im2col) op_library(conv_op DEPS vol2col depthwise_conv im2col)
......
...@@ -29,6 +29,79 @@ using mkldnn::stream; ...@@ -29,6 +29,79 @@ using mkldnn::stream;
using platform::to_void_cast; using platform::to_void_cast;
using platform::GetMKLDNNFormat; using platform::GetMKLDNNFormat;
class ConvMKLDNNHandler : public platform::MKLDNNHandler {
public:
ConvMKLDNNHandler(
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd,
const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
const std::string& base_key)
: platform::MKLDNNHandler(dev_ctx, engine, base_key) {
conv_pd_ = conv_pd;
}
std::shared_ptr<mkldnn::memory> AcquireDstMemoryFromPrimitive(void* ptr) {
return this->AcquireMemoryFromPrimitive(conv_pd_->dst_primitive_desc(), ptr,
"@dst_mem_p");
}
std::shared_ptr<mkldnn::memory> AcquireSrcMemoryFromPrimitive(
const std::shared_ptr<mkldnn::memory> user_memory_p,
std::vector<mkldnn::primitive>& pipeline) {
auto src_pd = conv_pd_->src_primitive_desc();
auto user_pd = user_memory_p->get_primitive_desc();
return this->AcquireMemory(src_pd, user_pd, user_memory_p, "@src_mem_p",
pipeline);
}
std::shared_ptr<mkldnn::memory> AcquireWeightsMemoryFromPrimitive(
const std::shared_ptr<mkldnn::memory> user_weights_memory_p,
std::vector<mkldnn::primitive>& pipeline) {
auto user_weights_pd = user_weights_memory_p->get_primitive_desc();
auto weights_pd = conv_pd_->weights_primitive_desc();
return this->AcquireMemory(weights_pd, user_weights_pd,
user_weights_memory_p, "@weights_mem_p",
pipeline);
}
std::shared_ptr<mkldnn::convolution_forward> AcquireConvolution(
std::shared_ptr<mkldnn::memory> src_memory_p,
std::shared_ptr<mkldnn::memory> weights_memory_p,
std::shared_ptr<mkldnn::memory> dst_memory_p) {
auto prim_key = key_ + "@conv_p";
auto prim_desc_key = key_ + "@conv_pd";
auto conv_p = std::static_pointer_cast<mkldnn::convolution_forward>(
dev_ctx_.GetBlob(prim_key));
PADDLE_ENFORCE((conv_p != nullptr) || (is_reusing_ == false),
"Fail to find convolution primitive in device context");
if (conv_p == nullptr) {
conv_p = std::make_shared<mkldnn::convolution_forward>(
*conv_pd_, *(src_memory_p), *(weights_memory_p.get()),
*(dst_memory_p.get()));
dev_ctx_.SetBlob(prim_key, conv_p);
} else {
is_reusing_ = true;
}
return conv_p;
}
// Generate keys for storing/retriving primitives for this operator
// TODO(jczaja): Make hashing function more optimial
static std::string GetHash(memory::dims& input_dims,
memory::dims& weights_dims,
std::vector<int>& strides,
std::vector<int>& paddings,
std::vector<int>& dilations, int groups,
const std::string& suffix) {
return dims2str(input_dims) + dims2str(weights_dims) + dims2str(strides) +
dims2str(paddings) + dims2str(dilations) + std::to_string(groups) +
suffix;
}
private:
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd_;
};
template <typename T> template <typename T>
class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public: public:
...@@ -36,10 +109,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -36,10 +109,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace."); "It must use CPUPlace.");
// Get unique name for index
const std::string key = ctx.op().Output("Output");
const std::string key_conv_pd = key + "@conv_pd";
auto& dev_ctx = auto& dev_ctx =
ctx.template device_context<paddle::platform::MKLDNNDeviceContext>(); ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& mkldnn_engine = dev_ctx.GetEngine();
...@@ -80,68 +149,62 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -80,68 +149,62 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle::framework::vectorize2int(filter->dims()); paddle::framework::vectorize2int(filter->dims());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims()); std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
// create mkldnn memory from input tensors (data/weights) // Get unique name for storing MKLDNN primitives
auto user_src_memory = memory( const std::string key = ConvMKLDNNHandler::GetHash(
{{{src_tz}, memory::data_type::f32, input->format()}, mkldnn_engine}, src_tz, weights_tz, strides, paddings, dilations, groups,
to_void_cast(input_data)); ctx.op().Output("Output"));
auto user_weights_memory = const std::string key_conv_pd = key + "@conv_pd";
memory({{{weights_tz}, memory::data_type::f32, filter->format()},
mkldnn_engine}, std::vector<primitive> pipeline;
to_void_cast(filter_data));
auto user_src_md = platform::MKLDNNMemDesc(
{src_tz}, platform::MKLDNNGetDataType<T>(), input->format());
auto user_weights_md = platform::MKLDNNMemDesc(
{weights_tz}, platform::MKLDNNGetDataType<T>(), filter->format());
/* create memory descriptor for convolution without specified format /* create memory descriptor for convolution without specified format
* ('any') which lets a primitive (convolution in this case) choose * ('any') which lets a primitive (convolution in this case) choose
* the memory format preferred for best performance * the memory format preferred for best performance
*/ */
auto src_md = platform::MKLDNNMemDesc(src_tz, memory::data_type::f32, auto src_md = platform::MKLDNNMemDesc(
memory::format::any); src_tz, platform::MKLDNNGetDataType<T>(), memory::format::any);
auto weights_md = platform::MKLDNNMemDesc( auto weights_md = platform::MKLDNNMemDesc(
weights_tz, memory::data_type::f32, memory::format::any); weights_tz, platform::MKLDNNGetDataType<T>(), memory::format::any);
auto dst_md = platform::MKLDNNMemDesc(dst_tz, memory::data_type::f32, auto dst_md = platform::MKLDNNMemDesc(
memory::format::any); dst_tz, platform::MKLDNNGetDataType<T>(), memory::format::any);
// create a conv primitive descriptor and save it for usage in backward // create a conv primitive descriptor and save it for usage in backward
std::shared_ptr<conv_fwd::primitive_desc> conv_pd = ConvFwdPrimitiveDesc( std::shared_ptr<conv_fwd::primitive_desc> conv_pd = ConvFwdPrimitiveDesc(
src_md, weights_md, dst_md, strides, paddings, mkldnn_engine); src_md, weights_md, dst_md, strides, paddings, mkldnn_engine);
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx.SetBlob(key_conv_pd, conv_pd);
// create reorder primitive if the input format is not the preferred one ConvMKLDNNHandler handler(conv_pd, dev_ctx, mkldnn_engine, key);
auto src_memory = user_src_memory;
primitive reorder_src; // create mkldnn memory from input tensors (data/weights)
bool is_src_reordered = false; auto user_src_memory_p =
if (memory::primitive_desc(conv_pd->src_primitive_desc()) != handler.AcquireSrcMemory(user_src_md, to_void_cast<T>(input_data));
user_src_memory.get_primitive_desc()) { auto user_weights_memory_p = handler.AcquireWeightsMemory(
src_memory = memory(conv_pd->src_primitive_desc()); user_weights_md, to_void_cast<T>(filter_data));
reorder_src = reorder(user_src_memory, src_memory);
is_src_reordered = true;
}
auto weights_memory = user_weights_memory;
primitive reorder_weights;
bool is_weights_reordered = false;
if (memory::primitive_desc(conv_pd->weights_primitive_desc()) !=
user_weights_memory.get_primitive_desc()) {
weights_memory = memory(conv_pd->weights_primitive_desc());
reorder_weights = reorder(user_weights_memory, weights_memory);
is_weights_reordered = true;
}
// create memory primitive for conv dst // create reorder primitive if the input format is not the preferred one
auto dst_memory = memory(conv_pd->dst_primitive_desc(), output_data); auto src_memory_p =
handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline);
auto weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive(
user_weights_memory_p, pipeline);
auto dst_memory_p =
handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
// create convolution op primitive // create convolution op primitive
auto conv_prim = conv_fwd(*conv_pd, src_memory, weights_memory, dst_memory); auto conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
dst_memory_p);
// push primitive to stream and wait until it's executed // push primitive to stream and wait until it's executed
std::vector<primitive> pipeline; pipeline.push_back(*conv_p);
if (is_src_reordered) pipeline.push_back(reorder_src);
if (is_weights_reordered) pipeline.push_back(reorder_weights);
pipeline.push_back(conv_prim);
stream(stream::kind::eager).submit(pipeline).wait(); stream(stream::kind::eager).submit(pipeline).wait();
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx.SetBlob(key_conv_pd, conv_pd);
output->set_layout(DataLayout::kMKLDNN); output->set_layout(DataLayout::kMKLDNN);
output->set_format(GetMKLDNNFormat(dst_memory)); output->set_format(GetMKLDNNFormat(*dst_memory_p));
} }
private: private:
...@@ -197,13 +260,10 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -197,13 +260,10 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
if (!input_grad && !filter_grad) return; if (!input_grad && !filter_grad) return;
// Get an unique name from "argument" name of "Output" variable
// This name will be used as key when saving info into device context
const std::string key = ctx.op().Input("Output");
const std::string key_conv_pd = key + "@conv_pd";
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides"); std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
const T* input_data = input->data<T>(); const T* input_data = input->data<T>();
const T* filter_data = filter->data<T>(); const T* filter_data = filter->data<T>();
...@@ -223,6 +283,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -223,6 +283,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
paddle::framework::vectorize2int(filter->dims()); paddle::framework::vectorize2int(filter->dims());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims()); std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
// Get an unique name from "argument" name of "Output" variable
// This name will be used as key when saving info into device context
const std::string key =
ConvMKLDNNHandler::GetHash(src_tz, weights_tz, strides, paddings,
dilations, groups, ctx.op().Input("Output"));
const std::string key_conv_pd = key + "@conv_pd";
// create mkldnn memory from input tensors (input/weights/output_grad) // create mkldnn memory from input tensors (input/weights/output_grad)
auto user_src_memory = memory( auto user_src_memory = memory(
{{{src_tz}, memory::data_type::f32, input->format()}, mkldnn_engine}, {{{src_tz}, memory::data_type::f32, input->format()}, mkldnn_engine},
......
...@@ -86,8 +86,9 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> { ...@@ -86,8 +86,9 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
std::minstd_rand engine, std::minstd_rand engine,
std::vector<int>* inds) const { std::vector<int>* inds) const {
std::uniform_real_distribution<float> uniform(0, 1); std::uniform_real_distribution<float> uniform(0, 1);
if (inds->size() > num) { const int64_t size = static_cast<int64_t>(inds->size());
for (int i = num; i < inds->size(); ++i) { if (size > num) {
for (int64_t i = num; i < size; ++i) {
int rng_ind = std::floor(uniform(engine) * i); int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < num) if (rng_ind < num)
std::iter_swap(inds->begin() + rng_ind + offset, std::iter_swap(inds->begin() + rng_ind + offset,
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/im2sequence_op.h" #include "paddle/fluid/operators/im2sequence_op.h"
#include <string>
#include <vector> #include <vector>
namespace paddle { namespace paddle {
...@@ -28,20 +29,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel { ...@@ -28,20 +29,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel {
"Input(X) of Im2SequenceOp should not be null."); "Input(X) of Im2SequenceOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of Im2SequenceOp op should not be null."); "Output(Out) of Im2SequenceOp op should not be null.");
auto in_dim = ctx->GetInputDim("X"); auto in_dim = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(in_dim.size(), 4, PADDLE_ENFORCE_EQ(in_dim.size(), 4,
"Input(X) format must be 4D tensor, eg., NCHW."); "Input(X) format must be 4D tensor, eg., NCHW.");
auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
int batch_size = in_dim[0]; int batch_size = in_dim[0];
int img_channels = in_dim[1]; int img_channels = in_dim[1];
int img_height = in_dim[2]; int img_height = in_dim[2];
int img_width = in_dim[3]; int img_width = in_dim[3];
auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0],
paddings[2], strides[0]); paddings[2], strides[0]);
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
...@@ -61,6 +61,10 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -61,6 +61,10 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
"C: channels" "C: channels"
"H: height" "H: height"
"W: width"); "W: width");
AddInput("Y",
"(Tensor) The input tensor of image real size(H, W)."
"2-D with shape [batchsize, 2]")
.AsDispensable();
AddOutput("Out", "(LodTensor) The output data of im2sequence op,"); AddOutput("Out", "(LodTensor) The output data of im2sequence op,");
AddAttr<std::vector<int>>("kernels", AddAttr<std::vector<int>>("kernels",
"(vector<int>), the " "(vector<int>), the "
...@@ -73,6 +77,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -73,6 +77,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
"(vector<int> default:{0, 0, 0, 0}), the " "(vector<int> default:{0, 0, 0, 0}), the "
"paddings(up_pad, left_pad, down_pad, right_pad)") "paddings(up_pad, left_pad, down_pad, right_pad)")
.SetDefault({0, 0, 0, 0}); .SetDefault({0, 0, 0, 0});
AddAttr<std::vector<int>>("out_stride",
"the attribute is valid only when input(Y)"
"is not NULL.this attribute represents the"
"scaling of the pic through the CNN"
"(vector<int> dedault:{1,1}),the out_stride"
" (out_stride_height, out_stride_width)")
.SetDefault({1, 1});
AddComment(R"DOC( AddComment(R"DOC(
This op uses kernels to scan images and converts these images to sequences. This op uses kernels to scan images and converts these images to sequences.
After expanding, The number of time steps are output_height * output_width After expanding, The number of time steps are output_height * output_width
...@@ -123,7 +134,7 @@ output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.] ...@@ -123,7 +134,7 @@ output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.]
[ 7. 1. 7. 9. 2. 1. 3. 5.] [ 7. 1. 7. 9. 2. 1. 3. 5.]
[ 5. 7. 2. 4. 1. 3. 9. 0.] [ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 7. 9. 4. 8. 3. 5. 0. 8.]] [ 7. 9. 4. 8. 3. 5. 0. 8.]]
output.dims = {8, 9} output.dims = {8, 8}
output.lod = [[0, 4, 8]] output.lod = [[0, 4, 8]]
)DOC"); )DOC");
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
...@@ -39,51 +40,107 @@ class Im2SequenceKernel : public framework::OpKernel<T> { ...@@ -39,51 +40,107 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
const Tensor* in = ctx.Input<Tensor>("X"); const Tensor* in = ctx.Input<Tensor>("X");
LoDTensor* out = ctx.Output<LoDTensor>("Out"); LoDTensor* out = ctx.Output<LoDTensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
// TODO(wanghaoshuang): Add layout checker after 'set_layout'
// being available for python API
// PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW,
// "Input(X) layout must be NCHW");
auto in_dim = in->dims(); auto in_dim = in->dims();
int batch_size = in_dim[0]; int batch_size = in_dim[0];
int img_channels = in_dim[1]; int img_channels = in_dim[1];
int img_height = in_dim[2]; int img_height = in_dim[2];
int img_width = in_dim[3]; int img_width = in_dim[3];
auto kernels = ctx.Attr<std::vector<int>>("kernels"); auto kernels = ctx.Attr<std::vector<int>>("kernels");
auto strides = ctx.Attr<std::vector<int>>("strides"); auto strides = ctx.Attr<std::vector<int>>("strides");
auto paddings = ctx.Attr<std::vector<int>>("paddings"); auto paddings = ctx.Attr<std::vector<int>>("paddings");
if (ctx.HasInput("Y") && batch_size > 1) {
const Tensor* imgrealsize = ctx.Input<Tensor>("Y");
auto out_stride = ctx.Attr<std::vector<int>>("out_stride");
Tensor cpu_shape_tensor;
TensorCopySync(*imgrealsize, platform::CPUPlace(), &cpu_shape_tensor);
std::vector<int> imgreal_h;
std::vector<int> imgreal_w;
std::vector<int> output_height;
std::vector<int> output_width;
int result = 0;
for (int i = 0; i < batch_size; i++) {
int tmp_real_h = static_cast<int>((cpu_shape_tensor.data<T>())[2 * i]);
int tmp_real_w =
static_cast<int>((cpu_shape_tensor.data<T>())[2 * i + 1]);
if (tmp_real_h % out_stride[0] == 0) {
tmp_real_h = tmp_real_h / out_stride[0];
} else {
tmp_real_h = tmp_real_h / out_stride[0] + 1;
}
if (tmp_real_w % out_stride[1] == 0) {
tmp_real_w = tmp_real_w / out_stride[1];
} else {
tmp_real_w = tmp_real_w / out_stride[1] + 1;
}
imgreal_h.push_back(tmp_real_h);
imgreal_w.push_back(tmp_real_w);
output_height.push_back(Im2SeqOutputSize(
imgreal_h[i], kernels[0], paddings[0], paddings[2], strides[0]));
output_width.push_back(Im2SeqOutputSize(
imgreal_w[i], kernels[1], paddings[1], paddings[3], strides[1]));
result += output_height[i] * output_width[i];
}
out->mutable_data<T>({result, img_channels * kernels[0] * kernels[1]},
ctx.GetPlace());
const std::vector<int> dilations({1, 1});
int offset_out = 0;
for (int i = 0; i < batch_size; i++) {
const Tensor src =
in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
Tensor dst = out->Slice(offset_out,
offset_out + output_height[i] * output_width[i])
.Resize({output_height[i], output_width[i],
img_channels, kernels[0], kernels[1]});
offset_out += output_height[i] * output_width[i];
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
f(dev_ctx, src, dilations, strides, paddings, &dst);
}
framework::LoD lod(1);
lod[0].reserve(batch_size + 1);
int offset = 0;
lod[0].push_back(offset);
for (int i = 0; i < batch_size; ++i) {
offset += output_height[i] * output_width[i];
lod[0].push_back(offset);
}
out->set_lod(lod);
} else {
out->mutable_data<T>(ctx.GetPlace());
int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0],
paddings[2], strides[0]); paddings[2], strides[0]);
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
paddings[3], strides[1]); paddings[3], strides[1]);
const std::vector<int> dilations({1, 1}); const std::vector<int> dilations({1, 1});
auto out_dims = out->dims(); auto out_dims = out->dims();
out->Resize({batch_size, out->numel() / batch_size}); out->Resize({batch_size, out->numel() / batch_size});
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
const Tensor src = const Tensor src =
in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
Tensor dst = out->Slice(i, i + 1).Resize( Tensor dst =
{output_height, output_width, img_channels, kernels[0], kernels[1]}); out->Slice(i, i + 1).Resize({output_height, output_width,
img_channels, kernels[0], kernels[1]});
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f; math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
auto& dev_ctx = ctx.template device_context<DeviceContext>(); auto& dev_ctx = ctx.template device_context<DeviceContext>();
f(dev_ctx, src, dilations, strides, paddings, &dst); f(dev_ctx, src, dilations, strides, paddings, &dst);
} }
out->Resize(out_dims); out->Resize(out_dims);
// set lod information
// TODO(wanghaoshuang): Move this to InferShape
framework::LoD lod(1); framework::LoD lod(1);
lod[0].reserve(batch_size + 1); lod[0].reserve(batch_size + 1);
for (int i = 0, offset = 0; i < batch_size + 1; ++i) { int offset = 0;
lod[0].push_back(offset); lod[0].push_back(offset);
for (int i = 0; i < batch_size; ++i) {
offset += output_height * output_width; offset += output_height * output_width;
lod[0].push_back(offset);
} }
out->set_lod(lod); out->set_lod(lod);
} }
}
}; };
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
......
...@@ -43,21 +43,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, ...@@ -43,21 +43,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int col_height = col->dims()[3]; int col_height = col->dims()[3];
int col_width = col->dims()[4]; int col_width = col->dims()[4];
PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
((dilation[0] * (filter_height - 1) + 1))) /
stride[0] +
1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
((dilation[1] * (filter_width - 1) + 1))) /
stride[1] +
1,
col_width,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
int channels_col = im_channels * filter_height * filter_width; int channels_col = im_channels * filter_height * filter_width;
const T* im_data = im.data<T>(); const T* im_data = im.data<T>();
...@@ -178,17 +163,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF, ...@@ -178,17 +163,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
int col_height = col->dims()[0]; int col_height = col->dims()[0];
int col_width = col->dims()[1]; int col_width = col->dims()[1];
PADDLE_ENFORCE_EQ(
(im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ(
(im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1,
col_width,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
const T* im_data = im.data<T>(); const T* im_data = im.data<T>();
T* col_data = col->data<T>(); T* col_data = col->data<T>();
......
...@@ -77,21 +77,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, ...@@ -77,21 +77,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int col_height = col->dims()[3]; int col_height = col->dims()[3];
int col_width = col->dims()[4]; int col_width = col->dims()[4];
PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
(dilation[0] * (filter_height - 1) + 1)) /
stride[0] +
1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
(dilation[1] * (filter_width - 1) + 1)) /
stride[1] +
1,
col_width,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
int num_outputs = im_channels * col_height * col_width; int num_outputs = im_channels * col_height * col_width;
int blocks = (num_outputs + 1024 - 1) / 1024; int blocks = (num_outputs + 1024 - 1) / 1024;
int block_x = 512; int block_x = 512;
...@@ -274,21 +259,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF, ...@@ -274,21 +259,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
int col_height = col->dims()[0]; int col_height = col->dims()[0];
int col_width = col->dims()[1]; int col_width = col->dims()[1];
PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
(dilation[0] * (filter_height - 1) + 1)) /
stride[0] +
1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
(dilation[1] * (filter_width - 1) + 1)) /
stride[1] +
1,
col_width,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
int block_dim_x = 0; int block_dim_x = 0;
int block_dim_y = 0; int block_dim_y = 0;
if (filter_height <= 4 && filter_width <= 4) { if (filter_height <= 4 && filter_width <= 4) {
......
...@@ -23,7 +23,7 @@ class BatchReader : public framework::DecoratedReader { ...@@ -23,7 +23,7 @@ class BatchReader : public framework::DecoratedReader {
BatchReader(const std::shared_ptr<ReaderBase>& reader, int batch_size, BatchReader(const std::shared_ptr<ReaderBase>& reader, int batch_size,
bool discard_leftover) bool discard_leftover)
: DecoratedReader(reader), : DecoratedReader(reader),
batch_size_(batch_size), batch_size_(static_cast<size_t>(batch_size)),
discard_leftover_(discard_leftover) { discard_leftover_(discard_leftover) {
buffer_.reserve(batch_size_); buffer_.reserve(batch_size_);
} }
...@@ -31,7 +31,7 @@ class BatchReader : public framework::DecoratedReader { ...@@ -31,7 +31,7 @@ class BatchReader : public framework::DecoratedReader {
void ReadNextImpl(std::vector<framework::LoDTensor>* out) override; void ReadNextImpl(std::vector<framework::LoDTensor>* out) override;
private: private:
int batch_size_; size_t batch_size_;
bool discard_leftover_; bool discard_leftover_;
std::vector<std::vector<framework::LoDTensor>> buffer_; std::vector<std::vector<framework::LoDTensor>> buffer_;
}; };
...@@ -78,7 +78,7 @@ class CreateBatchReaderOpMaker : public DecoratedReaderMakerBase { ...@@ -78,7 +78,7 @@ class CreateBatchReaderOpMaker : public DecoratedReaderMakerBase {
void BatchReader::ReadNextImpl(std::vector<framework::LoDTensor>* out) { void BatchReader::ReadNextImpl(std::vector<framework::LoDTensor>* out) {
buffer_.clear(); buffer_.clear();
buffer_.reserve(batch_size_); buffer_.reserve(batch_size_);
for (int i = 0; i < batch_size_; ++i) { for (size_t i = 0; i < batch_size_; ++i) {
buffer_.push_back(std::vector<framework::LoDTensor>()); buffer_.push_back(std::vector<framework::LoDTensor>());
reader_->ReadNext(&buffer_.back()); reader_->ReadNext(&buffer_.back());
if (buffer_.back().empty()) { if (buffer_.back().empty()) {
...@@ -95,9 +95,9 @@ void BatchReader::ReadNextImpl(std::vector<framework::LoDTensor>* out) { ...@@ -95,9 +95,9 @@ void BatchReader::ReadNextImpl(std::vector<framework::LoDTensor>* out) {
// if buffer_ is empty, the 'out' will return as an empty vector. // if buffer_ is empty, the 'out' will return as an empty vector.
return; return;
} }
int out_num = buffer_[0].size(); size_t out_num = buffer_[0].size();
out->reserve(out_num); out->reserve(out_num);
for (int j = 0; j < out_num; ++j) { for (size_t j = 0; j < out_num; ++j) {
// Merge shape and check date type // Merge shape and check date type
std::type_index batch_type = buffer_[0][j].type(); std::type_index batch_type = buffer_[0][j].type();
framework::DDim batch_shape = buffer_[0][j].dims(); framework::DDim batch_shape = buffer_[0][j].dims();
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
class SqueezeOpInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SqueezeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SqueezeOp should not be null.");
const auto &x_dims = ctx->GetInputDim("X");
// Check input tensor dims (<6) Eigen limit.
PADDLE_ENFORCE(x_dims.size() <= 6,
"Invalid dimnesions, the rank of Input(X) "
"should be in the range of [1, 6] (Eigen limit).");
const auto &axes = ctx->Attrs().Get<std::vector<int>>("axes");
for (int a : axes) {
PADDLE_ENFORCE_LT(a, x_dims.size(),
"The squeeze axis should be less than input "
"tensor's rank.");
}
auto out_dims = GetOutputShape(axes, x_dims);
ctx->SetOutputDim("Out", out_dims);
if (x_dims[0] == out_dims[0]) {
// Only pass LoD when the first dimension of output and Input(X)
// are the same.
ctx->ShareLoD("X", "Out");
}
}
static framework::DDim GetOutputShape(const std::vector<int> squeeze_dims,
const framework::DDim &in_dims) {
size_t num_squeeze_dims = squeeze_dims.size();
int cnt_squeezed_dims = 0;
bool should_squeeze[9] = {false};
// Determines number of dimensions of output tensor after squeeze.
// Mark and count the dimensions need to be squeezed
if (num_squeeze_dims == 0) {
for (int idx = 0; idx < in_dims.size(); ++idx) {
if (in_dims[idx] == 1) {
should_squeeze[idx] = true;
++cnt_squeezed_dims;
}
}
} else {
for (size_t idx = 0; idx < num_squeeze_dims; ++idx) {
int current = squeeze_dims[idx] < 0 ? squeeze_dims[idx] + in_dims.size()
: squeeze_dims[idx];
// Check current index, the upper limit has beed checked in line 36.
PADDLE_ENFORCE(current >= 0,
"Invalid axis, the negative axis is out of range.");
PADDLE_ENFORCE(in_dims[current] == 1,
"Invalid axis index, the axis that will be squeezed "
"should be equal to 1.");
if (!(should_squeeze[current])) {
++cnt_squeezed_dims;
}
should_squeeze[current] = true;
}
}
// Make output dimensions
std::vector<int64_t> output_shape(in_dims.size() - cnt_squeezed_dims, 0);
for (int in_idx = 0, out_idx = 0; in_idx < in_dims.size(); ++in_idx) {
if (!should_squeeze[in_idx]) {
output_shape[out_idx++] = in_dims[in_idx];
}
}
return framework::make_ddim(output_shape);
}
};
class SqueezeOp : public framework::OperatorBase {
public:
using OperatorBase::OperatorBase;
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &place) const override {
auto &axes = Attr<std::vector<int>>("axes");
auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
auto out_dims = SqueezeOpInferShape::GetOutputShape(axes, x_dims);
framework::AttributeMap attrs;
attrs["shape"] = framework::vectorize2int(out_dims);
attrs["inplace"] = Attr<bool>("inplace");
// Invoke Reshape Op
auto reshape_op = framework::OpRegistry::CreateOp(
"reshape", {{"X", {Input("X")}}, {"Shape", {}}},
{{"Out", {Output("Out")}}}, attrs);
reshape_op->Run(scope, place);
}
};
class SqueezeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(Tensor). The input tensor of squeeze operator.");
AddOutput("Out", "(Tensor). The output tensor of squeeze operator.");
AddAttr<std::vector<int>>("axes",
"(std::vector<int>). List of integers,"
" indicating the dimensions to squeeze.")
.SetDefault({});
AddAttr<bool>("inplace",
"(default: false) Squeeze the source tensor's shape without "
"memory copy. When Attr(inplace) is set true, the output "
"tensor shares memory with Input(X), otherwise, a new output "
"tensor is created, and its data are copied from Input(x).")
.SetDefault(false);
AddComment(R"DOC(
Squeeze Operator.
Remove single-dimensional entries from the shape of a tensor.
Takes a parameter axes with a list of axes to squeeze.
If axes is not provided, all the single dimensions will be removed from the shape.
If an axis is selected with shape entry not equal to one, an error is raised.
Examples:
Case 1:
Given
X.shape = (1, 3, 1, 5)
and
axes = [0]
we get:
Out.shape = (3, 1, 5)
Case 2:
Given
X.shape = (1, 3, 1, 5)
and
axes = []
we get:
Out.shape = (3, 5)
)DOC");
}
};
class SqueezeGradInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
context->SetOutputDim(framework::GradVarName("X"),
context->GetInputDim("X"));
context->ShareLoD("X", framework::GradVarName("X"));
}
};
class SqueezeGradOp : public framework::OperatorBase {
public:
using OperatorBase::OperatorBase;
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &place) const override {
auto dx_name = Output(framework::GradVarName("X"));
auto dout_name = Input(framework::GradVarName("Out"));
auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
framework::AttributeMap attrs;
attrs["shape"] = framework::vectorize2int(x_dims);
attrs["inplace"] = Attr<bool>("inplace");
auto reshape_op = framework::OpRegistry::CreateOp(
"reshape", {{"X", {dout_name}}, {"Shape", {}}}, {{"Out", {dx_name}}},
attrs);
reshape_op->Run(scope, place);
}
};
} // namespace operators
} // namespace paddle
// Tell linker to use reshape op
USE_OP(reshape);
namespace ops = paddle::operators;
REGISTER_OPERATOR(squeeze, ops::SqueezeOp, ops::SqueezeOpMaker,
ops::SqueezeOpInferShape,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(squeeze_grad, ops::SqueezeGradOp, ops::SqueezeGradInferShape);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
class UnsqueezeOpInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of UnsqueezeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of UnsqueezeOp should not be null.");
const auto &axes = ctx->Attrs().Get<std::vector<int>>("axes");
const auto &x_dims = ctx->GetInputDim("X");
// Validity Check: input tensor dims (<6).
PADDLE_ENFORCE(x_dims.size() <= 6,
"Invalid dimensions, the rank of Input(X) "
"should be in the range of [1, 6] (Eigen limit)");
auto out_dims = GetOutputShape(axes, x_dims);
ctx->SetOutputDim("Out", out_dims);
if (x_dims[0] == out_dims[0]) {
// Only pass LoD when the first dimension of output and Input(X)
// are the same.
ctx->ShareLoD("X", "Out");
}
}
static framework::DDim GetOutputShape(const std::vector<int> unsqz_dims,
const framework::DDim &in_dims) {
int output_size = in_dims.size() + static_cast<int>(unsqz_dims.size());
int cur_output_size = in_dims.size();
std::vector<int64_t> output_shape(output_size, 0);
// Validity Check: rank range.
PADDLE_ENFORCE(output_size <= 6,
"The output tensor's rank should be less than 6.");
for (int axis : unsqz_dims) {
int cur = axis < 0 ? axis + cur_output_size + 1 : axis;
// Vaildity Check: the axis bound
PADDLE_ENFORCE(
cur >= 0 && cur <= cur_output_size,
"The unsqueeze dims must be within range of current rank.");
// Move old axis, and insert new axis
for (int i = cur_output_size; i >= cur; --i) {
if (output_shape[i] == 1) {
// Move axis
output_shape[i + 1] = 1;
output_shape[i] = 0;
}
}
output_shape[cur] = 1;
// Add the output size.
cur_output_size++;
}
// Make output shape
for (int in_idx = 0, out_idx = 0; out_idx < output_size; ++out_idx) {
if (output_shape[out_idx] == 0) {
output_shape[out_idx] = in_dims[in_idx++];
}
}
return framework::make_ddim(output_shape);
}
};
class UnsqueezeOp : public framework::OperatorBase {
public:
using OperatorBase::OperatorBase;
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &place) const override {
auto &axes = Attr<std::vector<int>>("axes");
auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
auto out_dims = UnsqueezeOpInferShape::GetOutputShape(axes, x_dims);
framework::AttributeMap attrs;
attrs["shape"] = framework::vectorize2int(out_dims);
attrs["inplace"] = Attr<bool>("inplace");
// Invoke Reshape op.
auto reshape_op = framework::OpRegistry::CreateOp(
"reshape", {{"X", {Input("X")}}, {"Shape", {}}},
{{"Out", {Output("Out")}}}, attrs);
reshape_op->Run(scope, place);
}
};
class UnsqueezeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(Tensor). The input tensor of unsqueeze operator.");
AddOutput("Out", "(Tensor). The output tensor of unsqueeze operator.");
AddAttr<std::vector<int>>("axes",
"(std::vector<int>). List of integers,"
" indicating the dimensions to be inserted")
.AddCustomChecker([](const std::vector<int> &axes) {
PADDLE_ENFORCE(!axes.empty(),
"Invalid axes, The unsqueeze axes is empty.");
// Validity Check: axes dims (<6).
PADDLE_ENFORCE(static_cast<int>(axes.size()) < 6,
"Invalid dimensions, dynamic dimensions should be "
"within [1, 6] dimensions (Eigen limit).");
// Validity Check: the range of unsqueeze aixs.
for (int axis : axes) {
PADDLE_ENFORCE(axis < 6,
"Invalid dimensions, input axis should be"
" within [1, 6] dimensions (Eigen limit).");
}
});
AddAttr<bool>(
"inplace",
"(default: false) Unsqueeze the source tensor's shape without "
"memory copy. When Attr(inplace) is set true, the output "
"tensor shares memory with Input(X), otherwise, a new output "
"tensor is created, and its data are copied from Input(x).")
.SetDefault(false);
AddComment(R"DOC(
Unsqueeze Operator.
Insert single-dimensional entries to the shape of a tensor.
Takes one required argument axes, a list of dimensions that will be inserted.
Dimension indices in axes are as seen in the output tensor.
For example:
Given a tensor such that tensor with shape [3, 4, 5],
then Unsqueeze(tensor, axes=[0, 4]) has shape [1, 3, 4, 5, 1]
)DOC");
}
};
class UnsqueezeGradInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *ctx) const override {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
ctx->ShareLoD("X", framework::GradVarName("X"));
}
};
class UnsqueezeGradOp : public framework::OperatorBase {
public:
using OperatorBase::OperatorBase;
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &place) const override {
auto dx_name = Output(framework::GradVarName("X"));
auto dout_name = Input(framework::GradVarName("Out"));
auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
framework::AttributeMap attrs;
attrs["shape"] = framework::vectorize2int(x_dims);
attrs["inplace"] = Attr<bool>("inplace");
auto reshape_op = framework::OpRegistry::CreateOp(
"reshape", {{"X", {dout_name}}, {"Shape", {}}}, {{"Out", {dx_name}}},
attrs);
reshape_op->Run(scope, place);
}
};
} // namespace operators
} // namespace paddle
// Tell linker to use reshape op.
USE_OP(reshape);
namespace ops = paddle::operators;
REGISTER_OPERATOR(unsqueeze, ops::UnsqueezeOp, ops::UnsqueezeOpMaker,
ops::UnsqueezeOpInferShape,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(unsqueeze_grad, ops::UnsqueezeGradOp,
ops::UnsqueezeGradInferShape);
...@@ -222,15 +222,16 @@ class MKLDNNHandler { ...@@ -222,15 +222,16 @@ class MKLDNNHandler {
static std::string GetHash(mkldnn::memory::dims& operand_dims, // NOLINT static std::string GetHash(mkldnn::memory::dims& operand_dims, // NOLINT
const std::string& suffix) { const std::string& suffix) {
auto dims2str = [](const mkldnn::memory::dims& operand_dims) { return dims2str(operand_dims) + suffix;
};
protected:
static std::string dims2str(const mkldnn::memory::dims& operand_dims) {
std::string dstr = ""; std::string dstr = "";
for (size_t i = 0; i < operand_dims.size(); ++i) { for (size_t i = 0; i < operand_dims.size(); ++i) {
dstr += std::to_string(operand_dims[i]) + "-"; dstr += std::to_string(operand_dims[i]) + "-";
} }
return dstr; return dstr;
};
return dims2str(operand_dims) + suffix;
} }
protected: protected:
......
...@@ -66,6 +66,14 @@ bool IsCompiledWithCUDA() { ...@@ -66,6 +66,14 @@ bool IsCompiledWithCUDA() {
#endif #endif
} }
bool IsCompiledWithDIST() {
#ifdef PADDLE_WITH_DIST
return true;
#else
return false;
#endif
}
PYBIND11_PLUGIN(core) { PYBIND11_PLUGIN(core) {
py::module m("core", "C++ core of PaddlePaddle"); py::module m("core", "C++ core of PaddlePaddle");
...@@ -508,6 +516,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -508,6 +516,7 @@ All parameter, weight, gradient are variables in Paddle.
[](bool init_p2p) { framework::InitDevices(init_p2p); }); [](bool init_p2p) { framework::InitDevices(init_p2p); });
m.def("is_compiled_with_cuda", IsCompiledWithCUDA); m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
m.def("is_compiled_with_dist", IsCompiledWithDIST);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
m.def("is_float16_supported", [](const platform::CUDAPlace &place) -> bool { m.def("is_float16_supported", [](const platform::CUDAPlace &place) -> bool {
// Only GPUs with Compute Capability >= 53 support float16 // Only GPUs with Compute Capability >= 53 support float16
......
...@@ -518,11 +518,23 @@ function gen_fluid_inference_lib() { ...@@ -518,11 +518,23 @@ function gen_fluid_inference_lib() {
EOF EOF
make -j `nproc` inference_lib_dist make -j `nproc` inference_lib_dist
cd ${PADDLE_ROOT}/build cd ${PADDLE_ROOT}/build
mv fluid_install_dir fluid cp -r fluid_install_dir fluid
tar -cf fluid.tgz fluid tar -cf fluid.tgz fluid
fi fi
} }
function test_fluid_inference_lib() {
if [ ${WITH_C_API:-OFF} == "OFF" ] ; then
cat <<EOF
========================================
Testing fluid inference library ...
========================================
EOF
cd ${PADDLE_ROOT}/paddle/contrib/inference/demo_ci
sh run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF}
fi
}
function main() { function main() {
set -e set -e
local CMD=$1 local CMD=$1
...@@ -576,6 +588,7 @@ function main() { ...@@ -576,6 +588,7 @@ function main() {
run_test run_test
gen_capi_package gen_capi_package
gen_fluid_inference_lib gen_fluid_inference_lib
test_fluid_inference_lib
;; ;;
*) *)
print_usage print_usage
......
...@@ -92,8 +92,15 @@ install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} ...@@ -92,8 +92,15 @@ install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
DESTINATION opt/paddle/share/wheels DESTINATION opt/paddle/share/wheels
) )
find_program(PATCHELF_EXECUTABLE patchelf) if(APPLE)
if(NOT PATCHELF_EXECUTABLE) find_program(INSTALL_NAME_TOOL_EXECUTABLE install_name_tool)
if(NOT INSTALL_NAME_TOOL_EXECUTABLE)
message(FATAL_ERROR "install_name_tool not found, please check.\n")
endif()
else(APPLE)
find_program(PATCHELF_EXECUTABLE patchelf)
if(NOT PATCHELF_EXECUTABLE)
message(FATAL_ERROR "patchelf not found, please install it.\n" message(FATAL_ERROR "patchelf not found, please install it.\n"
"For Ubuntu, the command is: apt-get install -y patchelf.") "For Ubuntu, the command is: apt-get install -y patchelf.")
endif() endif()
endif(APPLE)
...@@ -121,6 +121,9 @@ def __bootstrap__(): ...@@ -121,6 +121,9 @@ def __bootstrap__():
'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
'init_allocated_mem' 'init_allocated_mem'
] ]
if core.is_compiled_with_dist():
read_env_flags.append('rpc_deadline')
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
read_env_flags += [ read_env_flags += [
'fraction_of_gpu_memory_to_use', 'cudnn_deterministic' 'fraction_of_gpu_memory_to_use', 'cudnn_deterministic'
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import sys
__all__ = ['deprecated']
def deprecated(since, instead, extra_message=""):
def decorator(func):
err_msg = "API {0} is deprecated since {1}. Please use {2} instead.".format(
func.__name__, since, instead)
if len(extra_message) != 0:
err_msg += "\n"
err_msg += extra_message
@functools.wraps(func)
def wrapper(*args, **kwargs):
print >> sys.stderr, err_msg
return func(*args, **kwargs)
wrapper.__doc__ += "\n "
wrapper.__doc__ += err_msg
return wrapper
return decorator
...@@ -18,10 +18,7 @@ import collections ...@@ -18,10 +18,7 @@ import collections
import copy import copy
import unique_name import unique_name
__all__ = [ __all__ = ['append_backward']
'append_backward',
'calc_gradient',
]
def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None):
...@@ -123,7 +120,8 @@ def _append_grad_suffix_(name): ...@@ -123,7 +120,8 @@ def _append_grad_suffix_(name):
def _addup_repetitive_outputs_(op_descs): def _addup_repetitive_outputs_(op_descs):
""" """
In backward part, an variable may be the output of more than one ops. In backward part, an variable may be the output of more than one ops.
In this case, the variable should be the accumulation of all the outputs. And one op may yield its multiple outputs to the same variable.
In these cases, the variable should be the accumulation of all the outputs.
`sum_op`s are added to implement the accumulate. `sum_op`s are added to implement the accumulate.
""" """
pending_sum_ops = [] pending_sum_ops = []
...@@ -136,7 +134,9 @@ def _addup_repetitive_outputs_(op_descs): ...@@ -136,7 +134,9 @@ def _addup_repetitive_outputs_(op_descs):
"sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]}, "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]},
{"use_mkldnn": False}), idx)) {"use_mkldnn": False}), idx))
renamed_vars[var_name] = [var_name] renamed_vars[var_name] = [var_name]
for var_name in op_desc.output_arg_names(): for param_idx, param_name in enumerate(op_desc.output_names()):
arg_names = op_desc.output(param_name)
for arg_idx, var_name in enumerate(arg_names):
if var_name == core.empty_var_name( if var_name == core.empty_var_name(
) or var_name in op_desc.input_arg_names(): ) or var_name in op_desc.input_arg_names():
# empty variable or inplace op # empty variable or inplace op
...@@ -154,11 +154,26 @@ def _addup_repetitive_outputs_(op_descs): ...@@ -154,11 +154,26 @@ def _addup_repetitive_outputs_(op_descs):
_rename_arg_(op_descs, var_name, new_name, 0, idx) _rename_arg_(op_descs, var_name, new_name, 0, idx)
_rename_arg_(pending_sum_ops, var_name, new_name) _rename_arg_(pending_sum_ops, var_name, new_name)
for p in op_desc.output_names()[:param_idx]:
p_arg_names = op_desc.output(p)
if var_name in p_arg_names:
op_desc.set_output(p, [
new_name if x == var_name else x
for x in p_arg_names
])
arg_names = [
new_name if x == var_name else x
for x in arg_names[:arg_idx]
] + arg_names[arg_idx:]
new_name = var_name + "@RENAME@" + \ new_name = var_name + "@RENAME@" + \
str(var_rename_count[var_name]) str(var_rename_count[var_name])
var_rename_count[var_name] += 1 var_rename_count[var_name] += 1
op_desc.rename_output(var_name, new_name) arg_names[arg_idx] = new_name
op_desc.set_output(param_name, arg_names)
renamed_vars[var_name].append(new_name) renamed_vars[var_name].append(new_name)
for var_name, inputs in renamed_vars.iteritems(): for var_name, inputs in renamed_vars.iteritems():
if len(inputs) > 1: if len(inputs) > 1:
pending_sum_ops.append( pending_sum_ops.append(
......
...@@ -18,10 +18,12 @@ All util layers. ...@@ -18,10 +18,12 @@ All util layers.
from layer_function_generator import autodoc from layer_function_generator import autodoc
from ..framework import unique_name from ..framework import unique_name
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..annotations import deprecated
__all__ = ['get_places'] __all__ = []
@deprecated(since='0.15.0', instead="ParallelExecutor")
@autodoc() @autodoc()
def get_places(device_count=None, device_type=None): def get_places(device_count=None, device_type=None):
helper = LayerHelper('get_places', **locals()) helper = LayerHelper('get_places', **locals())
......
...@@ -11,6 +11,20 @@ ...@@ -11,6 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Copyright (c ) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
All layers just related to the neural network. All layers just related to the neural network.
""" """
...@@ -3900,7 +3914,13 @@ def transpose(x, perm, name=None): ...@@ -3900,7 +3914,13 @@ def transpose(x, perm, name=None):
return out return out
def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): def im2sequence(input,
filter_size=1,
stride=1,
padding=0,
input_image_size=None,
out_stride=1,
name=None):
""" """
Extracts image patches from the input tensor to form a tensor of shape Extracts image patches from the input tensor to form a tensor of shape
{input.batch_size * output_height * output_width, filter_size_H * {input.batch_size * output_height * output_width, filter_size_H *
...@@ -3937,6 +3957,15 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): ...@@ -3937,6 +3957,15 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
padding_up = padding_down = padding_left = padding_right = padding padding_up = padding_down = padding_left = padding_right = padding
Default: padding = 0. Default: padding = 0.
input_image_size(Variable): the input contains image real size.It's dim
is [batchsize, 2]. It is dispensable.It is just for batch inference.
out_stride(int|tuple): The scaling of image through CNN. It is
dispensable. It is valid only when input_image_size is not null.
If out_stride is tuple, it must contain two intergers,
(out_stride_H, out_stride_W). Otherwise,
the out_stride_H = out_stride_W = out_stride.
name (int): The name of this layer. It is optional. name (int): The name of this layer. It is optional.
Returns: Returns:
...@@ -3987,7 +4016,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): ...@@ -3987,7 +4016,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
[ 5. 7. 2. 4. 1. 3. 9. 0.] [ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 7. 9. 4. 8. 3. 5. 0. 8.]] [ 7. 9. 4. 8. 3. 5. 0. 8.]]
output.dims = {8, 9} output.dims = {8, 8}
output.lod = [[4, 4]] output.lod = [[4, 4]]
...@@ -4009,18 +4038,17 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): ...@@ -4009,18 +4038,17 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
if len(padding) == 2: if len(padding) == 2:
padding.append(padding[0]) padding.append(padding[0])
padding.append(padding[1]) padding.append(padding[1])
inputs = {"X": input}
attrs = {"kernels": filter_size, "strides": stride, "padding": padding}
if input_image_size:
if isinstance(out_stride, int):
out_stride = [out_stride, out_stride]
inputs["Y"] = input_image_size
attrs["out_stride"] = out_stride
helper = LayerHelper('im2sequence', **locals()) helper = LayerHelper('im2sequence', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype()) out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op( helper.append_op(
type='im2sequence', type='im2sequence', inputs=inputs, outputs={'Out': out}, attrs=attrs)
inputs={'X': input},
outputs={'Out': out},
attrs={
'kernels': filter_size,
'strides': stride,
'paddings': padding,
})
return out return out
......
...@@ -29,7 +29,7 @@ __all__ = [ ...@@ -29,7 +29,7 @@ __all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl', 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer', 'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer',
'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer', 'RMSPropOptimizer' 'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'RMSPropOptimizer'
] ]
...@@ -67,7 +67,7 @@ class Optimizer(object): ...@@ -67,7 +67,7 @@ class Optimizer(object):
self._LARS_weight_decay = LARS_weight_decay self._LARS_weight_decay = LARS_weight_decay
def _create_global_learning_rate(self): def _create_global_learning_rate(self):
lr = self.global_learning_rate() lr = self._global_learning_rate()
if isinstance(lr, framework.Variable): if isinstance(lr, framework.Variable):
return return
...@@ -86,7 +86,7 @@ class Optimizer(object): ...@@ -86,7 +86,7 @@ class Optimizer(object):
dtype='float32' if self._dtype == None else self._dtype, dtype='float32' if self._dtype == None else self._dtype,
persistable=True) persistable=True)
def global_learning_rate(self, program=None): def _global_learning_rate(self, program=None):
""" """
get global decayed learning rate get global decayed learning rate
:return: :return:
...@@ -110,9 +110,9 @@ class Optimizer(object): ...@@ -110,9 +110,9 @@ class Optimizer(object):
return param_lr return param_lr
else: else:
if param_lr == 1.0: if param_lr == 1.0:
return self.global_learning_rate() return self._global_learning_rate()
else: else:
return self.global_learning_rate() * param_lr return self._global_learning_rate() * param_lr
def _create_accumulators(self, block, parameters): def _create_accumulators(self, block, parameters):
"""Create all accumulators needed by the parameters """Create all accumulators needed by the parameters
...@@ -185,7 +185,7 @@ class Optimizer(object): ...@@ -185,7 +185,7 @@ class Optimizer(object):
format(name, param.name)) format(name, param.name))
return self._accumulators[name][param.name] return self._accumulators[name][param.name]
def create_optimization_pass(self, def _create_optimization_pass(self,
parameters_and_grads, parameters_and_grads,
loss, loss,
startup_program=None): startup_program=None):
...@@ -221,7 +221,7 @@ class Optimizer(object): ...@@ -221,7 +221,7 @@ class Optimizer(object):
self._create_global_learning_rate() self._create_global_learning_rate()
if self._LARS_weight_decay > 0.0: if self._LARS_weight_decay > 0.0:
layers.append_LARS(parameters_and_grads, layers.append_LARS(parameters_and_grads,
self.global_learning_rate(), self._global_learning_rate(),
self._LARS_weight_decay) self._LARS_weight_decay)
optimize_ops = [] optimize_ops = []
...@@ -262,7 +262,7 @@ class Optimizer(object): ...@@ -262,7 +262,7 @@ class Optimizer(object):
params_grads = append_regularization_ops(params_grads, params_grads = append_regularization_ops(params_grads,
self.regularization) self.regularization)
optimize_ops = self.create_optimization_pass(params_grads, loss, optimize_ops = self._create_optimization_pass(params_grads, loss,
startup_program) startup_program)
return optimize_ops, params_grads return optimize_ops, params_grads
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddle.fluid.layers.device import get_places
import unittest import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle import paddle
...@@ -144,7 +144,7 @@ def train(word_dict, ...@@ -144,7 +144,7 @@ def train(word_dict,
cost, acc_out, prediction = net_method( cost, acc_out, prediction = net_method(
data, label, input_dim=dict_dim, class_dim=class_dim) data, label, input_dim=dict_dim, class_dim=class_dim)
else: else:
places = fluid.layers.get_places() places = get_places()
pd = fluid.layers.ParallelDo(places) pd = fluid.layers.ParallelDo(places)
with pd.do(): with pd.do():
cost, acc, _ = net_method( cost, acc, _ = net_method(
......
...@@ -12,15 +12,17 @@ ...@@ -12,15 +12,17 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import argparse
import paddle.fluid as fluid
import paddle
import sys
import numpy
import unittest
import math import math
import sys
import os import os
import sys
import unittest
import numpy
import paddle
import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
BATCH_SIZE = 64 BATCH_SIZE = 64
...@@ -76,7 +78,7 @@ def train(nn_type, ...@@ -76,7 +78,7 @@ def train(nn_type,
net_conf = conv_net net_conf = conv_net
if parallel: if parallel:
places = fluid.layers.get_places() places = get_places()
pd = fluid.layers.ParallelDo(places) pd = fluid.layers.ParallelDo(places)
with pd.do(): with pd.do():
img_ = pd.read_input(img) img_ = pd.read_input(img)
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
import unittest import unittest
import os import os
import numpy as np import numpy as np
...@@ -80,7 +81,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): ...@@ -80,7 +81,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
avg_cost, predict_word = __network__( avg_cost, predict_word = __network__(
[first_word, second_word, third_word, forth_word, next_word]) [first_word, second_word, third_word, forth_word, next_word])
else: else:
places = fluid.layers.get_places() places = get_places()
pd = fluid.layers.ParallelDo(places) pd = fluid.layers.ParallelDo(places)
with pd.do(): with pd.do():
avg_cost, predict_word = __network__( avg_cost, predict_word = __network__(
......
...@@ -12,12 +12,13 @@ ...@@ -12,12 +12,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import paddle
import paddle.fluid as fluid
import math import math
import sys import sys
import paddle
import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
# need to fix random seed and training data to compare the loss # need to fix random seed and training data to compare the loss
# value accurately calculated by the default and the memory optimization # value accurately calculated by the default and the memory optimization
# version. # version.
...@@ -34,7 +35,7 @@ if fluid.core.is_compiled_with_cuda(): ...@@ -34,7 +35,7 @@ if fluid.core.is_compiled_with_cuda():
use_nccl = False use_nccl = False
place = fluid.CUDAPlace(0) place = fluid.CUDAPlace(0)
places = fluid.layers.get_places(device_count=0, device_type=device_type) places = get_places(device_count=0, device_type=device_type)
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
with pd.do(): with pd.do():
x_ = pd.read_input(x) x_ = pd.read_input(x)
......
...@@ -16,8 +16,6 @@ import unittest ...@@ -16,8 +16,6 @@ import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.fluid.framework as framework
import paddle.fluid.optimizer as optimizer
from paddle.fluid.backward import calc_gradient from paddle.fluid.backward import calc_gradient
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
import decorators import decorators
import unittest import unittest
...@@ -20,7 +21,7 @@ import unittest ...@@ -20,7 +21,7 @@ import unittest
class TestGetPlaces(unittest.TestCase): class TestGetPlaces(unittest.TestCase):
@decorators.prog_scope() @decorators.prog_scope()
def test_get_places(self): def test_get_places(self):
places = fluid.layers.get_places() places = get_places()
cpu = fluid.CPUPlace() cpu = fluid.CPUPlace()
exe = fluid.Executor(cpu) exe = fluid.Executor(cpu)
exe.run(fluid.default_main_program()) exe.run(fluid.default_main_program())
......
...@@ -16,20 +16,45 @@ import numpy as np ...@@ -16,20 +16,45 @@ import numpy as np
from op_test import OpTest from op_test import OpTest
def get_output_shape(attrs, in_shape): def get_output_shape(attrs, in_shape, img_real_size):
batchsize = in_shape[0]
img_height = in_shape[2] img_height = in_shape[2]
img_width = in_shape[3] img_width = in_shape[3]
paddings = np.array(attrs['paddings']).astype("int32")
kernels = np.array(attrs['kernels']).astype("int32")
strides = np.array(attrs['strides']).astype("int32")
output_height = np.zeros((1, batchsize)).astype("int32")
output_width = np.zeros((1, batchsize)).astype("int32")
if len(img_real_size):
out_stride = np.array(attrs['out_stride']).astype("int32")
imgreal_h = 0
imgreal_w = 0
for index in range(batchsize):
if img_real_size[index, 0] % out_stride[0] == 0:
imgreal_h = img_real_size[index, 0] / out_stride[0]
else:
imgreal_h = img_real_size[index, 0] / out_stride[0] + 1
if img_real_size[index, 0] % out_stride[1] == 0:
imgreal_w = img_real_size[index, 1] / out_stride[1]
else:
imgreal_w = img_real_size[index, 0] / out_stride[1] + 1
output_height[0,index] = \
1 + \
(imgreal_h + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \
strides[0]
paddings = attrs['paddings'] output_width[0,index] = \
kernels = attrs['kernels'] 1 + \
strides = attrs['strides'] (imgreal_w + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \
strides[1]
output_height = \ else:
for index in range(batchsize):
output_height[0,index] = \
1 + \ 1 + \
(img_height + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \ (img_height + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \
strides[0] strides[0]
output_width = \ output_width[0,index] = \
1 + \ 1 + \
(img_width + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \ (img_width + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \
strides[1] strides[1]
...@@ -75,22 +100,25 @@ def im2col(attrs, im, col): ...@@ -75,22 +100,25 @@ def im2col(attrs, im, col):
im_row_offset][im_col_offset] im_row_offset][im_col_offset]
def Im2Sequence(inputs, attrs): def Im2Sequence(inputs, img_real_size, attrs):
output_height, output_width = get_output_shape(attrs, inputs.shape) output_height, output_width = get_output_shape(attrs, inputs.shape,
img_real_size)
img_channels = inputs.shape[1] img_channels = inputs.shape[1]
batch_size = inputs.shape[0] batch_size = inputs.shape[0]
out = np.zeros([ out = []
batch_size, output_height, output_width, img_channels, for index in range(batch_size):
tmp = np.zeros([
output_height[0, index], output_width[0, index], img_channels,
attrs['kernels'][0], attrs['kernels'][1] attrs['kernels'][0], attrs['kernels'][1]
]).astype("float32") ]).astype("float32")
out.append(tmp)
for i in range(len(inputs)): for index in range(len(inputs)):
im2col(attrs, inputs[i], out[i]) im2col(attrs, inputs[index], out[index])
out[index] = out[index].reshape([
out = out.reshape([ output_height[0, index] * output_width[0, index],
batch_size * output_height * output_width,
img_channels * attrs['kernels'][0] * attrs['kernels'][1] img_channels * attrs['kernels'][0] * attrs['kernels'][1]
]) ])
out = np.concatenate(out, axis=0)
return out return out
...@@ -103,7 +131,7 @@ class TestBlockExpandOp(OpTest): ...@@ -103,7 +131,7 @@ class TestBlockExpandOp(OpTest):
self.attrs = { self.attrs = {
'kernels': [2, 2], 'kernels': [2, 2],
'strides': [1, 1], 'strides': [1, 1],
'paddings': [1, 1, 1, 1] 'paddings': [1, 1, 1, 1],
} }
def setUp(self): def setUp(self):
...@@ -113,7 +141,8 @@ class TestBlockExpandOp(OpTest): ...@@ -113,7 +141,8 @@ class TestBlockExpandOp(OpTest):
self.batch_size, self.img_channels, self.img_height, self.img_width self.batch_size, self.img_channels, self.img_height, self.img_width
]).astype("float32") ]).astype("float32")
out = Im2Sequence(x, self.attrs) real_size = np.array([]).astype("float32")
out = Im2Sequence(x, real_size, self.attrs)
self.inputs = {'X': x} self.inputs = {'X': x}
self.outputs = {'Out': out} self.outputs = {'Out': out}
...@@ -133,20 +162,20 @@ class TestBlockExpandOpCase2(TestBlockExpandOp): ...@@ -133,20 +162,20 @@ class TestBlockExpandOpCase2(TestBlockExpandOp):
self.attrs = { self.attrs = {
'kernels': [2, 1], 'kernels': [2, 1],
'strides': [2, 1], 'strides': [2, 1],
'paddings': [2, 1, 2, 1] 'paddings': [2, 1, 2, 1],
} }
class TestBlockExpandOpCase3(TestBlockExpandOp): class TestBlockExpandOpCase3(TestBlockExpandOp):
def config(self): def config(self):
self.batch_size = 3 self.batch_size = 2
self.img_channels = 1 self.img_channels = 1
self.img_height = 4 self.img_height = 4
self.img_width = 5 self.img_width = 5
self.attrs = { self.attrs = {
'kernels': [2, 1], 'kernels': [2, 1],
'strides': [2, 1], 'strides': [2, 1],
'paddings': [2, 0, 2, 0] 'paddings': [2, 0, 2, 0],
} }
...@@ -159,9 +188,94 @@ class TestBlockExpandOpCase4(TestBlockExpandOp): ...@@ -159,9 +188,94 @@ class TestBlockExpandOpCase4(TestBlockExpandOp):
self.attrs = { self.attrs = {
'kernels': [2, 2], 'kernels': [2, 2],
'strides': [1, 1], 'strides': [1, 1],
'paddings': [0, 0, 0, 0] 'paddings': [0, 0, 0, 0],
}
class TestBlockExpandOpCase5(OpTest):
def config(self):
self.batch_size = 1
self.img_channels = 3
self.img_height = 4
self.img_width = 5
self.attrs = {
'kernels': [2, 1],
'strides': [2, 1],
'paddings': [2, 1, 2, 1],
'out_stride': [2, 2],
}
def setUp(self):
self.config()
self.op_type = "im2sequence"
x = np.random.uniform(0.1, 1, [
self.batch_size, self.img_channels, self.img_height, self.img_width
]).astype("float32")
real_size = np.array([[8, 10], [5, 8]]).astype("float32")
out = np.array(Im2Sequence(x, real_size, self.attrs))
self.inputs = {'X': x, 'Y': real_size} #l ??
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
class TestBlockExpandOpCase6(OpTest):
def config(self):
self.batch_size = 3
self.img_channels = 1
self.img_height = 4
self.img_width = 5
self.attrs = {
'kernels': [2, 1],
'strides': [1, 1],
'paddings': [0, 0, 0, 0],
'out_stride': [1, 1],
}
def setUp(self):
self.config()
self.op_type = "im2sequence"
x = np.random.uniform(0.1, 1, [
self.batch_size, self.img_channels, self.img_height, self.img_width
]).astype("float32")
real_size = np.array([[8, 10], [5, 8], [5, 8]]).astype("float32")
out = np.array(Im2Sequence(x, real_size, self.attrs))
self.inputs = {'X': x, 'Y': real_size} #l ??
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
class TestBlockExpandOpCase7(OpTest):
def config(self):
self.batch_size = 2
self.img_channels = 2
self.img_height = 3
self.img_width = 3
self.attrs = {
'kernels': [2, 2],
'strides': [1, 1],
'paddings': [1, 0, 1, 0],
'out_stride': [2, 2],
} }
def setUp(self):
self.config()
self.op_type = "im2sequence"
x = np.random.uniform(0.1, 1, [
self.batch_size, self.img_channels, self.img_height, self.img_width
]).astype("float32")
real_size = np.array([[6, 6], [4, 4]]).astype("float32")
out = np.array(Im2Sequence(x, real_size, self.attrs))
self.inputs = {'X': x, 'Y': real_size}
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
#set shiftwidth=4 set expandtab set tabstop=4
...@@ -16,6 +16,7 @@ from __future__ import print_function ...@@ -16,6 +16,7 @@ from __future__ import print_function
import unittest import unittest
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.fluid.layers.device import get_places
import paddle.fluid.nets as nets import paddle.fluid.nets as nets
from paddle.fluid.framework import Program, program_guard, default_main_program from paddle.fluid.framework import Program, program_guard, default_main_program
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
...@@ -238,7 +239,7 @@ class TestBook(unittest.TestCase): ...@@ -238,7 +239,7 @@ class TestBook(unittest.TestCase):
def test_get_places(self): def test_get_places(self):
program = Program() program = Program()
with program_guard(program): with program_guard(program):
x = layers.get_places(device_count=4) x = get_places(device_count=4)
self.assertIsNotNone(x) self.assertIsNotNone(x)
print(str(program)) print(str(program))
...@@ -251,12 +252,16 @@ class TestBook(unittest.TestCase): ...@@ -251,12 +252,16 @@ class TestBook(unittest.TestCase):
print(str(program)) print(str(program))
def test_im2sequence(self): def test_im2sequence(self):
print("test_im2sequence")
program = Program() program = Program()
with program_guard(program): with program_guard(program):
x = layers.data(name='x', shape=[3, 128, 128], dtype='float32') x = layers.data(name='x', shape=[3, 128, 128], dtype='float32')
y = layers.data(name='y', shape=[], dtype='float32')
output = layers.im2sequence( output = layers.im2sequence(
input=x, stride=[1, 1], filter_size=[2, 2]) input=x,
input_image_size=y,
stride=[1, 1],
filter_size=[2, 2],
out_stride=[1, 1])
self.assertIsNotNone(output) self.assertIsNotNone(output)
print(str(program)) print(str(program))
......
...@@ -97,7 +97,7 @@ class TestMomentumOptimizer(unittest.TestCase): ...@@ -97,7 +97,7 @@ class TestMomentumOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer._create_optimization_pass(
params_grads, mul_out, init_program) params_grads, mul_out, init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
sgd_op = opts[-1] sgd_op = opts[-1]
...@@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase): ...@@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer._create_optimization_pass(
params_grads, mul_out, init_program) params_grads, mul_out, init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
sgd_op = opts[-1] sgd_op = opts[-1]
...@@ -214,8 +214,8 @@ class TestAdagradOptimizer(unittest.TestCase): ...@@ -214,8 +214,8 @@ class TestAdagradOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out, opts = adagrad_optimizer._create_optimization_pass(
init_program) params_grads, mul_out, init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
self.assertEqual([op.type for op in opts], self.assertEqual([op.type for op in opts],
["fill_constant", "elementwise_mul", "adagrad"]) ["fill_constant", "elementwise_mul", "adagrad"])
...@@ -278,7 +278,7 @@ class TestAdamOptimizer(unittest.TestCase): ...@@ -278,7 +278,7 @@ class TestAdamOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0) self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out, opts = adam_optimizer._create_optimization_pass(params_grads, mul_out,
init_program) init_program)
self.assertEqual(len(opts), 5) self.assertEqual(len(opts), 5)
self.assertEqual( self.assertEqual(
...@@ -345,7 +345,7 @@ class TestAdamaxOptimizer(unittest.TestCase): ...@@ -345,7 +345,7 @@ class TestAdamaxOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out, opts = adamax_optimizer._create_optimization_pass(params_grads, mul_out,
init_program) init_program)
self.assertEqual(len(opts), 4) self.assertEqual(len(opts), 4)
self.assertEqual( self.assertEqual(
...@@ -409,7 +409,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): ...@@ -409,7 +409,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
opts = decayed_adagrad_optimizer.create_optimization_pass( opts = decayed_adagrad_optimizer._create_optimization_pass(
params_grads, mul_out, init_program) params_grads, mul_out, init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
self.assertEqual( self.assertEqual(
...@@ -475,7 +475,7 @@ class TestFtrlOptimizer(unittest.TestCase): ...@@ -475,7 +475,7 @@ class TestFtrlOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0) self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0)
opts = ftrl_optimizer.create_optimization_pass(params_grads, mul_out, opts = ftrl_optimizer._create_optimization_pass(params_grads, mul_out,
init_program) init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
self.assertEqual([op.type for op in opts], self.assertEqual([op.type for op in opts],
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
import unittest import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
import numpy import numpy
...@@ -115,7 +116,7 @@ class BaseParallelForTest(unittest.TestCase): ...@@ -115,7 +116,7 @@ class BaseParallelForTest(unittest.TestCase):
if use_parallel: if use_parallel:
thread_num = fluid.core.get_cuda_device_count( thread_num = fluid.core.get_cuda_device_count(
) if use_gpu else 8 ) if use_gpu else 8
places = fluid.layers.get_places(thread_num) places = get_places(thread_num)
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
data = next(generator) data = next(generator)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest
# Correct: General.
class TestSqueezeOp(OpTest):
def setUp(self):
self.op_type = "squeeze"
self.init_test_case()
self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
self.init_attrs()
self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(["X"], "Out")
def init_test_case(self):
self.ori_shape = (1, 3, 1, 5)
self.axes = (0, 2)
self.new_shape = (3, 5)
def init_attrs(self):
self.attrs = {"axes": self.axes, "inplace": False}
# Correct: There is mins axis.
class TestSqueezeOp1(TestSqueezeOp):
def init_test_case(self):
self.ori_shape = (1, 3, 1, 5)
self.axes = (0, -2)
self.new_shape = (3, 5)
# Correct: No axes input.
class TestSqueezeOp2(TestSqueezeOp):
def init_test_case(self):
self.ori_shape = (1, 3, 1, 5)
self.axes = ()
self.new_shape = (3, 5)
# Correct: Just part of axes be squeezed.
class TestSqueezeOp3(TestSqueezeOp):
def init_test_case(self):
self.ori_shape = (3, 1, 5, 1, 4, 1)
self.axes = (1, -1)
self.new_shape = (3, 5, 1, 4)
# Correct: Inplace.
class TestSqueezeOpInplace1(TestSqueezeOp):
def init_test_case(self):
self.ori_shape = (1, 3, 1, 5)
self.axes = (0, 2)
self.new_shape = (3, 5)
def init_attrs(self):
self.attrs = {"axes": self.axes, "inplace": True}
# Correct: Inplace. There is mins axis.
class TestSqueezeOpInplace2(TestSqueezeOp):
def inti_test_case(self):
self.ori_shape = (1, 3, 1, 5)
self.axes = (0, -2)
self.new_shape = (3, 5)
def init_attrs(self):
self.attrs = {"axes": self.axes, "inplace": True}
# Correct: Inplace. No axes input.
class TestSqueezeOpInplace3(TestSqueezeOp):
def init_test_case(self):
self.ori_shape = (1, 3, 1, 5)
self.axes = ()
self.new_shape = (3, 5)
def init_attrs(self):
self.attrs = {"axes": self.axes, "inplace": True}
# Correct: Inpalce. Just part of axes be squeezed.
class TestSqueezeOpInplace4(TestSqueezeOp):
def init_test_case(self):
self.ori_shape = (3, 1, 5, 1, 4, 1)
self.axes = (1, -1)
self.new_shape = (3, 5, 1, 4)
def init_attrs(self):
self.attrs = {"axes": self.axes, "inplace": True}
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest
# Correct: General.
class TestUnsqueezeOp(OpTest):
def setUp(self):
self.init_test_case()
self.op_type = "unsqueeze"
self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
self.init_attrs()
self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(["X"], "Out")
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (1, 2)
self.new_shape = (3, 1, 1, 5)
def init_attrs(self):
self.attrs = {"axes": self.axes, "inplace": False}
# Correct: Single input index.
class TestUnsqueezeOp1(TestUnsqueezeOp):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (-1, )
self.new_shape = (3, 5, 1)
# Correct: Mixed input axis.
class TestUnsqueezeOp2(TestUnsqueezeOp):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (0, -1)
self.new_shape = (1, 3, 5, 1)
# Correct: There is duplicated axis.
class TestUnsqueezeOp3(TestUnsqueezeOp):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (0, 3, 3)
self.new_shape = (1, 3, 2, 1, 1, 5)
# Correct: Reversed axes.
class TestUnsqueezeOp4(TestUnsqueezeOp):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (3, 1, 1)
self.new_shape = (3, 1, 1, 2, 5, 1)
# Correct: Inplace.
class TestUnsqueezeOpInplace1(TestUnsqueezeOp):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (0, 2)
self.new_shape = (1, 3, 1, 5)
def init_attrs(self):
self.attrs = {"axes": self.axes, "inplace": True}
# Correct: Inplace. There is mins index.
class TestUnsqueezeOpInplace2(TestUnsqueezeOp):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (0, -2)
self.new_shape = (1, 3, 1, 5)
def init_attrs(self):
self.attrs = {"axes": self.axes, "inplace": True}
# Correct: Inplace. There is duplicated axis.
class TestUnsqueezeOpInplace3(TestUnsqueezeOp):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (0, 3, 3)
self.new_shape = (1, 3, 2, 1, 1, 5)
def init_attrs(self):
self.attrs = {"axes": self.axes, "inplace": True}
if __name__ == "__main__":
unittest.main()
...@@ -181,6 +181,14 @@ else: ...@@ -181,6 +181,14 @@ else:
command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so" command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so"
if os.system(command) != 0: if os.system(command) != 0:
raise Exception("patch core.so failed, command: %s" % command) raise Exception("patch core.so failed, command: %s" % command)
if '${WITH_FLUID_ONLY}'== 'OFF':
# change rpath of _swig_paddle.so.
if "@APPLE@" == "1":
command = "install_name_tool -id \"@loader_path/../paddle/libs/\" ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
else:
command = "patchelf --set-rpath '$ORIGIN/../paddle/libs/' ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
if os.system(command) != 0:
raise Exception("patch _swig_paddle.so failed, command: %s" % command)
setup(name='${PACKAGE_NAME}', setup(name='${PACKAGE_NAME}',
version='${PADDLE_VERSION}', version='${PADDLE_VERSION}',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册