diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc index d09bf3ed161703b0cf273522921e157c7360a0bc..bd24e8a7d9c20b8cd9c4e41a76ffc33a004a9a69 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph.cc @@ -90,6 +90,20 @@ std::string DataFlowGraph::DotString() const { return dot.Build(); } +std::string DataFlowGraph::HumanReadableInfo(bool show_values, + bool show_functions) const { + std::stringstream values, functions; + for (auto &n : nodes.nodes()) { + if (show_values && n->IsValue()) { + values << n->repr() << "\n"; + } + if (show_functions && n->IsFunction()) { + functions << n->repr() << "\n"; + } + } + return "Values:\n" + values.str() + "\n\n" + "Functions:\n" + functions.str(); +} + // // NodesBFSIterator // @@ -146,7 +160,7 @@ bool GraphTraits::NodesBFSIterator::operator==( if ((!queue_.empty()) && (!other.queue_.empty())) { return queue_.front() == other.queue_.front() && visited_.size() == other.visited_.size(); // here need to check the - // equality of queue and + // equality of queue and // visited. Just a light but week implementation. } return false; @@ -208,6 +222,76 @@ Node *GraphTraits::NodesDFSIterator::operator->() { return stack_.top(); } +GraphTraits::NodesTSIterator::NodesTSIterator( + const std::vector &source) { + PADDLE_ENFORCE(!source.empty(), + "Start points of topological sorting should not be empty!"); + std::unordered_set visited; + std::unordered_set to_visit{source.begin(), source.end()}; + + std::vector inlink_visited; + while (!to_visit.empty()) { + std::vector queue(to_visit.begin(), to_visit.end()); + for (auto *p : queue) { + inlink_visited.clear(); + + std::copy_if(p->inlinks.begin(), p->inlinks.end(), + std::back_inserter(inlink_visited), + [&](Node *x) { return visited.count(x); }); + + if (inlink_visited.size() == p->inlinks.size()) { + sorted_.push_back(p); + for (auto *_ : p->outlinks) { + if (!visited.count(_)) { + to_visit.insert(_); + } + } + + to_visit.erase(p); + visited.insert(p); + } + } + } +} + +GraphTraits::NodesTSIterator::NodesTSIterator( + const paddle::inference::analysis::GraphTraits< + DataFlowGraph>::NodesTSIterator &other) + : sorted_(other.sorted_), cursor_(other.cursor_) {} + +Node &GraphTraits::NodesTSIterator::operator*() { + PADDLE_ENFORCE_LT(cursor_, sorted_.size()); + return *sorted_[cursor_]; +} + +paddle::inference::analysis::GraphTraits::NodesTSIterator + &GraphTraits::NodesTSIterator::operator++() { + if (++cursor_ >= sorted_.size()) { + sorted_.clear(); + cursor_ = 0; + } + return *this; +} +paddle::inference::analysis::GraphTraits::NodesTSIterator & +GraphTraits::NodesTSIterator::operator=( + const paddle::inference::analysis::GraphTraits< + DataFlowGraph>::NodesTSIterator &other) { + cursor_ = other.cursor_; + sorted_ = other.sorted_; + return *this; +} + +bool GraphTraits::NodesTSIterator::operator==( + const paddle::inference::analysis::GraphTraits< + DataFlowGraph>::NodesTSIterator &other) { + return sorted_ == other.sorted_ && cursor_ == other.cursor_; +} + +Node *GraphTraits::NodesTSIterator::operator->() { + PADDLE_ENFORCE_LT(cursor_, sorted_.size()); + return sorted_[cursor_]; +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h index a4fefc83e0c551d52bec87299bcbc966e7a2dbf7..5dd914d1971bfb5bcc0b1db41d73e2b67120bc06 100644 --- 
a/paddle/fluid/inference/analysis/data_flow_graph.h +++ b/paddle/fluid/inference/analysis/data_flow_graph.h @@ -48,6 +48,9 @@ struct DataFlowGraph { // Output a DOT graph file for debug. std::string DotString() const; + std::string HumanReadableInfo(bool show_values = true, + bool show_functions = true) const; + private: // Remove duplicate edges and so on. void Clean(); @@ -107,6 +110,32 @@ struct GraphTraits { std::unordered_set visited_; }; + // Topological sorting iterator on nodes. + struct NodesTSIterator + : public std::iterator { + NodesTSIterator() = default; + explicit NodesTSIterator(const std::vector &source); + NodesTSIterator(NodesTSIterator &&other) + : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) { + other.cursor_ = 0; + } + NodesTSIterator(const NodesTSIterator &other); + + Node &operator*(); + NodesTSIterator &operator++(); + // TODO(Superjomn) current implementation just compare the first + // element, need to compare the graph and all the elements in the queue and + // set. + NodesTSIterator &operator=(const NodesTSIterator &other); + bool operator==(const NodesTSIterator &other); + bool operator!=(const NodesTSIterator &other) { return !(*this == other); } + Node *operator->(); + + private: + std::vector sorted_; + int cursor_{0}; + }; + explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {} // default use BFS to visit the nodes. @@ -119,17 +148,24 @@ struct GraphTraits { iterator_range nodes_in_DFS() { return iterator_range(nodes_dfs_begin(), nodes_dfs_end()); } + iterator_range nodes_in_TS() { + return iterator_range(nodes_ts_begin(), nodes_ts_end()); + } private: NodesBFSIterator nodes_bfs_begin() { return NodesBFSIterator(graph_->inputs); } NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); } + NodesDFSIterator nodes_dfs_begin() { return NodesDFSIterator(graph_->inputs); } NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); } + NodesTSIterator nodes_ts_begin() { return NodesTSIterator(graph_->inputs); } + NodesTSIterator nodes_ts_end() { return NodesTSIterator(); } + private: DataFlowGraph *graph_; }; diff --git a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc index 9d7cceeb65888b8ba3fdf39e88fc2877abd82d11..7912f8d7f17ae3c79e8f73f36b7095fd52c9ac86 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc @@ -24,11 +24,11 @@ TEST(DataFlowGraph, BFS) { auto dfg = ProgramDescToDFG(desc); dfg.Build(); - for (auto* in : dfg.inputs) { + for (auto *in : dfg.inputs) { LOG(INFO) << "inputs: " << in->name() << " " << static_cast(in->type()); } - for (auto* out : dfg.outputs) { + for (auto *out : dfg.outputs) { LOG(INFO) << "outputs: " << out->name() << " " << static_cast(out->type()); } @@ -57,6 +57,71 @@ TEST(DataFlowGraph, DFS) { ASSERT_EQ(count, dfg.nodes.size()); } +// Topological sorting. 
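The new `NodesTSIterator` computes the whole ordering eagerly in its constructor: starting from the graph inputs, it repeatedly emits any node whose in-links have all been visited and pushes that node's out-links onto the work set (a Kahn-style sweep). A minimal sketch of the same idea in Python, with hypothetical `inlinks`/`outlinks` attributes standing in for the `Node` fields used above:

```python
def topological_order(sources):
    """Kahn-style sweep: emit a node once all of its in-links are visited.

    `sources` are the graph inputs; each node is assumed to expose
    `inlinks` and `outlinks` lists, mirroring the C++ Node fields above.
    """
    visited, to_visit, order = set(), set(sources), []
    while to_visit:
        progressed = False
        for node in list(to_visit):
            # a node is ready once every predecessor has already been emitted
            if all(pred in visited for pred in node.inlinks):
                order.append(node)
                visited.add(node)
                to_visit.discard(node)
                to_visit.update(s for s in node.outlinks if s not in visited)
                progressed = True
        if not progressed:
            raise ValueError("cycle detected; topological sort is impossible")
    return order
```

Like the C++ constructor, this only terminates when every node reachable from the sources can eventually have all of its in-links visited; the explicit cycle check is an addition the C++ code does not make.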
+/* + * Graph topology + * inputs: 0, 1, 2 + * 0 -> 4 + * 0 -> 5 + * 1 -> 6 + * 2 -> 7 + * 4 -> 5 + * 4 -> 7 + * 4 -> 3 + * 7 -> 3 + */ +TEST(DataFlowGraph, TS) { + DataFlowGraph graph; + + for (int i = 0; i < 8; i++) { + auto *node = graph.nodes.Create(Node::Type::kValue); + node->SetName("node-" + std::to_string(i)); + } + + auto add_link = [&](int i, int j) { + Node *source = graph.nodes.GetMutable(i); + Node *target = graph.nodes.GetMutable(j); + target->inlinks.push_back(source); + source->outlinks.push_back(target); + }; + + graph.inputs.push_back(graph.nodes.GetMutable(0)); + graph.inputs.push_back(graph.nodes.GetMutable(1)); + graph.inputs.push_back(graph.nodes.GetMutable(2)); + + add_link(0, 4); + add_link(0, 5); + add_link(1, 6); + add_link(2, 7); + add_link(4, 5); + add_link(4, 7); + add_link(4, 3); + add_link(7, 3); + + auto its = GraphTraits(&graph).nodes_in_TS(); + std::vector sorted_ids; + for (auto it = its.begin(); it != its.end(); ++it) { + LOG(INFO) << it->name(); + sorted_ids.push_back(it->id()); + } + + // Assert a occurs prior to b in the sorted_ids. + auto assert_positive_sequence_pair = [&](int a, int b) { + auto a_offset = std::find(sorted_ids.begin(), sorted_ids.end(), a); + auto b_offset = std::find(sorted_ids.begin(), sorted_ids.end(), b); + ASSERT_LT(a_offset, b_offset); + }; + + assert_positive_sequence_pair(2, 7); + assert_positive_sequence_pair(7, 3); + assert_positive_sequence_pair(4, 3); + assert_positive_sequence_pair(0, 4); + assert_positive_sequence_pair(0, 5); + assert_positive_sequence_pair(1, 6); + assert_positive_sequence_pair(4, 5); + assert_positive_sequence_pair(4, 7); +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index 3b0c9b2886504ee381b2b33e06a4552602725e57..9a1643d5b35c067ba9064286bab32019fb34fbe8 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -86,8 +86,9 @@ class RpnTargetAssignKernel : public framework::OpKernel { std::minstd_rand engine, std::vector* inds) const { std::uniform_real_distribution uniform(0, 1); - if (inds->size() > num) { - for (int i = num; i < inds->size(); ++i) { + const int64_t size = static_cast(inds->size()); + if (size > num) { + for (int64_t i = num; i < size; ++i) { int rng_ind = std::floor(uniform(engine) * i); if (rng_ind < num) std::iter_swap(inds->begin() + rng_ind + offset, diff --git a/paddle/fluid/operators/im2sequence_op.cc b/paddle/fluid/operators/im2sequence_op.cc index 0669661d225c664010fce97f0a526b62988b92c5..c8c7f36536a76ea103ef6f5689c0fbdb76102688 100644 --- a/paddle/fluid/operators/im2sequence_op.cc +++ b/paddle/fluid/operators/im2sequence_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/im2sequence_op.h" +#include #include namespace paddle { @@ -28,20 +29,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel { "Input(X) of Im2SequenceOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of Im2SequenceOp op should not be null."); - auto in_dim = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(in_dim.size(), 4, "Input(X) format must be 4D tensor, eg., NCHW."); - - auto kernels = ctx->Attrs().Get>("kernels"); - auto strides = ctx->Attrs().Get>("strides"); - auto paddings = ctx->Attrs().Get>("paddings"); - int batch_size = in_dim[0]; int img_channels = in_dim[1]; int img_height = in_dim[2]; int img_width = in_dim[3]; + auto kernels = ctx->Attrs().Get>("kernels"); + auto strides = ctx->Attrs().Get>("strides"); + auto paddings = ctx->Attrs().Get>("paddings"); + int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], paddings[2], strides[0]); int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], @@ -61,6 +61,10 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker { "C: channels" "H: height" "W: width"); + AddInput("Y", + "(Tensor) The input tensor of image real size(H, W)." + "2-D with shape [batchsize, 2]") + .AsDispensable(); AddOutput("Out", "(LodTensor) The output data of im2sequence op,"); AddAttr>("kernels", "(vector), the " @@ -73,6 +77,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker { "(vector default:{0, 0, 0, 0}), the " "paddings(up_pad, left_pad, down_pad, right_pad)") .SetDefault({0, 0, 0, 0}); + AddAttr>("out_stride", + "the attribute is valid only when input(Y)" + "is not NULL.this attribute represents the" + "scaling of the pic through the CNN" + "(vector dedault:{1,1}),the out_stride" + " (out_stride_height, out_stride_width)") + .SetDefault({1, 1}); AddComment(R"DOC( This op uses kernels to scan images and converts these images to sequences. After expanding, The number of time steps are output_height * output_width @@ -123,7 +134,7 @@ output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.] [ 7. 1. 7. 9. 2. 1. 3. 5.] [ 5. 7. 2. 4. 1. 3. 9. 0.] [ 7. 9. 4. 8. 3. 5. 0. 8.]] -output.dims = {8, 9} +output.dims = {8, 8} output.lod = [[0, 4, 8]] )DOC"); diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h index d792c68f784d8ffec0eb303a6ab9b59c9f121fa7..5bfb91db1887909c65de5f2e5321a8e6be6cf5ac 100644 --- a/paddle/fluid/operators/im2sequence_op.h +++ b/paddle/fluid/operators/im2sequence_op.h @@ -13,6 +13,7 @@ limitations under the License. 
*/ #pragma once +#include #include #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" @@ -39,50 +40,106 @@ class Im2SequenceKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { const Tensor* in = ctx.Input("X"); LoDTensor* out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - // TODO(wanghaoshuang): Add layout checker after 'set_layout' - // being available for python API - // PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW, - // "Input(X) layout must be NCHW"); auto in_dim = in->dims(); int batch_size = in_dim[0]; int img_channels = in_dim[1]; int img_height = in_dim[2]; int img_width = in_dim[3]; - auto kernels = ctx.Attr>("kernels"); auto strides = ctx.Attr>("strides"); auto paddings = ctx.Attr>("paddings"); - int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], - paddings[2], strides[0]); - int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], - paddings[3], strides[1]); - - const std::vector dilations({1, 1}); - - auto out_dims = out->dims(); - out->Resize({batch_size, out->numel() / batch_size}); - for (int i = 0; i < batch_size; i++) { - const Tensor src = - in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); - Tensor dst = out->Slice(i, i + 1).Resize( - {output_height, output_width, img_channels, kernels[0], kernels[1]}); - - math::Im2ColFunctor f; - auto& dev_ctx = ctx.template device_context(); - f(dev_ctx, src, dilations, strides, paddings, &dst); - } - out->Resize(out_dims); - - // set lod information - // TODO(wanghaoshuang): Move this to InferShape - framework::LoD lod(1); - lod[0].reserve(batch_size + 1); - for (int i = 0, offset = 0; i < batch_size + 1; ++i) { + if (ctx.HasInput("Y") && batch_size > 1) { + const Tensor* imgrealsize = ctx.Input("Y"); + auto out_stride = ctx.Attr>("out_stride"); + Tensor cpu_shape_tensor; + TensorCopySync(*imgrealsize, platform::CPUPlace(), &cpu_shape_tensor); + std::vector imgreal_h; + std::vector imgreal_w; + std::vector output_height; + std::vector output_width; + int result = 0; + for (int i = 0; i < batch_size; i++) { + int tmp_real_h = static_cast((cpu_shape_tensor.data())[2 * i]); + int tmp_real_w = + static_cast((cpu_shape_tensor.data())[2 * i + 1]); + if (tmp_real_h % out_stride[0] == 0) { + tmp_real_h = tmp_real_h / out_stride[0]; + } else { + tmp_real_h = tmp_real_h / out_stride[0] + 1; + } + if (tmp_real_w % out_stride[1] == 0) { + tmp_real_w = tmp_real_w / out_stride[1]; + } else { + tmp_real_w = tmp_real_w / out_stride[1] + 1; + } + imgreal_h.push_back(tmp_real_h); + imgreal_w.push_back(tmp_real_w); + output_height.push_back(Im2SeqOutputSize( + imgreal_h[i], kernels[0], paddings[0], paddings[2], strides[0])); + output_width.push_back(Im2SeqOutputSize( + imgreal_w[i], kernels[1], paddings[1], paddings[3], strides[1])); + result += output_height[i] * output_width[i]; + } + + out->mutable_data({result, img_channels * kernels[0] * kernels[1]}, + ctx.GetPlace()); + + const std::vector dilations({1, 1}); + int offset_out = 0; + for (int i = 0; i < batch_size; i++) { + const Tensor src = + in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); + Tensor dst = out->Slice(offset_out, + offset_out + output_height[i] * output_width[i]) + .Resize({output_height[i], output_width[i], + img_channels, kernels[0], kernels[1]}); + offset_out += output_height[i] * output_width[i]; + + math::Im2ColFunctor f; + auto& dev_ctx = ctx.template device_context(); + f(dev_ctx, 
src, dilations, strides, paddings, &dst); + } + framework::LoD lod(1); + lod[0].reserve(batch_size + 1); + int offset = 0; + lod[0].push_back(offset); + for (int i = 0; i < batch_size; ++i) { + offset += output_height[i] * output_width[i]; + lod[0].push_back(offset); + } + out->set_lod(lod); + } else { + out->mutable_data(ctx.GetPlace()); + int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], + paddings[2], strides[0]); + int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], + paddings[3], strides[1]); + + const std::vector dilations({1, 1}); + auto out_dims = out->dims(); + out->Resize({batch_size, out->numel() / batch_size}); + for (int i = 0; i < batch_size; i++) { + const Tensor src = + in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); + Tensor dst = + out->Slice(i, i + 1).Resize({output_height, output_width, + img_channels, kernels[0], kernels[1]}); + + math::Im2ColFunctor f; + auto& dev_ctx = ctx.template device_context(); + f(dev_ctx, src, dilations, strides, paddings, &dst); + } + out->Resize(out_dims); + framework::LoD lod(1); + lod[0].reserve(batch_size + 1); + int offset = 0; lod[0].push_back(offset); - offset += output_height * output_width; + for (int i = 0; i < batch_size; ++i) { + offset += output_height * output_width; + lod[0].push_back(offset); + } + out->set_lod(lod); } - out->set_lod(lod); } }; diff --git a/paddle/fluid/operators/math/im2col.cc b/paddle/fluid/operators/math/im2col.cc index 336d6febc2ce3a55e82ed613bbc1081101f822f0..a50b9ace39249f4f899a46e171bbdced033b46bc 100644 --- a/paddle/fluid/operators/math/im2col.cc +++ b/paddle/fluid/operators/math/im2col.cc @@ -43,21 +43,6 @@ class Im2ColFunctordims()[3]; int col_width = col->dims()[4]; - PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - - ((dilation[0] * (filter_height - 1) + 1))) / - stride[0] + - 1, - col_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - - ((dilation[1] * (filter_width - 1) + 1))) / - stride[1] + - 1, - col_width, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - int channels_col = im_channels * filter_height * filter_width; const T* im_data = im.data(); @@ -178,17 +163,6 @@ class Im2ColFunctordims()[0]; int col_width = col->dims()[1]; - PADDLE_ENFORCE_EQ( - (im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1, - col_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ( - (im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1, - col_width, - "col_width and padding(padding_left, padding_right) are " - "inconsistent."); - const T* im_data = im.data(); T* col_data = col->data(); diff --git a/paddle/fluid/operators/math/im2col.cu b/paddle/fluid/operators/math/im2col.cu index eecb233d22cea06da016b2671fd606b70eddf5a5..4897767f4d88d9e079f05c921153923c4eb354b0 100644 --- a/paddle/fluid/operators/math/im2col.cu +++ b/paddle/fluid/operators/math/im2col.cu @@ -77,21 +77,6 @@ class Im2ColFunctordims()[3]; int col_width = col->dims()[4]; - PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - - (dilation[0] * (filter_height - 1) + 1)) / - stride[0] + - 1, - col_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - - (dilation[1] * (filter_width - 1) + 1)) / - stride[1] + - 1, - col_width, - "col_width and padding(padding_left, 
padding_right) are " - "inconsistent."); - int num_outputs = im_channels * col_height * col_width; int blocks = (num_outputs + 1024 - 1) / 1024; int block_x = 512; @@ -274,21 +259,6 @@ class Im2ColFunctordims()[0]; int col_width = col->dims()[1]; - PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - - (dilation[0] * (filter_height - 1) + 1)) / - stride[0] + - 1, - col_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - - (dilation[1] * (filter_width - 1) + 1)) / - stride[1] + - 1, - col_width, - "col_width and padding(padding_left, padding_right) are " - "inconsistent."); - int block_dim_x = 0; int block_dim_y = 0; if (filter_height <= 4 && filter_width <= 4) { diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 20037d0764056c2a093af801c9cc1eb788dd46d6..e0d7937ae2f3ce4bda12f3771727e2992d63cb9b 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -46,7 +46,7 @@ ENDIF() # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies cc_library(device_context SRCS device_context.cc init.cc DEPS malloc - place eigen3 stringpiece cpu_helper ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) + place eigen3 stringpiece cpu_helper framework_proto ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) cc_test(init_test SRCS init_test.cc DEPS device_context) diff --git a/python/paddle/fluid/annotations.py b/python/paddle/fluid/annotations.py new file mode 100644 index 0000000000000000000000000000000000000000..bb8756a4664013643c278c013ca21bb237a6b4a7 --- /dev/null +++ b/python/paddle/fluid/annotations.py @@ -0,0 +1,38 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import sys + +__all__ = ['deprecated'] + + +def deprecated(since, instead, extra_message=""): + def decorator(func): + err_msg = "API {0} is deprecated since {1}. 
Please use {2} instead.".format( + func.__name__, since, instead) + if len(extra_message) != 0: + err_msg += "\n" + err_msg += extra_message + + @functools.wraps(func) + def wrapper(*args, **kwargs): + print >> sys.stderr, err_msg + return func(*args, **kwargs) + + wrapper.__doc__ += "\n " + wrapper.__doc__ += err_msg + return wrapper + + return decorator diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 4faa06303170488d0de2fda4c1461cfe2d623d35..ddcde04716d21df1f18e7202936f470d3d58a661 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -18,10 +18,7 @@ import collections import copy import unique_name -__all__ = [ - 'append_backward', - 'calc_gradient', -] +__all__ = ['append_backward'] def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): @@ -123,7 +120,8 @@ def _append_grad_suffix_(name): def _addup_repetitive_outputs_(op_descs): """ In backward part, an variable may be the output of more than one ops. - In this case, the variable should be the accumulation of all the outputs. + And one op may yield its multiple outputs to the same variable. + In these cases, the variable should be the accumulation of all the outputs. `sum_op`s are added to implement the accumulate. """ pending_sum_ops = [] @@ -136,29 +134,46 @@ def _addup_repetitive_outputs_(op_descs): "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]}, {"use_mkldnn": False}), idx)) renamed_vars[var_name] = [var_name] - for var_name in op_desc.output_arg_names(): - if var_name == core.empty_var_name( - ) or var_name in op_desc.input_arg_names(): - # empty variable or inplace op - continue - if len(renamed_vars[var_name]) == 0: - # it's the first time we get the variable - renamed_vars[var_name] = [var_name] - else: - if len(renamed_vars[var_name]) == 1: + for param_idx, param_name in enumerate(op_desc.output_names()): + arg_names = op_desc.output(param_name) + for arg_idx, var_name in enumerate(arg_names): + if var_name == core.empty_var_name( + ) or var_name in op_desc.input_arg_names(): + # empty variable or inplace op + continue + if len(renamed_vars[var_name]) == 0: + # it's the first time we get the variable + renamed_vars[var_name] = [var_name] + else: + if len(renamed_vars[var_name]) == 1: + new_name = var_name + "@RENAME@" + \ + str(var_rename_count[var_name]) + var_rename_count[var_name] += 1 + # rename original var_name + renamed_vars[var_name][0] = new_name + _rename_arg_(op_descs, var_name, new_name, 0, idx) + _rename_arg_(pending_sum_ops, var_name, new_name) + + for p in op_desc.output_names()[:param_idx]: + p_arg_names = op_desc.output(p) + if var_name in p_arg_names: + op_desc.set_output(p, [ + new_name if x == var_name else x + for x in p_arg_names + ]) + + arg_names = [ + new_name if x == var_name else x + for x in arg_names[:arg_idx] + ] + arg_names[arg_idx:] + new_name = var_name + "@RENAME@" + \ str(var_rename_count[var_name]) var_rename_count[var_name] += 1 - # rename original var_name - renamed_vars[var_name][0] = new_name - _rename_arg_(op_descs, var_name, new_name, 0, idx) - _rename_arg_(pending_sum_ops, var_name, new_name) - - new_name = var_name + "@RENAME@" + \ - str(var_rename_count[var_name]) - var_rename_count[var_name] += 1 - op_desc.rename_output(var_name, new_name) - renamed_vars[var_name].append(new_name) + arg_names[arg_idx] = new_name + op_desc.set_output(param_name, arg_names) + renamed_vars[var_name].append(new_name) + for var_name, inputs in renamed_vars.iteritems(): if len(inputs) > 1: 
pending_sum_ops.append( diff --git a/python/paddle/fluid/layers/device.py b/python/paddle/fluid/layers/device.py index e0c1aab230aeed7fb858e91e7da7eae58032ee16..384d302a709eeec220864b9e8c9210ed028470f6 100644 --- a/python/paddle/fluid/layers/device.py +++ b/python/paddle/fluid/layers/device.py @@ -18,10 +18,12 @@ All util layers. from layer_function_generator import autodoc from ..framework import unique_name from ..layer_helper import LayerHelper +from ..annotations import deprecated -__all__ = ['get_places'] +__all__ = [] +@deprecated(since='0.15.0', instead="ParallelExecutor") @autodoc() def get_places(device_count=None, device_type=None): helper = LayerHelper('get_places', **locals()) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index bcf520d5a4e3bbe1d949d08f42199dd8c5cdc947..07b806f544497ccabe4dde9a370e90da372e6cba 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1,4 +1,18 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright (c ) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -3900,7 +3914,13 @@ def transpose(x, perm, name=None): return out -def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): +def im2sequence(input, + filter_size=1, + stride=1, + padding=0, + input_image_size=None, + out_stride=1, + name=None): """ Extracts image patches from the input tensor to form a tensor of shape {input.batch_size * output_height * output_width, filter_size_H * @@ -3937,6 +3957,15 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): padding_up = padding_down = padding_left = padding_right = padding Default: padding = 0. + input_image_size(Variable): the input contains image real size.It's dim + is [batchsize, 2]. It is dispensable.It is just for batch inference. + + out_stride(int|tuple): The scaling of image through CNN. It is + dispensable. It is valid only when input_image_size is not null. + If out_stride is tuple, it must contain two intergers, + (out_stride_H, out_stride_W). Otherwise, + the out_stride_H = out_stride_W = out_stride. + name (int): The name of this layer. It is optional. Returns: @@ -3987,7 +4016,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): [ 5. 7. 2. 4. 1. 3. 9. 0.] [ 7. 9. 4. 8. 3. 5. 0. 
8.]] - output.dims = {8, 9} + output.dims = {8, 8} output.lod = [[4, 4]] @@ -4009,18 +4038,17 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): if len(padding) == 2: padding.append(padding[0]) padding.append(padding[1]) - + inputs = {"X": input} + attrs = {"kernels": filter_size, "strides": stride, "padding": padding} + if input_image_size: + if isinstance(out_stride, int): + out_stride = [out_stride, out_stride] + inputs["Y"] = input_image_size + attrs["out_stride"] = out_stride helper = LayerHelper('im2sequence', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) helper.append_op( - type='im2sequence', - inputs={'X': input}, - outputs={'Out': out}, - attrs={ - 'kernels': filter_size, - 'strides': stride, - 'paddings': padding, - }) + type='im2sequence', inputs=inputs, outputs={'Out': out}, attrs=attrs) return out diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 75ee40fa9ca94cdd84ee7acbb62d6e652ac7fa33..e2acf6d41a0085e6f741e46063b47d2ff1e769cb 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -29,7 +29,7 @@ __all__ = [ 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', 'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer', - 'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer', 'RMSPropOptimizer' + 'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'RMSPropOptimizer' ] @@ -67,7 +67,7 @@ class Optimizer(object): self._LARS_weight_decay = LARS_weight_decay def _create_global_learning_rate(self): - lr = self.global_learning_rate() + lr = self._global_learning_rate() if isinstance(lr, framework.Variable): return @@ -86,7 +86,7 @@ class Optimizer(object): dtype='float32' if self._dtype == None else self._dtype, persistable=True) - def global_learning_rate(self, program=None): + def _global_learning_rate(self, program=None): """ get global decayed learning rate :return: @@ -110,9 +110,9 @@ class Optimizer(object): return param_lr else: if param_lr == 1.0: - return self.global_learning_rate() + return self._global_learning_rate() else: - return self.global_learning_rate() * param_lr + return self._global_learning_rate() * param_lr def _create_accumulators(self, block, parameters): """Create all accumulators needed by the parameters @@ -185,10 +185,10 @@ class Optimizer(object): format(name, param.name)) return self._accumulators[name][param.name] - def create_optimization_pass(self, - parameters_and_grads, - loss, - startup_program=None): + def _create_optimization_pass(self, + parameters_and_grads, + loss, + startup_program=None): """Add optimization operators to update gradients to variables. 
Args: @@ -221,7 +221,7 @@ class Optimizer(object): self._create_global_learning_rate() if self._LARS_weight_decay > 0.0: layers.append_LARS(parameters_and_grads, - self.global_learning_rate(), + self._global_learning_rate(), self._LARS_weight_decay) optimize_ops = [] @@ -262,8 +262,8 @@ class Optimizer(object): params_grads = append_regularization_ops(params_grads, self.regularization) - optimize_ops = self.create_optimization_pass(params_grads, loss, - startup_program) + optimize_ops = self._create_optimization_pass(params_grads, loss, + startup_program) return optimize_ops, params_grads diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index 1df7b99aad6094a8b8ddfe783b9de35cef61c524..95002aa7f9bb639828b47eb1e86e4ef954fb85ff 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function - +from paddle.fluid.layers.device import get_places import unittest import paddle.fluid as fluid import paddle @@ -144,7 +144,7 @@ def train(word_dict, cost, acc_out, prediction = net_method( data, label, input_dim=dict_dim, class_dim=class_dim) else: - places = fluid.layers.get_places() + places = get_places() pd = fluid.layers.ParallelDo(places) with pd.do(): cost, acc, _ = net_method( diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 5f5c8544bbdb87421f129b201a0ebaf4cb8602a1..49f549fa184037a64aa846f0d1d0e1b57db1f2ef 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from __future__ import print_function -import argparse -import paddle.fluid as fluid -import paddle -import sys -import numpy -import unittest + import math -import sys import os +import sys +import unittest + +import numpy + +import paddle +import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places BATCH_SIZE = 64 @@ -76,7 +78,7 @@ def train(nn_type, net_conf = conv_net if parallel: - places = fluid.layers.get_places() + places = get_places() pd = fluid.layers.ParallelDo(places) with pd.do(): img_ = pd.read_input(img) diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index 49bd72c7a53c0ae740bdbabe15b1d37340699d41..80e0692bc640efc280c43bd5b929847ad29207c4 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -14,6 +14,7 @@ import paddle import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places import unittest import os import numpy as np @@ -80,7 +81,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): avg_cost, predict_word = __network__( [first_word, second_word, third_word, forth_word, next_word]) else: - places = fluid.layers.get_places() + places = get_places() pd = fluid.layers.ParallelDo(places) with pd.do(): avg_cost, predict_word = __network__( diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py index be347cd5315668dde0454d7959dbf9bcfa465b5f..bec9f8594ff7c1aff8ae5ed55c9623754d9ea091 100644 --- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py +++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import paddle -import paddle.fluid as fluid import math import sys +import paddle +import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places + # need to fix random seed and training data to compare the loss # value accurately calculated by the default and the memory optimization # version. 
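These test updates all follow the same pattern: `get_places` has been dropped from `fluid.layers.__all__`, so callers now import it from `paddle.fluid.layers.device`, and the function itself is wrapped with the new `@deprecated` decorator from `annotations.py`, which writes a notice to stderr on every call. A hedged sketch of how that decorator is meant to be applied to any other API slated for removal (the function below is hypothetical):

```python
from paddle.fluid.annotations import deprecated


@deprecated(since='0.15.0', instead='fluid.layers.some_new_api',
            extra_message='Kept only for backward compatibility.')
def old_helper(x):
    """Hypothetical helper that forwards to the replacement API."""
    return x


# Every call still works, but the wrapper installed by deprecated()
# writes "API old_helper is deprecated since 0.15.0. Please use
# fluid.layers.some_new_api instead." to stderr before delegating.
old_helper(42)
```

Because the wrapper appends the deprecation message to `__doc__`, a decorated function needs a docstring, as `get_places` gets via `@autodoc()`.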
@@ -34,7 +35,7 @@ if fluid.core.is_compiled_with_cuda(): use_nccl = False place = fluid.CUDAPlace(0) -places = fluid.layers.get_places(device_count=0, device_type=device_type) +places = get_places(device_count=0, device_type=device_type) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) with pd.do(): x_ = pd.read_input(x) diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py index 06e676cd83e77549afd679e730426c590cc046bf..7f2a9e6971ed933463216e38498d48ab132a1a37 100644 --- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py @@ -16,8 +16,6 @@ import unittest import paddle.fluid as fluid import paddle.fluid.layers as layers -import paddle.fluid.framework as framework -import paddle.fluid.optimizer as optimizer from paddle.fluid.backward import calc_gradient diff --git a/python/paddle/fluid/tests/unittests/test_get_places_op.py b/python/paddle/fluid/tests/unittests/test_get_places_op.py index 6dab1e22f0c50ab011d6b8e8944097600cf3fecc..964423e2d2638224244b4ca774d8eee08f3ec989 100644 --- a/python/paddle/fluid/tests/unittests/test_get_places_op.py +++ b/python/paddle/fluid/tests/unittests/test_get_places_op.py @@ -13,6 +13,7 @@ # limitations under the License. import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places import decorators import unittest @@ -20,7 +21,7 @@ import unittest class TestGetPlaces(unittest.TestCase): @decorators.prog_scope() def test_get_places(self): - places = fluid.layers.get_places() + places = get_places() cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) exe.run(fluid.default_main_program()) diff --git a/python/paddle/fluid/tests/unittests/test_im2sequence_op.py b/python/paddle/fluid/tests/unittests/test_im2sequence_op.py index 4946475f11a4fc0ccaffeec6821d3976ea7c6560..13bc5768740ece00bbe285a0b47d82bb8a42d2c7 100644 --- a/python/paddle/fluid/tests/unittests/test_im2sequence_op.py +++ b/python/paddle/fluid/tests/unittests/test_im2sequence_op.py @@ -16,23 +16,48 @@ import numpy as np from op_test import OpTest -def get_output_shape(attrs, in_shape): +def get_output_shape(attrs, in_shape, img_real_size): + batchsize = in_shape[0] img_height = in_shape[2] img_width = in_shape[3] + paddings = np.array(attrs['paddings']).astype("int32") + kernels = np.array(attrs['kernels']).astype("int32") + strides = np.array(attrs['strides']).astype("int32") + output_height = np.zeros((1, batchsize)).astype("int32") + output_width = np.zeros((1, batchsize)).astype("int32") + if len(img_real_size): + out_stride = np.array(attrs['out_stride']).astype("int32") + imgreal_h = 0 + imgreal_w = 0 + for index in range(batchsize): + if img_real_size[index, 0] % out_stride[0] == 0: + imgreal_h = img_real_size[index, 0] / out_stride[0] + else: + imgreal_h = img_real_size[index, 0] / out_stride[0] + 1 + if img_real_size[index, 0] % out_stride[1] == 0: + imgreal_w = img_real_size[index, 1] / out_stride[1] + else: + imgreal_w = img_real_size[index, 0] / out_stride[1] + 1 + output_height[0,index] = \ + 1 + \ + (imgreal_h + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \ + strides[0] - paddings = attrs['paddings'] - kernels = attrs['kernels'] - strides = attrs['strides'] + output_width[0,index] = \ + 1 + \ + (imgreal_w + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \ + strides[1] + else: + for index in range(batchsize): + output_height[0,index] = \ + 1 + \ + (img_height + paddings[0] + paddings[2] - 
kernels[0] + strides[0] - 1) / \ + strides[0] - output_height = \ - 1 + \ - (img_height + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \ - strides[0] - - output_width = \ - 1 + \ - (img_width + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \ - strides[1] + output_width[0,index] = \ + 1 + \ + (img_width + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \ + strides[1] return output_height, output_width @@ -75,22 +100,25 @@ def im2col(attrs, im, col): im_row_offset][im_col_offset] -def Im2Sequence(inputs, attrs): - output_height, output_width = get_output_shape(attrs, inputs.shape) +def Im2Sequence(inputs, img_real_size, attrs): + output_height, output_width = get_output_shape(attrs, inputs.shape, + img_real_size) img_channels = inputs.shape[1] batch_size = inputs.shape[0] - out = np.zeros([ - batch_size, output_height, output_width, img_channels, - attrs['kernels'][0], attrs['kernels'][1] - ]).astype("float32") - - for i in range(len(inputs)): - im2col(attrs, inputs[i], out[i]) - - out = out.reshape([ - batch_size * output_height * output_width, - img_channels * attrs['kernels'][0] * attrs['kernels'][1] - ]) + out = [] + for index in range(batch_size): + tmp = np.zeros([ + output_height[0, index], output_width[0, index], img_channels, + attrs['kernels'][0], attrs['kernels'][1] + ]).astype("float32") + out.append(tmp) + for index in range(len(inputs)): + im2col(attrs, inputs[index], out[index]) + out[index] = out[index].reshape([ + output_height[0, index] * output_width[0, index], + img_channels * attrs['kernels'][0] * attrs['kernels'][1] + ]) + out = np.concatenate(out, axis=0) return out @@ -103,7 +131,7 @@ class TestBlockExpandOp(OpTest): self.attrs = { 'kernels': [2, 2], 'strides': [1, 1], - 'paddings': [1, 1, 1, 1] + 'paddings': [1, 1, 1, 1], } def setUp(self): @@ -113,7 +141,8 @@ class TestBlockExpandOp(OpTest): self.batch_size, self.img_channels, self.img_height, self.img_width ]).astype("float32") - out = Im2Sequence(x, self.attrs) + real_size = np.array([]).astype("float32") + out = Im2Sequence(x, real_size, self.attrs) self.inputs = {'X': x} self.outputs = {'Out': out} @@ -133,20 +162,20 @@ class TestBlockExpandOpCase2(TestBlockExpandOp): self.attrs = { 'kernels': [2, 1], 'strides': [2, 1], - 'paddings': [2, 1, 2, 1] + 'paddings': [2, 1, 2, 1], } class TestBlockExpandOpCase3(TestBlockExpandOp): def config(self): - self.batch_size = 3 + self.batch_size = 2 self.img_channels = 1 self.img_height = 4 self.img_width = 5 self.attrs = { 'kernels': [2, 1], 'strides': [2, 1], - 'paddings': [2, 0, 2, 0] + 'paddings': [2, 0, 2, 0], } @@ -159,9 +188,94 @@ class TestBlockExpandOpCase4(TestBlockExpandOp): self.attrs = { 'kernels': [2, 2], 'strides': [1, 1], - 'paddings': [0, 0, 0, 0] + 'paddings': [0, 0, 0, 0], + } + + +class TestBlockExpandOpCase5(OpTest): + def config(self): + self.batch_size = 1 + self.img_channels = 3 + self.img_height = 4 + self.img_width = 5 + self.attrs = { + 'kernels': [2, 1], + 'strides': [2, 1], + 'paddings': [2, 1, 2, 1], + 'out_stride': [2, 2], + } + + def setUp(self): + self.config() + self.op_type = "im2sequence" + x = np.random.uniform(0.1, 1, [ + self.batch_size, self.img_channels, self.img_height, self.img_width + ]).astype("float32") + real_size = np.array([[8, 10], [5, 8]]).astype("float32") + out = np.array(Im2Sequence(x, real_size, self.attrs)) + self.inputs = {'X': x, 'Y': real_size} #l ?? 
+ self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + +class TestBlockExpandOpCase6(OpTest): + def config(self): + self.batch_size = 3 + self.img_channels = 1 + self.img_height = 4 + self.img_width = 5 + self.attrs = { + 'kernels': [2, 1], + 'strides': [1, 1], + 'paddings': [0, 0, 0, 0], + 'out_stride': [1, 1], + } + + def setUp(self): + self.config() + self.op_type = "im2sequence" + x = np.random.uniform(0.1, 1, [ + self.batch_size, self.img_channels, self.img_height, self.img_width + ]).astype("float32") + real_size = np.array([[8, 10], [5, 8], [5, 8]]).astype("float32") + out = np.array(Im2Sequence(x, real_size, self.attrs)) + self.inputs = {'X': x, 'Y': real_size} #l ?? + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + +class TestBlockExpandOpCase7(OpTest): + def config(self): + self.batch_size = 2 + self.img_channels = 2 + self.img_height = 3 + self.img_width = 3 + self.attrs = { + 'kernels': [2, 2], + 'strides': [1, 1], + 'paddings': [1, 0, 1, 0], + 'out_stride': [2, 2], } + def setUp(self): + self.config() + self.op_type = "im2sequence" + x = np.random.uniform(0.1, 1, [ + self.batch_size, self.img_channels, self.img_height, self.img_width + ]).astype("float32") + real_size = np.array([[6, 6], [4, 4]]).astype("float32") + out = np.array(Im2Sequence(x, real_size, self.attrs)) + self.inputs = {'X': x, 'Y': real_size} + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + if __name__ == '__main__': unittest.main() +#set shiftwidth=4 set expandtab set tabstop=4 diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 842d34c07e94a79e3351347e2528ecc478cc56dc..82418f34ccb7e665a041079a19880c7bb34b0b0f 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import paddle.fluid.layers as layers +from paddle.fluid.layers.device import get_places import paddle.fluid.nets as nets from paddle.fluid.framework import Program, program_guard, default_main_program from paddle.fluid.param_attr import ParamAttr @@ -238,7 +239,7 @@ class TestBook(unittest.TestCase): def test_get_places(self): program = Program() with program_guard(program): - x = layers.get_places(device_count=4) + x = get_places(device_count=4) self.assertIsNotNone(x) print(str(program)) @@ -251,12 +252,16 @@ class TestBook(unittest.TestCase): print(str(program)) def test_im2sequence(self): - print("test_im2sequence") program = Program() with program_guard(program): x = layers.data(name='x', shape=[3, 128, 128], dtype='float32') + y = layers.data(name='y', shape=[], dtype='float32') output = layers.im2sequence( - input=x, stride=[1, 1], filter_size=[2, 2]) + input=x, + input_image_size=y, + stride=[1, 1], + filter_size=[2, 2], + out_stride=[1, 1]) self.assertIsNotNone(output) print(str(program)) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 7286c7c450108c4b5ad7136041bc4e989894a2ba..43385691bb3960004b5b69a1c55e41dd4252fa71 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -97,7 +97,7 @@ class TestMomentumOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) 
- opts = momentum_optimizer.create_optimization_pass( + opts = momentum_optimizer._create_optimization_pass( params_grads, mul_out, init_program) self.assertEqual(len(opts), 3) sgd_op = opts[-1] @@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) - opts = momentum_optimizer.create_optimization_pass( + opts = momentum_optimizer._create_optimization_pass( params_grads, mul_out, init_program) self.assertEqual(len(opts), 3) sgd_op = opts[-1] @@ -214,8 +214,8 @@ class TestAdagradOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) - opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out, - init_program) + opts = adagrad_optimizer._create_optimization_pass( + params_grads, mul_out, init_program) self.assertEqual(len(opts), 3) self.assertEqual([op.type for op in opts], ["fill_constant", "elementwise_mul", "adagrad"]) @@ -278,8 +278,8 @@ class TestAdamOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adam_optimizer.get_accumulators()), 0) - opts = adam_optimizer.create_optimization_pass(params_grads, mul_out, - init_program) + opts = adam_optimizer._create_optimization_pass(params_grads, mul_out, + init_program) self.assertEqual(len(opts), 5) self.assertEqual( [op.type for op in opts], @@ -345,8 +345,8 @@ class TestAdamaxOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) - opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out, - init_program) + opts = adamax_optimizer._create_optimization_pass(params_grads, mul_out, + init_program) self.assertEqual(len(opts), 4) self.assertEqual( [op.type for op in opts], @@ -409,7 +409,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0) - opts = decayed_adagrad_optimizer.create_optimization_pass( + opts = decayed_adagrad_optimizer._create_optimization_pass( params_grads, mul_out, init_program) self.assertEqual(len(opts), 3) self.assertEqual( @@ -475,8 +475,8 @@ class TestFtrlOptimizer(unittest.TestCase): params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0) - opts = ftrl_optimizer.create_optimization_pass(params_grads, mul_out, - init_program) + opts = ftrl_optimizer._create_optimization_pass(params_grads, mul_out, + init_program) self.assertEqual(len(opts), 3) self.assertEqual([op.type for op in opts], ["fill_constant", "elementwise_mul", "ftrl"]) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_op.py b/python/paddle/fluid/tests/unittests/test_parallel_op.py index 9ba5f988f317a515b77c0b428da236626419a2c3..9ec05e02973138e3ec233ef07f98afd598ec86b1 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_op.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_op.py @@ -15,6 +15,7 @@ import unittest import paddle.fluid as fluid +from paddle.fluid.layers.device import get_places import paddle.fluid.profiler as profiler import numpy @@ -115,7 +116,7 @@ class BaseParallelForTest(unittest.TestCase): 
if use_parallel: thread_num = fluid.core.get_cuda_device_count( ) if use_gpu else 8 - places = fluid.layers.get_places(thread_num) + places = get_places(thread_num) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) data = next(generator) diff --git a/python/setup.py.in b/python/setup.py.in index 52138b414e3d908e7aa589e76fe924e138e54d83..38a3873430505936a1058359e61140dd302d3e3f 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -181,6 +181,14 @@ else: command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so" if os.system(command) != 0: raise Exception("patch core.so failed, command: %s" % command) +if '${WITH_FLUID_ONLY}'== 'OFF': + # change rpath of _swig_paddle.so. + if "@APPLE@" == "1": + command = "install_name_tool -id \"@loader_path/../paddle/libs/\" ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so" + else: + command = "patchelf --set-rpath '$ORIGIN/../paddle/libs/' ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so" + if os.system(command) != 0: + raise Exception("patch _swig_paddle.so failed, command: %s" % command) setup(name='${PACKAGE_NAME}', version='${PADDLE_VERSION}',
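The im2sequence changes above are the core of this patch: when the optional `Y` input (per-image real sizes) and the `out_stride` attribute are given, each image gets its own output grid and the grids of the whole batch are concatenated along the row dimension. A small sketch of that shape arithmetic in Python, using the same rounded-up division and the same output-size formula as `get_output_shape` in the updated test (names here are illustrative, not the op's API):

```python
def ceil_div(a, b):
    # integer ceiling division for positive ints, matching the kernel's
    # "divide by out_stride, add one if there is a remainder" logic
    return a // b + (1 if a % b else 0)


def im2sequence_out_rows(real_sizes, kernels, strides, paddings, out_stride):
    """Per-image output grid when real sizes and out_stride are supplied.

    real_sizes: list of (h, w) pairs, one per image in the batch.
    paddings:   (up, left, down, right), the op's attribute order.
    Returns the per-image (rows_h, rows_w) grids and the total row count.
    """
    grids, total = [], 0
    for h, w in real_sizes:
        h = ceil_div(h, out_stride[0])  # scale the real size by the CNN stride
        w = ceil_div(w, out_stride[1])
        rows_h = (h + paddings[0] + paddings[2] - kernels[0]
                  + strides[0] - 1) // strides[0] + 1
        rows_w = (w + paddings[1] + paddings[3] - kernels[1]
                  + strides[1] - 1) // strides[1] + 1
        grids.append((rows_h, rows_w))
        total += rows_h * rows_w
    return grids, total


# The real sizes and attributes from TestBlockExpandOpCase5 above:
grids, total = im2sequence_out_rows([(8, 10), (5, 8)],
                                    kernels=[2, 1], strides=[2, 1],
                                    paddings=[2, 1, 2, 1], out_stride=[2, 2])
# The kernel then allocates Out as [total, channels * kernels[0] * kernels[1]]
# and sets the LoD offsets to the cumulative rows_h * rows_w of each image.
```

This mirrors why the kernel now builds the LoD from per-image offsets instead of a constant `output_height * output_width` stride: with variable real sizes, each sequence in the batch can have a different length.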