未验证 提交 818e0708 编写于 作者: Y yuyang18

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/hide_api_cont

...@@ -90,6 +90,20 @@ std::string DataFlowGraph::DotString() const { ...@@ -90,6 +90,20 @@ std::string DataFlowGraph::DotString() const {
return dot.Build(); return dot.Build();
} }
std::string DataFlowGraph::HumanReadableInfo(bool show_values,
bool show_functions) const {
std::stringstream values, functions;
for (auto &n : nodes.nodes()) {
if (show_values && n->IsValue()) {
values << n->repr() << "\n";
}
if (show_functions && n->IsFunction()) {
functions << n->repr() << "\n";
}
}
return "Values:\n" + values.str() + "\n\n" + "Functions:\n" + functions.str();
}
// //
// NodesBFSIterator // NodesBFSIterator
// //
...@@ -146,7 +160,7 @@ bool GraphTraits<DataFlowGraph>::NodesBFSIterator::operator==( ...@@ -146,7 +160,7 @@ bool GraphTraits<DataFlowGraph>::NodesBFSIterator::operator==(
if ((!queue_.empty()) && (!other.queue_.empty())) { if ((!queue_.empty()) && (!other.queue_.empty())) {
return queue_.front() == other.queue_.front() && return queue_.front() == other.queue_.front() &&
visited_.size() == other.visited_.size(); // here need to check the visited_.size() == other.visited_.size(); // here need to check the
// equality of queue and // equality of queue and
// visited. Just a light but weak implementation. // visited. Just a light but weak implementation.
} }
return false; return false;
...@@ -208,6 +222,76 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() { ...@@ -208,6 +222,76 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
return stack_.top(); return stack_.top();
} }
// Computes the topological order of the nodes reachable from `source` and
// stores it in sorted_.
//
// A node is emitted only after every node in its `inlinks` has been emitted,
// so for each edge a -> b, a appears before b in the result.
//
//   source: the start points of the sort; must not be empty.
//
// Fails with a PADDLE_ENFORCE error (instead of looping forever) when the
// sort cannot complete: i.e. some pending node has an inlink that is never
// emitted, which happens on a cycle or when an inlink is not reachable from
// `source`.
GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
    const std::vector<Node *> &source) {
  PADDLE_ENFORCE(!source.empty(),
                 "Start points of topological sorting should not be empty!");
  std::unordered_set<Node *> visited;
  std::unordered_set<Node *> to_visit{source.begin(), source.end()};

  while (!to_visit.empty()) {
    // Work on a snapshot: to_visit is modified while the pass is running.
    std::vector<Node *> queue(to_visit.begin(), to_visit.end());
    bool progressed = false;
    for (auto *p : queue) {
      // A node is ready once all of its predecessors have been emitted.
      const bool inlinks_ready =
          std::all_of(p->inlinks.begin(), p->inlinks.end(),
                      [&](Node *x) { return visited.count(x) != 0; });
      if (!inlinks_ready) {
        continue;  // Retry in a later pass.
      }
      sorted_.push_back(p);
      for (auto *successor : p->outlinks) {
        if (!visited.count(successor)) {
          to_visit.insert(successor);
        }
      }
      to_visit.erase(p);
      visited.insert(p);
      progressed = true;
    }
    // Without progress the pending set can never shrink; the next pass would
    // see exactly the same state and the loop would never terminate.
    PADDLE_ENFORCE(progressed,
                   "Topological sorting can not make progress: the graph has "
                   "a cycle or a node whose input is unreachable from the "
                   "start points.");
  }
}
// Copy constructor: duplicates both the sorted node sequence and the current
// position of `other`.
GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
    const NodesTSIterator &other)
    : sorted_(other.sorted_), cursor_(other.cursor_) {}
// Returns a reference to the node at the current position.
// Enforces that the cursor is below sorted_.size().
Node &GraphTraits<DataFlowGraph>::NodesTSIterator::operator*() {
  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
  Node *current = sorted_[cursor_];
  return *current;
}
paddle::inference::analysis::GraphTraits<DataFlowGraph>::NodesTSIterator
&GraphTraits<DataFlowGraph>::NodesTSIterator::operator++() {
if (++cursor_ >= sorted_.size()) {
sorted_.clear();
cursor_ = 0;
}
return *this;
}
paddle::inference::analysis::GraphTraits<DataFlowGraph>::NodesTSIterator &
GraphTraits<DataFlowGraph>::NodesTSIterator::operator=(
const paddle::inference::analysis::GraphTraits<
DataFlowGraph>::NodesTSIterator &other) {
cursor_ = other.cursor_;
sorted_ = other.sorted_;
return *this;
}
// Two iterators are equal when they are at the same position of element-wise
// equal node sequences.
bool GraphTraits<DataFlowGraph>::NodesTSIterator::operator==(
    const NodesTSIterator &other) {
  if (!(sorted_ == other.sorted_)) {
    return false;
  }
  return cursor_ == other.cursor_;
}
// Returns a pointer to the node at the current position.
// Enforces that the cursor is below sorted_.size().
Node *GraphTraits<DataFlowGraph>::NodesTSIterator::operator->() {
  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
  Node *current = sorted_[cursor_];
  return current;
}
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
...@@ -48,6 +48,9 @@ struct DataFlowGraph { ...@@ -48,6 +48,9 @@ struct DataFlowGraph {
// Output a DOT graph file for debug. // Output a DOT graph file for debug.
std::string DotString() const; std::string DotString() const;
std::string HumanReadableInfo(bool show_values = true,
bool show_functions = true) const;
private: private:
// Remove duplicate edges and so on. // Remove duplicate edges and so on.
void Clean(); void Clean();
...@@ -107,6 +110,32 @@ struct GraphTraits<DataFlowGraph> { ...@@ -107,6 +110,32 @@ struct GraphTraits<DataFlowGraph> {
std::unordered_set<Node *> visited_; std::unordered_set<Node *> visited_;
}; };
// Topological sorting iterator on nodes.
//
// The whole order is stored in `sorted_`; `cursor_` is the index of the
// current node. A default-constructed iterator (empty `sorted_`, cursor 0)
// serves as the end iterator: operator++ resets an iterator to exactly that
// state once it steps past the last node.
struct NodesTSIterator
    : public std::iterator<std::forward_iterator_tag, Node *> {
  // Creates the end iterator.
  NodesTSIterator() = default;
  // Creates an iterator positioned at the first node of the topological
  // order computed from the start points `source`.
  explicit NodesTSIterator(const std::vector<Node *> &source);
  // Takes over the sorted sequence of `other` and resets its cursor.
  // noexcept: moving a std::vector and copying an int can not throw.
  NodesTSIterator(NodesTSIterator &&other) noexcept
      : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) {
    other.cursor_ = 0;
  }
  NodesTSIterator(const NodesTSIterator &other);

  // Access to the current node; the cursor must be in range.
  Node &operator*();
  NodesTSIterator &operator++();
  NodesTSIterator &operator=(const NodesTSIterator &other);
  // Equality compares the stored node sequences and the cursors.
  bool operator==(const NodesTSIterator &other);
  bool operator!=(const NodesTSIterator &other) { return !(*this == other); }
  Node *operator->();

 private:
  // Nodes in topological order.
  std::vector<Node *> sorted_;
  // Index of the current node in sorted_.
  int cursor_{0};
};
explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {} explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {}
// default use BFS to visit the nodes. // default use BFS to visit the nodes.
...@@ -119,17 +148,24 @@ struct GraphTraits<DataFlowGraph> { ...@@ -119,17 +148,24 @@ struct GraphTraits<DataFlowGraph> {
iterator_range<NodesDFSIterator> nodes_in_DFS() { iterator_range<NodesDFSIterator> nodes_in_DFS() {
return iterator_range<NodesDFSIterator>(nodes_dfs_begin(), nodes_dfs_end()); return iterator_range<NodesDFSIterator>(nodes_dfs_begin(), nodes_dfs_end());
} }
// Returns the range [nodes_ts_begin(), nodes_ts_end()) for visiting the nodes
// with the topological-sorting iterator.
iterator_range<NodesTSIterator> nodes_in_TS() {
return iterator_range<NodesTSIterator>(nodes_ts_begin(), nodes_ts_end());
}
private: private:
NodesBFSIterator nodes_bfs_begin() { NodesBFSIterator nodes_bfs_begin() {
return NodesBFSIterator(graph_->inputs); return NodesBFSIterator(graph_->inputs);
} }
NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); } NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); }
NodesDFSIterator nodes_dfs_begin() { NodesDFSIterator nodes_dfs_begin() {
return NodesDFSIterator(graph_->inputs); return NodesDFSIterator(graph_->inputs);
} }
NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); } NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); }
// Begin of the topological traversal: an iterator built from the graph's
// input nodes.
NodesTSIterator nodes_ts_begin() { return NodesTSIterator(graph_->inputs); }
// End of the topological traversal: a default-constructed iterator.
NodesTSIterator nodes_ts_end() { return NodesTSIterator(); }
private: private:
DataFlowGraph *graph_; DataFlowGraph *graph_;
}; };
......
...@@ -24,11 +24,11 @@ TEST(DataFlowGraph, BFS) { ...@@ -24,11 +24,11 @@ TEST(DataFlowGraph, BFS) {
auto dfg = ProgramDescToDFG(desc); auto dfg = ProgramDescToDFG(desc);
dfg.Build(); dfg.Build();
for (auto* in : dfg.inputs) { for (auto *in : dfg.inputs) {
LOG(INFO) << "inputs: " << in->name() << " " LOG(INFO) << "inputs: " << in->name() << " "
<< static_cast<int>(in->type()); << static_cast<int>(in->type());
} }
for (auto* out : dfg.outputs) { for (auto *out : dfg.outputs) {
LOG(INFO) << "outputs: " << out->name() << " " LOG(INFO) << "outputs: " << out->name() << " "
<< static_cast<int>(out->type()); << static_cast<int>(out->type());
} }
...@@ -57,6 +57,71 @@ TEST(DataFlowGraph, DFS) { ...@@ -57,6 +57,71 @@ TEST(DataFlowGraph, DFS) {
ASSERT_EQ(count, dfg.nodes.size()); ASSERT_EQ(count, dfg.nodes.size());
} }
// Topological sorting.
/*
* Graph topology
* inputs: 0, 1, 2
* 0 -> 4
* 0 -> 5
* 1 -> 6
* 2 -> 7
* 4 -> 5
* 4 -> 7
* 4 -> 3
* 7 -> 3
*/
// Builds the 8-node graph described above by hand and checks that the
// topological iterator visits every node and respects every edge.
TEST(DataFlowGraph, TS) {
  DataFlowGraph graph;

  for (int i = 0; i < 8; i++) {
    auto *node = graph.nodes.Create(Node::Type::kValue);
    node->SetName("node-" + std::to_string(i));
  }

  // Adds the directed edge i -> j.
  auto add_link = [&](int i, int j) {
    Node *source = graph.nodes.GetMutable(i);
    Node *target = graph.nodes.GetMutable(j);
    target->inlinks.push_back(source);
    source->outlinks.push_back(target);
  };

  graph.inputs.push_back(graph.nodes.GetMutable(0));
  graph.inputs.push_back(graph.nodes.GetMutable(1));
  graph.inputs.push_back(graph.nodes.GetMutable(2));

  add_link(0, 4);
  add_link(0, 5);
  add_link(1, 6);
  add_link(2, 7);
  add_link(4, 5);
  add_link(4, 7);
  add_link(4, 3);
  add_link(7, 3);

  auto its = GraphTraits<DataFlowGraph>(&graph).nodes_in_TS();
  std::vector<int> sorted_ids;
  for (auto it = its.begin(); it != its.end(); ++it) {
    LOG(INFO) << it->name();
    sorted_ids.push_back(it->id());
  }

  // Every node is reachable from the inputs, so each must be visited once.
  ASSERT_EQ(sorted_ids.size(), static_cast<size_t>(8));

  // Asserts that a occurs prior to b in sorted_ids. Both ids must be present:
  // without the explicit checks a missing b would be found at end() and any
  // present a would wrongly compare as "before" it.
  auto assert_positive_sequence_pair = [&](int a, int b) {
    auto a_offset = std::find(sorted_ids.begin(), sorted_ids.end(), a);
    auto b_offset = std::find(sorted_ids.begin(), sorted_ids.end(), b);
    ASSERT_TRUE(a_offset != sorted_ids.end());
    ASSERT_TRUE(b_offset != sorted_ids.end());
    ASSERT_LT(a_offset, b_offset);
  };

  assert_positive_sequence_pair(2, 7);
  assert_positive_sequence_pair(7, 3);
  assert_positive_sequence_pair(4, 3);
  assert_positive_sequence_pair(0, 4);
  assert_positive_sequence_pair(0, 5);
  assert_positive_sequence_pair(1, 6);
  assert_positive_sequence_pair(4, 5);
  assert_positive_sequence_pair(4, 7);
}
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
...@@ -86,8 +86,9 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> { ...@@ -86,8 +86,9 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
std::minstd_rand engine, std::minstd_rand engine,
std::vector<int>* inds) const { std::vector<int>* inds) const {
std::uniform_real_distribution<float> uniform(0, 1); std::uniform_real_distribution<float> uniform(0, 1);
if (inds->size() > num) { const int64_t size = static_cast<int64_t>(inds->size());
for (int i = num; i < inds->size(); ++i) { if (size > num) {
for (int64_t i = num; i < size; ++i) {
int rng_ind = std::floor(uniform(engine) * i); int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < num) if (rng_ind < num)
std::iter_swap(inds->begin() + rng_ind + offset, std::iter_swap(inds->begin() + rng_ind + offset,
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/im2sequence_op.h" #include "paddle/fluid/operators/im2sequence_op.h"
#include <string>
#include <vector> #include <vector>
namespace paddle { namespace paddle {
...@@ -28,20 +29,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel { ...@@ -28,20 +29,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel {
"Input(X) of Im2SequenceOp should not be null."); "Input(X) of Im2SequenceOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of Im2SequenceOp op should not be null."); "Output(Out) of Im2SequenceOp op should not be null.");
auto in_dim = ctx->GetInputDim("X"); auto in_dim = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(in_dim.size(), 4, PADDLE_ENFORCE_EQ(in_dim.size(), 4,
"Input(X) format must be 4D tensor, eg., NCHW."); "Input(X) format must be 4D tensor, eg., NCHW.");
auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
int batch_size = in_dim[0]; int batch_size = in_dim[0];
int img_channels = in_dim[1]; int img_channels = in_dim[1];
int img_height = in_dim[2]; int img_height = in_dim[2];
int img_width = in_dim[3]; int img_width = in_dim[3];
auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0],
paddings[2], strides[0]); paddings[2], strides[0]);
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
...@@ -61,6 +61,10 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -61,6 +61,10 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
"C: channels" "C: channels"
"H: height" "H: height"
"W: width"); "W: width");
AddInput("Y",
"(Tensor) The input tensor of image real size(H, W)."
"2-D with shape [batchsize, 2]")
.AsDispensable();
AddOutput("Out", "(LodTensor) The output data of im2sequence op,"); AddOutput("Out", "(LodTensor) The output data of im2sequence op,");
AddAttr<std::vector<int>>("kernels", AddAttr<std::vector<int>>("kernels",
"(vector<int>), the " "(vector<int>), the "
...@@ -73,6 +77,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -73,6 +77,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
"(vector<int> default:{0, 0, 0, 0}), the " "(vector<int> default:{0, 0, 0, 0}), the "
"paddings(up_pad, left_pad, down_pad, right_pad)") "paddings(up_pad, left_pad, down_pad, right_pad)")
.SetDefault({0, 0, 0, 0}); .SetDefault({0, 0, 0, 0});
AddAttr<std::vector<int>>("out_stride",
"the attribute is valid only when input(Y)"
"is not NULL.this attribute represents the"
"scaling of the pic through the CNN"
"(vector<int> dedault:{1,1}),the out_stride"
" (out_stride_height, out_stride_width)")
.SetDefault({1, 1});
AddComment(R"DOC( AddComment(R"DOC(
This op uses kernels to scan images and converts these images to sequences. This op uses kernels to scan images and converts these images to sequences.
After expanding, The number of time steps are output_height * output_width After expanding, The number of time steps are output_height * output_width
...@@ -123,7 +134,7 @@ output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.] ...@@ -123,7 +134,7 @@ output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.]
[ 7. 1. 7. 9. 2. 1. 3. 5.] [ 7. 1. 7. 9. 2. 1. 3. 5.]
[ 5. 7. 2. 4. 1. 3. 9. 0.] [ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 7. 9. 4. 8. 3. 5. 0. 8.]] [ 7. 9. 4. 8. 3. 5. 0. 8.]]
output.dims = {8, 9} output.dims = {8, 8}
output.lod = [[0, 4, 8]] output.lod = [[0, 4, 8]]
)DOC"); )DOC");
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
...@@ -39,50 +40,106 @@ class Im2SequenceKernel : public framework::OpKernel<T> { ...@@ -39,50 +40,106 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
const Tensor* in = ctx.Input<Tensor>("X"); const Tensor* in = ctx.Input<Tensor>("X");
LoDTensor* out = ctx.Output<LoDTensor>("Out"); LoDTensor* out = ctx.Output<LoDTensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
// TODO(wanghaoshuang): Add layout checker after 'set_layout'
// being available for python API
// PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW,
// "Input(X) layout must be NCHW");
auto in_dim = in->dims(); auto in_dim = in->dims();
int batch_size = in_dim[0]; int batch_size = in_dim[0];
int img_channels = in_dim[1]; int img_channels = in_dim[1];
int img_height = in_dim[2]; int img_height = in_dim[2];
int img_width = in_dim[3]; int img_width = in_dim[3];
auto kernels = ctx.Attr<std::vector<int>>("kernels"); auto kernels = ctx.Attr<std::vector<int>>("kernels");
auto strides = ctx.Attr<std::vector<int>>("strides"); auto strides = ctx.Attr<std::vector<int>>("strides");
auto paddings = ctx.Attr<std::vector<int>>("paddings"); auto paddings = ctx.Attr<std::vector<int>>("paddings");
int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0], if (ctx.HasInput("Y") && batch_size > 1) {
paddings[2], strides[0]); const Tensor* imgrealsize = ctx.Input<Tensor>("Y");
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], auto out_stride = ctx.Attr<std::vector<int>>("out_stride");
paddings[3], strides[1]); Tensor cpu_shape_tensor;
TensorCopySync(*imgrealsize, platform::CPUPlace(), &cpu_shape_tensor);
const std::vector<int> dilations({1, 1}); std::vector<int> imgreal_h;
std::vector<int> imgreal_w;
auto out_dims = out->dims(); std::vector<int> output_height;
out->Resize({batch_size, out->numel() / batch_size}); std::vector<int> output_width;
for (int i = 0; i < batch_size; i++) { int result = 0;
const Tensor src = for (int i = 0; i < batch_size; i++) {
in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); int tmp_real_h = static_cast<int>((cpu_shape_tensor.data<T>())[2 * i]);
Tensor dst = out->Slice(i, i + 1).Resize( int tmp_real_w =
{output_height, output_width, img_channels, kernels[0], kernels[1]}); static_cast<int>((cpu_shape_tensor.data<T>())[2 * i + 1]);
if (tmp_real_h % out_stride[0] == 0) {
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f; tmp_real_h = tmp_real_h / out_stride[0];
auto& dev_ctx = ctx.template device_context<DeviceContext>(); } else {
f(dev_ctx, src, dilations, strides, paddings, &dst); tmp_real_h = tmp_real_h / out_stride[0] + 1;
} }
out->Resize(out_dims); if (tmp_real_w % out_stride[1] == 0) {
tmp_real_w = tmp_real_w / out_stride[1];
// set lod information } else {
// TODO(wanghaoshuang): Move this to InferShape tmp_real_w = tmp_real_w / out_stride[1] + 1;
framework::LoD lod(1); }
lod[0].reserve(batch_size + 1); imgreal_h.push_back(tmp_real_h);
for (int i = 0, offset = 0; i < batch_size + 1; ++i) { imgreal_w.push_back(tmp_real_w);
output_height.push_back(Im2SeqOutputSize(
imgreal_h[i], kernels[0], paddings[0], paddings[2], strides[0]));
output_width.push_back(Im2SeqOutputSize(
imgreal_w[i], kernels[1], paddings[1], paddings[3], strides[1]));
result += output_height[i] * output_width[i];
}
out->mutable_data<T>({result, img_channels * kernels[0] * kernels[1]},
ctx.GetPlace());
const std::vector<int> dilations({1, 1});
int offset_out = 0;
for (int i = 0; i < batch_size; i++) {
const Tensor src =
in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
Tensor dst = out->Slice(offset_out,
offset_out + output_height[i] * output_width[i])
.Resize({output_height[i], output_width[i],
img_channels, kernels[0], kernels[1]});
offset_out += output_height[i] * output_width[i];
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
f(dev_ctx, src, dilations, strides, paddings, &dst);
}
framework::LoD lod(1);
lod[0].reserve(batch_size + 1);
int offset = 0;
lod[0].push_back(offset);
for (int i = 0; i < batch_size; ++i) {
offset += output_height[i] * output_width[i];
lod[0].push_back(offset);
}
out->set_lod(lod);
} else {
out->mutable_data<T>(ctx.GetPlace());
int output_height = Im2SeqOutputSize(img_height, kernels[0], paddings[0],
paddings[2], strides[0]);
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
paddings[3], strides[1]);
const std::vector<int> dilations({1, 1});
auto out_dims = out->dims();
out->Resize({batch_size, out->numel() / batch_size});
for (int i = 0; i < batch_size; i++) {
const Tensor src =
in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
Tensor dst =
out->Slice(i, i + 1).Resize({output_height, output_width,
img_channels, kernels[0], kernels[1]});
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
f(dev_ctx, src, dilations, strides, paddings, &dst);
}
out->Resize(out_dims);
framework::LoD lod(1);
lod[0].reserve(batch_size + 1);
int offset = 0;
lod[0].push_back(offset); lod[0].push_back(offset);
offset += output_height * output_width; for (int i = 0; i < batch_size; ++i) {
offset += output_height * output_width;
lod[0].push_back(offset);
}
out->set_lod(lod);
} }
out->set_lod(lod);
} }
}; };
......
...@@ -43,21 +43,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, ...@@ -43,21 +43,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int col_height = col->dims()[3]; int col_height = col->dims()[3];
int col_width = col->dims()[4]; int col_width = col->dims()[4];
PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
((dilation[0] * (filter_height - 1) + 1))) /
stride[0] +
1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
((dilation[1] * (filter_width - 1) + 1))) /
stride[1] +
1,
col_width,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
int channels_col = im_channels * filter_height * filter_width; int channels_col = im_channels * filter_height * filter_width;
const T* im_data = im.data<T>(); const T* im_data = im.data<T>();
...@@ -178,17 +163,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF, ...@@ -178,17 +163,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
int col_height = col->dims()[0]; int col_height = col->dims()[0];
int col_width = col->dims()[1]; int col_width = col->dims()[1];
PADDLE_ENFORCE_EQ(
(im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ(
(im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1,
col_width,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
const T* im_data = im.data<T>(); const T* im_data = im.data<T>();
T* col_data = col->data<T>(); T* col_data = col->data<T>();
......
...@@ -77,21 +77,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, ...@@ -77,21 +77,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int col_height = col->dims()[3]; int col_height = col->dims()[3];
int col_width = col->dims()[4]; int col_width = col->dims()[4];
PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
(dilation[0] * (filter_height - 1) + 1)) /
stride[0] +
1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
(dilation[1] * (filter_width - 1) + 1)) /
stride[1] +
1,
col_width,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
int num_outputs = im_channels * col_height * col_width; int num_outputs = im_channels * col_height * col_width;
int blocks = (num_outputs + 1024 - 1) / 1024; int blocks = (num_outputs + 1024 - 1) / 1024;
int block_x = 512; int block_x = 512;
...@@ -274,21 +259,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF, ...@@ -274,21 +259,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
int col_height = col->dims()[0]; int col_height = col->dims()[0];
int col_width = col->dims()[1]; int col_width = col->dims()[1];
PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
(dilation[0] * (filter_height - 1) + 1)) /
stride[0] +
1,
col_height,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent.");
PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
(dilation[1] * (filter_width - 1) + 1)) /
stride[1] +
1,
col_width,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
int block_dim_x = 0; int block_dim_x = 0;
int block_dim_y = 0; int block_dim_y = 0;
if (filter_height <= 4 && filter_width <= 4) { if (filter_height <= 4 && filter_width <= 4) {
......
...@@ -46,7 +46,7 @@ ENDIF() ...@@ -46,7 +46,7 @@ ENDIF()
# memcpy depends on device_context, here add deps individually for # memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies # avoiding cycle dependencies
cc_library(device_context SRCS device_context.cc init.cc DEPS malloc cc_library(device_context SRCS device_context.cc init.cc DEPS malloc
place eigen3 stringpiece cpu_helper ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) place eigen3 stringpiece cpu_helper framework_proto ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS})
nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info)
cc_test(init_test SRCS init_test.cc DEPS device_context) cc_test(init_test SRCS init_test.cc DEPS device_context)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import sys
__all__ = ['deprecated']
def deprecated(since, instead, extra_message=""):
    """
    Build a decorator that marks an API as deprecated.

    The decorated function behaves exactly like the original one, except that
    a deprecation message is written to ``sys.stderr`` on every call and is
    also appended to the function's docstring.

    Args:
        since(str): The version since which the API is deprecated.
        instead(str): The API that should be used instead.
        extra_message(str): Additional text appended to the message on a new
            line. Nothing is appended when it is empty.

    Returns:
        A decorator that takes the deprecated function and returns the
        wrapped function.
    """

    def decorator(func):
        err_msg = "API {0} is deprecated since {1}. Please use {2} instead.".format(
            func.__name__, since, instead)
        if len(extra_message) != 0:
            err_msg += "\n"
            err_msg += extra_message

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # write() instead of the Python-2-only `print >>` statement, so the
            # message is emitted the same way under Python 2 and Python 3.
            sys.stderr.write(err_msg + "\n")
            return func(*args, **kwargs)

        # functools.wraps copies func.__doc__, which is None for a function
        # without docstring; appending to None would raise TypeError.
        wrapper.__doc__ = (wrapper.__doc__ or "") + "\n " + err_msg
        return wrapper

    return decorator
...@@ -18,10 +18,7 @@ import collections ...@@ -18,10 +18,7 @@ import collections
import copy import copy
import unique_name import unique_name
__all__ = [ __all__ = ['append_backward']
'append_backward',
'calc_gradient',
]
def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None):
...@@ -123,7 +120,8 @@ def _append_grad_suffix_(name): ...@@ -123,7 +120,8 @@ def _append_grad_suffix_(name):
def _addup_repetitive_outputs_(op_descs): def _addup_repetitive_outputs_(op_descs):
""" """
In backward part, an variable may be the output of more than one ops. In backward part, an variable may be the output of more than one ops.
In this case, the variable should be the accumulation of all the outputs. And one op may yield its multiple outputs to the same variable.
In these cases, the variable should be the accumulation of all the outputs.
`sum_op`s are added to implement the accumulate. `sum_op`s are added to implement the accumulate.
""" """
pending_sum_ops = [] pending_sum_ops = []
...@@ -136,29 +134,46 @@ def _addup_repetitive_outputs_(op_descs): ...@@ -136,29 +134,46 @@ def _addup_repetitive_outputs_(op_descs):
"sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]}, "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]},
{"use_mkldnn": False}), idx)) {"use_mkldnn": False}), idx))
renamed_vars[var_name] = [var_name] renamed_vars[var_name] = [var_name]
for var_name in op_desc.output_arg_names(): for param_idx, param_name in enumerate(op_desc.output_names()):
if var_name == core.empty_var_name( arg_names = op_desc.output(param_name)
) or var_name in op_desc.input_arg_names(): for arg_idx, var_name in enumerate(arg_names):
# empty variable or inplace op if var_name == core.empty_var_name(
continue ) or var_name in op_desc.input_arg_names():
if len(renamed_vars[var_name]) == 0: # empty variable or inplace op
# it's the first time we get the variable continue
renamed_vars[var_name] = [var_name] if len(renamed_vars[var_name]) == 0:
else: # it's the first time we get the variable
if len(renamed_vars[var_name]) == 1: renamed_vars[var_name] = [var_name]
else:
if len(renamed_vars[var_name]) == 1:
new_name = var_name + "@RENAME@" + \
str(var_rename_count[var_name])
var_rename_count[var_name] += 1
# rename original var_name
renamed_vars[var_name][0] = new_name
_rename_arg_(op_descs, var_name, new_name, 0, idx)
_rename_arg_(pending_sum_ops, var_name, new_name)
for p in op_desc.output_names()[:param_idx]:
p_arg_names = op_desc.output(p)
if var_name in p_arg_names:
op_desc.set_output(p, [
new_name if x == var_name else x
for x in p_arg_names
])
arg_names = [
new_name if x == var_name else x
for x in arg_names[:arg_idx]
] + arg_names[arg_idx:]
new_name = var_name + "@RENAME@" + \ new_name = var_name + "@RENAME@" + \
str(var_rename_count[var_name]) str(var_rename_count[var_name])
var_rename_count[var_name] += 1 var_rename_count[var_name] += 1
# rename original var_name arg_names[arg_idx] = new_name
renamed_vars[var_name][0] = new_name op_desc.set_output(param_name, arg_names)
_rename_arg_(op_descs, var_name, new_name, 0, idx) renamed_vars[var_name].append(new_name)
_rename_arg_(pending_sum_ops, var_name, new_name)
new_name = var_name + "@RENAME@" + \
str(var_rename_count[var_name])
var_rename_count[var_name] += 1
op_desc.rename_output(var_name, new_name)
renamed_vars[var_name].append(new_name)
for var_name, inputs in renamed_vars.iteritems(): for var_name, inputs in renamed_vars.iteritems():
if len(inputs) > 1: if len(inputs) > 1:
pending_sum_ops.append( pending_sum_ops.append(
......
...@@ -18,10 +18,12 @@ All util layers. ...@@ -18,10 +18,12 @@ All util layers.
from layer_function_generator import autodoc from layer_function_generator import autodoc
from ..framework import unique_name from ..framework import unique_name
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..annotations import deprecated
__all__ = ['get_places'] __all__ = []
@deprecated(since='0.15.0', instead="ParallelExecutor")
@autodoc() @autodoc()
def get_places(device_count=None, device_type=None): def get_places(device_count=None, device_type=None):
helper = LayerHelper('get_places', **locals()) helper = LayerHelper('get_places', **locals())
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c ) 2018 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -3900,7 +3914,13 @@ def transpose(x, perm, name=None): ...@@ -3900,7 +3914,13 @@ def transpose(x, perm, name=None):
return out return out
def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): def im2sequence(input,
filter_size=1,
stride=1,
padding=0,
input_image_size=None,
out_stride=1,
name=None):
""" """
Extracts image patches from the input tensor to form a tensor of shape Extracts image patches from the input tensor to form a tensor of shape
{input.batch_size * output_height * output_width, filter_size_H * {input.batch_size * output_height * output_width, filter_size_H *
...@@ -3937,6 +3957,15 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): ...@@ -3937,6 +3957,15 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
padding_up = padding_down = padding_left = padding_right = padding padding_up = padding_down = padding_left = padding_right = padding
Default: padding = 0. Default: padding = 0.
input_image_size(Variable): A tensor holding the real size of each input image.
Its shape is [batchsize, 2]. It is optional and is only used for batch inference.
out_stride(int|tuple): The scaling applied to the image by the CNN. It is
optional and takes effect only when input_image_size is given.
If out_stride is a tuple, it must contain two integers,
(out_stride_H, out_stride_W). Otherwise,
out_stride_H = out_stride_W = out_stride.
name (int): The name of this layer. It is optional. name (int): The name of this layer. It is optional.
Returns: Returns:
...@@ -3987,7 +4016,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): ...@@ -3987,7 +4016,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
[ 5. 7. 2. 4. 1. 3. 9. 0.] [ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 7. 9. 4. 8. 3. 5. 0. 8.]] [ 7. 9. 4. 8. 3. 5. 0. 8.]]
output.dims = {8, 9} output.dims = {8, 8}
output.lod = [[4, 4]] output.lod = [[4, 4]]
...@@ -4009,18 +4038,17 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): ...@@ -4009,18 +4038,17 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
if len(padding) == 2: if len(padding) == 2:
padding.append(padding[0]) padding.append(padding[0])
padding.append(padding[1]) padding.append(padding[1])
inputs = {"X": input}
attrs = {"kernels": filter_size, "strides": stride, "padding": padding}
if input_image_size:
if isinstance(out_stride, int):
out_stride = [out_stride, out_stride]
inputs["Y"] = input_image_size
attrs["out_stride"] = out_stride
helper = LayerHelper('im2sequence', **locals()) helper = LayerHelper('im2sequence', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype()) out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op( helper.append_op(
type='im2sequence', type='im2sequence', inputs=inputs, outputs={'Out': out}, attrs=attrs)
inputs={'X': input},
outputs={'Out': out},
attrs={
'kernels': filter_size,
'strides': stride,
'paddings': padding,
})
return out return out
......
...@@ -29,7 +29,7 @@ __all__ = [ ...@@ -29,7 +29,7 @@ __all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl', 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer', 'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer',
'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer', 'RMSPropOptimizer' 'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'RMSPropOptimizer'
] ]
...@@ -67,7 +67,7 @@ class Optimizer(object): ...@@ -67,7 +67,7 @@ class Optimizer(object):
self._LARS_weight_decay = LARS_weight_decay self._LARS_weight_decay = LARS_weight_decay
def _create_global_learning_rate(self): def _create_global_learning_rate(self):
lr = self.global_learning_rate() lr = self._global_learning_rate()
if isinstance(lr, framework.Variable): if isinstance(lr, framework.Variable):
return return
...@@ -86,7 +86,7 @@ class Optimizer(object): ...@@ -86,7 +86,7 @@ class Optimizer(object):
dtype='float32' if self._dtype == None else self._dtype, dtype='float32' if self._dtype == None else self._dtype,
persistable=True) persistable=True)
def global_learning_rate(self, program=None): def _global_learning_rate(self, program=None):
""" """
get global decayed learning rate get global decayed learning rate
:return: :return:
...@@ -110,9 +110,9 @@ class Optimizer(object): ...@@ -110,9 +110,9 @@ class Optimizer(object):
return param_lr return param_lr
else: else:
if param_lr == 1.0: if param_lr == 1.0:
return self.global_learning_rate() return self._global_learning_rate()
else: else:
return self.global_learning_rate() * param_lr return self._global_learning_rate() * param_lr
def _create_accumulators(self, block, parameters): def _create_accumulators(self, block, parameters):
"""Create all accumulators needed by the parameters """Create all accumulators needed by the parameters
...@@ -185,10 +185,10 @@ class Optimizer(object): ...@@ -185,10 +185,10 @@ class Optimizer(object):
format(name, param.name)) format(name, param.name))
return self._accumulators[name][param.name] return self._accumulators[name][param.name]
def create_optimization_pass(self, def _create_optimization_pass(self,
parameters_and_grads, parameters_and_grads,
loss, loss,
startup_program=None): startup_program=None):
"""Add optimization operators to update gradients to variables. """Add optimization operators to update gradients to variables.
Args: Args:
...@@ -221,7 +221,7 @@ class Optimizer(object): ...@@ -221,7 +221,7 @@ class Optimizer(object):
self._create_global_learning_rate() self._create_global_learning_rate()
if self._LARS_weight_decay > 0.0: if self._LARS_weight_decay > 0.0:
layers.append_LARS(parameters_and_grads, layers.append_LARS(parameters_and_grads,
self.global_learning_rate(), self._global_learning_rate(),
self._LARS_weight_decay) self._LARS_weight_decay)
optimize_ops = [] optimize_ops = []
...@@ -262,8 +262,8 @@ class Optimizer(object): ...@@ -262,8 +262,8 @@ class Optimizer(object):
params_grads = append_regularization_ops(params_grads, params_grads = append_regularization_ops(params_grads,
self.regularization) self.regularization)
optimize_ops = self.create_optimization_pass(params_grads, loss, optimize_ops = self._create_optimization_pass(params_grads, loss,
startup_program) startup_program)
return optimize_ops, params_grads return optimize_ops, params_grads
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddle.fluid.layers.device import get_places
import unittest import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle import paddle
...@@ -144,7 +144,7 @@ def train(word_dict, ...@@ -144,7 +144,7 @@ def train(word_dict,
cost, acc_out, prediction = net_method( cost, acc_out, prediction = net_method(
data, label, input_dim=dict_dim, class_dim=class_dim) data, label, input_dim=dict_dim, class_dim=class_dim)
else: else:
places = fluid.layers.get_places() places = get_places()
pd = fluid.layers.ParallelDo(places) pd = fluid.layers.ParallelDo(places)
with pd.do(): with pd.do():
cost, acc, _ = net_method( cost, acc, _ = net_method(
......
...@@ -12,15 +12,17 @@ ...@@ -12,15 +12,17 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import argparse
import paddle.fluid as fluid
import paddle
import sys
import numpy
import unittest
import math import math
import sys
import os import os
import sys
import unittest
import numpy
import paddle
import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
BATCH_SIZE = 64 BATCH_SIZE = 64
...@@ -76,7 +78,7 @@ def train(nn_type, ...@@ -76,7 +78,7 @@ def train(nn_type,
net_conf = conv_net net_conf = conv_net
if parallel: if parallel:
places = fluid.layers.get_places() places = get_places()
pd = fluid.layers.ParallelDo(places) pd = fluid.layers.ParallelDo(places)
with pd.do(): with pd.do():
img_ = pd.read_input(img) img_ = pd.read_input(img)
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
import unittest import unittest
import os import os
import numpy as np import numpy as np
...@@ -80,7 +81,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): ...@@ -80,7 +81,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
avg_cost, predict_word = __network__( avg_cost, predict_word = __network__(
[first_word, second_word, third_word, forth_word, next_word]) [first_word, second_word, third_word, forth_word, next_word])
else: else:
places = fluid.layers.get_places() places = get_places()
pd = fluid.layers.ParallelDo(places) pd = fluid.layers.ParallelDo(places)
with pd.do(): with pd.do():
avg_cost, predict_word = __network__( avg_cost, predict_word = __network__(
......
...@@ -12,12 +12,13 @@ ...@@ -12,12 +12,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import paddle
import paddle.fluid as fluid
import math import math
import sys import sys
import paddle
import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
# need to fix random seed and training data to compare the loss # need to fix random seed and training data to compare the loss
# value accurately calculated by the default and the memory optimization # value accurately calculated by the default and the memory optimization
# version. # version.
...@@ -34,7 +35,7 @@ if fluid.core.is_compiled_with_cuda(): ...@@ -34,7 +35,7 @@ if fluid.core.is_compiled_with_cuda():
use_nccl = False use_nccl = False
place = fluid.CUDAPlace(0) place = fluid.CUDAPlace(0)
places = fluid.layers.get_places(device_count=0, device_type=device_type) places = get_places(device_count=0, device_type=device_type)
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
with pd.do(): with pd.do():
x_ = pd.read_input(x) x_ = pd.read_input(x)
......
...@@ -16,8 +16,6 @@ import unittest ...@@ -16,8 +16,6 @@ import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.fluid.framework as framework
import paddle.fluid.optimizer as optimizer
from paddle.fluid.backward import calc_gradient from paddle.fluid.backward import calc_gradient
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
import decorators import decorators
import unittest import unittest
...@@ -20,7 +21,7 @@ import unittest ...@@ -20,7 +21,7 @@ import unittest
class TestGetPlaces(unittest.TestCase): class TestGetPlaces(unittest.TestCase):
@decorators.prog_scope() @decorators.prog_scope()
def test_get_places(self): def test_get_places(self):
places = fluid.layers.get_places() places = get_places()
cpu = fluid.CPUPlace() cpu = fluid.CPUPlace()
exe = fluid.Executor(cpu) exe = fluid.Executor(cpu)
exe.run(fluid.default_main_program()) exe.run(fluid.default_main_program())
......
...@@ -16,23 +16,48 @@ import numpy as np ...@@ -16,23 +16,48 @@ import numpy as np
from op_test import OpTest from op_test import OpTest
def get_output_shape(attrs, in_shape): def get_output_shape(attrs, in_shape, img_real_size):
batchsize = in_shape[0]
img_height = in_shape[2] img_height = in_shape[2]
img_width = in_shape[3] img_width = in_shape[3]
paddings = np.array(attrs['paddings']).astype("int32")
kernels = np.array(attrs['kernels']).astype("int32")
strides = np.array(attrs['strides']).astype("int32")
output_height = np.zeros((1, batchsize)).astype("int32")
output_width = np.zeros((1, batchsize)).astype("int32")
if len(img_real_size):
out_stride = np.array(attrs['out_stride']).astype("int32")
imgreal_h = 0
imgreal_w = 0
for index in range(batchsize):
if img_real_size[index, 0] % out_stride[0] == 0:
imgreal_h = img_real_size[index, 0] / out_stride[0]
else:
imgreal_h = img_real_size[index, 0] / out_stride[0] + 1
if img_real_size[index, 0] % out_stride[1] == 0:
imgreal_w = img_real_size[index, 1] / out_stride[1]
else:
imgreal_w = img_real_size[index, 0] / out_stride[1] + 1
output_height[0,index] = \
1 + \
(imgreal_h + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \
strides[0]
paddings = attrs['paddings'] output_width[0,index] = \
kernels = attrs['kernels'] 1 + \
strides = attrs['strides'] (imgreal_w + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \
strides[1]
else:
for index in range(batchsize):
output_height[0,index] = \
1 + \
(img_height + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \
strides[0]
output_height = \ output_width[0,index] = \
1 + \ 1 + \
(img_height + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) / \ (img_width + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \
strides[0] strides[1]
output_width = \
1 + \
(img_width + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) / \
strides[1]
return output_height, output_width return output_height, output_width
...@@ -75,22 +100,25 @@ def im2col(attrs, im, col): ...@@ -75,22 +100,25 @@ def im2col(attrs, im, col):
im_row_offset][im_col_offset] im_row_offset][im_col_offset]
def Im2Sequence(inputs, attrs): def Im2Sequence(inputs, img_real_size, attrs):
output_height, output_width = get_output_shape(attrs, inputs.shape) output_height, output_width = get_output_shape(attrs, inputs.shape,
img_real_size)
img_channels = inputs.shape[1] img_channels = inputs.shape[1]
batch_size = inputs.shape[0] batch_size = inputs.shape[0]
out = np.zeros([ out = []
batch_size, output_height, output_width, img_channels, for index in range(batch_size):
attrs['kernels'][0], attrs['kernels'][1] tmp = np.zeros([
]).astype("float32") output_height[0, index], output_width[0, index], img_channels,
attrs['kernels'][0], attrs['kernels'][1]
for i in range(len(inputs)): ]).astype("float32")
im2col(attrs, inputs[i], out[i]) out.append(tmp)
for index in range(len(inputs)):
out = out.reshape([ im2col(attrs, inputs[index], out[index])
batch_size * output_height * output_width, out[index] = out[index].reshape([
img_channels * attrs['kernels'][0] * attrs['kernels'][1] output_height[0, index] * output_width[0, index],
]) img_channels * attrs['kernels'][0] * attrs['kernels'][1]
])
out = np.concatenate(out, axis=0)
return out return out
...@@ -103,7 +131,7 @@ class TestBlockExpandOp(OpTest): ...@@ -103,7 +131,7 @@ class TestBlockExpandOp(OpTest):
self.attrs = { self.attrs = {
'kernels': [2, 2], 'kernels': [2, 2],
'strides': [1, 1], 'strides': [1, 1],
'paddings': [1, 1, 1, 1] 'paddings': [1, 1, 1, 1],
} }
def setUp(self): def setUp(self):
...@@ -113,7 +141,8 @@ class TestBlockExpandOp(OpTest): ...@@ -113,7 +141,8 @@ class TestBlockExpandOp(OpTest):
self.batch_size, self.img_channels, self.img_height, self.img_width self.batch_size, self.img_channels, self.img_height, self.img_width
]).astype("float32") ]).astype("float32")
out = Im2Sequence(x, self.attrs) real_size = np.array([]).astype("float32")
out = Im2Sequence(x, real_size, self.attrs)
self.inputs = {'X': x} self.inputs = {'X': x}
self.outputs = {'Out': out} self.outputs = {'Out': out}
...@@ -133,20 +162,20 @@ class TestBlockExpandOpCase2(TestBlockExpandOp): ...@@ -133,20 +162,20 @@ class TestBlockExpandOpCase2(TestBlockExpandOp):
self.attrs = { self.attrs = {
'kernels': [2, 1], 'kernels': [2, 1],
'strides': [2, 1], 'strides': [2, 1],
'paddings': [2, 1, 2, 1] 'paddings': [2, 1, 2, 1],
} }
class TestBlockExpandOpCase3(TestBlockExpandOp): class TestBlockExpandOpCase3(TestBlockExpandOp):
def config(self): def config(self):
self.batch_size = 3 self.batch_size = 2
self.img_channels = 1 self.img_channels = 1
self.img_height = 4 self.img_height = 4
self.img_width = 5 self.img_width = 5
self.attrs = { self.attrs = {
'kernels': [2, 1], 'kernels': [2, 1],
'strides': [2, 1], 'strides': [2, 1],
'paddings': [2, 0, 2, 0] 'paddings': [2, 0, 2, 0],
} }
...@@ -159,9 +188,94 @@ class TestBlockExpandOpCase4(TestBlockExpandOp): ...@@ -159,9 +188,94 @@ class TestBlockExpandOpCase4(TestBlockExpandOp):
self.attrs = { self.attrs = {
'kernels': [2, 2], 'kernels': [2, 2],
'strides': [1, 1], 'strides': [1, 1],
'paddings': [0, 0, 0, 0] 'paddings': [0, 0, 0, 0],
}
class TestBlockExpandOpCase5(OpTest):
def config(self):
self.batch_size = 1
self.img_channels = 3
self.img_height = 4
self.img_width = 5
self.attrs = {
'kernels': [2, 1],
'strides': [2, 1],
'paddings': [2, 1, 2, 1],
'out_stride': [2, 2],
}
def setUp(self):
self.config()
self.op_type = "im2sequence"
x = np.random.uniform(0.1, 1, [
self.batch_size, self.img_channels, self.img_height, self.img_width
]).astype("float32")
real_size = np.array([[8, 10], [5, 8]]).astype("float32")
out = np.array(Im2Sequence(x, real_size, self.attrs))
self.inputs = {'X': x, 'Y': real_size} #l ??
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
class TestBlockExpandOpCase6(OpTest):
def config(self):
self.batch_size = 3
self.img_channels = 1
self.img_height = 4
self.img_width = 5
self.attrs = {
'kernels': [2, 1],
'strides': [1, 1],
'paddings': [0, 0, 0, 0],
'out_stride': [1, 1],
}
def setUp(self):
self.config()
self.op_type = "im2sequence"
x = np.random.uniform(0.1, 1, [
self.batch_size, self.img_channels, self.img_height, self.img_width
]).astype("float32")
real_size = np.array([[8, 10], [5, 8], [5, 8]]).astype("float32")
out = np.array(Im2Sequence(x, real_size, self.attrs))
self.inputs = {'X': x, 'Y': real_size} #l ??
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
class TestBlockExpandOpCase7(OpTest):
def config(self):
self.batch_size = 2
self.img_channels = 2
self.img_height = 3
self.img_width = 3
self.attrs = {
'kernels': [2, 2],
'strides': [1, 1],
'paddings': [1, 0, 1, 0],
'out_stride': [2, 2],
} }
def setUp(self):
self.config()
self.op_type = "im2sequence"
x = np.random.uniform(0.1, 1, [
self.batch_size, self.img_channels, self.img_height, self.img_width
]).astype("float32")
real_size = np.array([[6, 6], [4, 4]]).astype("float32")
out = np.array(Im2Sequence(x, real_size, self.attrs))
self.inputs = {'X': x, 'Y': real_size}
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
#set shiftwidth=4 set expandtab set tabstop=4
...@@ -16,6 +16,7 @@ from __future__ import print_function ...@@ -16,6 +16,7 @@ from __future__ import print_function
import unittest import unittest
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.fluid.layers.device import get_places
import paddle.fluid.nets as nets import paddle.fluid.nets as nets
from paddle.fluid.framework import Program, program_guard, default_main_program from paddle.fluid.framework import Program, program_guard, default_main_program
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
...@@ -238,7 +239,7 @@ class TestBook(unittest.TestCase): ...@@ -238,7 +239,7 @@ class TestBook(unittest.TestCase):
def test_get_places(self): def test_get_places(self):
program = Program() program = Program()
with program_guard(program): with program_guard(program):
x = layers.get_places(device_count=4) x = get_places(device_count=4)
self.assertIsNotNone(x) self.assertIsNotNone(x)
print(str(program)) print(str(program))
...@@ -251,12 +252,16 @@ class TestBook(unittest.TestCase): ...@@ -251,12 +252,16 @@ class TestBook(unittest.TestCase):
print(str(program)) print(str(program))
def test_im2sequence(self): def test_im2sequence(self):
print("test_im2sequence")
program = Program() program = Program()
with program_guard(program): with program_guard(program):
x = layers.data(name='x', shape=[3, 128, 128], dtype='float32') x = layers.data(name='x', shape=[3, 128, 128], dtype='float32')
y = layers.data(name='y', shape=[], dtype='float32')
output = layers.im2sequence( output = layers.im2sequence(
input=x, stride=[1, 1], filter_size=[2, 2]) input=x,
input_image_size=y,
stride=[1, 1],
filter_size=[2, 2],
out_stride=[1, 1])
self.assertIsNotNone(output) self.assertIsNotNone(output)
print(str(program)) print(str(program))
......
...@@ -97,7 +97,7 @@ class TestMomentumOptimizer(unittest.TestCase): ...@@ -97,7 +97,7 @@ class TestMomentumOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer._create_optimization_pass(
params_grads, mul_out, init_program) params_grads, mul_out, init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
sgd_op = opts[-1] sgd_op = opts[-1]
...@@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase): ...@@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer._create_optimization_pass(
params_grads, mul_out, init_program) params_grads, mul_out, init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
sgd_op = opts[-1] sgd_op = opts[-1]
...@@ -214,8 +214,8 @@ class TestAdagradOptimizer(unittest.TestCase): ...@@ -214,8 +214,8 @@ class TestAdagradOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out, opts = adagrad_optimizer._create_optimization_pass(
init_program) params_grads, mul_out, init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
self.assertEqual([op.type for op in opts], self.assertEqual([op.type for op in opts],
["fill_constant", "elementwise_mul", "adagrad"]) ["fill_constant", "elementwise_mul", "adagrad"])
...@@ -278,8 +278,8 @@ class TestAdamOptimizer(unittest.TestCase): ...@@ -278,8 +278,8 @@ class TestAdamOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0) self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out, opts = adam_optimizer._create_optimization_pass(params_grads, mul_out,
init_program) init_program)
self.assertEqual(len(opts), 5) self.assertEqual(len(opts), 5)
self.assertEqual( self.assertEqual(
[op.type for op in opts], [op.type for op in opts],
...@@ -345,8 +345,8 @@ class TestAdamaxOptimizer(unittest.TestCase): ...@@ -345,8 +345,8 @@ class TestAdamaxOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out, opts = adamax_optimizer._create_optimization_pass(params_grads, mul_out,
init_program) init_program)
self.assertEqual(len(opts), 4) self.assertEqual(len(opts), 4)
self.assertEqual( self.assertEqual(
[op.type for op in opts], [op.type for op in opts],
...@@ -409,7 +409,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): ...@@ -409,7 +409,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
opts = decayed_adagrad_optimizer.create_optimization_pass( opts = decayed_adagrad_optimizer._create_optimization_pass(
params_grads, mul_out, init_program) params_grads, mul_out, init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
self.assertEqual( self.assertEqual(
...@@ -475,8 +475,8 @@ class TestFtrlOptimizer(unittest.TestCase): ...@@ -475,8 +475,8 @@ class TestFtrlOptimizer(unittest.TestCase):
params_grads = append_backward(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0) self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0)
opts = ftrl_optimizer.create_optimization_pass(params_grads, mul_out, opts = ftrl_optimizer._create_optimization_pass(params_grads, mul_out,
init_program) init_program)
self.assertEqual(len(opts), 3) self.assertEqual(len(opts), 3)
self.assertEqual([op.type for op in opts], self.assertEqual([op.type for op in opts],
["fill_constant", "elementwise_mul", "ftrl"]) ["fill_constant", "elementwise_mul", "ftrl"])
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
import unittest import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.device import get_places
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
import numpy import numpy
...@@ -115,7 +116,7 @@ class BaseParallelForTest(unittest.TestCase): ...@@ -115,7 +116,7 @@ class BaseParallelForTest(unittest.TestCase):
if use_parallel: if use_parallel:
thread_num = fluid.core.get_cuda_device_count( thread_num = fluid.core.get_cuda_device_count(
) if use_gpu else 8 ) if use_gpu else 8
places = fluid.layers.get_places(thread_num) places = get_places(thread_num)
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
data = next(generator) data = next(generator)
......
...@@ -181,6 +181,14 @@ else: ...@@ -181,6 +181,14 @@ else:
command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so" command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so"
if os.system(command) != 0: if os.system(command) != 0:
raise Exception("patch core.so failed, command: %s" % command) raise Exception("patch core.so failed, command: %s" % command)
if '${WITH_FLUID_ONLY}'== 'OFF':
# change rpath of _swig_paddle.so.
if "@APPLE@" == "1":
command = "install_name_tool -id \"@loader_path/../paddle/libs/\" ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
else:
command = "patchelf --set-rpath '$ORIGIN/../paddle/libs/' ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
if os.system(command) != 0:
raise Exception("patch _swig_paddle.so failed, command: %s" % command)
setup(name='${PACKAGE_NAME}', setup(name='${PACKAGE_NAME}',
version='${PADDLE_VERSION}', version='${PADDLE_VERSION}',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册