提交 d163592a 编写于 作者: Y ying

Merge branch 'develop' into multihead_attention

...@@ -18,6 +18,11 @@ dynamic_lstm ...@@ -18,6 +18,11 @@ dynamic_lstm
.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm .. autofunction:: paddle.v2.fluid.layers.dynamic_lstm
:noindex: :noindex:
dynamic_gru
-----------
.. autofunction:: paddle.v2.fluid.layers.dynamic_gru
:noindex:
data data
---- ----
.. autofunction:: paddle.v2.fluid.layers.data .. autofunction:: paddle.v2.fluid.layers.data
...@@ -500,6 +505,11 @@ swish ...@@ -500,6 +505,11 @@ swish
.. autofunction:: paddle.v2.fluid.layers.swish .. autofunction:: paddle.v2.fluid.layers.swish
:noindex: :noindex:
im2sequence
-----------
.. autofunction:: paddle.v2.fluid.layers.im2sequence
:noindex:
edit_distance edit_distance
--------------- ---------------
.. autofunction:: paddle.v2.fluid.layers.edit_distance_error .. autofunction:: paddle.v2.fluid.layers.edit_distance_error
......
...@@ -26,8 +26,8 @@ glu ...@@ -26,8 +26,8 @@ glu
:noindex: :noindex:
dot_product_attention scaled_dot_product_attention
--------------------- ----------------------------
.. autofunction:: paddle.v2.fluid.nets.dot_product_attention .. autofunction:: paddle.v2.fluid.nets.dot_product_attention
:noindex: :noindex:
...@@ -25,14 +25,14 @@ ...@@ -25,14 +25,14 @@
.. code-block:: bash .. code-block:: bash
docker pull docker.paddlepaddle.org/paddle docker pull docker.paddlepaddlehub.com/paddle
下载GPU版本(cuda8.0_cudnn5_avx_mkl)的Docker镜像: 下载GPU版本(cuda8.0_cudnn5_avx_mkl)的Docker镜像:
.. code-block:: bash .. code-block:: bash
docker pull paddlepaddle/paddle:latest-gpu docker pull paddlepaddle/paddle:latest-gpu
docker pull docker.paddlepaddle.org/paddle:latest-gpu docker pull docker.paddlepaddlehub.com/paddle:latest-gpu
选择下载使用不同的BLAS库的Docker镜像: 选择下载使用不同的BLAS库的Docker镜像:
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
docker pull paddlepaddle/paddle:[tag] docker pull paddlepaddle/paddle:[tag]
# 比如: # 比如:
docker pull docker.paddlepaddle.org/paddle:0.10.0-gpu docker pull docker.paddlepaddlehub.com/paddle:0.11.0-gpu
.. _docker_run: .. _docker_run:
......
...@@ -26,14 +26,14 @@ For users in China, we provide a faster mirror: ...@@ -26,14 +26,14 @@ For users in China, we provide a faster mirror:
.. code-block:: bash .. code-block:: bash
docker pull docker.paddlepaddle.org/paddle docker pull docker.paddlepaddlehub.com/paddle
Download GPU version (cuda8.0_cudnn5_avx_mkl) images: Download GPU version (cuda8.0_cudnn5_avx_mkl) images:
.. code-block:: bash .. code-block:: bash
docker pull paddlepaddle/paddle:latest-gpu docker pull paddlepaddle/paddle:latest-gpu
docker pull docker.paddlepaddle.org/paddle:latest-gpu docker pull docker.paddlepaddlehub.com/paddle:latest-gpu
Choose between different BLAS version: Choose between different BLAS version:
...@@ -53,7 +53,7 @@ and run: ...@@ -53,7 +53,7 @@ and run:
docker pull paddlepaddle/paddle:[tag] docker pull paddlepaddle/paddle:[tag]
# i.e. # i.e.
docker pull docker.paddlepaddle.org/paddle:0.10.0-gpu docker pull docker.paddlepaddlehub.com/paddle:0.11.0-gpu
.. _docker_run: .. _docker_run:
......
...@@ -61,6 +61,9 @@ Attribute GetAttrValue(const proto::OpDesc::Attr& attr_desc) { ...@@ -61,6 +61,9 @@ Attribute GetAttrValue(const proto::OpDesc::Attr& attr_desc) {
} }
return val; return val;
} }
case proto::AttrType::LONG: {
return attr_desc.l();
}
default: default:
PADDLE_THROW("Unsupport attr type %d", attr_desc.type()); PADDLE_THROW("Unsupport attr type %d", attr_desc.type());
} }
......
...@@ -168,6 +168,32 @@ struct ExtractAttribute<bool> { ...@@ -168,6 +168,32 @@ struct ExtractAttribute<bool> {
const std::string& attr_name_; const std::string& attr_name_;
}; };
template <>
struct ExtractAttribute<int64_t> {
explicit ExtractAttribute(const std::string& attr_name)
: attr_name_(attr_name) {}
int64_t* operator()(Attribute& attr) const {
if (attr.type() == typeid(int)) { // NOLINT
int val = boost::get<int>(attr);
attr = static_cast<int64_t>(val);
} else if (attr.type() == typeid(float)) { // NOLINT
int val = boost::get<float>(attr);
attr = static_cast<int64_t>(val);
}
int64_t* attr_value = nullptr;
try {
attr_value = &boost::get<int64_t>(attr);
} catch (boost::bad_get& bad_get) {
PADDLE_THROW("Cannot get attribute %s by type int64_t, its type is %s",
attr_name_, attr.type().name());
}
return attr_value;
}
const std::string& attr_name_;
};
// check whether a certain attribute fit its limits // check whether a certain attribute fit its limits
// an attribute can have more than one limits // an attribute can have more than one limits
template <typename T> template <typename T>
......
...@@ -75,7 +75,7 @@ std::vector<VarDesc *> BlockDesc::AllVars() const { ...@@ -75,7 +75,7 @@ std::vector<VarDesc *> BlockDesc::AllVars() const {
OpDesc *BlockDesc::AppendOp() { OpDesc *BlockDesc::AppendOp() {
need_update_ = true; need_update_ = true;
ops_.emplace_back(new OpDesc()); ops_.emplace_back(new OpDesc(this));
return ops_.back().get(); return ops_.back().get();
} }
...@@ -86,7 +86,7 @@ void BlockDesc::AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc) { ...@@ -86,7 +86,7 @@ void BlockDesc::AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc) {
OpDesc *BlockDesc::PrependOp() { OpDesc *BlockDesc::PrependOp() {
need_update_ = true; need_update_ = true;
ops_.emplace_front(new OpDesc()); ops_.emplace_front(new OpDesc(this));
return ops_.front().get(); return ops_.front().get();
} }
...@@ -153,7 +153,7 @@ BlockDesc::BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc) ...@@ -153,7 +153,7 @@ BlockDesc::BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc)
vars_[var_desc.name()].reset(new VarDesc(var_desc)); vars_[var_desc.name()].reset(new VarDesc(var_desc));
} }
for (const proto::OpDesc &op_desc : desc_->ops()) { for (const proto::OpDesc &op_desc : desc_->ops()) {
ops_.emplace_back(new OpDesc(op_desc, prog)); ops_.emplace_back(new OpDesc(op_desc, prog, this));
} }
} }
...@@ -162,7 +162,7 @@ BlockDesc::BlockDesc(const BlockDesc &other, proto::BlockDesc *desc, ...@@ -162,7 +162,7 @@ BlockDesc::BlockDesc(const BlockDesc &other, proto::BlockDesc *desc,
: prog_(prog), desc_(desc) { : prog_(prog), desc_(desc) {
need_update_ = true; need_update_ = true;
for (auto &op : other.ops_) { for (auto &op : other.ops_) {
ops_.emplace_back(new OpDesc(*op)); ops_.emplace_back(new OpDesc(*op, this));
} }
for (auto &it : other.vars_) { for (auto &it : other.vars_) {
......
...@@ -26,6 +26,7 @@ enum AttrType { ...@@ -26,6 +26,7 @@ enum AttrType {
BOOLEAN = 6; BOOLEAN = 6;
BOOLEANS = 7; BOOLEANS = 7;
BLOCK = 8; BLOCK = 8;
LONG = 9;
} }
// OpDesc describes an instance of a C++ framework::OperatorBase // OpDesc describes an instance of a C++ framework::OperatorBase
...@@ -44,6 +45,7 @@ message OpDesc { ...@@ -44,6 +45,7 @@ message OpDesc {
optional bool b = 10; optional bool b = 10;
repeated bool bools = 11; repeated bool bools = 11;
optional int32 block_idx = 12; optional int32 block_idx = 12;
optional int64 l = 13;
}; };
message Var { message Var {
......
...@@ -97,7 +97,7 @@ void OpDesc::CopyFrom(const OpDesc &op_desc) { ...@@ -97,7 +97,7 @@ void OpDesc::CopyFrom(const OpDesc &op_desc) {
need_update_ = true; need_update_ = true;
} }
OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog) OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog, BlockDesc *block)
: desc_(desc), need_update_(false) { : desc_(desc), need_update_(false) {
// restore inputs_ // restore inputs_
int input_size = desc_.inputs_size(); int input_size = desc_.inputs_size();
...@@ -131,6 +131,7 @@ OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog) ...@@ -131,6 +131,7 @@ OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog)
attrs_[attr_name] = prog->MutableBlock(bid); attrs_[attr_name] = prog->MutableBlock(bid);
} }
} }
this->block_ = block;
} }
proto::OpDesc *OpDesc::Proto() { proto::OpDesc *OpDesc::Proto() {
...@@ -282,6 +283,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> { ...@@ -282,6 +283,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
VectorToRepeated(v, attr_->mutable_bools()); VectorToRepeated(v, attr_->mutable_bools());
} }
void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); } void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); }
// Visitor overload for 64-bit integer attributes: writes the value through
// the attribute's set_l() setter.
void operator()(int64_t v) const { attr_->set_l(v); }
void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
}; };
......
...@@ -25,7 +25,6 @@ namespace framework { ...@@ -25,7 +25,6 @@ namespace framework {
class BlockDesc; class BlockDesc;
class ProgramDesc; class ProgramDesc;
class OpDesc { class OpDesc {
public: public:
OpDesc() {} OpDesc() {}
...@@ -33,7 +32,14 @@ class OpDesc { ...@@ -33,7 +32,14 @@ class OpDesc {
OpDesc(const std::string &type, const VariableNameMap &inputs, OpDesc(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs); const VariableNameMap &outputs, const AttributeMap &attrs);
OpDesc(const proto::OpDesc &desc, ProgramDesc *prog); OpDesc(const proto::OpDesc &desc, ProgramDesc *prog, BlockDesc *block);
// Creates an op description whose block pointer is set to `block`. The
// pointer is only stored; the op does not take ownership of the block.
explicit OpDesc(BlockDesc *block) : block_(block) {}
// Copies `other` through copy assignment, then overrides the copied block
// pointer with `block`, so the new op points at `block` rather than at the
// block `other` pointed to.
OpDesc(const OpDesc &other, BlockDesc *block) {
*this = other;
block_ = block;
}
void CopyFrom(const OpDesc &op_desc); void CopyFrom(const OpDesc &op_desc);
...@@ -117,6 +123,10 @@ class OpDesc { ...@@ -117,6 +123,10 @@ class OpDesc {
void Flush(); void Flush();
// Returns the block pointer stored in this op (not owned by the op).
BlockDesc *Block() { return this->block_; }
// Replaces the stored block pointer with `block`; ownership is not taken.
void SetBlock(BlockDesc *block) { this->block_ = block; }
private: private:
template <typename MapType> template <typename MapType>
static std::vector<typename MapType::key_type> MapKeys(const MapType &map) { static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {
...@@ -129,6 +139,7 @@ class OpDesc { ...@@ -129,6 +139,7 @@ class OpDesc {
} }
proto::OpDesc desc_; proto::OpDesc desc_;
BlockDesc *block_; // not_own
// input arg name => input variable names // input arg name => input variable names
VariableNameMap inputs_; VariableNameMap inputs_;
// output arg name => output variable names // output arg name => output variable names
......
...@@ -35,7 +35,7 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>; ...@@ -35,7 +35,7 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using Attribute = using Attribute =
boost::variant<boost::blank, int, float, std::string, std::vector<int>, boost::variant<boost::blank, int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>, bool, std::vector<float>, std::vector<std::string>, bool,
std::vector<bool>, BlockDesc*>; std::vector<bool>, BlockDesc*, int64_t>;
using AttributeMap = std::unordered_map<std::string, Attribute>; using AttributeMap = std::unordered_map<std::string, Attribute>;
......
...@@ -66,6 +66,8 @@ class VarDesc { ...@@ -66,6 +66,8 @@ class VarDesc {
std::string Name() const { return desc_.name(); } std::string Name() const { return desc_.name(); }
// Overwrites the name stored in the underlying desc_ with `name`.
void SetName(std::string name) { desc_.set_name(name); }
void SetShape(const std::vector<int64_t> &dims); void SetShape(const std::vector<int64_t> &dims);
void SetDataType(proto::DataType data_type); void SetDataType(proto::DataType data_type);
......
...@@ -12,19 +12,6 @@ ...@@ -12,19 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <memory> #include <memory>
#include <string> #include <string>
......
...@@ -21,8 +21,6 @@ namespace operators { ...@@ -21,8 +21,6 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor; using LoDTensor = framework::LoDTensor;
constexpr char kEPS = 1e-6;
class BipartiteMatchOp : public framework::OperatorWithKernel { class BipartiteMatchOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -46,6 +44,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> { ...@@ -46,6 +44,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
// The match_dist must be initialized to 0 at first. // The match_dist must be initialized to 0 at first.
void BipartiteMatch(const Tensor& dist, int* match_indices, void BipartiteMatch(const Tensor& dist, int* match_indices,
T* match_dist) const { T* match_dist) const {
constexpr T kEPS = static_cast<T>(1e-6);
PADDLE_ENFORCE_EQ(dist.dims().size(), 2, "The rank of dist must be 2."); PADDLE_ENFORCE_EQ(dist.dims().size(), 2, "The rank of dist must be 2.");
int64_t row = dist.dims()[0]; int64_t row = dist.dims()[0];
int64_t col = dist.dims()[1]; int64_t col = dist.dims()[1];
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/iou_similarity_op.h"
namespace paddle {
namespace operators {
// Operator definition for iou_similarity. It only performs shape inference:
// it validates the two box-list inputs and derives the output shape.
class IOUSimilarityOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
// Requires inputs "X" and "Y" to be present, rank-2, and to have a second
// dimension of 4. Sets "Out" to shape [x_dims[0], y_dims[0]] and shares the
// LoD of "X" with "Out".
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of IOUSimilarityOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"),
"Input(Y) of IOUSimilarityOp should not be null.");
auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
// Each box list must be a matrix with one 4-value box per row.
PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The rank of Input(X) must be 2.");
PADDLE_ENFORCE_EQ(x_dims[1], 4UL, "The shape of X is [N, 4]");
PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The rank of Input(Y) must be 2.");
PADDLE_ENFORCE_EQ(y_dims[1], 4UL, "The shape of Y is [M, 4]");
// The output has one row per box in X, so it reuses X's LoD.
ctx->ShareLoD("X", /*->*/ "Out");
ctx->SetOutputDim("Out", framework::make_ddim({x_dims[0], y_dims[0]}));
}
};
// Declares the inputs, output and user-facing documentation of the
// iou_similarity operator.
class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  IOUSimilarityOpMaker(OpProto *proto, OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "(LoDTensor, default LoDTensor<float>) "
             "Box list X is a 2-D LoDTensor with shape [N, 4] holds N boxes, "
             "each box is represented as [xmin, ymin, xmax, ymax], "
             "the shape of X is [N, 4]. [xmin, ymin] is the left top "
             "coordinate of the box if the input is image feature map, they "
             "are close to the origin of the coordinate system. "
             "[xmax, ymax] is the right bottom coordinate of the box. "
             "This tensor can contain LoD information to represent a batch "
             "of inputs. One instance of this batch can contain different "
             "numbers of entities.");
    // Y is the [M, 4] box list; the shape inference of this op checks that
    // its second dimension is 4 and uses its first dimension as the column
    // count of the output.
    AddInput("Y",
             "(Tensor, default Tensor<float>) "
             "Box list Y holds M boxes, each box is represented as "
             "[xmin, ymin, xmax, ymax], the shape of Y is [M, 4]. "
             "[xmin, ymin] is the left top coordinate of the box if the "
             "input is image feature map, and [xmax, ymax] is the right "
             "bottom coordinate of the box.");
    AddOutput("Out",
              "(LoDTensor, the lod is same as input X) The output of "
              "iou_similarity op, a tensor with shape [N, M] "
              "representing pairwise iou scores.");
    AddComment(R"DOC(
IOU Similarity Operator.
Computes intersection-over-union (IOU) between two box lists.
Box list 'X' should be a LoDTensor and 'Y' is a common Tensor,
boxes in 'Y' are shared by all instances of the batched inputs of X.
Given two boxes A and B, the calculation of IOU is as follows:
$$
IOU(A, B) =
\frac{area(A\cap B)}{area(A)+area(B)-area(A\cap B)}
$$
)DOC");
  }
};
} // namespace operators
} // namespace paddle
// Register the iou_similarity operator (no gradient op) together with its
// CPU kernels for float and double.
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(iou_similarity, ops::IOUSimilarityOp,
ops::IOUSimilarityOpMaker);
REGISTER_OP_CPU_KERNEL(
iou_similarity,
ops::IOUSimilarityKernel<paddle::platform::CPUDeviceContext, float>,
ops::IOUSimilarityKernel<paddle::platform::CPUDeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/iou_similarity_op.h"
// Register the CUDA kernels of iou_similarity for float and double.
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
iou_similarity,
ops::IOUSimilarityKernel<paddle::platform::CUDADeviceContext, float>,
ops::IOUSimilarityKernel<paddle::platform::CUDADeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/platform/for_range.h"
// Computes the intersection-over-union (IOU) of two axis-aligned boxes, each
// given by its corner coordinates (xmin, ymin, xmax, ymax):
//
//   IOU = area(A ∩ B) / (area(A) + area(B) - area(A ∩ B))
//
// Returns 0 when the boxes do not overlap, and also when the union area is
// exactly zero (for example two zero-area boxes), where the plain formula
// would evaluate 0 / 0 and produce NaN.
template <typename T>
inline HOSTDEVICE T IOUSimilarity(T xmin1, T ymin1, T xmax1, T ymax1, T xmin2,
                                  T ymin2, T xmax2, T ymax2) {
  constexpr T zero = static_cast<T>(0);
  const T area1 = (ymax1 - ymin1) * (xmax1 - xmin1);
  const T area2 = (ymax2 - ymin2) * (xmax2 - xmin2);

  // Corners of the intersection rectangle.
  const T inter_xmax = xmax1 > xmax2 ? xmax2 : xmax1;
  const T inter_ymax = ymax1 > ymax2 ? ymax2 : ymax1;
  const T inter_xmin = xmin1 > xmin2 ? xmin1 : xmin2;
  const T inter_ymin = ymin1 > ymin2 ? ymin1 : ymin2;

  // A negative extent means the boxes are disjoint along that axis.
  T inter_height = inter_ymax - inter_ymin;
  T inter_width = inter_xmax - inter_xmin;
  inter_height = inter_height > zero ? inter_height : zero;
  inter_width = inter_width > zero ? inter_width : zero;

  const T inter_area = inter_width * inter_height;
  const T union_area = area1 + area2 - inter_area;

  // Exact comparison is intended: it guards only the literal divide-by-zero
  // case and leaves every other input (including NaN) on the original path.
  if (union_area == zero) {
    return zero;
  }
  return inter_area / union_area;
}
// Per-row functor: for the box at row `row_id` of x, computes its IOU with
// each of the `cols` boxes in y and writes the scores to row `row_id` of z.
// Boxes are read as 4 consecutive values (xmin, ymin, xmax, ymax); z is
// indexed as a row-major matrix with `cols` columns.
template <typename T>
struct IOUSimilarityFunctor {
  IOUSimilarityFunctor(const T* x, const T* y, T* z, int cols)
      : x_(x), y_(y), z_(z), cols_(static_cast<size_t>(cols)) {}

  inline HOSTDEVICE void operator()(size_t row_id) const {
    // Load the query box once; it is reused for every column.
    const size_t query_base = row_id * 4;
    const T query_xmin = x_[query_base];
    const T query_ymin = x_[query_base + 1];
    const T query_xmax = x_[query_base + 2];
    const T query_ymax = x_[query_base + 3];

    const size_t out_base = row_id * cols_;
    for (size_t col = 0; col < cols_; ++col) {
      const size_t ref_base = col * 4;
      const T ref_xmin = y_[ref_base];
      const T ref_ymin = y_[ref_base + 1];
      const T ref_xmax = y_[ref_base + 2];
      const T ref_ymax = y_[ref_base + 3];
      z_[out_base + col] =
          IOUSimilarity(query_xmin, query_ymin, query_xmax, query_ymax,
                        ref_xmin, ref_ymin, ref_xmax, ref_ymax);
    }
  }

  const T* x_;         // box list read per row
  const T* y_;         // box list read per column
  T* z_;               // output scores
  const size_t cols_;  // number of boxes in y_ / columns of z_
};
namespace paddle {
namespace operators {
// Kernel for iou_similarity: applies IOUSimilarityFunctor once per row of
// input "X" through platform::ForRange on the given device context, writing
// the scores against the rows of input "Y" into output "Out".
template <typename DeviceContext, typename T>
class IOUSimilarityKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const framework::LoDTensor* boxes_x = ctx.Input<framework::LoDTensor>("X");
    const framework::Tensor* boxes_y = ctx.Input<framework::Tensor>("Y");
    framework::LoDTensor* scores = ctx.Output<framework::LoDTensor>("Out");

    // Row counts of the two inputs: rows of X drive the range, rows of Y are
    // the column count handed to the functor.
    const int num_rows = boxes_x->dims()[0];
    const int num_cols = boxes_y->dims()[0];

    IOUSimilarityFunctor<T> row_functor(
        boxes_x->data<T>(), boxes_y->data<T>(),
        scores->mutable_data<T>(ctx.GetPlace()), num_cols);

    const DeviceContext& dev_ctx =
        static_cast<const DeviceContext&>(ctx.device_context());
    platform::ForRange<DeviceContext> for_range(dev_ctx, num_rows);
    for_range(row_functor);
  }
};
} // namespace operators
} // namespace paddle
...@@ -66,6 +66,12 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -66,6 +66,12 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
"(boolean, default false) " "(boolean, default false) "
"Sparse update") "Sparse update")
.SetDefault(false); .SetDefault(false);
AddAttr<int64_t>("padding_idx",
"(int64, default -1) "
"If the value is -1, it makes no effect to lookup. "
"Otherwise the given value indicates padding the output "
"with zeros whenever lookup encounters it in Ids.")
.SetDefault(-1);
AddComment(R"DOC( AddComment(R"DOC(
Lookup Table Operator. Lookup Table Operator.
......
...@@ -21,9 +21,11 @@ limitations under the License. */ ...@@ -21,9 +21,11 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T, int BlockDimX, int BlockDimY, int GridDimX> template <typename T, int BlockDimX, int BlockDimY, int GridDimX,
bool PaddingFlag>
__global__ void LookupTable(T* output, const T* table, const int64_t* ids, __global__ void LookupTable(T* output, const T* table, const int64_t* ids,
const int64_t N, const int64_t K, const int64_t D) { const int64_t N, const int64_t K, const int64_t D,
const int64_t padding_idx) {
int idx = threadIdx.x; int idx = threadIdx.x;
int idy = blockIdx.x + threadIdx.y * GridDimX; int idy = blockIdx.x + threadIdx.y * GridDimX;
...@@ -34,7 +36,14 @@ __global__ void LookupTable(T* output, const T* table, const int64_t* ids, ...@@ -34,7 +36,14 @@ __global__ void LookupTable(T* output, const T* table, const int64_t* ids,
T* out = output + idy * D; T* out = output + idy * D;
const T* tab = table + id * D; const T* tab = table + id * D;
for (int i = idx; i < D; i += BlockDimX) { for (int i = idx; i < D; i += BlockDimX) {
out[i] = tab[i]; if (PaddingFlag) {
if (id == padding_idx)
out[i] = static_cast<T>(0);
else
out[i] = tab[i];
} else {
out[i] = tab[i];
}
} }
idy += BlockDimY * GridDimX; idy += BlockDimY * GridDimX;
} }
...@@ -67,6 +76,7 @@ class LookupTableCUDAKernel : public framework::OpKernel<T> { ...@@ -67,6 +76,7 @@ class LookupTableCUDAKernel : public framework::OpKernel<T> {
auto* table_t = context.Input<LoDTensor>("W"); auto* table_t = context.Input<LoDTensor>("W");
auto* ids_t = context.Input<LoDTensor>("Ids"); auto* ids_t = context.Input<LoDTensor>("Ids");
auto* output_t = context.Output<LoDTensor>("Out"); auto* output_t = context.Output<LoDTensor>("Out");
int64_t padding_idx = context.Attr<int64_t>("padding_idx");
size_t N = table_t->dims()[0]; size_t N = table_t->dims()[0];
size_t D = table_t->dims()[1]; size_t D = table_t->dims()[1];
...@@ -77,10 +87,17 @@ class LookupTableCUDAKernel : public framework::OpKernel<T> { ...@@ -77,10 +87,17 @@ class LookupTableCUDAKernel : public framework::OpKernel<T> {
dim3 threads(128, 8); dim3 threads(128, 8);
dim3 grids(8, 1); dim3 grids(8, 1);
LookupTable<
T, 128, 8, if (padding_idx == -1)
8><<<grids, threads, 0, context.cuda_device_context().stream()>>>( LookupTable<
output, table, ids, N, K, D); T, 128, 8, 8,
false><<<grids, threads, 0, context.cuda_device_context().stream()>>>(
output, table, ids, N, K, D, padding_idx);
else
LookupTable<
T, 128, 8, 8,
true><<<grids, threads, 0, context.cuda_device_context().stream()>>>(
output, table, ids, N, K, D, padding_idx);
} }
}; };
...@@ -91,6 +108,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> { ...@@ -91,6 +108,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
auto& dev_ctx = auto& dev_ctx =
context.template device_context<platform::CUDADeviceContext>(); context.template device_context<platform::CUDADeviceContext>();
bool is_sparse = context.Attr<bool>("is_sparse"); bool is_sparse = context.Attr<bool>("is_sparse");
// Since paddings are not trainable and fixed in forward, the gradient of
// paddings makes no sense and we don't deal with it in backward.
if (is_sparse) { if (is_sparse) {
auto* ids = context.Input<LoDTensor>("Ids"); auto* ids = context.Input<LoDTensor>("Ids");
auto* table = context.Input<LoDTensor>("W"); auto* table = context.Input<LoDTensor>("W");
......
...@@ -32,16 +32,30 @@ class LookupTableKernel : public framework::OpKernel<T> { ...@@ -32,16 +32,30 @@ class LookupTableKernel : public framework::OpKernel<T> {
auto* table_t = context.Input<LoDTensor>("W"); // float tensor auto* table_t = context.Input<LoDTensor>("W"); // float tensor
auto* ids_t = context.Input<LoDTensor>("Ids"); // int tensor auto* ids_t = context.Input<LoDTensor>("Ids"); // int tensor
auto* output_t = context.Output<LoDTensor>("Out"); // float tensor auto* output_t = context.Output<LoDTensor>("Out"); // float tensor
int64_t padding_idx = context.Attr<int64_t>("padding_idx");
int N = table_t->dims()[0]; int N = table_t->dims()[0];
int D = table_t->dims()[1]; int D = table_t->dims()[1];
auto* ids = ids_t->data<int64_t>(); auto* ids = ids_t->data<int64_t>();
auto* table = table_t->data<T>(); auto* table = table_t->data<T>();
auto* output = output_t->mutable_data<T>(context.GetPlace()); auto* output = output_t->mutable_data<T>(context.GetPlace());
for (int64_t i = 0; i < ids_t->numel(); ++i) {
PADDLE_ENFORCE_LT(ids[i], N); if (padding_idx == -1) {
PADDLE_ENFORCE_GE(ids[i], 0); for (int64_t i = 0; i < ids_t->numel(); ++i) {
memcpy(output + i * D, table + ids[i] * D, D * sizeof(T)); PADDLE_ENFORCE_LT(ids[i], N);
PADDLE_ENFORCE_GE(ids[i], 0);
memcpy(output + i * D, table + ids[i] * D, D * sizeof(T));
}
} else {
for (int64_t i = 0; i < ids_t->numel(); ++i) {
if (ids[i] == padding_idx) {
memset(output + i * D, 0, D * sizeof(T));
} else {
PADDLE_ENFORCE_LT(ids[i], N);
PADDLE_ENFORCE_GE(ids[i], 0);
memcpy(output + i * D, table + ids[i] * D, D * sizeof(T));
}
}
} }
} }
}; };
...@@ -51,6 +65,8 @@ class LookupTableGradKernel : public framework::OpKernel<T> { ...@@ -51,6 +65,8 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
bool is_sparse = context.Attr<bool>("is_sparse"); bool is_sparse = context.Attr<bool>("is_sparse");
// Since paddings are not trainable and fixed in forward, the gradient of
// paddings makes no sense and we don't deal with it in backward.
if (is_sparse) { if (is_sparse) {
auto* ids = context.Input<LoDTensor>("Ids"); auto* ids = context.Input<LoDTensor>("Ids");
auto* table = context.Input<LoDTensor>("W"); auto* table = context.Input<LoDTensor>("W");
......
...@@ -124,7 +124,8 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -124,7 +124,8 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
"This attribute only be used in unitest. Classes " "This attribute only be used in unitest. Classes "
"in this list wiil be used as negative classes " "in this list wiil be used as negative classes "
"for every samples. Under normal conditions, " "for every samples. Under normal conditions, "
"user should avoid setting this attribute."); "user should avoid setting this attribute.")
.SetDefault({});
AddComment(R"DOC( AddComment(R"DOC(
Compute and return the noise-contrastive estimation training loss. Compute and return the noise-contrastive estimation training loss.
See [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf). See [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf).
......
...@@ -197,7 +197,8 @@ class NCEGradKernel : public framework::OpKernel<T> { ...@@ -197,7 +197,8 @@ class NCEGradKernel : public framework::OpKernel<T> {
// get d_x // get d_x
auto d_x = context.Output<Tensor>(framework::GradVarName("Input")); auto d_x = context.Output<Tensor>(framework::GradVarName("Input"));
if (d_x != nullptr) { if (d_x != nullptr) {
d_x->mutable_data<T>(context.GetPlace()); auto* d_x_data = d_x->mutable_data<T>(context.GetPlace());
std::fill(d_x_data, d_x_data + d_x->numel(), 0.0);
auto d_x_matrix = EigenMatrix<T>::From(*d_x); auto d_x_matrix = EigenMatrix<T>::From(*d_x);
auto w_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Weight"))); auto w_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Weight")));
for (int64_t i = 0; i < sample_labels->numel(); ++i) { for (int64_t i = 0; i < sample_labels->numel(); ++i) {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/prior_box_op.h"
namespace paddle {
namespace operators {
// Operator definition for prior_box. InferShape validates the inputs and
// attributes and sets both outputs ("Boxes" and "Variances") to shape
// [input_height, input_width, num_priors, 4].
class PriorBoxOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Input"),
                   "Input(Input) of PriorBoxOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Image"),
                   "Input(Image) of PriorBoxOp should not be null.");

    auto image_dims = ctx->GetInputDim("Image");
    auto input_dims = ctx->GetInputDim("Input");
    PADDLE_ENFORCE(image_dims.size() == 4, "The layout of image is NCHW.");
    PADDLE_ENFORCE(input_dims.size() == 4, "The layout of input is NCHW.");

    // The feature map must be strictly smaller than the image in both
    // spatial dimensions (indices 2 and 3 of the NCHW layout).
    PADDLE_ENFORCE_LT(input_dims[2], image_dims[2],
                      "The height of input must smaller than image.");
    PADDLE_ENFORCE_LT(input_dims[3], image_dims[3],
                      "The width of input must smaller than image.");

    auto min_sizes = ctx->Attrs().Get<std::vector<int>>("min_sizes");
    auto max_sizes = ctx->Attrs().Get<std::vector<int>>("max_sizes");
    auto variances = ctx->Attrs().Get<std::vector<float>>("variances");
    auto aspect_ratios = ctx->Attrs().Get<std::vector<float>>("aspect_ratios");
    bool flip = ctx->Attrs().Get<bool>("flip");

    PADDLE_ENFORCE_GT(min_sizes.size(), 0,
                      "Size of min_sizes must be at least 1.");
    for (size_t i = 0; i < min_sizes.size(); ++i) {
      PADDLE_ENFORCE_GT(min_sizes[i], 0, "min_sizes[%d] must be positive.", i);
    }

    std::vector<float> aspect_ratios_vec;
    ExpandAspectRatios(aspect_ratios, flip, aspect_ratios_vec);

    // One prior per (expanded aspect ratio, min size) pair, plus one extra
    // prior for every max size when max sizes are given.
    int num_priors = aspect_ratios_vec.size() * min_sizes.size();
    if (max_sizes.size() > 0) {
      PADDLE_ENFORCE_EQ(max_sizes.size(), min_sizes.size(),
                        "The number of min_size and max_size must be equal.");
      for (size_t i = 0; i < min_sizes.size(); ++i) {
        PADDLE_ENFORCE_GT(max_sizes[i], min_sizes[i],
                          "max_size[%d] must be greater than min_size[%d].", i,
                          i);
        num_priors += 1;
      }
    }

    PADDLE_ENFORCE_EQ(variances.size(), 4, "Must and only provide 4 variance.");
    for (size_t i = 0; i < variances.size(); ++i) {
      PADDLE_ENFORCE_GT(variances[i], 0.0,
                        "variance[%d] must be greater than 0.", i);
    }

    // The op maker declares step_w with default 0.0, documented as "0 for
    // auto calculation", so zero has to pass validation; only negative steps
    // are rejected. step_h is presumably declared the same way (its
    // declaration is not fully visible here) -- TODO confirm.
    const float step_h = ctx->Attrs().Get<float>("step_h");
    PADDLE_ENFORCE_GE(step_h, 0.0,
                      "step_h should be larger than or equal to 0.");
    const float step_w = ctx->Attrs().Get<float>("step_w");
    PADDLE_ENFORCE_GE(step_w, 0.0,
                      "step_w should be larger than or equal to 0.");

    std::vector<int64_t> dim_vec(4);
    dim_vec[0] = input_dims[2];
    dim_vec[1] = input_dims[3];
    dim_vec[2] = num_priors;
    dim_vec[3] = 4;
    ctx->SetOutputDim("Boxes", framework::make_ddim(dim_vec));
    ctx->SetOutputDim("Variances", framework::make_ddim(dim_vec));
  }
};
// Declares the inputs, outputs, attributes and user-facing documentation of
// the prior_box operator.
class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  PriorBoxOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("Input",
             "(Tensor, default Tensor<float>), "
             "the input feature data of PriorBoxOp, The layout is NCHW.");
    AddInput("Image",
             "(Tensor, default Tensor<float>), "
             "the input image data of PriorBoxOp, The layout is NCHW.");
    AddOutput("Boxes",
              "(Tensor, default Tensor<float>), the output prior boxes of "
              "PriorBoxOp. The layout is [H, W, num_priors, 4]. "
              "H is the height of input, W is the width of input, num_priors "
              "is the box count of each position.");
    AddOutput("Variances",
              "(Tensor, default Tensor<float>), the expanded variances of "
              "PriorBoxOp. The layout is [H, W, num_priors, 4]. "
              "H is the height of input, W is the width of input, num_priors "
              "is the box count of each position.");

    // NOTE: for every attribute below the type tag and the description are
    // adjacent string literals forming ONE comment argument. Separating them
    // with a comma would pass the description as AddAttr's third argument
    // (presumably the `generated` bool flag -- confirm against
    // OpProtoAndCheckerMaker::AddAttr) and drop it from the documentation.
    AddAttr<std::vector<int>>("min_sizes",
                              "(vector<int>) "
                              "List of min sizes of generated prior boxes.");
    AddAttr<std::vector<int>>("max_sizes",
                              "(vector<int>) "
                              "List of max sizes of generated prior boxes.");
    AddAttr<std::vector<float>>(
        "aspect_ratios",
        "(vector<float>) "
        "List of aspect ratios of generated prior boxes.");
    AddAttr<std::vector<float>>(
        "variances",
        "(vector<float>) "
        "List of variances to be encoded in prior boxes.");
    AddAttr<bool>("flip",
                  "(bool) "
                  "Whether to flip aspect ratios.")
        .SetDefault(true);
    AddAttr<bool>("clip",
                  "(bool) "
                  "Whether to clip out-of-boundary boxes.")
        .SetDefault(true);
    AddAttr<float>("step_w",
                   "Prior boxes step across width, 0 for auto calculation.")
        .SetDefault(0.0);
    AddAttr<float>("step_h",
                   "Prior boxes step across height, 0 for auto calculation.")
        .SetDefault(0.0);
    AddAttr<float>("offset",
                   "(float) "
                   "Prior boxes center offset.")
        .SetDefault(0.5);
    AddComment(R"DOC(
Prior box operator
Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm.
Each position of the input produce N prior boxes, N is determined by
the count of min_sizes, max_sizes and aspect_ratios, The size of the
box is in range(min_size, max_size) interval, which is generated in
sequence according to the aspect_ratios.
Please get more information from the following papers:
https://arxiv.org/abs/1512.02325.
)DOC");
  }
};
} // namespace operators
} // namespace paddle
// Short alias for the operator namespace, used by the registration macros
// below.
namespace ops = paddle::operators;
// Register prior_box with its operator class and proto maker; no gradient
// operator is registered for it.
REGISTER_OP_WITHOUT_GRADIENT(prior_box, ops::PriorBoxOp, ops::PriorBoxOpMaker);
// Register the CPU kernel instantiations for float and double.
REGISTER_OP_CPU_KERNEL(
prior_box, ops::PriorBoxOpKernel<paddle::platform::CPUPlace, float>,
ops::PriorBoxOpKernel<paddle::platform::CPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/platform/transform.h"
namespace paddle {
namespace operators {
// Builds the list of distinct aspect ratios used to generate prior boxes.
//
// The output always starts with 1.0. Each input ratio that is not within
// 1e-6 of a ratio already in the output is appended; when `flip` is true its
// reciprocal is appended right after it. Any previous content of
// `output_aspect_ratior` is discarded.
inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
                               bool flip,
                               std::vector<float>& output_aspect_ratior) {
  constexpr float tolerance = 1e-6;

  output_aspect_ratior.clear();
  output_aspect_ratior.push_back(1.);

  for (const float ratio : input_aspect_ratior) {
    // Look for a ratio that is (almost) equal among those collected so far.
    bool is_new = true;
    for (const float known : output_aspect_ratior) {
      if (fabs(ratio - known) < tolerance) {
        is_new = false;
        break;
      }
    }
    if (!is_new) {
      continue;
    }

    output_aspect_ratior.push_back(ratio);
    if (flip) {
      output_aspect_ratior.push_back(1. / ratio);
    }
  }
}
// Unary functor that clamps its argument to the closed interval [0, 1].
template <typename T>
struct ClipFunctor {
  HOSTDEVICE T operator()(T in) const {
    const T lower_bound = 0.;
    const T upper_bound = 1.;
    const T raised = std::max<T>(in, lower_bound);
    return std::min<T>(raised, upper_bound);
  }
};
// Kernel of the prior_box operator.
//
// For every spatial position (h, w) of "Input" it writes num_priors boxes
// (xmin, ymin, xmax, ymax), each divided by the image width/height, into
// "Boxes", and broadcasts the `variances` attribute into "Variances".
// The template parameter Place is not referenced in the body; the clip step
// uses platform::CPUDeviceContext explicitly.
template <typename Place, typename T>
class PriorBoxOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<paddle::framework::Tensor>("Input");
auto* image = ctx.Input<paddle::framework::Tensor>("Image");
auto* boxes = ctx.Output<paddle::framework::Tensor>("Boxes");
auto* vars = ctx.Output<paddle::framework::Tensor>("Variances");
auto min_sizes = ctx.Attr<std::vector<int>>("min_sizes");
auto max_sizes = ctx.Attr<std::vector<int>>("max_sizes");
auto input_aspect_ratio = ctx.Attr<std::vector<float>>("aspect_ratios");
auto variances = ctx.Attr<std::vector<float>>("variances");
auto flip = ctx.Attr<bool>("flip");
auto clip = ctx.Attr<bool>("clip");
// Expanded list: starts with 1.0, de-duplicated, optionally with
// reciprocals (see ExpandAspectRatios).
std::vector<float> aspect_ratios;
ExpandAspectRatios(input_aspect_ratio, flip, aspect_ratios);
T step_w = static_cast<T>(ctx.Attr<float>("step_w"));
T step_h = static_cast<T>(ctx.Attr<float>("step_h"));
T offset = static_cast<T>(ctx.Attr<float>("offset"));
// Dimensions 2 and 3 are read as height and width of both tensors.
auto img_width = image->dims()[3];
auto img_height = image->dims()[2];
auto feature_width = input->dims()[3];
auto feature_height = input->dims()[2];
// If either step attribute is 0, BOTH steps are derived from the
// image / feature-map size ratio; otherwise the attributes are used as-is.
T step_width, step_height;
if (step_w == 0 || step_h == 0) {
step_width = static_cast<T>(img_width) / feature_width;
step_height = static_cast<T>(img_height) / feature_height;
} else {
step_width = step_w;
step_height = step_h;
}
// Priors per position: one per (aspect ratio, min_size) pair, plus one per
// max_size when max_sizes is given.
int num_priors = aspect_ratios.size() * min_sizes.size();
if (max_sizes.size() > 0) {
num_priors += max_sizes.size();
}
boxes->mutable_data<T>(ctx.GetPlace());
vars->mutable_data<T>(ctx.GetPlace());
// 4-D view of the output, indexed as (h, w, prior index, coordinate).
auto e_boxes = framework::EigenTensor<T, 4>::From(*boxes);
for (int h = 0; h < feature_height; ++h) {
for (int w = 0; w < feature_width; ++w) {
// Box center for this position, in the same units as the step.
T center_x = (w + offset) * step_width;
T center_y = (h + offset) * step_height;
T box_width, box_height;
// Running prior index at this position; it keeps increasing across all
// min_sizes, so the write order below defines the output layout.
int idx = 0;
for (size_t s = 0; s < min_sizes.size(); ++s) {
int min_size = min_sizes[s];
// first prior: aspect_ratio = 1, size = min_size
box_width = box_height = min_size;
// xmin
e_boxes(h, w, idx, 0) = (center_x - box_width / 2.) / img_width;
// ymin
e_boxes(h, w, idx, 1) = (center_y - box_height / 2.) / img_height;
// xmax
e_boxes(h, w, idx, 2) = (center_x + box_width / 2.) / img_width;
// ymax
e_boxes(h, w, idx, 3) = (center_y + box_height / 2.) / img_height;
idx++;
if (max_sizes.size() > 0) {
int max_size = max_sizes[s];
// second prior: aspect_ratio = 1,
// size = sqrt(min_size * max_size)
box_width = box_height = sqrt(min_size * max_size);
// xmin
e_boxes(h, w, idx, 0) = (center_x - box_width / 2.) / img_width;
// ymin
e_boxes(h, w, idx, 1) = (center_y - box_height / 2.) / img_height;
// xmax
e_boxes(h, w, idx, 2) = (center_x + box_width / 2.) / img_width;
// ymax
e_boxes(h, w, idx, 3) = (center_y + box_height / 2.) / img_height;
idx++;
}
// rest of priors
for (size_t r = 0; r < aspect_ratios.size(); ++r) {
float ar = aspect_ratios[r];
// Ratio 1 was already emitted as the "first prior" above.
if (fabs(ar - 1.) < 1e-6) {
continue;
}
box_width = min_size * sqrt(ar);
box_height = min_size / sqrt(ar);
// xmin
e_boxes(h, w, idx, 0) = (center_x - box_width / 2.) / img_width;
// ymin
e_boxes(h, w, idx, 1) = (center_y - box_height / 2.) / img_height;
// xmax
e_boxes(h, w, idx, 2) = (center_x + box_width / 2.) / img_width;
// ymax
e_boxes(h, w, idx, 3) = (center_y + box_height / 2.) / img_height;
idx++;
}
}
}
}
// Optionally clamp every coordinate to [0, 1], in place over the whole
// Boxes buffer.
if (clip) {
platform::Transform<platform::CPUDeviceContext> trans;
ClipFunctor<T> clip_func;
trans(ctx.template device_context<platform::CPUDeviceContext>(),
boxes->data<T>(), boxes->data<T>() + boxes->numel(),
boxes->data<T>(), clip_func);
}
// Copy the variances attribute into a temporary 1 x variances.size() tensor.
framework::Tensor var_t;
var_t.mutable_data<T>(
framework::make_ddim({1, static_cast<int>(variances.size())}),
ctx.GetPlace());
auto var_et = framework::EigenTensor<T, 2>::From(var_t);
for (size_t i = 0; i < variances.size(); ++i) {
var_et(0, i) = variances[i];
}
// Temporarily view Variances as a 2-D [box_num, variances.size()] matrix,
// fill every row with the variances by broadcasting, then restore the
// original dims.
int box_num = feature_height * feature_width * num_priors;
auto var_dim = vars->dims();
vars->Resize({box_num, static_cast<int>(variances.size())});
auto e_vars = framework::EigenMatrix<T, Eigen::RowMajor>::From(*vars);
e_vars = var_et.broadcast(Eigen::DSizes<int, 2>(box_num, 1));
vars->Resize(var_dim);
}
}; // class PriorBoxOpKernel
} // namespace operators
} // namespace paddle
...@@ -64,6 +64,8 @@ std::string AttrType(paddle::framework::proto::AttrType at) { ...@@ -64,6 +64,8 @@ std::string AttrType(paddle::framework::proto::AttrType at) {
return "bool array"; return "bool array";
case paddle::framework::proto::BLOCK: case paddle::framework::proto::BLOCK:
return "block id"; return "block id";
case paddle::framework::proto::LONG:
return "long";
} }
return "UNKNOWN"; // not possible return "UNKNOWN"; // not possible
} }
......
...@@ -212,6 +212,7 @@ void BindVarDsec(py::module &m) { ...@@ -212,6 +212,7 @@ void BindVarDsec(py::module &m) {
return name; return name;
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("set_name", &VarDesc::SetName)
.def("set_shape", &VarDesc::SetShape) .def("set_shape", &VarDesc::SetShape)
.def("set_dtype", &VarDesc::SetDataType) .def("set_dtype", &VarDesc::SetDataType)
.def("shape", &VarDesc::Shape, py::return_value_policy::reference) .def("shape", &VarDesc::Shape, py::return_value_policy::reference)
...@@ -280,7 +281,8 @@ void BindOpDesc(py::module &m) { ...@@ -280,7 +281,8 @@ void BindOpDesc(py::module &m) {
.def("check_attrs", &OpDesc::CheckAttrs) .def("check_attrs", &OpDesc::CheckAttrs)
.def("infer_shape", &OpDesc::InferShape) .def("infer_shape", &OpDesc::InferShape)
.def("infer_var_type", &OpDesc::InferVarType) .def("infer_var_type", &OpDesc::InferVarType)
.def("serialize_to_string", SerializeMessage<OpDesc>); .def("serialize_to_string", SerializeMessage<OpDesc>)
.def("block", &OpDesc::Block, py::return_value_policy::reference);
} }
} // namespace pybind } // namespace pybind
......
...@@ -19,12 +19,14 @@ from ..layer_helper import LayerHelper ...@@ -19,12 +19,14 @@ from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant from ..initializer import Normal, Constant
from ..framework import Variable from ..framework import Variable
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from layer_function_generator import autodoc
from tensor import concat from tensor import concat
__all__ = [ __all__ = [
'fc', 'fc',
'embedding', 'embedding',
'dynamic_lstm', 'dynamic_lstm',
'dynamic_gru',
'gru_unit', 'gru_unit',
'linear_chain_crf', 'linear_chain_crf',
'crf_decoding', 'crf_decoding',
...@@ -57,6 +59,8 @@ __all__ = [ ...@@ -57,6 +59,8 @@ __all__ = [
'warpctc', 'warpctc',
'sequence_reshape', 'sequence_reshape',
'transpose', 'transpose',
'im2sequence',
'nce',
] ]
...@@ -181,22 +185,35 @@ def fc(input, ...@@ -181,22 +185,35 @@ def fc(input,
return helper.append_activation(pre_activation) return helper.append_activation(pre_activation)
def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'): def embedding(input,
size,
is_sparse=False,
padding_idx=None,
param_attr=None,
dtype='float32'):
""" """
**Embedding Layer** **Embedding Layer**
This layer is used to lookup a vector of IDs, provided by *input*, in a lookup table. This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
The result of this lookup is the embedding of each ID in the *input*. a lookup table. The result of this lookup is the embedding of each ID in the
:attr:`input`.
All the input variables are passed in as local variables to the LayerHelper All the input variables are passed in as local variables to the LayerHelper
constructor. constructor.
Args: Args:
input(Variable): Input to the function input(Variable): The tensor variable containing the IDs.
size(tuple|list|None): Shape of the look up table parameter size(tuple|list): The shape of the look up table parameter. It should
is_sparse(bool): Boolean flag that specifying whether the input is sparse have two elements which indicate the size of the dictionary of
param_attr(ParamAttr): Parameters for this layer embeddings and the size of each embedding vector respectively.
dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc is_sparse(bool): The flag indicating whether to use sparse update.
padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup.
Otherwise the given :attr:`padding_idx` indicates padding the output
with zeros whenever lookup encounters it in :attr:`input`. If
:math:`padding_idx < 0`, the padding_idx to use in lookup is
:math:`size[0] + dim`.
param_attr(ParamAttr): Parameters for this layer
dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc
Returns: Returns:
Variable: The tensor variable storing the embeddings of the \ Variable: The tensor variable storing the embeddings of the \
...@@ -214,12 +231,15 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'): ...@@ -214,12 +231,15 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'):
w = helper.create_parameter( w = helper.create_parameter(
attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False) attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False)
tmp = helper.create_tmp_variable(dtype) tmp = helper.create_tmp_variable(dtype)
padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
size[0] + padding_idx)
helper.append_op( helper.append_op(
type='lookup_table', type='lookup_table',
inputs={'Ids': input, inputs={'Ids': input,
'W': w}, 'W': w},
outputs={'Out': tmp}, outputs={'Out': tmp},
attrs={'is_sparse': is_sparse}) attrs={'is_sparse': is_sparse,
'padding_idx': padding_idx})
return tmp return tmp
...@@ -366,6 +386,113 @@ def dynamic_lstm(input, ...@@ -366,6 +386,113 @@ def dynamic_lstm(input,
return hidden, cell return hidden, cell
def dynamic_gru(input,
                size,
                param_attr=None,
                bias_attr=None,
                is_reverse=False,
                gate_activation='sigmoid',
                candidate_activation='tanh',
                h_0=None):
    """
    **Dynamic GRU Layer**

    Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
    Sequence Modeling <https://arxiv.org/abs/1412.3555>`_

    The formula is as follows:

    .. math::

        u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)

        r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)

        \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \\odot h_{t-1}) + b_c)

        h_t & = (1-u_t) \\odot h_{t-1} + u_t \\odot \\tilde{h_t}

    The :math:`\\odot` is the element-wise product of the vectors. :math:`act_g`
    is the update gate and reset gate activation function and :math:`sigmoid`
    is usually used for it. :math:`act_c` is the activation function for
    candidate hidden state and :math:`tanh` is usually used for it.

    Note that these :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` operations on
    the input :math:`x_{t}` are NOT included in this operator. Users can choose
    to use fully-connect layer before GRU layer.

    Args:
        input(Variable): The input of dynamic_gru layer, which supports
            variable-time length input sequence. The underlying tensor in this
            Variable is a matrix with shape :math:`(T \\times 3D)`, where
            :math:`T` is the total time steps in this mini-batch, :math:`D`
            is the hidden size.
        size(int): The dimension of the gru cell.
        param_attr(ParamAttr|None): The parameter attribute for the learnable
            hidden-hidden weight matrix. Note:

            - The shape of the weight matrix is :math:`(D \\times 3D)`, where
              :math:`D` is the hidden size.
            - All elements in the weight matrix can be divided into two parts.
              The first part are weights of the update gate and reset gate with
              shape :math:`(D \\times 2D)`, and the second part are weights for
              candidate hidden state with shape :math:`(D \\times D)`.

        bias_attr(ParamAttr): The parameter attribute for learnable the
            hidden-hidden bias.
        is_reverse(bool): Whether to compute reversed GRU, default
            :attr:`False`.
        gate_activation(str): The activation for update gate and reset gate.
            Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
        candidate_activation(str): The activation for candidate hidden state.
            Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".
        h_0(Variable|None): The initial hidden state. If given, it must be a
            2-D variable whose second dimension equals :attr:`size`; it is
            passed to the gru operator as its ``H0`` input. Default: None.

    Returns:
        Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`,
        and lod is the same with the input.

    Examples:
        .. code-block:: python

            hidden_dim = 512
            x = fluid.layers.fc(input=data, size=hidden_dim * 3)
            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
    """
    # locals() must be captured before any other local is defined so that the
    # helper receives exactly the layer arguments.
    helper = LayerHelper('gru', **locals())
    dtype = helper.input_dtype()

    weight = helper.create_parameter(
        attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
    bias = helper.create_parameter(
        attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True)

    inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
    # Identity test: `!= None` would go through any overloaded comparison
    # operator of the variable instead of checking for "not provided".
    if h_0 is not None:
        # The initial state has one row per sequence and `size` columns, so
        # only the rank and the hidden dimension are checked.
        assert len(h_0.shape) == 2 and h_0.shape[1] == size, (
            'The shape of h0 should be (batch_size, %d)' % size)
        # The operator's input slot for the initial state is named 'H0'
        # (per the gru operator definition); a lower-case key would not be
        # matched and the initial state would be dropped.
        inputs['H0'] = h_0

    hidden = helper.create_tmp_variable(dtype)
    batch_gate = helper.create_tmp_variable(dtype)
    batch_reset_hidden_prev = helper.create_tmp_variable(dtype)
    batch_hidden = helper.create_tmp_variable(dtype)

    helper.append_op(
        type='gru',
        inputs=inputs,
        outputs={
            'Hidden': hidden,
            'BatchGate': batch_gate,
            'BatchResetHiddenPrev': batch_reset_hidden_prev,
            'BatchHidden': batch_hidden
        },
        attrs={
            'is_reverse': is_reverse,
            'gate_activation': gate_activation,
            'activation': candidate_activation
        })
    return hidden
def gru_unit(input, def gru_unit(input,
hidden, hidden,
size, size,
...@@ -403,8 +530,10 @@ def gru_unit(input, ...@@ -403,8 +530,10 @@ def gru_unit(input,
size (integer): The input dimension value. size (integer): The input dimension value.
weight (ParamAttr): The weight parameters for gru unit. Default: None weight (ParamAttr): The weight parameters for gru unit. Default: None
bias (ParamAttr): The bias parameters for gru unit. Default: None bias (ParamAttr): The bias parameters for gru unit. Default: None
activation (string): The activation type for cell (actNode). Default: 'tanh' activation (string): The activation type for cell (actNode).
gate_activation (string): The activation type for gates (actGate). Default: 'sigmoid' Default: 'tanh'
gate_activation (string): The activation type for gates (actGate).
Default: 'sigmoid'
Returns: Returns:
tuple: The hidden value, reset-hidden value and gate values. tuple: The hidden value, reset-hidden value and gate values.
...@@ -543,8 +672,9 @@ def cross_entropy(input, label, **kwargs): ...@@ -543,8 +672,9 @@ def cross_entropy(input, label, **kwargs):
""" """
**Cross Entropy Layer** **Cross Entropy Layer**
This layer computes the cross entropy between `input` and `label`. It supports This layer computes the cross entropy between `input` and `label`. It
both standard cross-entropy and soft-label cross-entropy loss computation. supports both standard cross-entropy and soft-label cross-entropy loss
computation.
1) One-hot cross-entropy: 1) One-hot cross-entropy:
`soft_label = False`, `Label[i, 0]` indicates the class index for sample i: `soft_label = False`, `Label[i, 0]` indicates the class index for sample i:
...@@ -571,23 +701,28 @@ def cross_entropy(input, label, **kwargs): ...@@ -571,23 +701,28 @@ def cross_entropy(input, label, **kwargs):
Args: Args:
input (Variable|list): a 2-D tensor with shape [N x D], where N is the input (Variable|list): a 2-D tensor with shape [N x D], where N is the
batch size and D is the number of classes. This input is a probability batch size and D is the number of classes. This
computed by the previous operator, which is almost always the result input is a probability computed by the previous
of a softmax operator. operator, which is almost always the result of
a softmax operator.
label (Variable|list): the ground truth which is a 2-D tensor. When label (Variable|list): the ground truth which is a 2-D tensor. When
`soft_label` is set to `False`, `label` is a tensor<int64> with shape `soft_label` is set to `False`, `label` is a
[N x 1]. When `soft_label` is set to `True`, `label` is a tensor<int64> with shape [N x 1]. When
tensor<float/double> with shape [N x D]. `soft_label` is set to `True`, `label` is a
soft_label (bool, via `**kwargs`): a flag indicating whether to interpretate tensor<float/double> with shape [N x D].
the given labels as soft labels, default `False`. soft_label (bool, via `**kwargs`): a flag indicating whether to
interpretate the given labels as soft
labels, default `False`.
Returns: Returns:
A 2-D tensor with shape [N x 1], the cross entropy loss. A 2-D tensor with shape [N x 1], the cross entropy loss.
Raises: Raises:
`ValueError`: 1) the 1st dimension of `input` and `label` are not equal; 2) when \ `ValueError`: 1) the 1st dimension of `input` and `label` are not equal.
`soft_label == True`, and the 2nd dimension of `input` and `label` are not \ 2) when `soft_label == True`, and the 2nd dimension of
equal; 3) when `soft_label == False`, and the 2nd dimension of `label` is not 1. `input` and `label` are not equal.
3) when `soft_label == False`, and the 2nd dimension of
`label` is not 1.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -610,7 +745,9 @@ def square_error_cost(input, label, **kwargs): ...@@ -610,7 +745,9 @@ def square_error_cost(input, label, **kwargs):
""" """
**Square error cost layer** **Square error cost layer**
This layer accepts input predictions and target label and returns the squared error cost. This layer accepts input predictions and target label and returns the
squared error cost.
For predictions, :math:`X`, and target labels, :math:`Y`, the equation is: For predictions, :math:`X`, and target labels, :math:`Y`, the equation is:
.. math:: .. math::
...@@ -628,8 +765,8 @@ def square_error_cost(input, label, **kwargs): ...@@ -628,8 +765,8 @@ def square_error_cost(input, label, **kwargs):
label(Variable): Label tensor, has target labels. label(Variable): Label tensor, has target labels.
Returns: Returns:
Variable: The tensor variable storing the element-wise squared error difference \ Variable: The tensor variable storing the element-wise squared error
of input and label. difference of input and label.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -725,7 +862,8 @@ def chunk_eval(input, ...@@ -725,7 +862,8 @@ def chunk_eval(input,
"chunk_scheme": chunk_scheme, "chunk_scheme": chunk_scheme,
"excluded_chunk_types": excluded_chunk_types or [] "excluded_chunk_types": excluded_chunk_types or []
}) })
return precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks return (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
num_correct_chunks)
def sequence_conv(input, def sequence_conv(input,
...@@ -783,13 +921,14 @@ def conv2d(input, ...@@ -783,13 +921,14 @@ def conv2d(input,
**Convlution2D Layer** **Convlution2D Layer**
The convolution2D layer calculates the output based on the input, filter The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input(Input) and Output(Output) and strides, paddings, dilations, groups parameters. Input(Input) and
are in NCHW format. Where N is batch size, C is the number of channels, H is the height Output(Output) are in NCHW format. Where N is batch size, C is the number of
of the feature, and W is the width of the feature. channels, H is the height of the feature, and W is the width of the feature.
The details of convolution layer, please refer UFLDL's `convolution, The details of convolution layer, please refer UFLDL's `convolution,
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ . <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
If bias attribution and activation type are provided, bias is added to the output of the convolution, If bias attribution and activation type are provided, bias is added to the
and the corresponding activation function is applied to the final result. output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is: For each input :math:`X`, the equation is:
...@@ -804,7 +943,8 @@ def conv2d(input, ...@@ -804,7 +943,8 @@ def conv2d(input,
* :math:`\\ast`: Convolution operation. * :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function. * :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
different.
Example: Example:
...@@ -849,17 +989,20 @@ def conv2d(input, ...@@ -849,17 +989,20 @@ def conv2d(input,
act(str): Activation type. Default: None act(str): Activation type. Default: None
Returns: Returns:
Variable: The tensor variable storing the convolution and \ Variable: The tensor variable storing the convolution and
non-linearity activation result. non-linearity activation result.
Raises: Raises:
ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch. ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples: Examples:
.. code-block:: python .. code-block:: python
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') data = fluid.layers.data(
conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu") name='data', shape=[3, 32, 32], dtype='float32')
conv2d = fluid.layers.conv2d(
input=data, num_filters=2, filter_size=3, act="relu")
""" """
if stride is None: if stride is None:
stride = [1, 1] stride = [1, 1]
...@@ -1222,7 +1365,8 @@ def conv2d_transpose(input, ...@@ -1222,7 +1365,8 @@ def conv2d_transpose(input,
H is the height of the feature, and W is the width of the feature. H is the height of the feature, and W is the width of the feature.
Parameters(dilations, strides, paddings) are two elements. These two elements Parameters(dilations, strides, paddings) are two elements. These two elements
represent height and width, respectively. The details of convolution transpose represent height and width, respectively. The details of convolution transpose
layer, please refer to the following explanation and references `therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_. layer, please refer to the following explanation and references
`therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
For each input :math:`X`, the equation is: For each input :math:`X`, the equation is:
...@@ -1235,7 +1379,8 @@ def conv2d_transpose(input, ...@@ -1235,7 +1379,8 @@ def conv2d_transpose(input,
* :math:`X`: Input value, a tensor with NCHW format. * :math:`X`: Input value, a tensor with NCHW format.
* :math:`W`: Filter value, a tensor with MCHW format. * :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast` : Convolution transpose operation. * :math:`\\ast` : Convolution transpose operation.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
different.
Example: Example:
...@@ -1276,7 +1421,8 @@ def conv2d_transpose(input, ...@@ -1276,7 +1421,8 @@ def conv2d_transpose(input,
dilation(int|tuple): The dilation size. If dilation is a tuple, it must dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1. dilation_H = dilation_W = dilation. Default: dilation = 1.
param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer. Default: None param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer.
Default: None
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True library is installed. Default: True
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
...@@ -1286,13 +1432,16 @@ def conv2d_transpose(input, ...@@ -1286,13 +1432,16 @@ def conv2d_transpose(input,
Variable: The tensor variable storing the convolution transpose result. Variable: The tensor variable storing the convolution transpose result.
Raises: Raises:
ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch. ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples: Examples:
.. code-block:: python .. code-block:: python
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') data = fluid.layers.data(
conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3) name='data', shape=[3, 32, 32], dtype='float32')
conv2d_transpose = fluid.layers.conv2d_transpose(
input=data, num_filters=2, filter_size=3)
""" """
helper = LayerHelper("conv2d_transpose", **locals()) helper = LayerHelper("conv2d_transpose", **locals())
if not isinstance(input, Variable): if not isinstance(input, Variable):
...@@ -1484,10 +1633,10 @@ def lstm_unit(x_t, ...@@ -1484,10 +1633,10 @@ def lstm_unit(x_t,
tuple: The hidden value and cell value of lstm unit. tuple: The hidden value and cell value of lstm unit.
Raises: Raises:
ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**\ ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**
not be 2 or the 1st dimensions of **x_t**, **hidden_t_prev** \ not be 2 or the 1st dimensions of **x_t**, **hidden_t_prev**
and **cell_t_prev** not be the same or the 2nd dimensions of \ and **cell_t_prev** not be the same or the 2nd dimensions of
**hidden_t_prev** and **cell_t_prev** not be the same. **hidden_t_prev** and **cell_t_prev** not be the same.
Examples: Examples:
...@@ -2173,8 +2322,10 @@ def warpctc(input, label, blank=0, norm_by_times=False, **kwargs): ...@@ -2173,8 +2322,10 @@ def warpctc(input, label, blank=0, norm_by_times=False, **kwargs):
Examples: Examples:
.. code-block:: python .. code-block:: python
y = layers.data(name='y', shape=[11, 8], dtype='float32', lod_level=1) y = layers.data(
y_predict = layers.data(name='y_predict', shape=[11, 1], dtype='float32') name='y', shape=[11, 8], dtype='float32', lod_level=1)
y_predict = layers.data(
name='y_predict', shape=[11, 1], dtype='float32')
cost = layers.warpctc(input=y_predict, label=y) cost = layers.warpctc(input=y_predict, label=y)
""" """
...@@ -2246,6 +2397,61 @@ def sequence_reshape(input, new_dim): ...@@ -2246,6 +2397,61 @@ def sequence_reshape(input, new_dim):
return out return out
@autodoc()
def nce(input,
        label,
        num_total_classes,
        sample_weight=None,
        param_attr=None,
        bias_attr=None,
        num_neg_samples=None):
    # Appends an 'nce' operator and returns its 'Cost' output divided by
    # (num_neg_samples + 1). num_neg_samples falls back to 10 when not given.
    # Documentation is left to the @autodoc() decorator, hence no docstring.
    #
    # locals() must be captured first so the helper only sees the arguments.
    helper = LayerHelper('nce', **locals())

    assert isinstance(input, Variable)
    feature_dim = input.shape[1]
    assert isinstance(label, Variable)
    # The value is not used below; the lookup is kept as in the original code.
    num_true_class = label.shape[1]

    # One weight row and one bias entry per class.
    weight = helper.create_parameter(
        attr=helper.param_attr,
        shape=[num_total_classes, feature_dim],
        is_bias=False,
        dtype=input.dtype)
    bias = helper.create_parameter(
        attr=helper.bias_attr,
        shape=[num_total_classes, 1],
        is_bias=True,
        dtype=input.dtype)

    cost = helper.create_tmp_variable(dtype=input.dtype)
    sample_logits = helper.create_tmp_variable(dtype=input.dtype)
    sample_labels = helper.create_tmp_variable(dtype=label.dtype)

    num_neg_samples = 10 if num_neg_samples is None else int(num_neg_samples)

    op_attrs = {
        'num_total_classes': int(num_total_classes),
        'num_neg_samples': num_neg_samples
    }

    op_inputs = {
        'Input': input,
        'Label': label,
        'Weight': weight,
        'Bias': bias
    }
    # An absent sample weight is passed to the operator as an empty list.
    if sample_weight is None:
        op_inputs['SampleWeight'] = []
    else:
        op_inputs['SampleWeight'] = sample_weight

    op_outputs = {
        'Cost': cost,
        'SampleLogits': sample_logits,
        'SampleLabels': sample_labels
    }

    helper.append_op(
        type='nce', inputs=op_inputs, outputs=op_outputs, attrs=op_attrs)
    return cost / (num_neg_samples + 1)
def transpose(x, perm, name=None): def transpose(x, perm, name=None):
""" """
**transpose Layer** **transpose Layer**
...@@ -2288,3 +2494,129 @@ def transpose(x, perm, name=None): ...@@ -2288,3 +2494,129 @@ def transpose(x, perm, name=None):
outputs={'Out': [out]}, outputs={'Out': [out]},
attrs={'axis': perm}) attrs={'axis': perm})
return out return out
def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
    r"""
    Extracts image patches from the input tensor to form a tensor of shape
    {input.batch_size * output_height * output_width, filter_size_H *
    filter_size_W * input.channels}, which is similar to im2col.
    This op uses a filter / kernel to scan images and converts these images to
    sequences. After expanding, the number of time steps is
    output_height * output_width for an image, in which output_height and
    output_width are calculated by the equation below:

    .. math::

        output\_size = 1 + (2 * padding + img\_size - filter\_size + stride - 1) / stride

    And the dimension of each time step is
    filter_size_H * filter_size_W * input.channels.

    Args:
        input (Variable): The input should be a tensor in NCHW format.

        filter_size(int|tuple): The filter size. If filter_size is a tuple,
            it must contain two integers, (filter_size_H, filter_size_W).
            Otherwise, the filter will be a square. Default: filter_size = 1.

        stride(int|tuple): The stride size. If stride is a tuple, it must
            contain two integers, (stride_H, stride_W). Otherwise, the
            stride_H = stride_W = stride. Default: stride = 1.

        padding(int|tuple): The padding size. If padding is a tuple, it can
            contain two integers like (padding_H, padding_W) which means
            padding_up = padding_down = padding_H and
            padding_left = padding_right = padding_W. Or it can use
            (padding_up, padding_left, padding_down, padding_right) to indicate
            paddings of four directions. Otherwise, a scalar padding means
            padding_up = padding_down = padding_left = padding_right = padding.
            The sequence passed by the caller is never modified.
            Default: padding = 0.

        name (str): The name of this layer. It is optional.

    Returns:
        output: The output is a LoDTensor with shape
        {input.batch_size * output_height * output_width,
        filter_size_H * filter_size_W * input.channels}.
        If we regard output as a matrix, each row of this matrix is
        a step of a sequence.

    Examples:

    As an example:

        .. code-block:: text

            Given:

            x = [[[[ 6.  2.  1.]
                   [ 8.  3.  5.]
                   [ 0.  2.  6.]]

                  [[ 2.  4.  4.]
                   [ 6.  3.  0.]
                   [ 6.  4.  7.]]]

                 [[[ 6.  7.  1.]
                   [ 5.  7.  9.]
                   [ 2.  4.  8.]]

                  [[ 1.  2.  1.]
                   [ 1.  3.  5.]
                   [ 9.  0.  8.]]]]

            x.dims = {2, 2, 3, 3}

            And:

            filter = [2, 2]
            stride = [1, 1]
            padding = [0, 0]

            Then:

            output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
                           [ 2.  1.  3.  5.  4.  4.  3.  0.]
                           [ 8.  3.  0.  2.  6.  3.  6.  4.]
                           [ 3.  5.  2.  6.  3.  0.  4.  7.]
                           [ 6.  7.  5.  7.  1.  2.  1.  3.]
                           [ 7.  1.  7.  9.  2.  1.  3.  5.]
                           [ 5.  7.  2.  4.  1.  3.  9.  0.]
                           [ 7.  9.  4.  8.  3.  5.  0.  8.]]

            output.dims = {8, 8}

            output.lod = [[0, 4, 8]]

        The simple usage is:

        .. code-block:: python

            output = fluid.layers.im2sequence(
                input=layer, stride=[1, 1], filter_size=[2, 2])

    """

    # A scalar means "same value along height and width".
    if isinstance(filter_size, int):
        filter_size = [filter_size, filter_size]
    if isinstance(stride, int):
        stride = [stride, stride]
    if isinstance(padding, int):
        padding = [padding, padding]
    else:
        # Work on a private list: the documented tuple form has no append(),
        # and a list handed in by the caller must not grow behind its back.
        padding = list(padding)
    if len(padding) == 2:
        # (H, W) -> (up, left, down, right) = (H, W, H, W)
        padding = padding + padding

    # No extra locals are bound above, so locals() still holds exactly the
    # (normalized) call arguments.
    helper = LayerHelper('im2sequence', **locals())
    out = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(
        type='im2sequence',
        inputs={'X': input},
        outputs={'Out': out},
        attrs={
            'kernels': filter_size,
            'strides': stride,
            'paddings': padding,
        })
    return out
...@@ -31,10 +31,12 @@ dtype_to_size = { ...@@ -31,10 +31,12 @@ dtype_to_size = {
class ControlFlowGraph(object): class ControlFlowGraph(object):
def __init__(self, Program): def __init__(self, Program, ops, forward_num):
self._program = Program self._program = Program
self._succesors = defaultdict(set) self._ops = ops
self._presucessors = defaultdict(set) self._forward_num = forward_num
self._successors = defaultdict(set)
self._presuccessors = defaultdict(set)
self._uses = defaultdict(set) self._uses = defaultdict(set)
self._defs = defaultdict(set) self._defs = defaultdict(set)
self._live_in = defaultdict(set) self._live_in = defaultdict(set)
...@@ -45,25 +47,16 @@ class ControlFlowGraph(object): ...@@ -45,25 +47,16 @@ class ControlFlowGraph(object):
self._add(node1, node2) self._add(node1, node2)
def _add(self, node1, node2): def _add(self, node1, node2):
self._succesors[node1].add(node2) self._successors[node1].add(node2)
self._presucessors[node2].add(node1) self._presuccessors[node2].add(node1)
def _build_graph(self): def _build_graph(self):
program_desc = self._program.get_desc() self.op_size = len(self._ops)
block_size = program_desc.num_blocks()
# TODO(qijun) handle Program with if/while operators
self.global_block_desc = program_desc.block(0)
self.op_size = self.global_block_desc.op_size()
op_node_connections = [(i, i + 1) for i in range(self.op_size - 1)] op_node_connections = [(i, i + 1) for i in range(self.op_size - 1)]
self._add_connections(op_node_connections) self._add_connections(op_node_connections)
self.ops = [self.global_block_desc.op(i) for i in range(self.op_size)]
for i in range(self.op_size): for i in range(self.op_size):
self._uses[i].update(self.ops[i].input_arg_names()) self._uses[i].update(self._ops[i].input_arg_names())
self._defs[i].update(self.ops[i].output_arg_names()) self._defs[i].update(self._ops[i].output_arg_names())
def _update_graph(self, old_name, new_name, begin_idx=0): def _update_graph(self, old_name, new_name, begin_idx=0):
for i in range(begin_idx, self.op_size): for i in range(begin_idx, self.op_size):
...@@ -103,7 +96,7 @@ class ControlFlowGraph(object): ...@@ -103,7 +96,7 @@ class ControlFlowGraph(object):
live_out[i] = set(self._live_out[i]) live_out[i] = set(self._live_out[i])
self._live_in[i] = self._uses[i] | ( self._live_in[i] = self._uses[i] | (
self._live_out[i] - self._defs[i]) self._live_out[i] - self._defs[i])
for s in self._succesors[i]: for s in self._successors[i]:
self._live_out[i] |= self._live_in[s] self._live_out[i] |= self._live_in[s]
if self._reach_fixed_point(live_in, live_out): if self._reach_fixed_point(live_in, live_out):
...@@ -113,39 +106,76 @@ class ControlFlowGraph(object): ...@@ -113,39 +106,76 @@ class ControlFlowGraph(object):
u = a & b u = a & b
return a - u, b - u return a - u, b - u
def _has_var(self, block_desc, var_name, is_forward):
if is_forward:
return block_desc.has_var(str(var_name))
else:
return block_desc.has_var_recursive(str(var_name))
def _find_var(self, block_desc, var_name, is_forward):
if is_forward:
return block_desc.find_var(str(var_name))
else:
return block_desc.find_var_recursive(str(var_name))
def memory_optimize(self): def memory_optimize(self):
def check_var_validity(block_desc, x, is_forward):
if str(x) == "@EMPTY@":
return False
if not self._has_var(block_desc, x, is_forward):
return False
if self._find_var(block_desc, x, is_forward).persistable():
return False
if self._find_var(
block_desc, x,
is_forward).type() != core.VarDesc.VarType.LOD_TENSOR:
return False
return True
self._build_graph() self._build_graph()
self._dataflow_analyze() self._dataflow_analyze()
self.pool = [] self.pool = []
for i in range(self.op_size): for i in range(self.op_size):
op = self._ops[i]
if op.type() == "while" or op.type() == "while_grad":
continue
block_desc = op.block()
is_forward = i < self._forward_num
if self.pool: if self.pool:
out_pair = [(x, self.global_block_desc.var(str(x)).shape()) defs_can_optimize = filter(
for x in self._defs[i]] lambda x: check_var_validity(block_desc, x, is_forward),
self._defs[i])
out_pair = [
(x, self._find_var(block_desc, x, is_forward).shape())
for x in defs_can_optimize
]
for x, x_shape in out_pair: for x, x_shape in out_pair:
if not self.global_block_desc.var(str(x)).persistable(): for index, cache_pair in enumerate(self.pool):
for index, cache_pair in enumerate(self.pool): cache_var = cache_pair[0]
cache_var = cache_pair[0] cache_shape = cache_pair[1]
cache_shape = cache_pair[1] if x_shape == cache_shape:
if x_shape == cache_shape: if self._has_var(block_desc, cache_var, is_forward):
x_dtype = self.global_block_desc.var(str( x_dtype = self._find_var(block_desc, x,
x)).dtype() is_forward).dtype()
cache_dtype = self.global_block_desc.var( cache_dtype = self._find_var(
str(cache_var)).dtype() block_desc, cache_var, is_forward).dtype()
# TODO(qijun): actually, we should compare dtype_to_size[x_dtype] # TODO(qijun): actually, we should compare dtype_to_size[x_dtype]
# and dtype_to_size[cache_dtype] # and dtype_to_size[cache_dtype]
if x_dtype == cache_dtype: if x_dtype == cache_dtype:
print( print(("Hit Cache !!!! cache pool index "
("Hit Cache !!!! cache pool index " "is %d, var name is %s, "
"is %d, var name is %s, " "cached var name is %s, "
"cached var name is %s, " "var shape is %s ") %
"var shape is %s ") % (index, x, cache_var,
(index, x, cache_var, str(cache_shape))) str(cache_shape)))
self.pool.pop(index) self.pool.pop(index)
if x == cache_var:
break
_rename_arg_( _rename_arg_(
self.ops, x, cache_var, begin_idx=i) self._ops, x, cache_var, begin_idx=i)
self._program.current_block().var(str( self._program.block(block_desc.id).var(
x)).desc = self.global_block_desc.var( str(x)).desc = self._find_var(
str(cache_var)) block_desc, cache_var, is_forward)
self._update_graph( self._update_graph(
x, cache_var, begin_idx=i) x, cache_var, begin_idx=i)
break break
...@@ -153,20 +183,70 @@ class ControlFlowGraph(object): ...@@ -153,20 +183,70 @@ class ControlFlowGraph(object):
in_diff, out_diff = self._get_diff(self._live_in[i], in_diff, out_diff = self._get_diff(self._live_in[i],
self._live_out[i]) self._live_out[i])
can_optimize = filter( can_optimize = filter(
lambda x: not self.global_block_desc.var(str(x)).persistable(), lambda x: check_var_validity(block_desc, x, is_forward),
in_diff) in_diff)
if can_optimize: if can_optimize:
for var_name in can_optimize: for var_name in can_optimize:
self.pool.append( self.pool.append((var_name, self._find_var(
(var_name, block_desc, var_name, is_forward).shape()))
self.global_block_desc.var(str(var_name)).shape()))
def get_cfgs(input_program):
    """
    Build one ControlFlowGraph per group of operators in ``input_program``.

    The groups are produced in this order:

    1. every op of block 0;
    2. for each "while" op of block 0 whose sub-block is the parent of a
       "while_grad" sub-block: the ops of the while sub-block followed by the
       ops of the matching grad sub-block, with the number of while sub-block
       ops recorded as the forward-op count;
    3. every remaining while sub-block on its own.

    Args:
        input_program: object whose ``get_desc()`` returns the program
            description that is walked here.

    Returns:
        list: the ControlFlowGraph instances, one per group.
    """
    program_desc = input_program.get_desc()

    def collect_ops(block):
        # All op descriptions of `block`, in index order.
        return [block.op(idx) for idx in range(block.op_size())]

    # Group 1: the global block; all of its ops count as forward ops.
    global_ops = collect_ops(program_desc.block(0))
    groups = [(global_ops, len(global_ops))]

    # Sub-block ids referenced by while / while_grad ops of the global block.
    forward_ids = []
    backward_ids = []
    for op in global_ops:
        kind = op.type()
        if kind == "while":
            forward_ids.append(op.attr("sub_block").id)
        elif kind == "while_grad":
            backward_ids.append(op.attr("sub_block").id)

    # Pair each grad sub-block with its parent while sub-block; a paired
    # parent is dropped from forward_ids so it is not processed twice.
    paired = []
    for grad_id in backward_ids:
        parent_id = program_desc.block(grad_id).parent
        if parent_id in forward_ids:
            paired.append((parent_id, grad_id))
            forward_ids.remove(parent_id)

    # Group 2: while ops followed by their grad ops.
    for parent_id, grad_id in paired:
        forward_ops = collect_ops(program_desc.block(parent_id))
        backward_ops = collect_ops(program_desc.block(grad_id))
        groups.append((forward_ops + backward_ops, len(forward_ops)))

    # Group 3: while sub-blocks that have no grad counterpart.
    for parent_id in forward_ids:
        forward_ops = collect_ops(program_desc.block(parent_id))
        groups.append((forward_ops, len(forward_ops)))

    return [
        ControlFlowGraph(input_program, ops, forward_num)
        for ops, forward_num in groups
    ]
def memory_optimize(input_program): def memory_optimize(input_program):
graph = ControlFlowGraph(input_program) cfgs = get_cfgs(input_program)
graph.memory_optimize() for cfg in cfgs:
result_program = graph.get_program() cfg.memory_optimize()
return result_program
...@@ -55,7 +55,7 @@ def img_conv_group(input, ...@@ -55,7 +55,7 @@ def img_conv_group(input,
conv_act=None, conv_act=None,
param_attr=None, param_attr=None,
conv_with_batchnorm=False, conv_with_batchnorm=False,
conv_batchnorm_drop_rate=None, conv_batchnorm_drop_rate=0.0,
pool_stride=1, pool_stride=1,
pool_type=None, pool_type=None,
use_cudnn=True): use_cudnn=True):
...@@ -167,11 +167,10 @@ def scaled_dot_product_attention(queries, ...@@ -167,11 +167,10 @@ def scaled_dot_product_attention(queries,
""" """
The dot-product attention. The dot-product attention.
Attention mechanism can be seen as mapping a query and a set of Attention mechanism can be seen as mapping a query and a set of key-value
key-value pairs to an output. The output is computed as a weighted sum pairs to an output. The output is computed as a weighted sum of the values,
of the values, where the weight assigned to each value is computed by a where the weight assigned to each value is computed by a compatibility
compatibility function (dot-product here) of the query with the function (dot-product here) of the query with the corresponding key.
corresponding key.
The dot-product attention can be implemented through (batch) matrix The dot-product attention can be implemented through (batch) matrix
multipication as follows: multipication as follows:
...@@ -186,12 +185,14 @@ def scaled_dot_product_attention(queries, ...@@ -186,12 +185,14 @@ def scaled_dot_product_attention(queries,
Note that batch data containing sequences with different lengths is not Note that batch data containing sequences with different lengths is not
supported by this because of the (batch) matrix multipication. supported by this because of the (batch) matrix multipication.
Args:
query (Variable): The input variable which is a Tensor or queries (Variable): The input variable which is a Tensor or
LoDTensor. LoDTensor.
key (Variable): The input variable which is a Tensor or LoDTensor. keys (Variable): The input variable which is a Tensor or LoDTensor.
value (Variable): The input variable which is a Tensor or values (Variable): The input variable which is a Tensor or
LoDTensor. LoDTensor.
num_heads (int): Head number to compute the dot product attention.
dropout_rate (float): The dropout rate for attention weight.
Returns: Returns:
Variable: The context Tensor computed by multi-head scaled dot product Variable: The context Tensor computed by multi-head scaled dot product
......
...@@ -16,6 +16,11 @@ import numpy as np ...@@ -16,6 +16,11 @@ import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.v2.fluid as fluid
# need to fix random seed and training data to compare the loss
# value accurately calculated by the default and the memory optimization
# version.
fluid.default_startup_program().random_seed = 111
x = fluid.layers.data(name='x', shape=[13], dtype='float32') x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None) y_predict = fluid.layers.fc(input=x, size=1, act=None)
...@@ -28,15 +33,18 @@ avg_cost = fluid.layers.mean(x=cost) ...@@ -28,15 +33,18 @@ avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
sgd_optimizer.minimize(avg_cost) sgd_optimizer.minimize(avg_cost)
# memopt_program = fluid.default_main_program() fluid.memory_optimize(fluid.default_main_program())
memopt_program = fluid.memory_optimize(fluid.default_main_program())
BATCH_SIZE = 200 BATCH_SIZE = 200
# fix the order of training data
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.dataset.uci_housing.train(), batch_size=BATCH_SIZE)
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=BATCH_SIZE) # train_reader = paddle.batch(
# paddle.reader.shuffle(
# paddle.dataset.uci_housing.train(), buf_size=500),
# batch_size=BATCH_SIZE)
place = fluid.CPUPlace() place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
...@@ -49,7 +57,7 @@ for pass_id in range(PASS_NUM): ...@@ -49,7 +57,7 @@ for pass_id in range(PASS_NUM):
fluid.io.save_persistables(exe, "./fit_a_line.model/") fluid.io.save_persistables(exe, "./fit_a_line.model/")
fluid.io.load_persistables(exe, "./fit_a_line.model/") fluid.io.load_persistables(exe, "./fit_a_line.model/")
for data in train_reader(): for data in train_reader():
avg_loss_value, = exe.run(memopt_program, avg_loss_value, = exe.run(fluid.default_main_program(),
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[avg_cost]) fetch_list=[avg_cost])
......
...@@ -19,6 +19,11 @@ import sys ...@@ -19,6 +19,11 @@ import sys
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.v2.fluid as fluid
# need to fix random seed and training data to compare the loss
# value accurately calculated by the default and the memory optimization
# version.
fluid.default_startup_program().random_seed = 111
def resnet_cifar10(input, depth=32): def resnet_cifar10(input, depth=32):
def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
...@@ -117,31 +122,37 @@ opts = optimizer.minimize(avg_cost) ...@@ -117,31 +122,37 @@ opts = optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=predict, label=label) accuracy = fluid.evaluator.Accuracy(input=predict, label=label)
# memopt_program = fluid.default_main_program() fluid.memory_optimize(fluid.default_main_program())
memopt_program = fluid.memory_optimize(fluid.default_main_program())
BATCH_SIZE = 128 BATCH_SIZE = 128
PASS_NUM = 1 PASS_NUM = 1
# fix the order of training data
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE)
paddle.dataset.cifar.train10(), buf_size=128 * 10),
batch_size=BATCH_SIZE) # train_reader = paddle.batch(
# paddle.reader.shuffle(
# paddle.dataset.cifar.train10(), buf_size=128 * 10),
# batch_size=BATCH_SIZE)
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
feeder = fluid.DataFeeder(place=place, feed_list=[images, label]) feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
i = 0
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
accuracy.reset(exe) accuracy.reset(exe)
for data in train_reader(): for data in train_reader():
loss, acc = exe.run(memopt_program, loss, acc = exe.run(fluid.default_main_program(),
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[avg_cost] + accuracy.metrics) fetch_list=[avg_cost] + accuracy.metrics)
pass_acc = accuracy.eval(exe) pass_acc = accuracy.eval(exe)
print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
pass_acc)) pass_acc))
# this model is slow, so if we can train two mini batch, we think it works properly. # this model is slow, so if we can train two mini batch, we think it works properly.
exit(0) if i > 2:
exit(0)
i += 1
exit(1) exit(1)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
# Vocabulary size; used for both embeddings and the decoder's softmax width.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# Runs at import time. The returned dictionaries are not referenced again in
# this script.
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
# Base width: the encoder fc/LSTM use hidden_dim * 4, the decoder hidden_dim.
hidden_dim = 32
# Width of each embedding row.
word_dim = 16
# Passed as `is_sparse` to both embedding layers.
IS_SPARSE = True
# Number of samples per training batch.
batch_size = 10
# NOTE(review): max_length, topk_size and trg_dic_size are not referenced
# anywhere else in this script -- confirm whether they can be dropped.
max_length = 50
topk_size = 50
trg_dic_size = 10000
decoder_size = hidden_dim
# Fix the random seed (and, further down, the order of the training data) so
# that the loss computed with memory optimization can be compared accurately
# against the loss of the default, non-optimized version.
fluid.default_startup_program().random_seed = 111
def encoder_decoder():
    """
    Build the encoder/decoder network and return the DynamicRNN output.

    Encoder: source word ids -> embedding -> tanh fc -> dynamic LSTM -> last
    step of the LSTM hidden sequence.
    Decoder: a DynamicRNN over the target embeddings whose memory starts from
    the encoder output; each step yields a softmax over target_dict_dim.
    Both embeddings use a parameter named 'vemb'.
    """

    def embed(word_ids):
        # Embedding lookup shared by source and target sides.
        return layers.embedding(
            input=word_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr=fluid.ParamAttr(name='vemb'))

    # ---- encoder ----
    source_ids = layers.data(
        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
    source_emb = embed(source_ids)
    encoder_fc = fluid.layers.fc(input=source_emb,
                                 size=hidden_dim * 4,
                                 act='tanh')
    # Only the hidden sequence is used; the second output is discarded.
    encoder_hidden, _ = layers.dynamic_lstm(
        input=encoder_fc, size=hidden_dim * 4)
    encoder_out = layers.sequence_last_step(input=encoder_hidden)

    # ---- decoder ----
    target_ids = layers.data(
        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
    target_emb = embed(target_ids)

    decoder = fluid.layers.DynamicRNN()
    with decoder.block():
        step_word = decoder.step_input(target_emb)
        state = decoder.memory(init=encoder_out)
        next_state = fluid.layers.fc(input=[step_word, state],
                                     size=decoder_size,
                                     act='tanh')
        word_probs = fluid.layers.fc(input=next_state,
                                     size=target_dict_dim,
                                     act='softmax')
        decoder.update_memory(state, next_state)
        decoder.output(word_probs)

    return decoder()
def to_lodtensor(data, place):
    """
    Pack a batch of sequences into a single core.LoDTensor.

    Args:
        data: the sequences of one batch; they are concatenated along axis 0.
        place: forwarded to ``LoDTensor.set``.

    Returns:
        A LoDTensor holding the concatenated values as an int64 column, with
        one LoD level made of the cumulative sequence lengths.
    """
    # Offsets start at 0 and gain one entry per sequence.
    offsets = [0]
    for length in [len(seq) for seq in data]:
        offsets.append(offsets[-1] + length)

    flat = np.concatenate(data, axis=0).astype("int64")
    column = flat.reshape([len(flat), 1])

    tensor = core.LoDTensor()
    tensor.set(column, place)
    tensor.set_lod([offsets])
    return tensor
def main():
    """
    Train the encoder/decoder model with memory optimization enabled.

    Builds the network and an Adagrad optimizer, runs
    ``fluid.memory_optimize`` on the default main program, then trains on
    wmt14 on CPU, printing the average cost of every batch. The process
    exits with status 0 once the running batch counter exceeds 2.
    """
    prediction = encoder_decoder()
    next_word = layers.data(
        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
    avg_cost = fluid.layers.mean(
        x=layers.cross_entropy(input=prediction, label=next_word))

    fluid.optimizer.Adagrad(learning_rate=1e-4).minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # The reader is deliberately left unshuffled to fix the order of the
    # training data. The shuffled variant would be:
    # train_data = paddle.batch(
    #     paddle.reader.shuffle(
    #         paddle.dataset.wmt14.train(dict_size), buf_size=1000),
    #     batch_size=batch_size)
    train_data = paddle.batch(
        paddle.dataset.wmt14.train(dict_size), batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def column(samples, position):
        # LoDTensor built from field `position` of every sample in the batch.
        return to_lodtensor([sample[position] for sample in samples], place)

    # Counts batches across passes; it is never reset.
    batch_id = 0
    for pass_id in xrange(10):
        for data in train_data():
            word_data = column(data, 0)
            trg_word = column(data, 1)
            trg_word_next = column(data, 2)
            feed = {
                'src_word_id': word_data,
                'target_language_word': trg_word,
                'target_language_next_word': trg_word_next
            }
            outs = exe.run(fluid.default_main_program(),
                           feed=feed,
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])
            print("".join([
                'pass_id=', str(pass_id), ' batch=', str(batch_id),
                " avg_cost=", str(avg_cost_val)
            ]))
            # A few successful batches are taken as proof that it works.
            if batch_id > 2:
                exit(0)
            batch_id += 1


if __name__ == '__main__':
    main()
...@@ -16,13 +16,13 @@ import numpy as np ...@@ -16,13 +16,13 @@ import numpy as np
from op_test import OpTest from op_test import OpTest
def bipartite_match(distance, match_indices, match_dis): def bipartite_match(distance, match_indices, match_dist):
"""Bipartite Matching algorithm. """Bipartite Matching algorithm.
Arg: Arg:
distance (numpy.array) : The distance of two entries with shape [M, N]. distance (numpy.array) : The distance of two entries with shape [M, N].
match_indices (numpy.array): the matched indices from column to row match_indices (numpy.array): the matched indices from column to row
with shape [1, N], it must be initialized to -1. with shape [1, N], it must be initialized to -1.
match_dis (numpy.array): The matched distance from column to row match_dist (numpy.array): The matched distance from column to row
with shape [1, N], it must be initialized to 0. with shape [1, N], it must be initialized to 0.
""" """
match_pair = [] match_pair = []
...@@ -36,13 +36,13 @@ def bipartite_match(distance, match_indices, match_dis): ...@@ -36,13 +36,13 @@ def bipartite_match(distance, match_indices, match_dis):
row_indices = -1 * np.ones((row, ), dtype=np.int) row_indices = -1 * np.ones((row, ), dtype=np.int)
idx = 0 idx = 0
for i, j, dis in match_sorted: for i, j, dist in match_sorted:
if idx >= row: if idx >= row:
break break
if match_indices[j] == -1 and row_indices[i] == -1 and dis > 0: if match_indices[j] == -1 and row_indices[i] == -1 and dist > 0:
match_indices[j] = i match_indices[j] = i
row_indices[i] = j row_indices[i] = j
match_dis[j] = dis match_dist[j] = dist
idx += 1 idx += 1
...@@ -55,24 +55,24 @@ def batch_bipartite_match(distance, lod): ...@@ -55,24 +55,24 @@ def batch_bipartite_match(distance, lod):
n = len(lod) - 1 n = len(lod) - 1
m = distance.shape[1] m = distance.shape[1]
match_indices = -1 * np.ones((n, m), dtype=np.int) match_indices = -1 * np.ones((n, m), dtype=np.int)
match_dis = np.zeros((n, m), dtype=np.float32) match_dist = np.zeros((n, m), dtype=np.float32)
for i in range(len(lod) - 1): for i in range(len(lod) - 1):
bipartite_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :], bipartite_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :],
match_dis[i, :]) match_dist[i, :])
return match_indices, match_dis return match_indices, match_dist
class TestBipartiteMatchOpForWithLoD(OpTest): class TestBipartiteMatchOpForWithLoD(OpTest):
def setUp(self): def setUp(self):
self.op_type = 'bipartite_match' self.op_type = 'bipartite_match'
lod = [[0, 5, 11, 23]] lod = [[0, 5, 11, 23]]
dis = np.random.random((23, 217)).astype('float32') dist = np.random.random((23, 217)).astype('float32')
match_indices, match_dis = batch_bipartite_match(dis, lod[0]) match_indices, match_dist = batch_bipartite_match(dist, lod[0])
self.inputs = {'DistMat': (dis, lod)} self.inputs = {'DistMat': (dist, lod)}
self.outputs = { self.outputs = {
'ColToRowMatchIndices': (match_indices), 'ColToRowMatchIndices': (match_indices),
'ColToRowMatchDis': (match_dis), 'ColToRowMatchDis': (match_dist),
} }
def test_check_output(self): def test_check_output(self):
...@@ -83,13 +83,13 @@ class TestBipartiteMatchOpWithoutLoD(OpTest): ...@@ -83,13 +83,13 @@ class TestBipartiteMatchOpWithoutLoD(OpTest):
def setUp(self): def setUp(self):
self.op_type = 'bipartite_match' self.op_type = 'bipartite_match'
lod = [[0, 8]] lod = [[0, 8]]
dis = np.random.random((8, 17)).astype('float32') dist = np.random.random((8, 17)).astype('float32')
match_indices, match_dis = batch_bipartite_match(dis, lod[0]) match_indices, match_dist = batch_bipartite_match(dist, lod[0])
self.inputs = {'DistMat': dis} self.inputs = {'DistMat': dist}
self.outputs = { self.outputs = {
'ColToRowMatchIndices': (match_indices), 'ColToRowMatchIndices': match_indices,
'ColToRowMatchDis': (match_dis), 'ColToRowMatchDis': match_dist,
} }
def test_check_output(self): def test_check_output(self):
......
...@@ -68,4 +68,6 @@ class TestUnpoolOp(OpTest): ...@@ -68,4 +68,6 @@ class TestUnpoolOp(OpTest):
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() # FIXME: detection_output_op will be rewritten. This unittest should be
# enabled after rewriting.
exit(0) # temporary disable this unittest
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import sys
import math
from op_test import OpTest
class TestIOUSimilarityOp(OpTest):
    """Checks the iou_similarity operator for two boxes against three boxes."""

    def setUp(self):
        self.op_type = "iou_similarity"

        # Each row is one box given by four coordinates.
        first_boxes = [
            [4.0, 3.0, 7.0, 5.0],
            [5.0, 6.0, 10.0, 7.0],
        ]
        second_boxes = [
            [3.0, 4.0, 6.0, 8.0],
            [14.0, 14.0, 15.0, 15.0],
            [0.0, 0.0, 20.0, 20.0],
        ]
        # expected[i][j] is the hand-computed similarity between
        # first_boxes[i] and second_boxes[j].
        expected = [
            [2.0 / 16.0, 0, 6.0 / 400.0],
            [1.0 / 16.0, 0.0, 5.0 / 400.0],
        ]

        self.boxes1 = np.array(first_boxes).astype('float32')
        self.boxes2 = np.array(second_boxes).astype('float32')
        self.output = np.array(expected).astype('float32')

        self.inputs = {'X': self.boxes1, 'Y': self.boxes2}
        self.outputs = {'Out': self.output}

    def test_check_output(self):
        self.check_output()
class TestIOUSimilarityOpWithLoD(TestIOUSimilarityOp):
    """Same boxes as the parent case, with LoD attached to 'X' and 'Out'."""

    def setUp(self):
        super(TestIOUSimilarityOpWithLoD, self).setUp()
        # One LoD level; the input and the expected output carry the same one.
        self.boxes1_lod = [[0, 1, 2]]
        self.output_lod = [[0, 1, 2]]
        lod_input = (self.boxes1, self.boxes1_lod)
        lod_output = (self.output, self.output_lod)
        self.inputs = {'X': lod_input, 'Y': self.boxes2}
        self.outputs = {'Out': lod_output}

    def test_check_output(self):
        self.check_output()


if __name__ == '__main__':
    unittest.main()
...@@ -17,8 +17,9 @@ import unittest ...@@ -17,8 +17,9 @@ import unittest
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.framework import Program, program_guard from paddle.v2.fluid.framework import Program, program_guard, default_main_program
from paddle.v2.fluid.param_attr import ParamAttr from paddle.v2.fluid.param_attr import ParamAttr
import decorators
class TestBook(unittest.TestCase): class TestBook(unittest.TestCase):
...@@ -225,6 +226,51 @@ class TestBook(unittest.TestCase): ...@@ -225,6 +226,51 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(out) self.assertIsNotNone(out)
print(str(program)) print(str(program))
def test_im2sequence(self):
    # Builds an im2sequence layer in a fresh Program and checks that a
    # result variable comes back.
    print("test_im2sequence")
    main_program = Program()
    with program_guard(main_program):
        image = layers.data(name='x', shape=[3, 128, 128], dtype='float32')
        patches = layers.im2sequence(
            input=image, stride=[1, 1], filter_size=[2, 2])
        self.assertIsNotNone(patches)
    print(str(main_program))
@decorators.prog_scope()
def test_nce(self):
    # Builds a small word-window model: every word of the window except the
    # label word is embedded, the embeddings are concatenated and fed to nce
    # with the label word as the target.
    window_size = 5
    words = [
        layers.data(name='word_{0}'.format(i), shape=[1], dtype='int64')
        for i in xrange(window_size)
    ]

    dict_size = 10000
    label_word = int(window_size / 2) + 1

    # All context words share the embedding parameter 'emb.w'.
    context_embs = [
        layers.embedding(
            input=words[i],
            size=[dict_size, 32],
            param_attr='emb.w',
            is_sparse=True) for i in xrange(window_size) if i != label_word
    ]

    context = layers.concat(input=context_embs, axis=1)
    loss = layers.nce(input=context,
                      label=words[label_word],
                      num_total_classes=dict_size,
                      param_attr='nce.w',
                      bias_attr='nce.b')
    avg_loss = layers.mean(x=loss)
    self.assertIsNotNone(avg_loss)
    print(str(default_main_program()))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -33,5 +33,19 @@ class TestLookupTableOp(OpTest): ...@@ -33,5 +33,19 @@ class TestLookupTableOp(OpTest):
self.check_grad(['W'], 'Out', no_grad_set=set('Ids')) self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
class TestLookupTableOpWithPadding(TestLookupTableOp):
    """Lookup-table output check with one id chosen as ``padding_idx``."""

    def test_check_output(self):
        ids = np.squeeze(self.inputs['Ids'])
        # Pick one of the ids that is actually present as the padding index.
        padding_idx = np.random.choice(ids, 1)[0]
        # Rows looked up with the padding index are expected to be all zeros.
        # NOTE(review): 31 presumably matches the table width set up by the
        # parent class -- confirm.
        padded_rows = ids == padding_idx
        self.outputs['Out'][padded_rows] = np.zeros(31)
        self.attrs = {'padding_idx': long(padding_idx)}
        self.check_output()

    def test_check_grad(self):
        # Since paddings are not trainable and fixed in forward, the gradient
        # of paddings makes no sense and we don't test the gradient here.
        pass
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -109,4 +109,6 @@ class TestNCECase1(TestNCE): ...@@ -109,4 +109,6 @@ class TestNCECase1(TestNCE):
if __name__ == '__main__': if __name__ == '__main__':
# FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/7778
exit(0)
unittest.main() unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import sys
import math
from op_test import OpTest
class TestPriorBoxOp(OpTest):
def set_data(self):
self.init_test_params()
self.init_test_input()
self.init_test_output()
self.inputs = {'Input': self.input, 'Image': self.image}
self.attrs = {
'min_sizes': self.min_sizes,
'max_sizes': self.max_sizes,
'aspect_ratios': self.aspect_ratios,
'variances': self.variances,
'flip': self.flip,
'clip': self.clip,
'step_w': self.step_w,
'step_h': self.step_h,
'offset': self.offset
}
self.outputs = {'Boxes': self.out_boxes, 'Variances': self.out_var}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
return
def setUp(self):
self.op_type = "prior_box"
self.set_data()
def init_test_params(self):
self.layer_w = 4
self.layer_h = 4
self.image_w = 20
self.image_h = 20
self.step_w = float(self.image_w) / float(self.layer_w)
self.step_h = float(self.image_h) / float(self.layer_h)
self.input_channels = 2
self.image_channels = 3
self.batch_size = 10
self.min_sizes = [2, 4]
self.min_sizes = np.array(self.min_sizes).astype('int64')
self.max_sizes = [5, 10]
self.max_sizes = np.array(self.max_sizes).astype('int64')
self.aspect_ratios = [2.0, 3.0]
self.flip = True
self.real_aspect_ratios = [1, 2.0, 1.0 / 2.0, 3.0, 1.0 / 3.0]
self.aspect_ratios = np.array(
self.aspect_ratios, dtype=np.float).flatten()
self.variances = [0.1, 0.1, 0.2, 0.2]
self.variances = np.array(self.variances, dtype=np.float).flatten()
self.clip = True
self.num_priors = len(self.real_aspect_ratios) * len(self.min_sizes)
if len(self.max_sizes) > 1:
self.num_priors += len(self.max_sizes)
self.offset = 0.5
def init_test_input(self):
self.image = np.random.random(
(self.batch_size, self.image_channels, self.image_w,
self.image_h)).astype('float32')
self.input = np.random.random(
(self.batch_size, self.input_channels, self.layer_w,
self.layer_h)).astype('float32')
def init_test_output(self):
out_dim = (self.layer_h, self.layer_w, self.num_priors, 4)
out_boxes = np.zeros(out_dim).astype('float32')
out_var = np.zeros(out_dim).astype('float32')
idx = 0
for h in range(self.layer_h):
for w in range(self.layer_w):
c_x = (w + self.offset) * self.step_w
c_y = (h + self.offset) * self.step_h
idx = 0
for s in range(len(self.min_sizes)):
min_size = self.min_sizes[s]
c_w = c_h = min_size / 2.
out_boxes[h, w, idx, :] = [
(c_x - c_w) / self.image_w, (c_y - c_h) / self.image_h,
(c_x + c_w) / self.image_w, (c_y + c_h) / self.image_h
]
idx += 1
if len(self.max_sizes) > 0:
max_size = self.max_sizes[s]
# second prior: aspect_ratio = 1,
c_w = c_h = math.sqrt(min_size * max_size) / 2
out_boxes[h, w, idx, :] = [(c_x - c_w) / self.image_w,
(c_y - c_h) / self.image_h,
(c_x + c_w) / self.image_w,
(c_y + c_h) / self.image_h]
idx += 1
# rest of priors
for r in range(len(self.real_aspect_ratios)):
ar = self.real_aspect_ratios[r]
if math.fabs(ar - 1.) < 1e-6:
continue
c_w = min_size * math.sqrt(ar) / 2
c_h = (min_size / math.sqrt(ar)) / 2
out_boxes[h, w, idx, :] = [(c_x - c_w) / self.image_w,
(c_y - c_h) / self.image_h,
(c_x + c_w) / self.image_w,
(c_y + c_h) / self.image_h]
idx += 1
# clip the prior's coordidate such that it is within[0, 1]
if self.clip:
out_boxes = np.clip(out_boxes, 0.0, 1.0)
# set the variance.
out_var = np.tile(self.variances, (self.layer_h, self.layer_w,
self.num_priors, 1))
self.out_boxes = out_boxes.astype('float32')
self.out_var = out_var.astype('float32')
if __name__ == '__main__':
    # Run every test case defined in this module when executed as a script.
    unittest.main()
...@@ -319,11 +319,11 @@ def simple_transform(im, ...@@ -319,11 +319,11 @@ def simple_transform(im,
""" """
im = resize_short(im, resize_size) im = resize_short(im, resize_size)
if is_train: if is_train:
im = random_crop(im, crop_size) im = random_crop(im, crop_size, is_color=is_color)
if np.random.randint(2) == 0: if np.random.randint(2) == 0:
im = left_right_flip(im) im = left_right_flip(im)
else: else:
im = center_crop(im, crop_size) im = center_crop(im, crop_size, is_color=is_color)
if len(im.shape) == 3: if len(im.shape) == 3:
im = to_chw(im) im = to_chw(im)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册