提交 62bad2b4 编写于 作者: Y yuyang18

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into pr/11489

...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
| jczaja | Jacek Czaja | | jczaja | Jacek Czaja |
| JiayiFeng | Jia-Yi Feng | | JiayiFeng | Jia-Yi Feng |
| kbinias | Krzysztof Binias | | kbinias | Krzysztof Binias |
| kexinzhao | Ke-Xin Zhao |
| kuke | Yi-Bing Liu | | kuke | Yi-Bing Liu |
| lcy-seso | Ying Cao | | lcy-seso | Ying Cao |
| lipeng-unisound | Peng Li | | lipeng-unisound | Peng Li |
......
...@@ -173,21 +173,6 @@ def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim, ...@@ -173,21 +173,6 @@ def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
return avg_cost, feeding_list return avg_cost, feeding_list
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
lod_t = core.LoDTensor()
lod_t.set(flattened_data, place)
lod_t.set_lod([lod])
return lod_t, lod[-1]
def lodtensor_to_ndarray(lod_tensor): def lodtensor_to_ndarray(lod_tensor):
dims = lod_tensor.get_dims() dims = lod_tensor.get_dims()
ndarray = np.zeros(shape=dims).astype('float32') ndarray = np.zeros(shape=dims).astype('float32')
......
...@@ -125,18 +125,3 @@ def get_model(args): ...@@ -125,18 +125,3 @@ def get_model(args):
batch_size=args.batch_size) batch_size=args.batch_size)
return loss, inference_program, adam, train_reader, test_reader, batch_acc return loss, inference_program, adam, train_reader, test_reader, batch_acc
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = numpy.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
...@@ -155,6 +155,15 @@ copy(inference_lib DEPS paddle_fluid_shared paddle_fluid ...@@ -155,6 +155,15 @@ copy(inference_lib DEPS paddle_fluid_shared paddle_fluid
DSTS ${dst_dir}/${module} ${dst_dir}/${module} DSTS ${dst_dir}/${module} ${dst_dir}/${module}
) )
if(WITH_CONTRIB)
set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference")
copy(contrib_inference_lib DEPS paddle_inference_api
SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h
${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api.*
DSTS ${contrib_dst_dir} ${contrib_dst_dir}
)
endif()
set(module "platform") set(module "platform")
copy(platform_lib DEPS profiler_py_proto copy(platform_lib DEPS profiler_py_proto
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
......
#!/bin/bash #!/bin/bash
python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler > layers.rst python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler metric > layers.rst
for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
do do
......
...@@ -33,6 +33,13 @@ Xavier ...@@ -33,6 +33,13 @@ Xavier
:members: :members:
:noindex: :noindex:
Bilinear
--------
.. autoclass:: paddle.fluid.initializer.Bilinear
:members:
:noindex:
force_init_on_cpu force_init_on_cpu
----------------- -----------------
...@@ -73,3 +80,10 @@ XavierInitializer ...@@ -73,3 +80,10 @@ XavierInitializer
:members: :members:
:noindex: :noindex:
BilinearInitializer
-------------------
.. autoclass:: paddle.fluid.initializer.BilinearInitializer
:members:
:noindex:
...@@ -59,3 +59,39 @@ get_inference_program ...@@ -59,3 +59,39 @@ get_inference_program
.. autofunction:: paddle.fluid.io.get_inference_program .. autofunction:: paddle.fluid.io.get_inference_program
:noindex: :noindex:
save_checkpoint
---------------
.. autofunction:: paddle.fluid.io.save_checkpoint
:noindex:
load_checkpoint
---------------
.. autofunction:: paddle.fluid.io.load_checkpoint
:noindex:
clean_checkpoint
----------------
.. autofunction:: paddle.fluid.io.clean_checkpoint
:noindex:
load_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.load_persist_vars_without_grad
:noindex:
save_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.save_persist_vars_without_grad
:noindex:
get_latest_checkpoint_serial
----------------------------
.. autofunction:: paddle.fluid.io.get_latest_checkpoint_serial
:noindex:
...@@ -181,6 +181,12 @@ Print ...@@ -181,6 +181,12 @@ Print
.. autofunction:: paddle.fluid.layers.Print .. autofunction:: paddle.fluid.layers.Print
:noindex: :noindex:
is_empty
--------
.. autofunction:: paddle.fluid.layers.is_empty
:noindex:
device device
====== ======
...@@ -219,6 +225,12 @@ Send ...@@ -219,6 +225,12 @@ Send
.. autofunction:: paddle.fluid.layers.Send .. autofunction:: paddle.fluid.layers.Send
:noindex: :noindex:
Recv
----
.. autofunction:: paddle.fluid.layers.Recv
:noindex:
open_recordio_file open_recordio_file
------------------ ------------------
...@@ -255,6 +267,25 @@ double_buffer ...@@ -255,6 +267,25 @@ double_buffer
.. autofunction:: paddle.fluid.layers.double_buffer .. autofunction:: paddle.fluid.layers.double_buffer
:noindex: :noindex:
random_data_generator
---------------------
.. autofunction:: paddle.fluid.layers.random_data_generator
:noindex:
Preprocessor
------------
.. autoclass:: paddle.fluid.layers.Preprocessor
:members:
:noindex:
load
----
.. autofunction:: paddle.fluid.layers.load
:noindex:
nn nn
== ==
...@@ -342,6 +373,12 @@ conv2d ...@@ -342,6 +373,12 @@ conv2d
.. autofunction:: paddle.fluid.layers.conv2d .. autofunction:: paddle.fluid.layers.conv2d
:noindex: :noindex:
conv3d
------
.. autofunction:: paddle.fluid.layers.conv3d
:noindex:
sequence_pool sequence_pool
------------- -------------
...@@ -366,6 +403,12 @@ pool2d ...@@ -366,6 +403,12 @@ pool2d
.. autofunction:: paddle.fluid.layers.pool2d .. autofunction:: paddle.fluid.layers.pool2d
:noindex: :noindex:
pool3d
------
.. autofunction:: paddle.fluid.layers.pool3d
:noindex:
batch_norm batch_norm
---------- ----------
...@@ -384,6 +427,12 @@ conv2d_transpose ...@@ -384,6 +427,12 @@ conv2d_transpose
.. autofunction:: paddle.fluid.layers.conv2d_transpose .. autofunction:: paddle.fluid.layers.conv2d_transpose
:noindex: :noindex:
conv3d_transpose
----------------
.. autofunction:: paddle.fluid.layers.conv3d_transpose
:noindex:
sequence_expand sequence_expand
--------------- ---------------
...@@ -594,6 +643,48 @@ roi_pool ...@@ -594,6 +643,48 @@ roi_pool
.. autofunction:: paddle.fluid.layers.roi_pool .. autofunction:: paddle.fluid.layers.roi_pool
:noindex: :noindex:
dice_loss
---------
.. autofunction:: paddle.fluid.layers.dice_loss
:noindex:
image_resize
------------
.. autofunction:: paddle.fluid.layers.image_resize
:noindex:
image_resize_short
------------------
.. autofunction:: paddle.fluid.layers.image_resize_short
:noindex:
resize_bilinear
---------------
.. autofunction:: paddle.fluid.layers.resize_bilinear
:noindex:
gather
------
.. autofunction:: paddle.fluid.layers.gather
:noindex:
random_crop
-----------
.. autofunction:: paddle.fluid.layers.random_crop
:noindex:
mean_iou
--------
.. autofunction:: paddle.fluid.layers.mean_iou
:noindex:
ops ops
=== ===
...@@ -699,12 +790,6 @@ logical_not ...@@ -699,12 +790,6 @@ logical_not
.. autofunction:: paddle.fluid.layers.logical_not .. autofunction:: paddle.fluid.layers.logical_not
:noindex: :noindex:
uniform_random
--------------
.. autofunction:: paddle.fluid.layers.uniform_random
:noindex:
uniform_random_batch_size_like uniform_random_batch_size_like
------------------------------ ------------------------------
...@@ -723,12 +808,6 @@ gaussian_random_batch_size_like ...@@ -723,12 +808,6 @@ gaussian_random_batch_size_like
.. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like .. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like
:noindex: :noindex:
cumsum
------
.. autofunction:: paddle.fluid.layers.cumsum
:noindex:
scatter scatter
------- -------
...@@ -741,6 +820,30 @@ sum ...@@ -741,6 +820,30 @@ sum
.. autofunction:: paddle.fluid.layers.sum .. autofunction:: paddle.fluid.layers.sum
:noindex: :noindex:
slice
-----
.. autofunction:: paddle.fluid.layers.slice
:noindex:
polygon_box_transform
---------------------
.. autofunction:: paddle.fluid.layers.polygon_box_transform
:noindex:
shape
-----
.. autofunction:: paddle.fluid.layers.shape
:noindex:
maxout
------
.. autofunction:: paddle.fluid.layers.maxout
:noindex:
sigmoid sigmoid
------- -------
...@@ -897,18 +1000,6 @@ stanh ...@@ -897,18 +1000,6 @@ stanh
.. autofunction:: paddle.fluid.layers.stanh .. autofunction:: paddle.fluid.layers.stanh
:noindex: :noindex:
hard_shrink
-----------
.. autofunction:: paddle.fluid.layers.hard_shrink
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex:
hard_sigmoid hard_sigmoid
------------ ------------
...@@ -921,6 +1012,30 @@ swish ...@@ -921,6 +1012,30 @@ swish
.. autofunction:: paddle.fluid.layers.swish .. autofunction:: paddle.fluid.layers.swish
:noindex: :noindex:
uniform_random
--------------
.. autofunction:: paddle.fluid.layers.uniform_random
:noindex:
hard_shrink
-----------
.. autofunction:: paddle.fluid.layers.hard_shrink
:noindex:
cumsum
------
.. autofunction:: paddle.fluid.layers.cumsum
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex:
tensor tensor
====== ======
...@@ -978,6 +1093,18 @@ fill_constant ...@@ -978,6 +1093,18 @@ fill_constant
.. autofunction:: paddle.fluid.layers.fill_constant .. autofunction:: paddle.fluid.layers.fill_constant
:noindex: :noindex:
argmin
------
.. autofunction:: paddle.fluid.layers.argmin
:noindex:
argmax
------
.. autofunction:: paddle.fluid.layers.argmax
:noindex:
ones ones
---- ----
...@@ -993,6 +1120,12 @@ zeros ...@@ -993,6 +1120,12 @@ zeros
detection detection
========= =========
prior_box
---------
.. autofunction:: paddle.fluid.layers.prior_box
:noindex:
multi_box_head multi_box_head
-------------- --------------
...@@ -1080,3 +1213,18 @@ noam_decay ...@@ -1080,3 +1213,18 @@ noam_decay
.. autofunction:: paddle.fluid.layers.noam_decay .. autofunction:: paddle.fluid.layers.noam_decay
:noindex: :noindex:
metric
======
accuracy
--------
.. autofunction:: paddle.fluid.layers.accuracy
:noindex:
auc
---
.. autofunction:: paddle.fluid.layers.auc
:noindex:
...@@ -89,6 +89,13 @@ DecayedAdagradOptimizer ...@@ -89,6 +89,13 @@ DecayedAdagradOptimizer
:members: :members:
:noindex: :noindex:
RMSPropOptimizer
----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
Adadelta Adadelta
-------- --------
......
...@@ -23,3 +23,15 @@ profiler ...@@ -23,3 +23,15 @@ profiler
.. autofunction:: paddle.fluid.profiler.profiler .. autofunction:: paddle.fluid.profiler.profiler
:noindex: :noindex:
start_profiler
--------------
.. autofunction:: paddle.fluid.profiler.start_profiler
:noindex:
stop_profiler
-------------
.. autofunction:: paddle.fluid.profiler.stop_profiler
:noindex:
...@@ -104,7 +104,7 @@ no changes added to commit (use "git add" and/or "git commit -a") ...@@ -104,7 +104,7 @@ no changes added to commit (use "git add" and/or "git commit -a")
➜ docker run -it -v $(pwd):/paddle paddle:latest-dev bash -c "cd /paddle/build && ctest" ➜ docker run -it -v $(pwd):/paddle paddle:latest-dev bash -c "cd /paddle/build && ctest"
``` ```
关于构建和测试的更多信息,请参见[这篇文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_cn.rst) 关于构建和测试的更多信息,请参见[使用Docker安装运行](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/v2/build_and_install/docker_install_cn.rst)
## 提交(commit) ## 提交(commit)
......
...@@ -406,6 +406,9 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) { ...@@ -406,6 +406,9 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) {
} }
} }
} }
#else
LOG(WARNING)
<< "'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option";
#endif #endif
} }
......
...@@ -410,5 +410,38 @@ void LoDTensor::MergeLoDTensor( ...@@ -410,5 +410,38 @@ void LoDTensor::MergeLoDTensor(
} }
} }
LoD ConvertToLengthBasedLoD(const LoD &offset_lod) {
LoD length_lod;
length_lod.reserve(offset_lod.size());
for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) {
std::vector<size_t> level;
if (offset_lod[lvl].size() > 0) {
level.reserve(offset_lod[lvl].size() - 1);
}
for (size_t idx = 0; idx < offset_lod[lvl].size() - 1; ++idx) {
level.push_back(offset_lod[lvl][idx + 1] - offset_lod[lvl][idx]);
}
length_lod.push_back(level);
}
return length_lod;
}
LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
LoD offset_lod;
offset_lod.reserve(length_lod.size());
for (size_t lvl = 0; lvl < length_lod.size(); ++lvl) {
std::vector<size_t> level;
level.reserve(length_lod[lvl].size() + 1);
size_t tmp = 0;
level.push_back(tmp);
for (size_t idx = 0; idx < length_lod[lvl].size(); ++idx) {
tmp += length_lod[lvl][idx];
level.push_back(tmp);
}
offset_lod.push_back(level);
}
return offset_lod;
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -226,5 +226,19 @@ extern void WriteToRecordIO(recordio::Writer* writer, ...@@ -226,5 +226,19 @@ extern void WriteToRecordIO(recordio::Writer* writer,
extern std::vector<LoDTensor> ReadFromRecordIO( extern std::vector<LoDTensor> ReadFromRecordIO(
recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx); recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx);
/*
* Convert between length-based LoD and offset-based LoD.
* The implementation of LoDTensor class use offset-based LoD.
* However, we want to expose the more user-friendly length-based
* LoD to the Python side instead.
*
* Example:
* If offset_lod = [[0, 2, 3],[0, 3, 5, 9]]
* then length_lod = [[2, 1], [3, 2, 4]]
*/
LoD ConvertToLengthBasedLoD(const LoD& offset_lod);
LoD ConvertToOffsetBasedLoD(const LoD& length_lod);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -228,6 +228,38 @@ TEST(LoD, CheckAbsLoD) { ...@@ -228,6 +228,38 @@ TEST(LoD, CheckAbsLoD) {
ASSERT_FALSE(CheckAbsLoD(abs_lod0)); ASSERT_FALSE(CheckAbsLoD(abs_lod0));
} }
TEST(LoD, ConvertToLengthBasedLoD) {
LoD offset_lod;
offset_lod.push_back(std::vector<size_t>({0, 2}));
offset_lod.push_back(std::vector<size_t>({0, 1, 3}));
offset_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));
LoD length_lod = ConvertToLengthBasedLoD(offset_lod);
LoD expected;
expected.push_back(std::vector<size_t>({2}));
expected.push_back(std::vector<size_t>({1, 2}));
expected.push_back(std::vector<size_t>({2, 2, 1}));
EXPECT_EQ(length_lod, expected);
}
TEST(LoD, ConvertToOffsetBasedLoD) {
LoD length_lod;
length_lod.push_back(std::vector<size_t>({2}));
length_lod.push_back(std::vector<size_t>({1, 2}));
length_lod.push_back(std::vector<size_t>({2, 2, 1}));
LoD offset_lod = ConvertToOffsetBasedLoD(length_lod);
LoD expected;
expected.push_back(std::vector<size_t>({0, 2}));
expected.push_back(std::vector<size_t>({0, 1, 3}));
expected.push_back(std::vector<size_t>({0, 2, 4, 5}));
EXPECT_EQ(offset_lod, expected);
}
template <typename T> template <typename T>
static void TestRecordIO() { static void TestRecordIO() {
LoDTensor tensor; LoDTensor tensor;
......
...@@ -43,48 +43,29 @@ Scope& Scope::NewScope() const { ...@@ -43,48 +43,29 @@ Scope& Scope::NewScope() const {
} }
Variable* Scope::Var(const std::string& name) { Variable* Scope::Var(const std::string& name) {
// acquire the lock when new var under this scope
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
auto* v = FindVarLocally(name); return VarInternal(name);
if (v != nullptr) return v;
v = new Variable();
vars_[name].reset(v);
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
} }
Variable* Scope::Var(std::string* name) { Variable* Scope::Var(std::string* name) {
auto var_name = string::Sprintf("%p.%d", this, vars_.size()); std::unique_lock<std::mutex> lock(mutex_);
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
if (name != nullptr) { if (name != nullptr) {
*name = var_name; *name = new_name;
} }
return Var(var_name); return VarInternal(new_name);
} }
Variable* Scope::FindVar(const std::string& name) const { Variable* Scope::FindVar(const std::string& name) const {
// acquire the lock when find var
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
return FindVarInternal(name); return FindVarInternal(name);
} }
Variable* Scope::FindVarInternal(const std::string& name) const {
auto var = FindVarLocally(name);
if (var != nullptr) {
return var;
}
return (parent_ == nullptr) ? nullptr : parent_->FindVarInternal(name);
}
const Scope* Scope::FindScope(const Variable* var) const { const Scope* Scope::FindScope(const Variable* var) const {
for (auto& kv : vars_) { std::unique_lock<std::mutex> lock(mutex_);
if (kv.second.get() == var) { return FindScopeInternal(var);
return this;
}
}
return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
} }
void Scope::DropKids() { void Scope::DropKids() {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
for (Scope* s : kids_) delete s; for (Scope* s : kids_) delete s;
...@@ -92,6 +73,7 @@ void Scope::DropKids() { ...@@ -92,6 +73,7 @@ void Scope::DropKids() {
} }
std::vector<std::string> Scope::LocalVarNames() const { std::vector<std::string> Scope::LocalVarNames() const {
std::unique_lock<std::mutex> lock(mutex_);
std::vector<std::string> known_vars; std::vector<std::string> known_vars;
known_vars.reserve(this->vars_.size()); known_vars.reserve(this->vars_.size());
for (auto& p : vars_) { for (auto& p : vars_) {
...@@ -127,6 +109,39 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) { ...@@ -127,6 +109,39 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name, void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const { const std::string& new_name) const {
std::unique_lock<std::mutex> lock(mutex_);
RenameInternal(origin_name, new_name);
}
std::string Scope::Rename(const std::string& origin_name) const {
std::unique_lock<std::mutex> lock(mutex_);
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
RenameInternal(origin_name, new_name);
return new_name;
}
Variable* Scope::VarInternal(const std::string& name) {
auto* v = FindVarLocally(name);
if (v != nullptr) return v;
v = new Variable();
vars_[name].reset(v);
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
}
const Scope* Scope::FindScopeInternal(const Variable* var) const {
for (auto& kv : vars_) {
if (kv.second.get() == var) {
return this;
}
}
return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
}
void Scope::RenameInternal(const std::string& origin_name,
const std::string& new_name) const {
auto origin_it = vars_.find(origin_name); auto origin_it = vars_.find(origin_name);
PADDLE_ENFORCE(origin_it != vars_.end(), PADDLE_ENFORCE(origin_it != vars_.end(),
"Cannot find original variable with name %s", origin_name); "Cannot find original variable with name %s", origin_name);
...@@ -137,10 +152,12 @@ void Scope::Rename(const std::string& origin_name, ...@@ -137,10 +152,12 @@ void Scope::Rename(const std::string& origin_name,
vars_.erase(origin_it); vars_.erase(origin_it);
} }
std::string Scope::Rename(const std::string& origin_name) const { Variable* Scope::FindVarInternal(const std::string& name) const {
auto var_name = string::Sprintf("%p.%d", this, vars_.size()); auto var = FindVarLocally(name);
Rename(origin_name, var_name); if (var != nullptr) {
return var_name; return var;
}
return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
} }
Variable* Scope::FindVarLocally(const std::string& name) const { Variable* Scope::FindVarLocally(const std::string& name) const {
......
...@@ -88,12 +88,20 @@ class Scope { ...@@ -88,12 +88,20 @@ class Scope {
// Call Scope::NewScope for a sub-scope. // Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const* parent) : parent_(parent) {} explicit Scope(Scope const* parent) : parent_(parent) {}
// Called by Var.
Variable* VarInternal(const std::string& name);
// Called by FindScope.
const Scope* FindScopeInternal(const Variable* var) const;
// Called by Rename.
void RenameInternal(const std::string& origin_name,
const std::string& new_name) const;
// Called by FindVar recursively. // Called by FindVar recursively.
// Caller doesn't own the returned Variable.
Variable* FindVarInternal(const std::string& name) const; Variable* FindVarInternal(const std::string& name) const;
// Called by FindVarInternal and Var. // Called by FindVarInternal and Var.
// Caller doesn't own the returned Variable.
Variable* FindVarLocally(const std::string& name) const; Variable* FindVarLocally(const std::string& name) const;
// Scope in `kids_` are owned by this class. // Scope in `kids_` are owned by this class.
......
...@@ -64,7 +64,8 @@ class OpConverter { ...@@ -64,7 +64,8 @@ class OpConverter {
(*it)(op, scope, test_mode); (*it)(op, scope, test_mode);
} }
// convert fluid block to tensorrt network // Convert a fluid block to tensorrt network, NOTE it just convert operators,
// the INetwork's inputs and outputs should specified in some other modules.
void ConvertBlock(const framework::proto::BlockDesc& block, void ConvertBlock(const framework::proto::BlockDesc& block,
const std::unordered_set<std::string>& parameters, const std::unordered_set<std::string>& parameters,
const framework::Scope& scope, TensorRTEngine* engine) { const framework::Scope& scope, TensorRTEngine* engine) {
......
...@@ -51,11 +51,12 @@ class TensorRTEngine : public EngineBase { ...@@ -51,11 +51,12 @@ class TensorRTEngine : public EngineBase {
nvinfer1::Weights w_; nvinfer1::Weights w_;
}; };
TensorRTEngine(int max_batch, int max_workspace, cudaStream_t* stream, TensorRTEngine(int max_batch, int max_workspace,
cudaStream_t* stream = nullptr,
nvinfer1::ILogger& logger = NaiveLogger::Global()) nvinfer1::ILogger& logger = NaiveLogger::Global())
: max_batch_(max_batch), : max_batch_(max_batch),
max_workspace_(max_workspace), max_workspace_(max_workspace),
stream_(stream), stream_(stream ? stream : &default_stream_),
logger_(logger) {} logger_(logger) {}
virtual ~TensorRTEngine(); virtual ~TensorRTEngine();
...@@ -121,6 +122,8 @@ class TensorRTEngine : public EngineBase { ...@@ -121,6 +122,8 @@ class TensorRTEngine : public EngineBase {
// the max memory size the engine uses // the max memory size the engine uses
int max_workspace_; int max_workspace_;
cudaStream_t* stream_; cudaStream_t* stream_;
// If stream_ is not set from outside, hold its own stream.
cudaStream_t default_stream_;
nvinfer1::ILogger& logger_; nvinfer1::ILogger& logger_;
std::vector<Buffer> buffers_; std::vector<Buffer> buffers_;
...@@ -165,20 +168,31 @@ class TensorRTEngine : public EngineBase { ...@@ -165,20 +168,31 @@ class TensorRTEngine : public EngineBase {
*/ */
class TRT_EngineManager { class TRT_EngineManager {
public: public:
TensorRTEngine* Create(int max_batch, int max_workspace, bool HasEngine(const std::string& name) const {
cudaStream_t* stream) { return engines_.count(name) != 0;
engines_.emplace_back(new TensorRTEngine(max_batch, max_workspace, stream)); }
return engines_.back().get();
// Get an engine called `name`.
TensorRTEngine* Get(const std::string& name) const {
return engines_.at(name).get();
}
// Create or get an engine called `name`
TensorRTEngine* Create(int max_batch, int max_workspace, cudaStream_t* stream,
const std::string& name) {
auto* p = new TensorRTEngine(max_batch, max_workspace, stream);
engines_[name].reset(p);
return p;
} }
void DeleteALl() { void DeleteALl() {
for (auto& ptr : engines_) { for (auto& item : engines_) {
ptr.reset(nullptr); item.second.reset(nullptr);
} }
} }
private: private:
std::vector<std::unique_ptr<TensorRTEngine>> engines_; std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
}; };
} // namespace tensorrt } // namespace tensorrt
......
...@@ -29,6 +29,7 @@ DEFINE_string(data_file, "", "File of input index data."); ...@@ -29,6 +29,7 @@ DEFINE_string(data_file, "", "File of input index data.");
DEFINE_int32(repeat, 100, "Running the inference program repeat times"); DEFINE_int32(repeat, 100, "Running the inference program repeat times");
DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
DEFINE_int32(num_threads, 1, "Number of threads should be used"); DEFINE_int32(num_threads, 1, "Number of threads should be used");
DECLARE_bool(use_mkldnn);
inline double GetCurrentMs() { inline double GetCurrentMs() {
struct timeval time; struct timeval time;
...@@ -103,9 +104,9 @@ void ThreadRunInfer( ...@@ -103,9 +104,9 @@ void ThreadRunInfer(
const int tid, paddle::framework::Scope* scope, const int tid, paddle::framework::Scope* scope,
const std::vector<std::vector<const paddle::framework::LoDTensor*>>& jobs) { const std::vector<std::vector<const paddle::framework::LoDTensor*>>& jobs) {
// maybe framework:ProgramDesc is not thread-safe // maybe framework:ProgramDesc is not thread-safe
paddle::platform::CPUPlace place;
paddle::framework::Executor executor(place);
auto& sub_scope = scope->NewScope(); auto& sub_scope = scope->NewScope();
auto place = paddle::platform::CPUPlace();
auto executor = paddle::framework::Executor(place);
auto inference_program = auto inference_program =
paddle::inference::Load(&executor, scope, FLAGS_model_path); paddle::inference::Load(&executor, scope, FLAGS_model_path);
...@@ -182,8 +183,8 @@ TEST(inference, nlp) { ...@@ -182,8 +183,8 @@ TEST(inference, nlp) {
stop_ms = GetCurrentMs(); stop_ms = GetCurrentMs();
} else { } else {
// 1. Define place, executor, scope // 1. Define place, executor, scope
auto place = paddle::platform::CPUPlace(); paddle::platform::CPUPlace place;
auto executor = paddle::framework::Executor(place); paddle::framework::Executor executor(place);
// 2. Initialize the inference_program and load parameters // 2. Initialize the inference_program and load parameters
std::unique_ptr<paddle::framework::ProgramDesc> inference_program; std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
......
...@@ -27,7 +27,7 @@ namespace operators { ...@@ -27,7 +27,7 @@ namespace operators {
AddInput("X", "Input of " #OP_NAME " operator"); \ AddInput("X", "Input of " #OP_NAME " operator"); \
AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \ AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \
AddAttr<bool>("use_mkldnn", \ AddAttr<bool>("use_mkldnn", \
"(bool, default false) Only used in mkldnn kernel") \ "(default false) Only used in mkldnn kernel") \
.SetDefault(false); \ .SetDefault(false); \
AddComment(OP_COMMENT); \ AddComment(OP_COMMENT); \
} \ } \
...@@ -112,7 +112,7 @@ $$out = \frac{1}{1 + e^{-x}}$$ ...@@ -112,7 +112,7 @@ $$out = \frac{1}{1 + e^{-x}}$$
__attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC( __attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC(
Logsigmoid Activation Operator Logsigmoid Activation Operator
$$out = \log \frac{1}{1 + e^{-x}}$$ $$out = \\log \\frac{1}{1 + e^{-x}}$$
)DOC"; )DOC";
...@@ -196,7 +196,7 @@ $out = [x]$ ...@@ -196,7 +196,7 @@ $out = [x]$
__attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC( __attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC(
Reciprocal Activation Operator. Reciprocal Activation Operator.
$$out = \frac{1}{x}$$ $$out = \\frac{1}{x}$$
)DOC"; )DOC";
...@@ -252,15 +252,14 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -252,15 +252,14 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", "Output of Softshrink operator"); AddOutput("Out", "Output of Softshrink operator");
AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f); AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
AddComment(R"DOC( AddComment(R"DOC(
Softshrink Activation Operator. :strong:`Softshrink Activation Operator`
$$ .. math::
out = \begin{cases} out = \begin{cases}
x - \lambda, \text{if } x > \lambda \\ x - \lambda, \text{if } x > \lambda \\
x + \lambda, \text{if } x < -\lambda \\ x + \lambda, \text{if } x < -\lambda \\
0, \text{otherwise} 0, \text{otherwise}
\end{cases} \end{cases}
$$
)DOC"); )DOC");
} }
...@@ -271,18 +270,18 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -271,18 +270,18 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override { void Make() override {
AddInput("X", "Input of HardShrink operator"); AddInput("X", "Input of HardShrink operator");
AddOutput("Out", "Output of HardShrink operator"); AddOutput("Out", "Output of HardShrink operator");
AddAttr<float>("threshold", "The value of threshold for HardShrink") AddAttr<float>("threshold",
"The value of threshold for HardShrink. [default: 0.5]")
.SetDefault(0.5f); .SetDefault(0.5f);
AddComment(R"DOC( AddComment(R"DOC(
HardShrink Activation Operator. :strong:`HardShrink activation operator`
$$ .. math::
out = \begin{cases} out = \begin{cases}
x, \text{if } x > \lambda \\ x, \text{if } x > \lambda \\
x, \text{if } x < -\lambda \\ x, \text{if } x < -\lambda \\
0, \text{otherwise} 0, \text{otherwise}
\end{cases} \end{cases}
$$
)DOC"); )DOC");
} }
...@@ -394,18 +393,18 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -394,18 +393,18 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override { void Make() override {
AddInput("X", "Input of ThresholdedRelu operator"); AddInput("X", "Input of ThresholdedRelu operator");
AddOutput("Out", "Output of ThresholdedRelu operator"); AddOutput("Out", "Output of ThresholdedRelu operator");
AddAttr<float>("threshold", "The threshold location of activation") AddAttr<float>("threshold",
"The threshold location of activation. [default 1.0].")
.SetDefault(1.0f); .SetDefault(1.0f);
AddComment(R"DOC( AddComment(R"DOC(
ThresholdedRelu Activation Operator. :strong:`ThresholdedRelu activation operator`
$$ .. math::
out = \begin{cases}
out = \begin{cases}
x, \text{if } x > threshold \\ x, \text{if } x > threshold \\
0, \text{otherwise} 0, \text{otherwise}
\end{cases} \end{cases}
$$
)DOC"); )DOC");
} }
}; };
...@@ -444,7 +443,7 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -444,7 +443,7 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC( AddComment(R"DOC(
Swish Activation Operator. Swish Activation Operator.
$$out = \frac{x}{1 + e^{- \beta x}}$$ $$out = \\frac{x}{1 + e^{- \beta x}}$$
)DOC"); )DOC");
} }
......
...@@ -23,30 +23,26 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -23,30 +23,26 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
OpComment comment; OpComment comment;
AddInput("X", AddInput("X", string::Sprintf("the left hand operand of %s operator",
string::Sprintf("(LoDTensor) the left hand operand of %s operator",
comment.type)); comment.type));
AddInput("Y", string::Sprintf( AddInput("Y", string::Sprintf("the right hand operand of %s operator",
"(LoDTensor) the right hand operand of %s operator",
comment.type)); comment.type));
AddAttr<bool>("force_cpu", AddAttr<bool>("force_cpu",
"(bool, default false) Force fill output variable to cpu " "Force fill output variable to cpu "
"memory. Otherwise, fill output variable to the running " "memory. Otherwise, fill output variable to the running "
"device") "device [default true].")
.SetDefault(false); .SetDefault(true);
AddOutput("Out", string::Sprintf( AddOutput("Out", string::Sprintf("n-dim bool tensor. Each element is %s",
"(LoDTensor) n-dim bool tensor. Each element is %s",
comment.equation)); comment.equation));
AddComment(string::Sprintf(R"DOC(%s Operator AddComment(string::Sprintf(R"DOC(
It operates element-wise on X and Y, and returns the Out. Each of them is a It operates element-wise on X and Y, and returns the Out. Each of them is a
N-dim tensor. X and Y could be any type. The each element of the Out tensor is N-dim tensor. X and Y could be any type. The each element of the Out tensor is
calculated by %s calculated by $%s$
)DOC", )DOC",
comment.type, comment.equation)); comment.equation));
AddAttr<int>("axis", AddAttr<int>(
"(int, default -1). The start dimension index " "axis",
"for broadcasting Y onto X.") "The start dimension index for broadcasting Y onto X. [default -1]")
.SetDefault(-1) .SetDefault(-1)
.EqualGreaterThan(-1); .EqualGreaterThan(-1);
} }
......
...@@ -107,7 +107,13 @@ REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker, ...@@ -107,7 +107,13 @@ REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker,
false> /* set false to disable empty grad */); false> /* set false to disable empty grad */);
REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad); REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
concat, ops::ConcatKernel<paddle::platform::CPUDeviceContext, float>); concat, ops::ConcatKernel<paddle::platform::CPUDeviceContext, double>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, float>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, int>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
concat_grad, concat_grad,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, float>); ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int>);
...@@ -15,7 +15,13 @@ limitations under the License. */ ...@@ -15,7 +15,13 @@ limitations under the License. */
#include "paddle/fluid/operators/concat_op.h" #include "paddle/fluid/operators/concat_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
concat, ops::ConcatKernel<paddle::platform::CUDADeviceContext, float>); concat, ops::ConcatKernel<paddle::platform::CUDADeviceContext, double>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, float>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, int>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
concat_grad, concat_grad,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, float>); ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int>);
...@@ -52,7 +52,7 @@ static std::vector<int> GetOffsets(const framework::ExecutionContext& ctx) { ...@@ -52,7 +52,7 @@ static std::vector<int> GetOffsets(const framework::ExecutionContext& ctx) {
} else { } else {
res = ctx.Attr<std::vector<int>>("offsets"); res = ctx.Attr<std::vector<int>>("offsets");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
rank, res.size(), rank, static_cast<int>(res.size()),
"Offsets size should be equal to dimension size of input tensor."); "Offsets size should be equal to dimension size of input tensor.");
} }
return res; return res;
......
...@@ -30,19 +30,19 @@ class CumOp : public framework::OperatorWithKernel { ...@@ -30,19 +30,19 @@ class CumOp : public framework::OperatorWithKernel {
class CumsumOpMaker : public framework::OpProtoAndCheckerMaker { class CumsumOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", "Input of Cumsum operator"); AddInput("X", "Input of cumsum operator");
AddOutput("Out", "Output of Cumsum operator"); AddOutput("Out", "Output of cumsum operator");
AddAttr<int>("axis", AddAttr<int>("axis",
"(int, default -1). The dimenstion to accumulate along. " "The dimenstion to accumulate along. -1 means the last "
"-1 means the last dimenstion") "dimenstion [default -1].")
.SetDefault(-1) .SetDefault(-1)
.EqualGreaterThan(-1); .EqualGreaterThan(-1);
AddAttr<bool>("exclusive", AddAttr<bool>("exclusive",
"bool, default false). Whether to perform exclusive cumsum") "Whether to perform exclusive cumsum. [default false].")
.SetDefault(false); .SetDefault(false);
AddAttr<bool>("reverse", AddAttr<bool>("reverse",
"bool, default false). If true, the cumsum is performed in " "If true, the cumsum is performed in the reversed direction. "
"the reversed direction") "[default false].")
.SetDefault(false); .SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
The cumulative sum of the elements along a given axis. The cumulative sum of the elements along a given axis.
......
...@@ -106,23 +106,36 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -106,23 +106,36 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
"and M represents the number of deocded boxes."); "and M represents the number of deocded boxes.");
AddComment(R"DOC( AddComment(R"DOC(
Bounding Box Coder Operator.
Bounding Box Coder.
Encode/Decode the target bounding box with the priorbox information. Encode/Decode the target bounding box with the priorbox information.
The Encoding schema described below: The Encoding schema described below:
ox = (tx - px) / pw / pxv
oy = (ty - py) / ph / pyv ox = (tx - px) / pw / pxv
ow = log(abs(tw / pw)) / pwv
oh = log(abs(th / ph)) / phv oy = (ty - py) / ph / pyv
ow = log(abs(tw / pw)) / pwv
oh = log(abs(th / ph)) / phv
The Decoding schema described below: The Decoding schema described below:
ox = (pw * pxv * tx * + px) - tw / 2
oy = (ph * pyv * ty * + py) - th / 2 ox = (pw * pxv * tx * + px) - tw / 2
ow = exp(pwv * tw) * pw + tw / 2
oh = exp(phv * th) * ph + th / 2 oy = (ph * pyv * ty * + py) - th / 2
where tx, ty, tw, th denote the target box's center coordinates, width and
height respectively. Similarly, px, py, pw, ph denote the priorbox's(anchor) ow = exp(pwv * tw) * pw + tw / 2
center coordinates, width and height. pxv, pyv, pwv, phv denote the variance
of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates, oh = exp(phv * th) * ph + th / 2
width and height.
where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width
and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the
priorbox's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`,
`phv` denote the variance of the priorbox and `ox`, `oy`, `ow`, `oh` denote the
encoded/decoded coordinates, width and height.
)DOC"); )DOC");
} }
}; };
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise_mul_op.h" #include "paddle/fluid/operators/elementwise_mul_op.h"
#include "paddle/fluid/operators/elementwise_op.h" #include "paddle/fluid/operators/elementwise_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\odot\\ Y"); REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\\\odot Y");
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_mul, elementwise_mul,
ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>, ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>,
......
...@@ -36,11 +36,12 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { ...@@ -36,11 +36,12 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
void Apply() override { void Apply() override {
AddAttr<float>("mean", AddAttr<float>("mean",
"(float, default 0.0) " "(float, default 0.0) "
"mean of random tensor.") "The mean (or center) of the gaussian distribution.")
.SetDefault(.0f); .SetDefault(.0f);
AddAttr<float>("std", AddAttr<float>("std",
"(float, default 1.0) " "(float, default 1.0) "
"std of random tensor.") "The standard deviation (std, or spread) of the "
"gaussian distribution.")
.SetDefault(1.0f); .SetDefault(1.0f);
AddAttr<int>("seed", AddAttr<int>("seed",
"(int, default 0) " "(int, default 0) "
...@@ -55,9 +56,11 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { ...@@ -55,9 +56,11 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
.SetDefault(framework::proto::VarType::FP32); .SetDefault(framework::proto::VarType::FP32);
AddComment(R"DOC( AddComment(R"DOC(
GaussianRandom Operator.
Used to initialize tensors with gaussian random generator. Used to initialize tensors with gaussian random generator.
The defalut mean of the distribution is 0. and defalut standard
deviation (std) of the distribution is 1.. Uers can set mean and std
by input arguments.
)DOC"); )DOC");
} }
}; };
......
...@@ -85,7 +85,7 @@ class GetPlacesOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -85,7 +85,7 @@ class GetPlacesOpProtoMaker : public framework::OpProtoAndCheckerMaker {
.InEnum({"CUDA", "CPU", "AUTO"}) .InEnum({"CUDA", "CPU", "AUTO"})
.SetDefault("AUTO"); .SetDefault("AUTO");
AddComment(R"DOC( AddComment(R"DOC(
Returns a list of places based on flags. The list will be used for parallel Returns a list of places based on arguments. The list will be used for parallel
execution. execution.
)DOC"); )DOC");
} }
......
...@@ -62,36 +62,33 @@ class LayerNormOp : public framework::OperatorWithKernel { ...@@ -62,36 +62,33 @@ class LayerNormOp : public framework::OperatorWithKernel {
class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker { class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", "(LoDTensor) The input tensor."); AddInput("X", "The input tensor.");
AddInput("Scale", AddInput("Scale",
"(Tensor, optional) Scale is a 1-dimensional tensor of size " "(optional) Scale is a 1-dimensional tensor of size "
"H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])." "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
"It is applied to the output.") "It is applied to the output.")
.AsDispensable(); .AsDispensable();
AddInput("Bias", AddInput("Bias",
"(Tensor, optional) Bias is a 1-dimensional tensor of size " "(optional) Bias is a 1-dimensional tensor of size "
"H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])." "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
"It is applied to the output.") "It is applied to the output.")
.AsDispensable(); .AsDispensable();
AddOutput("Y", "(LoDTensor) Result after normalization."); AddOutput("Y", "Result after normalization.");
AddOutput("Mean", "(Tensor) Mean of the current mini batch.") AddOutput("Mean", "Mean of the current mini batch.").AsIntermediate();
.AsIntermediate(); AddOutput("Variance", "Variance of the current mini batch.")
AddOutput("Variance", "(Tensor) Variance of the current mini batch.")
.AsIntermediate(); .AsIntermediate();
AddAttr<float>("epsilon", AddAttr<float>("epsilon",
"(float, default 1e-5) Constant for " "Constant for numerical stability [default 1e-5].")
"numerical stability")
.SetDefault(1e-5) .SetDefault(1e-5)
.AddCustomChecker([](const float &epsilon) { .AddCustomChecker([](const float &epsilon) {
PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f, PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f,
"'epsilon' should be between 0.0 and 0.001."); "'epsilon' should be between 0.0 and 0.001.");
}); });
AddAttr<int>("begin_norm_axis", AddAttr<int>("begin_norm_axis",
"(int default:1), the " "the axis of `begin_norm_axis ... Rank(X) - 1` will be "
"axis of `begin_norm_axis ... Rank(X) - 1` will be "
"normalized. `begin_norm_axis` splits the tensor(`X`) to a " "normalized. `begin_norm_axis` splits the tensor(`X`) to a "
"matrix [N,H].") "matrix [N,H]. [default 1].")
.SetDefault(1) .SetDefault(1)
.AddCustomChecker([](const int &begin_norm_axis) { .AddCustomChecker([](const int &begin_norm_axis) {
PADDLE_ENFORCE_GT(begin_norm_axis, 0, PADDLE_ENFORCE_GT(begin_norm_axis, 0,
...@@ -99,10 +96,14 @@ class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -99,10 +96,14 @@ class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker {
}); });
AddComment(R"DOC( AddComment(R"DOC(
Layer Normalization. Assume feature vectors exist on dimensions
Layer Norm has been implemented as discussed in the paper: :attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
https://arxiv.org/abs/1607.06450 along these dimensions for each feature vector :math:`a` with size
... :math:`H`, then normalize each feature vector using the corresponding
statistics. After that, apply learnable gain and bias on the normalized
tensor to scale and shift if :attr:`scale` and :attr:`shift` are set.
Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_
)DOC"); )DOC");
} }
}; };
......
...@@ -348,7 +348,8 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -348,7 +348,8 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker {
}; };
void SignalHandler::StopAndExit(int signal_num) { void SignalHandler::StopAndExit(int signal_num) {
VLOG(3) << "Catch interrupt signal: " << signal_num << ", program will exit"; // Do not use VLOG here for the device for printing maybe already released.
// exit will release interal allocated resoureces.
exit(0); exit(0);
} }
......
...@@ -33,12 +33,10 @@ class MeanOp : public framework::OperatorWithKernel { ...@@ -33,12 +33,10 @@ class MeanOp : public framework::OperatorWithKernel {
class MeanOpMaker : public framework::OpProtoAndCheckerMaker { class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", "The input of mean op"); AddInput("X", "(Tensor) The input of mean op");
AddOutput("Out", "The output of mean op").Reuse("X"); AddOutput("Out", "(Tensor) The output of mean op").Reuse("X");
AddComment(R"DOC( AddComment(R"DOC(
Mean Operator. Mean Operator calculates the mean of all elements in X.
Out is a scalar which is the mean of all elements in X.
)DOC"); )DOC");
} }
......
...@@ -62,26 +62,46 @@ class MultiplexOp : public framework::OperatorWithKernel { ...@@ -62,26 +62,46 @@ class MultiplexOp : public framework::OperatorWithKernel {
class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker { class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("Ids", "The index tensor of multiplex operator."); AddInput("Ids",
AddInput("X", "The candidate tensors of multiplex operator.") "Tensor<int32>, index variable which is a 2-D tensor with shape "
"[M, 1] where M is the batch size.");
AddInput("X",
"A list of variables to gather from. All variables have the same "
"shape and the rank is at least 2.")
.AsDuplicable(); .AsDuplicable();
AddOutput("Out", "The output tensor of multiplex operator."); AddOutput("Out", "The output tensor of multiplex operator.");
AddComment(R"DOC( AddComment(R"DOC(
Multiplex Operator. Referring to the given index variable, this layer selects rows from the
input variables to construct a multiplex variable. Assuming that there are
Multiplex multiple tensors according to the index provided by the index tensor. :math:`m` input variables and :math:`I_i` represents the i-th input
variable and :math:`i` is in [0, :math:`m`). All input variables are
Ids: the index tensor. tensors with same shape [:math:`d_0`, :math:`d_1`, ..., :math:`d_R`].
X[0 : N - 1]: the candidate tensors for output (N >= 2). Please note that rank of the input tensor should be at least 2. Each input
For each index i from 0 to batchSize - 1, the output is the i-th row of the variable will be treated as a 2-D matrix with shape [:math:`M`, :math:`N`]
where :math:`M` for :math:`d_0` and :math:`N` for :math:`d_1` * :math:`d_2`
* ... * :math:`d_R`. Let :math:`I_i[j]` be the j-th row of the i-th input
variable. The given index variable should be a 2-D tensor with shape
[:math:`M`, 1]. Let `ID[i]` be the i-th index value of the index variable.
Then the output variable will be a tensor with shape [:math:`d_0`,
:math:`d_1`, ..., :math:`d_R`]. If we treat the output tensor as a 2-D
matrix with shape [:math:`M`, :math:`N`] and let :math:`O[i]` be the i-th
row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`.
* Ids: the index tensor.
* X[0 : N - 1]: the candidate tensors for output (N >= 2).
* For each index i from 0 to batchSize - 1, the output is the i-th row of the
the (Ids[i])-th tensor. the (Ids[i])-th tensor.
For i-th row of the output tensor: For i-th row of the output tensor:
$$y[i] = x_{k}[i]$$ $$
y[i] = x_{k}[i]
$$
where `y` is the output tensor, `x_{k}` is the k-th input tensor, where $y$ is the output tensor, $x_{k}$ is the k-th input tensor,
and `k = Ids[i]`. and $k = Ids[i]$.
)DOC"); )DOC");
} }
......
...@@ -128,8 +128,10 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -128,8 +128,10 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
"user should avoid setting this attribute.") "user should avoid setting this attribute.")
.SetDefault({}); .SetDefault({});
AddComment(R"DOC( AddComment(R"DOC(
Compute and return the noise-contrastive estimation training loss. Compute and return the noise-contrastive estimation training loss. See
See [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf). `Noise-contrastive estimation: A new estimation principle for unnormalized
statistical models
<http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf>`_.
By default this operator uses a uniform distribution for sampling. By default this operator uses a uniform distribution for sampling.
)DOC"); )DOC");
} }
......
...@@ -204,8 +204,6 @@ void Pool2dOpMaker::Make() { ...@@ -204,8 +204,6 @@ void Pool2dOpMaker::Make() {
// TODO(dzhwinter): need to registered layout transform function // TODO(dzhwinter): need to registered layout transform function
AddComment(R"DOC( AddComment(R"DOC(
Pool2d Operator.
The pooling2d operation calculates the output based on The pooling2d operation calculates the output based on
the input, pooling_type and ksize, strides, paddings parameters. the input, pooling_type and ksize, strides, paddings parameters.
Input(X) and output(Out) are in NCHW format, where N is batch size, C is the Input(X) and output(Out) are in NCHW format, where N is batch size, C is the
...@@ -215,19 +213,28 @@ These two elements represent height and width, respectively. ...@@ -215,19 +213,28 @@ These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different. The input(X) size and output(Out) size may be different.
Example: Example:
Input: Input:
X shape: $(N, C, H_{in}, W_{in})$ X shape: $(N, C, H_{in}, W_{in})$
Output: Output:
Out shape: $(N, C, H_{out}, W_{out})$ Out shape: $(N, C, H_{out}, W_{out})$
For ceil_mode = false: For ceil_mode = false:
$$ $$
H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\ H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1
W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 $$
$$
W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
$$ $$
For ceil_mode = true: For ceil_mode = true:
$$ $$
H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1 \\ H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1
W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1 $$
$$
W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1
$$ $$
)DOC"); )DOC");
......
...@@ -78,11 +78,15 @@ class CreateRecordIOReaderOp : public framework::OperatorBase { ...@@ -78,11 +78,15 @@ class CreateRecordIOReaderOp : public framework::OperatorBase {
class CreateRecordIOReaderOpMaker : public FileReaderMakerBase { class CreateRecordIOReaderOpMaker : public FileReaderMakerBase {
protected: protected:
void Apply() override { void Apply() override {
AddAttr<std::string>("filename", "The filename of record io reader"); AddAttr<std::string>(
"filename",
"The filename of record file. This file will given to reader.");
AddComment(R"DOC( AddComment(R"DOC(
CreateRecordIOReader Operator Open a recordio file and return the reader object. The returned reader object
is thread-safe.
Create a reader from a record io file NOTE: This is a very low-level API. It is used for debugging data file or
training. Please use `open_files` instead of this API for production usage.
)DOC"); )DOC");
} }
}; };
......
...@@ -54,7 +54,7 @@ std::unique_ptr<framework::ReaderBase> CreateReaderByFileName( ...@@ -54,7 +54,7 @@ std::unique_ptr<framework::ReaderBase> CreateReaderByFileName(
} }
void FileReaderMakerBase::Make() { void FileReaderMakerBase::Make() {
AddOutput("Out", "(ReaderHolder) The created random reader.").AsDuplicable(); AddOutput("Out", "(ReaderHolder): The created random reader.").AsDuplicable();
AddAttr<std::vector<int>>("shape_concat", "The concat of all data's shapes."); AddAttr<std::vector<int>>("shape_concat", "The concat of all data's shapes.");
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"ranks", "ranks",
......
...@@ -78,23 +78,23 @@ class RowConvOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -78,23 +78,23 @@ class RowConvOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", AddInput("X",
"(LoDTensor), the input(X) is a LodTensor, which supports " "the input(X) is a LodTensor, which supports "
"variable time-length input sequences. The underlying tensor " "variable time-length input sequences. The underlying tensor "
"in this LoDTensor is a matrix with shape (T x N), where T " "in this LoDTensor is a matrix with shape (T x N), where T "
"is the total time steps in this mini-batch and N is the input " "is the total time steps in this mini-batch and N is the input "
"data dimension."); "data dimension.");
AddInput("Filter", AddInput("Filter",
"(Tensor), the input(Filter) is a learnable parameter. It " "the input(Filter) is a learnable parameter. It "
"is a 2-D tensor with shape (future_context x N), where, " "is a 2-D tensor with shape (future_context x N), where, "
"future_context is the future context length and N is the data " "future_context is the future context length and N is the data "
"dimension."); "dimension.");
AddOutput("Out", AddOutput("Out",
"(LoDTensor), the output(Out) is a LodTensor, which supports " "the output(Out) is a LodTensor, which supports "
"variable time-length input sequences. The underlying tensor " "variable time-length input sequences. The underlying tensor "
"in this LodTensor is a matrix with shape T x N, i.e., the " "in this LodTensor is a matrix with shape T x N, i.e., the "
"same shape as X."); "same shape as X.");
AddComment(R"DOC( AddComment(R"DOC(
Row-convolution Operator. :strong:`Row-convolution operator`
The row convolution is called lookahead convolution. This operator was The row convolution is called lookahead convolution. This operator was
introduced in the following paper for DeepSpeech2: introduced in the following paper for DeepSpeech2:
...@@ -114,9 +114,23 @@ and a filter ($W$) of size $context \times d$, ...@@ -114,9 +114,23 @@ and a filter ($W$) of size $context \times d$,
the output sequence is convolved as: the output sequence is convolved as:
$$ $$
out_{i, :} = \sum_{j=i}^{i + context} in_{j,:} \dot W_{i-j, :} out_{i, :} = \\sum_{j=i}^{i + context} in_{j,:} \\cdot W_{i-j, :}
$$ $$
In the above equation:
* $Out_{i}$: The i-th row of output variable with shape [1, D].
* $\\tau$: Future context size.
* $X_{j}$: The j-th row of input variable with shape [1, D].
* $W_{i-j}$: The (i-j)-th row of parameters with shape [1, D].
More details about row_conv please refer to
the design document
https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645 .
)DOC"); )DOC");
} }
}; };
......
...@@ -95,8 +95,11 @@ of that dimension. If the value passed to start or end is larger than ...@@ -95,8 +95,11 @@ of that dimension. If the value passed to start or end is larger than
the n (the number of elements in this dimension), it represents n. the n (the number of elements in this dimension), it represents n.
For slicing to the end of a dimension with unknown size, it is recommended For slicing to the end of a dimension with unknown size, it is recommended
to pass in INT_MAX. If axes are omitted, they are set to [0, ..., ndim-1]. to pass in INT_MAX. If axes are omitted, they are set to [0, ..., ndim-1].
Following examples will explain how slice works:
Example 1: .. code-block:: text
Cast1:
Given: Given:
data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] data = [ [1, 2, 3, 4], [5, 6, 7, 8], ]
axes = [0, 1] axes = [0, 1]
...@@ -105,7 +108,7 @@ to pass in INT_MAX. If axes are omitted, they are set to [0, ..., ndim-1]. ...@@ -105,7 +108,7 @@ to pass in INT_MAX. If axes are omitted, they are set to [0, ..., ndim-1].
Then: Then:
result = [ [5, 6, 7], ] result = [ [5, 6, 7], ]
Example 2: Cast2:
Given: Given:
data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] data = [ [1, 2, 3, 4], [5, 6, 7, 8], ]
starts = [0, 1] starts = [0, 1]
......
...@@ -115,4 +115,7 @@ USE_CPU_ONLY_OP(concat); ...@@ -115,4 +115,7 @@ USE_CPU_ONLY_OP(concat);
REGISTER_OPERATOR(split, ops::SplitOp, ops::SplitOpMaker, ops::SplitGradMaker); REGISTER_OPERATOR(split, ops::SplitOp, ops::SplitOpMaker, ops::SplitGradMaker);
REGISTER_OP_CPU_KERNEL(split, REGISTER_OP_CPU_KERNEL(split,
ops::SplitOpKernel<paddle::platform::CPUPlace, float>); ops::SplitOpKernel<paddle::platform::CPUPlace, double>,
ops::SplitOpKernel<paddle::platform::CPUPlace, float>,
ops::SplitOpKernel<paddle::platform::CPUPlace, int64_t>,
ops::SplitOpKernel<paddle::platform::CPUPlace, int>);
...@@ -15,4 +15,7 @@ limitations under the License. */ ...@@ -15,4 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/split_op.h" #include "paddle/fluid/operators/split_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
split, ops::SplitOpKernel<paddle::platform::CUDADeviceContext, float>); split, ops::SplitOpKernel<paddle::platform::CUDADeviceContext, double>,
ops::SplitOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::SplitOpKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::SplitOpKernel<paddle::platform::CUDADeviceContext, int>);
...@@ -66,17 +66,25 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) { ...@@ -66,17 +66,25 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
} // namespace } // namespace
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
void paddle::operators::TensorRTEngineKernel<DeviceContext, T>::Prepare( void TensorRTEngineKernel<DeviceContext, T>::Prepare(
const framework::ExecutionContext &context) const { const framework::ExecutionContext &context) const {
VLOG(4) << "Prepare engine"; VLOG(4) << "Prepare engine";
// Get the ProgramDesc and pass to convert. // Get the ProgramDesc and pass to convert.
framework::proto::BlockDesc block_desc; framework::proto::BlockDesc block_desc;
block_desc.ParseFromString(context.Attr<std::string>("subgraph")); block_desc.ParseFromString(context.Attr<std::string>("subgraph"));
max_batch_ = context.Attr<int>("max_batch"); int max_batch = context.Attr<int>("max_batch");
auto max_workspace = context.Attr<int>("max_workspace"); auto max_workspace = context.Attr<int>("max_workspace");
engine_ = Singleton<TRT_EngineManager>::Global().Create( auto params = context.Attr<std::vector<std::string>>("parameters");
max_batch_, max_workspace, &stream_); std::unordered_set<std::string> parameters;
engine_->InitNetwork(); for (const auto &param : params) {
parameters.insert(param);
}
// TODO(Superjomn) replace this with a different stream
auto *engine = Singleton<TRT_EngineManager>::Global().Create(
max_batch, max_workspace, nullptr /*engine hold its own stream*/,
context.Attr<std::string>("engine_uniq_key"));
engine->InitNetwork();
framework::BlockDesc block(nullptr /*programdesc*/, &block_desc); framework::BlockDesc block(nullptr /*programdesc*/, &block_desc);
// Add inputs // Add inputs
...@@ -87,24 +95,23 @@ void paddle::operators::TensorRTEngineKernel<DeviceContext, T>::Prepare( ...@@ -87,24 +95,23 @@ void paddle::operators::TensorRTEngineKernel<DeviceContext, T>::Prepare(
PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR, PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
"TensorRT engine only takes LoDTensor as input"); "TensorRT engine only takes LoDTensor as input");
auto shape = var->GetShape(); auto shape = var->GetShape();
engine_->DeclareInput( engine->DeclareInput(
input, FluidDataType2TRT( input, FluidDataType2TRT(
var->Proto()->type().lod_tensor().tensor().data_type()), var->Proto()->type().lod_tensor().tensor().data_type()),
Vec2TRT_Dims(var->GetShape())); Vec2TRT_Dims(var->GetShape()));
} }
// TODO(Superjomn) parameters should be passed after analysised from outside.
inference::Singleton<inference::tensorrt::OpConverter>::Global().ConvertBlock( inference::Singleton<inference::tensorrt::OpConverter>::Global().ConvertBlock(
block_desc, {}, context.scope(), engine_); block_desc, parameters, context.scope(), engine);
// Add outputs // Add outputs
VLOG(4) << "declare outputs"; VLOG(4) << "declare outputs";
for (auto &output : context.Outputs("Ys")) { for (auto &output : context.Outputs("Ys")) {
VLOG(4) << "declare output " << output; VLOG(4) << "declare output " << output;
engine_->DeclareOutput(output); engine->DeclareOutput(output);
} }
engine_->FreezeNetwork(); engine->FreezeNetwork();
} }
class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
...@@ -113,6 +120,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -113,6 +120,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Xs", "A list of inputs.").AsDuplicable(); AddInput("Xs", "A list of inputs.").AsDuplicable();
AddOutput("Ys", "A list of outputs").AsDuplicable(); AddOutput("Ys", "A list of outputs").AsDuplicable();
AddAttr<std::string>("subgraph", "the subgraph."); AddAttr<std::string>("subgraph", "the subgraph.");
AddAttr<std::string>("engine_uniq_key", "unique key for the TRT engine.");
AddAttr<int>("max_batch", "the maximum batch size."); AddAttr<int>("max_batch", "the maximum batch size.");
AddAttr<int>("max_workspace", "the maximum batch size."); AddAttr<int>("max_workspace", "the maximum batch size.");
AddComment("TensorRT engine operator."); AddComment("TensorRT engine operator.");
......
...@@ -19,10 +19,14 @@ ...@@ -19,10 +19,14 @@
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using inference::Singleton;
using inference::tensorrt::TRT_EngineManager;
class TensorRTEngineOp : public framework::OperatorWithKernel { class TensorRTEngineOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -47,16 +51,18 @@ template <typename DeviceContext, typename T> ...@@ -47,16 +51,18 @@ template <typename DeviceContext, typename T>
class TensorRTEngineKernel : public framework::OpKernel<T> { class TensorRTEngineKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
if (!engine_) { auto engine_name = context.Attr<std::string>("engine_uniq_key");
if (!Singleton<TRT_EngineManager>::Global().HasEngine(engine_name)) {
Prepare(context); Prepare(context);
} }
auto* engine = Singleton<TRT_EngineManager>::Global().Get(engine_name);
auto input_names = context.op().Inputs("Xs"); auto input_names = context.op().Inputs("Xs");
PADDLE_ENFORCE(!input_names.empty(), "should pass more than one inputs"); PADDLE_ENFORCE(!input_names.empty(), "should pass more than one inputs");
// Try to determine a batch_size // Try to determine a batch_size
auto& tensor0 = inference::analysis::GetFromScope<framework::LoDTensor>( auto& tensor0 = inference::analysis::GetFromScope<framework::LoDTensor>(
context.scope(), input_names.front()); context.scope(), input_names.front());
int batch_size = tensor0.dims()[0]; int batch_size = tensor0.dims()[0];
PADDLE_ENFORCE_LE(batch_size, max_batch_); PADDLE_ENFORCE_LE(batch_size, context.Attr<int>("max_batch"));
// Convert input tensor from fluid to engine. // Convert input tensor from fluid to engine.
for (const auto& x : context.Inputs("Xs")) { for (const auto& x : context.Inputs("Xs")) {
...@@ -64,20 +70,20 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -64,20 +70,20 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>( auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(
context.scope(), x); context.scope(), x);
if (platform::is_cpu_place(t.place())) { if (platform::is_cpu_place(t.place())) {
engine_->SetInputFromCPU(x, static_cast<const void*>(t.data<void>()), engine->SetInputFromCPU(x, static_cast<const void*>(t.data<void>()),
t.memory_size()); t.memory_size());
} else { } else {
engine_->SetInputFromGPU(x, static_cast<const void*>(t.data<void>()), engine->SetInputFromGPU(x, static_cast<const void*>(t.data<void>()),
t.memory_size()); t.memory_size());
} }
} }
// Execute the engine. // Execute the engine.
PADDLE_ENFORCE_GT(batch_size, 0); PADDLE_ENFORCE_GT(batch_size, 0);
engine_->Execute(batch_size); engine->Execute(batch_size);
// Convert output tensor from engine to fluid // Convert output tensor from engine to fluid
for (const auto& y : context.Outputs("Ys")) { for (const auto& y : context.Outputs("Ys")) {
// convert output and copy to fluid. // convert output and copy to fluid.
nvinfer1::ITensor* trt_t = engine_->GetITensor(y); nvinfer1::ITensor* trt_t = engine->GetITensor(y);
auto dims = trt_t->getDimensions(); auto dims = trt_t->getDimensions();
// Use the output ITensor's dims to reshape the Fluid Tensor. // Use the output ITensor's dims to reshape the Fluid Tensor.
std::vector<int> ddim(dims.d, dims.d + dims.nbDims); std::vector<int> ddim(dims.d, dims.d + dims.nbDims);
...@@ -89,27 +95,22 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -89,27 +95,22 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto size = inference::analysis::AccuDims(dims.d, dims.nbDims); auto size = inference::analysis::AccuDims(dims.d, dims.nbDims);
if (platform::is_cpu_place(fluid_t->place())) { if (platform::is_cpu_place(fluid_t->place())) {
// TODO(Superjomn) change this float to dtype size. // TODO(Superjomn) change this float to dtype size.
engine_->GetOutputInCPU( engine->GetOutputInCPU(
y, fluid_t->mutable_data<float>(platform::CPUPlace()), y, fluid_t->mutable_data<float>(platform::CPUPlace()),
size * sizeof(float)); size * sizeof(float));
} else { } else {
engine_->GetOutputInGPU( engine->GetOutputInGPU(
y, fluid_t->mutable_data<float>(platform::CUDAPlace()), y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
size * sizeof(float)); size * sizeof(float));
} }
} }
cudaStreamSynchronize(stream_); cudaStreamSynchronize(*engine->stream());
} }
protected: protected:
// Build the engine. // Build the engine.
void Prepare(const framework::ExecutionContext& context) const; void Prepare(const framework::ExecutionContext& context) const;
private:
mutable cudaStream_t stream_;
mutable inference::tensorrt::TensorRTEngine* engine_{nullptr};
mutable int max_batch_{0};
}; };
} // namespace operators } // namespace operators
......
...@@ -79,6 +79,17 @@ void SetAttr<int64_t>(framework::proto::OpDesc* op, const std::string& name, ...@@ -79,6 +79,17 @@ void SetAttr<int64_t>(framework::proto::OpDesc* op, const std::string& name,
attr->set_type(paddle::framework::proto::AttrType::LONG); attr->set_type(paddle::framework::proto::AttrType::LONG);
attr->set_l(data); attr->set_l(data);
} }
template <>
void SetAttr<std::vector<std::string>>(framework::proto::OpDesc* op,
const std::string& name,
const std::vector<std::string>& data) {
auto* attr = op->add_attrs();
attr->set_name(name);
attr->set_type(paddle::framework::proto::AttrType::STRINGS);
for (const auto& s : data) {
attr->add_strings(s.c_str());
}
}
} // namespace } // namespace
...@@ -123,11 +134,15 @@ TEST(TensorRTEngineOp, manual) { ...@@ -123,11 +134,15 @@ TEST(TensorRTEngineOp, manual) {
engine_op_desc.SetOutput("Ys", std::vector<std::string>({"z0"})); engine_op_desc.SetOutput("Ys", std::vector<std::string>({"z0"}));
SetAttr<std::string>(engine_op_desc.Proto(), "subgraph", SetAttr<std::string>(engine_op_desc.Proto(), "subgraph",
block_->SerializeAsString()); block_->SerializeAsString());
SetAttr<int>(engine_op_desc.Proto(), "max_batch", 30); SetAttr<int>(engine_op_desc.Proto(), "max_batch", 100);
SetAttr<int>(engine_op_desc.Proto(), "max_workspace", 1 << 10); SetAttr<int>(engine_op_desc.Proto(), "max_workspace", 1 << 10);
SetAttr<std::string>(engine_op_desc.Proto(), "engine_uniq_key", "a_engine");
SetAttr<std::vector<std::string>>(engine_op_desc.Proto(), "parameters",
std::vector<std::string>({}));
LOG(INFO) << "create engine op"; LOG(INFO) << "create engine op";
auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto()); auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto());
LOG(INFO) << "engine_op " << engine_op.get();
framework::Scope scope; framework::Scope scope;
platform::CPUPlace place; platform::CPUPlace place;
...@@ -145,6 +160,88 @@ TEST(TensorRTEngineOp, manual) { ...@@ -145,6 +160,88 @@ TEST(TensorRTEngineOp, manual) {
engine_op->Run(scope, place); engine_op->Run(scope, place);
} }
void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
framework::ProgramDesc program;
framework::Scope scope;
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
auto* block_ = program.Proto()->add_blocks();
block_->set_idx(0);
block_->set_parent_idx(-1);
using shape_t = std::vector<int64_t>;
LOG(INFO) << "create block desc";
framework::BlockDesc block_desc(&program, block_);
auto AddFCLayer = [&](const std::string& x_name, const std::string& y_name,
const std::string& z_name, bool x_created,
const shape_t& x_shape, const shape_t& y_shape,
const shape_t& z_shape) {
LOG(INFO) << "create fc op";
auto* fc = block_desc.AppendOp();
fc->SetType("mul");
fc->SetInput("X", std::vector<std::string>({x_name}));
fc->SetInput("Y", std::vector<std::string>({y_name}));
fc->SetOutput("Out", std::vector<std::string>({z_name}));
// Set inputs' variable shape in BlockDesc
if (!x_created) {
AddTensorToBlockDesc(block_, x_name,
std::vector<int64_t>({batch_size, input_dim, 1, 1}));
}
AddTensorToBlockDesc(block_, y_name,
std::vector<int64_t>({input_dim, output_dim}));
AddTensorToBlockDesc(block_, z_name,
std::vector<int64_t>({batch_size, output_dim}));
// Prepare variables.
if (!x_created) {
CreateCPUTensor(&scope, x_name, std::vector<int64_t>(x_shape));
}
CreateCPUTensor(&scope, y_name, std::vector<int64_t>(y_shape));
CreateCPUTensor(&scope, z_name, std::vector<int64_t>(z_shape));
// It is wired, need to copy manually.
*block_->add_ops() = *fc->Proto();
};
// Test with 4 layer FC
AddFCLayer("x0", "y0", "z0", false, {batch_size, input_dim},
{input_dim, output_dim}, {batch_size, output_dim});
AddFCLayer("z0", "y1", "z1", true, {}, {output_dim, output_dim},
{batch_size, output_dim});
AddFCLayer("z1", "y2", "z2", true, {}, {output_dim, output_dim},
{batch_size, output_dim});
AddFCLayer("z2", "y3", "z3", true, {}, {output_dim, output_dim},
{batch_size, output_dim});
LOG(INFO) << "create tensorrt desc";
framework::OpDesc engine_op_desc(nullptr);
engine_op_desc.SetType("tensorrt_engine");
engine_op_desc.SetInput("Xs", std::vector<std::string>({"x0"}));
engine_op_desc.SetOutput("Ys", std::vector<std::string>({"z3"}));
SetAttr<std::string>(engine_op_desc.Proto(), "subgraph",
block_->SerializeAsString());
SetAttr<int>(engine_op_desc.Proto(), "max_batch", batch_size);
SetAttr<int>(engine_op_desc.Proto(), "max_workspace", 2 << 10);
SetAttr<std::vector<std::string>>(
engine_op_desc.Proto(), "parameters",
std::vector<std::string>({"y0", "y1", "y2", "y3"}));
SetAttr<std::string>(engine_op_desc.Proto(), "engine_uniq_key", "b_engine");
auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto());
// Execute them.
engine_op->Run(scope, place);
}
// Test with a larger FC layer.
TEST(TensorRTEngineOp, fc) { Execute(40, 256, 256); }
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -86,32 +86,24 @@ class UniformRandomOp : public framework::OperatorWithKernel { ...@@ -86,32 +86,24 @@ class UniformRandomOp : public framework::OperatorWithKernel {
class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddOutput("Out", "(Tensor) The output tensor of uniform random op"); AddOutput("Out", "The output tensor of uniform random op");
AddComment(R"DOC( AddComment(R"DOC(
Uniform random operator.
This operator initializes a tensor with random values sampled from a This operator initializes a tensor with random values sampled from a
uniform distribution. uniform distribution. The random result is in set [min, max].
)DOC"); )DOC");
AddAttr<std::vector<int>>("shape", AddAttr<std::vector<int>>("shape", "The shape of the output tensor");
"(vector<int>) The shape of the output tensor"); AddAttr<float>("min", "Minimum value of uniform random. [default -1.0].")
AddAttr<float>("min",
"(float, default -1.0) "
"Minimum value of uniform random")
.SetDefault(-1.0f); .SetDefault(-1.0f);
AddAttr<float>("max", AddAttr<float>("max", "Maximun value of uniform random. [default 1.0].")
"(float, default 1.0) "
"Maximun value of uniform random")
.SetDefault(1.0f); .SetDefault(1.0f);
AddAttr<int>("seed", AddAttr<int>("seed",
"(int, default 0) "
"Random seed used for generating samples. " "Random seed used for generating samples. "
"0 means use a seed generated by the system." "0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always " "Note that if seed is not 0, this operator will always "
"generate the same random numbers every time.") "generate the same random numbers every time. [default 0].")
.SetDefault(0); .SetDefault(0);
AddAttr<int>("dtype", "(int, default 5(FP32)) Output tensor data type") AddAttr<int>("dtype", "Output tensor data type. [default 5(FP32)].")
.SetDefault(framework::proto::VarType::FP32); .SetDefault(framework::proto::VarType::FP32);
} }
}; };
......
...@@ -144,28 +144,74 @@ PYBIND11_PLUGIN(core) { ...@@ -144,28 +144,74 @@ PYBIND11_PLUGIN(core) {
py::class_<LoDTensor, Tensor>(m, "LoDTensor") py::class_<LoDTensor, Tensor>(m, "LoDTensor")
.def_buffer( .def_buffer(
[](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
.def( .def("__init__",
"__init__", [](LoDTensor &instance, const std::vector<std::vector<size_t>>
[](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) { &recursive_sequence_lengths) {
LoD new_lod; LoD new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(recursive_sequence_lengths.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(recursive_sequence_lengths.begin(),
new (&instance) LoDTensor(new_lod); recursive_sequence_lengths.end(),
std::back_inserter(new_lod));
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
PADDLE_ENFORCE(
CheckLoD(new_offset_lod, -1),
"the provided recursive_sequence_lengths info is invalid");
new (&instance) LoDTensor(new_offset_lod);
}) })
.def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); }) .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
.def("set_lod", .def("set_lod",
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) { [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
// the input lod is offset-based level-of-detail info
LOG(WARNING)
<< "set_lod is deprecated and will be removed by 9.2018, "
"please switch to set_recursive_sequence_lengths.";
LoD new_lod; LoD new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()),
"the provided lod info is invalid");
self.set_lod(new_lod); self.set_lod(new_lod);
}) })
.def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> { .def("set_recursive_sequence_lengths",
auto lod = self.lod(); [](LoDTensor &self, const std::vector<std::vector<size_t>>
&recursive_sequence_lengths) {
// the input recursive_sequence_lengths is length-based
// level-of-detail info
LoD new_lod;
new_lod.reserve(recursive_sequence_lengths.size());
std::copy(recursive_sequence_lengths.begin(),
recursive_sequence_lengths.end(),
std::back_inserter(new_lod));
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
PADDLE_ENFORCE(
CheckLoD(new_offset_lod, vectorize(self.dims()).front()),
"the provided recursive_sequence_lengths info is invalid");
self.set_lod(new_offset_lod);
})
.def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the offset-based lod info
LOG(WARNING) << "lod is deprecated and will be removed by 9.2018, "
"please switch to recursive_sequence_lengths.";
LoD lod = self.lod();
std::vector<std::vector<size_t>> new_lod; std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod; return new_lod;
})
.def("recursive_sequence_lengths",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the length-based lod info
LoD lod = ConvertToLengthBasedLoD(self.lod());
std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod;
})
.def("has_valid_recursive_sequence_lengths", [](LoDTensor &self) -> bool {
// Check that the lod info is valid and match the outermost
// dimension of the LoDTensor data
return CheckLoD(self.lod(), vectorize(self.dims()).front());
}); });
py::class_<SelectedRows>(m, "SelectedRows") py::class_<SelectedRows>(m, "SelectedRows")
......
...@@ -31,6 +31,7 @@ int main(int argc, char** argv) { ...@@ -31,6 +31,7 @@ int main(int argc, char** argv) {
strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
#else #else
new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn")); new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn"));
new_argv.push_back(strdup("--undefok=use_mkldnn"));
#endif #endif
int new_argc = static_cast<int>(new_argv.size()); int new_argc = static_cast<int>(new_argv.size());
char** new_argv_address = new_argv.data(); char** new_argv_address = new_argv.data();
......
...@@ -47,7 +47,7 @@ class DataToLoDTensorConverter(object): ...@@ -47,7 +47,7 @@ class DataToLoDTensorConverter(object):
self.lod = [] self.lod = []
for i in six.range(lod_level): for i in six.range(lod_level):
self.lod.append([0]) self.lod.append([])
def feed(self, data): def feed(self, data):
self._feed_impl_(data, self.lod, self.lod_level) self._feed_impl_(data, self.lod, self.lod_level)
...@@ -56,8 +56,7 @@ class DataToLoDTensorConverter(object): ...@@ -56,8 +56,7 @@ class DataToLoDTensorConverter(object):
if lod_level == 0: if lod_level == 0:
self.data.append(data) self.data.append(data)
else: else:
cur_lod_len = len(data) lod[0].append(len(data))
lod[0].append(lod[0][-1] + cur_lod_len)
for each_data in data: for each_data in data:
self._feed_impl_(each_data, lod[1:], lod_level - 1) self._feed_impl_(each_data, lod[1:], lod_level - 1)
...@@ -66,7 +65,7 @@ class DataToLoDTensorConverter(object): ...@@ -66,7 +65,7 @@ class DataToLoDTensorConverter(object):
t = core.LoDTensor() t = core.LoDTensor()
t.set(arr, self.place) t.set(arr, self.place)
if self.lod_level > 0: if self.lod_level > 0:
t.set_lod(self.lod) t.set_recursive_sequence_lengths(self.lod)
return t return t
......
...@@ -15,11 +15,13 @@ ...@@ -15,11 +15,13 @@
import framework import framework
import numpy as np import numpy as np
import contextlib import contextlib
from framework import convert_np_dtype_to_dtype_
from core import VarDesc
__all__ = [ __all__ = [
'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu', 'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu',
'init_on_cpu', 'ConstantInitializer', 'UniformInitializer', 'init_on_cpu', 'ConstantInitializer', 'UniformInitializer',
'NormalInitializer', 'XavierInitializer' 'NormalInitializer', 'XavierInitializer', 'BilinearInitializer'
] ]
_force_init_on_cpu_ = False _force_init_on_cpu_ = False
...@@ -422,6 +424,101 @@ class MSRAInitializer(Initializer): ...@@ -422,6 +424,101 @@ class MSRAInitializer(Initializer):
return op return op
class BilinearInitializer(Initializer):
"""Implements the bilinear initializer.
This initializer can be used in transposed convolution operator to
act as upsampling. Users can upsample a feature map with shape of
(B, C, H, W) by any integer factor. The usage is:
>>> factor = 2
>>> w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
>>> initializer=Bilinear())
>>> conv_up = fluid.layers.conv2d_transpose(
>>> input,
>>> num_filters=C,
>>> output_size=None,
>>> filter_size=2 * factor - factor % 2,
>>> padding=ceil((factor - 1) / 2.),
>>> stride=factor,
>>> groups=C,
>>> param_attr=w_attr,
>>> bias_attr=False)
Where, `num_filters=C` and `groups=C` means this is channel-wise tranposed
convolution. The filter shape will be (C, 1, K, K) where K is `filer_size`,
This initializer will set a (K, K) interpolation kernel for every channel
of the filter identically. The resulting shape of the output feature map
will be (B, C, factor * H, factor * W). Note that the learning rate and the
weight decay are set to 0 in order to keep coefficient values of bilinear
interpolation unchanged during training.
"""
def __init__(self):
"""Constructor for BilinearInitializer.
"""
super(BilinearInitializer, self).__init__()
def __call__(self, var, block):
"""Add biliear initialization ops for a variable
Args:
var (Variable): Variable that needs to be initialized.
block (Block): The block in which initialization ops should
be added.
Returns:
the initialization op
Raises:
ValueError: If type of `var` and `block` is not right.
If the shape of `var` size is not 4 and
var.shape[2] != var.shape[3].
"""
if not isinstance(var, framework.Variable):
raise ValueError("var must be framework.Variable.")
if not isinstance(block, framework.Block):
raise ValueError("block must be framework.Block.")
shape = var.shape
if len(shape) != 4:
raise ValueError("the length of shape must be 4.")
if shape[2] != shape[3]:
raise ValueError("shape[2] must be equal to shape[3].")
weight = np.zeros(np.prod(var.shape), dtype='float32')
size = shape[3]
# factor
f = np.ceil(size / 2.)
# center
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(np.prod(shape)):
x = i % size
y = (i / size) % size
weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
weight = np.reshape(weight, shape)
if var.dtype == VarDesc.VarType.FP32:
value_name = "fp32_values"
values = [float(v) for v in weight.flat]
else:
raise ValueError("Unsupported dtype %s", input.dtype)
if np.prod(shape) > 1024 * 1024:
raise ValueError("The size of input is too big. ")
op = block.append_op(
type='assign_value',
outputs={'Out': [var]},
attrs={
'dtype': var.dtype,
'shape': list(shape),
value_name: values
})
var.op = op
return op
# We short the class name, since users will use the initializer with the package # We short the class name, since users will use the initializer with the package
# name. The sample code: # name. The sample code:
# #
...@@ -436,3 +533,4 @@ Uniform = UniformInitializer ...@@ -436,3 +533,4 @@ Uniform = UniformInitializer
Normal = NormalInitializer Normal = NormalInitializer
Xavier = XavierInitializer Xavier = XavierInitializer
MSRA = MSRAInitializer MSRA = MSRAInitializer
Bilinear = BilinearInitializer
...@@ -20,6 +20,7 @@ from ..framework import Program, Variable, Operator ...@@ -20,6 +20,7 @@ from ..framework import Program, Variable, Operator
from ..layer_helper import LayerHelper, unique_name from ..layer_helper import LayerHelper, unique_name
from ..initializer import force_init_on_cpu from ..initializer import force_init_on_cpu
from ops import logical_and, logical_not, logical_or from ops import logical_and, logical_not, logical_or
import numpy
__all__ = [ __all__ = [
'split_lod_tensor', 'split_lod_tensor',
...@@ -233,9 +234,56 @@ class BlockGuard(object): ...@@ -233,9 +234,56 @@ class BlockGuard(object):
class ParallelDo(object): class ParallelDo(object):
""" """
ParallelDo class. ParallelDo is used to represent multi-thread data parallel processing.
ParallelDo class is used to create a ParallelDo. Its vanilla implementation can be shown as the following (:math:`|` means
single thread and :math:`||||` means multiple threads)
.. code-block:: text
In the forward pass
| Split input onto different devices
| Copy parameter onto different devices
|||| Compute forward pass in parallel
| Merge output from different devices
In the backward pass
| Split output@grad onto different devices
|||| Compute backward pass in parallel
| accumulate param@grad from different devices to the first device
| Merge input@grad from different devices
| Copy param@grad to the place of parallel_do_op
Examples:
.. code-block:: python
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# ParallelDo version & Single-thread version
if thread_num > 1:
places = fluid.layers.get_places(thread_num)
pd = fluid.layers.ParallelDo(places)
with pd.do():
images = pd.read_input(images)
label = pd.read_input(label)
predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
pd.write_output(avg_cost)
avg_cost = pd()
avg_cost = fluid.layers.mean(avg_cost)
else:
predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
.. warning::
It will be soon deprecated, please use ParallelExecutor instead.
""" """
def __init__(self, places, use_nccl=False, name=None): def __init__(self, places, use_nccl=False, name=None):
...@@ -606,6 +654,29 @@ class WhileGuard(BlockGuard): ...@@ -606,6 +654,29 @@ class WhileGuard(BlockGuard):
class While(object): class While(object):
"""
while loop control flow.
Args:
cond (Variable): condition used to compare.
name (str): The name of this layer.
Examples:
.. code-block:: python
d0 = layers.data("d0", shape=[10], dtype='float32')
data_array = layers.array_write(x=d0, i=i)
array_len = layers.fill_constant(shape=[1],dtype='int64', value=3)
cond = layers.less_than(x=i, y=array_len)
while_op = layers.While(cond=cond)
with while_op.block():
d = layers.array_read(array=data_array, i=i)
i = layers.increment(x=i, in_place=True)
layers.array_write(result, i=i, array=d)
layers.less_than(x=i, y=array_len, cond=cond)
"""
BEFORE_WHILE_BLOCK = 0 BEFORE_WHILE_BLOCK = 0
IN_WHILE_BLOCK = 1 IN_WHILE_BLOCK = 1
AFTER_WHILE_BLOCK = 2 AFTER_WHILE_BLOCK = 2
...@@ -675,8 +746,8 @@ def lod_rank_table(x, level=0): ...@@ -675,8 +746,8 @@ def lod_rank_table(x, level=0):
.. code-block:: text .. code-block:: text
x is a LoDTensor: x is a LoDTensor:
x.lod = [[0, 2, 3], x.lod = [[2, 1],
[0, 5, 6, 7]] [5, 1, 1]]
x.data = [a, b, c, d, e, f, g] x.data = [a, b, c, d, e, f, g]
1. set level to 0: 1. set level to 0:
...@@ -748,17 +819,25 @@ def max_sequence_len(rank_table): ...@@ -748,17 +819,25 @@ def max_sequence_len(rank_table):
def lod_tensor_to_array(x, table): def lod_tensor_to_array(x, table):
""" Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY. """
Convert a LoDTensor to a LoDTensorArray.
This function split a LoDTesnor to a LoDTensorArray according to its LoD
information. LoDTensorArray is an alias of C++ std::vector<LoDTensor> in
PaddlePaddle. The generated LoDTensorArray of this function can be further read
or written by `read_from_array()` and `write_to_array()` operators. However,
this function is generally an internal component of PaddlePaddle `DynamicRNN`.
Users should not use it directly.
Args: Args:
x (Variable|list): The LOD tensor to be converted to a LOD tensor array. x (Variable|list): The LoDTensor to be converted to a LoDTensorArray.
table (ParamAttr|list): The variable that stores the level of lod table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in which is ordered by sequence length in
descending order. descending order. It is generally generated
by `layers.lod_rank_table()` API.
Returns: Returns:
Variable: The variable of type array that has been converted from a Variable: The LoDTensorArray that has been converted from the input tensor.
tensor.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -909,37 +988,40 @@ def create_array(dtype): ...@@ -909,37 +988,40 @@ def create_array(dtype):
dtype=dtype) dtype=dtype)
def less_than(x, y, force_cpu=True, cond=None, **ignored): @templatedoc()
def less_than(x, y, force_cpu=None, cond=None, **ignored):
""" """
**Less than** ${comment}
This layer returns the truth value of :math:`x < y` elementwise. >>> import paddle.fluid as fluid
>>> less = fluid.layers.less_than(x=label, y=limit)
Args: Args:
x(Variable): First operand of *less_than* x(${x_type}): ${x_comment}.
y(Variable): Second operand of *less_than* y(${y_type}): ${y_comment}.
force_cpu(Bool|True): The output data will be on CPU if set true. force_cpu(${force_cpu_type}): ${force_cpu_comment}.
cond(Variable|None): Optional output variable to store the result of *less_than* cond(Variable|None): Optional output variable to store the result of *less_than*
Returns: Returns:
Variable: The tensor variable storing the output of *less_than*. ${out_comment}.
Examples:
.. code-block:: python
less = fluid.layers.less_than(x=label, y=limit)
""" """
helper = LayerHelper("less_than", **locals()) helper = LayerHelper("less_than", **locals())
if cond is None: if cond is None:
cond = helper.create_tmp_variable(dtype='bool') cond = helper.create_tmp_variable(dtype='bool')
cond.stop_gradient = True cond.stop_gradient = True
attrs = dict()
if force_cpu is not None:
attrs['force_cpu'] = force_cpu
elif force_init_on_cpu():
attrs['force_cpu'] = force_init_on_cpu()
helper.append_op( helper.append_op(
type='less_than', type='less_than',
inputs={'X': [x], inputs={'X': [x],
'Y': [y]}, 'Y': [y]},
outputs={'Out': [cond]}, outputs={'Out': [cond]},
attrs={'force_cpu': force_cpu or force_init_on_cpu()}) attrs=attrs)
return cond return cond
...@@ -1023,8 +1105,28 @@ def array_read(array, i): ...@@ -1023,8 +1105,28 @@ def array_read(array, i):
def shrink_memory(x, i, table): def shrink_memory(x, i, table):
""" """
This function creates an operator to shrink_rnn_memory using the RankTable This function creates an operator to shrink rnn memory using the RankTable
as mentioned in the input parameter. as mentioned in the input parameter.
NOTE: This API is very low-level API. It is used by DynamicRNN only.
Since the Dynamic RNN uses no-padding way to implement RNN. The sequence
will be sorted by order, and the length of valid memory will be shrink after
each time step.
Args:
x(Variable): The memory object in the previous time step.
i(Variable): The step count variable. A int scalar as LoDTensor.
table(Variable): The RNNRankTable object.
Returns:
the memory variable after shrink.
Examples:
Since this API is very low level API. The example is not provided.
Please reference the implementation of class DynamicRNN for detail
usage.
""" """
helper = LayerHelper('shrink_memory', **locals()) helper = LayerHelper('shrink_memory', **locals())
out = helper.create_tmp_variable(dtype=x.dtype) out = helper.create_tmp_variable(dtype=x.dtype)
...@@ -1066,6 +1168,13 @@ def array_length(array): ...@@ -1066,6 +1168,13 @@ def array_length(array):
class ConditionalBlockGuard(BlockGuard): class ConditionalBlockGuard(BlockGuard):
"""
ConditionalBlockGuard is derived from BlockGuard. It is dedicated for
holding a ConditionalBlock, and helping users entering and exiting the
ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard
is generally an internal component of IfElse, users should not use it directly.
"""
def __init__(self, block): def __init__(self, block):
if not isinstance(block, ConditionalBlock): if not isinstance(block, ConditionalBlock):
raise TypeError("block should be conditional block") raise TypeError("block should be conditional block")
...@@ -1228,6 +1337,34 @@ class IfElseBlockGuard(object): ...@@ -1228,6 +1337,34 @@ class IfElseBlockGuard(object):
class IfElse(object): class IfElse(object):
"""
if-else control flow.
Args:
cond (Variable): condition used to compare.
name (str, default None): The name of this layer.
Examples:
.. code-block:: python
limit = fluid.layers.fill_constant_batch_size_like(
input=label, dtype='int64', shape=[1], value=5.0)
cond = fluid.layers.less_than(x=label, y=limit)
ie = fluid.layers.IfElse(cond)
with ie.true_block():
true_image = ie.input(image)
hidden = fluid.layers.fc(input=true_image, size=100, act='tanh')
prob = fluid.layers.fc(input=hidden, size=10, act='softmax')
ie.output(prob)
with ie.false_block():
false_image = ie.input(image)
hidden = fluid.layers.fc(
input=false_image, size=200, act='tanh')
prob = fluid.layers.fc(input=hidden, size=10, act='softmax')
ie.output(prob)
prob = ie()
"""
OUT_IF_ELSE_BLOCKS = 0 OUT_IF_ELSE_BLOCKS = 0
IN_IF_ELSE_TRUE_BLOCKS = 1 IN_IF_ELSE_TRUE_BLOCKS = 1
IN_IF_ELSE_FALSE_BLOCKS = 2 IN_IF_ELSE_FALSE_BLOCKS = 2
...@@ -1330,6 +1467,38 @@ class IfElse(object): ...@@ -1330,6 +1467,38 @@ class IfElse(object):
class DynamicRNN(object): class DynamicRNN(object):
"""
The dynamic RNN can process a batch of sequence data. The length of each
sample sequence can be different. This API automatically process them in
batch.
The input lod must be set. Please reference `lod_tensor`
>>> import paddle.fluid as fluid
>>> data = fluid.layers.data(name='sentence', dtype='int64', lod_level=1)
>>> embedding = fluid.layers.embedding(input=data, size=[65535, 32],
>>> is_sparse=True)
>>>
>>> drnn = fluid.layers.DynamicRNN()
>>> with drnn.block():
>>> word = drnn.step_input(embedding)
>>> prev = drnn.memory(shape=[200])
>>> hidden = fluid.layers.fc(input=[word, prev], size=200, act='relu')
>>> drnn.update_memory(prev, hidden) # set prev to hidden
>>> drnn.output(hidden)
>>>
>>> # last is the last time step of rnn. It is the encoding result.
>>> last = fluid.layers.sequence_last_step(drnn())
The dynamic RNN will unfold sequence into timesteps. Users need to define
how to process each time step during the :code:`with` block.
The `memory` is used staging data cross time step. The initial value of
memory can be zero or another variable.
The dynamic RNN can mark multiple variables as its output. Use `drnn()` to
get the output sequence.
"""
BEFORE_RNN = 0 BEFORE_RNN = 0
IN_RNN = 1 IN_RNN = 1
AFTER_RNN = 2 AFTER_RNN = 2
...@@ -1352,6 +1521,15 @@ class DynamicRNN(object): ...@@ -1352,6 +1521,15 @@ class DynamicRNN(object):
self.mem_link = [] self.mem_link = []
def step_input(self, x): def step_input(self, x):
"""
Mark a sequence as a dynamic RNN input.
Args:
x(Variable): The input sequence.
Returns:
The current timestep in the input sequence.
"""
self._assert_in_rnn_block_("step_input") self._assert_in_rnn_block_("step_input")
if not isinstance(x, Variable): if not isinstance(x, Variable):
raise TypeError( raise TypeError(
...@@ -1395,6 +1573,15 @@ class DynamicRNN(object): ...@@ -1395,6 +1573,15 @@ class DynamicRNN(object):
return array_read(array=input_array, i=self.step_idx) return array_read(array=input_array, i=self.step_idx)
def static_input(self, x): def static_input(self, x):
"""
Mark a variable as a RNN input. The input will not be scattered into
time steps.
Args:
x(Variable): The input variable.
Returns:
The input variable that can access in RNN.
"""
self._assert_in_rnn_block_("static_input") self._assert_in_rnn_block_("static_input")
if not isinstance(x, Variable): if not isinstance(x, Variable):
raise TypeError( raise TypeError(
...@@ -1416,6 +1603,10 @@ class DynamicRNN(object): ...@@ -1416,6 +1603,10 @@ class DynamicRNN(object):
@contextlib.contextmanager @contextlib.contextmanager
def block(self): def block(self):
"""
The block for user to define operators in RNN. See the class docstring
for more details.
"""
if self.status != DynamicRNN.BEFORE_RNN: if self.status != DynamicRNN.BEFORE_RNN:
raise ValueError("rnn.block() can only be invoke once") raise ValueError("rnn.block() can only be invoke once")
self.step_idx = fill_constant( self.step_idx = fill_constant(
...@@ -1442,6 +1633,9 @@ class DynamicRNN(object): ...@@ -1442,6 +1633,9 @@ class DynamicRNN(object):
x=each_array, table=self.lod_rank_table)) x=each_array, table=self.lod_rank_table))
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
"""
Get the output of RNN. This API should only be invoked after RNN.block()
"""
if self.status != DynamicRNN.AFTER_RNN: if self.status != DynamicRNN.AFTER_RNN:
raise ValueError(("Output of the dynamic RNN can only be visited " raise ValueError(("Output of the dynamic RNN can only be visited "
"outside the rnn block.")) "outside the rnn block."))
...@@ -1456,6 +1650,70 @@ class DynamicRNN(object): ...@@ -1456,6 +1650,70 @@ class DynamicRNN(object):
value=0.0, value=0.0,
need_reorder=False, need_reorder=False,
dtype='float32'): dtype='float32'):
"""
Create a memory variable for dynamic rnn.
If the :code:`init` is not None, :code:`memory` will be initialized by
this variable. The :code:`need_reorder` is used to reorder the memory as
the input variable. It should be set to true when the initialized memory
depends on the input sample.
For example,
>>> import paddle.fluid as fluid
>>> sentence = fluid.layers.data(
>>> name='sentence', dtype='float32', shape=[32])
>>> boot_memory = fluid.layers.data(
>>> name='boot', dtype='float32', shape=[10])
>>>
>>> drnn = fluid.layers.DynamicRNN()
>>> with drnn.block():
>>> word = drnn.step_input(sentence)
>>> memory = drnn.memory(init=boot_memory, need_reorder=True)
>>> hidden = fluid.layers.fc(
>>> input=[word, memory], size=10, act='tanh')
>>> drnn.update_memory(ex_mem=memory, new_mem=hidden)
>>> drnn.output(hidden)
>>> rnn_output = drnn()
Otherwise, if :code:`shape`, :code:`value`, :code:`dtype` are set, the
:code:`memory` will be initialized by this :code:`value`.
For example,
>>> import paddle.fluid as fluid
>>> sentence = fluid.layers.data(
>>> name='sentence', dtype='float32', shape=[32])
>>>
>>> drnn = fluid.layers.DynamicRNN()
>>> with drnn.block():
>>> word = drnn.step_input(sentence)
>>> memory = drnn.memory(shape=[10], dtype='float32', value=0)
>>> hidden = fluid.layers.fc(
>>> input=[word, memory], size=10, act='tanh')
>>> drnn.update_memory(ex_mem=memory, new_mem=hidden)
>>> drnn.output(hidden)
>>> rnn_output = drnn()
Args:
init(Variable|None): The initialized variable.
shape(list|tuple): The memory shape. NOTE the shape does not contain
batch_size.
value(float): the initalized value.
need_reorder(bool): True if the initialized memory depends on the
input sample.
dtype(str|numpy.dtype): The data type of the initialized memory.
Returns:
the memory variable.
"""
self._assert_in_rnn_block_('memory') self._assert_in_rnn_block_('memory')
if init is not None: if init is not None:
if not isinstance(init, Variable): if not isinstance(init, Variable):
...@@ -1523,6 +1781,16 @@ class DynamicRNN(object): ...@@ -1523,6 +1781,16 @@ class DynamicRNN(object):
return self.memory(init=init) return self.memory(init=init)
def update_memory(self, ex_mem, new_mem): def update_memory(self, ex_mem, new_mem):
"""
Update the memory from ex_mem to new_mem. NOTE that the shape and data
type of :code:`ex_mem` and :code:`new_mem` must be same.
Args:
ex_mem(Variable): the memory variable.
new_mem(Variable): the plain variable generated in RNN block.
Returns:
None
"""
self._assert_in_rnn_block_('update_memory') self._assert_in_rnn_block_('update_memory')
if not isinstance(ex_mem, Variable): if not isinstance(ex_mem, Variable):
raise TypeError("The input arg `ex_mem` of update_memory() must " raise TypeError("The input arg `ex_mem` of update_memory() must "
...@@ -1540,6 +1808,15 @@ class DynamicRNN(object): ...@@ -1540,6 +1808,15 @@ class DynamicRNN(object):
self.mem_link.append((new_mem, mem_array)) self.mem_link.append((new_mem, mem_array))
def output(self, *outputs): def output(self, *outputs):
"""
mark the RNN output variables.
Args:
outputs: The output variables.
Returns:
None
"""
self._assert_in_rnn_block_('output') self._assert_in_rnn_block_('output')
parent_block = self._parent_block_() parent_block = self._parent_block_()
for each in outputs: for each in outputs:
...@@ -1582,26 +1859,26 @@ def reorder_lod_tensor_by_rank(x, rank_table): ...@@ -1582,26 +1859,26 @@ def reorder_lod_tensor_by_rank(x, rank_table):
def is_empty(x, cond=None, **ignored): def is_empty(x, cond=None, **ignored):
""" """
**Is Empty** Test whether a Variable is empty.
This layer returns the truth value of whether the variable is empty.
Args: Args:
x(Variable): Operand of *is_empty* x (Variable): The Variable to be tested.
cond(Variable|None): Optional output variable to store the result cond (Variable|None): Output parameter. Returns the test result
of *is_empty* of given 'x'. Default: None
Returns: Returns:
Variable: The tensor variable storing the output of *is_empty*. Variable: A bool scalar. True if 'x' is an empty Variable.
Raises: Raises:
TypeError: If input cond is not a variable, or cond's dtype is TypeError: If input cond is not a variable, or cond's dtype is
not bool not bool.
Examples: Examples:
.. code-block:: python .. code-block:: python
less = fluid.layers.is_empty(x=input) res = fluid.layers.is_empty(x=input)
# or:
fluid.layers.is_empty(x=input, cond=res)
""" """
helper = LayerHelper("is_empty", **locals()) helper = LayerHelper("is_empty", **locals())
if cond is None: if cond is None:
......
...@@ -97,7 +97,9 @@ def detection_output(loc, ...@@ -97,7 +97,9 @@ def detection_output(loc,
nms_eta(float): The parameter for adaptive NMS. nms_eta(float): The parameter for adaptive NMS.
Returns: Returns:
Variable: The detection outputs is a LoDTensor with shape [No, 6]. Variable:
The detection outputs is a LoDTensor with shape [No, 6].
Each row has six values: [label, confidence, xmin, ymin, xmax, ymax]. Each row has six values: [label, confidence, xmin, ymin, xmax, ymax].
`No` is the total number of detections in this mini-batch. For each `No` is the total number of detections in this mini-batch. For each
instance, the offsets in first dimension are called LoD, the offset instance, the offsets in first dimension are called LoD, the offset
...@@ -210,53 +212,68 @@ def bipartite_match(dist_matrix, ...@@ -210,53 +212,68 @@ def bipartite_match(dist_matrix,
dist_threshold=None, dist_threshold=None,
name=None): name=None):
""" """
**Bipartite matchint operator** This operator implements a greedy bipartite matching algorithm, which is
used to obtain the matching with the maximum distance based on the input
This operator is a greedy bipartite matching algorithm, which is used to
obtain the matching with the maximum distance based on the input
distance matrix. For input 2D matrix, the bipartite matching algorithm can distance matrix. For input 2D matrix, the bipartite matching algorithm can
find the matched column for each row, also can find the matched row for find the matched column for each row (matched means the largest distance),
each column. And this operator only calculate matched indices from column also can find the matched row for each column. And this operator only
to row. For each instance, the number of matched indices is the number of calculate matched indices from column to row. For each instance,
of columns of the input ditance matrix. the number of matched indices is the column number of the input distance
matrix.
There are two outputs to save matched indices and distance.
A simple description, this algothrim matched the best (maximum distance) There are two outputs, matched indices and distance.
A simple description, this algorithm matched the best (maximum distance)
row entity to the column entity and the matched indices are not duplicated row entity to the column entity and the matched indices are not duplicated
in each row of ColToRowMatchIndices. If the column entity is not matched in each row of ColToRowMatchIndices. If the column entity is not matched
any row entity, set -1 in ColToRowMatchIndices. any row entity, set -1 in ColToRowMatchIndices.
Please note that the input DistMat can be LoDTensor (with LoD) or Tensor. NOTE: the input DistMat can be LoDTensor (with LoD) or Tensor.
If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size. If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size.
If Tensor, the height of ColToRowMatchIndices is 1. If Tensor, the height of ColToRowMatchIndices is 1.
NOTE: This API is a very low level API. It is used by :code:`ssd_loss`
layer. Please consider to use :code:`ssd_loss` instead.
Args: Args:
dist_matrix(Variable): This input is a 2-D LoDTensor with shape dist_matrix(Variable): This input is a 2-D LoDTensor with shape
[K, M]. It is pair-wise distance matrix between the entities [K, M]. It is pair-wise distance matrix between the entities
represented by each row and each column. For example, assumed one represented by each row and each column. For example, assumed one
entity is A with shape [K], another entity is B with shape [M]. The entity is A with shape [K], another entity is B with shape [M]. The
dist_matirx[i][j] is the distance between A[i] and B[j]. The bigger dist_matrix[i][j] is the distance between A[i] and B[j]. The bigger
the distance is, the better macthing the pairs are. Please note, the distance is, the better matching the pairs are.
This tensor can contain LoD information to represent a batch of
inputs. One instance of this batch can contain different numbers of NOTE: This tensor can contain LoD information to represent a batch
entities. of inputs. One instance of this batch can contain different numbers
of entities.
match_type(string|None): The type of matching method, should be match_type(string|None): The type of matching method, should be
'bipartite' or 'per_prediction', 'bipartite' by defalut. 'bipartite' or 'per_prediction'. [default 'bipartite'].
dist_threshold(float|None): If `match_type` is 'per_prediction', dist_threshold(float|None): If `match_type` is 'per_prediction',
this threshold is to determine the extra matching bboxes based this threshold is to determine the extra matching bboxes based
on the maximum distance, 0.5 by defalut. on the maximum distance, 0.5 by default.
Returns: Returns:
match_indices(Variable): A 2-D Tensor with shape [N, M] in int type. tuple: a tuple with two elements is returned. The first is
matched_indices, the second is matched_distance.
The matched_indices is a 2-D Tensor with shape [N, M] in int type.
N is the batch size. If match_indices[i][j] is -1, it N is the batch size. If match_indices[i][j] is -1, it
means B[j] does not match any entity in i-th instance. means B[j] does not match any entity in i-th instance.
Otherwise, it means B[j] is matched to row Otherwise, it means B[j] is matched to row
match_indices[i][j] in i-th instance. The row number of match_indices[i][j] in i-th instance. The row number of
i-th instance is saved in match_indices[i][j]. i-th instance is saved in match_indices[i][j].
match_distance(Variable): A 2-D Tensor with shape [N, M] in float type.
N is batch size. If match_indices[i][j] is -1, The matched_distance is a 2-D Tensor with shape [N, M] in float type
. N is batch size. If match_indices[i][j] is -1,
match_distance[i][j] is also -1.0. Otherwise, assumed match_distance[i][j] is also -1.0. Otherwise, assumed
match_distance[i][j] = d, and the row offsets of each instance match_distance[i][j] = d, and the row offsets of each instance
are called LoD. Then match_distance[i][j] = dist_matrix[d+LoD[i]][j]. are called LoD. Then match_distance[i][j] =
dist_matrix[d+LoD[i]][j].
Examples:
>>> x = fluid.layers.data(name='x', shape=[4], dtype='float32')
>>> y = fluid.layers.data(name='y', shape=[4], dtype='float32')
>>> iou = fluid.layers.iou_similarity(x=x, y=y)
>>> matched_indices, matched_dist = fluid.layers.bipartite_match(iou)
""" """
helper = LayerHelper('bipartite_match', **locals()) helper = LayerHelper('bipartite_match', **locals())
match_indices = helper.create_tmp_variable(dtype='int32') match_indices = helper.create_tmp_variable(dtype='int32')
...@@ -281,8 +298,6 @@ def target_assign(input, ...@@ -281,8 +298,6 @@ def target_assign(input,
mismatch_value=None, mismatch_value=None,
name=None): name=None):
""" """
**Target assigner operator**
This operator can be, for given the target bounding boxes or labels, This operator can be, for given the target bounding boxes or labels,
to assign classification and regression targets to each prediction as well as to assign classification and regression targets to each prediction as well as
weights to prediction. The weights is used to specify which prediction would weights to prediction. The weights is used to specify which prediction would
...@@ -296,6 +311,8 @@ def target_assign(input, ...@@ -296,6 +311,8 @@ def target_assign(input,
1. Assigning all outpts based on `match_indices`: 1. Assigning all outpts based on `match_indices`:
.. code-block:: text
If id = match_indices[i][j] > 0, If id = match_indices[i][j] > 0,
out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K] out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K]
...@@ -311,6 +328,8 @@ def target_assign(input, ...@@ -311,6 +328,8 @@ def target_assign(input,
Assumed that the row offset for each instance in `neg_indices` is called neg_lod, Assumed that the row offset for each instance in `neg_indices` is called neg_lod,
for i-th instance and each `id` of neg_indices in this instance: for i-th instance and each `id` of neg_indices in this instance:
.. code-block:: text
out[i][id][0 : K] = {mismatch_value, mismatch_value, ...} out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
out_weight[i][id] = 1.0 out_weight[i][id] = 1.0
...@@ -326,10 +345,23 @@ def target_assign(input, ...@@ -326,10 +345,23 @@ def target_assign(input,
mismatch_value (float32): Fill this value to the mismatched location. mismatch_value (float32): Fill this value to the mismatched location.
Returns: Returns:
out (Variable): The output is a 3D Tensor with shape [N, P, K], tuple:
N and P is the same as they are in `neg_indices`, K is the
same as it in input of X. If `match_indices[i][j]`. A tuple(out, out_weight) is returned. out is a 3D Tensor with
out_weight (Variable): The weight for output with the shape of [N, P, 1]. shape [N, P, K], N and P is the same as they are in
`neg_indices`, K is the same as it in input of X. If
`match_indices[i][j]`. out_weight is the weight for output with
the shape of [N, P, 1].
Examples:
.. code-block:: python
matched_indices, matched_dist = fluid.layers.bipartite_match(iou)
gt = layers.data(
name='gt', shape=[1, 1], dtype='int32', lod_level=1)
trg, trg_weight = layers.target_assign(
gt, matched_indices, mismatch_value=0)
""" """
helper = LayerHelper('target_assign', **locals()) helper = LayerHelper('target_assign', **locals())
out = helper.create_tmp_variable(dtype=input.dtype) out = helper.create_tmp_variable(dtype=input.dtype)
...@@ -364,7 +396,7 @@ def ssd_loss(location, ...@@ -364,7 +396,7 @@ def ssd_loss(location,
normalize=True, normalize=True,
sample_size=None): sample_size=None):
""" """
**Multi-box loss layer for object dection algorithm of SSD** **Multi-box loss layer for object detection algorithm of SSD**
This layer is to compute dection loss for SSD given the location offset This layer is to compute dection loss for SSD given the location offset
predictions, confidence predictions, prior boxes and ground-truth boudding predictions, confidence predictions, prior boxes and ground-truth boudding
...@@ -372,21 +404,35 @@ def ssd_loss(location, ...@@ -372,21 +404,35 @@ def ssd_loss(location,
is a weighted sum of the localization loss (or regression loss) and is a weighted sum of the localization loss (or regression loss) and
confidence loss (or classification loss) by performing the following steps: confidence loss (or classification loss) by performing the following steps:
1. Find matched boundding box by bipartite matching algorithm. 1. Find matched bounding box by bipartite matching algorithm.
1.1 Compute IOU similarity between ground-truth boxes and prior boxes. 1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
1.2 Compute matched boundding box by bipartite matching algorithm. 1.2 Compute matched boundding box by bipartite matching algorithm.
2. Compute confidence for mining hard examples 2. Compute confidence for mining hard examples
2.1. Get the target label based on matched indices. 2.1. Get the target label based on matched indices.
2.2. Compute confidence loss. 2.2. Compute confidence loss.
3. Apply hard example mining to get the negative example indices and update 3. Apply hard example mining to get the negative example indices and update
the matched indices. the matched indices.
4. Assign classification and regression targets 4. Assign classification and regression targets
4.1. Encoded bbox according to the prior boxes. 4.1. Encoded bbox according to the prior boxes.
4.2. Assign regression targets. 4.2. Assign regression targets.
4.3. Assign classification targets. 4.3. Assign classification targets.
5. Compute the overall objective loss. 5. Compute the overall objective loss.
5.1 Compute confidence loss. 5.1 Compute confidence loss.
5.1 Compute localization loss. 5.1 Compute localization loss.
5.3 Compute the overall weighted loss. 5.3 Compute the overall weighted loss.
Args: Args:
...@@ -421,39 +467,36 @@ def ssd_loss(location, ...@@ -421,39 +467,36 @@ def ssd_loss(location,
mining_type (str): The hard example mining type, should be 'hard_example' mining_type (str): The hard example mining type, should be 'hard_example'
or 'max_negative', now only support `max_negative`. or 'max_negative', now only support `max_negative`.
normalize (bool): Whether to normalize the SSD loss by the total number normalize (bool): Whether to normalize the SSD loss by the total number
of output locations, True by defalut. of output locations, True by default.
sample_size (int): The max sample size of negative box, used only when sample_size (int): The max sample size of negative box, used only when
mining_type is 'hard_example'. mining_type is 'hard_example'.
Returns: Returns:
Variable: The weighted sum of the localization loss and confidence loss, The weighted sum of the localization loss and confidence loss, with \
with shape [N * Np, 1], N and Np are the same as they are shape [N * Np, 1], N and Np are the same as they are in `location`.
in `location`.
Raises: Raises:
ValueError: If mining_type is 'hard_example', now only support ValueError: If mining_type is 'hard_example', now only support mining \
mining type of `max_negative`. type of `max_negative`.
Examples: Examples:
.. code-block:: python >>> pb = fluid.layers.data(
>>> name='prior_box',
pb = layers.data( >>> shape=[10, 4],
name='prior_box', >>> append_batch_size=False,
shape=[10, 4], >>> dtype='float32')
append_batch_size=False, >>> pbv = fluid.layers.data(
dtype='float32') >>> name='prior_box_var',
pbv = layers.data( >>> shape=[10, 4],
name='prior_box_var', >>> append_batch_size=False,
shape=[10, 4], >>> dtype='float32')
append_batch_size=False, >>> loc = fluid.layers.data(name='target_box', shape=[10, 4], dtype='float32')
dtype='float32') >>> scores = fluid.layers.data(name='scores', shape=[10, 21], dtype='float32')
loc = layers.data(name='target_box', shape=[10, 4], dtype='float32') >>> gt_box = fluid.layers.data(
scores = layers.data(name='scores', shape=[10, 21], dtype='float32') >>> name='gt_box', shape=[4], lod_level=1, dtype='float32')
gt_box = layers.data( >>> gt_label = fluid.layers.data(
name='gt_box', shape=[4], lod_level=1, dtype='float32') >>> name='gt_label', shape=[1], lod_level=1, dtype='float32')
gt_label = layers.data( >>> loss = fluid.layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv)
name='gt_label', shape=[1], lod_level=1, dtype='float32')
loss = layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv)
""" """
helper = LayerHelper('ssd_loss', **locals()) helper = LayerHelper('ssd_loss', **locals())
......
...@@ -22,9 +22,9 @@ from ..executor import global_scope ...@@ -22,9 +22,9 @@ from ..executor import global_scope
from layer_function_generator import generate_layer_fn, templatedoc from layer_function_generator import generate_layer_fn, templatedoc
__all__ = [ __all__ = [
'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file', 'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'Recv',
'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer', 'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch',
'random_data_generator', 'Preprocessor', 'load' 'double_buffer', 'random_data_generator', 'Preprocessor', 'load'
] ]
...@@ -177,18 +177,17 @@ class ListenAndServ(object): ...@@ -177,18 +177,17 @@ class ListenAndServ(object):
}) })
def Send(endpoints, send_vars, get_vars=None): def Send(endpoints, send_vars, sync=True):
""" """
Send layer Send variables to the server side, and get vars from server
side when server have finished running server side program.
Args: Args:
endpoints: comma seperated IP:PORT pairs in the order endpoints (str): comma seperated IP:PORT pairs in the order
of send_vars to send of send_vars to send
send_vars: vars to send send_vars (list): variables to send to server
get_vars: vars to get from server after send completes. sync (bool): whether to wait the request finish
Send variables to the server side, and get vars from server
side when server have finished running server side program.
""" """
assert (type(send_vars) == list) assert (type(send_vars) == list)
...@@ -196,40 +195,33 @@ def Send(endpoints, send_vars, get_vars=None): ...@@ -196,40 +195,33 @@ def Send(endpoints, send_vars, get_vars=None):
endpoints = list(set(epmap)) endpoints = list(set(epmap))
helper = LayerHelper("Send", **locals()) helper = LayerHelper("Send", **locals())
if not get_vars:
get_vars = []
for s in send_vars:
v = helper.create_tmp_variable(dtype=s.dtype, stop_gradient=True)
get_vars.append(v)
rpc_op_role_name = core.op_proto_and_checker_maker.kOpRoleAttrName() rpc_op_role_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
helper.append_op( helper.append_op(
type="send", type="send",
inputs={"X": send_vars}, inputs={"X": send_vars},
outputs={"Out": get_vars},
attrs={ attrs={
"endpoints": endpoints, "endpoints": endpoints,
"epmap": epmap, "epmap": epmap,
rpc_op_role_name: core.op_proto_and_checker_maker.OpRole.RPC rpc_op_role_name: core.op_proto_and_checker_maker.OpRole.RPC
}) })
if sync:
return get_vars helper.append_op(type="send_barrier", attrs={"endpoints": endpoints})
def Recv(endpoints, get_vars): def Recv(endpoints, get_vars, sync=True):
""" """
Recv layer Receive variables from server side
Args: Args:
endpoints: comma seperated IP:PORT pairs in the order endpoints (str): comma seperated IP:PORT pairs in the order
of send_vars to send of send_vars to send
send_vars: vars to send get_vars (list): vars to get from server after send completes.
get_vars: vars to get from server after send completes. sync (bool): whether to wait the request finish
Send variables to the server side, and get vars from server Returns:
side when server have finished running server side program. list: list of received variables
""" """
assert (type(send_vars) == list)
assert (type(get_vars) == list) assert (type(get_vars) == list)
epmap = endpoints.split(",") epmap = endpoints.split(",")
...@@ -242,6 +234,9 @@ def Recv(endpoints, get_vars): ...@@ -242,6 +234,9 @@ def Recv(endpoints, get_vars):
outputs={"Out": get_vars}, outputs={"Out": get_vars},
attrs={"endpoints": endpoints, attrs={"endpoints": endpoints,
"epmap": epmap}) "epmap": epmap})
if sync:
helper.append_op(type="fetch_barrier", attrs={"endpoints": endpoints})
return get_vars
def monkey_patch_reader_methods(reader): def monkey_patch_reader_methods(reader):
...@@ -292,6 +287,7 @@ def _copy_reader_create_op_(block, op): ...@@ -292,6 +287,7 @@ def _copy_reader_create_op_(block, op):
return new_op return new_op
@templatedoc(op_type='create_recordio_file_reader')
def open_recordio_file(filename, def open_recordio_file(filename,
shapes, shapes,
lod_levels, lod_levels,
...@@ -299,34 +295,30 @@ def open_recordio_file(filename, ...@@ -299,34 +295,30 @@ def open_recordio_file(filename,
pass_num=1, pass_num=1,
for_parallel=True): for_parallel=True):
""" """
Open a RecordIO file ${comment}
This layer takes a RecordIO file to read from and returns a Reader Variable.
Via the Reader Variable, we can get data from the given RecordIO file.
Args: Args:
filename(str): The RecordIO file's name. filename(${filename_type}): ${filename_comment}.
shapes(list): List of tuples which declaring data shapes. shapes(list): List of tuples which declaring data shapes.
lod_levels(list): List of ints which declaring data lod_level. lod_levels(${lod_levels_type}): ${lod_levels_comment}.
dtypes(list): List of strs which declaring data type. dtypes(list): List of strs which declaring data type.
pass_num(int): Number of passes to run. pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel. subsequent operators in parallel.
Returns: Returns:
Variable: A Reader Variable via which we can get RecordIO file data. ${out_comment}.
Examples: Examples:
.. code-block:: python
reader = fluid.layers.io.open_recordio_file( >>> import paddle.fluid as fluid
filename='./data.recordio', >>> reader = fluid.layers.io.open_recordio_file(
shapes=[(3,224,224), (1)], >>> filename='./data.recordio',
lod_levels=[0, 0], >>> shapes=[(3,224,224), (1)],
dtypes=['float32', 'int64']) >>> lod_levels=[0, 0],
>>> dtypes=['float32', 'int64'])
# Via the reader, we can use 'read_file' layer to get data: >>> # Via the reader, we can use 'read_file' layer to get data:
image, label = fluid.layers.io.read_file(reader) >>> image, label = fluid.layers.io.read_file(reader)
""" """
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes] dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
shape_concat = [] shape_concat = []
...@@ -386,16 +378,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True): ...@@ -386,16 +378,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
Variable: A Reader Variable from which we can get random data. Variable: A Reader Variable from which we can get random data.
Examples: Examples:
.. code-block:: python .. code-block:: python
reader = fluid.layers.io.random_data_generator( reader = fluid.layers.random_data_generator(
low=0.0, low=0.0,
high=1.0, high=1.0,
shapes=[(3,224,224), (1)], shapes=[[3,224,224], [1]],
lod_levels=[0, 0]) lod_levels=[0, 0])
# Via the reader, we can use 'read_file' layer to get data: # Via the reader, we can use 'read_file' layer to get data:
image, label = fluid.layers.io.read_file(reader) image, label = fluid.layers.read_file(reader)
""" """
dtypes = [core.VarDesc.VarType.FP32] * len(shapes) dtypes = [core.VarDesc.VarType.FP32] * len(shapes)
shape_concat = [] shape_concat = []
...@@ -544,16 +536,77 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None): ...@@ -544,16 +536,77 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):
def shuffle(reader, buffer_size): def shuffle(reader, buffer_size):
"""
Shuffle the reader.
"""
return __create_unshared_decorated_reader__( return __create_unshared_decorated_reader__(
'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)}) 'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)})
def batch(reader, batch_size): def batch(reader, batch_size):
"""
This layer is a reader decorator. It takes a reader and adds
'batching' decoration on it. When reading with the result
decorated reader, output data will be automatically organized
to the form of batches.
Args:
reader(Variable): The reader to be decorated with 'batching'.
batch_size(int): The batch size.
Returns:
Variable: The reader which has been decorated with 'batching'.
Examples:
.. code-block:: python
raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
'./data2.recordio'],
shapes=[(3,224,224), (1)],
lod_levels=[0, 0],
dtypes=['float32', 'int64'],
thread_num=2,
buffer_size=2)
batch_reader = fluid.layers.batch(reader=raw_reader, batch_size=5)
# If we read data with the raw_reader:
# data = fluid.layers.read_file(raw_reader)
# We can only get data instance by instance.
#
# However, if we read data with the batch_reader:
# data = fluid.layers.read_file(batch_reader)
# Each 5 adjacent instances will be automatically combined together
# to become a batch. So what we get('data') is a batch data instead
# of an instance.
"""
return __create_unshared_decorated_reader__( return __create_unshared_decorated_reader__(
'create_batch_reader', reader, {'batch_size': int(batch_size)}) 'create_batch_reader', reader, {'batch_size': int(batch_size)})
def double_buffer(reader, place=None, name=None): def double_buffer(reader, place=None, name=None):
"""
Wrap a double buffer reader. The data will copy to target place with a
double buffer queue. If the target place is None, the place that executor
perform on will be used.
Args:
reader(Variable): the reader variable need to be wrapped.
place(Place): the place of target data. Default is the sample place of
executor perform.
name(str): Variable name. None if the user does not care.
Returns:
wrapped reader with double buffer.
Examples:
>>> reader = fluid.layers.open_files(filenames=['somefile'],
>>> shapes=[[-1, 784], [-1, 1]],
>>> dtypes=['float32', 'int64'])
>>> reader = fluid.layers.double_buffer(reader)
>>> img, label = fluid.layers.read_file(reader)
"""
attrs = dict() attrs = dict()
if place is not None: if place is not None:
attrs['place'] = str(place).upper() attrs['place'] = str(place).upper()
...@@ -571,15 +624,41 @@ def parallel(reader): ...@@ -571,15 +624,41 @@ def parallel(reader):
{}) {})
def read_file(file_obj): def read_file(reader):
"""
Execute the given reader and get data via it.
A reader is also a Variable. It can be a raw reader generated by
`fluid.layers.open_files()` or a decorated one generated by
`fluid.layers.double_buffer()` and so on.
Args:
reader(Variable): The reader to execute.
Returns:
Tuple[Variable]: Data read via the given reader.
Examples:
.. code-block:: python
data_file = fluid.layers.open_files(
filenames=['mnist.recordio'],
shapes=[(-1, 748), (-1, 1)],
lod_levels=[0, 0],
dtypes=["float32", "int64"])
data_file = fluid.layers.double_buffer(
fluid.layers.batch(data_file, batch_size=64))
input, label = fluid.layers.read_file(data_file)
"""
helper = LayerHelper('read_file') helper = LayerHelper('read_file')
out = [ out = [
helper.create_tmp_variable( helper.create_tmp_variable(
stop_gradient=True, dtype='float32') stop_gradient=True, dtype='float32')
for _ in range(len(file_obj.desc.shapes())) for _ in range(len(reader.desc.shapes()))
] ]
helper.append_op( helper.append_op(
type='read', inputs={'Reader': [file_obj]}, outputs={'Out': out}) type='read', inputs={'Reader': [reader]}, outputs={'Out': out})
if len(out) == 1: if len(out) == 1:
return out[0] return out[0]
else: else:
...@@ -587,6 +666,26 @@ def read_file(file_obj): ...@@ -587,6 +666,26 @@ def read_file(file_obj):
class Preprocessor(object): class Preprocessor(object):
"""
A block for data pre-processing in reader.
Args:
reader (Variable): A reader variable.
name (str, default None): The name of the reader.
Examples:
.. code-block:: python
preprocessor = fluid.layers.io.Preprocessor(reader=reader)
with preprocessor.block():
img, lbl = preprocessor.inputs()
img_out = img / 2
lbl_out = lbl + 1
preprocessor.outputs(img_out, lbl_out)
data_file = fluid.layers.io.double_buffer(preprocessor())
"""
BEFORE_SUB_BLOCK = 0 BEFORE_SUB_BLOCK = 0
IN_SUB_BLOCK = 1 IN_SUB_BLOCK = 1
AFTER_SUB_BLOCK = 2 AFTER_SUB_BLOCK = 2
......
...@@ -44,6 +44,11 @@ def _type_to_str_(tp): ...@@ -44,6 +44,11 @@ def _type_to_str_(tp):
return framework_pb2.AttrType.Name(tp) return framework_pb2.AttrType.Name(tp)
_two_dollar_pattern_ = re.compile(r"\$\$([^\$]+)\$\$")
_single_dollar_pattern_ = re.compile(r"\$([^\$]+)\$")
_two_bang_pattern_ = re.compile(r"!!([^!]+)!!")
def _generate_doc_string_(op_proto): def _generate_doc_string_(op_proto):
""" """
Generate docstring by OpProto Generate docstring by OpProto
...@@ -55,22 +60,26 @@ def _generate_doc_string_(op_proto): ...@@ -55,22 +60,26 @@ def _generate_doc_string_(op_proto):
str: the document string str: the document string
""" """
def escape_math(text):
return _two_bang_pattern_.sub(
r'$$\1$$',
_single_dollar_pattern_.sub(
r':math:`\1`', _two_dollar_pattern_.sub(r"!!\1!!", text)))
if not isinstance(op_proto, framework_pb2.OpProto): if not isinstance(op_proto, framework_pb2.OpProto):
raise TypeError("OpProto should be `framework_pb2.OpProto`") raise TypeError("OpProto should be `framework_pb2.OpProto`")
buf = cStringIO.StringIO() buf = cStringIO.StringIO()
buf.write(op_proto.comment) buf.write(escape_math(op_proto.comment))
buf.write('\nArgs:\n') buf.write('\nArgs:\n')
for each_input in op_proto.inputs: for each_input in op_proto.inputs:
line_begin = ' {0}: '.format(_convert_(each_input.name)) line_begin = ' {0}: '.format(_convert_(each_input.name))
buf.write(line_begin) buf.write(line_begin)
buf.write(each_input.comment) buf.write(escape_math(each_input.comment))
buf.write('\n') if each_input.duplicable:
buf.write(' ' * len(line_begin)) buf.write(" Duplicatable.")
buf.write('Duplicable: ') if each_input.dispensable:
buf.write(str(each_input.duplicable)) buf.write(" Optional.")
buf.write(' Optional: ')
buf.write(str(each_input.dispensable))
buf.write('\n') buf.write('\n')
skip_attrs = OpProtoHolder.generated_op_attr_names() skip_attrs = OpProtoHolder.generated_op_attr_names()
...@@ -83,7 +92,7 @@ def _generate_doc_string_(op_proto): ...@@ -83,7 +92,7 @@ def _generate_doc_string_(op_proto):
buf.write(' (') buf.write(' (')
buf.write(_type_to_str_(each_attr.type)) buf.write(_type_to_str_(each_attr.type))
buf.write('): ') buf.write('): ')
buf.write(each_attr.comment) buf.write(escape_math(each_attr.comment))
buf.write('\n') buf.write('\n')
if len(op_proto.outputs) != 0: if len(op_proto.outputs) != 0:
...@@ -92,7 +101,7 @@ def _generate_doc_string_(op_proto): ...@@ -92,7 +101,7 @@ def _generate_doc_string_(op_proto):
for each_opt in op_proto.outputs: for each_opt in op_proto.outputs:
if not each_opt.intermediate: if not each_opt.intermediate:
break break
buf.write(each_opt.comment) buf.write(escape_math(each_opt.comment))
return buf.getvalue() return buf.getvalue()
......
...@@ -25,10 +25,11 @@ import nn ...@@ -25,10 +25,11 @@ import nn
import ops import ops
import tensor import tensor
from ..initializer import init_on_cpu from ..initializer import init_on_cpu
from ..framework import default_main_program, Parameter
__all__ = [ __all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay' 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS'
] ]
...@@ -70,21 +71,40 @@ def noam_decay(d_model, warmup_steps): ...@@ -70,21 +71,40 @@ def noam_decay(d_model, warmup_steps):
def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""Applies exponential decay to the learning rate. """
Applies exponential decay to the learning rate.
When training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, the learning rate will be decayed by
'decay_rate' every 'decay_steps' steps.
>>> if staircase == True:
>>> decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps)
>>> else:
>>> decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
```python
decayed_learning_rate = learning_rate *
decay_rate ^ (global_step / decay_steps)
```
Args: Args:
learning_rate: A scalar float32 value or a Variable. This learning_rate(Variable|float): The initial learning rate.
will be the initial learning rate during training decay_steps(int): See the decay computation above.
decay_steps: A Python `int32` number. decay_rate(float): The decay rate. See the decay computation above.
decay_rate: A Python `float` number. staircase(Boolean): If True, decay the learning rate at discrete intervals.
staircase: Boolean. If set true, decay the learning rate every decay_steps. Default: False
Returns: Returns:
The decayed learning rate Variable: The decayed learning rate
Examples:
.. code-block:: python
base_lr = 0.1
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
sgd_optimizer.minimize(avg_cost)
""" """
global_step = _decay_step_counter() global_step = _decay_step_counter()
...@@ -128,22 +148,39 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -128,22 +148,39 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""Applies inverse time decay to the initial learning rate. """
Applies inverse time decay to the initial learning rate.
When training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, an inverse decay function will be
applied to the initial learning rate.
>>> if staircase: >>> if staircase == True:
>>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step)) >>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
>>> else: >>> else:
>>> decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step) >>> decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
Args: Args:
learning_rate: A scalar float32 value or a Variable. This learning_rate(Variable|float): The initial learning rate.
will be the initial learning rate during training. decay_steps(int): See the decay computation above.
decay_steps: A Python `int32` number. decay_rate(float): The decay rate. See the decay computation above.
decay_rate: A Python `float` number. staircase(Boolean): If True, decay the learning rate at discrete intervals.
staircase: Boolean. If set true, decay the learning rate every decay_steps. Default: False
Returns: Returns:
The decayed learning rate Variable: The decayed learning rate
Examples:
.. code-block:: python
base_lr = 0.1
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.inverse_time_decay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
sgd_optimizer.minimize(avg_cost)
""" """
global_step = _decay_step_counter() global_step = _decay_step_counter()
...@@ -209,15 +246,27 @@ def polynomial_decay(learning_rate, ...@@ -209,15 +246,27 @@ def polynomial_decay(learning_rate,
def piecewise_decay(boundaries, values): def piecewise_decay(boundaries, values):
"""Applies piecewise decay to the initial learning rate. """Applies piecewise decay to the initial learning rate.
>>> boundaries = [10000, 20000] The algorithm can be described as the code below.
>>> values = [1.0, 0.5, 0.1]
>>> .. code-block:: python
>>> if step < 10000:
>>> learning_rate = 1.0 boundaries = [10000, 20000]
>>> elif 10000 <= step < 20000: values = [1.0, 0.5, 0.1]
>>> learning_rate = 0.5 if step < 10000:
>>> else: learning_rate = 1.0
>>> learning_rate = 0.1 elif 10000 <= step < 20000:
learning_rate = 0.5
else:
learning_rate = 0.1
Args:
boundaries: A list of steps numbers.
values: A list of learning rate values that will be picked during
different step boundaries.
Returns:
The decayed learning rate.
""" """
if len(values) - len(boundaries) != 1: if len(values) - len(boundaries) != 1:
...@@ -249,3 +298,41 @@ def piecewise_decay(boundaries, values): ...@@ -249,3 +298,41 @@ def piecewise_decay(boundaries, values):
tensor.assign(last_value_var, lr) tensor.assign(last_value_var, lr)
return lr return lr
def append_LARS(params_grads, learning_rate, weight_decay):
"""Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
each layer.
```python
learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
```
Args:
learning_rate: A learning rate Variable. This
is the global learning rate for LARS.
weight_decay: A Python `float` number.
Returns:
The decayed learning rate
"""
def _balanced_weight(param_norm, grad_norm):
if weight_decay == 1.0:
return grad_norm + param_norm
else:
return grad_norm + weight_decay * param_norm
for param, grad in params_grads:
param_lr = param.optimize_attr['learning_rate']
param_norm = ops.sqrt(nn.reduce_sum(input=ops.square(param)))
grad_norm = ops.sqrt(nn.reduce_sum(input=ops.square(grad)))
if type(param_lr) == float and param_lr == 1.0:
decayed_lr = learning_rate * param_norm \
/ _balanced_weight(param_norm, grad_norm)
else:
decayed_lr = learning_rate * param_lr * param_norm \
/ _balanced_weight(param_norm, grad_norm)
# set back param local learning rate
param.optimize_attr['learning_rate'] = decayed_lr
...@@ -53,6 +53,43 @@ def accuracy(input, label, k=1, correct=None, total=None): ...@@ -53,6 +53,43 @@ def accuracy(input, label, k=1, correct=None, total=None):
def auc(input, label, curve='ROC', num_thresholds=200): def auc(input, label, curve='ROC', num_thresholds=200):
"""
**Area Under the Curve (AUC) Layer**
This implementation computes the AUC according to forward output and label.
It is used very widely in binary classification evaluation.
Note: If input label contains values other than 0 and 1, it will be cast
to `bool`. Find the relevant definitions `here <https://en.wikipedia.org\
/wiki/Receiver_operating_characteristic#Area_under_the_curve>`_.
There are two types of possible curves:
1. ROC: Receiver operating characteristic;
2. PR: Precision Recall
Args:
input(Variable): A floating-point 2D Variable, values are in the range
[0, 1]. Each row is sorted in descending order. This
input should be the output of topk. Typically, this
Variable indicates the probability of each label.
label(Variable): A 2D int Variable indicating the label of the training
data. The height is batch size and width is always 1.
curve(str): Curve type, can be 'ROC' or 'PR'. Default 'ROC'.
num_thresholds(int): The number of thresholds to use when discretizing
the roc curve. Default 200.
Returns:
Variable: A scalar representing the current AUC.
Examples:
.. code-block:: python
# network is a binary classification model and label the ground truth
prediction = network(image, is_infer=True)
auc_out=fluid.layers.auc(input=prediction, label=label)
"""
warnings.warn( warnings.warn(
"This interface not recommended, fluid.layers.auc compute the auc at every minibatch, \ "This interface not recommended, fluid.layers.auc compute the auc at every minibatch, \
but can not aggregate them and get the pass AUC, because pass \ but can not aggregate them and get the pass AUC, because pass \
......
此差异已折叠。
...@@ -17,7 +17,6 @@ __activations__ = [ ...@@ -17,7 +17,6 @@ __activations__ = [
'sigmoid', 'sigmoid',
'logsigmoid', 'logsigmoid',
'exp', 'exp',
'relu',
'tanh', 'tanh',
'tanh_shrink', 'tanh_shrink',
'softshrink', 'softshrink',
...@@ -29,7 +28,6 @@ __activations__ = [ ...@@ -29,7 +28,6 @@ __activations__ = [
'sin', 'sin',
'round', 'round',
'reciprocal', 'reciprocal',
'log',
'square', 'square',
'softplus', 'softplus',
'softsign', 'softsign',
...@@ -40,8 +38,6 @@ __activations__ = [ ...@@ -40,8 +38,6 @@ __activations__ = [
'relu6', 'relu6',
'pow', 'pow',
'stanh', 'stanh',
'hard_shrink',
'thresholded_relu',
'hard_sigmoid', 'hard_sigmoid',
'swish', 'swish',
] ]
...@@ -64,11 +60,9 @@ __all__ = [ ...@@ -64,11 +60,9 @@ __all__ = [
'logical_or', 'logical_or',
'logical_xor', 'logical_xor',
'logical_not', 'logical_not',
'uniform_random',
'uniform_random_batch_size_like', 'uniform_random_batch_size_like',
'gaussian_random', 'gaussian_random',
'gaussian_random_batch_size_like', 'gaussian_random_batch_size_like',
'cumsum',
'scatter', 'scatter',
'sum', 'sum',
'slice', 'slice',
...@@ -79,3 +73,88 @@ __all__ = [ ...@@ -79,3 +73,88 @@ __all__ = [
for _OP in set(__all__): for _OP in set(__all__):
globals()[_OP] = generate_layer_fn(_OP) globals()[_OP] = generate_layer_fn(_OP)
__all__ += ["uniform_random"]
_uniform_random_ = generate_layer_fn('uniform_random')
def uniform_random(shape, dtype=None, min=None, max=None, seed=None):
kwargs = dict()
for name in locals():
val = locals()[name]
if val is not None:
kwargs[name] = val
return _uniform_random_(**kwargs)
uniform_random.__doc__ = _uniform_random_.__doc__ + """
Examples:
>>> result = fluid.layers.uniform_random(shape=[32, 784])
"""
__all__ += ['hard_shrink']
_hard_shrink_ = generate_layer_fn('hard_shrink')
def hard_shrink(x, threshold=None):
kwargs = dict()
for name in locals():
val = locals()[name]
if val is not None:
kwargs[name] = val
return _hard_shrink_(**kwargs)
hard_shrink.__doc__ = _hard_shrink_.__doc__ + """
Examples:
>>> data = fluid.layers.data(name="input", shape=[784])
>>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
"""
__all__ += ['cumsum']
_cum_sum_ = generate_layer_fn('cumsum')
def cumsum(x, axis=None, exclusive=None, reverse=None):
kwargs = dict()
for name in locals():
val = locals()[name]
if val is not None:
kwargs[name] = val
return _cum_sum_(**kwargs)
cumsum.__doc__ = _cum_sum_.__doc__ + """
Examples:
>>> data = fluid.layers.data(name="input", shape=[32, 784])
>>> result = fluid.layers.cumsum(data, axis=0)
"""
__all__ += ['thresholded_relu']
_thresholded_relu_ = generate_layer_fn('thresholded_relu')
def thresholded_relu(x, threshold=None):
kwargs = dict()
for name in locals():
val = locals()[name]
if val is not None:
kwargs[name] = val
_thresholded_relu_(**kwargs)
thresholded_relu.__doc__ = _thresholded_relu_.__doc__ + """
Examples:
>>> data = fluid.layers.data(name="input", shape=[1])
>>> result = fluid.layers.thresholded_relu(data, threshold=0.4)
"""
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册