Commit fda1a788 authored by Y yuyang18

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_api_reference_docs

......@@ -22,6 +22,7 @@
| jczaja | Jacek Czaja |
| JiayiFeng | Jia-Yi Feng |
| kbinias | Krzysztof Binias |
| kexinzhao | Ke-Xin Zhao |
| kuke | Yi-Bing Liu |
| lcy-seso | Ying Cao |
| lipeng-unisound | Peng Li |
......
......@@ -61,6 +61,7 @@ option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
option(WITH_CONTRIB "Compile the third-party contribution" OFF)
option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
# CMAKE_BUILD_TYPE
......@@ -193,7 +194,10 @@ set(EXTERNAL_LIBS
if(WITH_GPU)
include(cuda)
include(tensorrt)
endif(WITH_GPU)
include(external/anakin)
else()
set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when GPU is set." FORCE)
endif()
if(WITH_AMD_GPU)
find_package(HIP)
......
......@@ -173,21 +173,6 @@ def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
return avg_cost, feeding_list
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
lod_t = core.LoDTensor()
lod_t.set(flattened_data, place)
lod_t.set_lod([lod])
return lod_t, lod[-1]
def lodtensor_to_ndarray(lod_tensor):
dims = lod_tensor.get_dims()
ndarray = np.zeros(shape=dims).astype('float32')
......
......@@ -125,18 +125,3 @@ def get_model(args):
batch_size=args.batch_size)
return loss, inference_program, adam, train_reader, test_reader, batch_acc
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = numpy.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
if (NOT WITH_ANAKIN)
return()
endif()
set(ANAKIN_INSTALL_DIR "${THIRD_PARTY_PATH}/install/anakin" CACHE PATH
"Anakin install path." FORCE)
set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header files")
set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library")
set(ANAKIN_COMPILE_EXTRA_FLAGS -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp)
set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz")
# A helper function used for Anakin: currently, to use Anakin, one needs to recursively
# include nearly all of its header files.
function(fetch_include_recursively root_dir)
if (IS_DIRECTORY ${root_dir})
include_directories(${root_dir})
endif()
file(GLOB ALL_SUB RELATIVE ${root_dir} ${root_dir}/*)
foreach(sub ${ALL_SUB})
if (IS_DIRECTORY ${root_dir}/${sub})
fetch_include_recursively(${root_dir}/${sub})
endif()
endforeach()
endfunction()
# download library
message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}")
execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*")
execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}")
execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz")
if (WITH_ANAKIN)
message(STATUS "Anakin for inference is enabled")
message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
fetch_include_recursively(${ANAKIN_INCLUDE})
link_directories(${ANAKIN_LIBRARY})
endif()
......@@ -29,6 +29,8 @@ IF(NOT ${CBLAS_FOUND})
"${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE)
ADD_DEFINITIONS(-DPADDLE_USE_OPENBLAS)
SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")
SET(OPENBLAS_COMMIT "v0.2.20")
......
......@@ -39,7 +39,7 @@ function(copy TARGET)
message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers")
endif()
math(EXPR len "${copy_lib_SRCS_len} - 1")
add_custom_target(${TARGET} DEPENDS ${copy_lib_DEPS})
foreach(index RANGE ${len})
list(GET copy_lib_SRCS ${index} src)
......@@ -155,6 +155,15 @@ copy(inference_lib DEPS paddle_fluid_shared paddle_fluid
DSTS ${dst_dir}/${module} ${dst_dir}/${module}
)
if(WITH_CONTRIB)
set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference")
copy(contrib_inference_lib DEPS paddle_inference_api
SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h
${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api.*
DSTS ${contrib_dst_dir} ${contrib_dst_dir}
)
endif()
set(module "platform")
copy(platform_lib DEPS profiler_py_proto
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
......
#!/bin/bash
python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler > layers.rst
python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler metric > layers.rst
for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
do
......
......@@ -33,6 +33,13 @@ Xavier
:members:
:noindex:
Bilinear
--------
.. autoclass:: paddle.fluid.initializer.Bilinear
:members:
:noindex:
force_init_on_cpu
-----------------
......@@ -73,3 +80,10 @@ XavierInitializer
:members:
:noindex:
BilinearInitializer
-------------------
.. autoclass:: paddle.fluid.initializer.BilinearInitializer
:members:
:noindex:
......@@ -59,3 +59,39 @@ get_inference_program
.. autofunction:: paddle.fluid.io.get_inference_program
:noindex:
save_checkpoint
---------------
.. autofunction:: paddle.fluid.io.save_checkpoint
:noindex:
load_checkpoint
---------------
.. autofunction:: paddle.fluid.io.load_checkpoint
:noindex:
clean_checkpoint
----------------
.. autofunction:: paddle.fluid.io.clean_checkpoint
:noindex:
load_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.load_persist_vars_without_grad
:noindex:
save_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.save_persist_vars_without_grad
:noindex:
get_latest_checkpoint_serial
----------------------------
.. autofunction:: paddle.fluid.io.get_latest_checkpoint_serial
:noindex:
......@@ -181,6 +181,12 @@ Print
.. autofunction:: paddle.fluid.layers.Print
:noindex:
is_empty
--------
.. autofunction:: paddle.fluid.layers.is_empty
:noindex:
device
======
......@@ -219,6 +225,12 @@ Send
.. autofunction:: paddle.fluid.layers.Send
:noindex:
Recv
----
.. autofunction:: paddle.fluid.layers.Recv
:noindex:
open_recordio_file
------------------
......@@ -255,6 +267,25 @@ double_buffer
.. autofunction:: paddle.fluid.layers.double_buffer
:noindex:
random_data_generator
---------------------
.. autofunction:: paddle.fluid.layers.random_data_generator
:noindex:
Preprocessor
------------
.. autoclass:: paddle.fluid.layers.Preprocessor
:members:
:noindex:
load
----
.. autofunction:: paddle.fluid.layers.load
:noindex:
nn
==
......@@ -342,6 +373,12 @@ conv2d
.. autofunction:: paddle.fluid.layers.conv2d
:noindex:
conv3d
------
.. autofunction:: paddle.fluid.layers.conv3d
:noindex:
sequence_pool
-------------
......@@ -366,6 +403,12 @@ pool2d
.. autofunction:: paddle.fluid.layers.pool2d
:noindex:
pool3d
------
.. autofunction:: paddle.fluid.layers.pool3d
:noindex:
batch_norm
----------
......@@ -384,6 +427,12 @@ conv2d_transpose
.. autofunction:: paddle.fluid.layers.conv2d_transpose
:noindex:
conv3d_transpose
----------------
.. autofunction:: paddle.fluid.layers.conv3d_transpose
:noindex:
sequence_expand
---------------
......@@ -594,6 +643,48 @@ roi_pool
.. autofunction:: paddle.fluid.layers.roi_pool
:noindex:
dice_loss
---------
.. autofunction:: paddle.fluid.layers.dice_loss
:noindex:
image_resize
------------
.. autofunction:: paddle.fluid.layers.image_resize
:noindex:
image_resize_short
------------------
.. autofunction:: paddle.fluid.layers.image_resize_short
:noindex:
resize_bilinear
---------------
.. autofunction:: paddle.fluid.layers.resize_bilinear
:noindex:
gather
------
.. autofunction:: paddle.fluid.layers.gather
:noindex:
random_crop
-----------
.. autofunction:: paddle.fluid.layers.random_crop
:noindex:
mean_iou
--------
.. autofunction:: paddle.fluid.layers.mean_iou
:noindex:
ops
===
......@@ -699,12 +790,6 @@ logical_not
.. autofunction:: paddle.fluid.layers.logical_not
:noindex:
uniform_random
--------------
.. autofunction:: paddle.fluid.layers.uniform_random
:noindex:
uniform_random_batch_size_like
------------------------------
......@@ -723,12 +808,6 @@ gaussian_random_batch_size_like
.. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like
:noindex:
cumsum
------
.. autofunction:: paddle.fluid.layers.cumsum
:noindex:
scatter
-------
......@@ -741,10 +820,28 @@ sum
.. autofunction:: paddle.fluid.layers.sum
:noindex:
iou_similarity
slice
-----
.. autofunction:: paddle.fluid.layers.iou_similarity
.. autofunction:: paddle.fluid.layers.slice
:noindex:
polygon_box_transform
---------------------
.. autofunction:: paddle.fluid.layers.polygon_box_transform
:noindex:
shape
-----
.. autofunction:: paddle.fluid.layers.shape
:noindex:
maxout
------
.. autofunction:: paddle.fluid.layers.maxout
:noindex:
sigmoid
......@@ -903,18 +1000,6 @@ stanh
.. autofunction:: paddle.fluid.layers.stanh
:noindex:
hard_shrink
-----------
.. autofunction:: paddle.fluid.layers.hard_shrink
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex:
hard_sigmoid
------------
......@@ -927,6 +1012,30 @@ swish
.. autofunction:: paddle.fluid.layers.swish
:noindex:
uniform_random
--------------
.. autofunction:: paddle.fluid.layers.uniform_random
:noindex:
hard_shrink
-----------
.. autofunction:: paddle.fluid.layers.hard_shrink
:noindex:
cumsum
------
.. autofunction:: paddle.fluid.layers.cumsum
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex:
tensor
======
......@@ -984,6 +1093,18 @@ fill_constant
.. autofunction:: paddle.fluid.layers.fill_constant
:noindex:
argmin
------
.. autofunction:: paddle.fluid.layers.argmin
:noindex:
argmax
------
.. autofunction:: paddle.fluid.layers.argmax
:noindex:
ones
----
......@@ -999,6 +1120,12 @@ zeros
detection
=========
prior_box
---------
.. autofunction:: paddle.fluid.layers.prior_box
:noindex:
multi_box_head
--------------
......@@ -1086,3 +1213,18 @@ noam_decay
.. autofunction:: paddle.fluid.layers.noam_decay
:noindex:
metric
======
accuracy
--------
.. autofunction:: paddle.fluid.layers.accuracy
:noindex:
auc
---
.. autofunction:: paddle.fluid.layers.auc
:noindex:
......@@ -89,6 +89,13 @@ DecayedAdagradOptimizer
:members:
:noindex:
RMSPropOptimizer
----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
Adadelta
--------
......
......@@ -23,3 +23,15 @@ profiler
.. autofunction:: paddle.fluid.profiler.profiler
:noindex:
start_profiler
--------------
.. autofunction:: paddle.fluid.profiler.start_profiler
:noindex:
stop_profiler
-------------
.. autofunction:: paddle.fluid.profiler.stop_profiler
:noindex:
......@@ -171,7 +171,7 @@ Pytorch chooses immediate evaluation. It avoids ever materializing a "forward gr
## What can fluid learn from them?
TBD
Please refer to `paddle/contrib/dynamic/`.
# Appendix
......
......@@ -104,7 +104,7 @@ no changes added to commit (use "git add" and/or "git commit -a")
➜ docker run -it -v $(pwd):/paddle paddle:latest-dev bash -c "cd /paddle/build && ctest"
```
For more information about building and testing, please refer to [this document](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_cn.rst)
For more information about building and testing, please refer to [Install and Run with Docker](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/v2/build_and_install/docker_install_cn.rst)
## Commit
......
......@@ -14,3 +14,4 @@
#
add_subdirectory(inference)
add_subdirectory(tape)
......@@ -17,48 +17,9 @@ if(APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move")
endif(APPLE)
set(ANAKIN_INCLUDE "" CACHE STRING "root of Anakin header files")
set(ANAKIN_LIBRARY "" CACHE STRING "path of Anakin library")
set(inference_deps paddle_inference_api paddle_fluid_api)
# if anakin is set enable anakin api implementation
if(ANAKIN_INCLUDE AND ANAKIN_LIBRARY)
set(ANAKIN_FOUND ON)
else()
set(ANAKIN_FOUND OFF)
endif()
function(fetch_include_recursively root_dir)
if (IS_DIRECTORY ${root_dir})
include_directories(${root_dir})
endif()
file(GLOB ALL_SUB RELATIVE ${root_dir} ${root_dir}/*)
foreach(sub ${ALL_SUB})
if (IS_DIRECTORY ${root_dir}/${sub})
fetch_include_recursively(${root_dir}/${sub})
endif()
endforeach()
endfunction()
if (ANAKIN_FOUND)
# Anakin's code style doesn't follow google c style.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp")
message(STATUS "Anakin for inference is enabled")
message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
fetch_include_recursively(${ANAKIN_INCLUDE})
link_directories(${ANAKIN_LIBRARY})
nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
target_link_libraries(inference_anakin_api anakin anakin_saber_common)
list(APPEND inference_deps inference_anakin_api)
endif()
function(inference_api_test TARGET_NAME)
if (WITH_TESTING)
set(options "")
......@@ -79,7 +40,7 @@ function(inference_api_test TARGET_NAME)
endfunction(inference_api_test)
cc_library(paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
cc_test(test_paddle_inference_api
......@@ -89,9 +50,17 @@ cc_test(test_paddle_inference_api
inference_api_test(test_paddle_inference_api_impl
ARGS test_word2vec test_image_classification)
if (ANAKIN_FOUND)
if (WITH_ANAKIN AND WITH_TESTING) # only needed in CI
# Because Anakin does not have official library releases and its protobuf and CUDA versions do not match Paddle's,
# the Anakin library will not be merged into our official inference library. To use the Anakin prediction API, one needs
# to compile libinference_anakin_api.a and link against anakin.so.
nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
target_compile_options(inference_anakin_api BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
target_link_libraries(inference_anakin_api anakin anakin_saber_common)
cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
DEPS ${inference_deps})
ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
DEPS inference_anakin_api)
target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
endif()
if(WITH_TESTING)
......
......@@ -12,9 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cuda.h>
#include "paddle/contrib/inference/paddle_inference_api_anakin_engine.h"
#include <cuda.h>
namespace paddle {
......
......@@ -19,10 +19,9 @@ limitations under the License. */
#pragma once
// NOTE: This header file does not have a namespace.
//#include <test/framework/net/paddle_api.h>
#include "paddle/contrib/inference/paddle_inference_api.h"
// from anakin
#include "framework/core/net/net.h"
#include "saber/saber_types.h"
......
......@@ -12,17 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "paddle/contrib/inference/paddle_inference_api.h"
DEFINE_string(model, "", "Directory of the inference model.");
namespace paddle {
AnakinConfig GetConfig() {
AnakinConfig config;
config.model_file = "./mobilenet_v2.anakin.bin";
config.model_file = FLAGS_model;
config.device = 0;
config.max_batch_size = 1;
return config;
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
if(APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move")
endif(APPLE)
cc_library(tape_variable SRCS variable.cc DEPS ${FLUID_CORE_MODULES} device_context framework_proto proto_desc operator)
cc_library(tape SRCS tape.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} tape_variable)
cc_test(test_tape
SRCS test_tape.cc
DEPS tape tape_variable)
# Dynamic Graph on Fluid
PaddlePaddle Fluid is targeting autodiff without a tape, which, however, is very
challenging, and we are still a long way from there. DyNet and PyTorch provide a good design
idea, the *tape*, that significantly eases the challenge. DyNet also provides
a C++ API that is as convenient as Python but with higher efficiency and can
conveniently integrate with industrial/production systems. This package, `tape`,
combines the best of
1. the tape from PyTorch and DyNet,
2. the C++ API and core from DyNet, and
3. the rich set of operators from PaddlePaddle.
## Overview
We can implement a DyNet-like tape (see this [survey](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/survey/dynamic_graph.md))
by wrapping Paddle Fluid's `Operator` and `Variable`.
The user API is straightforward because
1. it is imperative and uses the host language's control-flow logic, and
1. it avoids extra concepts such as `Scope` and `Executor`.
All of these benefits come at the cost of adding just one line, `reset_global_tape`,
at every iteration (see the User API section below).
## Code Structure
In short, a `Tape` contains a vector of `OpHandle`s, and an `OpHandle` contains its
`type`, pointers to the `Variable`s it reads and writes, and the necessary attributes.
```c++
class Variable {
public:
VariableHandle Grad(); // returns its gradient variable
private:
framework::VarDesc desc_; // compile time infershape, necessary for lazy execution
framework::Variable var_; // run time variable, holds data memory
};
using VariableHandle = shared_ptr<Variable>;
struct OpHandle {
string type_;
map<string, vector<VariableHandle>> inputs_;
map<string, vector<VariableHandle>> outputs_;
AttributeMap attrs_;
};
class Tape {
public:
void AddOp(OpHandle); // add op
void Forward(); // execute the tape_
void Backward(); // execute the backward of the tape_
private:
vector<OpHandle> tape_;
};
```
We use `Function` to represent layers. A `Function` takes care of parameter
initialization and calls `AddOp` on the tape when it is invoked.
```c++
class Linear {
public:
Linear(int in_dim, int out_dim, const std::string &act)
: w_(new Variable("LinearWeight")),
b_(new Variable("LinearBias")),
act_(act) {
Tape init_tape;
std::string initializer = "fill_constant";
framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{in_dim, out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs);
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs);
init_tape.Forward();
}
VariableHandle operator()(VariableHandle input) {
VariableHandle pre_bias(new Variable("linear"));
get_global_tape().AddOp("mul",
{{"X", {input}}, {"Y", {w_}}},
{{"Out", {pre_bias}}},
{{"x_num_col_dims", 1}, {"y_num_col_dims", 1}});
VariableHandle pre_act(new Variable("linear"));
get_global_tape().AddOp("elementwise_add",
{{"X", {pre_bias}}, {"Y", {b_}}},
{{"Out", {pre_act}}},
{{"axis", 1}});
VariableHandle post_act(new Variable("linear"));
get_global_tape().AddOp(act_,
{{"X", {pre_act}}},
{{"Out", {post_act}}},
{});
return post_act;
}
std::vector<VariableHandle> Params() { return {w_, b_}; }
private:
VariableHandle w_;
VariableHandle b_;
std::string act_;
};
```
## User API
```c++
// Model function
paddle::tape::Linear linear1(3, 3, "relu"); // init weight and bias
paddle::tape::Linear linear2(3, 3, "relu"); // init weight and bias
paddle::tape::Mean mean;
// Optimizer
paddle::tape::SGD sgd(0.001);
// Data Feeder
paddle::tape::Fill data_feeder(...);
VariableHandle input(new paddle::tape::Variable("input"));
VariableHandle label(new paddle::tape::Variable("label"));
for (int i = 0; i < 2; ++i) {
reset_global_tape();
data_feeder(input, label);
auto loss = softmax(linear2(linear1(input)), label); // compile time InferShape & InferVarType
LOG(INFO) << loss.value(); // Run forward up to loss
// Run backward, store gradient of w at w->Grad()
get_global_tape().Backward(loss);
// Update w
sgd(linear1.Params());
sgd(linear2.Params());
}
```
<details>
<summary></summary>
digraph G {
subgraph cluster_0 {
node [shape=record,style=filled];
style=filled;
color=lightgrey;
linear1 [label="{type: mul | {input | {<before_mul1>X: before_mul1 |<weight1> Y: weight1}} | {output |<before_bias1> Out: before_bias1}}"];
elementwise_add1 [label="{type: elementwise_add | {input | {<before_bias1>X: before_bias1 |<bias1> Y: bias1}} | {output |<before_act1> Out: before_act1}}"];
relu1 [label="{type: relu | {input | {<before_act1>X: before_act1 }} | {output |<after_act1> Out: after_act1}}"];
linear1 -> elementwise_add1->relu1;
label = "forward tape";
}
linear1:before_mul1->before_mul1
linear1:weight1->weight1
linear1:before_bias1->before_bias1
elementwise_add1:bias1->bias1
elementwise_add1:before_bias1->before_bias1
elementwise_add1:before_act1->before_act1
relu1:before_act1->before_act1
relu1:after_act1->after_act1
subgraph cluster_1 {
node [shape=record,style=filled];
style=filled;
color=lightgrey;
linear1_grad [label="{type: mul_grad | {input | {<before_mul1>X: before_mul1 |<weight1> Y: weight1|<before_bias1_grad> Out_grad: before_bias1_grad}} | {output |{<before_mul1_grad>X_grad: before_mul1_grad |<weight1_grad> Y_grad: weight1_grad}}}"];
elementwise_add1_grad [label="{type: elementwise_add_grad | {input | <before_act1_grad> Out_grad: before_act1_grad} | {output |{<before_bias1_grad>X_grad: before_bias1_grad |<bias1_grad> Y_grad: bias1_grad}}}"];
relu1_grad [label="{type: relu_grad | {input |<after_act1_grad> Out_grad: after_act1_grad} | {output | {<before_act1_grad>X_grad: before_act1_grad }}}"];
linear1_grad -> elementwise_add1_grad ->relu1_grad [dir=back];
label = "backward tape";
}
relu1_grad:after_act1_grad->after_act1_grad
relu1_grad:before_act1_grad->before_act1_grad
elementwise_add1_grad:before_act1_grad->before_act1_grad
elementwise_add1_grad:before_bias1_grad->before_bias1_grad
elementwise_add1_grad:bias1_grad->bias1_grad
linear1_grad:before_mul1->before_mul1
linear1_grad:weight1->weight1
linear1_grad:before_bias1_grad->before_bias1_grad
linear1_grad:before_mul1_grad->before_mul1_grad
linear1_grad:weight1_grad->weight1_grad
subgraph cluster_2 {
node [shape=record];
label = "Linear1";
weight1
bias1
}
weight1 -> weight1_grad [ label="Grad()", style="dashed" ];
bias1 -> bias1_grad [ label="Grad()", style="dashed"];
}
</details>
![Image](https://github.com/tonyyang-svail/Paddle/blob/cpp_tap/paddle/contrib/tape/computation_graph.png)
## Code Reuse
We want to stay as close to Paddle Fluid as possible.
### Reuse All Operators
Since all Ops are registered in `OpInfoMap`, adding a new `Function` takes only
about 10 lines of code, similar to exposing an operator to Python, as the sketch below shows.
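As a rough illustration, a wrapper for an existing operator such as `relu` could look like the
following sketch (the class name `Relu` is hypothetical and not part of the current code; it follows
the same pattern as the `Mean` and `Linear` wrappers in `function.h`):
```c++
// Hypothetical Function wrapper for the existing "relu" operator.
class Relu {
 public:
  VariableHandle operator()(VariableHandle input) {
    // Create an output variable and record the op on the global tape.
    VariableHandle out(new Variable("relu"));
    get_global_tape().AddOp("relu", {{"X", {input}}}, {{"Out", {out}}}, {});
    return out;
  }
};
```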
### Reuse Compile Time InferShape and InferVarType
Because all the symbolic information is stored in `tape::Variable::desc_` instead
of `ProgramDesc.block.vars`, we create a temporary `BlockDesc` to run `InferShape` and
`InferVarType` every time we `AddOp` to the tape (see `InferShapeAndVarType` in `tape.cc`).
### Reuse Operator::Run
We use smart pointers, instead of a `Scope`, to manage memory, so we create a temporary
`Scope` for every `Operator::Run()`.
## Possible Feature
### Release Memory on Backward
We can release memory aggressively: during backward, we can delete an `OpHandle` once
its backward op has finished. Since all variables are managed by smart pointers, the
memory is automatically released when a variable's reference count drops to 0, as sketched below.
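A minimal sketch of this idea, assuming a hypothetical `Tape::ReleaseForwardOp` member (this is
not part of the current implementation):
```c++
// Hypothetical: after the gradient op of tape_[i] has been recorded and run,
// drop the forward OpHandle's references. Clearing the maps releases the
// shared_ptr<Variable> handles held by this op, so any variable no longer
// referenced elsewhere frees its memory immediately.
void Tape::ReleaseForwardOp(size_t i) {
  tape_[i].inputs_.clear();
  tape_[i].outputs_.clear();
}
```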
### Kernel Fusion
Because a symbolic representation of the tape is constructed before the actual
execution, it would be possible to perform graph optimizations. One use case is kernel
fusion.
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/contrib/tape/tape.h"
#include "paddle/contrib/tape/variable.h"
#include "paddle/fluid/framework/type_defs.h"
namespace paddle {
namespace tape {
class Function {};
class Fill {
public:
Fill(const std::string &initializer, const framework::AttributeMap &attrs)
: initializer_(initializer), attrs_(attrs) {}
void operator()(VariableHandle var) {
get_global_tape().AddOp(initializer_, {}, {{"Out", {var}}}, attrs_);
}
private:
const std::string initializer_;
const framework::AttributeMap attrs_;
};
class Mean {
public:
VariableHandle operator()(VariableHandle var) {
VariableHandle out(new Variable("mean"));
get_global_tape().AddOp("mean", {{"X", {var}}}, {{"Out", {out}}}, {});
return out;
}
};
class Linear {
public:
Linear(int in_dim, int out_dim, const std::string &act)
: w_(new Variable("LinearWeight")),
b_(new Variable("LinearBias")),
act_(act) {
Tape init_tape;
std::string initializer = "fill_constant";
framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{in_dim, out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs);
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs);
init_tape.Forward();
}
VariableHandle operator()(VariableHandle input) {
VariableHandle pre_bias(new Variable("linear"));
get_global_tape().AddOp("mul",
{{"X", {input}}, {"Y", {w_}}},
{{"Out", {pre_bias}}},
{{"x_num_col_dims", 1}, {"y_num_col_dims", 1}});
VariableHandle pre_act(new Variable("linear"));
get_global_tape().AddOp("elementwise_add",
{{"X", {pre_bias}}, {"Y", {b_}}},
{{"Out", {pre_act}}},
{{"axis", 1}});
VariableHandle post_act(new Variable("linear"));
get_global_tape().AddOp(
act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {});
return post_act;
}
std::vector<VariableHandle> Params() { return {w_, b_}; }
private:
VariableHandle w_;
VariableHandle b_;
std::string act_;
};
class SGD {
public:
SGD(float learning_rate) : learning_rate_(new Variable("sgd")) {
Tape init_tape;
std::string initializer = "fill_constant";
framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{1};
attrs["value"] = learning_rate;
init_tape.AddOp(initializer, {}, {{"Out", {learning_rate_}}}, attrs);
init_tape.Forward();
}
void operator()(VariableHandle input) {
PADDLE_ENFORCE(get_global_tape().HasBeenBackwarded(),
"optimization must happen after the backward");
Tape temp_tape;
temp_tape.AddOp("sgd",
{{"Param", {input}},
{"LearningRate", {learning_rate_}},
{"Grad", {input->Grad()}}},
{{"ParamOut", {input}}},
{});
temp_tape.Forward();
}
private:
VariableHandle learning_rate_;
};
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/contrib/tape/tape.h"
#include <list>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/dim.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/pybind/pybind.h"
namespace paddle {
namespace tape {
// borrowed from
// https://stackoverflow.com/questions/874134/find-if-string-ends-with-another-string-in-c
inline bool ends_with(std::string const &value, std::string const &ending) {
if (ending.size() > value.size()) return false;
return std::equal(ending.rbegin(), ending.rend(), value.rbegin());
}
std::ostream &operator<<(std::ostream &os, const framework::VarDesc &var_desc) {
os << var_desc.Name();
os << "[" << var_desc.GetType() << "]";
os << "[" << var_desc.GetDataType() << "]";
os << "{";
for (auto &i : var_desc.GetShape()) {
os << i << ",";
}
os << "}";
return os;
}
std::string to_string(const std::string &type,
const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars,
const framework::AttributeMap &attrs) {
std::stringstream ss;
ss << type << " ";
for (auto &param_name : in_vars) {
for (auto &var : param_name.second) {
ss << param_name.first << ":(" << var->Desc() << ") ";
}
}
for (auto &param_name : out_vars) {
for (auto &var : param_name.second) {
ss << param_name.first << ":(" << var->Desc() << ") ";
}
}
return ss.str();
}
framework::OpDesc CreateOpDesc(const std::string &type,
const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars,
const framework::AttributeMap &attrs) {
framework::VariableNameMap inputs;
for (auto &param_name : in_vars) {
for (auto &var : param_name.second) {
inputs[param_name.first].emplace_back(var->Name());
}
}
framework::VariableNameMap outputs;
for (auto &param_name : out_vars) {
for (auto &var : param_name.second) {
outputs[param_name.first].emplace_back(var->Name());
}
}
return framework::OpDesc(type, inputs, outputs, attrs);
}
void InferShapeAndVarType(const std::string &type,
const VariableHandleMap &in_vars,
VariableHandleMap *out_vars,
const framework::AttributeMap &attrs) {
framework::OpDesc op_desc = CreateOpDesc(type, in_vars, *out_vars, attrs);
// Create a temporary block for compile-time
framework::ProgramDesc program_desc;
framework::BlockDesc *block_desc = program_desc.MutableBlock(0);
PADDLE_ENFORCE(block_desc);
for (auto &param_name : in_vars) {
for (auto &var : param_name.second) {
*block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto();
}
}
for (auto &param_name : *out_vars) {
for (auto &var : param_name.second) {
*block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto();
}
}
LOG(INFO) << "- " << to_string(type, in_vars, *out_vars, attrs);
op_desc.InferShape(*block_desc);
op_desc.InferVarType(block_desc);
for (auto &param_name : *out_vars) {
for (auto &var : param_name.second) {
*var->MutableDesc()->Proto() = *block_desc->Var(var->Name())->Proto();
}
}
LOG(INFO) << "+ " << to_string(type, in_vars, *out_vars, attrs);
}
void Tape::AddOp(const std::string &type,
const VariableHandleMap &in_vars,
VariableHandleMap out_vars,
const framework::AttributeMap &attrs) {
InferShapeAndVarType(type, in_vars, &out_vars, attrs);
tape_.emplace_back(type, in_vars, out_vars, attrs);
}
// Temporary Scope for Operator::Run()
class ScopeWrapper : public framework::Scope {
public:
ScopeWrapper(const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars) {
for (auto &v : in_vars) {
for (auto &vv : v.second) {
if (!vars_.count(vv->Name())) {
vars_[vv->Name()].reset(vv->Var());
}
}
}
for (auto &v : out_vars) {
for (auto &vv : v.second) {
if (!vars_.count(vv->Name())) {
vars_[vv->Name()].reset(vv->Var());
}
}
}
}
~ScopeWrapper() {
for (auto &pair : vars_) {
pair.second.release();
}
}
};
void Tape::Forward() {
LOG(INFO) << "Starting forward -------------------------";
PADDLE_ENFORCE(!has_been_backwarded_);
while (current_position_ < tape_.size()) {
OpHandle &op = tape_[current_position_];
// Create output tensors; this is only necessary for OpWithKernel
for (auto &param2var : op.outputs_) {
for (auto &var : param2var.second) {
var->InitializeVariable();
}
}
framework::OpDesc op_desc =
CreateOpDesc(op.type_, op.inputs_, op.outputs_, op.attrs_);
ScopeWrapper scope(op.inputs_, op.outputs_);
framework::OpRegistry::CreateOp(op_desc)->Run(scope, platform::CPUPlace());
current_position_++;
}
LOG(INFO) << "Finishing forward -------------------------";
}
void Tape::Backward(VariableHandle target) {
PADDLE_ENFORCE(!has_been_backwarded_);
Forward();
// TODO(tonyyang-svail): check output of last op is target
backward_tape_.reset(new Tape());
framework::AttributeMap attrs;
// FIXME(tonyyang-svail): Need to infer_data_type
attrs["dtype"] = framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{1};
attrs["value"] = 1.0f;
backward_tape_->AddOp(
"fill_constant", {}, {{"Out", {target->Grad()}}}, attrs);
for (auto it = tape_.rbegin(); it != tape_.rend(); ++it) {
framework::OpDesc op_desc =
CreateOpDesc(it->type_, it->inputs_, it->outputs_, it->attrs_);
std::unordered_map<std::string, std::string> grad_to_var;
std::vector<std::unique_ptr<framework::OpDesc>> grad_op_descs =
framework::OpInfoMap::Instance()
.Get(op_desc.Type())
.GradOpMaker()(op_desc, {}, &grad_to_var, {});
for (auto &op_desc : grad_op_descs) {
std::unordered_map<std::string, VariableHandle> name2var;
for (auto &param2vars : it->inputs_) {
for (auto &a : param2vars.second) {
name2var[a->Name()] = a;
}
}
for (auto &param2vars : it->outputs_) {
for (auto &a : param2vars.second) {
name2var[a->Name()] = a;
}
}
VariableHandleMap in_vars;
VariableHandleMap out_vars;
std::map<const framework::VariableNameMap *, VariableHandleMap *>
loop_over{{&op_desc->Inputs(), &in_vars},
{&op_desc->Outputs(), &out_vars}};
for (auto &each : loop_over) {
auto &vmp = *each.first;
auto &vhm = *each.second;
for (auto &p2a : vmp) {
for (auto &argu : p2a.second) {
if (name2var.count(argu)) {
vhm[p2a.first].push_back(name2var[argu]);
} else {
PADDLE_ENFORCE(ends_with(argu, framework::kGradVarSuffix),
argu.c_str());
std::string name = argu.substr(
0, argu.size() - std::strlen(framework::kGradVarSuffix));
PADDLE_ENFORCE(name2var.count(name), name.c_str());
vhm[p2a.first].push_back(name2var[name]->Grad());
}
}
}
}
backward_tape_->AddOp(
op_desc->Type(), in_vars, out_vars, op_desc->GetAttrMap());
}
// TODO(tonyyang-svail): how to fill empty grad?
// TODO(tonyyang-svail): Sum var grad is necessary
}
backward_tape_->Forward();
has_been_backwarded_ = true;
}
Tape &get_global_tape() {
static Tape T;
return T;
}
void reset_global_tape() { get_global_tape() = Tape(); }
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/contrib/tape/variable.h"
namespace paddle {
namespace tape {
using VariableHandleMap = std::map<std::string, std::vector<VariableHandle>>;
struct OpHandle {
OpHandle(const std::string &type,
const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars,
const framework::AttributeMap &attrs)
: type_(type), inputs_(in_vars), outputs_(out_vars), attrs_(attrs) {}
std::string type_;
VariableHandleMap inputs_;
VariableHandleMap outputs_;
framework::AttributeMap attrs_;
};
class Tape {
public:
void AddOp(const std::string &type,
const VariableHandleMap &in_vars,
VariableHandleMap out_vars,
const framework::AttributeMap &attrs);
void Forward();
void Backward(VariableHandle target);
bool HasBeenBackwarded() { return has_been_backwarded_; }
private:
bool has_been_backwarded_ = false;
size_t current_position_ = 0;
std::vector<OpHandle> tape_;
std::shared_ptr<Tape> backward_tape_;
};
Tape &get_global_tape();
void reset_global_tape();
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "paddle/contrib/tape/function.h"
using namespace paddle::tape;
TEST(Tape, TestMLP) {
LOG(INFO) << "TestMLP";
Linear linear1(3, 3, "relu");
Linear linear2(3, 3, "relu");
Mean mean;
SGD sgd(0.001);
std::string initializer = "fill_constant";
paddle::framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{3, 3};
attrs["value"] = 1.0f;
Fill filler(initializer, attrs);
for (int i = 0; i < 2; ++i) {
reset_global_tape();
VariableHandle input(new Variable("input"));
filler(input);
auto loss = mean(linear2(linear1(input)));
get_global_tape().Backward(loss);
for (auto w : linear1.Params()) {
sgd(w);
}
for (auto w : linear2.Params()) {
sgd(w);
}
}
}
int main(int argc, char** argv) {
std::vector<paddle::platform::Place> places;
places.emplace_back(paddle::platform::CPUPlace());
paddle::platform::DeviceContextPool::Init(places);
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/contrib/tape/variable.h"
namespace paddle {
namespace tape {
void Variable::InitializeVariable() {
LOG(INFO) << "Initialzing " << desc_.Name() << " as " << desc_.GetType();
framework::proto::VarType::Type var_type = desc_.GetType();
if (var_type == framework::proto::VarType::LOD_TENSOR) {
var_.GetMutable<framework::LoDTensor>();
} else if (var_type == framework::proto::VarType::SELECTED_ROWS) {
var_.GetMutable<framework::SelectedRows>();
} else {
PADDLE_THROW("Variable type %d is not in [LOD_TENSOR, SELECTED_ROWS]",
var_type);
}
}
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/framework/operator.h" // framework::kGradVarSuffix
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/variable.h"
namespace paddle {
namespace tape {
class Variable;
using VariableHandle = std::shared_ptr<Variable>;
/*
* Combination of
* framework::VarDesc desc_;
* framework::Variable var_;
*/
class Variable {
public:
Variable(const std::string pre_fix)
: desc_(pre_fix + std::to_string(count())) {}
Variable(const std::string pre_fix, bool is_grad)
: desc_(pre_fix + (is_grad ? framework::kGradVarSuffix
: std::to_string(count()))) {}
~Variable() { LOG(INFO) << "Deleting " << Name(); }
// Instantiate LoDTensor/SelectedRow
void InitializeVariable();
VariableHandle Grad() {
if (grad_.expired()) {
VariableHandle new_grad(new Variable(desc_.Name(), true));
grad_ = new_grad;
return new_grad;
} else {
return VariableHandle(grad_);
}
}
// Stochastic Gradient Descent with Momentum
// VariableHandle Momentum ();
// void init(const std::string& initializer,
// const framework::AttributeMap& attrs);
// void value() {};
const framework::VarDesc& Desc() const { return desc_; }
framework::VarDesc* MutableDesc() { return &desc_; }
// TODO(tonyyang-svail): No need to expose name
std::string Name() const { return desc_.Name(); }
framework::Variable* Var() { return &var_; }
private:
int count() {
static int counter = 0;
return counter++;
}
framework::VarDesc desc_;
framework::Variable var_;
std::weak_ptr<Variable> grad_;
};
}
}
......@@ -406,6 +406,9 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) {
}
}
}
#else
LOG(WARNING)
<< "'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option";
#endif
}
......
......@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/init.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/piece.h"
......@@ -113,6 +114,9 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
}
places.emplace_back(platform::CPUPlace());
platform::DeviceContextPool::Init(places);
#ifndef PADDLE_WITH_MKLDNN
operators::math::SetNumThreads(1);
#endif
}
void InitGLOG(const std::string &prog_name) {
......
......@@ -410,5 +410,38 @@ void LoDTensor::MergeLoDTensor(
}
}
LoD ConvertToLengthBasedLoD(const LoD &offset_lod) {
LoD length_lod;
length_lod.reserve(offset_lod.size());
for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) {
std::vector<size_t> level;
if (offset_lod[lvl].size() > 0) {
level.reserve(offset_lod[lvl].size() - 1);
}
for (size_t idx = 0; idx < offset_lod[lvl].size() - 1; ++idx) {
level.push_back(offset_lod[lvl][idx + 1] - offset_lod[lvl][idx]);
}
length_lod.push_back(level);
}
return length_lod;
}
LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
LoD offset_lod;
offset_lod.reserve(length_lod.size());
for (size_t lvl = 0; lvl < length_lod.size(); ++lvl) {
std::vector<size_t> level;
level.reserve(length_lod[lvl].size() + 1);
size_t tmp = 0;
level.push_back(tmp);
for (size_t idx = 0; idx < length_lod[lvl].size(); ++idx) {
tmp += length_lod[lvl][idx];
level.push_back(tmp);
}
offset_lod.push_back(level);
}
return offset_lod;
}
} // namespace framework
} // namespace paddle
......@@ -226,5 +226,19 @@ extern void WriteToRecordIO(recordio::Writer* writer,
extern std::vector<LoDTensor> ReadFromRecordIO(
recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx);
/*
* Convert between length-based LoD and offset-based LoD.
* The LoDTensor class uses offset-based LoD internally.
* However, we want to expose the more user-friendly length-based
* LoD to the Python side instead.
*
* Example:
* If offset_lod = [[0, 2, 3],[0, 3, 5, 9]]
* then length_lod = [[2, 1], [3, 2, 4]]
*/
LoD ConvertToLengthBasedLoD(const LoD& offset_lod);
LoD ConvertToOffsetBasedLoD(const LoD& length_lod);
} // namespace framework
} // namespace paddle
......@@ -228,6 +228,38 @@ TEST(LoD, CheckAbsLoD) {
ASSERT_FALSE(CheckAbsLoD(abs_lod0));
}
TEST(LoD, ConvertToLengthBasedLoD) {
LoD offset_lod;
offset_lod.push_back(std::vector<size_t>({0, 2}));
offset_lod.push_back(std::vector<size_t>({0, 1, 3}));
offset_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));
LoD length_lod = ConvertToLengthBasedLoD(offset_lod);
LoD expected;
expected.push_back(std::vector<size_t>({2}));
expected.push_back(std::vector<size_t>({1, 2}));
expected.push_back(std::vector<size_t>({2, 2, 1}));
EXPECT_EQ(length_lod, expected);
}
TEST(LoD, ConvertToOffsetBasedLoD) {
LoD length_lod;
length_lod.push_back(std::vector<size_t>({2}));
length_lod.push_back(std::vector<size_t>({1, 2}));
length_lod.push_back(std::vector<size_t>({2, 2, 1}));
LoD offset_lod = ConvertToOffsetBasedLoD(length_lod);
LoD expected;
expected.push_back(std::vector<size_t>({0, 2}));
expected.push_back(std::vector<size_t>({0, 1, 3}));
expected.push_back(std::vector<size_t>({0, 2, 4, 5}));
EXPECT_EQ(offset_lod, expected);
}
template <typename T>
static void TestRecordIO() {
LoDTensor tensor;
......
......@@ -98,6 +98,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
VLOG(10) << "- " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place);
......@@ -107,6 +108,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
#endif
}
RunImpl(scope, place);
VLOG(10) << "+ " << DebugStringEx(&scope);
}
bool OperatorBase::HasInputs(const std::string& name) const {
......
......@@ -145,9 +145,9 @@ void ParallelExecutor::BCastParamsToGPUs(
auto &dims = main_tensor.dims();
if (paddle::platform::is_gpu_place(main_tensor.place())) {
#ifdef PADDLE_WITH_CUDA
std::vector<void *> buffers;
size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
platform::NCCLGroupGuard guard;
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i];
void *buffer;
......@@ -159,11 +159,21 @@ void ParallelExecutor::BCastParamsToGPUs(
t->Resize(dims);
buffer = t->mutable_data(place, main_tensor.type());
}
auto &nccl_ctx = member_->nccl_ctxs_->at(place);
platform::dynload::ncclBcast(buffer, numel, data_type, 0,
nccl_ctx.comm_, nccl_ctx.stream());
buffers.push_back(buffer);
}
member_->nccl_ctxs_->WaitAll();
PADDLE_ENFORCE_EQ(member_->places_.size(), buffers.size(),
"variables' buffer size to bcast NOT equal to places");
{
platform::NCCLGroupGuard guard;
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto &nccl_ctx = member_->nccl_ctxs_->at(member_->places_[i]);
platform::dynload::ncclBcast(buffers[i], numel, data_type, 0,
nccl_ctx.comm_, nccl_ctx.stream());
}
member_->nccl_ctxs_->WaitAll();
}
#else
PADDLE_THROW("Not compiled with CUDA");
#endif
......
......@@ -43,48 +43,29 @@ Scope& Scope::NewScope() const {
}
Variable* Scope::Var(const std::string& name) {
// acquire the lock when new var under this scope
std::unique_lock<std::mutex> lock(mutex_);
auto* v = FindVarLocally(name);
if (v != nullptr) return v;
v = new Variable();
vars_[name].reset(v);
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
return VarInternal(name);
}
Variable* Scope::Var(std::string* name) {
auto var_name = string::Sprintf("%p.%d", this, vars_.size());
std::unique_lock<std::mutex> lock(mutex_);
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
if (name != nullptr) {
*name = var_name;
*name = new_name;
}
return Var(var_name);
return VarInternal(new_name);
}
Variable* Scope::FindVar(const std::string& name) const {
// acquire the lock when find var
std::unique_lock<std::mutex> lock(mutex_);
return FindVarInternal(name);
}
Variable* Scope::FindVarInternal(const std::string& name) const {
auto var = FindVarLocally(name);
if (var != nullptr) {
return var;
}
return (parent_ == nullptr) ? nullptr : parent_->FindVarInternal(name);
}
const Scope* Scope::FindScope(const Variable* var) const {
for (auto& kv : vars_) {
if (kv.second.get() == var) {
return this;
}
}
return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
std::unique_lock<std::mutex> lock(mutex_);
return FindScopeInternal(var);
}
void Scope::DropKids() {
std::unique_lock<std::mutex> lock(mutex_);
for (Scope* s : kids_) delete s;
......@@ -92,6 +73,7 @@ void Scope::DropKids() {
}
std::vector<std::string> Scope::LocalVarNames() const {
std::unique_lock<std::mutex> lock(mutex_);
std::vector<std::string> known_vars;
known_vars.reserve(this->vars_.size());
for (auto& p : vars_) {
......@@ -127,6 +109,39 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const {
std::unique_lock<std::mutex> lock(mutex_);
RenameInternal(origin_name, new_name);
}
std::string Scope::Rename(const std::string& origin_name) const {
std::unique_lock<std::mutex> lock(mutex_);
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
RenameInternal(origin_name, new_name);
return new_name;
}
Variable* Scope::VarInternal(const std::string& name) {
auto* v = FindVarLocally(name);
if (v != nullptr) return v;
v = new Variable();
vars_[name].reset(v);
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
}
const Scope* Scope::FindScopeInternal(const Variable* var) const {
for (auto& kv : vars_) {
if (kv.second.get() == var) {
return this;
}
}
return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
}
void Scope::RenameInternal(const std::string& origin_name,
const std::string& new_name) const {
auto origin_it = vars_.find(origin_name);
PADDLE_ENFORCE(origin_it != vars_.end(),
"Cannot find original variable with name %s", origin_name);
......@@ -137,10 +152,12 @@ void Scope::Rename(const std::string& origin_name,
vars_.erase(origin_it);
}
std::string Scope::Rename(const std::string& origin_name) const {
auto var_name = string::Sprintf("%p.%d", this, vars_.size());
Rename(origin_name, var_name);
return var_name;
Variable* Scope::FindVarInternal(const std::string& name) const {
auto var = FindVarLocally(name);
if (var != nullptr) {
return var;
}
return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
}
Variable* Scope::FindVarLocally(const std::string& name) const {
......
......@@ -81,20 +81,29 @@ class Scope {
// Rename variable to a new name and return the new name
std::string Rename(const std::string& origin_name) const;
protected:
mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
private:
// Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const* parent) : parent_(parent) {}
// Called by Var.
Variable* VarInternal(const std::string& name);
// Called by FindScope.
const Scope* FindScopeInternal(const Variable* var) const;
// Called by Rename.
void RenameInternal(const std::string& origin_name,
const std::string& new_name) const;
// Called by FindVar recursively.
// Caller doesn't own the returned Variable.
Variable* FindVarInternal(const std::string& name) const;
// Called by FindVarInternal and Var.
// Caller doesn't own the returned Variable.
Variable* FindVarLocally(const std::string& name) const;
mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
// Scope in `kids_` are owned by this class.
mutable std::list<Scope*> kids_;
Scope const* parent_{nullptr};
......
......@@ -20,16 +20,20 @@ limitations under the License. */
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/pybind/pybind.h"
DEFINE_string(devices, "", "The devices to be used which is joined by comma.");
DEFINE_bool(init_p2p, false, "Whether to init p2p.");
DEFINE_int32(math_num_threads, 1,
"Number of threads used to run math functions.");
namespace paddle {
namespace inference {
void Init(const std::vector<std::string> argv) {
framework::InitGflags(argv);
operators::math::SetNumThreads(FLAGS_math_num_threads);
// init devices
std::vector<int> devices;
std::string token;
......
......@@ -64,7 +64,8 @@ class OpConverter {
(*it)(op, scope, test_mode);
}
// convert fluid block to tensorrt network
// Convert a fluid block to a TensorRT network. NOTE it only converts operators;
// the INetwork's inputs and outputs should be specified in other modules.
void ConvertBlock(const framework::proto::BlockDesc& block,
const std::unordered_set<std::string>& parameters,
const framework::Scope& scope, TensorRTEngine* engine) {
......
......@@ -51,11 +51,12 @@ class TensorRTEngine : public EngineBase {
nvinfer1::Weights w_;
};
TensorRTEngine(int max_batch, int max_workspace, cudaStream_t* stream,
TensorRTEngine(int max_batch, int max_workspace,
cudaStream_t* stream = nullptr,
nvinfer1::ILogger& logger = NaiveLogger::Global())
: max_batch_(max_batch),
max_workspace_(max_workspace),
stream_(stream),
stream_(stream ? stream : &default_stream_),
logger_(logger) {}
virtual ~TensorRTEngine();
......@@ -121,6 +122,8 @@ class TensorRTEngine : public EngineBase {
// the max memory size the engine uses
int max_workspace_;
cudaStream_t* stream_;
// If stream_ is not set from outside, the engine holds its own stream.
cudaStream_t default_stream_;
nvinfer1::ILogger& logger_;
std::vector<Buffer> buffers_;
......@@ -165,20 +168,31 @@ class TensorRTEngine : public EngineBase {
*/
class TRT_EngineManager {
public:
TensorRTEngine* Create(int max_batch, int max_workspace,
cudaStream_t* stream) {
engines_.emplace_back(new TensorRTEngine(max_batch, max_workspace, stream));
return engines_.back().get();
bool HasEngine(const std::string& name) const {
return engines_.count(name) != 0;
}
// Get an engine called `name`.
TensorRTEngine* Get(const std::string& name) const {
return engines_.at(name).get();
}
// Create or get an engine called `name`
TensorRTEngine* Create(int max_batch, int max_workspace, cudaStream_t* stream,
const std::string& name) {
auto* p = new TensorRTEngine(max_batch, max_workspace, stream);
engines_[name].reset(p);
return p;
}
void DeleteALl() {
for (auto& ptr : engines_) {
ptr.reset(nullptr);
for (auto& item : engines_) {
item.second.reset(nullptr);
}
}
private:
std::vector<std::unique_ptr<TensorRTEngine>> engines_;
std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
};
} // namespace tensorrt
......
......@@ -29,6 +29,7 @@ DEFINE_string(data_file, "", "File of input index data.");
DEFINE_int32(repeat, 100, "Running the inference program repeat times");
DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
DEFINE_int32(num_threads, 1, "Number of threads should be used");
DECLARE_bool(use_mkldnn);
inline double GetCurrentMs() {
struct timeval time;
......@@ -103,9 +104,9 @@ void ThreadRunInfer(
const int tid, paddle::framework::Scope* scope,
const std::vector<std::vector<const paddle::framework::LoDTensor*>>& jobs) {
// maybe framework::ProgramDesc is not thread-safe
paddle::platform::CPUPlace place;
paddle::framework::Executor executor(place);
auto& sub_scope = scope->NewScope();
auto place = paddle::platform::CPUPlace();
auto executor = paddle::framework::Executor(place);
auto inference_program =
paddle::inference::Load(&executor, scope, FLAGS_model_path);
......@@ -182,8 +183,8 @@ TEST(inference, nlp) {
stop_ms = GetCurrentMs();
} else {
// 1. Define place, executor, scope
auto place = paddle::platform::CPUPlace();
auto executor = paddle::framework::Executor(place);
paddle::platform::CPUPlace place;
paddle::framework::Executor executor(place);
// 2. Initialize the inference_program and load parameters
std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
......
......@@ -19,18 +19,18 @@ limitations under the License. */
namespace paddle {
namespace operators {
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker \
: public ::paddle::framework::OpProtoAndCheckerMaker { \
public: \
void Make() override { \
AddInput("X", "Input of " #OP_NAME " operator"); \
AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \
AddAttr<bool>("use_mkldnn", \
"(bool, default false) Only used in mkldnn kernel") \
.SetDefault(false); \
AddComment(OP_COMMENT); \
} \
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker \
: public ::paddle::framework::OpProtoAndCheckerMaker { \
public: \
void Make() override { \
AddInput("X", "Input of " #OP_NAME " operator"); \
AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \
AddAttr<bool>("use_mkldnn", \
"(default false) Only used in mkldnn kernel") \
.SetDefault(false); \
AddComment(OP_COMMENT); \
} \
}
#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \
......@@ -112,7 +112,7 @@ $$out = \frac{1}{1 + e^{-x}}$$
__attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC(
Logsigmoid Activation Operator
$$out = \log \frac{1}{1 + e^{-x}}$$
$$out = \\log \\frac{1}{1 + e^{-x}}$$
)DOC";
......@@ -196,7 +196,7 @@ $out = [x]$
__attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC(
Reciprocal Activation Operator.
$$out = \frac{1}{x}$$
$$out = \\frac{1}{x}$$
)DOC";
......@@ -252,15 +252,14 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", "Output of Softshrink operator");
AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
AddComment(R"DOC(
Softshrink Activation Operator.
:strong:`Softshrink Activation Operator`
$$
out = \begin{cases}
x - \lambda, \text{if } x > \lambda \\
x + \lambda, \text{if } x < -\lambda \\
0, \text{otherwise}
\end{cases}
$$
.. math::
out = \begin{cases}
x - \lambda, \text{if } x > \lambda \\
x + \lambda, \text{if } x < -\lambda \\
0, \text{otherwise}
\end{cases}
)DOC");
}
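To make the piecewise formula above concrete, a minimal scalar sketch (illustrative only; the operator's actual implementation is functor-based):

#include <cstdio>
// Scalar softshrink, following the piecewise formula in the comment above.
float Softshrink(float x, float lambda) {
  if (x > lambda) return x - lambda;
  if (x < -lambda) return x + lambda;
  return 0.0f;
}
int main() {
  const float lambda = 0.5f;
  for (float x : {-1.0f, -0.3f, 0.2f, 0.9f}) {
    std::printf("softshrink(%.1f) = %.1f\n", x, Softshrink(x, lambda));
  }
}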
......@@ -271,18 +270,18 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override {
AddInput("X", "Input of HardShrink operator");
AddOutput("Out", "Output of HardShrink operator");
AddAttr<float>("threshold", "The value of threshold for HardShrink")
AddAttr<float>("threshold",
"The value of threshold for HardShrink. [default: 0.5]")
.SetDefault(0.5f);
AddComment(R"DOC(
HardShrink Activation Operator.
:strong:`HardShrink activation operator`
$$
out = \begin{cases}
x, \text{if } x > \lambda \\
x, \text{if } x < -\lambda \\
0, \text{otherwise}
\end{cases}
$$
.. math::
out = \begin{cases}
x, \text{if } x > \lambda \\
x, \text{if } x < -\lambda \\
0, \text{otherwise}
\end{cases}
)DOC");
}
......@@ -394,18 +393,18 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override {
AddInput("X", "Input of ThresholdedRelu operator");
AddOutput("Out", "Output of ThresholdedRelu operator");
AddAttr<float>("threshold", "The threshold location of activation")
AddAttr<float>("threshold",
"The threshold location of activation. [default 1.0].")
.SetDefault(1.0f);
AddComment(R"DOC(
ThresholdedRelu Activation Operator.
:strong:`ThresholdedRelu activation operator`
$$
out = \begin{cases}
x, \text{if } x > threshold \\
0, \text{otherwise}
\end{cases}
$$
.. math::
out = \begin{cases}
x, \text{if } x > threshold \\
0, \text{otherwise}
\end{cases}
)DOC");
}
};
......@@ -444,7 +443,7 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
Swish Activation Operator.
$$out = \frac{x}{1 + e^{- \beta x}}$$
$$out = \\frac{x}{1 + e^{- \beta x}}$$
)DOC");
}
......
......@@ -54,10 +54,19 @@ be linearly scaled to make the L2 norm of $Out$ equal to $max\_norm$, as
shown in the following formula:
$$
Out = \frac{max\_norm * X}{norm(X)},
Out = \\frac{max\\_norm * X}{norm(X)},
$$
where $norm(X)$ represents the L2 norm of $X$.
Examples:
.. code-block:: python
data = fluid.layers.data(
name='data', shape=[2, 4, 6], dtype='float32')
reshaped = fluid.layers.clip_by_norm(
x=data, max_norm=0.5)
)DOC");
}
};
......
......@@ -23,30 +23,26 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
OpComment comment;
AddInput("X",
string::Sprintf("(LoDTensor) the left hand operand of %s operator",
comment.type));
AddInput("Y", string::Sprintf(
"(LoDTensor) the right hand operand of %s operator",
comment.type));
AddInput("X", string::Sprintf("the left hand operand of %s operator",
comment.type));
AddInput("Y", string::Sprintf("the right hand operand of %s operator",
comment.type));
AddAttr<bool>("force_cpu",
"(bool, default false) Force fill output variable to cpu "
"Force fill output variable to cpu "
"memory. Otherwise, fill output variable to the running "
"device")
.SetDefault(false);
AddOutput("Out", string::Sprintf(
"(LoDTensor) n-dim bool tensor. Each element is %s",
comment.equation));
AddComment(string::Sprintf(R"DOC(%s Operator
"device [default true].")
.SetDefault(true);
AddOutput("Out", string::Sprintf("n-dim bool tensor. Each element is %s",
comment.equation));
AddComment(string::Sprintf(R"DOC(
It operates element-wise on X and Y, and returns Out. Each of them is an
N-dim tensor. X and Y could be of any type. Each element of the Out tensor is
calculated by %s
calculated by $%s$
)DOC",
comment.type, comment.equation));
AddAttr<int>("axis",
"(int, default -1). The start dimension index "
"for broadcasting Y onto X.")
comment.equation));
AddAttr<int>(
"axis",
"The start dimension index for broadcasting Y onto X. [default -1]")
.SetDefault(-1)
.EqualGreaterThan(-1);
}
......
......@@ -107,7 +107,13 @@ REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker,
false> /* set false to disable empty grad */);
REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad);
REGISTER_OP_CPU_KERNEL(
concat, ops::ConcatKernel<paddle::platform::CPUDeviceContext, float>);
concat, ops::ConcatKernel<paddle::platform::CPUDeviceContext, double>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, float>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, int>);
REGISTER_OP_CPU_KERNEL(
concat_grad,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, float>);
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int>);
......@@ -15,7 +15,13 @@ limitations under the License. */
#include "paddle/fluid/operators/concat_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
concat, ops::ConcatKernel<paddle::platform::CUDADeviceContext, float>);
concat, ops::ConcatKernel<paddle::platform::CUDADeviceContext, double>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, float>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, int>);
REGISTER_OP_CUDA_KERNEL(
concat_grad,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, float>);
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int>);
......@@ -52,7 +52,7 @@ static std::vector<int> GetOffsets(const framework::ExecutionContext& ctx) {
} else {
res = ctx.Attr<std::vector<int>>("offsets");
PADDLE_ENFORCE_EQ(
rank, res.size(),
rank, static_cast<int>(res.size()),
"Offsets size should be equal to dimension size of input tensor.");
}
return res;
......
......@@ -30,19 +30,19 @@ class CumOp : public framework::OperatorWithKernel {
class CumsumOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "Input of Cumsum operator");
AddOutput("Out", "Output of Cumsum operator");
AddInput("X", "Input of cumsum operator");
AddOutput("Out", "Output of cumsum operator");
AddAttr<int>("axis",
"(int, default -1). The dimenstion to accumulate along. "
"-1 means the last dimenstion")
"The dimenstion to accumulate along. -1 means the last "
"dimenstion [default -1].")
.SetDefault(-1)
.EqualGreaterThan(-1);
AddAttr<bool>("exclusive",
"bool, default false). Whether to perform exclusive cumsum")
"Whether to perform exclusive cumsum. [default false].")
.SetDefault(false);
AddAttr<bool>("reverse",
"bool, default false). If true, the cumsum is performed in "
"the reversed direction")
"If true, the cumsum is performed in the reversed direction. "
"[default false].")
.SetDefault(false);
AddComment(R"DOC(
The cumulative sum of the elements along a given axis.
......
......@@ -169,7 +169,8 @@ class RequestPrefetch final : public RequestBase {
auto scope = request_->GetMutableLocalScope();
auto invar = scope->FindVar(in_var_name);
framework::Variable* outvar = scope->FindVar(out_var_name);
// out var must be created in local scope!
framework::Variable* outvar = scope->Var(out_var_name);
request_handler_->Handle(in_var_name, scope, invar, &outvar, out_var_name);
......
......@@ -106,23 +106,36 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
"and M represents the number of deocded boxes.");
AddComment(R"DOC(
Bounding Box Coder Operator.
Bounding Box Coder.
Encode/Decode the target bounding box with the priorbox information.
The encoding schema is described below:
ox = (tx - px) / pw / pxv
oy = (ty - py) / ph / pyv
ow = log(abs(tw / pw)) / pwv
oh = log(abs(th / ph)) / phv
ox = (tx - px) / pw / pxv
oy = (ty - py) / ph / pyv
ow = log(abs(tw / pw)) / pwv
oh = log(abs(th / ph)) / phv
The decoding schema is described below:
ox = (pw * pxv * tx * + px) - tw / 2
oy = (ph * pyv * ty * + py) - th / 2
ow = exp(pwv * tw) * pw + tw / 2
oh = exp(phv * th) * ph + th / 2
where tx, ty, tw, th denote the target box's center coordinates, width and
height respectively. Similarly, px, py, pw, ph denote the priorbox's(anchor)
center coordinates, width and height. pxv, pyv, pwv, phv denote the variance
of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates,
width and height.
ox = (pw * pxv * tx + px) - tw / 2
oy = (ph * pyv * ty + py) - th / 2
ow = exp(pwv * tw) * pw + tw / 2
oh = exp(phv * th) * ph + th / 2
where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width
and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the
priorbox's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`,
`phv` denote the variance of the priorbox and `ox`, `oy`, `ow`, `oh` denote the
encoded/decoded coordinates, width and height.
)DOC");
}
};
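To make the encoding schema concrete, a minimal single-box sketch; the `Box` struct and the variance array layout are illustrative assumptions, not the operator's actual tensor layout:

#include <cmath>
#include <cstdio>
struct Box { float x, y, w, h; };  // center x, center y, width, height
// Encode a target box against a prior box, following the schema above;
// v holds the prior-box variances (pxv, pyv, pwv, phv).
Box Encode(const Box& t, const Box& p, const float v[4]) {
  Box o;
  o.x = (t.x - p.x) / p.w / v[0];
  o.y = (t.y - p.y) / p.h / v[1];
  o.w = std::log(std::fabs(t.w / p.w)) / v[2];
  o.h = std::log(std::fabs(t.h / p.h)) / v[3];
  return o;
}
int main() {
  Box target{2.f, 2.f, 4.f, 4.f}, prior{1.f, 1.f, 2.f, 2.f};
  const float var[4] = {0.1f, 0.1f, 0.2f, 0.2f};
  Box e = Encode(target, prior, var);
  std::printf("encoded: %.2f %.2f %.2f %.2f\n", e.x, e.y, e.w, e.h);
}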
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise_mul_op.h"
#include "paddle/fluid/operators/elementwise_op.h"
namespace ops = paddle::operators;
REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\odot\\ Y");
REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\\\odot Y");
REGISTER_OP_CPU_KERNEL(
elementwise_mul,
ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>,
......
......@@ -36,11 +36,12 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
void Apply() override {
AddAttr<float>("mean",
"(float, default 0.0) "
"mean of random tensor.")
"The mean (or center) of the gaussian distribution.")
.SetDefault(.0f);
AddAttr<float>("std",
"(float, default 1.0) "
"std of random tensor.")
"The standard deviation (std, or spread) of the "
"gaussian distribution.")
.SetDefault(1.0f);
AddAttr<int>("seed",
"(int, default 0) "
......@@ -55,9 +56,11 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
.SetDefault(framework::proto::VarType::FP32);
AddComment(R"DOC(
GaussianRandom Operator.
Used to initialize tensors with a Gaussian random generator.
The default mean of the distribution is 0, and the default standard
deviation (std) is 1. Users can set mean and std via input arguments.
)DOC");
}
};
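A minimal sketch of the sampling semantics the mean/std attributes describe, using std::normal_distribution rather than the operator's actual generator:

#include <iostream>
#include <random>
#include <vector>
int main() {
  // mean/std mirror the attributes above; seed 0 means "use a system seed"
  // in the operator, modeled here with std::random_device.
  const float mean = 0.0f, std_dev = 1.0f;
  std::mt19937 rng(std::random_device{}());
  std::normal_distribution<float> dist(mean, std_dev);
  std::vector<float> tensor(8);
  for (float& v : tensor) v = dist(rng);
  for (float v : tensor) std::cout << v << ' ';
  std::cout << '\n';
}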
......
......@@ -85,7 +85,7 @@ class GetPlacesOpProtoMaker : public framework::OpProtoAndCheckerMaker {
.InEnum({"CUDA", "CPU", "AUTO"})
.SetDefault("AUTO");
AddComment(R"DOC(
Returns a list of places based on flags. The list will be used for parallel
Returns a list of places based on arguments. The list will be used for parallel
execution.
)DOC");
}
......
......@@ -62,36 +62,33 @@ class LayerNormOp : public framework::OperatorWithKernel {
class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(LoDTensor) The input tensor.");
AddInput("X", "The input tensor.");
AddInput("Scale",
"(Tensor, optional) Scale is a 1-dimensional tensor of size "
"(optional) Scale is a 1-dimensional tensor of size "
"H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
"It is applied to the output.")
.AsDispensable();
AddInput("Bias",
"(Tensor, optional) Bias is a 1-dimensional tensor of size "
"(optional) Bias is a 1-dimensional tensor of size "
"H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
"It is applied to the output.")
.AsDispensable();
AddOutput("Y", "(LoDTensor) Result after normalization.");
AddOutput("Mean", "(Tensor) Mean of the current mini batch.")
.AsIntermediate();
AddOutput("Variance", "(Tensor) Variance of the current mini batch.")
AddOutput("Y", "Result after normalization.");
AddOutput("Mean", "Mean of the current mini batch.").AsIntermediate();
AddOutput("Variance", "Variance of the current mini batch.")
.AsIntermediate();
AddAttr<float>("epsilon",
"(float, default 1e-5) Constant for "
"numerical stability")
"Constant for numerical stability [default 1e-5].")
.SetDefault(1e-5)
.AddCustomChecker([](const float &epsilon) {
PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f,
"'epsilon' should be between 0.0 and 0.001.");
});
AddAttr<int>("begin_norm_axis",
"(int default:1), the "
"axis of `begin_norm_axis ... Rank(X) - 1` will be "
"the axis of `begin_norm_axis ... Rank(X) - 1` will be "
"normalized. `begin_norm_axis` splits the tensor(`X`) to a "
"matrix [N,H].")
"matrix [N,H]. [default 1].")
.SetDefault(1)
.AddCustomChecker([](const int &begin_norm_axis) {
PADDLE_ENFORCE_GT(begin_norm_axis, 0,
......@@ -99,10 +96,14 @@ class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker {
});
AddComment(R"DOC(
Layer Normalization.
Layer Norm has been implemented as discussed in the paper:
https://arxiv.org/abs/1607.06450
...
Assume feature vectors exist on dimensions
:attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
along these dimensions for each feature vector :math:`a` with size
:math:`H`, then normalize each feature vector using the corresponding
statistics. After that, apply learnable gain and bias on the normalized
tensor to scale and shift if :attr:`scale` and :attr:`shift` are set.
Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_
)DOC");
}
};
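A minimal CPU sketch of the computation described above, assuming the input has already been flattened to an N x H matrix by begin_norm_axis:

#include <cmath>
#include <cstdio>
#include <vector>
// Normalize each row with its own mean/variance, then apply a per-column
// gain (scale) and bias, as the comment above describes.
void LayerNorm(std::vector<float>* x, int n, int h,
               const std::vector<float>& scale,
               const std::vector<float>& bias, float epsilon = 1e-5f) {
  for (int i = 0; i < n; ++i) {
    float* row = x->data() + i * h;
    float mean = 0.f, var = 0.f;
    for (int j = 0; j < h; ++j) mean += row[j];
    mean /= h;
    for (int j = 0; j < h; ++j) var += (row[j] - mean) * (row[j] - mean);
    var /= h;
    const float inv_std = 1.f / std::sqrt(var + epsilon);
    for (int j = 0; j < h; ++j)
      row[j] = (row[j] - mean) * inv_std * scale[j] + bias[j];
  }
}
int main() {
  std::vector<float> x = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};  // 2 x 3 matrix
  LayerNorm(&x, 2, 3, {1.f, 1.f, 1.f}, {0.f, 0.f, 0.f});
  for (float v : x) std::printf("%.3f ", v);
  std::printf("\n");
}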
......
......@@ -348,7 +348,8 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker {
};
void SignalHandler::StopAndExit(int signal_num) {
VLOG(3) << "Catch interrupt signal: " << signal_num << ", program will exit";
// Do not use VLOG here; the device used for printing may already be released.
// exit() will release internally allocated resources.
exit(0);
}
......
......@@ -20,13 +20,16 @@
#ifdef PADDLE_WITH_MKLML
#include <mkl_cblas.h>
#include <mkl_lapacke.h>
#include <mkl_service.h>
#include <mkl_vml_functions.h>
#endif
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#ifdef LAPACK_FOUND
#include <lapacke.h>
#endif
#endif
#ifndef LAPACK_FOUND
extern "C" {
......@@ -46,6 +49,18 @@ namespace paddle {
namespace operators {
namespace math {
static void SetNumThreads(int num_threads) {
#ifdef PADDLE_USE_OPENBLAS
int real_num_threads = num_threads > 1 ? num_threads : 1;
openblas_set_num_threads(real_num_threads);
#elif defined(PADDLE_WITH_MKLML)
int real_num_threads = num_threads > 1 ? num_threads : 1;
mkl_set_num_threads(real_num_threads);
#else
PADDLE_ENFORCE(false, "To be implemented.");
#endif
}
/**
* Matrix Descriptor of a memory buffer.
*
......
......@@ -21,8 +21,10 @@ limitations under the License. */
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#ifdef LAPACK_FOUND
#include <lapacke.h>
#endif
#endif
#ifndef LAPACK_FOUND
extern "C" {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/mean_iou_op.h"
namespace paddle {
namespace operators {
class MeanIoUOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Predictions"),
"Input (Predictions) of MeanIoU op should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Labels"),
"Input (labels) of MeanIoU op should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("OutMeanIou"),
"Output (OutMeanIou) of MeanIoU op should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("OutWrong"),
"Output (OutWrong) of MeanIoU op should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("OutCorrect"),
"Output (OutWrong) of MeanIoU op should not be null.");
int64_t num_classes =
static_cast<int64_t>(ctx->Attrs().Get<int>("num_classes"));
ctx->SetOutputDim("OutMeanIou", {1});
ctx->SetOutputDim("OutWrong", {num_classes});
ctx->SetOutputDim("OutCorrect", {num_classes});
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Predictions")->type()),
ctx.GetPlace());
}
};
class MeanIoUOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Predictions",
"(Tensor), A Tensor of prediction results for semantic labels"
" with type int32 or int64. The rank should be greater than 1.");
AddInput(
"Labels",
"(Tensor), A Tensor of ground truth labels with type int32 or int64."
"Its shape should be the same as Input(Predictions).");
AddInput("InWrongs",
"(vector<Tensor>), A list of Tensor with shape "
"[num_classes]. They are used to collect wrong number among "
"batches. Empty list is also valid here.")
.AsDuplicable()
.AsDispensable();
AddInput(
"InCorrects",
"(vector<Tensor>), A list of Tensor with shape "
"[num_classes]. They are used to collect correct number among batches. "
"Empty list is also valid here.")
.AsDuplicable()
.AsDispensable();
AddInput("InMeanIou",
"(vector<Tensor>), A list of Tensor that Output(mean_iou) should "
"be added to. Empty list is also valid here.")
.AsDuplicable()
.AsDispensable();
AddOutput("OutMeanIou",
"(vector<Tensor>), A Tensor representing the"
" mean intersection-over-union with shape [1].");
AddOutput("OutWrong", "(Tensor), A Tensor with shape [num_classes]. ");
AddOutput("OutCorrect", "(Tensor), A Tensor with shape [num_classes]. ");
AddAttr<int>("num_classes", "(int), The possible number of labels.");
AddComment(R"DOC(
mean-IOU Operator.
Mean Intersection-Over-Union is a common evaluation metric for
semantic image segmentation, which first computes the IOU for each
semantic class and then computes the average over classes.
IOU is defined as follows:
IOU = true_positive / (true_positive + false_positive + false_negative).
It is based on pixel-level area, while the "IOU Similarity Operator"
is based on the area of rectangles.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(mean_iou, ops::MeanIoUOp, ops::MeanIoUOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(mean_iou, ops::MeanIoUKernel<int>,
ops::MeanIoUKernel<int32_t>,
ops::MeanIoUKernel<int64_t>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/mean_iou_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/gpu_info.h"
namespace paddle {
namespace operators {
using platform::PADDLE_CUDA_NUM_THREADS;
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
template <typename T>
__global__ void CountCUDAKernel(const int num_classes, const int count,
const T* predictions, const T* labels,
int* wrong, int* correct) {
extern __shared__ int block_cache[];
int* wrong_c = block_cache;
int* correct_c = block_cache + num_classes;
// init cache
for (int i = threadIdx.x; i < num_classes * 2; i += blockDim.x) {
block_cache[i] = 0;
}
__syncthreads();
T pred;
T label;
CUDA_1D_KERNEL_LOOP(i, count) {
pred = predictions[i];
label = labels[i];
if (pred == label) {
atomicAdd(correct_c + pred, 1);
} else {
atomicAdd(wrong_c + pred, 1);
atomicAdd(wrong_c + label, 1);
}
}
__syncthreads();
for (int i = threadIdx.x; i < num_classes; i += blockDim.x) {
atomicAdd(wrong + i, wrong_c[i]);
atomicAdd(correct + i, correct_c[i]);
}
}
__global__ void ComputeIoUCUDAKernel(const int num_classes, int* wrong,
int* correct, float* ious, float* iou) {
__shared__ int valid_count_c;
if (threadIdx.x == 0) {
valid_count_c = 0;
}
__syncthreads();
CUDA_1D_KERNEL_LOOP(i, num_classes) {
int wrong_n = wrong[i];
int correct_n = correct[i];
int denominator = wrong_n + correct_n;
if (denominator > 0) {
atomicAdd(&valid_count_c, 1);
ious[i] = static_cast<float>(correct_n) / denominator;
} else {
ious[i] = 0;
}
}
__syncthreads();
if (threadIdx.x == 0) {
float iou_sum = 0;
for (int i = 0; i < num_classes; ++i) {
iou_sum += ious[i];
}
iou[0] += iou_sum / valid_count_c;
}
}
template <typename T>
class MeanIoUCUDAOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& place = *ctx.template device_context<platform::CUDADeviceContext>()
.eigen_device();
// get input and output tensor
auto* predictions = ctx.Input<Tensor>("Predictions");
auto* labels = ctx.Input<Tensor>("Labels");
auto* out_mean_iou = ctx.Output<Tensor>("OutMeanIou");
auto* out_wrong = ctx.Output<Tensor>("OutWrong");
auto* out_correct = ctx.Output<Tensor>("OutCorrect");
int num_classes = static_cast<int>(ctx.Attr<int>("num_classes"));
// Get data ptr
const T* predictions_data = predictions->data<T>();
const T* labels_data = labels->data<T>();
int* out_wrong_data = out_wrong->mutable_data<int>(ctx.GetPlace());
int* out_correct_data = out_correct->mutable_data<int>(ctx.GetPlace());
float* out_mean_iou_data =
out_mean_iou->mutable_data<float>(ctx.GetPlace());
// Get Eigen tensor
auto out_mean_iou_t = EigenTensor<float, 1>::From(*out_mean_iou);
auto out_wrong_t = EigenTensor<int, 1>::From(*out_wrong);
auto out_correct_t = EigenTensor<int, 1>::From(*out_correct);
// Temporary tensor
Tensor ious;
float* ious_data = ious.mutable_data<float>(
{static_cast<int64_t>(num_classes)}, ctx.GetPlace());
auto ious_t = EigenTensor<float, 1>::From(ious);
// Init out_wrong, out_correct and out_mean_iou
out_wrong_t.device(place) = out_wrong_t.constant(0);
out_correct_t.device(place) = out_correct_t.constant(0);
out_mean_iou_t.device(place) = out_mean_iou_t.constant(0.0f);
// collect pre wrong, correct and mean_iou
auto in_mean_ious = ctx.MultiInput<Tensor>("InMeanIou");
for (int i = 0; i < in_mean_ious.size(); ++i) {
out_mean_iou_t.device(place) +=
EigenTensor<float, 1>::From(*in_mean_ious[i]);
}
auto in_wrongs = ctx.MultiInput<Tensor>("InWrongs");
for (int i = 0; i < in_wrongs.size(); ++i) {
out_wrong_t.device(place) += EigenTensor<int, 1>::From(*in_wrongs[i]);
}
auto in_corrects = ctx.MultiInput<Tensor>("InCorrects");
for (int i = 0; i < in_corrects.size(); ++i) {
out_correct_t.device(place) += EigenTensor<int, 1>::From(*in_corrects[i]);
}
// compute
auto stream = ctx.cuda_device_context().stream();
int block = PADDLE_CUDA_NUM_THREADS;
int grid = (predictions->numel() + block - 1) / block;
int cache_size = (num_classes * 2 + 1) * sizeof(int);
CountCUDAKernel<T><<<grid, block, cache_size, stream>>>(
num_classes, predictions->numel(), predictions_data, labels_data,
out_wrong_data, out_correct_data);
ctx.device_context().Wait();
ComputeIoUCUDAKernel<<<1, block, 0, stream>>>(num_classes, out_wrong_data,
out_correct_data, ious_data,
out_mean_iou_data);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(mean_iou, ops::MeanIoUCUDAOpKernel<int>,
ops::MeanIoUCUDAOpKernel<int64_t>,
ops::MeanIoUCUDAOpKernel<int32_t>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T, int D, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
template <typename T>
class MeanIoUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& place = *ctx.template device_context<platform::CPUDeviceContext>()
.eigen_device();
// get input and output tensor
auto* predictions = ctx.Input<Tensor>("Predictions");
auto* labels = ctx.Input<Tensor>("Labels");
auto* out_mean_iou = ctx.Output<Tensor>("OutMeanIou");
auto* out_wrong = ctx.Output<Tensor>("OutWrong");
auto* out_correct = ctx.Output<Tensor>("OutCorrect");
int num_classes = static_cast<int>(ctx.Attr<int>("num_classes"));
// get data ptr
const T* predictions_data = predictions->data<T>();
const T* labels_data = labels->data<T>();
float* out_mean_iou_data =
out_mean_iou->mutable_data<float>(ctx.GetPlace());
int* out_wrong_data = out_wrong->mutable_data<int>(ctx.GetPlace());
int* out_correct_data = out_correct->mutable_data<int>(ctx.GetPlace());
// get eigen tensor
auto out_mean_iou_t = EigenTensor<float, 1>::From(*out_mean_iou);
auto out_wrong_t = EigenTensor<int, 1>::From(*out_wrong);
auto out_correct_t = EigenTensor<int, 1>::From(*out_correct);
// Tmp tensor
Tensor denominator;
Tensor valid_count;
Tensor iou_sum;
// get data ptr of tmp tensor
int* denominator_data = denominator.mutable_data<int>(
{static_cast<int64_t>(num_classes)}, ctx.GetPlace());
int* valid_count_data = valid_count.mutable_data<int>({1}, ctx.GetPlace());
float* iou_sum_data = iou_sum.mutable_data<float>({1}, ctx.GetPlace());
// get eigen tensor of tmp tensor
auto denominator_t = EigenTensor<int, 1>::From(denominator);
auto valid_count_t = EigenTensor<int, 1>::From(valid_count);
auto iou_sum_t = EigenTensor<float, 1>::From(iou_sum);
// init out_wrong, out_correct and out_mean_iou
out_wrong_t = out_wrong_t.constant(0);
out_correct_t = out_correct_t.constant(0);
out_mean_iou_t = out_mean_iou_t.constant(0);
// collect pre wrong, correct and mean_iou
auto in_mean_ious = ctx.MultiInput<Tensor>("InMeanIou");
for (size_t i = 0; i < in_mean_ious.size(); ++i) {
out_mean_iou_t.device(place) +=
EigenTensor<float, 1>::From(*in_mean_ious[i]);
}
auto in_wrongs = ctx.MultiInput<Tensor>("InWrongs");
for (size_t i = 0; i < in_wrongs.size(); ++i) {
out_wrong_t.device(place) += EigenTensor<int, 1>::From(*in_wrongs[i]);
}
auto in_corrects = ctx.MultiInput<Tensor>("InCorrects");
for (size_t i = 0; i < in_corrects.size(); ++i) {
out_correct_t.device(place) += EigenTensor<int, 1>::From(*in_corrects[i]);
}
// compute
for (int64_t i = 0; i < predictions->numel(); ++i) {
if (predictions_data[i] == labels_data[i]) {
out_correct_data[predictions_data[i]] += 1;
} else {
out_wrong_data[labels_data[i]] += 1;
out_wrong_data[predictions_data[i]] += 1;
}
}
denominator_t = out_wrong_t + out_correct_t;
valid_count_t =
(denominator_t > denominator_t.constant(0.0f)).cast<int>().sum();
for (int i = 0; i < num_classes; ++i) {
if (denominator_data[i] == 0) {
denominator_data[i] = 1;
}
}
iou_sum_t =
(out_correct_t.cast<float>() / denominator_t.cast<float>()).sum();
out_mean_iou_data[0] += (iou_sum_data[0] / valid_count_data[0]);
}
};
} // namespace operators
} // namespace paddle
......@@ -33,12 +33,10 @@ class MeanOp : public framework::OperatorWithKernel {
class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "The input of mean op");
AddOutput("Out", "The output of mean op").Reuse("X");
AddInput("X", "(Tensor) The input of mean op");
AddOutput("Out", "(Tensor) The output of mean op").Reuse("X");
AddComment(R"DOC(
Mean Operator.
Out is a scalar which is the mean of all elements in X.
Mean Operator calculates the mean of all elements in X.
)DOC");
}
......
......@@ -62,26 +62,46 @@ class MultiplexOp : public framework::OperatorWithKernel {
class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Ids", "The index tensor of multiplex operator.");
AddInput("X", "The candidate tensors of multiplex operator.")
AddInput("Ids",
"Tensor<int32>, index variable which is a 2-D tensor with shape "
"[M, 1] where M is the batch size.");
AddInput("X",
"A list of variables to gather from. All variables have the same "
"shape and the rank is at least 2.")
.AsDuplicable();
AddOutput("Out", "The output tensor of multiplex operator.");
AddComment(R"DOC(
Multiplex Operator.
Multiplex multiple tensors according to the index provided by the index tensor.
Ids: the index tensor.
X[0 : N - 1]: the candidate tensors for output (N >= 2).
For each index i from 0 to batchSize - 1, the output is the i-th row of the
Referring to the given index variable, this layer selects rows from the
input variables to construct a multiplex variable. Assuming that there are
:math:`m` input variables and :math:`I_i` represents the i-th input
variable and :math:`i` is in [0, :math:`m`). All input variables are
tensors with same shape [:math:`d_0`, :math:`d_1`, ..., :math:`d_R`].
Please note that the rank of the input tensor should be at least 2. Each input
variable will be treated as a 2-D matrix with shape [:math:`M`, :math:`N`]
where :math:`M` for :math:`d_0` and :math:`N` for :math:`d_1` * :math:`d_2`
* ... * :math:`d_R`. Let :math:`I_i[j]` be the j-th row of the i-th input
variable. The given index variable should be a 2-D tensor with shape
[:math:`M`, 1]. Let `ID[i]` be the i-th index value of the index variable.
Then the output variable will be a tensor with shape [:math:`d_0`,
:math:`d_1`, ..., :math:`d_R`]. If we treat the output tensor as a 2-D
matrix with shape [:math:`M`, :math:`N`] and let :math:`O[i]` be the i-th
row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`.
* Ids: the index tensor.
* X[0 : N - 1]: the candidate tensors for output (N >= 2).
* For each index i from 0 to batchSize - 1, the output is the i-th row of the
the (Ids[i])-th tensor.
For the i-th row of the output tensor:
$$y[i] = x_{k}[i]$$
$$
y[i] = x_{k}[i]
$$
where `y` is the output tensor, `x_{k}` is the k-th input tensor,
and `k = Ids[i]`.
where $y$ is the output tensor, $x_{k}$ is the k-th input tensor,
and $k = Ids[i]$.
)DOC");
}
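A minimal sketch of the row-gathering rule O[i] = I_{ID[i]}[i] described above, with nested vectors standing in for real tensors:

#include <cstdio>
#include <vector>
using Matrix = std::vector<std::vector<float>>;
// Row i of the output comes from row i of input number ids[i].
Matrix Multiplex(const std::vector<Matrix>& inputs,
                 const std::vector<int>& ids) {
  Matrix out(ids.size());
  for (size_t i = 0; i < ids.size(); ++i) out[i] = inputs[ids[i]][i];
  return out;
}
int main() {
  std::vector<Matrix> inputs = {{{1, 2}, {3, 4}},    // I_0
                                {{5, 6}, {7, 8}}};   // I_1
  Matrix out = Multiplex(inputs, {1, 0});  // row 0 from I_1, row 1 from I_0
  for (const auto& row : out) std::printf("%g %g\n", row[0], row[1]);
  // prints: 5 6 / 3 4
}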
......
......@@ -128,8 +128,10 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
"user should avoid setting this attribute.")
.SetDefault({});
AddComment(R"DOC(
Compute and return the noise-contrastive estimation training loss.
See [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf).
Compute and return the noise-contrastive estimation training loss. See
`Noise-contrastive estimation: A new estimation principle for unnormalized
statistical models
<http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf>`_.
By default this operator uses a uniform distribution for sampling.
)DOC");
}
......
......@@ -204,8 +204,6 @@ void Pool2dOpMaker::Make() {
// TODO(dzhwinter): need to register layout transform function
AddComment(R"DOC(
Pool2d Operator.
The pooling2d operation calculates the output based on
the input, pooling_type and ksize, strides, paddings parameters.
Input(X) and output(Out) are in NCHW format, where N is batch size, C is the
......@@ -215,19 +213,28 @@ These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
Input:
X shape: $(N, C, H_{in}, W_{in})$
Output:
Out shape: $(N, C, H_{out}, W_{out})$
For ceil_mode = false:
$$
H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1
$$
$$
W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
$$
For ceil_mode = true:
$$
H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1 \\
W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1
H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1
$$
$$
W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1
$$
)DOC");
......
......@@ -78,11 +78,15 @@ class CreateRecordIOReaderOp : public framework::OperatorBase {
class CreateRecordIOReaderOpMaker : public FileReaderMakerBase {
protected:
void Apply() override {
AddAttr<std::string>("filename", "The filename of record io reader");
AddAttr<std::string>(
"filename",
"The filename of record file. This file will given to reader.");
AddComment(R"DOC(
CreateRecordIOReader Operator
Open a recordio file and return the reader object. The returned reader object
is thread-safe.
Create a reader from a record io file
NOTE: This is a very low-level API. It is used for debugging a data file or
for training. Please use `open_files` instead of this API for production usage.
)DOC");
}
};
......
......@@ -54,7 +54,7 @@ std::unique_ptr<framework::ReaderBase> CreateReaderByFileName(
}
void FileReaderMakerBase::Make() {
AddOutput("Out", "(ReaderHolder) The created random reader.").AsDuplicable();
AddOutput("Out", "(ReaderHolder): The created random reader.").AsDuplicable();
AddAttr<std::vector<int>>("shape_concat", "The concat of all data's shapes.");
AddAttr<std::vector<int>>(
"ranks",
......
......@@ -78,23 +78,23 @@ class RowConvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"(LoDTensor), the input(X) is a LodTensor, which supports "
"the input(X) is a LodTensor, which supports "
"variable time-length input sequences. The underlying tensor "
"in this LoDTensor is a matrix with shape (T x N), where T "
"is the total time steps in this mini-batch and N is the input "
"data dimension.");
AddInput("Filter",
"(Tensor), the input(Filter) is a learnable parameter. It "
"the input(Filter) is a learnable parameter. It "
"is a 2-D tensor with shape (future_context x N), where, "
"future_context is the future context length and N is the data "
"dimension.");
AddOutput("Out",
"(LoDTensor), the output(Out) is a LodTensor, which supports "
"the output(Out) is a LodTensor, which supports "
"variable time-length input sequences. The underlying tensor "
"in this LodTensor is a matrix with shape T x N, i.e., the "
"same shape as X.");
AddComment(R"DOC(
Row-convolution Operator.
:strong:`Row-convolution operator`
The row convolution is called lookahead convolution. This operator was
introduced in the following paper for DeepSpeech2:
......@@ -114,9 +114,23 @@ and a filter ($W$) of size $context \times d$,
the output sequence is convolved as:
$$
out_{i, :} = \sum_{j=i}^{i + context} in_{j,:} \dot W_{i-j, :}
out_{i, :} = \\sum_{j=i}^{i + context} in_{j,:} \\cdot W_{i-j, :}
$$
In the above equation:
* $Out_{i}$: The i-th row of output variable with shape [1, D].
* $\\tau$: Future context size.
* $X_{j}$: The j-th row of input variable with shape [1, D].
* $W_{i-j}$: The (i-j)-th row of parameters with shape [1, D].
For more details about row_conv, please refer to the design document:
https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645 .
)DOC");
}
};
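A minimal sketch of the lookahead convolution, assuming the conventional indexing out[i] = sum_j in[i+j] * W[j] over the future context (truncated at the end of the sequence):

#include <cstdio>
#include <vector>
using Matrix = std::vector<std::vector<float>>;
// Each output step mixes the current step and the next context-1 future
// steps, elementwise per feature dimension.
Matrix RowConv(const Matrix& in, const Matrix& w) {
  const size_t T = in.size(), D = in[0].size(), context = w.size();
  Matrix out(T, std::vector<float>(D, 0.f));
  for (size_t i = 0; i < T; ++i)
    for (size_t j = 0; j < context && i + j < T; ++j)
      for (size_t d = 0; d < D; ++d) out[i][d] += in[i + j][d] * w[j][d];
  return out;
}
int main() {
  Matrix in = {{1, 1}, {2, 2}, {3, 3}};     // T = 3 steps, D = 2 features
  Matrix w = {{0.5f, 0.5f}, {0.5f, 0.5f}};  // future_context = 2
  for (const auto& row : RowConv(in, w)) std::printf("%g %g\n", row[0], row[1]);
  // step 0: 0.5*1 + 0.5*2 = 1.5; step 1: 2.5; step 2 (no future): 1.5
}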
......
......@@ -95,23 +95,26 @@ of that dimension. If the value passed to start or end is larger than
the n (the number of elements in this dimension), it represents n.
For slicing to the end of a dimension with unknown size, it is recommended
to pass in INT_MAX. If axes are omitted, they are set to [0, ..., ndim-1].
Example 1:
Given:
data = [ [1, 2, 3, 4], [5, 6, 7, 8], ]
axes = [0, 1]
starts = [1, 0]
ends = [2, 3]
Then:
result = [ [5, 6, 7], ]
Example 2:
Given:
data = [ [1, 2, 3, 4], [5, 6, 7, 8], ]
starts = [0, 1]
ends = [-1, 1000]
Then:
result = [ [2, 3, 4], ]
Following examples will explain how slice works:
.. code-block:: text
Case 1:
Given:
data = [ [1, 2, 3, 4], [5, 6, 7, 8], ]
axes = [0, 1]
starts = [1, 0]
ends = [2, 3]
Then:
result = [ [5, 6, 7], ]
Case 2:
Given:
data = [ [1, 2, 3, 4], [5, 6, 7, 8], ]
starts = [0, 1]
ends = [-1, 1000]
Then:
result = [ [2, 3, 4], ]
)DOC");
}
};
......
......@@ -115,4 +115,7 @@ USE_CPU_ONLY_OP(concat);
REGISTER_OPERATOR(split, ops::SplitOp, ops::SplitOpMaker, ops::SplitGradMaker);
REGISTER_OP_CPU_KERNEL(split,
ops::SplitOpKernel<paddle::platform::CPUPlace, float>);
ops::SplitOpKernel<paddle::platform::CPUPlace, double>,
ops::SplitOpKernel<paddle::platform::CPUPlace, float>,
ops::SplitOpKernel<paddle::platform::CPUPlace, int64_t>,
ops::SplitOpKernel<paddle::platform::CPUPlace, int>);
......@@ -15,4 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/split_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
split, ops::SplitOpKernel<paddle::platform::CUDADeviceContext, float>);
split, ops::SplitOpKernel<paddle::platform::CUDADeviceContext, double>,
ops::SplitOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::SplitOpKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::SplitOpKernel<paddle::platform::CUDADeviceContext, int>);
......@@ -66,17 +66,25 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
} // namespace
template <typename DeviceContext, typename T>
void paddle::operators::TensorRTEngineKernel<DeviceContext, T>::Prepare(
void TensorRTEngineKernel<DeviceContext, T>::Prepare(
const framework::ExecutionContext &context) const {
VLOG(4) << "Prepare engine";
// Get the ProgramDesc and pass to convert.
framework::proto::BlockDesc block_desc;
block_desc.ParseFromString(context.Attr<std::string>("subgraph"));
max_batch_ = context.Attr<int>("max_batch");
int max_batch = context.Attr<int>("max_batch");
auto max_workspace = context.Attr<int>("max_workspace");
engine_ = Singleton<TRT_EngineManager>::Global().Create(
max_batch_, max_workspace, &stream_);
engine_->InitNetwork();
auto params = context.Attr<std::vector<std::string>>("parameters");
std::unordered_set<std::string> parameters;
for (const auto &param : params) {
parameters.insert(param);
}
// TODO(Superjomn) replace this with a different stream
auto *engine = Singleton<TRT_EngineManager>::Global().Create(
max_batch, max_workspace, nullptr /*engine hold its own stream*/,
context.Attr<std::string>("engine_uniq_key"));
engine->InitNetwork();
framework::BlockDesc block(nullptr /*programdesc*/, &block_desc);
// Add inputs
......@@ -87,24 +95,23 @@ void paddle::operators::TensorRTEngineKernel<DeviceContext, T>::Prepare(
PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
"TensorRT engine only takes LoDTensor as input");
auto shape = var->GetShape();
engine_->DeclareInput(
engine->DeclareInput(
input, FluidDataType2TRT(
var->Proto()->type().lod_tensor().tensor().data_type()),
Vec2TRT_Dims(var->GetShape()));
}
// TODO(Superjomn) parameters should be passed in after being analyzed from outside.
inference::Singleton<inference::tensorrt::OpConverter>::Global().ConvertBlock(
block_desc, {}, context.scope(), engine_);
block_desc, parameters, context.scope(), engine);
// Add outputs
VLOG(4) << "declare outputs";
for (auto &output : context.Outputs("Ys")) {
VLOG(4) << "declare output " << output;
engine_->DeclareOutput(output);
engine->DeclareOutput(output);
}
engine_->FreezeNetwork();
engine->FreezeNetwork();
}
class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
......@@ -113,6 +120,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Xs", "A list of inputs.").AsDuplicable();
AddOutput("Ys", "A list of outputs").AsDuplicable();
AddAttr<std::string>("subgraph", "the subgraph.");
AddAttr<std::string>("engine_uniq_key", "unique key for the TRT engine.");
AddAttr<int>("max_batch", "the maximum batch size.");
AddAttr<int>("max_workspace", "the maximum batch size.");
AddComment("TensorRT engine operator.");
......
......@@ -19,10 +19,14 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
namespace paddle {
namespace operators {
using inference::Singleton;
using inference::tensorrt::TRT_EngineManager;
class TensorRTEngineOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -47,16 +51,18 @@ template <typename DeviceContext, typename T>
class TensorRTEngineKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
if (!engine_) {
auto engine_name = context.Attr<std::string>("engine_uniq_key");
if (!Singleton<TRT_EngineManager>::Global().HasEngine(engine_name)) {
Prepare(context);
}
auto* engine = Singleton<TRT_EngineManager>::Global().Get(engine_name);
auto input_names = context.op().Inputs("Xs");
PADDLE_ENFORCE(!input_names.empty(), "should pass at least one input");
// Try to determine a batch_size
auto& tensor0 = inference::analysis::GetFromScope<framework::LoDTensor>(
context.scope(), input_names.front());
int batch_size = tensor0.dims()[0];
PADDLE_ENFORCE_LE(batch_size, max_batch_);
PADDLE_ENFORCE_LE(batch_size, context.Attr<int>("max_batch"));
// Convert input tensor from fluid to engine.
for (const auto& x : context.Inputs("Xs")) {
......@@ -64,20 +70,20 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(
context.scope(), x);
if (platform::is_cpu_place(t.place())) {
engine_->SetInputFromCPU(x, static_cast<const void*>(t.data<void>()),
t.memory_size());
engine->SetInputFromCPU(x, static_cast<const void*>(t.data<void>()),
t.memory_size());
} else {
engine_->SetInputFromGPU(x, static_cast<const void*>(t.data<void>()),
t.memory_size());
engine->SetInputFromGPU(x, static_cast<const void*>(t.data<void>()),
t.memory_size());
}
}
// Execute the engine.
PADDLE_ENFORCE_GT(batch_size, 0);
engine_->Execute(batch_size);
engine->Execute(batch_size);
// Convert output tensor from engine to fluid
for (const auto& y : context.Outputs("Ys")) {
// convert output and copy to fluid.
nvinfer1::ITensor* trt_t = engine_->GetITensor(y);
nvinfer1::ITensor* trt_t = engine->GetITensor(y);
auto dims = trt_t->getDimensions();
// Use the output ITensor's dims to reshape the Fluid Tensor.
std::vector<int> ddim(dims.d, dims.d + dims.nbDims);
......@@ -89,27 +95,22 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto size = inference::analysis::AccuDims(dims.d, dims.nbDims);
if (platform::is_cpu_place(fluid_t->place())) {
// TODO(Superjomn) change this float to dtype size.
engine_->GetOutputInCPU(
engine->GetOutputInCPU(
y, fluid_t->mutable_data<float>(platform::CPUPlace()),
size * sizeof(float));
} else {
engine_->GetOutputInGPU(
engine->GetOutputInGPU(
y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
size * sizeof(float));
}
}
cudaStreamSynchronize(stream_);
cudaStreamSynchronize(*engine->stream());
}
protected:
// Build the engine.
void Prepare(const framework::ExecutionContext& context) const;
private:
mutable cudaStream_t stream_;
mutable inference::tensorrt::TensorRTEngine* engine_{nullptr};
mutable int max_batch_{0};
};
} // namespace operators
......
......@@ -79,6 +79,17 @@ void SetAttr<int64_t>(framework::proto::OpDesc* op, const std::string& name,
attr->set_type(paddle::framework::proto::AttrType::LONG);
attr->set_l(data);
}
template <>
void SetAttr<std::vector<std::string>>(framework::proto::OpDesc* op,
const std::string& name,
const std::vector<std::string>& data) {
auto* attr = op->add_attrs();
attr->set_name(name);
attr->set_type(paddle::framework::proto::AttrType::STRINGS);
for (const auto& s : data) {
attr->add_strings(s.c_str());
}
}
} // namespace
......@@ -123,11 +134,15 @@ TEST(TensorRTEngineOp, manual) {
engine_op_desc.SetOutput("Ys", std::vector<std::string>({"z0"}));
SetAttr<std::string>(engine_op_desc.Proto(), "subgraph",
block_->SerializeAsString());
SetAttr<int>(engine_op_desc.Proto(), "max_batch", 30);
SetAttr<int>(engine_op_desc.Proto(), "max_batch", 100);
SetAttr<int>(engine_op_desc.Proto(), "max_workspace", 1 << 10);
SetAttr<std::string>(engine_op_desc.Proto(), "engine_uniq_key", "a_engine");
SetAttr<std::vector<std::string>>(engine_op_desc.Proto(), "parameters",
std::vector<std::string>({}));
LOG(INFO) << "create engine op";
auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto());
LOG(INFO) << "engine_op " << engine_op.get();
framework::Scope scope;
platform::CPUPlace place;
......@@ -145,6 +160,88 @@ TEST(TensorRTEngineOp, manual) {
engine_op->Run(scope, place);
}
void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
framework::ProgramDesc program;
framework::Scope scope;
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
auto* block_ = program.Proto()->add_blocks();
block_->set_idx(0);
block_->set_parent_idx(-1);
using shape_t = std::vector<int64_t>;
LOG(INFO) << "create block desc";
framework::BlockDesc block_desc(&program, block_);
auto AddFCLayer = [&](const std::string& x_name, const std::string& y_name,
const std::string& z_name, bool x_created,
const shape_t& x_shape, const shape_t& y_shape,
const shape_t& z_shape) {
LOG(INFO) << "create fc op";
auto* fc = block_desc.AppendOp();
fc->SetType("mul");
fc->SetInput("X", std::vector<std::string>({x_name}));
fc->SetInput("Y", std::vector<std::string>({y_name}));
fc->SetOutput("Out", std::vector<std::string>({z_name}));
// Set inputs' variable shape in BlockDesc
if (!x_created) {
AddTensorToBlockDesc(block_, x_name,
std::vector<int64_t>({batch_size, input_dim, 1, 1}));
}
AddTensorToBlockDesc(block_, y_name,
std::vector<int64_t>({input_dim, output_dim}));
AddTensorToBlockDesc(block_, z_name,
std::vector<int64_t>({batch_size, output_dim}));
// Prepare variables.
if (!x_created) {
CreateCPUTensor(&scope, x_name, std::vector<int64_t>(x_shape));
}
CreateCPUTensor(&scope, y_name, std::vector<int64_t>(y_shape));
CreateCPUTensor(&scope, z_name, std::vector<int64_t>(z_shape));
// It is weird; we need to copy manually.
*block_->add_ops() = *fc->Proto();
};
// Test with a 4-layer FC
AddFCLayer("x0", "y0", "z0", false, {batch_size, input_dim},
{input_dim, output_dim}, {batch_size, output_dim});
AddFCLayer("z0", "y1", "z1", true, {}, {output_dim, output_dim},
{batch_size, output_dim});
AddFCLayer("z1", "y2", "z2", true, {}, {output_dim, output_dim},
{batch_size, output_dim});
AddFCLayer("z2", "y3", "z3", true, {}, {output_dim, output_dim},
{batch_size, output_dim});
LOG(INFO) << "create tensorrt desc";
framework::OpDesc engine_op_desc(nullptr);
engine_op_desc.SetType("tensorrt_engine");
engine_op_desc.SetInput("Xs", std::vector<std::string>({"x0"}));
engine_op_desc.SetOutput("Ys", std::vector<std::string>({"z3"}));
SetAttr<std::string>(engine_op_desc.Proto(), "subgraph",
block_->SerializeAsString());
SetAttr<int>(engine_op_desc.Proto(), "max_batch", batch_size);
SetAttr<int>(engine_op_desc.Proto(), "max_workspace", 2 << 10);
SetAttr<std::vector<std::string>>(
engine_op_desc.Proto(), "parameters",
std::vector<std::string>({"y0", "y1", "y2", "y3"}));
SetAttr<std::string>(engine_op_desc.Proto(), "engine_uniq_key", "b_engine");
auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto());
// Execute them.
engine_op->Run(scope, place);
}
// Test with a larger FC layer.
TEST(TensorRTEngineOp, fc) { Execute(40, 256, 256); }
} // namespace operators
} // namespace paddle
......
......@@ -35,10 +35,10 @@ class UniformRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
protected:
void Apply() override {
AddComment(R"DOC(
Uniform random operator
UniformRandomBatchSizeLike operator.
This operator initializes a tensor with the same batch_size as the Input tensor
with random values sampled from a uniform distribution.
with random values sampled from a uniform distribution.
)DOC");
AddAttr<float>("min",
......
......@@ -86,32 +86,24 @@ class UniformRandomOp : public framework::OperatorWithKernel {
class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddOutput("Out", "(Tensor) The output tensor of uniform random op");
AddOutput("Out", "The output tensor of uniform random op");
AddComment(R"DOC(
Uniform random operator.
This operator initializes a tensor with random values sampled from a
uniform distribution.
uniform distribution. The random result lies in the range [min, max].
)DOC");
AddAttr<std::vector<int>>("shape",
"(vector<int>) The shape of the output tensor");
AddAttr<float>("min",
"(float, default -1.0) "
"Minimum value of uniform random")
AddAttr<std::vector<int>>("shape", "The shape of the output tensor");
AddAttr<float>("min", "Minimum value of uniform random. [default -1.0].")
.SetDefault(-1.0f);
AddAttr<float>("max",
"(float, default 1.0) "
"Maximun value of uniform random")
AddAttr<float>("max", "Maximun value of uniform random. [default 1.0].")
.SetDefault(1.0f);
AddAttr<int>("seed",
"(int, default 0) "
"Random seed used for generating samples. "
"0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time.")
"generate the same random numbers every time. [default 0].")
.SetDefault(0);
AddAttr<int>("dtype", "(int, default 5(FP32)) Output tensor data type")
AddAttr<int>("dtype", "Output tensor data type. [default 5(FP32)].")
.SetDefault(framework::proto::VarType::FP32);
}
};
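A minimal sketch of the min/max/seed semantics, using std::uniform_real_distribution in place of the operator's actual generator:

#include <iostream>
#include <random>
#include <vector>
int main() {
  // min/max mirror the attributes above; a fixed non-zero seed makes the
  // sequence reproducible, as the seed attribute's comment notes.
  const float min = -1.0f, max = 1.0f;
  std::mt19937 rng(/*seed=*/42);
  std::uniform_real_distribution<float> dist(min, max);
  std::vector<float> tensor(8);
  for (float& v : tensor) v = dist(rng);
  for (float v : tensor) std::cout << v << ' ';
  std::cout << '\n';
}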
......
......@@ -322,7 +322,6 @@ class DeviceTracerImpl : public DeviceTracer {
DisableActivity();
dynload::cuptiUnsubscribe(subscriber_);
CUPTI_CALL(dynload::cuptiGetTimestamp(&end_ns_));
PADDLE_ENFORCE(dynload::cuptiFinalize());
enabled_ = false;
}
......
......@@ -72,7 +72,6 @@ extern void *cupti_dso_handle;
__macro(cuptiGetResultString); \
__macro(cuptiActivityGetNumDroppedRecords); \
__macro(cuptiActivityFlushAll); \
__macro(cuptiFinalize); \
__macro(cuptiSubscribe); \
__macro(cuptiUnsubscribe); \
__macro(cuptiEnableCallback); \
......
......@@ -41,6 +41,11 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) {
}
}
// NOTE(minqiyang): according to the ncclGroupEnd documentations:
// https://docs.nvidia.com/deeplearning/sdk/nccl-api/ncclapidoc.html,
// ncclGroupEnd will wait for all communicators to be initialized, which will
// cause a blocking problem when a runtime_error is thrown, so only guard
// NCCL actions when using it.
class NCCLGroupGuard {
public:
static std::mutex &NCCLMutex() {
......
......@@ -144,28 +144,74 @@ PYBIND11_PLUGIN(core) {
py::class_<LoDTensor, Tensor>(m, "LoDTensor")
.def_buffer(
[](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
.def(
"__init__",
[](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) {
LoD new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
new (&instance) LoDTensor(new_lod);
})
.def("__init__",
[](LoDTensor &instance, const std::vector<std::vector<size_t>>
&recursive_sequence_lengths) {
LoD new_lod;
new_lod.reserve(recursive_sequence_lengths.size());
std::copy(recursive_sequence_lengths.begin(),
recursive_sequence_lengths.end(),
std::back_inserter(new_lod));
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
PADDLE_ENFORCE(
CheckLoD(new_offset_lod, -1),
"the provided recursive_sequence_lengths info is invalid");
new (&instance) LoDTensor(new_offset_lod);
})
.def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
.def("set_lod",
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
// the input lod is offset-based level-of-detail info
LOG(WARNING)
<< "set_lod is deprecated and will be removed by 9.2018, "
"please switch to set_recursive_sequence_lengths.";
LoD new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()),
"the provided lod info is invalid");
self.set_lod(new_lod);
})
.def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
auto lod = self.lod();
std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod;
.def("set_recursive_sequence_lengths",
[](LoDTensor &self, const std::vector<std::vector<size_t>>
&recursive_sequence_lengths) {
// the input recursive_sequence_lengths is length-based
// level-of-detail info
LoD new_lod;
new_lod.reserve(recursive_sequence_lengths.size());
std::copy(recursive_sequence_lengths.begin(),
recursive_sequence_lengths.end(),
std::back_inserter(new_lod));
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
PADDLE_ENFORCE(
CheckLoD(new_offset_lod, vectorize(self.dims()).front()),
"the provided recursive_sequence_lengths info is invalid");
self.set_lod(new_offset_lod);
})
.def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the offset-based lod info
LOG(WARNING) << "lod is deprecated and will be removed by 9.2018, "
"please switch to recursive_sequence_lengths.";
LoD lod = self.lod();
std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod;
})
.def("recursive_sequence_lengths",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the length-based lod info
LoD lod = ConvertToLengthBasedLoD(self.lod());
std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod;
})
.def("has_valid_recursive_sequence_lengths", [](LoDTensor &self) -> bool {
// Check that the lod info is valid and match the outermost
// dimension of the LoDTensor data
return CheckLoD(self.lod(), vectorize(self.dims()).front());
});
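A minimal sketch of the length-based to offset-based LoD conversion these bindings rely on, mirroring what ConvertToOffsetBasedLoD does (the helper name here is illustrative):

#include <cstdio>
#include <vector>
using LoD = std::vector<std::vector<size_t>>;
// Length-based level-of-detail info -> offset-based prefix sums.
LoD ToOffsetBased(const LoD& lengths) {
  LoD offsets(lengths.size());
  for (size_t lvl = 0; lvl < lengths.size(); ++lvl) {
    offsets[lvl].push_back(0);
    for (size_t len : lengths[lvl])
      offsets[lvl].push_back(offsets[lvl].back() + len);
  }
  return offsets;
}
int main() {
  // Two sequences of lengths 2 and 3 -> offsets [0, 2, 5].
  const LoD offsets = ToOffsetBased({{2, 3}});
  for (size_t v : offsets[0]) std::printf("%zu ", v);
  std::printf("\n");
}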
py::class_<SelectedRows>(m, "SelectedRows")
......
......@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef MATHFUNCTIONS_H_
#define MATHFUNCTIONS_H_
#pragma once
#ifdef PADDLE_WITH_MKLML
#include <mkl_cblas.h>
......@@ -21,7 +20,7 @@ limitations under the License. */
#include <mkl_vml_functions.h>
#endif
#if defined(PADDLE_USE_VECLIB)
#ifdef PADDLE_USE_VECLIB
extern "C" {
#include <cblas.h>
#include <clapack.h>
......@@ -30,8 +29,10 @@ extern "C" {
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#ifdef LAPACK_FOUND
#include <lapacke.h>
#endif
#endif
#ifndef LAPACK_FOUND
extern "C" {
......@@ -126,5 +127,3 @@ template <class T>
void vTanh(const int n, const T* a, T* r);
} // namespace paddle
......@@ -132,7 +132,8 @@ EOF
-DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
-DWITH_ANAKIN=ON
}
function abort(){
......
......@@ -31,6 +31,7 @@ int main(int argc, char** argv) {
strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
#else
new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn"));
new_argv.push_back(strdup("--undefok=use_mkldnn"));
#endif
int new_argc = static_cast<int>(new_argv.size());
char** new_argv_address = new_argv.data();
......
......@@ -47,7 +47,7 @@ class DataToLoDTensorConverter(object):
self.lod = []
for i in six.moves.range(lod_level):
self.lod.append([])
def feed(self, data):
self._feed_impl_(data, self.lod, self.lod_level)
......@@ -56,8 +56,7 @@ class DataToLoDTensorConverter(object):
if lod_level == 0:
self.data.append(data)
else:
lod[0].append(len(data))
for each_data in data:
self._feed_impl_(each_data, lod[1:], lod_level - 1)
......@@ -66,7 +65,7 @@ class DataToLoDTensorConverter(object):
t = core.LoDTensor()
t.set(arr, self.place)
if self.lod_level > 0:
t.set_recursive_sequence_lengths(self.lod)
return t
......
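A hedged sketch (hypothetical batch, not from this commit) of the length-based bookkeeping the converter now performs for a lod_level-1 input:

.. code-block:: python

    # Three sequences with lengths 2, 3 and 1.
    data = [[1, 2], [3, 4, 5], [6]]
    lod = [[]]                    # self.lod after __init__ when lod_level == 1
    for seq in data:
        lod[0].append(len(seq))   # lengths, not running offsets
    print(lod)                    # [[2, 3, 1]]
    # The old offset-based form of the same info was [[0, 2, 5, 6]].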
......@@ -1034,6 +1034,37 @@ class Block(object):
class Program(object):
"""
Python Program. Beneath it is a ProgramDesc, which is used to create
the C++ Program. A Program is a self-contained, container-like
programming construct. It has at least one Block, and when a control
flow op such as conditional_block or while_op is included, it will
contain nested blocks.
Please refer to framework.proto for details.
Notes: a default_startup_program and a default_main_program are
provided by default, and the pair shares parameters.
The default_startup_program runs only once to initialize parameters;
the default_main_program runs in every mini-batch and adjusts the weights.
Args:
None
Returns:
Python Program
Examples:
.. code-block:: python
main_program = Program()
startup_program = Program()
with fluid.program_guard(main_program=main_program, startup_program=startup_program):
fluid.layers.data(name="x", shape=[-1, 784], dtype='float32')
fluid.layers.data(name="y", shape=[-1, 1], dtype='int32')
fluid.layers.fc(name="fc", shape=[10], dtype='float32', act="relu")
"""
def __init__(self):
self.desc = core.ProgramDesc()
self.blocks = [Block(self, 0)]
......@@ -1099,6 +1130,8 @@ class Program(object):
def clone(self, for_test=False):
"""Clone the Program object
Args:
for_test(bool): Indicates whether to clone the program for testing.
Set for_test to False when we want to clone the program for training.
Set for_test to True when we want to clone the program for testing.
......@@ -1109,8 +1142,9 @@ class Program(object):
the is_test attributes in these operators will be set to True for
testing purposes, otherwise, they remain unchanged.
Returns:
Program: The cloned Program object.
"""
if for_test:
p = self.inference_optimize()
......@@ -1228,6 +1262,7 @@ class Program(object):
def copy_param_info_from(self, other):
"""
Copy the information of parameters from other program.
Args:
other(Program): Other program
......@@ -1246,6 +1281,7 @@ class Program(object):
def copy_data_info_from(self, other):
"""
Copy the information of data variables from other program.
Args:
other(Program): Other program
......@@ -1299,6 +1335,7 @@ class Parameter(Variable):
def to_string(self, throw_on_error, with_details=False):
"""
To debug string.
Args:
throw_on_error(bool): raise exception when self is not initialized
when throw_on_error is True
......
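A hedged usage sketch for `Program.clone` documented above (program names assumed):

.. code-block:: python

    import paddle.fluid as fluid

    # Assume a training network has been built into the default main program.
    train_program = fluid.default_main_program()
    # Derive a testing variant; is_test attributes are flipped to True.
    test_program = train_program.clone(for_test=True)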
......@@ -15,11 +15,13 @@
import framework
import numpy as np
import contextlib
from framework import convert_np_dtype_to_dtype_
from core import VarDesc
__all__ = [
'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu',
'init_on_cpu', 'ConstantInitializer', 'UniformInitializer',
'NormalInitializer', 'XavierInitializer', 'BilinearInitializer'
]
_force_init_on_cpu_ = False
......@@ -422,6 +424,101 @@ class MSRAInitializer(Initializer):
return op
class BilinearInitializer(Initializer):
"""Implements the bilinear initializer.
This initializer can be used in transposed convolution operator to
act as upsampling. Users can upsample a feature map with shape of
(B, C, H, W) by any integer factor. The usage is:
>>> factor = 2
>>> w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
>>> initializer=Bilinear())
>>> conv_up = fluid.layers.conv2d_transpose(
>>> input,
>>> num_filters=C,
>>> output_size=None,
>>> filter_size=2 * factor - factor % 2,
>>> padding=ceil((factor - 1) / 2.),
>>> stride=factor,
>>> groups=C,
>>> param_attr=w_attr,
>>> bias_attr=False)
Where, `num_filters=C` and `groups=C` means this is channel-wise transposed
convolution. The filter shape will be (C, 1, K, K) where K is `filter_size`.
This initializer will set a (K, K) interpolation kernel for every channel
of the filter identically. The resulting shape of the output feature map
will be (B, C, factor * H, factor * W). Note that the learning rate and the
weight decay are set to 0 in order to keep coefficient values of bilinear
interpolation unchanged during training.
"""
def __init__(self):
"""Constructor for BilinearInitializer.
"""
super(BilinearInitializer, self).__init__()
def __call__(self, var, block):
"""Add biliear initialization ops for a variable
Args:
var (Variable): Variable that needs to be initialized.
block (Block): The block in which initialization ops should
be added.
Returns:
the initialization op
Raises:
ValueError: If type of `var` and `block` is not right.
If the rank of `var` is not 4, or if
var.shape[2] != var.shape[3].
"""
if not isinstance(var, framework.Variable):
raise ValueError("var must be framework.Variable.")
if not isinstance(block, framework.Block):
raise ValueError("block must be framework.Block.")
shape = var.shape
if len(shape) != 4:
raise ValueError("the length of shape must be 4.")
if shape[2] != shape[3]:
raise ValueError("shape[2] must be equal to shape[3].")
weight = np.zeros(np.prod(var.shape), dtype='float32')
size = shape[3]
# factor
f = np.ceil(size / 2.)
# center
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(np.prod(shape)):
x = i % size
y = (i / size) % size
weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
weight = np.reshape(weight, shape)
if var.dtype == VarDesc.VarType.FP32:
value_name = "fp32_values"
values = [float(v) for v in weight.flat]
else:
raise ValueError("Unsupported dtype %s", input.dtype)
if np.prod(shape) > 1024 * 1024:
raise ValueError("The size of input is too big. ")
op = block.append_op(
type='assign_value',
outputs={'Out': [var]},
attrs={
'dtype': var.dtype,
'shape': list(shape),
value_name: values
})
var.op = op
return op
# We shorten the class name, since users will use the initializer with the package
# name. The sample code:
#
......@@ -436,3 +533,4 @@ Uniform = UniformInitializer
Normal = NormalInitializer
Xavier = XavierInitializer
MSRA = MSRAInitializer
Bilinear = BilinearInitializer
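A minimal numpy sketch (not part of this commit) that reproduces the kernel BilinearInitializer builds for an upsampling factor of 2, following the same formulas as `__call__` above:

.. code-block:: python

    import numpy as np

    factor = 2
    K = 2 * factor - factor % 2            # filter_size, here 4
    f = np.ceil(K / 2.)                    # 2.0
    c = (2 * f - 1 - f % 2) / (2. * f)     # 0.75
    line = 1 - np.abs(np.arange(K) / f - c)    # [0.25, 0.75, 0.75, 0.25]
    kernel = np.outer(line, line)          # the (K, K) interpolation kernel
    print(kernel)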
......@@ -20,6 +20,7 @@ from ..framework import Program, Variable, Operator
from ..layer_helper import LayerHelper, unique_name
from ..initializer import force_init_on_cpu
from ops import logical_and, logical_not, logical_or
import numpy
__all__ = [
'split_lod_tensor',
......@@ -233,9 +234,56 @@ class BlockGuard(object):
class ParallelDo(object):
"""
ParallelDo class.
ParallelDo is used to represent multi-thread data parallel processing.
Its vanilla implementation can be shown as the following (:math:`|` means
single thread and :math:`||||` means multiple threads)
.. code-block:: text
In the forward pass
| Split input onto different devices
| Copy parameter onto different devices
|||| Compute forward pass in parallel
| Merge output from different devices
In the backward pass
| Split output@grad onto different devices
|||| Compute backward pass in parallel
| accumulate param@grad from different devices to the first device
| Merge input@grad from different devices
| Copy param@grad to the place of parallel_do_op
Examples:
.. code-block:: python
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# ParallelDo version & Single-thread version
if thread_num > 1:
places = fluid.layers.get_places(thread_num)
pd = fluid.layers.ParallelDo(places)
with pd.do():
images = pd.read_input(images)
label = pd.read_input(label)
predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
pd.write_output(avg_cost)
avg_cost = pd()
avg_cost = fluid.layers.mean(avg_cost)
else:
predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
.. warning::
It will be soon deprecated, please use ParallelExecutor instead.
"""
def __init__(self, places, use_nccl=False, name=None):
......@@ -606,6 +654,29 @@ class WhileGuard(BlockGuard):
class While(object):
"""
while loop control flow.
Args:
cond (Variable): condition used to compare.
name (str): The name of this layer.
Examples:
.. code-block:: python
d0 = layers.data("d0", shape=[10], dtype='float32')
data_array = layers.array_write(x=d0, i=i)
array_len = layers.fill_constant(shape=[1],dtype='int64', value=3)
cond = layers.less_than(x=i, y=array_len)
while_op = layers.While(cond=cond)
with while_op.block():
d = layers.array_read(array=data_array, i=i)
i = layers.increment(x=i, in_place=True)
layers.array_write(result, i=i, array=d)
layers.less_than(x=i, y=array_len, cond=cond)
"""
BEFORE_WHILE_BLOCK = 0
IN_WHILE_BLOCK = 1
AFTER_WHILE_BLOCK = 2
......@@ -675,8 +746,8 @@ def lod_rank_table(x, level=0):
.. code-block:: text
x is a LoDTensor:
x.lod = [[2, 1],
[5, 1, 1]]
x.data = [a, b, c, d, e, f, g]
1. set level to 0:
......@@ -706,7 +777,7 @@ def lod_rank_table(x, level=0):
.. code-block:: python
x = fluid.layers.data(name='x', shape=[10],
dtype='float32', lod_level=1)
out = layers.lod_rank_table(x=x, level=0)
"""
helper = LayerHelper("lod_rank_table", **locals())
......@@ -748,17 +819,25 @@ def max_sequence_len(rank_table):
def lod_tensor_to_array(x, table):
""" Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY.
"""
Convert a LoDTensor to a LoDTensorArray.
This function split a LoDTesnor to a LoDTensorArray according to its LoD
information. LoDTensorArray is an alias of C++ std::vector<LoDTensor> in
PaddlePaddle. The generated LoDTensorArray of this function can be further read
or written by `read_from_array()` and `write_to_array()` operators. However,
this function is generally an internal component of PaddlePaddle `DynamicRNN`.
Users should not use it directly.
Args:
x (Variable|list): The LoDTensor to be converted to a LoDTensorArray.
table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in
descending order. It is generally generated
by `layers.lod_rank_table()` API.
Returns:
Variable: The LoDTensorArray that has been converted from the input tensor.
Examples:
.. code-block:: python
......@@ -823,8 +902,7 @@ def increment(x, value=1.0, in_place=True):
in_place (bool): If the increment should be performed in-place.
Returns:
Variable: The elementwise-incremented object.
Examples:
.. code-block:: python
......@@ -866,7 +944,7 @@ def array_write(x, i, array=None):
Variable: The output LOD_TENSOR_ARRAY where the input tensor is written.
Examples:
.. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
......@@ -909,37 +987,40 @@ def create_array(dtype):
dtype=dtype)
@templatedoc()
def less_than(x, y, force_cpu=None, cond=None, **ignored):
"""
${comment}
>>> import paddle.fluid as fluid
>>> less = fluid.layers.less_than(x=label, y=limit)
Args:
x(${x_type}): ${x_comment}.
y(${y_type}): ${y_comment}.
force_cpu(${force_cpu_type}): ${force_cpu_comment}.
cond(Variable|None): Optional output variable to store the result of *less_than*
Returns:
${out_comment}.
"""
helper = LayerHelper("less_than", **locals())
if cond is None:
cond = helper.create_tmp_variable(dtype='bool')
cond.stop_gradient = True
attrs = dict()
if force_cpu is not None:
attrs['force_cpu'] = force_cpu
elif force_init_on_cpu():
attrs['force_cpu'] = force_init_on_cpu()
helper.append_op(
type='less_than',
inputs={'X': [x],
'Y': [y]},
outputs={'Out': [cond]},
attrs=attrs)
return cond
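Since the docstring above now defers to templatedoc, a hedged usage sketch for `less_than` (variable names assumed, not from this commit):

.. code-block:: python

    import paddle.fluid as fluid

    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    limit = fluid.layers.fill_constant(shape=[1], dtype='int64', value=5)
    cond = fluid.layers.less_than(x=label, y=limit)  # force_cpu left as None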
......@@ -974,19 +1055,38 @@ def equal(x, y, cond=None, **ignored):
def array_read(array, i):
"""This function performs the operation to read the data in as an
"""
This function performs the operation to read the data in as an
LOD_TENSOR_ARRAY.
.. code-block:: text
Given:
array = [0.6, 0.1, 0.3, 0.1]
And:
i = 2
Then:
output = 0.3
Args:
array (Variable|list): The input tensor that store data to be read.
i (Variable|list): The index of the data to be read from input array.
Returns:
Variable: The tensor type variable that has the data written to it.
Examples:
.. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = fluid.layers.array_read(tmp, i=i)
"""
helper = LayerHelper('array_read', **locals())
if not isinstance(
......@@ -1004,8 +1104,28 @@ def array_read(array, i):
def shrink_memory(x, i, table):
"""
This function creates an operator to shrink rnn memory using the RankTable
as mentioned in the input parameter.
NOTE: This API is a very low-level API. It is used by DynamicRNN only.
Since the Dynamic RNN implements RNN without padding, the sequences
are sorted by length, and the length of the valid memory is shrunk
after each time step.
Args:
x(Variable): The memory object in the previous time step.
i(Variable): The step count variable. An int scalar as LoDTensor.
table(Variable): The RNNRankTable object.
Returns:
the memory variable after shrink.
Examples:
Since this is a very low-level API, no example is provided. Please
refer to the implementation of the DynamicRNN class for detailed
usage.
"""
helper = LayerHelper('shrink_memory', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
......@@ -1047,6 +1167,13 @@ def array_length(array):
class ConditionalBlockGuard(BlockGuard):
"""
ConditionalBlockGuard is derived from BlockGuard. It is dedicated to
holding a ConditionalBlock and helping users enter and exit the
ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard
is generally an internal component of IfElse; users should not use it directly.
"""
def __init__(self, block):
if not isinstance(block, ConditionalBlock):
raise TypeError("block should be conditional block")
......@@ -1209,6 +1336,34 @@ class IfElseBlockGuard(object):
class IfElse(object):
"""
if-else control flow.
Args:
cond (Variable): condition used to compare.
name (str, default None): The name of this layer.
Examples:
.. code-block:: python
limit = fluid.layers.fill_constant_batch_size_like(
input=label, dtype='int64', shape=[1], value=5.0)
cond = fluid.layers.less_than(x=label, y=limit)
ie = fluid.layers.IfElse(cond)
with ie.true_block():
true_image = ie.input(image)
hidden = fluid.layers.fc(input=true_image, size=100, act='tanh')
prob = fluid.layers.fc(input=hidden, size=10, act='softmax')
ie.output(prob)
with ie.false_block():
false_image = ie.input(image)
hidden = fluid.layers.fc(
input=false_image, size=200, act='tanh')
prob = fluid.layers.fc(input=hidden, size=10, act='softmax')
ie.output(prob)
prob = ie()
"""
OUT_IF_ELSE_BLOCKS = 0
IN_IF_ELSE_TRUE_BLOCKS = 1
IN_IF_ELSE_FALSE_BLOCKS = 2
......@@ -1311,6 +1466,38 @@ class IfElse(object):
class DynamicRNN(object):
"""
The dynamic RNN can process a batch of sequence data. The length of each
sample sequence can be different. This API automatically processes them
in batch.
The input LoD must be set. Please refer to `lod_tensor`.
>>> import paddle.fluid as fluid
>>> data = fluid.layers.data(name='sentence', dtype='int64', lod_level=1)
>>> embedding = fluid.layers.embedding(input=data, size=[65535, 32],
>>> is_sparse=True)
>>>
>>> drnn = fluid.layers.DynamicRNN()
>>> with drnn.block():
>>> word = drnn.step_input(embedding)
>>> prev = drnn.memory(shape=[200])
>>> hidden = fluid.layers.fc(input=[word, prev], size=200, act='relu')
>>> drnn.update_memory(prev, hidden) # set prev to hidden
>>> drnn.output(hidden)
>>>
>>> # last is the last time step of rnn. It is the encoding result.
>>> last = fluid.layers.sequence_last_step(drnn())
The dynamic RNN will unfold the sequence into time steps. Users need to
define how to process each time step inside the :code:`with` block.
The `memory` is used for staging data across time steps. The initial value
of memory can be zero or another variable.
The dynamic RNN can mark multiple variables as its output. Use `drnn()` to
get the output sequence.
"""
BEFORE_RNN = 0
IN_RNN = 1
AFTER_RNN = 2
......@@ -1333,6 +1520,15 @@ class DynamicRNN(object):
self.mem_link = []
def step_input(self, x):
"""
Mark a sequence as a dynamic RNN input.
Args:
x(Variable): The input sequence.
Returns:
The current timestep in the input sequence.
"""
self._assert_in_rnn_block_("step_input")
if not isinstance(x, Variable):
raise TypeError(
......@@ -1376,6 +1572,15 @@ class DynamicRNN(object):
return array_read(array=input_array, i=self.step_idx)
def static_input(self, x):
"""
Mark a variable as an RNN input. The input will not be scattered into
time steps.
Args:
x(Variable): The input variable.
Returns:
The input variable that can access in RNN.
"""
self._assert_in_rnn_block_("static_input")
if not isinstance(x, Variable):
raise TypeError(
......@@ -1397,6 +1602,10 @@ class DynamicRNN(object):
@contextlib.contextmanager
def block(self):
"""
The block for user to define operators in RNN. See the class docstring
for more details.
"""
if self.status != DynamicRNN.BEFORE_RNN:
raise ValueError("rnn.block() can only be invoke once")
self.step_idx = fill_constant(
......@@ -1423,6 +1632,9 @@ class DynamicRNN(object):
x=each_array, table=self.lod_rank_table))
def __call__(self, *args, **kwargs):
"""
Get the output of RNN. This API should only be invoked after RNN.block()
"""
if self.status != DynamicRNN.AFTER_RNN:
raise ValueError(("Output of the dynamic RNN can only be visited "
"outside the rnn block."))
......@@ -1437,6 +1649,70 @@ class DynamicRNN(object):
value=0.0,
need_reorder=False,
dtype='float32'):
"""
Create a memory variable for dynamic rnn.
If the :code:`init` is not None, :code:`memory` will be initialized by
this variable. The :code:`need_reorder` is used to reorder the memory as
the input variable. It should be set to true when the initialized memory
depends on the input sample.
For example,
>>> import paddle.fluid as fluid
>>> sentence = fluid.layers.data(
>>> name='sentence', dtype='float32', shape=[32])
>>> boot_memory = fluid.layers.data(
>>> name='boot', dtype='float32', shape=[10])
>>>
>>> drnn = fluid.layers.DynamicRNN()
>>> with drnn.block():
>>> word = drnn.step_input(sentence)
>>> memory = drnn.memory(init=boot_memory, need_reorder=True)
>>> hidden = fluid.layers.fc(
>>> input=[word, memory], size=10, act='tanh')
>>> drnn.update_memory(ex_mem=memory, new_mem=hidden)
>>> drnn.output(hidden)
>>> rnn_output = drnn()
Otherwise, if :code:`shape`, :code:`value`, :code:`dtype` are set, the
:code:`memory` will be initialized by this :code:`value`.
For example,
>>> import paddle.fluid as fluid
>>> sentence = fluid.layers.data(
>>> name='sentence', dtype='float32', shape=[32])
>>>
>>> drnn = fluid.layers.DynamicRNN()
>>> with drnn.block():
>>> word = drnn.step_input(sentence)
>>> memory = drnn.memory(shape=[10], dtype='float32', value=0)
>>> hidden = fluid.layers.fc(
>>> input=[word, memory], size=10, act='tanh')
>>> drnn.update_memory(ex_mem=memory, new_mem=hidden)
>>> drnn.output(hidden)
>>> rnn_output = drnn()
Args:
init(Variable|None): The initialized variable.
shape(list|tuple): The memory shape. NOTE the shape does not contain
batch_size.
value(float): The initialized value.
need_reorder(bool): True if the initialized memory depends on the
input sample.
dtype(str|numpy.dtype): The data type of the initialized memory.
Returns:
the memory variable.
"""
self._assert_in_rnn_block_('memory')
if init is not None:
if not isinstance(init, Variable):
......@@ -1504,6 +1780,16 @@ class DynamicRNN(object):
return self.memory(init=init)
def update_memory(self, ex_mem, new_mem):
"""
Update the memory from ex_mem to new_mem. NOTE that the shape and data
type of :code:`ex_mem` and :code:`new_mem` must be the same.
Args:
ex_mem(Variable): the memory variable.
new_mem(Variable): the plain variable generated in RNN block.
Returns:
None
"""
self._assert_in_rnn_block_('update_memory')
if not isinstance(ex_mem, Variable):
raise TypeError("The input arg `ex_mem` of update_memory() must "
......@@ -1521,6 +1807,15 @@ class DynamicRNN(object):
self.mem_link.append((new_mem, mem_array))
def output(self, *outputs):
"""
Mark the RNN output variables.
Args:
outputs: The output variables.
Returns:
None
"""
self._assert_in_rnn_block_('output')
parent_block = self._parent_block_()
for each in outputs:
......@@ -1563,26 +1858,26 @@ def reorder_lod_tensor_by_rank(x, rank_table):
def is_empty(x, cond=None, **ignored):
"""
Test whether a Variable is empty.
Args:
x (Variable): The Variable to be tested.
cond (Variable|None): Output parameter. Returns the test result
of given 'x'. Default: None
Returns:
Variable: A bool scalar. True if 'x' is an empty Variable.
Raises:
TypeError: If input cond is not a variable, or cond's dtype is
not bool.
Examples:
.. code-block:: python
res = fluid.layers.is_empty(x=input)
# or:
fluid.layers.is_empty(x=input, cond=res)
"""
helper = LayerHelper("is_empty", **locals())
if cond is None:
......
......@@ -97,7 +97,9 @@ def detection_output(loc,
nms_eta(float): The parameter for adaptive NMS.
Returns:
Variable:
The detection outputs is a LoDTensor with shape [No, 6].
Each row has six values: [label, confidence, xmin, ymin, xmax, ymax].
`No` is the total number of detections in this mini-batch. For each
instance, the offsets in first dimension are called LoD, the offset
......@@ -110,15 +112,15 @@ def detection_output(loc,
Examples:
.. code-block:: python
pb = layers.data(name='prior_box', shape=[10, 4],
    append_batch_size=False, dtype='float32')
pbv = layers.data(name='prior_box_var', shape=[10, 4],
    append_batch_size=False, dtype='float32')
loc = layers.data(name='target_box', shape=[2, 21, 4],
    append_batch_size=False, dtype='float32')
scores = layers.data(name='scores', shape=[2, 21, 10],
    append_batch_size=False, dtype='float32')
nmsed_outs = fluid.layers.detection_output(scores=scores,
    loc=loc,
    prior_box=pb,
    prior_box_var=pbv)
......@@ -210,53 +212,68 @@ def bipartite_match(dist_matrix,
dist_threshold=None,
name=None):
"""
This operator implements a greedy bipartite matching algorithm, which is
used to obtain the matching with the maximum distance based on the input
distance matrix. For input 2D matrix, the bipartite matching algorithm can
find the matched column for each row (matched means the largest distance),
also can find the matched row for each column. And this operator only
calculate matched indices from column to row. For each instance,
the number of matched indices is the column number of the input distance
matrix.
There are two outputs, matched indices and distance.
A simple description, this algorithm matched the best (maximum distance)
row entity to the column entity and the matched indices are not duplicated
in each row of ColToRowMatchIndices. If the column entity is not matched
any row entity, set -1 in ColToRowMatchIndices.
NOTE: the input DistMat can be LoDTensor (with LoD) or Tensor.
If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size.
If Tensor, the height of ColToRowMatchIndices is 1.
NOTE: This API is a very low level API. It is used by :code:`ssd_loss`
layer. Please consider to use :code:`ssd_loss` instead.
Args:
dist_matrix(Variable): This input is a 2-D LoDTensor with shape
[K, M]. It is pair-wise distance matrix between the entities
represented by each row and each column. For example, assumed one
entity is A with shape [K], another entity is B with shape [M]. The
dist_matrix[i][j] is the distance between A[i] and B[j]. The bigger
the distance is, the better matching the pairs are.
NOTE: This tensor can contain LoD information to represent a batch
of inputs. One instance of this batch can contain different numbers
of entities.
match_type(string|None): The type of matching method, should be
'bipartite' or 'per_prediction'. [default 'bipartite'].
dist_threshold(float|None): If `match_type` is 'per_prediction',
this threshold is to determine the extra matching bboxes based
on the maximum distance, 0.5 by default.
Returns:
tuple: a tuple with two elements is returned. The first is
matched_indices, the second is matched_distance.
The matched_indices is a 2-D Tensor with shape [N, M] in int type.
N is the batch size. If match_indices[i][j] is -1, it
means B[j] does not match any entity in i-th instance.
Otherwise, it means B[j] is matched to row
match_indices[i][j] in i-th instance. The row number of
i-th instance is saved in match_indices[i][j].
The matched_distance is a 2-D Tensor with shape [N, M] in float type
. N is batch size. If match_indices[i][j] is -1,
match_distance[i][j] is also -1.0. Otherwise, assumed
match_distance[i][j] = d, and the row offsets of each instance
are called LoD. Then match_distance[i][j] =
dist_matrix[d+LoD[i]][j].
Examples:
>>> x = fluid.layers.data(name='x', shape=[4], dtype='float32')
>>> y = fluid.layers.data(name='y', shape=[4], dtype='float32')
>>> iou = fluid.layers.iou_similarity(x=x, y=y)
>>> matched_indices, matched_dist = fluid.layers.bipartite_match(iou)
"""
helper = LayerHelper('bipartite_match', **locals())
match_indices = helper.create_tmp_variable(dtype='int32')
......@@ -281,8 +298,6 @@ def target_assign(input,
mismatch_value=None,
name=None):
"""
Given the target bounding boxes or labels, this operator assigns
classification and regression targets to each prediction, as well as
weights to predictions. The weights are used to specify which prediction would
......@@ -296,20 +311,24 @@ def target_assign(input,
1. Assigning all outputs based on `match_indices`:

.. code-block:: text

    If id = match_indices[i][j] > 0,

        out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K]
        out_weight[i][j] = 1.

    Otherwise,

        out[j][j][0 : K] = {mismatch_value, mismatch_value, ...}
        out_weight[i][j] = 0.
2. Assigning out_weight based on `neg_indices` if `neg_indices` is provided:
Assumed that the row offset for each instance in `neg_indices` is called neg_lod,
for i-th instance and each `id` of neg_indices in this instance:
.. code-block:: text
out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
out_weight[i][id] = 1.0
......@@ -326,10 +345,23 @@ def target_assign(input,
mismatch_value (float32): Fill this value to the mismatched location.
Returns:
tuple:
A tuple(out, out_weight) is returned. out is a 3D Tensor with
shape [N, P, K], N and P is the same as they are in
`neg_indices`, K is the same as it in input of X. If
`match_indices[i][j]`. out_weight is the weight for output with
the shape of [N, P, 1].
Examples:
.. code-block:: python
matched_indices, matched_dist = fluid.layers.bipartite_match(iou)
gt = layers.data(
name='gt', shape=[1, 1], dtype='int32', lod_level=1)
trg, trg_weight = layers.target_assign(
gt, matched_indices, mismatch_value=0)
"""
helper = LayerHelper('target_assign', **locals())
out = helper.create_tmp_variable(dtype=input.dtype)
......@@ -364,7 +396,7 @@ def ssd_loss(location,
normalize=True,
sample_size=None):
"""
**Multi-box loss layer for object detection algorithm of SSD**
This layer is to compute the detection loss for SSD given the location offset
predictions, confidence predictions, prior boxes and ground-truth bounding
......@@ -372,21 +404,35 @@ def ssd_loss(location,
is a weighted sum of the localization loss (or regression loss) and
confidence loss (or classification loss) by performing the following steps:
1. Find matched bounding box by bipartite matching algorithm.
1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
1.2 Compute the matched bounding box by the bipartite matching algorithm.
2. Compute confidence for mining hard examples
2.1. Get the target label based on matched indices.
2.2. Compute confidence loss.
3. Apply hard example mining to get the negative example indices and update
the matched indices.
4. Assign classification and regression targets
4.1. Encode the bbox according to the prior boxes.
4.2. Assign regression targets.
4.3. Assign classification targets.
5. Compute the overall objective loss.
5.1 Compute confidence loss.
5.2 Compute localization loss.
5.3 Compute the overall weighted loss.
Args:
......@@ -421,39 +467,36 @@ def ssd_loss(location,
mining_type (str): The hard example mining type, should be 'hard_example'
or 'max_negative', now only support `max_negative`.
normalize (bool): Whether to normalize the SSD loss by the total number
of output locations, True by default.
sample_size (int): The max sample size of negative box, used only when
mining_type is 'hard_example'.
Returns:
The weighted sum of the localization loss and confidence loss, with \
shape [N * Np, 1], N and Np are the same as they are in `location`.
Raises:
ValueError: If mining_type is 'hard_example', now only support mining \
type of `max_negative`.
Examples:
>>> pb = fluid.layers.data(
>>> name='prior_box',
>>> shape=[10, 4],
>>> append_batch_size=False,
>>> dtype='float32')
>>> pbv = fluid.layers.data(
>>> name='prior_box_var',
>>> shape=[10, 4],
>>> append_batch_size=False,
>>> dtype='float32')
>>> loc = fluid.layers.data(name='target_box', shape=[10, 4], dtype='float32')
>>> scores = fluid.layers.data(name='scores', shape=[10, 21], dtype='float32')
>>> gt_box = fluid.layers.data(
>>> name='gt_box', shape=[4], lod_level=1, dtype='float32')
>>> gt_label = fluid.layers.data(
>>> name='gt_label', shape=[1], lod_level=1, dtype='float32')
>>> loss = fluid.layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv)
"""
helper = LayerHelper('ssd_loss', **locals())
......
......@@ -22,9 +22,9 @@ from ..executor import global_scope
from layer_function_generator import generate_layer_fn, templatedoc
__all__ = [
'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'Recv',
'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch',
'double_buffer', 'random_data_generator', 'Preprocessor', 'load'
]
......@@ -202,18 +202,17 @@ class ListenAndServ(object):
})
def Send(endpoints, send_vars, sync=True):
"""
Send layer
Send variables to the server side, and get vars from the server
side when the server has finished running the server-side program.
Args:
endpoints (str): comma separated IP:PORT pairs in the order
of send_vars to send
send_vars (list): variables to send to server
sync (bool): whether to wait for the request to finish
"""
assert (type(send_vars) == list)
......@@ -221,40 +220,33 @@ def Send(endpoints, send_vars, get_vars=None):
endpoints = list(set(epmap))
helper = LayerHelper("Send", **locals())
rpc_op_role_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
helper.append_op(
type="send",
inputs={"X": send_vars},
outputs={"Out": get_vars},
attrs={
"endpoints": endpoints,
"epmap": epmap,
rpc_op_role_name: core.op_proto_and_checker_maker.OpRole.RPC
})
if sync:
helper.append_op(type="send_barrier", attrs={"endpoints": endpoints})
def Recv(endpoints, get_vars, sync=True):
"""
Recv layer
Receive variables from server side
Args:
endpoints (str): comma separated IP:PORT pairs in the order
of get_vars to receive
get_vars (list): vars to get from server after send completes.
sync (bool): whether to wait for the request to finish
Returns:
list: list of received variables
"""
assert (type(get_vars) == list)
epmap = endpoints.split(",")
......@@ -267,6 +259,9 @@ def Recv(endpoints, get_vars):
outputs={"Out": get_vars},
attrs={"endpoints": endpoints,
"epmap": epmap})
if sync:
helper.append_op(type="fetch_barrier", attrs={"endpoints": endpoints})
return get_vars
def monkey_patch_reader_methods(reader):
......@@ -317,6 +312,7 @@ def _copy_reader_create_op_(block, op):
return new_op
@templatedoc(op_type='create_recordio_file_reader')
def open_recordio_file(filename,
shapes,
lod_levels,
......@@ -324,34 +320,30 @@ def open_recordio_file(filename,
pass_num=1,
for_parallel=True):
"""
${comment}
Args:
filename(${filename_type}): ${filename_comment}.
shapes(list): List of tuples which declare the data shapes.
lod_levels(${lod_levels_type}): ${lod_levels_comment}.
dtypes(list): List of strs which declare the data types.
pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel.
Returns:
${out_comment}.
Examples:
>>> import paddle.fluid as fluid
>>> reader = fluid.layers.io.open_recordio_file(
>>> filename='./data.recordio',
>>> shapes=[(3,224,224), (1)],
>>> lod_levels=[0, 0],
>>> dtypes=['float32', 'int64'])
>>> # Via the reader, we can use 'read_file' layer to get data:
>>> image, label = fluid.layers.io.read_file(reader)
"""
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
shape_concat = []
......@@ -411,16 +403,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
Variable: A Reader Variable from which we can get random data.
Examples:
.. code-block:: python
reader = fluid.layers.random_data_generator(
low=0.0,
high=1.0,
shapes=[[3,224,224], [1]],
lod_levels=[0, 0])
# Via the reader, we can use 'read_file' layer to get data:
image, label = fluid.layers.read_file(reader)
"""
dtypes = [core.VarDesc.VarType.FP32] * len(shapes)
shape_concat = []
......@@ -569,16 +561,77 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):
def shuffle(reader, buffer_size):
"""
Shuffle the reader: up to `buffer_size` instances are buffered and yielded in a random order.
"""
return __create_unshared_decorated_reader__(
'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)})
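A hedged usage sketch for `shuffle` (file names assumed), mirroring the `batch` example below:

.. code-block:: python

    raw_reader = fluid.layers.open_files(filenames=['./data1.recordio'],
                                         shapes=[(3,224,224), (1)],
                                         lod_levels=[0, 0],
                                         dtypes=['float32', 'int64'],
                                         thread_num=2)
    # Buffer up to 64 instances and emit them in a random order.
    shuffled_reader = fluid.layers.shuffle(reader=raw_reader, buffer_size=64)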
def batch(reader, batch_size):
"""
This layer is a reader decorator. It takes a reader and adds
'batching' decoration on it. When reading with the result
decorated reader, output data will be automatically organized
to the form of batches.
Args:
reader(Variable): The reader to be decorated with 'batching'.
batch_size(int): The batch size.
Returns:
Variable: The reader which has been decorated with 'batching'.
Examples:
.. code-block:: python
raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
'./data2.recordio'],
shapes=[(3,224,224), (1)],
lod_levels=[0, 0],
dtypes=['float32', 'int64'],
thread_num=2,
buffer_size=2)
batch_reader = fluid.layers.batch(reader=raw_reader, batch_size=5)
# If we read data with the raw_reader:
# data = fluid.layers.read_file(raw_reader)
# We can only get data instance by instance.
#
# However, if we read data with the batch_reader:
# data = fluid.layers.read_file(batch_reader)
# Each 5 adjacent instances will be automatically combined together
# to become a batch. So what we get('data') is a batch data instead
# of an instance.
"""
return __create_unshared_decorated_reader__(
'create_batch_reader', reader, {'batch_size': int(batch_size)})
def double_buffer(reader, place=None, name=None):
"""
Wrap a double buffer reader. The data will be copied to the target place
with a double buffer queue. If the target place is None, the place where
the executor runs will be used.
Args:
reader(Variable): the reader variable to be wrapped.
place(Place): the place of the target data. Default is the same place
    the executor runs on.
name(str): Variable name. None if the user does not care.
Returns:
wrapped reader with double buffer.
Examples:
>>> reader = fluid.layers.open_files(filenames=['somefile'],
>>> shapes=[[-1, 784], [-1, 1]],
>>> dtypes=['float32', 'int64'])
>>> reader = fluid.layers.double_buffer(reader)
>>> img, label = fluid.layers.read_file(reader)
"""
attrs = dict()
if place is not None:
attrs['place'] = str(place).upper()
......@@ -596,15 +649,41 @@ def parallel(reader):
{})
def read_file(file_obj):
def read_file(reader):
"""
Execute the given reader and get data via it.
A reader is also a Variable. It can be a raw reader generated by
`fluid.layers.open_files()` or a decorated one generated by
`fluid.layers.double_buffer()` and so on.
Args:
reader(Variable): The reader to execute.
Returns:
Tuple[Variable]: Data read via the given reader.
Examples:
.. code-block:: python
data_file = fluid.layers.open_files(
filenames=['mnist.recordio'],
shapes=[(-1, 748), (-1, 1)],
lod_levels=[0, 0],
dtypes=["float32", "int64"])
data_file = fluid.layers.double_buffer(
fluid.layers.batch(data_file, batch_size=64))
input, label = fluid.layers.read_file(data_file)
"""
helper = LayerHelper('read_file')
out = [
helper.create_tmp_variable(
stop_gradient=True, dtype='float32')
for _ in range(len(file_obj.desc.shapes()))
for _ in range(len(reader.desc.shapes()))
]
helper.append_op(
type='read', inputs={'Reader': [file_obj]}, outputs={'Out': out})
type='read', inputs={'Reader': [reader]}, outputs={'Out': out})
if len(out) == 1:
return out[0]
else:
......@@ -612,6 +691,26 @@ def read_file(file_obj):
class Preprocessor(object):
"""
A block for data pre-processing in reader.
Args:
reader (Variable): A reader variable.
name (str, default None): The name of the reader.
Examples:
.. code-block:: python
preprocessor = fluid.layers.io.Preprocessor(reader=reader)
with preprocessor.block():
img, lbl = preprocessor.inputs()
img_out = img / 2
lbl_out = lbl + 1
preprocessor.outputs(img_out, lbl_out)
data_file = fluid.layers.io.double_buffer(preprocessor())
"""
BEFORE_SUB_BLOCK = 0
IN_SUB_BLOCK = 1
AFTER_SUB_BLOCK = 2
......
......@@ -44,6 +44,11 @@ def _type_to_str_(tp):
return framework_pb2.AttrType.Name(tp)
_two_dollar_pattern_ = re.compile(r"\$\$([^\$]+)\$\$")
_single_dollar_pattern_ = re.compile(r"\$([^\$]+)\$")
_two_bang_pattern_ = re.compile(r"!!([^!]+)!!")
def _generate_doc_string_(op_proto):
"""
Generate docstring by OpProto
......@@ -55,22 +60,26 @@ def _generate_doc_string_(op_proto):
str: the document string
"""
def escape_math(text):
return _two_bang_pattern_.sub(
r'$$\1$$',
_single_dollar_pattern_.sub(
r':math:`\1`', _two_dollar_pattern_.sub(r"!!\1!!", text)))
if not isinstance(op_proto, framework_pb2.OpProto):
raise TypeError("OpProto should be `framework_pb2.OpProto`")
buf = cStringIO.StringIO()
buf.write(op_proto.comment)
buf.write(escape_math(op_proto.comment))
buf.write('\nArgs:\n')
for each_input in op_proto.inputs:
line_begin = ' {0}: '.format(_convert_(each_input.name))
buf.write(line_begin)
buf.write(each_input.comment)
buf.write('\n')
buf.write(' ' * len(line_begin))
buf.write('Duplicable: ')
buf.write(str(each_input.duplicable))
buf.write(' Optional: ')
buf.write(str(each_input.dispensable))
buf.write(escape_math(each_input.comment))
if each_input.duplicable:
buf.write(" Duplicatable.")
if each_input.dispensable:
buf.write(" Optional.")
buf.write('\n')
skip_attrs = OpProtoHolder.generated_op_attr_names()
......@@ -83,7 +92,7 @@ def _generate_doc_string_(op_proto):
buf.write(' (')
buf.write(_type_to_str_(each_attr.type))
buf.write('): ')
buf.write(escape_math(each_attr.comment))
buf.write('\n')
if len(op_proto.outputs) != 0:
......@@ -92,7 +101,7 @@ def _generate_doc_string_(op_proto):
for each_opt in op_proto.outputs:
if not each_opt.intermediate:
break
buf.write(escape_math(each_opt.comment))
return buf.getvalue()
......
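A hedged, standalone sketch (mirroring the three regex patterns above, not part of the commit) of what `escape_math` does to an operator comment:

.. code-block:: python

    import re

    _two_dollar_pattern_ = re.compile(r"\$\$([^\$]+)\$\$")
    _single_dollar_pattern_ = re.compile(r"\$([^\$]+)\$")
    _two_bang_pattern_ = re.compile(r"!!([^!]+)!!")

    def escape_math(text):
        # Protect $$...$$ as !!...!!, turn $...$ into :math:`...`,
        # then restore !!...!! back to $$...$$.
        return _two_bang_pattern_.sub(
            r'$$\1$$',
            _single_dollar_pattern_.sub(
                r':math:`\1`', _two_dollar_pattern_.sub(r"!!\1!!", text)))

    print(escape_math("norm $x$ with display form $$x^2$$"))
    # norm :math:`x` with display form $$x^2$$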
......@@ -25,10 +25,11 @@ import nn
import ops
import tensor
from ..initializer import init_on_cpu
from ..framework import default_main_program, Parameter
__all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS'
]
......@@ -70,21 +71,40 @@ def noam_decay(d_model, warmup_steps):
def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""Applies exponential decay to the learning rate.
"""
Applies exponential decay to the learning rate.
When training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, the learning rate will be decayed by
'decay_rate' every 'decay_steps' steps.
>>> if staircase == True:
>>> decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps)
>>> else:
>>> decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
Args:
learning_rate(Variable|float): The initial learning rate.
decay_steps(int): See the decay computation above.
decay_rate(float): The decay rate. See the decay computation above.
staircase(Boolean): If True, decay the learning rate at discrete intervals.
Default: False
Returns:
Variable: The decayed learning rate
Examples:
.. code-block:: python
base_lr = 0.1
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
sgd_optimizer.minimize(avg_cost)
"""
global_step = _decay_step_counter()
......@@ -128,22 +148,39 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""Applies inverse time decay to the initial learning rate.
"""
Applies inverse time decay to the initial learning rate.
When training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, an inverse decay function will be
applied to the initial learning rate.
>>> if staircase == True:
>>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
>>> else:
>>> decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
Args:
learning_rate(Variable|float): The initial learning rate.
decay_steps(int): See the decay computation above.
decay_rate(float): The decay rate. See the decay computation above.
staircase(Boolean): If True, decay the learning rate at discrete intervals.
Default: False
Returns:
Variable: The decayed learning rate
Examples:
.. code-block:: python
base_lr = 0.1
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.inverse_time_decay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
sgd_optimizer.minimize(avg_cost)
"""
global_step = _decay_step_counter()
......@@ -209,15 +246,27 @@ def polynomial_decay(learning_rate,
def piecewise_decay(boundaries, values):
"""Applies piecewise decay to the initial learning rate.
The algorithm can be described as the code below.
.. code-block:: python
boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]
if step < 10000:
learning_rate = 1.0
elif 10000 <= step < 20000:
learning_rate = 0.5
else:
learning_rate = 0.1
Args:
boundaries: A list of step numbers.
values: A list of learning rate values that will be picked during
different step boundaries.
Returns:
The decayed learning rate.
"""
if len(values) - len(boundaries) != 1:
......@@ -249,3 +298,41 @@ def piecewise_decay(boundaries, values):
tensor.assign(last_value_var, lr)
return lr
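A hedged usage sketch for `piecewise_decay` (optimizer setup assumed), in the same style as the decay examples above:

.. code-block:: python

    boundaries = [10000, 20000]
    values = [1.0, 0.5, 0.1]
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.piecewise_decay(
            boundaries=boundaries, values=values))
    sgd_optimizer.minimize(avg_cost)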
def append_LARS(params_grads, learning_rate, weight_decay):
"""Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
each layer.
.. code-block:: python

    learning_rate *= local_gw_ratio * sqrt(sumsq(param))
                     / (sqrt(sumsq(gradient)) + weight_decay * sqrt(sumsq(param)))
Args:
params_grads: A list of (param, grad) pairs to adapt.
learning_rate: A learning rate Variable. This
    is the global learning rate for LARS.
weight_decay: A Python `float` number.
Returns:
None. The decayed local learning rate is written back to each
parameter's `optimize_attr['learning_rate']`.
"""
def _balanced_weight(param_norm, grad_norm):
if weight_decay == 1.0:
return grad_norm + param_norm
else:
return grad_norm + weight_decay * param_norm
for param, grad in params_grads:
param_lr = param.optimize_attr['learning_rate']
param_norm = ops.sqrt(nn.reduce_sum(input=ops.square(param)))
grad_norm = ops.sqrt(nn.reduce_sum(input=ops.square(grad)))
if type(param_lr) == float and param_lr == 1.0:
decayed_lr = learning_rate * param_norm \
/ _balanced_weight(param_norm, grad_norm)
else:
decayed_lr = learning_rate * param_lr * param_norm \
/ _balanced_weight(param_norm, grad_norm)
# set back param local learning rate
param.optimize_attr['learning_rate'] = decayed_lr
......@@ -27,8 +27,32 @@ __all__ = ['accuracy', 'auc']
def accuracy(input, label, k=1, correct=None, total=None):
"""
accuracy layer.
Refer to https://en.wikipedia.org/wiki/Precision_and_recall
This function computes the accuracy using the input and label.
If the correct label occurs in the top k predictions, then correct is incremented by one.
Note: the dtype of accuracy is determined by the input; the input and label dtypes can be different.
Args:
input(Variable): The input of the accuracy layer, which is the prediction of the network.
Carrying LoD information is supported.
label(Variable): The label of dataset.
k(int): The top k predictions for each class will be checked.
correct(Variable): The correct predictions count.
total(Variable): The total entries count.
Returns:
Variable: The correct rate.
Examples:
.. code-block:: python
data = fluid.layers.data(name="data", shape=[-1, 32, 32], dtype="float32")
label = fluid.layers.data(name="data", shape=[-1,1], dtype="int32")
predict = fluid.layers.fc(input=data, size=10)
acc = fluid.layers.accuracy(input=predict, label=label, k=5)
"""
helper = LayerHelper("accuracy", **locals())
topk_out, topk_indices = nn.topk(input, k=k)
......@@ -53,6 +77,43 @@ def accuracy(input, label, k=1, correct=None, total=None):
def auc(input, label, curve='ROC', num_thresholds=200):
"""
**Area Under the Curve (AUC) Layer**
This implementation computes the AUC according to forward output and label.
It is used very widely in binary classification evaluation.
Note: If input label contains values other than 0 and 1, it will be cast
to `bool`. Find the relevant definitions `here <https://en.wikipedia.org\
/wiki/Receiver_operating_characteristic#Area_under_the_curve>`_.
There are two types of possible curves:
1. ROC: Receiver operating characteristic;
2. PR: Precision Recall
Args:
input(Variable): A floating-point 2D Variable, values are in the range
[0, 1]. Each row is sorted in descending order. This
input should be the output of topk. Typically, this
Variable indicates the probability of each label.
label(Variable): A 2D int Variable indicating the label of the training
data. The height is batch size and width is always 1.
curve(str): Curve type, can be 'ROC' or 'PR'. Default 'ROC'.
num_thresholds(int): The number of thresholds to use when discretizing
the roc curve. Default 200.
Returns:
Variable: A scalar representing the current AUC.
Examples:
.. code-block:: python
# network is a binary classification model and label the ground truth
prediction = network(image, is_infer=True)
auc_out=fluid.layers.auc(input=prediction, label=label)
"""
warnings.warn(
"This interface not recommended, fluid.layers.auc compute the auc at every minibatch, \
but can not aggregate them and get the pass AUC, because pass \
......
......@@ -39,13 +39,16 @@ __all__ = [
'chunk_eval',
'sequence_conv',
'conv2d',
'conv3d',
'sequence_pool',
'sequence_softmax',
'softmax',
'pool2d',
'pool3d',
'batch_norm',
'beam_search_decode',
'conv2d_transpose',
'conv3d_transpose',
'sequence_expand',
'lstm_unit',
'reduce_sum',
......@@ -87,6 +90,9 @@ __all__ = [
'resize_bilinear',
'gather',
'random_crop',
'mean_iou',
'relu',
'log',
]
......@@ -102,14 +108,15 @@ def fc(input,
"""
**Fully Connected Layer**
This function creates a fully connected layer in the network. It can take
multiple tensors as its inputs. It creates a variable called weights for
each input tensor, which represents a fully connected weight matrix from
each input unit to each output unit. The fully connected layer multiplies
each input tensor with its corresponding weight to produce an output Tensor.
If multiple input tensors are given, the results of multiple multiplications
will be summed up. If bias_attr is not None, a bias variable will be created
and added to the output. Finally, if activation is not None, it will be applied
to the output as well.
This process can be formulated as follows:
......@@ -150,7 +157,7 @@ def fc(input,
name (str, default None): The name of this layer.
Returns:
A tensor variable storing the transformation result.
Variable: The transformation result.
Raises:
ValueError: If rank of the input tensor is less than 2.
......@@ -158,8 +165,7 @@ def fc(input,
Examples:
.. code-block:: python
data = fluid.layers.data(
name="data", shape=[32, 32], dtype="float32")
data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
fc = fluid.layers.fc(input=data, size=1000, act="tanh")
"""
......@@ -221,11 +227,11 @@ def embedding(input,
have two elements which indicate the size of the dictionary of
embeddings and the size of each embedding vector respectively.
is_sparse(bool): The flag indicating whether to use sparse update.
is_distributed (bool): Whether to run lookup table from remote parameter server.
is_distributed(bool): Whether to run lookup table from remote parameter server.
padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup.
Otherwise the given :attr:`padding_idx` indicates padding the output
with zeros whenever lookup encounters it in :attr:`input`. If
:math:`padding_idx < 0`, the padding_idx to use in lookup is
:math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is
:math:`size[0] + dim`.
param_attr(ParamAttr): Parameters for this layer
dtype(np.dtype|core.VarDesc.VarType|str): The data type: float32, float16, int, etc.
......@@ -485,27 +491,31 @@ def dynamic_lstmp(input,
cell_activation(str): The activation for cell output. Choices = ["sigmoid",
"tanh", "relu", "identity"], default "tanh".
candidate_activation(str): The activation for candidate hidden state.
Choices = ["sigmoid", "tanh",
"relu", "identity"],
Choices = ["sigmoid", "tanh", "relu", "identity"],
default "tanh".
proj_activation(str): The activation for projection output.
Choices = ["sigmoid", "tanh",
"relu", "identity"],
Choices = ["sigmoid", "tanh", "relu", "identity"],
default "tanh".
dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
tuple: The projection of hidden state, and cell state of LSTMP. The \
shape of projection is (T x P), for the cell state which is \
(T x D), and both LoD is the same with the `input`.
tuple: A tuple of two output variables: the projection of hidden state, \
and cell state of LSTMP. The shape of projection is (T x P), \
that of the cell state is (T x D), and both LoDs are the same \
as the `input`.
Examples:
.. code-block:: python
dict_dim, emb_dim = 128, 64
data = fluid.layers.data(name='sequence', shape=[1],
dtype='int32', lod_level=1)
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
hidden_dim, proj_dim = 512, 256
fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
fc_out = fluid.layers.fc(input=emb, size=hidden_dim * 4,
act=None, bias_attr=None)
proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
size=hidden_dim * 4,
......@@ -571,10 +581,10 @@ def dynamic_gru(input,
candidate_activation='tanh',
h_0=None):
"""
**Dynamic GRU Layer**
**Gated Recurrent Unit (GRU) Layer**
Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
Sequence Modeling <https://arxiv.org/abs/1412.3555>`_
Sequence Modeling <https://arxiv.org/abs/1412.3555>`_ .
The formula is as follows:
......@@ -621,17 +631,25 @@ def dynamic_gru(input,
Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
candidate_activation(str): The activation for candidate hidden state.
Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".
h_0 (Variable): The hidden output of the first time step.
h_0 (Variable): The initial hidden state. If not set, the default is
zero. This is a tensor with shape (N x D), where N is the number of
total time steps of input mini-batch feature and D is the hidden
size.
Returns:
Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \
and lod is the same with the input.
and sequence length is the same as the input.
Examples:
.. code-block:: python
dict_dim, emb_dim = 128, 64
data = fluid.layers.data(name='sequence', shape=[1],
dtype='int32', lod_level=1)
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
hidden_dim = 512
x = fluid.layers.fc(input=data, size=hidden_dim * 3)
x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
"""
......@@ -779,11 +797,14 @@ def linear_chain_crf(input, label, param_attr=None):
Args:
input(${emission_type}): ${emission_comment}
input(${transition_type}): ${transition_comment}
label(${label_type}): ${label_comment}
param_attr(ParamAttr): The attribute of the learnable parameter.
Returns:
${log_likelihood_comment}
output(${emission_exps_type}): ${emission_exps_comment} \n
output(${transition_exps_type}): ${transition_exps_comment} \n
output(${log_likelihood_type}): ${log_likelihood_comment}
"""
helper = LayerHelper('linear_chain_crf', **locals())
......@@ -851,9 +872,9 @@ def cos_sim(X, Y):
${comment}
Args:
X (Variable): ${x_comment}
Y (Variable): ${y_comment}
X (Variable): ${x_comment}.
Y (Variable): ${y_comment}.
Returns:
Variable: the output of cosine(X, Y).
"""
......@@ -877,13 +898,13 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
Drop or keep each element of `x` independently. Dropout is a regularization
technique for reducing overfitting by preventing neuron co-adaptation during
training. The dropout operator randomly set (according to the given dropout
training. The dropout operator randomly sets (according to the given dropout
probability) the outputs of some units to zero, while others remain
unchanged.
Args:
x (Variable): The input tensor.
dropout_prob (float): Probability of setting units to zero.
x (Variable): The input tensor variable.
dropout_prob (float): Probability of setting units to zero.
is_test (bool): A flag indicating whether it is in test phase or not.
seed (int): A Python integer used to create random seeds. If this
parameter is set to None, a random seed is used.
......@@ -893,13 +914,14 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
will be named automatically.
Returns:
Variable: A tensor variable.
Variable: A tensor variable with the same shape as `x`.
Examples:
.. code-block:: python
x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
droped = fluid.layers.dropout(input=x, dropout_rate=0.5)
x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
dropped = fluid.layers.dropout(x, dropout_prob=0.5)
"""
helper = LayerHelper('dropout', **locals())
......@@ -1119,7 +1141,7 @@ def chunk_eval(input,
chunk_scheme (str): ${chunk_scheme_comment}
num_chunk_types (int): ${num_chunk_types_comment}
excluded_chunk_types (list): ${excluded_chunk_types_comment}
Returns:
tuple: tuple containing: precision, recall, f1_score,
num_infer_chunks, num_label_chunks,
......@@ -1192,15 +1214,11 @@ def sequence_conv(input,
bias_attr (ParamAttr|None): attributes for bias
param_attr (ParamAttr|None): attributes for parameter
act (str): the activation type
Returns:
Variable: output of sequence_conv
"""
# FIXME(dzh) : want to unify the argument of python layer
# function. So we ignore some unnecessary attributes.
# such as, padding_trainable, context_start.
helper = LayerHelper('sequence_conv', **locals())
dtype = helper.input_dtype()
filter_shape = [filter_size * input.shape[1], num_filters]
......@@ -1225,6 +1243,41 @@ def sequence_conv(input,
def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
"""
This function computes the softmax activation among all time-steps for each
sequence. The dimension of each time-step should be 1. Thus, the shape of
input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N`
is the sum of the length of all sequences.
For i-th sequence in a mini-batch:
.. math::
Out(X[lod[i]:lod[i+1]], :) = \\frac{\exp(X[lod[i]:lod[i+1], :])}{\sum(\exp(X[lod[i]:lod[i+1], :]))}
For example, for a mini-batch of 3 sequences with variable-length,
each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7],
then softmax will be computed among :math:`X[0:2, :]`, :math:`X[2:5, :]`,
:math:`X[5:7, :]`, and :math:`N` turns out to be 7.
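As a minimal illustration only (not part of the fluid API), the per-sequence
softmax can be sketched in numpy; the sequence lengths 2, 3, 2 follow the
example above, while the data values are assumed:

.. code-block:: python

    # Illustrative numpy sketch of the per-sequence softmax described above.
    import numpy as np

    x = np.array([1., 3., 2., 4., 6., 5., 1.]).reshape(7, 1)
    seq_lens = [2, 3, 2]                      # lengths of the three sequences
    out = np.empty_like(x)
    offset = 0
    for length in seq_lens:
        seq = x[offset:offset + length]
        e = np.exp(seq - seq.max())           # softmax within one sequence
        out[offset:offset + length] = e / e.sum()
        offset += length
    # out[0:2], out[2:5] and out[5:7] each sum to 1.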
Args:
input (Variable): The input variable which is a LoDTensor.
bias_attr (ParamAttr|None): attributes for bias
param_attr (ParamAttr|None): attributes for parameter
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \
library is installed. Default: True
Returns:
Variable: output of sequence_softmax
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[7, 1],
dtype='float32', lod_level=1)
x_sequence_softmax = fluid.layers.sequence_softmax(input=x)
"""
helper = LayerHelper('sequence_softmax', **locals())
dtype = helper.input_dtype()
softmax_out = helper.create_tmp_variable(dtype)
......@@ -1262,14 +1315,17 @@ def conv2d(input,
act=None,
name=None):
"""
**Convolution2D Layer**
The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input(Input) and
Output(Output) are in NCHW format. Where N is batch size, C is the number of
and strides, paddings, dilations, groups parameters. Input and
Output are in NCHW format, where N is batch size, C is the number of
channels, H is the height of the feature, and W is the width of the feature.
The details of convolution layer, please refer UFLDL's `convolution,
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
Filter is in MCHW format, where M is the number of output image channels,
C is the number of input image channels, H is the height of the filter,
and W is the width of the filter. If groups is greater than 1,
C will equal the number of input image channels divided by groups.
Please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
for more details.
If bias attribution and activation type are provided, bias is added to the
output of the convolution, and the corresponding activation function is
applied to the final result.
......@@ -1280,15 +1336,14 @@ def conv2d(input,
Out = \sigma (W \\ast X + b)
In the above equation:
Where:
* :math:`X`: Input value, a tensor with NCHW format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
different.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
......@@ -1299,6 +1354,7 @@ def conv2d(input,
Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
......@@ -1310,7 +1366,7 @@ def conv2d(input,
Args:
input (Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filter. It is as same as the output
num_filters(int): The number of filters. It is the same as the output
image channel.
filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
......@@ -1333,7 +1389,8 @@ def conv2d(input,
bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not.
use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled
with mkldnn library. Default: False
act (str): Activation type. Default: None
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
......@@ -1349,13 +1406,9 @@ def conv2d(input,
Examples:
.. code-block:: python
data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32')
conv2d = fluid.layers.conv2d(
input=data, num_filters=2, filter_size=3, act="relu")
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
"""
if stride is None:
stride = [1, 1]
num_channels = input.shape[1]
......@@ -1418,6 +1471,168 @@ def conv2d(input,
return helper.append_activation(pre_act)
def conv3d(input,
num_filters,
filter_size,
stride=1,
padding=0,
dilation=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
use_mkldnn=False,
act=None,
name=None):
"""
**Convolution3D Layer**
The convolution3D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input(Input) and
Output(Output) are in NCDHW format, where N is batch size, C is the number of
channels, D is the depth of the feature, H is the height of the feature,
and W is the width of the feature. Convolution3D is similar to Convolution2D
but adds one dimension (depth). If bias attribution and activation type are
provided, bias is added to the output of the convolution, and the
corresponding activation function is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCDHW format.
* :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where
.. math::
D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
Args:
input (Variable): The input image with [N, C, D, H, W] format.
num_filters(int): The number of filters. It is the same as the output
image channel.
filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
stride (int|tuple): The stride size. If stride is a tuple, it must
contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. Default: stride = 1.
padding (int|tuple): The padding size. If padding is a tuple, it must
contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
padding_D = padding_H = padding_W = padding. Default: padding = 0.
dilation (int|tuple): The dilation size. If dilation is a tuple, it must
contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
groups (int): The groups number of the Conv3d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1
param_attr (ParamAttr): The parameters to the Conv3d Layer. Default: None
bias_attr (ParamAttr): Bias parameter for the Conv3d layer. Default: None
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not.
act (str): Activation type. Default: None
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The tensor variable storing the convolution and \
non-linearity activation result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32')
conv3d = fluid.layers.conv3d(input=data, num_filters=2, filter_size=3, act="relu")
"""
l_type = 'conv3d'
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
num_channels = input.shape[1]
if groups is None:
num_filter_channels = num_channels
else:
if num_channels % groups != 0:
raise ValueError("num_channels must be divisible by groups.")
num_filter_channels = num_channels / groups
filter_size = utils.convert_to_list(filter_size, 3, 'filter_size')
stride = utils.convert_to_list(stride, 3, 'stride')
padding = utils.convert_to_list(padding, 3, 'padding')
dilation = utils.convert_to_list(dilation, 3, 'dilation')
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
input_shape = input.shape
filter_shape = [num_filters, num_filter_channels] + filter_size
def _get_default_param_initializer():
std = (2.0 / (filter_size[0]**3 * num_channels))**0.5
return Normal(0.0, std, 0)
filter_param = helper.create_parameter(
attr=helper.param_attr,
shape=filter_shape,
dtype=dtype,
default_initializer=_get_default_param_initializer())
pre_bias = helper.create_tmp_variable(dtype)
helper.append_op(
type=l_type,
inputs={
'Input': input,
'Filter': filter_param,
},
outputs={"Output": pre_bias},
attrs={
'strides': stride,
'paddings': padding,
'dilations': dilation,
'groups': groups,
'use_cudnn': use_cudnn,
'use_mkldnn': use_mkldnn
})
pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
return helper.append_activation(pre_act)
def sequence_pool(input, pool_type):
"""
This function adds the operator for sequence pooling.
......@@ -1434,13 +1649,13 @@ def sequence_pool(input, pool_type):
.. code-block:: text
x is a 1-level LoDTensor:
x.lod = [[0, 2, 5, 7]]
x.lod = [[2, 3, 2]]
x.data = [1, 3, 2, 4, 6, 5, 1]
x.dims = [7, 1]
then output is a Tensor:
out.dim = [3, 1]
with condition len(x.lod[-1]) - 1 == out.dims[0]
with condition len(x.lod[-1]) == out.dims[0]
for different pool_type:
average: out.data = [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
......@@ -1499,13 +1714,13 @@ def sequence_first_step(input):
.. code-block:: text
x is a 1-level LoDTensor:
x.lod = [[0, 2, 5, 7]]
x.lod = [[2, 3, 2]]
x.data = [1, 3, 2, 4, 6, 5, 1]
x.dims = [7, 1]
then output is a Tensor:
out.dim = [3, 1]
with condition len(x.lod[-1]) - 1 == out.dims[0]
with condition len(x.lod[-1]) == out.dims[0]
out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1)
Args:
......@@ -1532,13 +1747,13 @@ def sequence_last_step(input):
.. code-block:: text
x is a 1-level LoDTensor:
x.lod = [[0, 2, 5, 7]]
x.lod = [[2, 3, 2]]
x.data = [1, 3, 2, 4, 6, 5, 1]
x.dims = [7, 1]
then output is a Tensor:
out.dim = [3, 1]
with condition len(x.lod[-1]) - 1 == out.dims[0]
with condition len(x.lod[-1]) == out.dims[0]
out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1)
Args:
......@@ -1558,6 +1773,7 @@ def sequence_last_step(input):
return sequence_pool(input=input, pool_type="last")
@templatedoc()
def pool2d(input,
pool_size=-1,
pool_type="max",
......@@ -1569,7 +1785,99 @@ def pool2d(input,
use_mkldnn=False,
name=None):
"""
This function adds the operator for pooling in 2 dimensions, using the
${comment}
Args:
input (Variable): The input tensor of pooling operator. The format of
input tensor is NCHW, where N is batch size, C is
the number of channels, H is the height of the
feature, and W is the width of the feature.
pool_size (int): The side length of pooling windows. All pooling
windows are squares with pool_size on a side.
pool_type: ${pooling_type_comment}
pool_stride (int): stride of the pooling layer.
pool_padding (int): padding size.
global_pooling: ${global_pooling_comment}
use_cudnn: ${use_cudnn_comment}
ceil_mode: ${ceil_mode_comment}
use_mkldnn: ${use_mkldnn_comment}
name (str|None): A name for this layer(optional). If set None, the
layer will be named automatically.
Returns:
Variable: The pooling result.
Raises:
ValueError: If 'pool_type' is neither "max" nor "avg"
ValueError: If 'global_pooling' is False and 'pool_size' is -1
ValueError: If 'use_cudnn' is not a bool value.
Examples:
.. code-block:: python
data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32')
pool2d = fluid.layers.pool2d(
input=data,
pool_size=2,
pool_type='max',
pool_stride=1,
global_pooling=False)
"""
if pool_type not in ["max", "avg"]:
raise ValueError(
"Unknown pool_type: '%s'. It can only be 'max' or 'avg'.",
str(pool_type))
if global_pooling is False and pool_size == -1:
raise ValueError(
"When the global_pooling is False, pool_size must be passed "
"and be a valid value. Received pool_size: " + str(pool_size))
pool_size = utils.convert_to_list(pool_size, 2, 'pool_size')
pool_padding = utils.convert_to_list(pool_padding, 2, 'pool_padding')
pool_stride = utils.convert_to_list(pool_stride, 2, 'pool_stride')
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
l_type = 'pool2d'
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_tmp_variable(dtype)
helper.append_op(
type=l_type,
inputs={"X": input},
outputs={"Out": pool_out},
attrs={
"pooling_type": pool_type,
"ksize": pool_size,
"global_pooling": global_pooling,
"strides": pool_stride,
"paddings": pool_padding,
"use_cudnn": use_cudnn,
"ceil_mode": ceil_mode,
"use_mkldnn": use_mkldnn
})
return pool_out
def pool3d(input,
pool_size=-1,
pool_type="max",
pool_stride=1,
pool_padding=0,
global_pooling=False,
use_cudnn=True,
ceil_mode=False,
use_mkldnn=False,
name=None):
"""
This function adds the operator for pooling in 3 dimensions, using the
pooling configurations mentioned in input parameters.
Args:
......@@ -1584,9 +1892,9 @@ def pool2d(input,
use_mkldnn (bool): ${use_mkldnn_comment}
name (str): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: output of pool2d layer.
Variable: output of pool3d layer.
"""
if pool_type not in ["max", "avg"]:
raise ValueError(
......@@ -1598,19 +1906,20 @@ def pool2d(input,
"When the global_pooling is False, pool_size must be passed "
"and be a valid value. Received pool_size: " + str(pool_size))
pool_size = utils.convert_to_list(pool_size, 2, 'pool_size')
pool_padding = utils.convert_to_list(pool_padding, 2, 'pool_padding')
pool_stride = utils.convert_to_list(pool_stride, 2, 'pool_stride')
pool_size = utils.convert_to_list(pool_size, 3, 'pool_size')
pool_padding = utils.convert_to_list(pool_padding, 3, 'pool_padding')
pool_stride = utils.convert_to_list(pool_stride, 3, 'pool_stride')
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
helper = LayerHelper('pool2d', **locals())
l_type = "pool3d"
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_tmp_variable(dtype)
helper.append_op(
type="pool2d",
type=l_type,
inputs={"X": input},
outputs={"Out": pool_out},
attrs={
......@@ -1744,6 +2053,7 @@ def batch_norm(input,
return helper.append_activation(batch_norm_out)
@templatedoc()
def layer_norm(input,
scale=True,
shift=True,
......@@ -1754,20 +2064,11 @@ def layer_norm(input,
act=None,
name=None):
"""
**Layer Normalization**
Assume feature vectors exist on dimensions
:attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
along these dimensions for each feature vector :math:`a` with size
:math:`H`, then normalize each feature vector using the corresponding
statistics. After that, apply learnable gain and bias on the normalized
tensor to scale and shift if :attr:`scale` and :attr:`shift` are set.
Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_
${comment}
The formula is as follows:
.. math::
.. math::
\\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i
......@@ -1775,6 +2076,15 @@ def layer_norm(input,
h & = f(\\frac{g}{\\sigma}(a - \\mu) + b)
* :math:`a`: the vector representation of the summed inputs to the neurons
in that layer.
* :math:`H`: the number of hidden units in a layer
* :math:`g`: the trainable scale parameter.
* :math:`b`: the trainable bias parameter.
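A minimal numpy sketch of the statistics above, assuming begin_norm_axis = 1
and identity gain/bias; this only illustrates the formula, not the operator's
implementation:

.. code-block:: python

    # Illustrative numpy sketch of layer normalization with g = 1, b = 0.
    import numpy as np

    a = np.random.rand(4, 8).astype('float32')    # 4 samples, H = 8 features
    mu = a.mean(axis=1, keepdims=True)
    sigma = a.std(axis=1, keepdims=True)
    h = (a - mu) / sigma                          # epsilon and activation omitted
    # each row of h now has (approximately) zero mean and unit variance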
Args:
input(Variable): The input tensor variable.
scale(bool): Whether to learn the adaptive gain :math:`g` after
......@@ -1793,14 +2103,13 @@ def layer_norm(input,
name (str): The name of this layer. It is optional.
Returns:
Variable: A tensor variable with the same shape as the input.
${y_comment}
Examples:
.. code-block:: python
data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32')
x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
>>> data = fluid.layers.data(name='data', shape=[3, 32, 32],
>>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
"""
helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype()
......@@ -1822,54 +2131,240 @@ def layer_norm(input,
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
inputs['Bias'] = bias
# create output
mean_out = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)
variance_out = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)
layer_norm_out = helper.create_tmp_variable(dtype)
# create output
mean_out = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)
variance_out = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)
layer_norm_out = helper.create_tmp_variable(dtype)
helper.append_op(
type="layer_norm",
inputs=inputs,
outputs={
"Y": layer_norm_out,
"Mean": mean_out,
"Variance": variance_out,
},
attrs={"epsilon": epsilon,
"begin_norm_axis": begin_norm_axis})
return helper.append_activation(layer_norm_out)
def beam_search_decode(ids, scores, name=None):
"""
Beam Search Decode
This layer packs the output of the beam search layer into sentences and
associated scores. It is usually called after the beam search layer.
Typically, the output of beam search layer is a tensor of selected ids, with
a tensor of the score of each id. Beam search layer's output ids, however,
are generated directly during the tree search, and they are stacked by each
level of the search tree. Thus we need to reorganize them into sentences,
based on the score of each id. This layer takes the output of beam search
layer as input and repacks them into sentences.
Args:
ids (Variable): The selected ids, output of beam search layer.
scores (Variable): The associated scores of the ids, output of the beam
search layer.
name (str): The name of this layer. It is optional.
Returns:
tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores.
sentence_ids is a tensor with shape [size, length], where size is the
beam size of beam search, and length is the length of each sentence.
Note that the length of sentences may vary.
sentence_scores is a tensor with the same shape as sentence_ids.
Examples:
.. code-block:: python
ids, scores = fluid.layers.beam_search(
pre_ids, ids, scores, beam_size, end_id)
sentence_ids, sentence_scores = fluid.layers.beam_search_decode(
ids, scores)
"""
helper = LayerHelper('beam_search_decode', **locals())
sentence_ids = helper.create_tmp_variable(dtype=ids.dtype)
sentence_scores = helper.create_tmp_variable(dtype=ids.dtype)
helper.append_op(
type="beam_search_decode",
inputs={"Ids": ids,
"Scores": scores},
outputs={
"SentenceIds": sentence_ids,
"SentenceScores": sentence_scores
})
return sentence_ids, sentence_scores
def conv2d_transpose(input,
num_filters,
output_size=None,
filter_size=None,
padding=0,
stride=1,
dilation=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
act=None,
name=None):
"""
**Convolution2D transpose layer**
The convolution2D transpose layer calculates the output based on the input,
filter, and dilations, strides, paddings. Input(Input) and output(Output)
are in NCHW format. Where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
Parameters (dilations, strides, paddings) are two-element lists; the two
elements represent height and width, respectively. For details of the
convolution transpose layer, please refer to the following explanation and
the references `therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
If bias attribution and activation type are provided, bias is added to
the output of the convolution, and the corresponding activation function
is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a tensor with NCHW format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1
Args:
input(Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filters. It is the same as the output
image channel.
output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). This
parameter only works when filter_size is None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if use output size to
calculate filter_size.
padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv2d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups=1
param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer.
Default: None
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act(str): Activation type. Default: None
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The tensor variable storing the convolution transpose result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3)
"""
helper = LayerHelper("conv2d_transpose", **locals())
if not isinstance(input, Variable):
raise TypeError("Input of conv2d_transpose must be Variable")
input_channel = input.shape[1]
padding = utils.convert_to_list(padding, 2, 'padding')
stride = utils.convert_to_list(stride, 2, 'stride')
dilation = utils.convert_to_list(dilation, 2, 'dilation')
helper.append_op(
type="layer_norm",
inputs=inputs,
outputs={
"Y": layer_norm_out,
"Mean": mean_out,
"Variance": variance_out,
},
attrs={"epsilon": epsilon,
"begin_norm_axis": begin_norm_axis})
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
return helper.append_activation(layer_norm_out)
if filter_size is None:
if output_size is None:
raise ValueError("output_size must be set when filter_size is None")
if isinstance(output_size, int):
output_size = [output_size, output_size]
h_in = input.shape[2]
w_in = input.shape[3]
def beam_search_decode(ids, scores, name=None):
"""
${beam_search_decode}
filter_size_h = (output_size[0] - (h_in - 1) * stride[0] + 2 *
padding[0] - 1) / dilation[0] + 1
filter_size_w = (output_size[1] - (w_in - 1) * stride[1] + 2 *
padding[1] - 1) / dilation[1] + 1
filter_size = [filter_size_h, filter_size_w]
else:
filter_size = utils.convert_to_list(filter_size, 2,
'conv2d_transpose.filter_size')
Args:
ids (Variable): ${ids_comment}
scores (Variable): ${scores_comment}
name (str): The name of this layer. It is optional.
Returns:
tuple: a tuple of two output variable: sentence_ids, sentence_scores
"""
helper = LayerHelper('beam_search_decode', **locals())
sentence_ids = helper.create_tmp_variable(dtype=ids.dtype)
sentence_scores = helper.create_tmp_variable(dtype=ids.dtype)
groups = 1 if groups is None else groups
filter_shape = [input_channel, num_filters / groups] + filter_size
img_filter = helper.create_parameter(
dtype=input.dtype, shape=filter_shape, attr=helper.param_attr)
pre_bias = helper.create_tmp_variable(dtype=input.dtype)
helper.append_op(
type="beam_search_decode",
inputs={"Ids": ids,
"Scores": scores},
outputs={
"SentenceIds": sentence_ids,
"SentenceScores": sentence_scores
type='conv2d_transpose',
inputs={'Input': [input],
'Filter': [img_filter]},
outputs={'Output': pre_bias},
attrs={
'strides': stride,
'paddings': padding,
'dilations': dilation,
'groups': groups,
'use_cudnn': use_cudnn
})
return sentence_ids, sentence_scores
pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
out = helper.append_activation(pre_act)
return out
def conv2d_transpose(input,
def conv3d_transpose(input,
num_filters,
output_size=None,
filter_size=None,
......@@ -1883,79 +2378,84 @@ def conv2d_transpose(input,
act=None,
name=None):
"""
**Convolution2D Transpose Layer**
**Convolution3D transpose layer**
The convolution2D transpose layer calculates the output based on the input,
The convolution3D transpose layer calculates the output based on the input,
filter, and dilations, strides, paddings. Input(Input) and output(Output)
are in NCHW format. Where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
Parameters(dilations, strides, paddings) are two elements. These two elements
represent height and width, respectively. The details of convolution transpose
layer, please refer to the following explanation and references
`here <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
are in NCDHW format, where N is batch size, C is the number of channels,
D is the depth of the feature, H is the height of the feature, and W
is the width of the feature. Parameters (dilations, strides, paddings) are
three-element lists; the elements represent depth, height, and width, respectively.
For details of the convolution transpose layer, please refer to the following
explanation and the references `therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
If bias attribution and activation type are provided, bias is added to
the output of the convolution, and the corresponding activation function
is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = W \\ast X
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCHW format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast` : Convolution transpose operation.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
different.
* :math:`X`: Input value, a tensor with NCDHW format.
* :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1
D_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\
H_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\
W_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1
Args:
input(Variable): The input image with [N, C, H, W] format.
input(Variable): The input image with [N, C, D, H, W] format.
num_filters(int): The number of filters. It is the same as the output
image channel.
output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). This
tuple, it must contain three integers, (image_D, image_H, image_W). This
parameter only works when filter_size is None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if use output size to
calculate filter_size.
padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
padding_D = padding_H = padding_W = padding. Default: padding = 0.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. Default: stride = 1.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv2d transpose layer. Inspired by
contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv3d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups=1
param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer.
Default: None
bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
param_attr(ParamAttr): The parameters to the Conv3d_transpose Layer.
Default: None
bias_attr(ParamAttr): Bias parameter for the Conv3d layer. Default: None
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act(str): Activation type. Default: None
......@@ -1972,19 +2472,18 @@ def conv2d_transpose(input,
Examples:
.. code-block:: python
data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32')
conv2d_transpose = fluid.layers.conv2d_transpose(
input=data, num_filters=2, filter_size=3)
data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32')
conv3d_transpose = fluid.layers.conv3d_transpose(input=data, num_filters=2, filter_size=3)
"""
helper = LayerHelper("conv2d_transpose", **locals())
l_type = "conv3d_transpose"
helper = LayerHelper(l_type, **locals())
if not isinstance(input, Variable):
raise TypeError("Input of conv2d_transpose must be Variable")
raise TypeError("Input of conv3d_transpose must be Variable")
input_channel = input.shape[1]
padding = utils.convert_to_list(padding, 2, 'padding')
stride = utils.convert_to_list(stride, 2, 'stride')
dilation = utils.convert_to_list(dilation, 2, 'dilation')
padding = utils.convert_to_list(padding, 3, 'padding')
stride = utils.convert_to_list(stride, 3, 'stride')
dilation = utils.convert_to_list(dilation, 3, 'dilation')
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
......@@ -1995,17 +2494,20 @@ def conv2d_transpose(input,
if isinstance(output_size, int):
output_size = [output_size, output_size]
h_in = input.shape[2]
w_in = input.shape[3]
d_in = input.shape[2]
h_in = input.shape[3]
w_in = input.shape[4]
filter_size_h = (output_size[0] - (h_in - 1) * stride[0] + 2 *
filter_size_d = (output_size[0] - (d_in - 1) * stride[0] + 2 *
padding[0] - 1) / dilation[0] + 1
filter_size_w = (output_size[1] - (w_in - 1) * stride[1] + 2 *
filter_size_h = (output_size[1] - (h_in - 1) * stride[1] + 2 *
padding[1] - 1) / dilation[1] + 1
filter_size = [filter_size_h, filter_size_w]
filter_size_w = (output_size[2] - (w_in - 1) * stride[2] + 2 *
padding[2] - 1) / dilation[2] + 1
filter_size = [filter_size_d, filter_size_h, filter_size_w]
else:
filter_size = utils.convert_to_list(filter_size, 2,
'conv2d_transpose.filter_size')
filter_size = utils.convert_to_list(filter_size, 3,
'conv3d_transpose.filter_size')
groups = 1 if groups is None else groups
filter_shape = [input_channel, num_filters / groups] + filter_size
......@@ -2014,7 +2516,7 @@ def conv2d_transpose(input,
pre_bias = helper.create_tmp_variable(dtype=input.dtype)
helper.append_op(
type='conv2d_transpose',
type=l_type,
inputs={'Input': [input],
'Filter': [img_filter]},
outputs={'Output': pre_bias},
......@@ -2042,18 +2544,18 @@ def sequence_expand(x, y, ref_level=-1, name=None):
* Case 1
x is a LoDTensor:
x.lod = [[0, 2, 4]]
x.lod = [[2, 2]]
x.data = [[a], [b], [c], [d]]
x.dims = [4, 1]
y is a LoDTensor:
y.lod = [[0, 2, 4],
[0, 3, 6, 7, 8]]
y.lod = [[2, 2],
[3, 3, 1, 1]]
ref_level: 0
then output is a 1-level LoDTensor:
out.lod = [[0, 2, 4, 6, 8]]
out.lod = [[2, 2, 2, 2]]
out.data = [[a], [b], [a], [b], [c], [d], [c], [d]]
out.dims = [8, 1]
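Case 1 can be mimicked with a small numpy sketch; the letters a-d are replaced
with assumed numbers and the lod lists use the length-based convention shown
above (illustration only, not the fluid implementation):

.. code-block:: python

    # Illustrative numpy sketch of Case 1 above.
    import numpy as np

    x = np.array([[1.], [2.], [3.], [4.]])    # stands for [[a], [b], [c], [d]]
    x_lod = [2, 2]                            # two x sequences of length 2
    y_ref_lod = [2, 2]                        # lod of y at ref_level 0
    out_rows = []
    start = 0
    for x_len, repeat in zip(x_lod, y_ref_lod):
        seq = x[start:start + x_len]
        out_rows.extend([seq] * repeat)       # each x sequence is repeated
        start += x_len
    out = np.concatenate(out_rows)            # [[a],[b],[a],[b],[c],[d],[c],[d]]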
......@@ -2063,7 +2565,7 @@ def sequence_expand(x, y, ref_level=-1, name=None):
x.dims = [3, 1]
y is a LoDTensor:
y.lod = [[0, 2, 2, 5]]
y.lod = [[2, 0, 3]]
ref_level: -1
......@@ -2112,7 +2614,7 @@ def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0):
beam_size (int): ${beam_size_comment}
end_id (int): ${end_id_comment}
level (int): ${level_comment}
Returns:
tuple: a tuple of beam_search output variables: selected_ids, selected_scores
'''
......@@ -2322,23 +2824,24 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
def reduce_mean(input, dim=None, keep_dim=False, name=None):
"""
Computes the mean of tensor elements over the given dimension.
Computes the mean of the input tensor's elements along the given dimension.
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (list|int|None): The dimensions along which the mean is computed. If
:attr:`None`, compute the mean over all elements of :attr:`input`
and return a Tensor variable with a single element, otherwise
dim (list|int|None): The dimension along which the mean is computed. If
`None`, compute the mean over all elements of :attr:`input`
and return a variable with a single element, otherwise it
must be in the range :math:`[-rank(input), rank(input))`. If
:math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`.
:math:`dim[i] < 0`, the dimension to reduce is
:math:`rank(input) + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
name(str|None): A name for this layer(optional). If set None, the layer
name(str|None): A name for this layer(optional). If set `None`, the layer
will be named automatically.
Returns:
Variable: The reduced Tensor variable.
Variable: The reduced mean Variable.
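The dim / keep_dim semantics can be illustrated with an equivalent numpy
reduction (values assumed; this is not the fluid implementation):

.. code-block:: python

    # Illustrative numpy equivalent of the reduction described above.
    import numpy as np

    x = np.arange(6, dtype='float32').reshape(2, 3)
    print(x.mean())                          # dim=None      -> one element
    print(x.mean(axis=1))                    # dim=1         -> shape (2,)
    print(x.mean(axis=1, keepdims=True))     # keep_dim=True -> shape (2, 1)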
Examples:
.. code-block:: python
......@@ -2560,7 +3063,7 @@ def split(input, num_or_sections, dim=-1, name=None):
will be named automatically.
Returns:
List: The list of segmented tensor variables.
list(Variable): The list of segmented tensor variables.
Examples:
.. code-block:: python
......@@ -2611,32 +3114,33 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes
.. math::
y = \frac{x}{ \sqrt{\sum {x^2} + epsilon }}
y = \\frac{x}{ \sqrt{\sum {x^2} + epsilon }}
For `x` with more dimensions, this layer independently normalizes each 1-D
slice along dimension `axis`.
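A minimal numpy sketch of the normalization above along axis 1 (values and
epsilon are assumed for illustration):

.. code-block:: python

    # Illustrative numpy sketch of L2 normalization along axis 1.
    import numpy as np

    eps = 1e-12
    x = np.random.rand(3, 17, 13).astype('float32')
    norm = np.sqrt((x ** 2).sum(axis=1, keepdims=True) + eps)
    y = x / norm     # every 1-D slice along axis 1 now has (near) unit L2 norm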
Args:
x(Variable|list): The input tensor to l2_normalize layer.
axis(int): The axis on which to apply normalization. If `axis < 0`,
axis(int): The axis on which to apply normalization. If `axis < 0`, \
the dimension to normalize is rank(X) + axis. -1 is the
last dimension.
epsilon(float): The epsilon value is used to avoid division by zero,
epsilon(float): The epsilon value is used to avoid division by zero, \
the default value is 1e-12.
name(str|None): A name for this layer(optional). If set None, the layer
name(str|None): A name for this layer(optional). If set None, the layer \
will be named automatically.
Returns:
Variable: The output tensor variable.
Variable: The output tensor variable with the same shape as `x`.
Examples:
.. code-block:: python
data = fluid.layers.data(name="data",
shape=(3, 17, 13),
dtype="float32")
normed = fluid.layers.l2_normalize(x=data, axis=1)
data = fluid.layers.data(name="data",
shape=(3, 17, 13),
dtype="float32")
normed = fluid.layers.l2_normalize(x=data, axis=1)
"""
if len(x.shape) == 1:
......@@ -2768,25 +3272,51 @@ def topk(input, k, name=None):
This operator is used to find values and indices of the k largest entries
for the last dimension.
If the input is a vector (rank=1), finds the k largest entries in the vector
If the input is a vector (1-D Tensor), finds the k largest entries in the vector
and outputs their values and indices as vectors. Thus values[j] is the j-th
largest entry in input, and its index is indices[j].
If the input is a Tensor with higher rank, this operator computes the top k
entries along the last dimension.
For example:
.. code-block:: text
If:
input = [[5, 4, 2, 3],
[9, 7, 10, 25],
[6, 2, 10, 1]]
k = 2
Then:
The first output:
values = [[5, 4],
[10, 25],
[6, 10]]
The second output:
indices = [[0, 1],
[2, 3],
[0, 2]]
Args:
input(Variable): The input variable which can be a vector or Tensor with
higher rank.
k(int): An integer value to specify the top k largest elements.
k(int): The number of top elements to look for along the last dimension
of input.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
will be named automatically.
Default: None
Returns:
values(Variable): The k largest elements along each last dimensional
slice.
indices(Variable): The indices of values within the last dimension of
input.
Tuple[Variable]: A tuple with two elements. Each element is a Variable.
The first one is k largest elements along each last
dimensional slice. The second one is indices of values
within the last dimension of input.
Raises:
ValueError: If k < 1 or k is not less than the last dimension of input
Examples:
.. code-block:: python
......@@ -2794,7 +3324,7 @@ def topk(input, k, name=None):
top5_values, top5_indices = layers.topk(input, k=5)
"""
shape = input.shape
if k < 1 and k >= shape[-1]:
if k < 1 or k >= shape[-1]:
raise ValueError("k must be greater than 0 and less than %d." %
(shape[-1]))
......@@ -2812,8 +3342,7 @@ def topk(input, k, name=None):
return values, indices
def edit_distance(input, label, normalized=True, ignored_tokens=None,
name=None):
def edit_distance(input, label, normalized=True, ignored_tokens=None):
"""
EditDistance operator computes the edit distances between a batch of
hypothesis strings and their references. Edit distance, also called
......@@ -2827,21 +3356,21 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None,
"kitten" -> "sitten" -> "sittin" -> "sitting"
Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with
The input is a LoDTensor consisting of all the hypothesis strings with
the total number denoted by `batch_size`, and the separation is specified
by the LoD information. And the `batch_size` reference strings are arranged
in order in the same way in the LoDTensor Input(Refs).
in order in the same way in the input LoDTensor.
Output(Out) contains the `batch_size` results and each stands for the edit
The output contains the `batch_size` results and each stands for the edit
distance for a pair of strings respectively. If Attr(normalized) is true,
the edit distance will be divided by the length of reference string.
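The metric itself can be sketched in a few lines of plain Python; this only
illustrates what the operator computes, not how it is implemented:

.. code-block:: python

    # Plain-Python sketch of the Levenshtein (edit) distance, for illustration.
    def edit_distance_py(hyp, ref):
        m, n = len(hyp), len(ref)
        dist = [[0] * (n + 1) for _ in range(m + 1)]
        for i in range(m + 1):
            dist[i][0] = i
        for j in range(n + 1):
            dist[0][j] = j
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                cost = 0 if hyp[i - 1] == ref[j - 1] else 1
                dist[i][j] = min(dist[i - 1][j] + 1,         # deletion
                                 dist[i][j - 1] + 1,         # insertion
                                 dist[i - 1][j - 1] + cost)  # substitution
        return dist[m][n]

    print(edit_distance_py("kitten", "sitting"))   # 3, as in the example above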
Args:
input(Variable): The indices for hypothesis strings.
label(Variable): The indices for reference strings.
normalized(bool): Indicated whether to normalize the edit distance by
normalized(bool, default True): Indicates whether to normalize the edit distance by
the length of reference string.
ignored_tokens(list of int): Tokens that should be removed before
ignored_tokens(list<int>, default None): Tokens that should be removed before
calculating edit distance.
name (str): The name of this layer. It is optional.
......@@ -2853,7 +3382,6 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None,
x = fluid.layers.data(name='x', shape=[8], dtype='float32')
y = fluid.layers.data(name='y', shape=[7], dtype='float32')
cost = fluid.layers.edit_distance(input=x,label=y)
"""
helper = LayerHelper("edit_distance", **locals())
......@@ -2916,7 +3444,7 @@ def ctc_greedy_decoder(input, blank, name=None):
[0.2, 0.2, 0.1, 0.5],
[0.5, 0.1, 0.3, 0.1]]
input.lod = [[0, 4, 8]]
input.lod = [[4, 4]]
Then:
......@@ -2924,7 +3452,7 @@ def ctc_greedy_decoder(input, blank, name=None):
[1],
[3]]
output.lod = [[0, 2, 3]]
output.lod = [[2, 1]]
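The greedy decode itself (argmax at every time step, merge consecutive
repeats, drop the blank) can be sketched in numpy; the probabilities and the
blank index below are assumed for illustration:

.. code-block:: python

    # Illustrative numpy sketch of the greedy decode described above.
    import numpy as np

    def ctc_greedy_decode(probs, blank):
        best_path = probs.argmax(axis=1)     # best label at every time step
        result, prev = [], None
        for token in best_path:
            if token != prev and token != blank:
                result.append(int(token))    # keep after merging repeats
            prev = token
        return result

    seq = np.array([[0.1, 0.6, 0.2, 0.1],
                    [0.1, 0.6, 0.2, 0.1],
                    [0.7, 0.1, 0.1, 0.1],
                    [0.1, 0.1, 0.1, 0.7]])
    print(ctc_greedy_decode(seq, blank=0))   # [1, 3]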
Args:
......@@ -2941,7 +3469,7 @@ def ctc_greedy_decoder(input, blank, name=None):
Returns:
Variable: CTC greedy decode result. If all the sequences in result were
empty, the result LoDTensor will be [-1] with LoD [[0]] and dims [1, 1].
empty, the result LoDTensor will be [-1] with LoD [[]] and dims [1, 1].
Examples:
.. code-block:: python
......@@ -2974,35 +3502,33 @@ def warpctc(input, label, blank=0, norm_by_times=False):
input tensor.
Args:
input(Variable): (LodTensor, default: LoDTensor<float>),
the unscaled probabilities of variable-length sequences,
which is a 2-D Tensor with LoD information.
It's shape is [Lp, num_classes + 1], where Lp is the sum of all input
sequences' length and num_classes is the true number of classes.
(not including the blank label).
label(Variable): (LodTensor, default: LoDTensor<int>), the ground truth
of variable-length sequence, which is a 2-D Tensor with LoD
information. It is of the shape [Lg, 1], where Lg is th sum of
all labels' length.
blank (int): default 0, the blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the
half-opened interval [0, num_classes + 1).
norm_by_times (bool): default false, whether to normalize
the gradients by the number of time-step, which is also the
sequence's length. There is no need to normalize the gradients
if warpctc layer was follewed by a mean_op.
input (Variable): The unscaled probabilities of variable-length sequences,
which is a 2-D Tensor with LoD information.
Its shape is [Lp, num_classes + 1], where Lp is the sum of all input
sequences' length and num_classes is the true number of classes.
(not including the blank label).
label (Variable): The ground truth of variable-length sequence,
which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1],
where Lg is the sum of all labels' length.
blank (int, default 0): The blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the
half-opened interval [0, num_classes + 1).
norm_by_times(bool, default false): Whether to normalize the gradients
by the number of time-step, which is also the sequence's length.
There is no need to normalize the gradients if warpctc layer was
followed by a mean_op.
Returns:
Variable: The Connectionist Temporal Classification (CTC) loss,
which is a 2-D Tensor of the shape [batch_size, 1].
Examples:
.. code-block:: python
y = layers.data(
name='y', shape=[11, 8], dtype='float32', lod_level=1)
y_predict = layers.data(
name='y_predict', shape=[11, 1], dtype='float32')
cost = layers.warpctc(input=y_predict, label=y)
label = fluid.layers.data(name='label', shape=[11, 8], dtype='float32', lod_level=1)
predict = fluid.layers.data(name='predict', shape=[11, 1], dtype='float32')
cost = fluid.layers.warpctc(input=predict, label=label)
"""
helper = LayerHelper('warpctc', **locals())
......@@ -3032,16 +3558,20 @@ def sequence_reshape(input, new_dim):
x is a LoDTensor:
x.lod = [[0, 2, 6]]
x.data = [[1, 2], [3, 4],
[5, 6], [7, 8], [9, 10], [11, 12]]
x.data = [[1, 2], [3, 4],
[5, 6], [7, 8],
[9, 10], [11, 12]]
x.dims = [6, 2]
set new_dim = 4
then out is a LoDTensor:
out.lod = [[0, 1, 3]]
out.data = [[1, 2, 3, 4],
[5, 6, 7, 8], [9, 10, 11, 12]]
out.data = [[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12]]
out.dims = [3, 4]
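A small numpy sketch of the regrouping above (this is only an illustration of
the text example, not the operator's implementation):

.. code-block:: python

    # Illustrative numpy sketch of the reshape in the example above.
    import numpy as np

    x = np.arange(1, 13).reshape(6, 2)    # the x.data shown above
    new_dim = 4
    out = x.reshape(-1, new_dim)          # [[1,2,3,4],[5,6,7,8],[9,10,11,12]]
    # the lod offsets [0, 2, 6] scale by old_dim / new_dim = 2 / 4 -> [0, 1, 3]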
Currently, only 1-level LoDTensor is supported and please make sure
......@@ -3049,19 +3579,19 @@ def sequence_reshape(input, new_dim):
no remainder for each sequence.
Args:
input (Variable): (LodTensor, default: LoDTensor<float>), a 2-D LoDTensor
with shape being [N, M] where M for dimension.
new_dim (int): New dimension which the input LoDTensor is reshaped to.
input (Variable): A 2-D LoDTensor with shape [N, M], where M is the dimension.
new_dim (int): New dimension that the input LoDTensor is reshaped to.
Returns:
Variable: Reshaped LoDTensor according to new dimension.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[5, 20],
dtype='float32', lod_level=1)
x_reshaped = layers.sequence_reshape(input=x, new_dim=10)
x = fluid.layers.data(shape=[5, 20], dtype='float32', lod_level=1)
x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=10)
"""
helper = LayerHelper('sequence_reshape', **locals())
out = helper.create_tmp_variable(helper.input_dtype())
......@@ -3091,13 +3621,41 @@ def nce(input,
input (Variable): input variable.
label (Variable): label.
num_total_classes (int):${num_total_classes_comment}
sample_weight (int): ${sample_weight_comment}
sample_weight (Variable|None): A Variable of shape [batch_size, 1]
storing a weight for each sample. The default weight for each
sample is 1.0.
param_attr (ParamAttr|None): attributes for parameter
bias_attr (ParamAttr|None): attributes for bias
num_neg_samples (int): ${num_neg_samples_comment}
Returns:
Variable: output of nce layer.
Variable: The output nce loss.
Examples:
.. code-block:: python
window_size = 5
words = []
for i in xrange(window_size):
words.append(layers.data(
name='word_{0}'.format(i), shape=[1], dtype='int64'))
dict_size = 10000
label_word = int(window_size / 2) + 1
embs = []
for i in xrange(window_size):
if i == label_word:
continue
emb = layers.embedding(input=words[i], size=[dict_size, 32],
param_attr='emb.w', is_sparse=True)
embs.append(emb)
embs = layers.concat(input=embs, axis=1)
loss = layers.nce(input=embs, label=words[label_word],
num_total_classes=dict_size, param_attr='nce.w',
bias_attr='nce.b')
"""
helper = LayerHelper('nce', **locals())
assert isinstance(input, Variable)
......@@ -3237,8 +3795,6 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
Examples:
As an example:
.. code-block:: text
Given:
......@@ -3280,9 +3836,9 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
output.dims = {8, 9}
output.lod = [[0, 4, 8]]
output.lod = [[4, 4]]
The simple usage is:
Examples:
.. code-block:: python
......@@ -3315,29 +3871,13 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
return out
@templatedoc()
def row_conv(input, future_context_size, param_attr=None, act=None):
"""Row Conv Operator. This layer will apply lookahead convolution to
**input**. The input variable should be a 2D LoDTensor with shape [T, D].
Parameters with shape [future_context_size + 1, D] will be created. The math
equation of row convolution is as follows:
.. math::
Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{i - j}
In the above equation:
* :math:`Out_{i}`: The i-th row of output variable with shape [1, D].
* :math:`\\tau`: Future context size.
* :math:`X_{j}`: The j-th row of input variable with shape [1, D].
* :math:`W_{i-j}`: The (i-j)-th row of parameters with shape [1, D].
More details about row_conv please refer to the paper \
(http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and
the design document \
(https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645).
"""
${comment}
Args:
input (Variable): Input variable, a 2D LoDTensor with shape [T, D].
input (${x_type}): ${x_comment}.
future_context_size (int): Future context size. Please note, the shape
of convolution kernel is [future_context_size + 1, D].
param_attr (ParamAttr): Attributes of parameters, including
......@@ -3345,14 +3885,13 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
act (str): Non-linear activation to be applied to output variable.
Returns:
Variable: The output tensor with same shape as input tensor.
${out_comment}.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[16],
dtype='float32', lod_level=1)
out = fluid.layers.row_conv(input=x, future_context_size=2)
>>> import paddle.fluid as fluid
>>> x = fluid.layers.data(name='x', shape=[16],
>>> dtype='float32', lod_level=1)
>>> out = fluid.layers.row_conv(input=x, future_context_size=2)
"""
helper = LayerHelper('row_conv', **locals())
dtype = helper.input_dtype()
......@@ -3368,42 +3907,23 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
return helper.append_activation(out)
@templatedoc()
def multiplex(inputs, index):
"""
**Multiplex Layer**
Referring to the given index variable, this layer selects rows from the
input variables to construct a multiplex variable. Assuming that there are
:math:`m` input variables and :math:`I_i` represents the i-th input
variable and :math:`i` is in [0, :math:`m`). All input variables are
tensors with same shape [:math:`d_0`, :math:`d_1`, ..., :math:`d_R`].
Please note that rank of the input tensor should be at least 2. Each input
variable will be treated as a 2-D matrix with shape [:math:`M`, :math:`N`]
where :math:`M` for :math:`d_0` and :math:`N` for :math:`d_1` * :math:`d_2`
* ... * :math:`d_R`. Let :math:`I_i[j]` be the j-th row of the i-th input
variable. The given index variable should be a 2-D tensor with shape
[:math:`M`, 1]. Let `ID[i]` be the i-th index value of the index variable.
Then the output variable will be a tensor with shape [:math:`d_0`,
:math:`d_1`, ..., :math:`d_R`]. If we treat the output tensor as a 2-D
matrix with shape [:math:`M`, :math:`N`] and let :math:`O[i]` be the i-th
row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`.
${comment}
>>> import paddle.fluid as fluid
>>> x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32')
>>> x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32')
>>> index = fluid.layers.data(name='index', shape=[1], dtype='int32')
>>> out = fluid.layers.multiplex(inputs=[x1, x2], index=index)
Args:
inputs (list): A list of variables to gather from. All variables have the
same shape and the rank is at least 2.
index (Variable): Tensor<int32>, index variable which is a 2-D tensor
with shape [M, 1] where M is the batch size.
inputs (list): ${x_comment}.
index (${ids_type}): ${ids_comment}.
Returns:
Variable: Multiplex variable gathered from input variables.
Examples:
.. code-block:: python
x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32')
x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32')
index = fluid.layers.data(name='index', shape=[1], dtype='int32')
out = fluid.layers.multiplex(inputs=[x1, x2], index=index)
${out_comment}.
"""
helper = LayerHelper('multiplex', **locals())
......@@ -3489,31 +4009,30 @@ def softmax_with_cross_entropy(logits, label, soft_label=False):
def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
"""
**Smooth L1 Loss Operator**
This operator computes the smooth L1 loss for X and Y.
The operator takes the first dimension of X and Y as batch size.
This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`.
It takes the first dimension of :attr:`x` and :attr:`y` as batch size.
For each instance, it computes the smooth L1 loss element by element first
and then sums all the losses. So the shape of Out is [batch_size, 1].
and then sums all the losses. So the shape of the output Variable is
[batch_size, 1].
Args:
x (Variable): A tensor with rank at least 2. The input value of smooth
L1 loss op with shape [batch_size, dim1, ..., dimN].
y (Variable): A tensor with rank at least 2. The target value of smooth
L1 loss op with same shape as x.
L1 loss op with same shape as :attr:`x`.
inside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with x. If provided,
the result of (x - y) will be multiplied by this tensor element by
element.
input is optional and should have same shape with :attr:`x`. If
provided, the result of (:attr:`x` - :attr:`y`) will be multiplied
by this tensor element by element.
outside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with x. If provided,
the out smooth L1 loss will be multiplied by this tensor element
by element.
sigma (float|None): Hyper parameter of smooth L1 loss op. A float scalar
with default value 1.0.
input is optional and should have same shape with :attr:`x`. If
provided, the out smooth L1 loss will be multiplied by this tensor
element by element.
sigma (float|None): Hyper parameter of smooth L1 loss layer. A float
scalar with default value 1.0.
Returns:
Variable: A tensor with rank be 2. The output smooth L1 loss with
shape [batch_size, 1].
Variable: The output smooth L1 loss with shape [batch_size, 1].
Examples:
.. code-block:: python
......@@ -3524,6 +4043,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
fc = fluid.layers.fc(input=data, size=100)
out = fluid.layers.smooth_l1(x=fc, y=label)
"""
helper = LayerHelper('smooth_l1_loss', **locals())
diff = helper.create_tmp_variable(dtype=x.dtype)
loss = helper.create_tmp_variable(dtype=x.dtype)
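For intuition, a NumPy sketch of the piecewise form this loss is commonly defined by (an assumed reference helper, not the operator's kernel), with sigma as in the Args above:

import numpy as np

def smooth_l1_ref(x, y, sigma=1.0):
    # element-wise smooth L1, summed per instance -> shape [batch_size, 1]
    d = np.abs(x - y)
    s2 = sigma * sigma
    elem = np.where(d < 1.0 / s2, 0.5 * s2 * d * d, d - 0.5 / s2)
    return elem.reshape(x.shape[0], -1).sum(axis=1, keepdims=True)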
......@@ -3543,32 +4063,20 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
def one_hot(input, depth):
"""
One Hot Operator. This operator creates the one-hot representations for input
index values. The following example will help to explain the function of this
operator.
This layer creates the one-hot representations for input indices.
Args:
input(variable): A Tensor/LodTensor of indices, last dimension must be 1.
depth(scalar): an integer defining the depth of the one hot dimension.
input(Variable): Input indices, last dimension must be 1.
depth(scalar): An integer defining the depth of the one-hot dimension.
Returns:
The one-hot tensor or LodTensor, same as input.
Variable: The one-hot representations of input.
Examples:
.. code-block:: python
X is a LoDTensor:
X.lod = [[0, 1, 4]]
X.shape = [4, 1]
X.data = [[1], [1], [3], [0]]
set depth = 4
Out is a LoDTensor:
Out.lod = [[0, 1, 4]]
Out.shape = [4, 4]
Out.data = [[0., 1., 0., 0.],
[0., 1., 0., 0.],
[0., 0., 0., 1.],
[1., 0., 0., 0.]]
label = layers.data(name="label", shape=[1], dtype="float32")
one_hot_label = layers.one_hot(input=label, depth=10)
"""
helper = LayerHelper("one_hot", **locals())
one_hot_out = helper.create_tmp_variable(dtype='float32')
......@@ -3719,73 +4227,74 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
def lod_reset(x, y=None, target_lod=None):
"""
LoD Reset Operator. Set LoD of **x** to a new one specified by **y** or
**target_lod**. When **y** provided, **y.lod** would be considered as target
LoD first, otherwise **y.data** would be considered as target LoD. If **y**
is not provided, target LoD should be specified by **target_lod**.
If target LoD is specified by **Y.data** or **target_lod**, only one level
LoD is supported.
Set LoD of :attr:`x` to a new one specified by :attr:`y` or
:attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be
considered as target LoD first, otherwise :attr:`y.data` would be
considered as target LoD. If :attr:`y` is not provided, target LoD should
be specified by :attr:`target_lod`. If target LoD is specified by
:attr:`y.data` or :attr:`target_lod`, only one level LoD is supported.
.. code-block:: text
* Example 1:
Given a 1-level LoDTensor x:
x.lod = [[ 0, 2, 5, 6 ]]
x.lod = [[ 2, 3, 1 ]]
x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
x.dims = [6, 1]
target_lod: [0, 4, 6]
target_lod: [4, 2]
then we get a 1-level LoDTensor:
out.lod = [[ 0, 4, 6 ]]
out.lod = [[4, 2]]
out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
out.dims = [6, 1]
* Example 2:
Given a 1-level LoDTensor x:
x.lod = [[ 0, 2, 5, 6 ]]
x.lod = [[2, 3, 1]]
x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
x.dims = [6, 1]
y is a Tensor:
y.data = [[0, 2, 6]]
y.data = [[2, 4]]
y.dims = [1, 3]
then we get a 1-level LoDTensor:
out.lod = [[ 0, 2, 6 ]]
out.lod = [[2, 4]]
out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
out.dims = [6, 1]
* Example 3:
Given a 1-level LoDTensor x:
x.lod = [[ 0, 2, 5, 6 ]]
x.lod = [[2, 3, 1]]
x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
x.dims = [6, 1]
y is a 2-level LoDTensor:
y.lod = [[0, 2, 4], [0, 2, 5, 6]]
y.lod = [[2, 2], [2, 2, 1, 1]]
y.data = [[1.1], [2.1], [3.1], [4.1], [5.1], [6.1]]
y.dims = [6, 1]
then we get a 2-level LoDTensor:
out.lod = [[0, 2, 4], [0, 2, 5, 6]]
out.lod = [[2, 2], [2, 2, 1, 1]]
out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
out.dims = [6, 1]
Args:
x (Variable): Input variable which could be a Tensor or LodTensor.
y (Variable|None): If provided, output's LoD would be derived from y.
y (Variable|None): If provided, output's LoD would be derived
from :attr:`y`.
target_lod (list|tuple|None): One level LoD which should be considered
as target LoD when y not provided.
as target LoD when :attr:`y` not provided.
Returns:
Variable: Output variable with LoD specified by this operator.
Variable: Output variable with LoD specified by this layer.
Raises:
ValueError: If y and target_lod are both None.
ValueError: If :attr:`y` and :attr:`target_lod` are both None.
Examples:
.. code-block:: python
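# assumed usage sketch; the diff elides the original example here
x = fluid.layers.data(name='x', shape=[10])
y = fluid.layers.data(name='y', shape=[10, 20], lod_level=2)
out = fluid.layers.lod_reset(x=x, y=y)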
......@@ -3821,9 +4330,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
.. math::
Output(i, x, y) = Input(i, x, y) / \left(
k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)}
(Input(j, x, y))^2 \right)^{\beta}
Output(i, x, y) = Input(i, x, y) / \\left(k + \\alpha \\sum\\limits^{\\min(C, c + n/2)}_{j = \\max(0, c - n/2)}(Input(j, x, y))^2\\right)^{\\beta}
In the above equation:
......@@ -4254,6 +4761,7 @@ def gather(input, index):
output (Variable): The output is a tensor with the same rank as input.
Examples:
.. code-block:: python
output = fluid.layers.gather(x, index)
......@@ -4274,10 +4782,6 @@ def random_crop(x, shape, seed=None):
"""
${comment}
Examples:
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
Args:
x(${x_type}): ${x_comment}
shape(${shape_type}): ${shape_comment}
......@@ -4286,7 +4790,10 @@ def random_crop(x, shape, seed=None):
Returns:
${out_comment}
Examples:
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
"""
helper = LayerHelper("random_crop", **locals())
dtype = helper.input_dtype()
......@@ -4312,9 +4819,115 @@ def random_crop(x, shape, seed=None):
seed_out = helper.create_tmp_variable(dtype="int64")
helper.append_op(
type="random_crop",
inputs={"X": input,
inputs={"X": x,
"Seed": seed},
outputs={"Out": out,
"SeedOut": seed_out},
attrs={"shape": shape})
return out
def log(x):
"""
Calculates the natural log of the given input tensor, element-wise.
.. math::
Out = \\ln(x)
Args:
x (Variable): Input tensor.
Returns:
Variable: The natural log of the input tensor computed element-wise.
Examples:
.. code-block:: python
output = fluid.layers.log(x)
"""
helper = LayerHelper('log', **locals())
dtype = helper.input_dtype()
out = helper.create_tmp_variable(dtype)
helper.append_op(type="log", inputs={"X": input}, outputs={"Out": out})
return out
def relu(x):
"""
Relu takes one input data (Tensor) and produces one output data (Tensor)
where the rectified linear function, y = max(0, x), is applied to
the tensor elementwise.
.. math::
Out = \\max(0, x)
Args:
x (Variable): The input tensor.
Returns:
Variable: The output tensor with the same shape as input.
Examples:
.. code-block:: python
output = fluid.layers.relu(x)
"""
helper = LayerHelper('relu', **locals())
dtype = helper.input_dtype()
out = helper.create_tmp_variable(dtype)
helper.append_op(type="relu", inputs={"X": input}, outputs={"Out": out})
return out
def mean_iou(input, label, num_classes):
"""
Mean Intersection-Over-Union is a common evaluation metric for
semantic image segmentation, which first computes the IOU for each
semantic class and then computes the average over classes.
IOU is defined as follows:
.. math::
IOU = \\frac{true\_positive}{true\_positive + false\_positive + false\_negative}.
The predictions are accumulated in a confusion matrix and mean-IOU
is then calculated from it.
Args:
input (Variable): A Tensor of prediction results for semantic labels with type int32 or int64.
label (Variable): A Tensor of ground truth labels with type int32 or int64.
Its shape should be the same as input.
num_classes (int): The possible number of labels.
Returns:
mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1].
out_wrong(Variable): A Tensor with shape [num_classes]. The number of wrong predictions for each class.
out_correct(Variable): A Tensor with shape [num_classes]. The number of correct predictions for each class.
Examples:
.. code-block:: python
iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes)
"""
helper = LayerHelper('mean_iou', **locals())
dtype = helper.input_dtype()
out_mean_iou = helper.create_tmp_variable(dtype='float32')
out_wrong = helper.create_tmp_variable(dtype='int32')
out_correct = helper.create_tmp_variable(dtype='int32')
helper.append_op(
type="mean_iou",
inputs={"predictions": input,
"labels": label},
outputs={
"out_mean_iou": out_mean_iou,
"out_wrong": out_wrong,
"out_correct": out_correct
},
attrs={"num_classes": num_classes})
return out_mean_iou, out_wrong, out_correct
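As a cross-check on the formula above, mean IOU can be recomputed from a confusion matrix; this NumPy sketch is an assumed reference, not the mean_iou kernel:

import numpy as np

def mean_iou_ref(pred, label, num_classes):
    # accumulate a confusion matrix, then average per-class IOU
    cm = np.zeros((num_classes, num_classes), dtype=np.int64)
    for p, l in zip(pred.ravel(), label.ravel()):
        cm[l, p] += 1
    tp = np.diag(cm).astype(np.float64)
    denom = cm.sum(axis=0) + cm.sum(axis=1) - tp  # tp + fp + fn per class
    valid = denom > 0
    return (tp[valid] / denom[valid]).mean()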
......@@ -17,7 +17,6 @@ __activations__ = [
'sigmoid',
'logsigmoid',
'exp',
'relu',
'tanh',
'tanh_shrink',
'softshrink',
......@@ -29,7 +28,6 @@ __activations__ = [
'sin',
'round',
'reciprocal',
'log',
'square',
'softplus',
'softsign',
......@@ -40,8 +38,6 @@ __activations__ = [
'relu6',
'pow',
'stanh',
'hard_shrink',
'thresholded_relu',
'hard_sigmoid',
'swish',
]
......@@ -64,11 +60,9 @@ __all__ = [
'logical_or',
'logical_xor',
'logical_not',
'uniform_random',
'uniform_random_batch_size_like',
'gaussian_random',
'gaussian_random_batch_size_like',
'cumsum',
'scatter',
'sum',
'slice',
......@@ -80,3 +74,88 @@ __all__ = [
for _OP in set(__all__):
globals()[_OP] = generate_layer_fn(_OP)
__all__ += ["uniform_random"]
_uniform_random_ = generate_layer_fn('uniform_random')
def uniform_random(shape, dtype=None, min=None, max=None, seed=None):
kwargs = dict()
for name in locals():
val = locals()[name]
if val is not None:
kwargs[name] = val
return _uniform_random_(**kwargs)
uniform_random.__doc__ = _uniform_random_.__doc__ + """
Examples:
>>> result = fluid.layers.uniform_random(shape=[32, 784])
"""
__all__ += ['hard_shrink']
_hard_shrink_ = generate_layer_fn('hard_shrink')
def hard_shrink(x, threshold=None):
kwargs = dict()
for name in locals():
val = locals()[name]
if val is not None:
kwargs[name] = val
return _hard_shrink_(**kwargs)
hard_shrink.__doc__ = _hard_shrink_.__doc__ + """
Examples:
>>> data = fluid.layers.data(name="input", shape=[784])
>>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
"""
__all__ += ['cumsum']
_cum_sum_ = generate_layer_fn('cumsum')
def cumsum(x, axis=None, exclusive=None, reverse=None):
kwargs = dict()
for name in locals():
val = locals()[name]
if val is not None:
kwargs[name] = val
return _cum_sum_(**kwargs)
cumsum.__doc__ = _cum_sum_.__doc__ + """
Examples:
>>> data = fluid.layers.data(name="input", shape=[32, 784])
>>> result = fluid.layers.cumsum(data, axis=0)
"""
__all__ += ['thresholded_relu']
_thresholded_relu_ = generate_layer_fn('thresholded_relu')
def thresholded_relu(x, threshold=None):
kwargs = dict()
for name in locals():
val = locals()[name]
if val is not None:
kwargs[name] = val
return _thresholded_relu_(**kwargs)
thresholded_relu.__doc__ = _thresholded_relu_.__doc__ + """
Examples:
>>> data = fluid.layers.data(name="input", shape=[1])
>>> result = fluid.layers.thresholded_relu(data, threshold=0.4)
"""
......@@ -6,7 +6,7 @@
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
......@@ -51,7 +51,12 @@ def create_parameter(shape,
is_bias=False,
default_initializer=None):
"""
Create a parameter
Create a parameter. The parameter is a learnable variable that has a
gradient and can be optimized.
NOTE: this is a very low-level API. It is useful only when you create
operators by yourself, instead of using layers.
Args:
shape(list[int]): shape of the parameter
dtype(string): element type of the parameter
......@@ -63,7 +68,12 @@ def create_parameter(shape,
default_initializer(Initializer): initializer for the parameter
Returns:
Parameter: the created parameter
the created parameter.
Examples:
>>> W = fluid.layers.create_parameter(shape=[784, 200], dtype='float32')
>>> data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False)
>>> hidden = fluid.layers.matmul(x=data, y=W)
"""
helper = LayerHelper("create_parameter", **locals())
if attr is None:
......@@ -79,16 +89,29 @@ def create_global_var(shape,
force_cpu=False,
name=None):
"""
Create a global variable, such as global_step.
Create a new variable in the global block (block 0).
Args:
shape(list[int]): shape of the variable
value(float): the value of the variable
dtype(string): element type of the parameter
persistable(bool): if this variable is persistable
force_cpu(bool): force this variable to be on CPU
value(float): the value of the variable. The new created
variable will be filled with it.
dtype(string): data type of the variable
persistable(bool): if this variable is persistable.
Default: False
force_cpu(bool): force this variable to be on CPU.
Default: False
name(str|None): The name of the variable. If set to None the variable
name will be generated automatically.
Default: None
Returns:
Variable: the created Variable
Examples:
.. code-block:: python
var = fluid.layers.create_global_var(shape=[2,3], value=1.0, dtype='float32',
persistable=True, force_cpu=True, name='new_var')
"""
helper = LayerHelper("global_var", **locals())
var = helper.create_global_variable(
......@@ -101,8 +124,21 @@ def create_global_var(shape,
def cast(x, dtype):
"""
This function takes in the input with input_dtype
and casts it to the output_dtype as the output.
This layer takes in the Variable :attr:`x` with :attr:`x.dtype` and casts
it to the output with :attr:`dtype`.
Args:
x (Variable): The input Variable for casting.
dtype(np.dtype|core.VarDesc.VarType|str): Data type of the output Variable.
Returns:
Variable: The output Variable after casting.
Examples:
.. code-block:: python
data = fluid.layers.data(name='x', shape=[13], dtype='float32')
result = fluid.layers.cast(x=data, dtype='float64')
"""
helper = LayerHelper('cast', **locals())
out = helper.create_tmp_variable(dtype=dtype)
......@@ -133,7 +169,8 @@ def concat(input, axis=0, name=None):
Examples:
.. code-block:: python
out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
"""
helper = LayerHelper('concat', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
......@@ -146,19 +183,21 @@ def concat(input, axis=0, name=None):
def sums(input, out=None):
"""This function performs the sum operation on the input and returns the
"""
This function performs the sum operation on the input and returns the
result as the output.
Args:
input (Variable|list): The input tensor that has the elements
that need to be summed up.
out (Variable|None): Output parameter. The sum result.
Default: None
Returns:
Variable: The tensor type variable that has the sum of input
written to it.
Variable: the sum of the inputs. The same Variable as the argument 'out' when it is provided.
Examples:
.. code-block::python
.. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
......@@ -191,6 +230,7 @@ def assign(input, output):
Examples:
.. code-block:: python
out = fluid.layers.create_tensor(dtype='float32')
hidden = fluid.layers.fc(input=data, size=10)
fluid.layers.assign(hidden, out)
......@@ -328,13 +368,13 @@ def argmin(x, axis=0):
x(Variable): The input to compute the indices of
the min elements.
axis(int): Axis to compute indices along.
Returns:
Variable: The tensor variable storing the output
Examples:
.. code-block:: python
out = fluid.layers.argmin(x=in, axis=0)
out = fluid.layers.argmin(x=in, axis=-1)
"""
......@@ -359,13 +399,13 @@ def argmax(x, axis=0):
x(Variable): The input to compute the indices of
the max elements.
axis(int): Axis to compute indices along.
Returns:
Variable: The tensor variable storing the output
Examples:
.. code-block:: python
out = fluid.layers.argmax(x=in, axis=0)
out = fluid.layers.argmax(x=in, axis=-1)
"""
......@@ -413,11 +453,12 @@ def zeros(shape, dtype, force_cpu=False):
It also sets *stop_gradient* to True.
Args:
shape(tuple|list|None): Shape of output tensor
dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor
shape(tuple|list|None): Shape of output tensor.
dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor.
force_cpu(bool, default False): Whether to make output stay on CPU.
Returns:
Variable: The tensor variable storing the output
Variable: The tensor variable storing the output.
Examples:
.. code-block:: python
......@@ -486,11 +527,27 @@ def save_combine(x, file_path, overwrite=True):
Saves a list of variables into a single file.
Args:
x(list): A list of Tensor/LoDTensor to be saved together in a single file.
x(list): A list of Tensor/LoDTensor variables to be saved together in
a single file.
file_path(str): The file path where variables will be saved.
overwrite(bool): Whether or not to overwrite the given file when it
already exists. If set to 'False' and the file exists, a runtime
error will be thrown.
Returns:
There is no return value.
Examples:
.. code-block:: python
v1 = fluid.layers.data(name="data",
shape=(4, 6),
dtype="float32")
v2 = fluid.layers.data(name="data",
shape=(6, 8, 4),
dtype="float32")
normed = fluid.layers.save_combine([v1, v2], file_path="output")
"""
helper = LayerHelper("save_combine", **locals())
helper.append_op(
......
......@@ -18,80 +18,6 @@ import numpy as np
__all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
def _validate_lod(lod, tensor_height=-1):
"""Check whether the input length-based lod info is valid.
There are several things to check:
1. lod should be a list of lists. Empty list is fine.
2. The length of each sublist (a lod level) should be at least one.
3. Each element in each lod level should be an integer greater than 0.
4. The sum of one lod level should be equal to the length of the next lod level.
5. The sum of the last lod level should be equal to the tensor height.
Bypass this check if user does not provide tensor_height as input.
Args:
lod: the length-based lod info, e.g., [[2, 3], [2, 1, 2, 3, 4]].
tensor_height: the outermost dimension of the tensor with which the input
lod is associated.
Returns:
A boolean indicating whether the input lod is valid or not.
"""
assert isinstance(lod, list), "lod should be a list"
# Empty lod is fine
if len(lod) == 0:
return True
lod_sum = []
for level in lod:
assert isinstance(level, list), "each item in lod should be a list"
# Each level of lod should have at least one length info
if len(level) < 1:
return False
level_sum = 0
for lod_len in level:
# Each length in a level should be > 0
if lod_len <= 0:
return False
level_sum += lod_len
lod_sum.append(level_sum)
for idx, val in enumerate(lod_sum[:-1]):
# Each level's sum should be equal to
# the number of items in the next level
if val != len(lod[idx + 1]):
return False
if tensor_height == -1:
return True
else:
# Last level's sum should be equal to the tensor height
return lod_sum[-1] == tensor_height
def _convert_lod(lod):
"""Convert a length-based lod to a offset-based lod.
If the length-based lod is [[2, 3], [2, 1, 2, 3, 4]],
then the offset-based lod is [[0, 2, 5], [0, 2, 3, 5, 8, 12]].
Args:
lod: a length-based lod info.
Returns:
A list of lists as the offset-based lod converted from the input lod.
"""
new_lod = []
for level in lod:
cur_len = 0
new_level = [cur_len]
for lod_len in level:
cur_len += lod_len
new_level.append(cur_len)
new_lod.append(new_level)
return new_lod
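These helpers are removed because pybind now accepts length-based LoD directly via set_recursive_sequence_lengths; the two encodings remain interchangeable, as this small sketch shows (hypothetical helper names):

def lengths_to_offsets(level):
    # [2, 3, 1] -> [0, 2, 5, 6]
    offsets = [0]
    for length in level:
        offsets.append(offsets[-1] + length)
    return offsets

def offsets_to_lengths(level):
    # [0, 2, 5, 6] -> [2, 3, 1]
    return [level[i + 1] - level[i] for i in range(len(level) - 1)]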
def create_lod_tensor(data, lod, place):
"""Create a lod tensor from a numpy array, a list, or an existing lod tensor.
......@@ -139,11 +65,11 @@ def create_lod_tensor(data, lod, place):
flattened_data = flattened_data.reshape([len(flattened_data), 1])
return create_lod_tensor(flattened_data, lod, place)
elif isinstance(data, np.ndarray):
assert _validate_lod(lod,
data.shape[0]), "the provided lod info is invalid"
tensor = core.LoDTensor()
tensor.set(data, place)
tensor.set_lod(_convert_lod(lod))
tensor.set_recursive_sequence_lengths(lod)
assert tensor.has_valid_recursive_sequence_lengths(
), "the provided lod info is invalid"
return tensor
else:
raise TypeError(
......@@ -181,9 +107,8 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high):
A fluid LoDTensor object with tensor data and lod info.
"""
assert isinstance(base_shape, list), "base_shape should be a list"
converted_lod = _convert_lod(lod)
# append the total number of basic elements to the front of its shape
overall_shape = [converted_lod[-1][-1]] + base_shape
overall_shape = [sum(lod[-1])] + base_shape
# the range of integer data elements is [low, high]
data = np.random.random_integers(low, high, overall_shape).astype("int64")
return create_lod_tensor(data, lod, place)
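Under the new convention, callers hand create_lod_tensor the sequence lengths themselves; a minimal usage sketch mirroring the updated tests:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.lod_tensor import create_lod_tensor

# two sequences of lengths 3 and 2 -> 5 rows in total
data = np.random.random([5, 1]).astype('float32')
t = create_lod_tensor(data, [[3, 2]], fluid.CPUPlace())
print(t.recursive_sequence_lengths())  # [[3, 2]]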
......@@ -13,7 +13,7 @@
# limitations under the License.
import re
from collections import defaultdict
from paddle.fluid.framework import Program
from paddle.fluid.framework import Program, Variable
import framework
import layers
from backward import append_backward
......@@ -41,7 +41,10 @@ class Optimizer(object):
but need to use one of its implementations.
"""
def __init__(self, learning_rate, regularization=None):
def __init__(self,
learning_rate,
regularization=None,
LARS_weight_decay=0.0):
if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, framework.Variable):
raise TypeError("learning rate should be float or Variable")
......@@ -61,6 +64,7 @@ class Optimizer(object):
# {accum_name : { parameter_name : accumulator_for_parameter, ...}, ...}
self._accumulators = defaultdict(lambda: dict())
self.helper = None
self._LARS_weight_decay = LARS_weight_decay
def _create_global_learning_rate(self):
lr = self.global_learning_rate()
......@@ -100,10 +104,15 @@ class Optimizer(object):
# create learning rate variable for every parameter
param = param_and_grad[0]
param_lr = param.optimize_attr['learning_rate']
if param_lr == 1.0:
return self.global_learning_rate()
if type(param_lr) == Variable:
# param learning rate has been updated (LARS)
print("returns updated param lr ", param_lr)
return param_lr
else:
return self.global_learning_rate() * param_lr
if param_lr == 1.0:
return self.global_learning_rate()
else:
return self.global_learning_rate() * param_lr
def _create_accumulators(self, block, parameters):
"""Create all accumulators needed by the parameters
......@@ -210,6 +219,10 @@ class Optimizer(object):
self._create_accumulators(loss.block,
[p[0] for p in parameters_and_grads])
self._create_global_learning_rate()
if self._LARS_weight_decay > 0.0:
layers.append_LARS(parameters_and_grads,
self.global_learning_rate(),
self._LARS_weight_decay)
optimize_ops = []
for param_and_grad in parameters_and_grads:
......
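For context, LARS scales each parameter's learning rate by a trust ratio of weight norm to gradient norm; a hedged NumPy sketch of the usual formulation (illustrative only, not the append_LARS implementation used above):

import numpy as np

def lars_local_lr(param, grad, base_lr, weight_decay, eps=1e-9):
    # local LR = base_lr * ||w|| / (||g|| + weight_decay * ||w||)
    w_norm = np.linalg.norm(param)
    g_norm = np.linalg.norm(grad)
    return base_lr * w_norm / (g_norm + weight_decay * w_norm + eps)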
......@@ -76,8 +76,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
emb_layers.append(mark_embedding)
hidden_0_layers = [
fluid.layers.fc(input=emb, size=hidden_dim, act='tanh')
for emb in emb_layers
fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
]
hidden_0 = fluid.layers.sums(input=hidden_0_layers)
......@@ -94,8 +93,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
for i in range(1, depth):
mix_hidden = fluid.layers.sums(input=[
fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act='tanh'),
fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh')
fluid.layers.fc(input=input_tmp[0], size=hidden_dim),
fluid.layers.fc(input=input_tmp[1], size=hidden_dim)
])
lstm = fluid.layers.dynamic_lstm(
......
......@@ -94,7 +94,7 @@ def train(nn_type,
test_program = fluid.default_main_program().clone(for_test=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimizer = fluid.optimizer.Adam(learning_rate=0.001, LARS_weight_decay=0.3)
optimizer.minimize(avg_loss)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
......
......@@ -22,12 +22,11 @@ class TestDataFeeder(unittest.TestCase):
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
print(result)
self.assertEqual(result['image'].shape(), [2, 1, 28, 28])
self.assertEqual(result['label'].shape(), [2, 1])
self.assertEqual(result['image'].lod(), [])
self.assertEqual(result['label'].lod(), [])
self.assertEqual(result['image'].recursive_sequence_lengths(), [])
self.assertEqual(result['label'].recursive_sequence_lengths(), [])
def test_lod_level_1_converter(self):
# lod_level = 1
......@@ -42,12 +41,12 @@ class TestDataFeeder(unittest.TestCase):
# label = [1] * len(data)
result = feeder.feed(
[([1, 2, 3], [1]), ([4, 5], [1]), ([6, 7, 8, 9], [1])])
print(result)
self.assertEqual(result['sentences'].shape(), [9, 1])
self.assertEqual(result['label'].shape(), [3, 1])
self.assertEqual(result['sentences'].lod(), [[0, 3, 5, 9]])
self.assertEqual(result['label'].lod(), [])
self.assertEqual(result['sentences'].recursive_sequence_lengths(),
[[3, 2, 4]])
self.assertEqual(result['label'].recursive_sequence_lengths(), [])
def test_lod_level_2_converter(self):
# lod_level = 2
......@@ -62,12 +61,12 @@ class TestDataFeeder(unittest.TestCase):
# label = [1] * len(data)
result = feeder.feed(
[([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, 9]], [1])])
print(result)
self.assertEqual(result['paragraphs'].shape(), [9, 1])
self.assertEqual(result['label'].shape(), [2, 1])
self.assertEqual(result['paragraphs'].lod(), [[0, 2, 3], [0, 3, 5, 9]])
self.assertEqual(result['label'].lod(), [])
self.assertEqual(result['paragraphs'].recursive_sequence_lengths(),
[[2, 1], [3, 2, 4]])
self.assertEqual(result['label'].recursive_sequence_lengths(), [])
if __name__ == '__main__':
......
......@@ -13,44 +13,41 @@
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.lod_tensor import create_lod_tensor, create_random_int_lodtensor, _validate_lod, _convert_lod
import numpy
from paddle.fluid.lod_tensor import create_lod_tensor, create_random_int_lodtensor
import numpy as np
import unittest
class TestLoDTensor(unittest.TestCase):
def test_validate_lod(self):
lod = (1, 2, 1)
self.assertRaises(AssertionError, _validate_lod, lod, -1)
lod = [[1, 2], (2, 3)]
self.assertRaises(AssertionError, _validate_lod, lod, -1)
lod = [1, 2, 3]
self.assertRaises(AssertionError, _validate_lod, lod, -1)
def test_pybind_lod(self):
tensor = fluid.LoDTensor()
lod = []
self.assertTrue(_validate_lod(lod, -1))
tensor.set_recursive_sequence_lengths(lod)
lod = [[], [1], [3]]
self.assertFalse(_validate_lod(lod, -1))
lod = [[0], [-1], [3]]
self.assertFalse(_validate_lod(lod, -1))
self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod)
lod = [[0], [2], [3]]
self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod)
# Each level's sum should be equal to the number of items in the next level
# Moreover, last level's sum should be equal to the tensor height
lod = [[2, 3], [1, 3, 1, 2, 1]]
self.assertTrue(_validate_lod(lod, tensor_height=8))
lod = [[1, 3], [2, 1, 3]]
self.assertFalse(_validate_lod(lod, tensor_height=6))
lod = [[1, 3], [2, 1, 3, 4]]
self.assertFalse(_validate_lod(lod, tensor_height=5))
def test_convert_lod(self):
lod = [[1, 2, 3]]
converted_lod = [[0, 1, 3, 6]]
self.assertEqual(_convert_lod(lod), converted_lod)
tensor.set_recursive_sequence_lengths(lod)
self.assertEqual(tensor.recursive_sequence_lengths(), lod)
tensor.set(np.random.random([6, 1]), fluid.CPUPlace())
self.assertTrue(tensor.has_valid_recursive_sequence_lengths())
tensor.set(np.random.random([9, 1]), fluid.CPUPlace())
self.assertFalse(tensor.has_valid_recursive_sequence_lengths())
# Each level's sum should be equal to the number of items in the next level
# Moreover, last level's sum should be equal to the tensor height
lod = [[2, 3], [1, 3, 1, 2, 2]]
tensor.set_recursive_sequence_lengths(lod)
self.assertEqual(tensor.recursive_sequence_lengths(), lod)
tensor.set(np.random.random([8, 1]), fluid.CPUPlace())
self.assertFalse(tensor.has_valid_recursive_sequence_lengths())
lod = [[2, 3], [1, 3, 1, 2, 1]]
converted_lod = [[0, 2, 5], [0, 1, 4, 5, 7, 8]]
self.assertEqual(_convert_lod(lod), converted_lod)
tensor.set_recursive_sequence_lengths(lod)
self.assertTrue(tensor.has_valid_recursive_sequence_lengths())
tensor.set(np.random.random([9, 1]), fluid.CPUPlace())
self.assertFalse(tensor.has_valid_recursive_sequence_lengths())
def test_create_lod_tensor(self):
# Create LoDTensor from a list
......@@ -60,19 +57,19 @@ class TestLoDTensor(unittest.TestCase):
self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod,
fluid.CPUPlace())
tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace())
self.assertEqual(tensor.lod(), [[0, 3, 5]])
self.assertEqual(tensor.recursive_sequence_lengths(), correct_lod)
# Create LoDTensor from numpy array
data = numpy.random.random([10, 1])
data = np.random.random([10, 1])
lod = [[2, 1], [3, 3, 4]]
tensor = create_lod_tensor(data, lod, fluid.CPUPlace())
self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]])
self.assertEqual(tensor.recursive_sequence_lengths(), lod)
# Create LoDTensor from another LoDTensor, they are different instances
new_lod = [[2, 2, 1], [1, 2, 2, 3, 2]]
new_tensor = create_lod_tensor(tensor, new_lod, fluid.CPUPlace())
self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]])
self.assertEqual(new_tensor.lod(), [[0, 2, 4, 5], [0, 1, 3, 5, 8, 10]])
self.assertEqual(tensor.recursive_sequence_lengths(), lod)
self.assertEqual(new_tensor.recursive_sequence_lengths(), new_lod)
def test_create_random_int_lodtensor(self):
# The shape of a word, commonly used in speech and NLP problem, is [1]
......@@ -83,7 +80,7 @@ class TestLoDTensor(unittest.TestCase):
high = dict_size - 1
tensor = create_random_int_lodtensor(lod, shape,
fluid.CPUPlace(), low, high)
self.assertEqual(tensor.lod(), [[0, 2, 5, 10]])
self.assertEqual(tensor.recursive_sequence_lengths(), lod)
self.assertEqual(tensor.shape(), [10, 1])
......
......@@ -41,8 +41,8 @@ function(py_test_modules TARGET_NAME)
endfunction()
list(REMOVE_ITEM TEST_OPS test_warpctc_op)
list(REMOVE_ITEM TEST_OPS test_dist_train)
#list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf)
#list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed)
# TODO(wuyi): this test hangs on CI, will add it back later
list(REMOVE_ITEM TEST_OPS test_listen_and_serv_op)
foreach(TEST_OP ${TEST_OPS})
......@@ -50,3 +50,5 @@ foreach(TEST_OP ${TEST_OPS})
endforeach(TEST_OP)
py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL)
py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL)
py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
......@@ -162,7 +162,7 @@ class OpTest(unittest.TestCase):
tensor = core.LoDTensor()
if isinstance(np_value, tuple):
tensor.set(np_value[0], place)
tensor.set_lod(np_value[1])
tensor.set_recursive_sequence_lengths(np_value[1])
else:
tensor.set(np_value, place)
feed_map[name] = tensor
......@@ -170,7 +170,8 @@ class OpTest(unittest.TestCase):
tensor = core.LoDTensor()
if isinstance(self.inputs[var_name], tuple):
tensor.set(self.inputs[var_name][0], place)
tensor.set_lod(self.inputs[var_name][1])
tensor.set_recursive_sequence_lengths(self.inputs[var_name][
1])
else:
tensor.set(self.inputs[var_name], place)
feed_map[var_name] = tensor
......@@ -293,7 +294,8 @@ class OpTest(unittest.TestCase):
str(place))
if isinstance(expect, tuple):
self.assertListEqual(
actual.lod(), expect[1], "Output (" + sub_out_name +
actual.recursive_sequence_lengths(), expect[1],
"Output (" + sub_out_name +
") has different lod at " + str(place))
else:
idx = find_actual(out_name, fetch_list)
......@@ -307,8 +309,8 @@ class OpTest(unittest.TestCase):
"Output (" + out_name + ") has diff at " + str(place) +
str(actual_t) + "\n" + str(expect_t))
if isinstance(expect, tuple):
self.assertListEqual(actual.lod(), expect[1],
"Output (" + out_name +
self.assertListEqual(actual.recursive_sequence_lengths(),
expect[1], "Output (" + out_name +
") has different lod at " + str(place))
def _get_places(self):
......@@ -408,7 +410,7 @@ class OpTest(unittest.TestCase):
tensor = core.LoDTensor()
tensor.set(np_value, place)
if lod is not None:
tensor.set_lod(lod)
tensor.set_recursive_sequence_lengths(lod)
return tensor
@staticmethod
......
......@@ -128,7 +128,7 @@ def create_or_get_tensor(scope, var_name, var, place):
tensor = scope.var(var_name).get_tensor()
if var is not None:
assert isinstance(var, np.ndarray)
tensor.set_lod([[]])
tensor.set_recursive_sequence_lengths([])
tensor.set_dims(var.shape)
tensor.set(var, place)
return tensor
......
......@@ -26,36 +26,36 @@ class TestBeamSearchDecodeOp(unittest.TestCase):
def append_lod_tensor(self, tensor_array, lod, data):
lod_tensor = core.LoDTensor()
lod_tensor.set_lod(lod)
lod_tensor.set_recursive_sequence_lengths(lod)
lod_tensor.set(data, self.place)
tensor_array.append(lod_tensor)
def test_get_set(self):
ids = self.scope.var("ids").get_lod_tensor_array()
self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]],
ids, [[3, 3], [1, 1, 1, 1, 1, 1]],
np.array(
[1, 2, 3, 4, 5, 6], dtype="int64"))
self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]],
ids, [[3, 3], [1, 0, 2, 2, 0, 1]],
np.array(
[0, 1, 2, 3, 4, 5], dtype="int64"))
self.append_lod_tensor(
ids, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]],
ids, [[3, 3], [0, 1, 1, 1, 1, 1]],
np.array(
[0, 1, 2, 3, 4], dtype="int64"))
scores = self.scope.var("scores").get_lod_tensor_array()
self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]],
scores, [[3, 3], [1, 1, 1, 1, 1, 1]],
np.array(
[1, 2, 3, 4, 5, 6], dtype="float64"))
self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]],
scores, [[3, 3], [1, 0, 2, 2, 0, 1]],
np.array(
[0, 1, 2, 3, 4, 5], dtype="float64"))
self.append_lod_tensor(
scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]],
scores, [[3, 3], [0, 1, 1, 1, 1, 1]],
np.array(
[0, 1, 2, 3, 4], dtype="float64"))
......@@ -73,9 +73,11 @@ class TestBeamSearchDecodeOp(unittest.TestCase):
beam_search_decode_op.run(self.scope, self.place)
expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]]
self.assertEqual(sentence_ids.lod(), expected_lod)
self.assertEqual(sentence_scores.lod(), expected_lod)
expected_lod = [[4, 4], [1, 2, 3, 3, 1, 3, 3, 3]]
self.assertEqual(sentence_ids.recursive_sequence_lengths(),
expected_lod)
self.assertEqual(sentence_scores.recursive_sequence_lengths(),
expected_lod)
expected_data = np.array(
[2, 1, 0, 3, 1, 0, 3, 2, 1, 5, 4, 3, 2, 4, 4, 3, 6, 5, 4], "int64")
......
......@@ -48,18 +48,18 @@ class BeamSearchOpTester(unittest.TestCase):
op.run(self.scope, core.CPUPlace())
selected_ids = self.scope.find_var("selected_ids").get_tensor()
print 'selected_ids', np.array(selected_ids)
print 'lod', selected_ids.lod()
print 'lod', selected_ids.recursive_sequence_lengths()
def _create_pre_ids(self):
np_data = np.array([[1, 2, 3, 4]], dtype='int64')
tensor = create_tensor(self.scope, "pre_ids", np_data)
def _create_ids(self):
self.lod = [[0, 1, 4], [0, 1, 2, 3, 4]]
self.lod = [[1, 3], [1, 1, 1, 1]]
np_data = np.array(
[[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int64')
tensor = create_tensor(self.scope, "ids", np_data)
tensor.set_lod(self.lod)
tensor.set_recursive_sequence_lengths(self.lod)
def _create_scores(self):
np_data = np.array(
......@@ -71,7 +71,7 @@ class BeamSearchOpTester(unittest.TestCase):
],
dtype='float32')
tensor = create_tensor(self.scope, "scores", np_data)
tensor.set_lod(self.lod)
tensor.set_recursive_sequence_lengths(self.lod)
if __name__ == '__main__':
......
......@@ -65,23 +65,25 @@ def batch_bipartite_match(distance, lod, match_type=None, dist_threshold=None):
distance (numpy.array) : The distance of two entries with shape [M, N].
lod (list of int): The offsets of each input in this batch.
"""
n = len(lod) - 1
n = len(lod)
m = distance.shape[1]
match_indices = -1 * np.ones((n, m), dtype=np.int)
match_dist = np.zeros((n, m), dtype=np.float32)
for i in range(len(lod) - 1):
bipartite_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :],
match_dist[i, :])
cur_offset = 0
for i in range(n):
bipartite_match(distance[cur_offset:(cur_offset + lod[i]), :],
match_indices[i, :], match_dist[i, :])
if match_type == 'per_prediction':
argmax_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :],
match_dist[i, :], dist_threshold)
argmax_match(distance[cur_offset:(cur_offset + lod[i]), :],
match_indices[i, :], match_dist[i, :], dist_threshold)
cur_offset += lod[i]
return match_indices, match_dist
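This offset-walking over length-based LoD recurs throughout the updated tests; a generic sketch of the idiom (hypothetical helper name):

def iter_sequences(flat, lengths):
    # yield each sequence's slice of a flattened [sum(lengths), ...] array
    offset = 0
    for n in lengths:
        yield flat[offset:offset + n]
        offset += n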
class TestBipartiteMatchOpWithLoD(OpTest):
def setUp(self):
self.op_type = 'bipartite_match'
lod = [[0, 5, 11, 23]]
lod = [[5, 6, 12]]
dist = np.random.random((23, 217)).astype('float32')
match_indices, match_dist = batch_bipartite_match(dist, lod[0])
......@@ -98,7 +100,7 @@ class TestBipartiteMatchOpWithLoD(OpTest):
class TestBipartiteMatchOpWithoutLoD(OpTest):
def setUp(self):
self.op_type = 'bipartite_match'
lod = [[0, 8]]
lod = [[8]]
dist = np.random.random((8, 17)).astype('float32')
match_indices, match_dist = batch_bipartite_match(dist, lod[0])
......@@ -115,7 +117,7 @@ class TestBipartiteMatchOpWithoutLoD(OpTest):
class TestBipartiteMatchOpWithPerPredictionType(OpTest):
def setUp(self):
self.op_type = 'bipartite_match'
lod = [[0, 5, 11, 23]]
lod = [[5, 6, 12]]
dist = np.random.random((23, 237)).astype('float32')
match_indices, match_dist = batch_bipartite_match(dist, lod[0],
'per_prediction', 0.5)
......
......@@ -81,15 +81,19 @@ def batch_box_coder(prior_box, prior_box_var, target_box, lod, code_type,
n = target_box.shape[0]
m = prior_box.shape[0]
output_box = np.zeros((n, m, 4), dtype=np.float32)
for i in range(len(lod) - 1):
cur_offset = 0
for i in range(len(lod)):
if (code_type == "EncodeCenterSize"):
box_coder(target_box[lod[i]:lod[i + 1], :], prior_box,
prior_box_var, output_box[lod[i]:lod[i + 1], :, :],
box_coder(target_box[cur_offset:(cur_offset + lod[i]), :],
prior_box, prior_box_var,
output_box[cur_offset:(cur_offset + lod[i]), :, :],
code_type, box_normalized)
elif (code_type == "DecodeCenterSize"):
box_coder(target_box[lod[i]:lod[i + 1], :, :], prior_box,
prior_box_var, output_box[lod[i]:lod[i + 1], :, :],
box_coder(target_box[cur_offset:(cur_offset + lod[i]), :, :],
prior_box, prior_box_var,
output_box[cur_offset:(cur_offset + lod[i]), :, :],
code_type, box_normalized)
cur_offset += lod[i]
return output_box
......@@ -99,7 +103,7 @@ class TestBoxCoderOp(OpTest):
def setUp(self):
self.op_type = "box_coder"
lod = [[0, 1, 2, 3, 4, 5]]
lod = [[1, 1, 1, 1, 1]]
prior_box = np.random.random((10, 4)).astype('float32')
prior_box_var = np.random.random((10, 4)).astype('float32')
target_box = np.random.random((5, 10, 4)).astype('float32')
......@@ -152,7 +156,7 @@ class TestBoxCoderOpWithLoD(OpTest):
def setUp(self):
self.op_type = "box_coder"
lod = [[0, 4, 12, 20]]
lod = [[4, 8, 8]]
prior_box = np.random.random((10, 4)).astype('float32')
prior_box_var = np.random.random((10, 4)).astype('float32')
target_box = np.random.random((20, 4)).astype('float32')
......
......@@ -144,10 +144,10 @@ class TestChunkEvalOp(OpTest):
starts = sorted(starts)
self.num_correct_chunks, self.num_infer_chunks, self.num_label_chunks = self.gen_chunks(
infer, label, starts)
self.inputs = {
'Inference': (infer, [starts]),
'Label': (label, [starts])
}
lod = []
for i in range(len(starts) - 1):
lod.append(starts[i + 1] - starts[i])
self.inputs = {'Inference': (infer, [lod]), 'Label': (label, [lod])}
precision = float(
self.num_correct_chunks
) / self.num_infer_chunks if self.num_infer_chunks else 0
......
......@@ -22,9 +22,9 @@ from op_test import OpTest
class CRFDecoding(object):
def __init__(self, emission_weights, transition_weights,
seq_start_positions):
assert (emission_weights.shape[0] == seq_start_positions[-1])
assert (emission_weights.shape[0] == sum(seq_start_positions))
self.tag_num = emission_weights.shape[1]
self.seq_num = len(seq_start_positions) - 1
self.seq_num = len(seq_start_positions)
self.seq_start_positions = seq_start_positions
self.x = emission_weights
......@@ -34,9 +34,9 @@ class CRFDecoding(object):
self.w = transition_weights[2:, :]
self.track = np.zeros(
(seq_start_positions[-1], self.tag_num), dtype="int64")
(sum(seq_start_positions), self.tag_num), dtype="int64")
self.decoded_path = np.zeros(
(seq_start_positions[-1], 1), dtype="int64")
(sum(seq_start_positions), 1), dtype="int64")
def _decode_one_sequence(self, decoded_path, x):
seq_len, tag_num = x.shape
......@@ -71,9 +71,11 @@ class CRFDecoding(object):
decoded_path[i - 1] = max_idx = track[i, max_idx]
def decode(self):
cur_pos = 0
for i in range(self.seq_num):
start = self.seq_start_positions[i]
end = self.seq_start_positions[i + 1]
start = cur_pos
cur_pos += self.seq_start_positions[i]
end = cur_pos
self._decode_one_sequence(self.decoded_path[start:end, :],
self.x[start:end, :])
return self.decoded_path
......@@ -90,11 +92,13 @@ class TestCRFDecodingOp1(OpTest):
TAG_NUM = 17
MAX_SEQ_LEN = 10
lod = [[0]]
lod = [[]]
total_len = 0
for i in range(SEQ_NUM):
lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN))
lod[-1].append(random.randint(1, MAX_SEQ_LEN))
total_len += lod[-1][-1]
emission = np.random.uniform(-1, 1,
[lod[-1][-1], TAG_NUM]).astype("float64")
[total_len, TAG_NUM]).astype("float64")
transition = np.random.uniform(-0.5, 0.5,
[TAG_NUM + 2, TAG_NUM]).astype("float64")
......@@ -126,7 +130,8 @@ class TestCRFDecodingOp2(OpTest):
self.op_type = "crf_decoding"
TAG_NUM = 5
lod = [[0, 1, 3, 6, 10]]
lod = [[1, 2, 3, 4]]
total_len = sum(lod[-1])
transition = np.repeat(
np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
......@@ -135,13 +140,13 @@ class TestCRFDecodingOp2(OpTest):
emission = np.repeat(
np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
lod[-1][-1],
total_len,
axis=0)
labels = np.random.randint(
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64")
low=0, high=TAG_NUM, size=(total_len, 1), dtype="int64")
predicted_labels = np.ones(
(lod[-1][-1], 1), dtype="int64") * (TAG_NUM - 1)
(total_len, 1), dtype="int64") * (TAG_NUM - 1)
expected_output = (labels == predicted_labels).astype("int64")
self.inputs = {
......
......@@ -22,14 +22,16 @@ from test_softmax_op import stable_softmax
def CTCAlign(input, lod, blank, merge_repeated):
lod0 = lod[0]
result = []
for i in range(len(lod0) - 1):
cur_offset = 0
for i in range(len(lod0)):
prev_token = -1
for j in range(lod0[i], lod0[i + 1]):
for j in range(cur_offset, cur_offset + lod0[i]):
token = input[j][0]
if (token != blank) and not (merge_repeated and
token == prev_token):
result.append(token)
prev_token = token
cur_offset += lod0[i]
result = np.array(result).reshape([len(result), 1]).astype("int32")
if len(result) == 0:
result = np.array([-1])
......@@ -39,7 +41,7 @@ def CTCAlign(input, lod, blank, merge_repeated):
class TestCTCAlignOp(OpTest):
def config(self):
self.op_type = "ctc_align"
self.input_lod = [[0, 11, 18]]
self.input_lod = [[11, 7]]
self.blank = 0
self.merge_repeated = False
self.input = np.array(
......@@ -66,7 +68,7 @@ class TestCTCAlignOp(OpTest):
class TestCTCAlignOpCase1(TestCTCAlignOp):
def config(self):
self.op_type = "ctc_align"
self.input_lod = [[0, 11, 19]]
self.input_lod = [[11, 8]]
self.blank = 0
self.merge_repeated = True
self.input = np.array(
......@@ -77,7 +79,7 @@ class TestCTCAlignOpCase1(TestCTCAlignOp):
class TestCTCAlignOpCase2(TestCTCAlignOp):
def config(self):
self.op_type = "ctc_align"
self.input_lod = [[0, 4]]
self.input_lod = [[4]]
self.blank = 0
self.merge_repeated = True
self.input = np.array([0, 0, 0, 0]).reshape([4, 1]).astype("int32")
......
......@@ -74,13 +74,13 @@ class TestDetectionMAPOp(OpTest):
self.evaluate_difficult = True
self.ap_type = "integral"
self.label_lod = [[0, 2, 4]]
self.label_lod = [[2, 2]]
# label difficult xmin ymin xmax ymax
self.label = [[1, 0, 0.1, 0.1, 0.3, 0.3], [1, 1, 0.6, 0.6, 0.8, 0.8],
[2, 0, 0.3, 0.3, 0.6, 0.5], [1, 0, 0.7, 0.1, 0.9, 0.3]]
# label score xmin ymin xmax ymax difficult
self.detect_lod = [[0, 3, 7]]
self.detect_lod = [[3, 4]]
self.detect = [
[1, 0.3, 0.1, 0.0, 0.4, 0.3], [1, 0.7, 0.0, 0.1, 0.2, 0.3],
[1, 0.9, 0.7, 0.6, 0.8, 0.8], [2, 0.8, 0.2, 0.1, 0.4, 0.4],
......@@ -89,7 +89,7 @@ class TestDetectionMAPOp(OpTest):
]
# label score true_pos false_pos
self.tf_pos_lod = [[0, 3, 7]]
self.tf_pos_lod = [[3, 4]]
self.tf_pos = [[1, 0.9, 1, 0], [1, 0.7, 1, 0], [1, 0.3, 0, 1],
[1, 0.2, 1, 0], [2, 0.8, 0, 1], [2, 0.1, 1, 0],
[3, 0.2, 0, 1]]
......@@ -112,15 +112,19 @@ class TestDetectionMAPOp(OpTest):
for i, count in enumerate(class_pos_count):
class_pos_count_dict[i] = count
for i in range(len(true_pos_lod[0]) - 1):
start = true_pos_lod[0][i]
end = true_pos_lod[0][i + 1]
cur_pos = 0
for i in range(len(true_pos_lod[0])):
start = cur_pos
cur_pos += true_pos_lod[0][i]
end = cur_pos
for j in range(start, end):
true_pos_dict[i].append(true_pos[j])
for i in range(len(false_pos_lod[0]) - 1):
start = false_pos_lod[0][i]
end = false_pos_lod[0][i + 1]
cur_pos = 0
for i in range(len(false_pos_lod[0])):
start = cur_pos
cur_pos += false_pos_lod[0][i]
end = cur_pos
for j in range(start, end):
false_pos_dict[i].append(false_pos[j])
......@@ -130,19 +134,19 @@ class TestDetectionMAPOp(OpTest):
label_number = self.class_num
out_class_pos_count = []
out_true_pos_lod = [0]
out_true_pos_lod = []
out_true_pos = []
out_false_pos_lod = [0]
out_false_pos_lod = []
out_false_pos = []
for i in range(label_number):
out_class_pos_count.append([label_count[i]])
true_pos_list = true_pos[i]
out_true_pos += true_pos_list
out_true_pos_lod.append(len(out_true_pos))
out_true_pos_lod.append(len(true_pos_list))
false_pos_list = false_pos[i]
out_false_pos += false_pos_list
out_false_pos_lod.append(len(out_false_pos))
out_false_pos_lod.append(len(false_pos_list))
return out_class_pos_count, out_true_pos, [
out_true_pos_lod
......@@ -241,7 +245,7 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp):
self.evaluate_difficult = False
self.tf_pos_lod = [[0, 2, 6]]
self.tf_pos_lod = [[2, 4]]
# label score true_pos false_pos
self.tf_pos = [[1, 0.7, 1, 0], [1, 0.3, 0, 1], [1, 0.2, 1, 0],
[2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]]
......@@ -267,9 +271,9 @@ class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp):
def init_test_case(self):
super(TestDetectionMAPOpMultiBatch, self).init_test_case()
self.class_pos_count = [0, 2, 1]
self.true_pos_lod = [[0, 0, 3, 5]]
self.true_pos_lod = [[0, 3, 2]]
self.true_pos = [[0.7, 1.], [0.3, 0.], [0.2, 1.], [0.8, 0.], [0.1, 1.]]
self.false_pos_lod = [[0, 0, 3, 5]]
self.false_pos_lod = [[0, 3, 2]]
self.false_pos = [[0.7, 0.], [0.3, 1.], [0.2, 0.], [0.8, 1.], [0.1, 0.]]
......
......@@ -16,6 +16,7 @@ import os
import time
import unittest
from multiprocessing import Process
import signal
import numpy
......@@ -24,9 +25,6 @@ import paddle.fluid.layers as layers
class TestSendOp(unittest.TestCase):
@unittest.skip(
"This test is buggy. We cannot use time.sleep to sync processes, the connection may fail in unittest."
)
def test_send(self):
# Run init_serv in a thread
place = fluid.CPUPlace()
......@@ -35,7 +33,9 @@ class TestSendOp(unittest.TestCase):
p.daemon = True
p.start()
time.sleep(10)
self.ps_timeout = 5
self._wait_ps_ready(p.pid)
with open("/tmp/paddle.%d.port" % p.pid, "r") as fn:
selected_port = int(fn.readlines()[0])
self.init_client(place, selected_port)
......@@ -44,9 +44,23 @@ class TestSendOp(unittest.TestCase):
self.assertTrue(numpy.allclose(self.local_out, self.dist_out))
# FIXME(typhoonzero): find a way to gracefully shutdown the server.
os.system("kill -9 %d" % p.pid)
os.kill(p.pid, signal.SIGKILL)
p.join()
def _wait_ps_ready(self, pid):
start_left_time = self.ps_timeout
sleep_time = 0.5
while True:
assert start_left_time >= 0, "wait ps ready failed"
time.sleep(sleep_time)
try:
# the listen_and_serv_op touches a file containing the listen port
# in the /tmp directory once it is ready to process all the RPC calls.
os.stat("/tmp/paddle.%d.port" % pid)
return
except os.error:
start_left_time -= sleep_time
def init_serv(self, place):
main = fluid.Program()
......@@ -84,7 +98,10 @@ class TestSendOp(unittest.TestCase):
dtype="float32",
persistable=False,
shape=[32, 32])
o = layers.Send("127.0.0.1:%d" % port, [x], [get_var])
fluid.initializer.Constant(value=2.3)(get_var, main.global_block())
layers.Send("127.0.0.1:%d" % port, [x])
o = layers.Recv("127.0.0.1:%d" % port, [get_var])
exe = fluid.Executor(place)
self.dist_out = exe.run(main, fetch_list=o) # o is a list
......
......@@ -136,16 +136,16 @@ class BaseRNN(object):
feed_dict = dict()
for iname in self.inputs:
lod = [0]
lod = []
np_flatten = []
for seq_id in xrange(len(self.inputs[iname])):
seq_len = len(self.inputs[iname][seq_id])
lod.append(lod[-1] + seq_len)
lod.append(seq_len)
np_flatten.extend(self.inputs[iname][seq_id])
t = fluid.Tensor()
t.set(numpy.array(np_flatten), place)
t.set_lod([lod])
t.set_recursive_sequence_lengths([lod])
feed_dict[iname] = t
for pname in self.params:
......
......@@ -39,20 +39,20 @@ class TestDyRnnStaticInput(unittest.TestCase):
def prepare_x_tensor(self):
self.x_tensor_dim = 10
lod = [[0, 2, 3, 6]]
shape = [lod[0][-1], self.x_tensor_dim]
lod = [[2, 1, 3]]
shape = [sum(lod[0]), self.x_tensor_dim]
self.x_tensor_data = np.random.random(shape).astype('float32')
self.x_tensor = core.LoDTensor()
self.x_tensor.set_lod(lod)
self.x_tensor.set_recursive_sequence_lengths(lod)
self.x_tensor.set(self.x_tensor_data, self.place)
def prepare_static_input_tensor(self):
self.static_input_tensor_dim = 4
lod = [[0, 1, 3, 6]]
shape = [lod[0][-1], self.static_input_tensor_dim]
lod = [[1, 2, 3]]
shape = [sum(lod[0]), self.static_input_tensor_dim]
self.static_input_data = np.random.random(shape).astype('float32')
self.static_input_tensor = core.LoDTensor()
self.static_input_tensor.set_lod(lod)
self.static_input_tensor.set_recursive_sequence_lengths(lod)
self.static_input_tensor.set(self.static_input_data, self.place)
def fetch_value(self, var):
......@@ -69,7 +69,7 @@ class TestDyRnnStaticInput(unittest.TestCase):
ndarray = np.zeros(shape=dims).astype('float32')
for i in xrange(np.product(dims)):
ndarray.ravel()[i] = lod_tensor.get_float_element(i)
return ndarray, lod_tensor.lod()
return ndarray, lod_tensor.recursive_sequence_lengths()
def build_graph(self, only_forward=False):
x_tensor = fluid.layers.data(
......@@ -131,21 +131,20 @@ class TestDyRnnStaticInput(unittest.TestCase):
framework.grad_var_name('static_input_tensor'))
return static_input_grad, loss
def get_seq_len_from_lod(self, lod):
return [lod[0][i + 1] - lod[0][i] for i in xrange(len(lod[0]) - 1)]
def get_expected_static_step_outs(self):
x_lod = self.x_tensor.lod()
x_seq_len = self.get_seq_len_from_lod(x_lod)
x_lod = self.x_tensor.recursive_sequence_lengths()
x_seq_len = x_lod[0]
x_seq_len_sorted = sorted(x_seq_len)
x_sorted_indices = np.argsort(x_seq_len)[::-1]
static_lod = self.static_input_tensor.lod()
static_sliced = [
self.static_input_data[static_lod[0][i]:static_lod[0][i + 1]]
for i in xrange(len(static_lod[0]) - 1)
]
static_seq_len = self.get_seq_len_from_lod(static_lod)
static_lod = self.static_input_tensor.recursive_sequence_lengths()
static_sliced = []
cur_offset = 0
for i in xrange(len(static_lod[0])):
static_sliced.append(self.static_input_data[cur_offset:(
cur_offset + static_lod[0][i])])
cur_offset += static_lod[0][i]
static_seq_len = static_lod[0]
static_reordered = []
for i in xrange(len(x_sorted_indices)):
static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist())
......@@ -159,11 +158,13 @@ class TestDyRnnStaticInput(unittest.TestCase):
for i in xrange(self._max_sequence_len):
end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1)
lod = [0]
lod = []
total_len = 0
for i in xrange(end):
lod.append(static_seq_len_reordered[i] + lod[-1])
lod.append(static_seq_len_reordered[i])
total_len += lod[-1]
static_step_lods.append([lod])
end = lod[-1]
end = total_len
static_step_outs.append(
np.array(static_reordered[:end]).astype('float32'))
......@@ -199,7 +200,9 @@ class TestDyRnnStaticInput(unittest.TestCase):
self.static_input_tensor.set_float_element(i, origin)
numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2
self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001))
self.assertTrue(np.allclose(actual_lod, self.static_input_tensor.lod()))
self.assertTrue(
np.allclose(actual_lod,
self.static_input_tensor.recursive_sequence_lengths()))
if __name__ == '__main__':
......
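With length-based LoD, slicing out the i-th sequence needs a running offset, the cur_offset pattern the rewritten tests use. A minimal numpy-only sketch of the idiom:

import numpy as np

def split_by_lengths(data, lengths):
    # Yield data[offset:offset + l] for each per-sequence length l.
    offset = 0
    for l in lengths:
        yield data[offset:offset + l]
        offset += l

data = np.arange(6).reshape(6, 1)
parts = [p.tolist() for p in split_by_lengths(data, [1, 2, 3])]
assert parts == [[[0]], [[1], [2]], [[3], [4], [5]]]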
......@@ -52,23 +52,29 @@ class TestEditDistanceOp(OpTest):
def setUp(self):
self.op_type = "edit_distance"
normalized = False
x1 = np.array([[0, 12, 3, 5, 8, 2]]).astype("int64")
x2 = np.array([[0, 12, 4, 7, 8]]).astype("int64")
x1 = np.array([[12, 3, 5, 8, 2]]).astype("int64")
x2 = np.array([[12, 4, 7, 8]]).astype("int64")
x1 = np.transpose(x1)
x2 = np.transpose(x2)
x1_lod = [0, 1, 5]
x2_lod = [0, 3, 4]
x1_lod = [1, 4]
x2_lod = [3, 1]
num_strs = len(x1_lod) - 1
num_strs = len(x1_lod)
distance = np.zeros((num_strs, 1)).astype("float32")
sequence_num = np.array(2).astype("int64")
x1_offset = 0
x2_offset = 0
for i in range(0, num_strs):
distance[i] = Levenshtein(
hyp=x1[x1_lod[i]:x1_lod[i + 1]],
ref=x2[x2_lod[i]:x2_lod[i + 1]])
hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
ref=x2[x2_offset:(x2_offset + x2_lod[i])])
x1_offset += x1_lod[i]
x2_offset += x2_lod[i]
if normalized is True:
len_ref = x2_lod[i + 1] - x2_lod[i]
len_ref = x2_lod[i]
distance[i] = distance[i] / len_ref
self.attrs = {'normalized': normalized}
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
......@@ -81,23 +87,29 @@ class TestEditDistanceOpNormalized(OpTest):
def setUp(self):
self.op_type = "edit_distance"
normalized = True
x1 = np.array([[0, 10, 3, 6, 5, 8, 2]]).astype("int64")
x2 = np.array([[0, 10, 4, 6, 7, 8]]).astype("int64")
x1 = np.array([[10, 3, 6, 5, 8, 2]]).astype("int64")
x2 = np.array([[10, 4, 6, 7, 8]]).astype("int64")
x1 = np.transpose(x1)
x2 = np.transpose(x2)
x1_lod = [0, 1, 3, 6]
x2_lod = [0, 2, 3, 5]
x1_lod = [1, 2, 3]
x2_lod = [2, 1, 2]
num_strs = len(x1_lod) - 1
num_strs = len(x1_lod)
distance = np.zeros((num_strs, 1)).astype("float32")
sequence_num = np.array(3).astype("int64")
x1_offset = 0
x2_offset = 0
for i in range(0, num_strs):
distance[i] = Levenshtein(
hyp=x1[x1_lod[i]:x1_lod[i + 1]],
ref=x2[x2_lod[i]:x2_lod[i + 1]])
hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
ref=x2[x2_offset:(x2_offset + x2_lod[i])])
x1_offset += x1_lod[i]
x2_offset += x2_lod[i]
if normalized is True:
len_ref = x2_lod[i + 1] - x2_lod[i]
len_ref = x2_lod[i]
distance[i] = distance[i] / len_ref
self.attrs = {'normalized': normalized}
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
......
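The Levenshtein helper these tests call is defined elsewhere in the file; for reference, a standard dynamic-programming edit distance looks like the sketch below (an illustration, not necessarily the exact helper used):

import numpy as np

def levenshtein(hyp, ref):
    m, n = len(hyp), len(ref)
    dp = np.zeros((m + 1, n + 1), dtype='float32')
    dp[:, 0] = np.arange(m + 1)
    dp[0, :] = np.arange(n + 1)
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if hyp[i - 1] == ref[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,         # deletion
                           dp[i][j - 1] + 1,         # insertion
                           dp[i - 1][j - 1] + cost)  # substitution
    return dp[m][n]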
......@@ -24,17 +24,16 @@ class TestFeedFetch(unittest.TestCase):
input_array = np.ones((4, 4, 6)).astype("float32")
input_array[0, 0, 0] = 3
input_array[3, 3, 5] = 10
input_tensor = core.LoDTensor([[0, 2, 4]])
input_tensor = core.LoDTensor([[2, 2]])
input_tensor.set(input_array, place)
core.set_feed_variable(scope, input_tensor, "feed", 0)
output_tensor = core.get_fetch_variable(scope, "feed", 0)
output_lod = output_tensor.lod()
self.assertEqual(0, output_lod[0][0])
output_lod = output_tensor.recursive_sequence_lengths()
self.assertEqual(2, output_lod[0][0])
self.assertEqual(2, output_lod[0][1])
self.assertEqual(4, output_lod[0][2])
output_array = np.array(output_tensor)
self.assertEqual(3, output_array[0, 0, 0])
......
......@@ -55,7 +55,7 @@ class TestFillConstantBatchSizeLikeWithLoDTensor(OpTest):
self.op_type = "fill_constant_batch_size_like"
self.inputs = {
'Input': (np.random.random((31, 28)).astype("float32"),
[[0, 9, 23, 31]])
[[9, 14, 8]])
}
self.attrs = {
'value': 3.5,
......
......@@ -20,8 +20,8 @@ from test_lstm_op import identity, sigmoid, tanh, relu
class TestGRUOp(OpTest):
lod = [[0, 2, 6, 9]]
batch_size = lod[0][-1]
lod = [[2, 4, 3]]
batch_size = sum(lod[0])
frame_size = 5
activate = {
'identity': identity,
......@@ -33,10 +33,10 @@ class TestGRUOp(OpTest):
@staticmethod
def seq_to_batch(lod, is_reverse):
idx_in_seq_list = []
seq_starts = lod[0]
seq_lens = []
for i in range(len(seq_starts) - 1):
seq_lens.append(seq_starts[i + 1] - seq_starts[i])
seq_lens = lod[0]
seq_starts = [0]
for i in range(len(seq_lens)):
seq_starts.append(seq_starts[-1] + seq_lens[i])
sorted_seqs = sorted(
range(len(seq_lens)), lambda x, y: seq_lens[y] - seq_lens[x])
num_batch = seq_lens[sorted_seqs[0]]
......
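seq_to_batch still sorts with the Python 2 cmp-style lambda; an equivalent that sorts sequence indices by length, longest first, and also runs on Python 3 (a sketch):

seq_lens = [2, 4, 3]
sorted_seqs = sorted(range(len(seq_lens)), key=lambda i: seq_lens[i], reverse=True)
assert sorted_seqs == [1, 2, 0]  # longest sequence first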
......@@ -364,5 +364,22 @@ class TestMSRAInitializer(unittest.TestCase):
self.assertEqual(init_op.attr('seed'), 134)
class TestBilinearInitializer(unittest.TestCase):
def test_bilinear_initializer(self):
"""Test the bilinear initializer with supplied arguments
"""
program = framework.Program()
block = program.global_block()
block.create_parameter(
dtype="float32",
shape=[8, 1, 3, 3],
lod_level=0,
name="param",
initializer=initializer.BilinearInitializer())
self.assertEqual(len(block.ops), 1)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'assign_value')
if __name__ == '__main__':
unittest.main()
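The assertion above implies that BilinearInitializer precomputes the filter weights and lowers to a single assign_value op. A bilinear upsampling kernel is conventionally computed as below (a sketch of the standard formula, not necessarily Paddle's exact implementation):

import numpy as np

def bilinear_kernel(size):
    factor = (size + 1) // 2
    center = factor - 1 if size % 2 == 1 else factor - 0.5
    og = np.ogrid[:size, :size]
    # Separable tent filter peaking at the kernel center.
    return ((1 - abs(og[0] - center) / float(factor)) *
            (1 - abs(og[1] - center) / float(factor)))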
......@@ -58,8 +58,8 @@ class TestIOUSimilarityOpWithLoD(TestIOUSimilarityOp):
def setUp(self):
super(TestIOUSimilarityOpWithLoD, self).setUp()
self.boxes1_lod = [[0, 1, 2]]
self.output_lod = [[0, 1, 2]]
self.boxes1_lod = [[1, 1]]
self.output_lod = [[1, 1]]
self.inputs = {'X': (self.boxes1, self.boxes1_lod), 'Y': self.boxes2}
self.outputs = {'Out': (self.output, self.output_lod)}
......
......@@ -105,11 +105,13 @@ class TestLinearChainCrfOp(OpTest):
MAX_SEQ_LEN = 5
# the linear_chain_crf operator only supports sequence (LoD level = 1)
lod = [[0]]
lod = [[]]
seq_start_pos = [0]
for i in range(SEQ_NUM):
lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN))
emission = np.random.uniform(-1, 1,
[lod[-1][-1], TAG_NUM]).astype("float64")
lod[-1].append(random.randint(1, MAX_SEQ_LEN))
seq_start_pos.append(seq_start_pos[-1] + lod[-1][-1])
emission = np.random.uniform(
-1, 1, [seq_start_pos[-1], TAG_NUM]).astype("float64")
emission_row_max = np.amax(emission, axis=1, keepdims=True)
emission_exps = np.exp(emission - emission_row_max)
......@@ -118,14 +120,14 @@ class TestLinearChainCrfOp(OpTest):
transition_exps = np.exp(transition)
labels = np.random.randint(
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64")
low=0, high=TAG_NUM, size=(seq_start_pos[-1], 1), dtype="int64")
self.inputs = {
"Emission": (emission, lod),
"Transition": transition,
"Label": (labels, lod)
}
crf = LinearChainCrfForward(lod[0], emission, emission_row_max,
crf = LinearChainCrfForward(seq_start_pos, emission, emission_row_max,
emission_exps, transition, transition_exps,
labels)
alpha, log_likelihood = crf.crf_forward_compute()
......
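The explicit seq_start_pos accumulation above is the length-to-offset conversion that recurs throughout this change; as a one-line numpy sketch:

import numpy as np
lod = [[2, 3, 2]]
seq_start_pos = np.concatenate(([0], np.cumsum(lod[0]))).tolist()
assert seq_start_pos == [0, 2, 5, 7]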
......@@ -57,17 +57,18 @@ class TestListenAndServOp(OpTest):
def setUp(self):
self.ps_timeout = 5
self.ip = "127.0.0.1"
self.port = "6173"
self.port = "0"
self.trainers = 1
self.trainer_id = 1
self.trainer_id = 0
def _start_pserver(self, use_cuda, sync_mode):
p = Process(
target=run_pserver,
args=(use_cuda, sync_mode, self.ip, self.port, self.trainers,
self.trainer_id))
p.daemon = True
p.start()
return p.pid
return p
def _wait_ps_ready(self, pid):
start_left_time = self.ps_timeout
......@@ -89,18 +90,20 @@ class TestListenAndServOp(OpTest):
def test_handle_signal_in_serv_op(self):
# run pserver on CPU in sync mode
pid = self._start_pserver(False, True)
self._wait_ps_ready(pid)
p1 = self._start_pserver(False, True)
self._wait_ps_ready(p1.pid)
# raise SIGTERM to pserver
os.kill(pid, signal.SIGTERM)
os.kill(p1.pid, signal.SIGKILL)
p1.join()
# run pserver on CPU in async mode
pid = self._start_pserver(False, False)
self._wait_ps_ready(pid)
p2 = self._start_pserver(False, False)
self._wait_ps_ready(p2.pid)
# raise SIGTERM to pserver
os.kill(pid, signal.SIGTERM)
os.kill(p2.pid, signal.SIGKILL)
p2.join()
if __name__ == '__main__':
......
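Returning the multiprocessing.Process object instead of the bare pid is what makes the added join() calls possible; the teardown pattern, in sketch form:

import os
import signal

def stop_pserver(p):
    # p is the multiprocessing.Process returned by _start_pserver.
    os.kill(p.pid, signal.SIGKILL)
    p.join()  # reap the child so no zombie is left behind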
......@@ -30,7 +30,8 @@ class TestLoDRankTable(unittest.TestCase):
tensor = core.LoDTensor()
tensor.set(numpy.random.random(size=(17, 100)), cpu)
tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]])
tensor.set_recursive_sequence_lengths(
[[1, 2], [5, 1, 1], [3, 1, 5, 1, 3, 3, 1]])
exe.run(scope=scope, feed={'x': tensor})
var = scope.find_var(rank_table.name)
table = var.get_lod_rank_table()
......
......@@ -21,11 +21,15 @@ class TestLodResetOpByAttr(OpTest):
def setUp(self):
self.op_type = "lod_reset"
x = np.random.random((10, 20)).astype("float32")
lod = [[0, 3, 5, 10]]
target_lod_0 = [0, 7, 10]
lod = [[3, 2, 5]]
# target_offset_lod and target_lod are the same lod info represented
# in offset-based format and length-based format, respectively.
target_offset_lod = [0, 7, 10]
target_lod = [7, 3]
self.inputs = {'X': (x, lod)}
self.attrs = {'target_lod': target_lod_0}
self.outputs = {'Out': (x, [target_lod_0])}
# The `target_lod` attribute is still based on offset
self.attrs = {'target_lod': target_offset_lod}
self.outputs = {'Out': (x, [target_lod])}
def test_check_output(self):
self.check_output()
......@@ -38,13 +42,16 @@ class TestLodResetOpByInput(OpTest):
def setUp(self):
self.op_type = "lod_reset"
x = np.random.random((10, 20)).astype("float32")
lod = [[0, 3, 5, 10]]
target_lod_0 = [0, 4, 7, 10]
lod = [[3, 2, 5]]
# target_offset_lod and target_lod are the same lod info represented
# in offset-based format and length-based format, respectively.
target_offset_lod = [0, 4, 7, 10]
target_lod = [4, 3, 3]
self.inputs = {
'X': (x, lod),
'Y': np.array([target_lod_0]).astype('int32')
'Y': np.array([target_offset_lod]).astype('int32')
}
self.outputs = {'Out': (x, [target_lod_0])}
self.outputs = {'Out': (x, [target_lod])}
def test_check_output(self):
self.check_output()
......@@ -57,15 +64,16 @@ class TestLodResetOpBoth(OpTest):
def setUp(self):
self.op_type = "lod_reset"
x = np.random.random((10, 20)).astype("float32")
lod = [[0, 3, 5, 10]]
target_lod_0_attr = [0, 7, 10]
target_lod_0_in = [0, 4, 7, 10]
lod = [[3, 2, 5]]
target_offset_lod_attr = [0, 7, 10]
target_offset_lod_in = [0, 4, 7, 10]
target_lod_in = [4, 3, 3]
self.inputs = {
'X': (x, lod),
'Y': np.array(target_lod_0_in).astype('int32')
'Y': np.array(target_offset_lod_in).astype('int32')
}
self.attrs = {'target_lod': target_lod_0_attr}
self.outputs = {'Out': (x, [target_lod_0_in])}
self.attrs = {'target_lod': target_offset_lod_attr}
self.outputs = {'Out': (x, [target_lod_in])}
def test_check_output(self):
self.check_output()
......@@ -78,11 +86,11 @@ class TestLodResetOpYIsLoDTensor(OpTest):
def setUp(self):
self.op_type = "lod_reset"
x = np.random.random((10, 20)).astype("float32")
lod = [[0, 3, 5, 10]]
lod = [[3, 2, 5]]
y = np.random.random((10, 10)).astype("float32")
target_lod_0 = [[0, 4, 7, 10]]
self.inputs = {'X': (x, lod), 'Y': (y, target_lod_0)}
self.outputs = {'Out': (x, target_lod_0)}
target_lod = [[4, 3, 3]]
self.inputs = {'X': (x, lod), 'Y': (y, target_lod)}
self.outputs = {'Out': (x, target_lod)}
def test_check_output(self):
self.check_output()
......
......@@ -27,7 +27,7 @@ class TestLoDTensorArray(unittest.TestCase):
for i in xrange(10):
t = core.LoDTensor()
t.set(numpy.array([i], dtype='float32'), cpu)
t.set_lod([[0, 1]])
t.set_recursive_sequence_lengths([[1]])
tensor_array.append(t)
self.assertEqual(10, len(tensor_array))
......@@ -35,17 +35,17 @@ class TestLoDTensorArray(unittest.TestCase):
for i in xrange(10):
t = tensor_array[i]
self.assertEqual(numpy.array(t), numpy.array([i], dtype='float32'))
self.assertEqual([[0, 1]], t.lod())
self.assertEqual([[1]], t.recursive_sequence_lengths())
t = core.LoDTensor()
t.set(numpy.array([i + 10], dtype='float32'), cpu)
t.set_lod([[0, 2]])
t.set_recursive_sequence_lengths([[1]])
tensor_array[i] = t
t = tensor_array[i]
self.assertEqual(
numpy.array(t), numpy.array(
[i + 10], dtype='float32'))
self.assertEqual([[0, 2]], t.lod())
self.assertEqual([[1]], t.recursive_sequence_lengths())
if __name__ == '__main__':
......
......@@ -29,7 +29,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor()
tensor.set(
numpy.arange(10).reshape(10, 1).astype('int32'), self.place())
tensor.set_lod([[0, 3, 9, 10]])
tensor.set_recursive_sequence_lengths([[3, 6, 1]])
expect = map(lambda x: numpy.array(x).astype('int32'),
[[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]])
self.main(
......@@ -42,7 +42,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor()
tensor.set(
numpy.arange(10).reshape(10, 1).astype('int32'), self.place())
tensor.set_lod([[0, 3, 9, 9, 10]])
tensor.set_recursive_sequence_lengths([[3, 6, 0, 1]])
expect = map(lambda x: numpy.array(x).astype('int32'),
[[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]])
self.main(
......@@ -55,7 +55,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor()
tensor.set(
numpy.arange(20).reshape(20, 1).astype('int32'), self.place())
tensor.set_lod([[0, 2, 5], [0, 3, 9, 11, 17, 20]])
tensor.set_recursive_sequence_lengths([[2, 3], [3, 6, 2, 6, 3]])
expect = [
numpy.array(
......@@ -65,7 +65,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
[17, 18, 19], dtype='int32')
]
lod = [[[0, 2, 5]], [[0, 6, 12]], [[0, 3]]]
lod = [[[2, 3]], [[6, 6]], [[3]]]
self.main(
tensor=tensor,
expect_array=expect,
......@@ -77,8 +77,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor.set(
numpy.arange(31).reshape(31, 1).astype('int32'), self.place())
tensor.set_lod([[0, 3, 5, 9, 11],
[0, 3, 7, 11, 11, 12, 17, 19, 21, 23, 30, 31]])
tensor.set_recursive_sequence_lengths(
[[3, 2, 4, 2], [3, 4, 4, 0, 1, 5, 2, 2, 2, 7, 1]])
expect = [
numpy.array(
......@@ -88,7 +88,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
], [17, 18, 3, 4, 5, 6, 11, 30], [19, 20, 7, 8, 9, 10], [21, 22]]
]
lod = [[[0, 5, 8, 8, 15]], [[0, 2, 6, 7, 8]], [[0, 2, 6]], [[0, 2]]]
lod = [[[5, 3, 0, 7]], [[2, 4, 1, 1]], [[2, 4]], [[2]]]
self.main(
tensor=tensor,
expect_array=expect,
......@@ -99,8 +99,9 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor()
tensor.set(
numpy.arange(50).reshape(50, 1).astype('int32'), self.place())
tensor.set_lod([[0, 2, 5, 6], [0, 2, 5, 6, 10, 12, 13],
[0, 3, 7, 11, 17, 21, 22, 23, 27, 31, 39, 45, 46, 50]])
tensor.set_recursive_sequence_lengths(
[[2, 3, 1], [2, 3, 1, 4, 2, 1],
[3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4]])
expect = [
numpy.array(
......@@ -108,8 +109,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
for item in [[21, 0, 1, 2, 3, 4, 5, 6, 46, 47, 48, 49], range(
22, 39) + range(7, 21), range(39, 46)]
]
lod = [[[0, 1, 3, 4], [0, 1, 4, 8, 12]],
[[0, 4, 7], [0, 1, 5, 9, 17, 21, 27, 31]], [[0, 2], [0, 6, 7]]]
lod = [[[1, 2, 1], [1, 3, 4, 4]], [[4, 3], [1, 4, 4, 8, 4, 6, 4]],
[[2], [6, 1]]]
self.main(
tensor=tensor,
expect_array=expect,
......@@ -120,8 +121,9 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor()
tensor.set(
numpy.arange(50).reshape(50, 1).astype('int32'), self.place())
tensor.set_lod([[0, 2, 5, 6], [0, 2, 5, 6, 10, 12, 13],
[0, 3, 7, 11, 17, 21, 22, 23, 27, 31, 39, 45, 46, 50]])
tensor.set_recursive_sequence_lengths(
[[2, 3, 1], [2, 3, 1, 4, 2, 1],
[3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4]])
self.main(
tensor=tensor,
expect_array=None,
......@@ -162,12 +164,13 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
exp_tensor, exp_lod = exp
exp_tensor = numpy.expand_dims(exp_tensor, axis=1)
self.assertTrue(numpy.allclose(exp_tensor, numpy.array(array[i])))
self.assertEqual(exp_lod, array[i].lod())
self.assertEqual(exp_lod, array[i].recursive_sequence_lengths())
def check_tensor_same(self, actual, expect):
self.assertTrue(
numpy.allclose(numpy.array(actual), numpy.array(expect)))
self.assertEqual(actual.lod(), expect.lod())
self.assertEqual(actual.recursive_sequence_lengths(),
expect.recursive_sequence_lengths())
class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
......@@ -188,7 +191,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
tensor = core.LoDTensor()
tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
tensor.set_lod([[0, 3, 9, 10]])
tensor.set_recursive_sequence_lengths([[3, 6, 1]])
g_vars = program.global_block().var(x.name + "@GRAD")
......
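In a multi-level length-based LoD, level-0 lengths count entries of level 1, which in turn count tensor rows. Reading one pair from the hunks above as a sketch:

lengths = [[2, 3], [3, 6, 2, 6, 3]]
assert sum(lengths[0]) == len(lengths[1])  # 5 sub-sequences across 2 sequences
assert sum(lengths[1]) == 20               # 20 rows in the underlying tensor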
......@@ -84,15 +84,17 @@ def lstm(
h = g_o * act_cell(c)
return h, c
def _reverse(x, lod):
def _reverse(x, offset):
y = np.zeros_like(x)
for i in range(len(lod) - 1):
b, e = lod[i], lod[i + 1]
for i in range(len(offset) - 1):
b, e = offset[i], offset[i + 1]
y[b:e, :] = np.flip(x[b:e, :], 0)
return y
offset = lod[0]
batch_size = len(offset) - 1
offset = [0]
for l in lod[0]:
offset.append(offset[-1] + l)
batch_size = len(lod[0])
hidden = []
cell = []
input = _reverse(input, offset) if is_reverse else input
......@@ -100,7 +102,7 @@ def lstm(
input = input + np.tile(w_b, (offset[-1], 1))
for i in range(batch_size):
# compute one sequence
seq_len = offset[i + 1] - offset[i]
seq_len = lod[0][i]
x = input[offset[i]:offset[i + 1], :]
h_pre = h0[i] # 1 x D
c_pre = c0[i] # 1 x D
......@@ -124,7 +126,7 @@ def lstm(
class TestLstmOp(OpTest):
def set_argument(self):
self.lod = [[0, 2, 5, 7]]
self.lod = [[2, 3, 2]]
self.D = 16
self.act_gate = 'sigmoid'
......@@ -139,8 +141,8 @@ class TestLstmOp(OpTest):
self.set_argument()
self.op_type = 'lstm'
T = self.lod[0][-1]
N = len(self.lod[0]) - 1
T = sum(self.lod[0])
N = len(self.lod[0])
x = np.random.normal(size=(T, 4 * self.D)).astype('float64')
if self.has_initial_state:
......@@ -186,7 +188,7 @@ class TestLstmOp(OpTest):
def test_check_grad(self):
# TODO(qingqing) remove following lines after the check_grad is refined.
N = len(self.lod[0]) - 1
N = len(self.lod[0])
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64')
......@@ -196,7 +198,7 @@ class TestLstmOp(OpTest):
# class TestLstmOpHasInitial(TestLstmOp):
# def set_argument(self):
# self.lod = [[0, 2, 5, 7]]
# self.lod = [[2, 3, 2]]
# self.D = 16
# self.act_gate = 'sigmoid'
......@@ -209,7 +211,7 @@ class TestLstmOp(OpTest):
# def test_check_grad(self):
# # TODO(qingqing) remove following lines after the check_grad is refined.
# N = len(self.lod[0]) - 1
# N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
......@@ -218,7 +220,7 @@ class TestLstmOp(OpTest):
# max_relative_error=5e-4)
# def test_check_grad_ingore_bias(self):
# N = len(self.lod[0]) - 1
# N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
......@@ -228,7 +230,7 @@ class TestLstmOp(OpTest):
# no_grad_set=set('Bias'))
# def test_check_grad_ingore_weight(self):
# N = len(self.lod[0]) - 1
# N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
......@@ -238,7 +240,7 @@ class TestLstmOp(OpTest):
# no_grad_set=set('Weight'))
# def test_check_grad_ingore_input(self):
# N = len(self.lod[0]) - 1
# N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
......@@ -248,7 +250,7 @@ class TestLstmOp(OpTest):
# no_grad_set=set('Input'))
# def test_check_grad_ingore_h0(self):
# N = len(self.lod[0]) - 1
# N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
......@@ -258,7 +260,7 @@ class TestLstmOp(OpTest):
# no_grad_set=set('H0'))
# def test_check_grad_ingore_c0(self):
# N = len(self.lod[0]) - 1
# N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64')
......@@ -269,7 +271,7 @@ class TestLstmOp(OpTest):
# class TestLstmOpRerverse(TestLstmOp):
# def set_argument(self):
# self.lod = [[0, 2, 5, 7]]
# self.lod = [[2, 3, 2]]
# self.D = 16
# self.act_gate = 'sigmoid'
......@@ -282,7 +284,7 @@ class TestLstmOp(OpTest):
# class TestLstmOpNotUsePeepholes(TestLstmOp):
# def set_argument(self):
# self.lod = [[0, 2, 5, 7]]
# self.lod = [[2, 3, 2]]
# self.D = 16
# self.act_gate = 'sigmoid'
......
......@@ -64,15 +64,17 @@ def lstmp(
r = act_proj(r)
return r, c
def _reverse(x, lod):
def _reverse(x, offset):
y = np.zeros_like(x)
for i in range(len(lod) - 1):
b, e = lod[i], lod[i + 1]
for i in range(len(offset) - 1):
b, e = offset[i], offset[i + 1]
y[b:e, :] = np.flip(x[b:e, :], 0)
return y
offset = lod[0]
batch_size = len(offset) - 1
offset = [0]
for l in lod[0]:
offset.append(offset[-1] + l)
batch_size = len(lod[0])
# recurrent projection state
projection = []
cell = []
......@@ -81,7 +83,7 @@ def lstmp(
input = input + np.tile(w_b, (offset[-1], 1))
for i in range(batch_size):
# compute one sequence
seq_len = offset[i + 1] - offset[i]
seq_len = lod[0][i]
x = input[offset[i]:offset[i + 1], :]
r_pre = np.dot(h0[i], w_rh) # 1 x P
r_pre = act_proj(r_pre)
......@@ -117,8 +119,8 @@ class TestLstmpOp(LstmTest.TestLstmOp):
self.reset_argument()
self.op_type = 'lstmp'
T = self.lod[0][-1]
N = len(self.lod[0]) - 1
T = sum(self.lod[0])
N = len(self.lod[0])
x = np.random.normal(size=(T, 4 * self.D)).astype('float64')
if self.has_initial_state:
......@@ -166,7 +168,7 @@ class TestLstmpOp(LstmTest.TestLstmOp):
def test_check_grad(self):
# TODO(qingqing) remove following lines after the check_grad is refined.
N = len(self.lod[0]) - 1
N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
......@@ -183,7 +185,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
def test_check_grad(self):
# TODO(qingqing) remove following lines after the check_grad is refined.
N = len(self.lod[0]) - 1
N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
......@@ -195,7 +197,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
max_relative_error=1e-2)
def test_check_grad_ingore_bias(self):
N = len(self.lod[0]) - 1
N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
......@@ -207,7 +209,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('Bias'))
def test_check_grad_ingore_weight(self):
N = len(self.lod[0]) - 1
N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
......@@ -219,7 +221,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('Weight'))
def test_check_grad_ingore_proj_weight(self):
N = len(self.lod[0]) - 1
N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
......@@ -231,7 +233,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('ProjWeight'))
def test_check_grad_ingore_input(self):
N = len(self.lod[0]) - 1
N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
......@@ -243,7 +245,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('Input'))
def test_check_grad_ingore_h0(self):
N = len(self.lod[0]) - 1
N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
......@@ -255,7 +257,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('H0'))
def test_check_grad_ingore_c0(self):
N = len(self.lod[0]) - 1
N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
import unittest
import numpy as np
from op_test import OpTest
def compute_mean_iou(predictions, labels, num_classes, in_wrongs, in_corrects,
in_mean_ious):
assert predictions.shape == labels.shape
predictions = predictions.flatten()
labels = labels.flatten()
out_wrong = np.zeros([num_classes]).astype("int32")
for _, wrong in in_wrongs:
out_wrong += wrong
out_correct = np.zeros([num_classes]).astype("int32")
for _, correct in in_corrects:
out_correct += correct
for pred, label in zip(predictions, labels):
if pred == label:
out_correct[pred] += 1
else:
out_wrong[pred] += 1
out_wrong[label] += 1
denominator = out_wrong + out_correct
valid_count = (denominator != 0).sum()
denominator = np.where(denominator > 0, denominator,
np.ones(denominator.shape))
mean_iou = (out_correct / denominator).sum() / valid_count
for _, in_mean_iou in in_mean_ious:
mean_iou += in_mean_iou
return mean_iou, out_wrong, out_correct
class TestMeanIOUOp(OpTest):
def setUp(self):
self.config()
self.op_type = "mean_iou"
predictions = np.random.randint(0, self.num_classes,
self.image_size).astype("int32")
labels = np.random.randint(0, self.num_classes,
self.image_size).astype("int32")
in_wrongs = []
for i in range(self.in_wrong_num):
in_wrongs.append(("in_wrong_%d" % i, np.random.randint(
0, 10, [self.num_classes]).astype("int32")))
in_corrects = []
for i in range(self.in_correct_num):
in_corrects.append(("in_correct_%d" % i, np.random.randint(
0, 10, [self.num_classes]).astype("int32")))
in_mean_ious = []
for i in range(self.in_mean_iou_num):
in_mean_ious.append(("in_mean_iou_%d" % i, np.random.uniform(
0, 1, [1]).astype("float32")))
self.inputs = {
'Predictions': predictions,
'Labels': labels,
'InWrongs': in_wrongs,
'InCorrects': in_corrects,
'InMeanIou': in_mean_ious
}
self.attrs = {'num_classes': long(self.num_classes)}
mean_iou, out_wrong, out_correct = compute_mean_iou(
predictions, labels, self.num_classes, in_wrongs, in_corrects,
in_mean_ious)
self.outputs = {
'OutMeanIou': mean_iou,
'OutWrong': out_wrong,
'OutCorrect': out_correct
}
def config(self):
self.num_classes = 10
self.image_size = [128, 128]
self.in_wrong_num = 0
self.in_correct_num = 0
self.in_mean_iou_num = 0
def test_check_output(self):
self.check_output()
class TestCase1(TestMeanIOUOp):
def config(self):
self.num_classes = 5
self.image_size = [100, 128]
self.in_wrong_num = 2
self.in_correct_num = 2
self.in_mean_iou_num = 2
if __name__ == '__main__':
unittest.main()
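For reference, the computation above counts, per class, the correct predictions plus wrongs on both the prediction and the label side (false positives and false negatives), then averages IoU over classes with a non-zero denominator. A tiny worked example consistent with that reference:

import numpy as np

preds = np.array([0, 0, 1, 1])
labels = np.array([0, 1, 1, 1])
# class 0: correct=1, wrong=1 (one false positive)  -> IoU = 1/2
# class 1: correct=2, wrong=1 (one false negative)  -> IoU = 2/3
# mean IoU over both classes = (1/2 + 2/3) / 2 = 7/12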
......@@ -70,7 +70,7 @@ class TestMineHardExamplesOp(OpTest):
self.updated_match_indices = self.match_indices
self.neg_indices_lod = [[0, 1, 2]]
self.neg_indices_lod = [[1, 1]]
self.neg_indices = np.array([[1], [0]]).astype('int32')
......@@ -92,7 +92,7 @@ class TestMineHardExamplesOpHardExample(TestMineHardExamplesOp):
self.updated_match_indices = np.array([[0, -1, -1],
[-1, -1, -1]]).astype('int32')
self.neg_indices_lod = [[0, 1, 3]]
self.neg_indices_lod = [[1, 2]]
self.neg_indices = np.array([[2], [0], [2]]).astype('int32')
......
......@@ -135,12 +135,12 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold,
batch_size = scores.shape[0]
det_outs = []
lod = [0]
lod = []
for n in range(batch_size):
nmsed_outs, nmsed_num = multiclass_nms(boxes[n], scores[n], background,
score_threshold, nms_threshold,
nms_top_k, keep_top_k)
lod.append(lod[-1] + nmsed_num)
lod.append(nmsed_num)
if nmsed_num == 0: continue
for c, indices in nmsed_outs.iteritems():
......
......@@ -27,9 +27,9 @@ class TestOneHotOp(OpTest):
self.op_type = 'one_hot'
depth = 10
dimension = 12
x_lod = [[0, 4, 5, 8, 11]]
x = [np.random.randint(0, depth - 1) for i in xrange(x_lod[0][-1])]
x = np.array(x).astype('int').reshape([x_lod[0][-1], 1])
x_lod = [[4, 1, 3, 3]]
x = [np.random.randint(0, depth - 1) for i in xrange(sum(x_lod[0]))]
x = np.array(x).astype('int').reshape([sum(x_lod[0]), 1])
out = np.zeros(shape=(np.product(x.shape[:-1]),
depth)).astype('float32')
......@@ -50,9 +50,9 @@ class TestOneHotOp_default_dtype(OpTest):
self.op_type = 'one_hot'
depth = 10
dimension = 12
x_lod = [[0, 4, 5, 8, 11]]
x = [np.random.randint(0, depth - 1) for i in xrange(x_lod[0][-1])]
x = np.array(x).astype('int').reshape([x_lod[0][-1], 1])
x_lod = [[4, 1, 3, 3]]
x = [np.random.randint(0, depth - 1) for i in xrange(sum(x_lod[0]))]
x = np.array(x).astype('int').reshape([sum(x_lod[0]), 1])
out = np.zeros(shape=(np.product(x.shape[:-1]),
depth)).astype('float32')
......@@ -75,11 +75,11 @@ class TestOneHotOp_exception(OpTest):
self.place = core.CPUPlace()
self.dimension = 12
self.x = core.LoDTensor()
x_lod = [[0, 4, 5, 8, 11]]
data = [np.random.randint(11, 20) for i in xrange(x_lod[0][-1])]
data = np.array(data).astype('int').reshape([x_lod[0][-1], 1])
x_lod = [[4, 1, 3, 3]]
data = [np.random.randint(11, 20) for i in xrange(sum(x_lod[0]))]
data = np.array(data).astype('int').reshape([sum(x_lod[0]), 1])
self.x.set(data, self.place)
self.x.set_lod(x_lod)
self.x.set_recursive_sequence_lengths(x_lod)
def test_check_output(self):
program = Program()
......
......@@ -173,6 +173,7 @@ class TestCRFModel(unittest.TestCase):
pe.run(feed=feeder.feed(cur_batch),
fetch_list=[avg_cost.name]))[0]
@unittest.skip(reason="CI hangs")
def test_update_sparse_parameter_all_reduce(self):
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
......@@ -181,6 +182,7 @@ class TestCRFModel(unittest.TestCase):
self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=False)
@unittest.skip(reason="CI hangs")
def test_update_dense_parameter_all_reduce(self):
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
......@@ -189,6 +191,7 @@ class TestCRFModel(unittest.TestCase):
self.check_network_convergence(
is_sparse=False, build_strategy=build_strategy, use_cuda=False)
@unittest.skip(reason="CI hangs")
def test_update_sparse_parameter_reduce(self):
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
......@@ -197,6 +200,7 @@ class TestCRFModel(unittest.TestCase):
self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=False)
@unittest.skip(reason="CI hangs")
def test_update_dense_parameter_reduce(self):
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
......
......@@ -28,7 +28,7 @@ class TestPrintOpCPU(unittest.TestCase):
self.x_tensor = core.LoDTensor()
tensor_np = np.random.random(size=(2, 3)).astype('float32')
self.x_tensor.set(tensor_np, self.place)
self.x_tensor.set_lod([[0, 1, 1]])
self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
def build_network(self, only_forward, **kargs):
x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
......@@ -62,7 +62,7 @@ class TestPrintOpGPU(TestPrintOpCPU):
self.x_tensor = core.LoDTensor()
tensor_np = np.random.random(size=(2, 3)).astype('float32')
self.x_tensor.set(tensor_np, self.place)
self.x_tensor.set_lod([[0, 1, 1]])
self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
if __name__ == '__main__':
......
......@@ -70,11 +70,10 @@ class TestReorderLoDTensor(unittest.TestCase):
lod_level_i = numpy.random.randint(
low=1,
high=5,
size=self.num_seq if i == 0 else lod_level_i[-1])
lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist()
size=self.num_seq if i == 0 else sum(lod_level_i)).tolist()
data_lod.append(lod_level_i)
data_value = numpy.random.random(
size=[data_lod[-1][-1] if data_lod else self.num_seq
size=[sum(data_lod[-1]) if data_lod else self.num_seq
] + data_shape).astype('float32')
self.data[data_name] = (data_value, data_lod)
......@@ -84,29 +83,36 @@ class TestReorderLoDTensor(unittest.TestCase):
tensor = fluid.Tensor()
tensor.set(self.data[desc[0]][0], place)
if self.data[desc[0]][1]:
tensor.set_lod(self.data[desc[0]][1])
tensor.set_recursive_sequence_lengths(self.data[desc[0]][1])
self.inputs[desc[0]] = tensor
def reorder(self):
level = 0
def convert_to_offset(lod):
offset_lod = [[0] for i in lod]
for i, level in enumerate(lod):
for seq_len in level:
offset_lod[i].append(offset_lod[i][-1] + seq_len)
return offset_lod
level = 0
# compute the rank_table according to ref_lod
ref_lod = self.data[self.data_desc[1][0]][1][level]
rank_table = [] # list of (index, length)
for i in range(len(ref_lod) - 1):
rank_table.append((i, ref_lod[i + 1] - ref_lod[i]))
for i in range(len(ref_lod)):
rank_table.append((i, ref_lod[i]))
rank_table = sorted(rank_table, lambda x, y: y[1] - x[1])
# compute the input sequence info according to input_lod
input_value, input_lod = self.data[self.data_desc[0][0]]
offset_lod = convert_to_offset(input_lod)
input_table = [] # list of (offset, length, sub_lod)
if input_lod:
for i in range(len(input_lod[level]) - 1):
if offset_lod:
for i in range(len(offset_lod[level]) - 1):
start_idx = i
end_idx = i + 1
sub_lod = []
for lod_level_i in input_lod[level:]:
for lod_level_i in offset_lod[level:]:
sub_lod_i = []
for idx in range(start_idx, end_idx):
sub_lod_i.append(lod_level_i[idx + 1] - lod_level_i[
......@@ -132,10 +138,9 @@ class TestReorderLoDTensor(unittest.TestCase):
input_seq_sub_lod = input_table[index][2]
if len(output_lod) == 0:
output_lod = [[0] for i in input_seq_sub_lod]
for i, sub_lod_i in enumerate(input_seq_sub_lod):
for idx_sub in sub_lod_i:
output_lod[i].append(output_lod[i][-1] + idx_sub)
output_lod = [[] for i in input_seq_sub_lod]
for i, level in enumerate(input_seq_sub_lod):
output_lod[i].extend(level)
return output_value, output_lod
def test_reorder_lod_tensor(self):
......@@ -148,7 +153,8 @@ class TestReorderLoDTensor(unittest.TestCase):
self.assertTrue(
numpy.allclose(
numpy.array(actual_output), expect_output, atol=0.001))
self.assertEqual(expect_output_lod, actual_output.lod())
self.assertEqual(expect_output_lod,
actual_output.recursive_sequence_lengths())
# check gradient
expect_grad = numpy.ones_like(self.data[self.data_desc[0][0]][0])
expect_grad_lod = self.data[self.data_desc[0][0]][1]
......@@ -156,7 +162,8 @@ class TestReorderLoDTensor(unittest.TestCase):
self.assertTrue(
numpy.allclose(
numpy.array(actual_grad), expect_grad, atol=0.001))
self.assertEqual(expect_grad_lod, actual_grad.lod())
self.assertEqual(expect_grad_lod,
actual_grad.recursive_sequence_lengths())
def test_reorder_tensor(self):
self.data_desc[0][-1] = 0 # input is tensor
......@@ -168,7 +175,8 @@ class TestReorderLoDTensor(unittest.TestCase):
self.assertTrue(
numpy.allclose(
numpy.array(actual_output), expect_output, atol=0.001))
self.assertEqual(expect_output_lod, actual_output.lod())
self.assertEqual(expect_output_lod,
actual_output.recursive_sequence_lengths())
# check gradient
expect_grad = numpy.ones_like(self.data[self.data_desc[0][0]][0])
expect_grad_lod = self.data[self.data_desc[0][0]][1]
......@@ -176,14 +184,14 @@ class TestReorderLoDTensor(unittest.TestCase):
self.assertTrue(
numpy.allclose(
numpy.array(actual_grad), expect_grad, atol=0.001))
self.assertEqual(expect_grad_lod, actual_grad.lod())
self.assertEqual(expect_grad_lod,
actual_grad.recursive_sequence_lengths())
# compare outputs between LodTensors with explicit and implicit lod
# use the same data but set the input lod explicitly
input_lod = [[
i for i in range(len(self.data[self.data_desc[0][0]][0]) + 1)
]]
self.inputs[self.data_desc[0][0]].set_lod(input_lod)
input_lod = [[1] * len(self.data[self.data_desc[0][0]][0])]
self.inputs[self.data_desc[0][0]].set_recursive_sequence_lengths(
input_lod)
# preserve the output of LodTensor with implicit lod to compare
expect_output = [
numpy.array(actual_output) for actual_output in self.actual_outputs
......
......@@ -107,7 +107,7 @@ class TestROIPoolOp(OpTest):
rois = []
self.rois_lod = [[]]
for bno in range(self.batch_size):
self.rois_lod[0].append(len(rois))
self.rois_lod[0].append(bno + 1)
for i in range(bno + 1):
x1 = np.random.random_integers(
0, self.width / self.spatial_scale - self.pooled_width)
......@@ -121,7 +121,6 @@ class TestROIPoolOp(OpTest):
roi = [bno, x1, y1, x2, y2]
rois.append(roi)
self.rois_lod[0].append(len(rois))
self.rois_num = len(rois)
self.rois = np.array(rois).astype("int64")
......
......@@ -19,8 +19,10 @@ from op_test import OpTest
def row_conv_forward(x, lod, wt):
out = np.zeros_like(x)
seq_info = lod[0]
num_sequences = len(seq_info) - 1
num_sequences = len(lod[0])
seq_info = [0]
for seq_len in lod[0]:
seq_info.append(seq_info[-1] + seq_len)
context_length = wt.shape[0]
for i in range(num_sequences): # loop over number of sequences
......@@ -32,7 +34,6 @@ def row_conv_forward(x, lod, wt):
cur_timesteps = end - start
for j in range(cur_timesteps): # loop over different timesteps
for k in range(context_length):
if j + k >= cur_timesteps:
continue
curoutput[j, :] += curinput[j + k, :] * wt[k, :]
......@@ -44,8 +45,8 @@ class TestRowConvOp1(OpTest):
def setUp(self):
self.op_type = "row_conv"
lod = [[0, 2, 5, 7]]
T = lod[0][-1]
lod = [[2, 3, 2]]
T = sum(lod[0])
D = 16
context_length = 2
......@@ -75,8 +76,8 @@ class TestRowConvOp2(OpTest):
def setUp(self):
self.op_type = "row_conv"
lod = [[0, 20, 50, 100]]
T = lod[0][-1]
lod = [[20, 30, 50]]
T = sum(lod[0])
D = 35
context_length = 35
......
......@@ -18,14 +18,19 @@ import sys
from op_test import OpTest
def to_abs_lod(lod):
if len(lod) == 0 or len(lod) == 1:
return lod
def to_abs_offset_lod(lod):
offset_lod = [[0] for i in lod]
for i, level in enumerate(lod):
for seq_len in level:
offset_lod[i].append(offset_lod[i][-1] + seq_len)
if len(offset_lod) == 0 or len(offset_lod) == 1:
return offset_lod
import copy
new_lod = copy.deepcopy(lod)
for idx, val in enumerate(lod[0]):
new_lod[0][idx] = lod[1][val]
return new_lod
new_offset_lod = copy.deepcopy(offset_lod)
for idx, val in enumerate(offset_lod[0]):
new_offset_lod[0][idx] = offset_lod[1][val]
return new_offset_lod
def seq_concat(inputs, level):
......@@ -35,11 +40,11 @@ def seq_concat(inputs, level):
x1 = inputs['X'][1][1][0]
level_idx = len(lod0) - level - 1
outs = []
for i in range(len(lod0[level_idx]) - 1):
sub_x0 = x0[to_abs_lod(lod0)[level_idx][i]:to_abs_lod(lod0)[level_idx][
i + 1], :]
sub_x1 = x1[to_abs_lod(lod1)[level_idx][i]:to_abs_lod(lod1)[level_idx][
i + 1], :]
for i in range(len(lod0[level_idx])):
sub_x0 = x0[to_abs_offset_lod(lod0)[level_idx][i]:to_abs_offset_lod(
lod0)[level_idx][i + 1], :]
sub_x1 = x1[to_abs_offset_lod(lod1)[level_idx][i]:to_abs_offset_lod(
lod1)[level_idx][i + 1], :]
outs.append(np.concatenate((sub_x0, sub_x1), axis=0))
return np.concatenate(outs, axis=0)
......@@ -48,9 +53,9 @@ class TestSeqConcatOp(OpTest):
def set_data(self):
# two level, batch size is 3
x0 = np.random.random((4, 6, 3)).astype('float32')
lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]]
lod0 = [[2, 2], [1, 1, 1, 1]]
x1 = np.random.random((4, 8, 3)).astype('float32')
lod1 = [[0, 2, 4], [0, 1, 2, 3, 4]]
lod1 = [[2, 2], [1, 1, 1, 1]]
axis = 1
level = 1
self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
......@@ -72,14 +77,14 @@ class TestSeqConcatOpLevelZeroNestedSequence(TestSeqConcatOp):
def set_data(self):
# two level, batch size is 3
x0 = np.random.random((4, 6, 3)).astype('float32')
lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]]
lod0 = [[2, 2], [1, 1, 1, 1]]
x1 = np.random.random((7, 6, 3)).astype('float32')
lod1 = [[0, 2, 4], [0, 1, 3, 5, 7]]
lod1 = [[2, 2], [1, 2, 2, 2]]
axis = 0
level = 0
self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
self.attrs = {'axis': axis, 'level': level}
out_lod = [[0, 2, 4], [0, 2, 5, 8, 11]]
out_lod = [[2, 2], [2, 3, 3, 3]]
self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)}
......@@ -87,14 +92,14 @@ class TestSeqConcatOplevelOneNestedSequence(TestSeqConcatOp):
def set_data(self):
# two level, batch size is 3
x0 = np.random.random((4, 6, 3)).astype('float32')
lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]]
lod0 = [[2, 2], [1, 1, 1, 1]]
x1 = np.random.random((7, 6, 3)).astype('float32')
lod1 = [[0, 3, 4], [0, 1, 3, 5, 7]]
lod1 = [[3, 1], [1, 2, 2, 2]]
axis = 0
level = 1
self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
self.attrs = {'axis': axis, 'level': level}
out_lod = [[0, 5, 8], [0, 1, 2, 3, 5, 7, 8, 9, 11]]
out_lod = [[5, 3], [1, 1, 1, 2, 2, 1, 1, 2]]
self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)}
......@@ -102,14 +107,14 @@ class TestSeqConcatOpLevelZeroSequence(TestSeqConcatOp):
def set_data(self):
# two level, batch size is 3
x0 = np.random.random((4, 3, 4)).astype('float32')
lod0 = [[0, 1, 2, 3, 4]]
lod0 = [[1, 1, 1, 1]]
x1 = np.random.random((7, 3, 4)).astype('float32')
lod1 = [[0, 1, 3, 5, 7]]
lod1 = [[1, 2, 2, 2]]
axis = 0
level = 0
self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
self.attrs = {'axis': axis, 'level': level}
out_lod = [[0, 2, 5, 8, 11]]
out_lod = [[2, 3, 3, 3]]
self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)}
......
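to_abs_offset_lod above turns a nested offset LoD, whose level-0 entries index level-1 boundaries, into absolute row offsets. A worked sketch on the shapes these tests use:

offset_lod = [[0, 2, 4], [0, 1, 3, 5, 7]]
# Level 0 indexes level-1 boundaries, so the absolute row offsets are:
abs_offsets = [offset_lod[1][v] for v in offset_lod[0]]
assert abs_offsets == [0, 3, 7]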
......@@ -75,35 +75,38 @@ class TestSeqProject(OpTest):
pading_data = self.pad_data
out = np.zeros((self.input_size[0], self.context_length *
self.input_size[1])).astype('float32')
lod = lod[0]
offset = [0]
for seq_len in lod[0]:
offset.append(offset[-1] + seq_len)
begin_pad = np.max([0, -self.context_start])
for i in range(len(lod) - 1):
for i in range(len(offset) - 1):
for j in range(self.context_length):
in_begin = lod[i] + self.context_start + j
in_end = lod[i + 1] + self.context_start + j
out_begin = lod[i]
out_end = lod[i + 1]
if in_begin < lod[i]:
pad_size = np.min([lod[i] - in_begin, lod[i + 1] - lod[i]])
in_begin = offset[i] + self.context_start + j
in_end = offset[i + 1] + self.context_start + j
out_begin = offset[i]
out_end = offset[i + 1]
if in_begin < offset[i]:
pad_size = np.min(
[offset[i] - in_begin, offset[i + 1] - offset[i]])
if self.padding_trainable:
sub_w = pading_data[j:j + pad_size, :]
out[lod[i]:lod[i] + pad_size, j * self.input_size[1]:(
j + 1) * self.input_size[1]] = sub_w
out_begin = lod[i] + pad_size
in_begin = lod[i]
out[offset[i]:offset[i] + pad_size, j * self.input_size[
1]:(j + 1) * self.input_size[1]] = sub_w
out_begin = offset[i] + pad_size
in_begin = offset[i]
if in_end > lod[i + 1]:
if in_end > offset[i + 1]:
pad_size = np.min(
[in_end - lod[i + 1], lod[i + 1] - lod[i]])
[in_end - offset[i + 1], offset[i + 1] - offset[i]])
if self.padding_trainable:
sub_w = pading_data[begin_pad + self.context_start + j -
pad_size:begin_pad +
self.context_start + j, :]
out[lod[i + 1] - pad_size:lod[i + 1], j * self.
out[offset[i + 1] - pad_size:offset[i + 1], j * self.
input_size[1]:(j + 1) * self.input_size[1]] = sub_w
in_end = lod[i + 1]
out_end = lod[i + 1] - pad_size
in_end = offset[i + 1]
out_end = offset[i + 1] - pad_size
if in_end <= in_begin:
continue
......@@ -175,7 +178,11 @@ class TestSeqProject(OpTest):
self.context_stride = 1
self.input_size = [self.input_row, 23]
self.lod = [[0, 4, 5, 8, self.input_row]]
offset_lod = [[0, 4, 5, 8, self.input_row]]
self.lod = [[]]
# convert from offset-based lod to length-based lod
for i in range(len(offset_lod[0]) - 1):
self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i])
self.output_represention = 8 # output feature size
......@@ -188,7 +195,11 @@ class TestSeqProjectCase1(TestSeqProject):
self.context_stride = 1
self.input_size = [self.input_row, 23]
self.lod = [[0, 4, 5, 8, self.input_row]]
offset_lod = [[0, 4, 5, 8, self.input_row]]
self.lod = [[]]
# convert from offset-based lod to length-based lod
for i in range(len(offset_lod[0]) - 1):
self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i])
self.output_represention = 8 # output feature size
......@@ -203,8 +214,12 @@ class TestSeqProjectCase2(TestSeqProject):
self.input_size = [self.input_row, 23]
idx = range(self.input_size[0])
del idx[0]
self.lod = [[0] + np.sort(random.sample(idx, 8)).tolist() +
[self.input_size[0]]]
offset_lod = [[0] + np.sort(random.sample(idx, 8)).tolist() +
[self.input_size[0]]]
self.lod = [[]]
# convert from offset-based lod to length-based lod
for i in range(len(offset_lod[0]) - 1):
self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i])
self.output_represention = 8 # output feature size
......
......@@ -18,26 +18,34 @@ from op_test import OpTest
class TestSeqAvgPool(OpTest):
def convert_to_offset(self, lod):
offset = [[0] for i in lod]
for i, level in enumerate(lod):
for seq_len in level:
offset[i].append(offset[i][-1] + seq_len)
return offset
def set_data(self):
self.op_type = 'sequence_pool'
# one level, batch size is 4
x = np.random.uniform(0.1, 1, [11, 23]).astype('float32')
lod = [[0, 4, 5, 8, 11]]
lod = [[4, 1, 3, 3]]
self.inputs = {'X': (x, lod)}
offset = self.convert_to_offset(lod)
out = np.zeros((4, 23)).astype('float32')
self.outputs = {'Out': out}
return x, lod, out
return x, offset, out
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "AVERAGE"}
for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :]
for i in range(len(offset[0]) - 1):
sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = sub_x.mean(axis=0)
def setUp(self):
x, lod, out = self.set_data()
self.compute(x, lod, out)
x, offset, out = self.set_data()
self.compute(x, offset, out)
def test_check_output(self):
self.check_output()
......@@ -50,10 +58,10 @@ class TestSeqAvgPool(OpTest):
class TestSeqSumPool(TestSeqAvgPool):
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "SUM"}
for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :]
for i in range(len(offset[0]) - 1):
sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = sub_x.sum(axis=0)
......@@ -61,46 +69,47 @@ class TestSeqMaxPool(TestSeqAvgPool):
def set_data(self):
self.op_type = 'sequence_pool'
x = np.random.uniform(0.1, 1, [13, 23]).astype('float32')
lod = [[0, 4, 5, 8, 13]]
for i in range(4):
l = lod[0][i + 1] - lod[0][i]
x[lod[0][i] + np.random.randint(l), :] += 2.0
lod = [[4, 1, 3, 5]]
offset = self.convert_to_offset(lod)
for i in range(len(offset[0]) - 1):
l = offset[0][i + 1] - offset[0][i]
x[offset[0][i] + np.random.randint(l), :] += 2.0
self.inputs = {'X': (x, lod)}
out = np.zeros((4, 23)).astype('float32')
self.outputs = {'Out': out}
return x, lod, out
return x, offset, out
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "MAX"}
for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :]
for i in range(len(offset[0]) - 1):
sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = np.amax(sub_x, axis=0)
class TestSeqSqrtPool(TestSeqAvgPool):
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "SQRT"}
for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :]
len = lod[0][i + 1] - lod[0][i]
out[i] = sub_x.sum(axis=0) / np.sqrt(len)
for i in range(len(offset[0]) - 1):
sub_x = x[offset[0][i]:offset[0][i + 1], :]
seq_len = offset[0][i + 1] - offset[0][i]
out[i] = sub_x.sum(axis=0) / np.sqrt(seq_len)
class TestSeqLastPool(TestSeqAvgPool):
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "LAST"}
for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :]
for i in range(len(offset[0]) - 1):
sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = sub_x[-1, :]
class TestSeqFirstPool(TestSeqAvgPool):
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "FIRST"}
for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :]
for i in range(len(offset[0]) - 1):
sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = sub_x[0, :]
......@@ -109,35 +118,39 @@ class TestSeqAvgPool2D(TestSeqAvgPool):
self.op_type = 'sequence_pool'
# one level, batch size is 4
x = np.random.uniform(0.1, 1, [13, 3, 17]).astype('float32')
lod = [[0, 4, 5, 8, 13]]
lod = [[4, 1, 3, 5]]
self.inputs = {'X': (x, lod)}
offset = self.convert_to_offset(lod)
out = np.zeros((4, 3, 17)).astype('float32')
self.outputs = {'Out': out}
return x, lod, out
return x, offset, out
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "AVERAGE"}
for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 17))
out[i] = np.reshape(sub_x.mean(axis=0), (3, 17))
class TestSeqSumPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "SUM"}
for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 17))
out[i] = np.reshape(sub_x.sum(axis=0), (3, 17))
class TestSeqSqrtPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "SQRT"}
for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
len = lod[0][i + 1] - lod[0][i]
out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(len), (3, 17))
for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 17))
seq_len = offset[0][i + 1] - offset[0][i]
out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(seq_len), (3, 17))
def test_check_grad(self):
# Remove MaxIndex after check_grad is refined.
......@@ -150,36 +163,40 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D):
def set_data(self):
self.op_type = 'sequence_pool'
x = np.random.uniform(0.1, 1, [13, 3, 11]).astype('float32')
lod = [[0, 4, 5, 8, 13]]
lod = [[4, 1, 3, 5]]
self.inputs = {'X': (x, lod)}
for i in range(4):
l = lod[0][i + 1] - lod[0][i]
x[lod[0][i] + np.random.randint(l), :] += 1.0
offset = self.convert_to_offset(lod)
for i in range(len(offset[0]) - 1):
l = offset[0][i + 1] - offset[0][i]
x[offset[0][i] + np.random.randint(l), :] += 1.0
out = np.zeros((4, 3, 11)).astype('float32')
self.outputs = {'Out': out}
return x, lod, out
return x, offset, out
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "MAX"}
for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 11))
for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 11))
out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11))
class TestSeqLastPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "LAST"}
for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 17))
out[i] = np.reshape(sub_x[-1, :], (3, 17))
class TestSeqFirstPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out):
def compute(self, x, offset, out):
self.attrs = {'pooltype': "FIRST"}
for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 17))
out[i] = np.reshape(sub_x[0, :], (3, 17))
......
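Each pooltype above reduces one sequence, delimited by offsets derived from the length LoD, to a single output row; AVERAGE, as a minimal numpy sketch:

import numpy as np

x = np.array([[1.], [3.], [2.], [4.], [6.]], dtype='float32')
lengths = [2, 3]  # rows 0-1 form sequence 0, rows 2-4 form sequence 1
out = [x[0:2].mean(axis=0), x[2:5].mean(axis=0)]
assert [o.tolist() for o in out] == [[2.0], [4.0]]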
......@@ -18,15 +18,17 @@ from op_test import OpTest
def sequence_erase(in_seq, lod0, tokens):
new_lod0 = [0]
new_lod0 = []
out_seq = []
for i in range(0, len(lod0) - 1):
offset = 0
for i in range(0, len(lod0)):
num_out = 0
for dat in in_seq[lod0[i]:lod0[i + 1]]:
for dat in in_seq[offset:(offset + lod0[i])]:
if dat not in tokens:
out_seq.append(dat)
num_out += 1
new_lod0.append(new_lod0[-1] + num_out)
offset += lod0[i]
new_lod0.append(num_out)
return np.array(out_seq).astype("int32"), new_lod0
......@@ -34,7 +36,7 @@ class TestSequenceEraseOpInt32(OpTest):
def setUp(self):
self.op_type = "sequence_erase"
in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
lod = [[0, 9, 13, 24, 30]]
lod = [[9, 4, 11, 6]]
tokens = [2, 3, 5]
out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
self.attrs = {'tokens': tokens}
......@@ -49,7 +51,7 @@ class TestSequenceEraseOpInt64(OpTest):
def setUp(self):
self.op_type = "sequence_erase"
in_seq = np.random.randint(0, 10, (30, 1)).astype("int64")
lod = [[0, 9, 13, 24, 30]]
lod = [[9, 4, 11, 6]]
tokens = [2, 3, 5]
out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
self.attrs = {'tokens': tokens}
......@@ -64,7 +66,7 @@ class TestSequenceEraseOpEmpty(OpTest):
def setUp(self):
self.op_type = "sequence_erase"
in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
lod = [[0, 9, 13, 24, 30]]
lod = [[9, 4, 11, 6]]
tokens = []
out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
self.attrs = {'tokens': tokens}
......
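A worked example of sequence_erase under the length-based LoD, matching the reference above:

in_seq = [2, 1, 3, 5, 4, 2]
lengths = [3, 3]
tokens = [2, 3]
# sequence 0: [2, 1, 3] -> [1]     (new length 1)
# sequence 1: [5, 4, 2] -> [5, 4]  (new length 2)
out_seq = [v for v in in_seq if v not in tokens]
assert out_seq == [1, 5, 4]  # new lengths: [1, 2]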
......@@ -21,7 +21,7 @@ class TestSequenceExpand(OpTest):
def set_data(self):
x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
y_lod = [[0, 1, 4, 8]]
y_lod = [[1, 3, 4]]
self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
def compute(self):
......@@ -37,23 +37,27 @@ class TestSequenceExpand(OpTest):
out = np.zeros(shape=((0, ) + x_data.shape[1:]), dtype=x_data.dtype)
if x_lod is None:
x_idx = [i for i in xrange(x_data.shape[0] + 1)]
# x_idx = [i for i in xrange(x_data.shape[0] + 1)]
x_idx = [1] * x_data.shape[0]
else:
x_idx = x_lod[0]
out_lod = [[0]]
out_lod = [[]]
offset = 0
for i in xrange(len(y_lod[ref_level])):
repeat_num = y_lod[ref_level][i]
x_len = x_idx[i]
for i in xrange(1, len(y_lod[ref_level])):
repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]
x_len = x_idx[i] - x_idx[i - 1]
if repeat_num > 0:
x_sub = x_data[x_idx[i - 1]:x_idx[i], :]
x_sub = x_data[offset:(offset + x_len), :]
stacked_x_sub = x_sub
for r in range(repeat_num - 1):
stacked_x_sub = np.vstack((stacked_x_sub, x_sub))
out = np.vstack((out, stacked_x_sub))
if x_lod is not None:
for j in xrange(repeat_num):
out_lod[0].append(out_lod[0][-1] + x_len)
out_lod[0].append(x_len)
offset += x_len
if x_lod is None:
self.outputs = {'Out': out}
......@@ -75,9 +79,9 @@ class TestSequenceExpand(OpTest):
class TestSequenceExpandCase1(TestSequenceExpand):
def set_data(self):
x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
x_lod = [[0, 2, 5]]
x_lod = [[2, 3]]
y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32')
y_lod = [[0, 2, 5], [0, 2, 4, 7, 10, 13]]
y_lod = [[2, 3], [2, 2, 3, 3, 3]]
self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
self.attrs = {'ref_level': 0}
......@@ -85,9 +89,9 @@ class TestSequenceExpandCase1(TestSequenceExpand):
class TestSequenceExpandCase2(TestSequenceExpand):
def set_data(self):
x_data = np.random.uniform(0.1, 1, [1, 2, 2]).astype('float32')
x_lod = [[0, 1]]
x_lod = [[1]]
y_data = np.random.uniform(0.1, 1, [2, 2, 2]).astype('float32')
y_lod = [[0, 2], [0, 2]]
y_lod = [[2], [1, 1]]
self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
self.attrs = {'ref_level': 0}
......@@ -95,9 +99,9 @@ class TestSequenceExpandCase2(TestSequenceExpand):
class TestSequenceExpandCase3(TestSequenceExpand):
def set_data(self):
x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
x_lod = [[0, 1, 2, 3, 4]]
y_data = np.random.uniform(0.1, 1, [6, 1]).astype('float32')
y_lod = [[0, 2, 4, 4, 6]]
x_lod = [[1, 1, 1, 1]]
y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
y_lod = [[2, 2, 2, 2]]
self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
......@@ -105,9 +109,9 @@ class TestSequenceExpandCase4(TestSequenceExpand):
def set_data(self):
data = np.random.uniform(0.1, 1, [5 * 2, 1])
x_data = np.array(data).reshape([5, 2]).astype('float32')
x_lod = [[0, 2, 5]]
y_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
y_lod = [[0, 1, 3], [0, 1, 3]]
x_lod = [[2, 3]]
y_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
y_lod = [[2], [2, 3]]
self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
......
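With length-based LoD, the reference implementation of sequence_expand reads each entry of Y's ref-level LoD directly as a repeat count. Below is a small standalone sketch of the simplest case from TestSequenceExpand (X without LoD, ref_level 0); it mirrors the updated compute() logic but is not the operator itself:
import numpy as np
x_data = np.array([[1.0], [2.0], [3.0]], dtype='float32')
y_lod = [[1, 3, 4]]  # length-based: repeat row 0 once, row 1 three times, row 2 four times
out = np.zeros((0, x_data.shape[1]), dtype=x_data.dtype)
for i, repeat_num in enumerate(y_lod[0]):
    if repeat_num > 0:
        out = np.vstack([out] + [x_data[i:i + 1, :]] * repeat_num)
# out now has 1 + 3 + 4 = 8 rows, matching y_data's first dimension in the test.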
......@@ -22,7 +22,7 @@ class TestSequenceReshape(OpTest):
def setUp(self):
self.op_type = 'sequence_reshape'
dimension = 12
x_lod = [[0, 4, 5, 8, 11]]
x_lod = [[4, 1, 3, 3]]
x = np.random.uniform(0.1, 1, [11, 24]).astype('float32')
self.inputs = {'X': (x, x_lod)}
......@@ -34,13 +34,13 @@ class TestSequenceReshape(OpTest):
def compute_output(self, x, x_lod, dimension):
x_width = x.shape[1]
out_lod = [[0]]
for i in xrange(len(x_lod[0]) - 1):
seq_len = x_lod[0][i + 1] - x_lod[0][i]
out_lod = [[]]
for i in xrange(len(x_lod[0])):
seq_len = x_lod[0][i]
offset = (seq_len * x_width) / dimension
assert int(offset) * dimension == seq_len * x_width
out_lod[0].append(out_lod[0][-1] + int(offset))
out = np.zeros(shape=(out_lod[0][-1], dimension)).astype('float32')
out_lod[0].append(int(offset))
out = np.zeros(shape=(sum(out_lod[0]), dimension)).astype('float32')
out.ravel()[:] = x.ravel()[:]
return out, out_lod
......@@ -55,7 +55,7 @@ class TestSequenceReshape_reduce(TestSequenceReshape):
def setUp(self):
self.op_type = 'sequence_reshape'
dimension = 24
x_lod = [[0, 4, 6, 8, 12]]
x_lod = [[4, 2, 2, 4]]
x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
self.inputs = {'X': (x, x_lod)}
......@@ -70,7 +70,7 @@ class TestSequenceReshape_same(TestSequenceReshape):
def setUp(self):
self.op_type = 'sequence_reshape'
dimension = 12
x_lod = [[0, 4, 6, 8, 12]]
x_lod = [[4, 2, 2, 4]]
x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
self.inputs = {'X': (x, x_lod)}
......
......@@ -29,20 +29,20 @@ class TestSequenceSliceOp(OpTest):
self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length}
outs = [] #np.zeros((100, 3, 2)).astype('float32')
out_lod = [[0]]
out_lod_offset = 0
out_lod = [[]]
lod_offset = 0
for i in range(len(offset)):
sub_x = x[lod[0][i] + offset[i, 0]:lod[0][i] + offset[i, 0] +
sub_x = x[lod_offset + offset[i, 0]:lod_offset + offset[i, 0] +
length[i, 0], :]
out_lod_offset = out_lod_offset + len(sub_x)
outs.append(sub_x)
out_lod[0].append(out_lod_offset)
out_lod[0].append(len(sub_x))
lod_offset += lod[0][i]
outs = np.concatenate(outs, axis=0)
self.outputs = {'Out': (outs, out_lod)}
def init_test_case(self):
self.x_dim = (100, 3, 2)
self.x_lod = [[0, 20, 40, 60, 80, 100]]
self.x_lod = [[20, 20, 20, 20, 20]]
self.offset = [[1], [2], [3], [4], [5]]
self.length = [[10], [8], [6], [4], [2]]
......
......@@ -26,15 +26,16 @@ class TestSequenceSoftmaxOp(OpTest):
self.init_op_type()
x = np.random.uniform(0.1, 1, (11, 1)).astype("float32")
lod = [[0, 4, 5, 8, 11]]
lod = [[4, 1, 3, 3]]
out = np.zeros((11, 1)).astype("float32")
for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :]
sub_x = sub_x.reshape(1, lod[0][i + 1] - lod[0][i])
offset = 0
for i in range(len(lod[0])):
sub_x = x[offset:offset + lod[0][i], :]
sub_x = sub_x.reshape(1, lod[0][i])
sub_out = stable_softmax(sub_x)
out[lod[0][i]:lod[0][i + 1], :] = sub_out.reshape(
lod[0][i + 1] - lod[0][i], 1)
out[offset:offset + lod[0][i], :] = sub_out.reshape(lod[0][i], 1)
offset += lod[0][i]
self.inputs = {"X": (x, lod)}
self.outputs = {"Out": out}
......
......@@ -54,12 +54,12 @@ class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase):
def test_refer_lod(self):
cpu = core.CPUPlace()
x_tensor = core.LoDTensor()
x_tensor.set_lod([[0, 2, 5, 6]])
x_tensor.set_recursive_sequence_lengths([[2, 3, 1]])
tensor_np = np.random.random(size=(6, 100)).astype('float32')
x_tensor.set(tensor_np, cpu)
rank_table_tensor = core.LoDTensor()
rank_table_tensor.set_lod([[0, 1, 3, 6]])
rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]])
rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'),
cpu)
......@@ -83,7 +83,7 @@ class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase):
x_tensor.set(tensor_np, cpu)
rank_table_tensor = core.LoDTensor()
rank_table_tensor.set_lod([[0, 1, 3, 6]])
rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]])
rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'),
cpu)
......
......@@ -56,7 +56,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
def test_split_and_merge_lod_tensor_level_0(self):
tensor = core.LoDTensor()
tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place())
tensor.set_lod([[0, 3, 9, 10]])
tensor.set_recursive_sequence_lengths([[3, 6, 1]])
mask_np = np.array([0, 1, 0]).astype('bool')
mask_np = np.expand_dims(mask_np, axis=1)
......@@ -68,15 +68,15 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1)
expect_true = core.LoDTensor()
expect_true.set(expect_true_tensor, self.place())
expect_true.set_lod([[0, 6]])
expect_true.set_recursive_sequence_lengths([[6]])
expect_false_tensor = np.array([0, 1, 2, 9]).astype('int32')
expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1)
expect_false_lod = [[0, 3, 4]]
expect_false_lod = [[3, 1]]
expect_false = core.LoDTensor()
expect_false.set(expect_false_tensor, self.place())
expect_false.set_lod(expect_false_lod)
expect_false.set_recursive_sequence_lengths(expect_false_lod)
self.main(
tensor=tensor,
......@@ -126,7 +126,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
def check_tensor_same(self, actual, expect):
self.assertTrue(np.allclose(np.array(actual), np.array(expect)))
self.assertEqual(actual.lod(), expect.lod())
self.assertEqual(actual.recursive_sequence_lengths(),
expect.recursive_sequence_lengths())
class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
......@@ -151,7 +152,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
tensor = core.LoDTensor()
tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
tensor.set_lod([[0, 3, 9, 10]])
tensor.set_recursive_sequence_lengths([[3, 6, 1]])
mask_np = np.array([0, 1, 0]).astype('bool')
mask_np = np.expand_dims(mask_np, axis=1)
......
......@@ -22,22 +22,23 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod):
if len(gt_lod) != len(neg_lod):
raise AssertionError("The input arguments are illegal.")
batch_size = len(gt_lod) - 1
batch_size = len(gt_lod)
match_indices = -1 * np.ones((batch_size, num_prior)).astype('int32')
neg_indices = np.zeros((neg_lod[-1], 1)).astype('int32')
neg_indices = np.zeros((sum(neg_lod), 1)).astype('int32')
offset = 0
for n in range(batch_size):
gt_num = gt_lod[n + 1] - gt_lod[n]
gt_num = gt_lod[n]
ids = random.sample([i for i in range(num_prior)], gt_num)
match_indices[n, ids] = [i for i in range(gt_num)]
ret_ids = set([i for i in range(num_prior)]) - set(ids)
s = neg_lod[n]
e = neg_lod[n + 1]
l = e - s
l = neg_lod[n]
neg_ids = random.sample(ret_ids, l)
neg_indices[s:e, :] = np.array(neg_ids).astype('int32').reshape(l, 1)
neg_indices[offset:offset + neg_lod[n], :] = np.array(neg_ids).astype(
'int32').reshape(l, 1)
offset += neg_lod[n]
return match_indices, neg_indices
......@@ -56,24 +57,28 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
# init weight for target label
trg_label_wt = np.zeros((batch_size, num_prior, 1)).astype('float32')
gt_offset = 0
neg_offset = 0
for i in range(batch_size):
cur_indices = match_indices[i]
col_ids = np.where(cur_indices > -1)
col_val = cur_indices[col_ids]
gt_start = gt_lod[i]
# target bbox
for v, c in zip(col_val + gt_start, col_ids[0].tolist()):
for v, c in zip(col_val + gt_offset, col_ids[0].tolist()):
trg_box[i][c][:] = encoded_box[v][c][:]
# weight for target bbox
trg_box_wt[i][col_ids] = 1.0
trg_label[i][col_ids] = gt_label[col_val + gt_start]
trg_label[i][col_ids] = gt_label[col_val + gt_offset]
trg_label_wt[i][col_ids] = 1.0
# set target label weight to 1.0 for the negative samples
if neg_indices is not None:
neg_ids = neg_indices[neg_lod[i]:neg_lod[i + 1]]
neg_ids = neg_indices[neg_offset:neg_offset + neg_lod[i]]
trg_label_wt[i][neg_ids] = 1.0
# update offset
gt_offset += gt_lod[i]
neg_offset += neg_lod[i]
return trg_box, trg_box_wt, trg_label, trg_label_wt
......@@ -83,11 +88,11 @@ class TestTargetAssginFloatType(OpTest):
self.op_type = "target_assign"
num_prior = 120
num_class = 21
gt_lod = [0, 5, 11, 23]
neg_lod = [0, 4, 7, 13]
gt_lod = [5, 6, 12]
neg_lod = [4, 3, 6]
mismatch_value = 0
batch_size = len(gt_lod) - 1
num_gt = gt_lod[-1]
batch_size = len(gt_lod)
num_gt = sum(gt_lod)
encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32')
gt_label = np.random.randint(
......@@ -121,11 +126,11 @@ class TestTargetAssginIntType(OpTest):
self.op_type = "target_assign"
num_prior = 120
num_class = 21
gt_lod = [0, 5, 11, 23]
neg_lod = [0, 4, 7, 13]
gt_lod = [5, 6, 12]
neg_lod = [4, 3, 6]
mismatch_value = 0
batch_size = len(gt_lod) - 1
num_gt = gt_lod[-1]
batch_size = len(gt_lod)
num_gt = sum(gt_lod)
encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32')
gt_label = np.random.randint(
......
......@@ -69,15 +69,14 @@ class TestTensor(unittest.TestCase):
array[0, 0, 0] = 3
array[3, 3, 5] = 10
lod_tensor.set(array, place)
lod_tensor.set_lod([[0, 2, 4]])
lod_tensor.set_recursive_sequence_lengths([[2, 2]])
lod_v = numpy.array(lod_tensor)
self.assertTrue(numpy.alltrue(array == lod_v))
lod = lod_tensor.lod()
self.assertEqual(0, lod[0][0])
lod = lod_tensor.recursive_sequence_lengths()
self.assertEqual(2, lod[0][0])
self.assertEqual(2, lod[0][1])
self.assertEqual(4, lod[0][2])
def test_float_lod_tensor(self):
place = core.CPUPlace()
......@@ -97,21 +96,21 @@ class TestTensor(unittest.TestCase):
lod_v = numpy.array(lod_tensor)
self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0])
self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1])
self.assertEqual(len(lod_tensor.lod()), 0)
self.assertEqual(len(lod_tensor.recursive_sequence_lengths()), 0)
lod_py = [[0, 2, 5], [0, 2, 4, 5]]
lod_tensor.set_lod(lod_py)
lod = lod_tensor.lod()
lod_py = [[2, 1], [1, 2, 2]]
lod_tensor.set_recursive_sequence_lengths(lod_py)
lod = lod_tensor.recursive_sequence_lengths()
self.assertListEqual(lod_py, lod)
def test_lod_tensor_init(self):
scope = core.Scope()
place = core.CPUPlace()
lod_py = [[0, 2, 5], [0, 2, 4, 5]]
lod_py = [[2, 1], [1, 2, 2]]
lod_tensor = core.LoDTensor()
lod_tensor.set_dims([5, 2, 3, 4])
lod_tensor.set_lod(lod_py)
lod_tensor.set_recursive_sequence_lengths(lod_py)
lod_tensor.alloc_float(place)
tensor_array = numpy.array(lod_tensor)
tensor_array[0, 0, 0, 0] = 1.0
......@@ -121,17 +120,17 @@ class TestTensor(unittest.TestCase):
lod_v = numpy.array(lod_tensor)
self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0])
self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1])
self.assertListEqual(lod_py, lod_tensor.lod())
self.assertListEqual(lod_py, lod_tensor.recursive_sequence_lengths())
def test_lod_tensor_gpu_init(self):
if not core.is_compiled_with_cuda():
return
place = core.CUDAPlace(0)
lod_py = [[0, 2, 5], [0, 2, 4, 5]]
lod_py = [[2, 1], [1, 2, 2]]
lod_tensor = core.LoDTensor()
lod_tensor.set_dims([5, 2, 3, 4])
lod_tensor.set_lod(lod_py)
lod_tensor.set_recursive_sequence_lengths(lod_py)
lod_tensor.alloc_float(place)
tensor_array = numpy.array(lod_tensor)
tensor_array[0, 0, 0, 0] = 1.0
......@@ -141,7 +140,7 @@ class TestTensor(unittest.TestCase):
lod_v = numpy.array(lod_tensor)
self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0])
self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1])
self.assertListEqual(lod_py, lod_tensor.lod())
self.assertListEqual(lod_py, lod_tensor.recursive_sequence_lengths())
def test_empty_tensor(self):
place = core.CPUPlace()
......
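The tensor tests above now go through the length-based accessors. A short sketch of how the two views relate on the same LoDTensor, assuming the offset-based lod() accessor is still exposed alongside the new one:
import numpy as np
import paddle.fluid.core as core
t = core.LoDTensor()
t.set(np.arange(10).reshape(10, 1).astype('float32'), core.CPUPlace())
t.set_recursive_sequence_lengths([[3, 6, 1]])  # three sequences of length 3, 6 and 1
print(t.recursive_sequence_lengths())  # [[3, 6, 1]]
print(t.lod())                         # [[0, 3, 9, 10]], the offset view of the same split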
......@@ -34,8 +34,8 @@ class CTCForward(object):
self.level = 0
self.num_classes = softmax.shape[1]
self.batch_size = len(softmax_lod[self.level]) - 1
assert self.batch_size == len(labels_lod[self.level]) - 1
self.batch_size = len(softmax_lod[self.level])
assert self.batch_size == len(labels_lod[self.level])
self.loss = np.zeros([self.batch_size, 1], dtype="float32")
self.gradient = np.zeros(self.softmax.shape, dtype="float32")
......@@ -156,16 +156,20 @@ class CTCForward(object):
return -log_prob
def forward(self):
softmax_offset = 0
labels_offset = 0
for i in range(self.batch_size):
softmax_start_i = self.softmax_lod[self.level][i]
softmax_end_i = self.softmax_lod[self.level][i + 1]
labels_start_i = self.labels_lod[self.level][i]
labels_end_i = self.labels_lod[self.level][i + 1]
softmax_start_i = softmax_offset
softmax_end_i = softmax_offset + self.softmax_lod[self.level][i]
labels_start_i = labels_offset
labels_end_i = labels_offset + self.labels_lod[self.level][i]
softmax_a_sequence = self.softmax[softmax_start_i:softmax_end_i, :]
labels_a_sequence = self.labels[labels_start_i:labels_end_i, :]
self.loss[i] = self.forward_a_sequence(softmax_a_sequence,
labels_a_sequence)
softmax_offset += self.softmax_lod[self.level][i]
labels_offset += self.labels_lod[self.level][i]
return self.loss
......@@ -173,8 +177,8 @@ class TestWarpCTCOp(OpTest):
def config(self):
self.batch_size = 4
self.num_classes = 8
self.logits_lod = [[0, 4, 5, 8, 11]]
self.labels_lod = [[0, 3, 4, 8, 12]]
self.logits_lod = [[4, 1, 3, 3]]
self.labels_lod = [[3, 1, 4, 4]]
self.blank = self.num_classes - 1
self.norm_by_times = False
......@@ -184,11 +188,13 @@ class TestWarpCTCOp(OpTest):
logits = np.random.uniform(
0.1, 1.0,
[self.logits_lod[0][-1], self.num_classes]).astype("float32")
[sum(self.logits_lod[0]), self.num_classes]).astype("float32")
softmax = np.apply_along_axis(stable_softmax, 1, logits)
# labels should not be blank
labels = np.random.randint(
0, self.num_classes - 1, [self.labels_lod[0][-1], 1], dtype="int32")
0,
self.num_classes - 1, [sum(self.labels_lod[0]), 1],
dtype="int32")
ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod,
self.blank, self.norm_by_times)
......@@ -196,9 +202,8 @@ class TestWarpCTCOp(OpTest):
max_sequence_length = 0
for i in range(self.batch_size):
max_sequence_length = max(
max_sequence_length,
self.logits_lod[0][i + 1] - self.logits_lod[0][i])
max_sequence_length = max(max_sequence_length,
self.logits_lod[0][i])
self.gradient = np.zeros(
[max_sequence_length, self.batch_size, self.num_classes],
dtype="float32")
......@@ -222,8 +227,8 @@ class TestWarpCTCOpCase1(TestWarpCTCOp):
def config(self):
self.batch_size = 4
self.num_classes = CUDA_BLOCK_SIZE + 2
self.logits_lod = [[0, 4, 5, 8, 11]]
self.labels_lod = [[0, 3, 4, 8, 12]]
self.logits_lod = [[4, 1, 3, 3]]
self.labels_lod = [[3, 1, 4, 4]]
self.blank = 0
self.norm_by_times = False
......
......@@ -76,11 +76,11 @@ class TestWeightNormalization(unittest.TestCase):
lod_level_i = numpy.random.randint(
low=1,
high=5,
size=self.batch_size if i == 0 else lod_level_i[-1])
lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist()
size=self.batch_size
if i == 0 else sum(lod_level_i)).tolist()
data_lod.append(lod_level_i)
data_value = numpy.random.random(
size=[data_lod[-1][-1] if data_lod else self.batch_size
size=[sum(data_lod[-1]) if data_lod else self.batch_size
] + data_shape).astype('float32')
self.data[data_name] = (data_value, data_lod)
......@@ -90,7 +90,7 @@ class TestWeightNormalization(unittest.TestCase):
tensor = fluid.Tensor()
tensor.set(self.data[desc[0]][0], place)
if self.data[desc[0]][1]:
tensor.set_lod(self.data[desc[0]][1])
tensor.set_recursive_sequence_lengths(self.data[desc[0]][1])
self.inputs[desc[0]] = tensor
def weight_normalize(self):
......
......@@ -22,7 +22,7 @@ def as_lodtensor(np_array, lod, place):
tensor = core.LoDTensor()
tensor.set(np_value, place)
if lod is not None:
tensor.set_lod(lod)
tensor.set_recursive_sequence_lengths(lod)
return tensor
......@@ -73,7 +73,7 @@ def set_input(scope, op, inputs, place):
if isinstance(var, tuple) or isinstance(var, np.ndarray):
tensor = scope.find_var(var_name).get_tensor()
if isinstance(var, tuple):
tensor.set_lod(var[1])
tensor.set_recursive_sequence_lengths(var[1])
var = var[0]
tensor.set_dims(var.shape)
tensor.set(var, place)
......
......@@ -157,9 +157,11 @@ class ControlFlowGraph(object):
if op.type() == "fill_constant" and op.attr("force_cpu") == True:
self._skip_opt.update(op.output_arg_names())
def release_memory(self):
def release_memory(self, skip_opt_set=None):
self._dataflow_analyze()
self._update_skip_opt_set()
if skip_opt_set:
self._skip_opt.update(skip_opt_set)
fwd_id = 0
bwd_id = 0
for i in range(self.op_size):
......@@ -183,7 +185,7 @@ class ControlFlowGraph(object):
else:
bwd_id += 1
def memory_optimize(self, level=0):
def memory_optimize(self, skip_opt_set=None, level=0):
def compare_shape(x_shape, cache_shape, opt_level):
if opt_level == 0:
return x_shape == cache_shape
......@@ -200,6 +202,9 @@ class ControlFlowGraph(object):
self._dataflow_analyze()
self._update_skip_opt_set()
# update skip set to meet users' demand
if skip_opt_set:
self._skip_opt.update(skip_opt_set)
self.pool = []
for i in range(self.op_size):
op = self._ops[i]
......@@ -358,7 +363,7 @@ def _get_cfgs(input_program):
return cfgs
def memory_optimize(input_program, print_log=False, level=0):
def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
"""Optimize memory by reusing var memory.
Note: it doesn't support a subblock nested inside another subblock.
......@@ -374,10 +379,10 @@ def memory_optimize(input_program, print_log=False, level=0):
PRINT_LOG = print_log
cfgs = _get_cfgs(input_program)
for cfg in cfgs:
cfg.memory_optimize(level)
cfg.memory_optimize(skip_opt_set=skip_opt_set, level=level)
def release_memory(input_program):
def release_memory(input_program, skip_opt_set=None):
cfgs = _get_cfgs(input_program)
for cfg in cfgs:
cfg.release_memory()
cfg.release_memory(skip_opt_set=skip_opt_set)
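The new skip_opt_set argument lets callers keep particular variables out of the reuse pool. A minimal usage sketch, assuming memory_optimize and release_memory are exposed on the fluid module as in contemporaneous releases; the small network below is only a placeholder:
import paddle.fluid as fluid
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))
# Exclude the input and the loss variable from memory reuse.
fluid.memory_optimize(fluid.default_main_program(),
                      skip_opt_set=set(['x', loss.name]),
                      print_log=False,
                      level=0)
# release_memory accepts the same optional skip set.
fluid.release_memory(fluid.default_main_program(), skip_opt_set=set([loss.name]))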
......@@ -7,7 +7,7 @@ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do
if [[ $file =~ ^(paddle/api/.*|paddle/capi/.*|paddle/contrib/.*|paddle/cuda/.*|paddle/function/.*|paddle/gserver/.*|paddle/math/.*|paddle/optimizer/.*|paddle/parameter/.*|paddle/pserver/.*|paddle/trainer/.*|paddle/utils/.*) ]]; then
continue;
else
cpplint $file;
cpplint --filter=-readability/fn_size $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
fi
done
......