提交 bb10c370 编写于 作者: T tangwei12

merge

...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
| jczaja | Jacek Czaja | | jczaja | Jacek Czaja |
| JiayiFeng | Jia-Yi Feng | | JiayiFeng | Jia-Yi Feng |
| kbinias | Krzysztof Binias | | kbinias | Krzysztof Binias |
| kexinzhao | Ke-Xin Zhao |
| kuke | Yi-Bing Liu | | kuke | Yi-Bing Liu |
| lcy-seso | Ying Cao | | lcy-seso | Ying Cao |
| lipeng-unisound | Peng Li | | lipeng-unisound | Peng Li |
......
...@@ -61,6 +61,7 @@ option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) ...@@ -61,6 +61,7 @@ option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF) option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
option(WITH_CONTRIB "Compile the third-party contributation" OFF) option(WITH_CONTRIB "Compile the third-party contributation" OFF)
option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE}) option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
# CMAKE_BUILD_TYPE # CMAKE_BUILD_TYPE
...@@ -193,7 +194,10 @@ set(EXTERNAL_LIBS ...@@ -193,7 +194,10 @@ set(EXTERNAL_LIBS
if(WITH_GPU) if(WITH_GPU)
include(cuda) include(cuda)
include(tensorrt) include(tensorrt)
endif(WITH_GPU) include(external/anakin)
else()
set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when GPU is set." FORCE)
endif()
if(WITH_AMD_GPU) if(WITH_AMD_GPU)
find_package(HIP) find_package(HIP)
......
...@@ -180,7 +180,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, ...@@ -180,7 +180,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
print_train_time(start_time, time.time(), num_samples) print_train_time(start_time, time.time(), num_samples)
print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))), print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))),
# evaluation # evaluation
if not args.no_test and batch_acc: if not args.no_test and batch_acc and not args.use_reader_op:
pass_test_acc = test(exe, infer_prog, test_reader, feeder, pass_test_acc = test(exe, infer_prog, test_reader, feeder,
batch_acc) batch_acc)
print(", Test Accuracy: %f" % pass_test_acc) print(", Test Accuracy: %f" % pass_test_acc)
...@@ -277,11 +277,12 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, ...@@ -277,11 +277,12 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
batch_id += 1 batch_id += 1
print_train_time(start_time, time.time(), num_samples) print_train_time(start_time, time.time(), num_samples)
if not args.no_test and batch_acc: if not args.no_test and batch_acc and not args.use_reader_op:
# we have not implement record io for test
# skip test when use args.use_reader_op
test_acc = test(startup_exe, infer_prog, test_reader, feeder, test_acc = test(startup_exe, infer_prog, test_reader, feeder,
batch_acc) batch_acc)
print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc)) print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc))
exit(0)
def print_arguments(args): def print_arguments(args):
......
...@@ -173,21 +173,6 @@ def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim, ...@@ -173,21 +173,6 @@ def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
return avg_cost, feeding_list return avg_cost, feeding_list
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
lod_t = core.LoDTensor()
lod_t.set(flattened_data, place)
lod_t.set_lod([lod])
return lod_t, lod[-1]
def lodtensor_to_ndarray(lod_tensor): def lodtensor_to_ndarray(lod_tensor):
dims = lod_tensor.get_dims() dims = lod_tensor.get_dims()
ndarray = np.zeros(shape=dims).astype('float32') ndarray = np.zeros(shape=dims).astype('float32')
......
...@@ -199,7 +199,10 @@ def get_model(args): ...@@ -199,7 +199,10 @@ def get_model(args):
batched_train_reader = paddle.batch( batched_train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
train_reader, buf_size=5120), train_reader, buf_size=5120),
batch_size=args.batch_size * args.gpus) batch_size=args.batch_size * args.gpus,
batched_test_reader = paddle.batch(train_reader, batch_size=args.batch_size) drop_last=True)
batched_test_reader = paddle.batch(
train_reader, batch_size=args.batch_size, drop_last=True)
return avg_cost, inference_program, optimizer, batched_train_reader, batched_test_reader, batch_acc return avg_cost, inference_program, optimizer, batched_train_reader,\
batched_test_reader, batch_acc
...@@ -125,18 +125,3 @@ def get_model(args): ...@@ -125,18 +125,3 @@ def get_model(args):
batch_size=args.batch_size) batch_size=args.batch_size)
return loss, inference_program, adam, train_reader, test_reader, batch_acc return loss, inference_program, adam, train_reader, test_reader, batch_acc
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = numpy.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
if (NOT WITH_ANAKIN)
return()
endif()
set(ANAKIN_INSTALL_DIR "${THIRD_PARTY_PATH}/install/anakin" CACHE PATH
"Anakin install path." FORCE)
set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header files")
set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library")
set(ANAKIN_COMPILE_EXTRA_FLAGS -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp)
set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz")
# A helper function used in Anakin, currently, to use it, one need to recursively include
# nearly all the header files.
function(fetch_include_recursively root_dir)
if (IS_DIRECTORY ${root_dir})
include_directories(${root_dir})
endif()
file(GLOB ALL_SUB RELATIVE ${root_dir} ${root_dir}/*)
foreach(sub ${ALL_SUB})
if (IS_DIRECTORY ${root_dir}/${sub})
fetch_include_recursively(${root_dir}/${sub})
endif()
endforeach()
endfunction()
# download library
message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}")
execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*")
execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}")
execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz")
if (WITH_ANAKIN)
message(STATUS "Anakin for inference is enabled")
message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
fetch_include_recursively(${ANAKIN_INCLUDE})
link_directories(${ANAKIN_LIBRARY})
endif()
...@@ -29,6 +29,8 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -29,6 +29,8 @@ IF(NOT ${CBLAS_FOUND})
"${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE) CACHE FILEPATH "openblas library." FORCE)
ADD_DEFINITIONS(-DPADDLE_USE_OPENBLAS)
SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")
SET(OPENBLAS_COMMIT "v0.2.20") SET(OPENBLAS_COMMIT "v0.2.20")
......
...@@ -39,7 +39,7 @@ function(copy TARGET) ...@@ -39,7 +39,7 @@ function(copy TARGET)
message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers") message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers")
endif() endif()
math(EXPR len "${copy_lib_SRCS_len} - 1") math(EXPR len "${copy_lib_SRCS_len} - 1")
add_custom_target(${TARGET} DEPENDS ${copy_lib_DEPS}) add_custom_target(${TARGET} DEPENDS ${copy_lib_DEPS})
foreach(index RANGE ${len}) foreach(index RANGE ${len})
list(GET copy_lib_SRCS ${index} src) list(GET copy_lib_SRCS ${index} src)
...@@ -155,6 +155,15 @@ copy(inference_lib DEPS paddle_fluid_shared paddle_fluid ...@@ -155,6 +155,15 @@ copy(inference_lib DEPS paddle_fluid_shared paddle_fluid
DSTS ${dst_dir}/${module} ${dst_dir}/${module} DSTS ${dst_dir}/${module} ${dst_dir}/${module}
) )
if(WITH_CONTRIB)
set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference")
copy(contrib_inference_lib DEPS paddle_inference_api
SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h
${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api.*
DSTS ${contrib_dst_dir} ${contrib_dst_dir}
)
endif()
set(module "platform") set(module "platform")
copy(platform_lib DEPS profiler_py_proto copy(platform_lib DEPS profiler_py_proto
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
......
#!/bin/bash #!/bin/bash
python gen_doc.py layers --submodules control_flow device io nn ops tensor > layers.rst python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler metric > layers.rst
for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
do do
......
...@@ -33,6 +33,13 @@ Xavier ...@@ -33,6 +33,13 @@ Xavier
:members: :members:
:noindex: :noindex:
Bilinear
--------
.. autoclass:: paddle.fluid.initializer.Bilinear
:members:
:noindex:
force_init_on_cpu force_init_on_cpu
----------------- -----------------
...@@ -73,3 +80,10 @@ XavierInitializer ...@@ -73,3 +80,10 @@ XavierInitializer
:members: :members:
:noindex: :noindex:
BilinearInitializer
-------------------
.. autoclass:: paddle.fluid.initializer.BilinearInitializer
:members:
:noindex:
...@@ -77,3 +77,21 @@ clean_checkpoint ...@@ -77,3 +77,21 @@ clean_checkpoint
.. autofunction:: paddle.fluid.io.clean_checkpoint .. autofunction:: paddle.fluid.io.clean_checkpoint
:noindex: :noindex:
load_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.load_persist_vars_without_grad
:noindex:
save_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.save_persist_vars_without_grad
:noindex:
get_latest_checkpoint_serial
----------------------------
.. autofunction:: paddle.fluid.io.get_latest_checkpoint_serial
:noindex:
...@@ -225,6 +225,12 @@ Send ...@@ -225,6 +225,12 @@ Send
.. autofunction:: paddle.fluid.layers.Send .. autofunction:: paddle.fluid.layers.Send
:noindex: :noindex:
Recv
----
.. autofunction:: paddle.fluid.layers.Recv
:noindex:
open_recordio_file open_recordio_file
------------------ ------------------
...@@ -274,6 +280,12 @@ Preprocessor ...@@ -274,6 +280,12 @@ Preprocessor
:members: :members:
:noindex: :noindex:
load
----
.. autofunction:: paddle.fluid.layers.load
:noindex:
nn nn
== ==
...@@ -361,6 +373,12 @@ conv2d ...@@ -361,6 +373,12 @@ conv2d
.. autofunction:: paddle.fluid.layers.conv2d .. autofunction:: paddle.fluid.layers.conv2d
:noindex: :noindex:
conv3d
------
.. autofunction:: paddle.fluid.layers.conv3d
:noindex:
sequence_pool sequence_pool
------------- -------------
...@@ -385,6 +403,12 @@ pool2d ...@@ -385,6 +403,12 @@ pool2d
.. autofunction:: paddle.fluid.layers.pool2d .. autofunction:: paddle.fluid.layers.pool2d
:noindex: :noindex:
pool3d
------
.. autofunction:: paddle.fluid.layers.pool3d
:noindex:
batch_norm batch_norm
---------- ----------
...@@ -403,6 +427,12 @@ conv2d_transpose ...@@ -403,6 +427,12 @@ conv2d_transpose
.. autofunction:: paddle.fluid.layers.conv2d_transpose .. autofunction:: paddle.fluid.layers.conv2d_transpose
:noindex: :noindex:
conv3d_transpose
----------------
.. autofunction:: paddle.fluid.layers.conv3d_transpose
:noindex:
sequence_expand sequence_expand
--------------- ---------------
...@@ -619,6 +649,18 @@ dice_loss ...@@ -619,6 +649,18 @@ dice_loss
.. autofunction:: paddle.fluid.layers.dice_loss .. autofunction:: paddle.fluid.layers.dice_loss
:noindex: :noindex:
image_resize
------------
.. autofunction:: paddle.fluid.layers.image_resize
:noindex:
image_resize_short
------------------
.. autofunction:: paddle.fluid.layers.image_resize_short
:noindex:
resize_bilinear resize_bilinear
--------------- ---------------
...@@ -637,6 +679,12 @@ random_crop ...@@ -637,6 +679,12 @@ random_crop
.. autofunction:: paddle.fluid.layers.random_crop .. autofunction:: paddle.fluid.layers.random_crop
:noindex: :noindex:
mean_iou
--------
.. autofunction:: paddle.fluid.layers.mean_iou
:noindex:
ops ops
=== ===
...@@ -742,12 +790,6 @@ logical_not ...@@ -742,12 +790,6 @@ logical_not
.. autofunction:: paddle.fluid.layers.logical_not .. autofunction:: paddle.fluid.layers.logical_not
:noindex: :noindex:
uniform_random
--------------
.. autofunction:: paddle.fluid.layers.uniform_random
:noindex:
uniform_random_batch_size_like uniform_random_batch_size_like
------------------------------ ------------------------------
...@@ -766,12 +808,6 @@ gaussian_random_batch_size_like ...@@ -766,12 +808,6 @@ gaussian_random_batch_size_like
.. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like .. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like
:noindex: :noindex:
cumsum
------
.. autofunction:: paddle.fluid.layers.cumsum
:noindex:
scatter scatter
------- -------
...@@ -784,12 +820,30 @@ sum ...@@ -784,12 +820,30 @@ sum
.. autofunction:: paddle.fluid.layers.sum .. autofunction:: paddle.fluid.layers.sum
:noindex: :noindex:
slice
-----
.. autofunction:: paddle.fluid.layers.slice
:noindex:
polygon_box_transform
---------------------
.. autofunction:: paddle.fluid.layers.polygon_box_transform
:noindex:
shape shape
----- -----
.. autofunction:: paddle.fluid.layers.shape .. autofunction:: paddle.fluid.layers.shape
:noindex: :noindex:
maxout
------
.. autofunction:: paddle.fluid.layers.maxout
:noindex:
sigmoid sigmoid
------- -------
...@@ -946,18 +1000,6 @@ stanh ...@@ -946,18 +1000,6 @@ stanh
.. autofunction:: paddle.fluid.layers.stanh .. autofunction:: paddle.fluid.layers.stanh
:noindex: :noindex:
hard_shrink
-----------
.. autofunction:: paddle.fluid.layers.hard_shrink
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex:
hard_sigmoid hard_sigmoid
------------ ------------
...@@ -970,6 +1012,30 @@ swish ...@@ -970,6 +1012,30 @@ swish
.. autofunction:: paddle.fluid.layers.swish .. autofunction:: paddle.fluid.layers.swish
:noindex: :noindex:
uniform_random
--------------
.. autofunction:: paddle.fluid.layers.uniform_random
:noindex:
hard_shrink
-----------
.. autofunction:: paddle.fluid.layers.hard_shrink
:noindex:
cumsum
------
.. autofunction:: paddle.fluid.layers.cumsum
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex:
tensor tensor
====== ======
...@@ -1027,6 +1093,18 @@ fill_constant ...@@ -1027,6 +1093,18 @@ fill_constant
.. autofunction:: paddle.fluid.layers.fill_constant .. autofunction:: paddle.fluid.layers.fill_constant
:noindex: :noindex:
argmin
------
.. autofunction:: paddle.fluid.layers.argmin
:noindex:
argmax
------
.. autofunction:: paddle.fluid.layers.argmax
:noindex:
ones ones
---- ----
...@@ -1039,3 +1117,114 @@ zeros ...@@ -1039,3 +1117,114 @@ zeros
.. autofunction:: paddle.fluid.layers.zeros .. autofunction:: paddle.fluid.layers.zeros
:noindex: :noindex:
detection
=========
prior_box
---------
.. autofunction:: paddle.fluid.layers.prior_box
:noindex:
multi_box_head
--------------
.. autofunction:: paddle.fluid.layers.multi_box_head
:noindex:
bipartite_match
---------------
.. autofunction:: paddle.fluid.layers.bipartite_match
:noindex:
target_assign
-------------
.. autofunction:: paddle.fluid.layers.target_assign
:noindex:
detection_output
----------------
.. autofunction:: paddle.fluid.layers.detection_output
:noindex:
ssd_loss
--------
.. autofunction:: paddle.fluid.layers.ssd_loss
:noindex:
detection_map
-------------
.. autofunction:: paddle.fluid.layers.detection_map
:noindex:
iou_similarity
--------------
.. autofunction:: paddle.fluid.layers.iou_similarity
:noindex:
box_coder
---------
.. autofunction:: paddle.fluid.layers.box_coder
:noindex:
learning_rate_scheduler
=======================
exponential_decay
-----------------
.. autofunction:: paddle.fluid.layers.exponential_decay
:noindex:
natural_exp_decay
-----------------
.. autofunction:: paddle.fluid.layers.natural_exp_decay
:noindex:
inverse_time_decay
------------------
.. autofunction:: paddle.fluid.layers.inverse_time_decay
:noindex:
polynomial_decay
----------------
.. autofunction:: paddle.fluid.layers.polynomial_decay
:noindex:
piecewise_decay
---------------
.. autofunction:: paddle.fluid.layers.piecewise_decay
:noindex:
noam_decay
----------
.. autofunction:: paddle.fluid.layers.noam_decay
:noindex:
metric
======
accuracy
--------
.. autofunction:: paddle.fluid.layers.accuracy
:noindex:
auc
---
.. autofunction:: paddle.fluid.layers.auc
:noindex:
...@@ -171,7 +171,7 @@ Pytorch chooses immediate evaluation. It avoids ever materializing a "forward gr ...@@ -171,7 +171,7 @@ Pytorch chooses immediate evaluation. It avoids ever materializing a "forward gr
## What can fluid learn from them? ## What can fluid learn from them?
TBD Please refer to `paddle/contrib/dynamic/`.
# Appendix # Appendix
......
...@@ -101,7 +101,7 @@ value_printer ...@@ -101,7 +101,7 @@ value_printer
:noindex: :noindex:
Detection Detection
===== ==========
detection_map detection_map
------------- -------------
......
...@@ -11,7 +11,7 @@ Data layer ...@@ -11,7 +11,7 @@ Data layer
data data
---- ----
.. autoclass:: paddle.v2.layer.data .. autofunction:: paddle.v2.layer.data
:noindex: :noindex:
Fully Connected Layers Fully Connected Layers
...@@ -21,12 +21,12 @@ Fully Connected Layers ...@@ -21,12 +21,12 @@ Fully Connected Layers
fc fc
-- --
.. autoclass:: paddle.v2.layer.fc .. autofunction:: paddle.v2.layer.fc
:noindex: :noindex:
selective_fc selective_fc
------------ ------------
.. autoclass:: paddle.v2.layer.selective_fc .. autofunction:: paddle.v2.layer.selective_fc
:noindex: :noindex:
Conv Layers Conv Layers
...@@ -34,34 +34,34 @@ Conv Layers ...@@ -34,34 +34,34 @@ Conv Layers
conv_operator conv_operator
------------- -------------
.. autoclass:: paddle.v2.layer.conv_operator .. autofunction:: paddle.v2.layer.conv_operator
:noindex: :noindex:
conv_projection conv_projection
--------------- ---------------
.. autoclass:: paddle.v2.layer.conv_projection .. autofunction:: paddle.v2.layer.conv_projection
:noindex: :noindex:
conv_shift conv_shift
---------- ----------
.. autoclass:: paddle.v2.layer.conv_shift .. autofunction:: paddle.v2.layer.conv_shift
:noindex: :noindex:
img_conv img_conv
-------- --------
.. autoclass:: paddle.v2.layer.img_conv .. autofunction:: paddle.v2.layer.img_conv
:noindex: :noindex:
.. _api_v2.layer_context_projection: .. _api_v2.layer_context_projection:
context_projection context_projection
------------------ ------------------
.. autoclass:: paddle.v2.layer.context_projection .. autofunction:: paddle.v2.layer.context_projection
:noindex: :noindex:
row_conv row_conv
-------- --------
.. autoclass:: paddle.v2.layer.row_conv .. autofunction:: paddle.v2.layer.row_conv
:noindex: :noindex:
Image Pooling Layer Image Pooling Layer
...@@ -69,27 +69,27 @@ Image Pooling Layer ...@@ -69,27 +69,27 @@ Image Pooling Layer
img_pool img_pool
-------- --------
.. autoclass:: paddle.v2.layer.img_pool .. autofunction:: paddle.v2.layer.img_pool
:noindex: :noindex:
spp spp
--- ---
.. autoclass:: paddle.v2.layer.spp .. autofunction:: paddle.v2.layer.spp
:noindex: :noindex:
maxout maxout
------ ------
.. autoclass:: paddle.v2.layer.maxout .. autofunction:: paddle.v2.layer.maxout
:noindex: :noindex:
roi_pool roi_pool
-------- --------
.. autoclass:: paddle.v2.layer.roi_pool .. autofunction:: paddle.v2.layer.roi_pool
:noindex: :noindex:
pad pad
---- ----
.. autoclass:: paddle.v2.layer.pad .. autofunction:: paddle.v2.layer.pad
:noindex: :noindex:
Norm Layer Norm Layer
...@@ -97,27 +97,27 @@ Norm Layer ...@@ -97,27 +97,27 @@ Norm Layer
img_cmrnorm img_cmrnorm
----------- -----------
.. autoclass:: paddle.v2.layer.img_cmrnorm .. autofunction:: paddle.v2.layer.img_cmrnorm
:noindex: :noindex:
batch_norm batch_norm
---------- ----------
.. autoclass:: paddle.v2.layer.batch_norm .. autofunction:: paddle.v2.layer.batch_norm
:noindex: :noindex:
sum_to_one_norm sum_to_one_norm
--------------- ---------------
.. autoclass:: paddle.v2.layer.sum_to_one_norm .. autofunction:: paddle.v2.layer.sum_to_one_norm
:noindex: :noindex:
cross_channel_norm cross_channel_norm
------------------ ------------------
.. autoclass:: paddle.v2.layer.cross_channel_norm .. autofunction:: paddle.v2.layer.cross_channel_norm
:noindex: :noindex:
row_l2_norm row_l2_norm
----------- -----------
.. autoclass:: paddle.v2.layer.row_l2_norm .. autofunction:: paddle.v2.layer.row_l2_norm
:noindex: :noindex:
Recurrent Layers Recurrent Layers
...@@ -125,22 +125,22 @@ Recurrent Layers ...@@ -125,22 +125,22 @@ Recurrent Layers
recurrent recurrent
--------- ---------
.. autoclass:: paddle.v2.layer.recurrent .. autofunction:: paddle.v2.layer.recurrent
:noindex: :noindex:
lstmemory lstmemory
--------- ---------
.. autoclass:: paddle.v2.layer.lstmemory .. autofunction:: paddle.v2.layer.lstmemory
:noindex: :noindex:
grumemory grumemory
--------- ---------
.. autoclass:: paddle.v2.layer.grumemory .. autofunction:: paddle.v2.layer.grumemory
:noindex: :noindex:
gated_unit gated_unit
----------- -----------
.. autoclass:: paddle.v2.layer.gated_unit .. autofunction:: paddle.v2.layer.gated_unit
:noindex: :noindex:
Recurrent Layer Group Recurrent Layer Group
...@@ -148,32 +148,32 @@ Recurrent Layer Group ...@@ -148,32 +148,32 @@ Recurrent Layer Group
memory memory
------ ------
.. autoclass:: paddle.v2.layer.memory .. autofunction:: paddle.v2.layer.memory
:noindex: :noindex:
recurrent_group recurrent_group
--------------- ---------------
.. autoclass:: paddle.v2.layer.recurrent_group .. autofunction:: paddle.v2.layer.recurrent_group
:noindex: :noindex:
lstm_step lstm_step
--------- ---------
.. autoclass:: paddle.v2.layer.lstm_step .. autofunction:: paddle.v2.layer.lstm_step
:noindex: :noindex:
gru_step gru_step
-------- --------
.. autoclass:: paddle.v2.layer.gru_step .. autofunction:: paddle.v2.layer.gru_step
:noindex: :noindex:
beam_search beam_search
------------ ------------
.. autoclass:: paddle.v2.layer.beam_search .. autofunction:: paddle.v2.layer.beam_search
:noindex: :noindex:
get_output get_output
---------- ----------
.. autoclass:: paddle.v2.layer.get_output .. autofunction:: paddle.v2.layer.get_output
:noindex: :noindex:
Mixed Layer Mixed Layer
...@@ -183,54 +183,54 @@ Mixed Layer ...@@ -183,54 +183,54 @@ Mixed Layer
mixed mixed
----- -----
.. autoclass:: paddle.v2.layer.mixed .. autofunction:: paddle.v2.layer.mixed
:noindex: :noindex:
.. _api_v2.layer_embedding: .. _api_v2.layer_embedding:
embedding embedding
--------- ---------
.. autoclass:: paddle.v2.layer.embedding .. autofunction:: paddle.v2.layer.embedding
:noindex: :noindex:
scaling_projection scaling_projection
------------------ ------------------
.. autoclass:: paddle.v2.layer.scaling_projection .. autofunction:: paddle.v2.layer.scaling_projection
:noindex: :noindex:
dotmul_projection dotmul_projection
----------------- -----------------
.. autoclass:: paddle.v2.layer.dotmul_projection .. autofunction:: paddle.v2.layer.dotmul_projection
:noindex: :noindex:
dotmul_operator dotmul_operator
--------------- ---------------
.. autoclass:: paddle.v2.layer.dotmul_operator .. autofunction:: paddle.v2.layer.dotmul_operator
:noindex: :noindex:
full_matrix_projection full_matrix_projection
---------------------- ----------------------
.. autoclass:: paddle.v2.layer.full_matrix_projection .. autofunction:: paddle.v2.layer.full_matrix_projection
:noindex: :noindex:
identity_projection identity_projection
------------------- -------------------
.. autoclass:: paddle.v2.layer.identity_projection .. autofunction:: paddle.v2.layer.identity_projection
:noindex: :noindex:
slice_projection slice_projection
------------------- -------------------
.. autoclass:: paddle.v2.layer.slice_projection .. autofunction:: paddle.v2.layer.slice_projection
:noindex: :noindex:
table_projection table_projection
---------------- ----------------
.. autoclass:: paddle.v2.layer.table_projection .. autofunction:: paddle.v2.layer.table_projection
:noindex: :noindex:
trans_full_matrix_projection trans_full_matrix_projection
---------------------------- ----------------------------
.. autoclass:: paddle.v2.layer.trans_full_matrix_projection .. autofunction:: paddle.v2.layer.trans_full_matrix_projection
:noindex: :noindex:
Aggregate Layers Aggregate Layers
...@@ -245,51 +245,46 @@ AggregateLevel ...@@ -245,51 +245,46 @@ AggregateLevel
pooling pooling
------- -------
.. autoclass:: paddle.v2.layer.pooling .. autofunction:: paddle.v2.layer.pooling
:noindex: :noindex:
.. _api_v2.layer_last_seq: .. _api_v2.layer_last_seq:
last_seq last_seq
-------- --------
.. autoclass:: paddle.v2.layer.last_seq .. autofunction:: paddle.v2.layer.last_seq
:noindex: :noindex:
.. _api_v2.layer_first_seq: .. _api_v2.layer_first_seq:
first_seq first_seq
--------- ---------
.. autoclass:: paddle.v2.layer.first_seq .. autofunction:: paddle.v2.layer.first_seq
:noindex: :noindex:
sub_seq sub_seq
--------- ---------
.. autoclass:: paddle.v2.layer.sub_seq .. autofunction:: paddle.v2.layer.sub_seq
:noindex: :noindex:
concat concat
------ ------
.. autoclass:: paddle.v2.layer.concat .. autofunction:: paddle.v2.layer.concat
:noindex: :noindex:
seq_concat seq_concat
---------- ----------
.. autoclass:: paddle.v2.layer.seq_concat .. autofunction:: paddle.v2.layer.seq_concat
:noindex: :noindex:
seq_slice seq_slice
--------- ---------
.. autoclass:: paddle.v2.layer.seq_slice .. autofunction:: paddle.v2.layer.seq_slice
:noindex:
kmax_sequence_score
-------------------
.. autoclass:: paddle.v2.layer.kmax_sequence_score
:noindex: :noindex:
sub_nested_seq sub_nested_seq
-------------- --------------
.. autoclass:: paddle.v2.layer.sub_nested_seq .. autofunction:: paddle.v2.layer.sub_nested_seq
:noindex: :noindex:
Reshaping Layers Reshaping Layers
...@@ -297,7 +292,7 @@ Reshaping Layers ...@@ -297,7 +292,7 @@ Reshaping Layers
block_expand block_expand
------------ ------------
.. autoclass:: paddle.v2.layer.block_expand .. autofunction:: paddle.v2.layer.block_expand
:noindex: :noindex:
.. _api_v2.layer_expand: .. _api_v2.layer_expand:
...@@ -309,22 +304,22 @@ ExpandLevel ...@@ -309,22 +304,22 @@ ExpandLevel
expand expand
------ ------
.. autoclass:: paddle.v2.layer.expand .. autofunction:: paddle.v2.layer.expand
:noindex: :noindex:
repeat repeat
------ ------
.. autoclass:: paddle.v2.layer.repeat .. autofunction:: paddle.v2.layer.repeat
:noindex: :noindex:
rotate rotate
------ ------
.. autoclass:: paddle.v2.layer.rotate .. autofunction:: paddle.v2.layer.rotate
:noindex: :noindex:
seq_reshape seq_reshape
----------- -----------
.. autoclass:: paddle.v2.layer.seq_reshape .. autofunction:: paddle.v2.layer.seq_reshape
:noindex: :noindex:
Math Layers Math Layers
...@@ -332,94 +327,94 @@ Math Layers ...@@ -332,94 +327,94 @@ Math Layers
addto addto
----- -----
.. autoclass:: paddle.v2.layer.addto .. autofunction:: paddle.v2.layer.addto
:noindex: :noindex:
linear_comb linear_comb
----------- -----------
.. autoclass:: paddle.v2.layer.linear_comb .. autofunction:: paddle.v2.layer.linear_comb
:noindex: :noindex:
interpolation interpolation
------------- -------------
.. autoclass:: paddle.v2.layer.interpolation .. autofunction:: paddle.v2.layer.interpolation
:noindex: :noindex:
bilinear_interp bilinear_interp
--------------- ---------------
.. autoclass:: paddle.v2.layer.bilinear_interp .. autofunction:: paddle.v2.layer.bilinear_interp
:noindex: :noindex:
dropout dropout
-------- --------
.. autoclass:: paddle.v2.layer.dropout .. autofunction:: paddle.v2.layer.dropout
:noindex: :noindex:
dot_prod dot_prod
--------- ---------
.. autoclass:: paddle.v2.layer.dot_prod .. autofunction:: paddle.v2.layer.dot_prod
:noindex: :noindex:
out_prod out_prod
-------- --------
.. autoclass:: paddle.v2.layer.out_prod .. autofunction:: paddle.v2.layer.out_prod
:noindex: :noindex:
power power
----- -----
.. autoclass:: paddle.v2.layer.power .. autofunction:: paddle.v2.layer.power
:noindex: :noindex:
scaling scaling
------- -------
.. autoclass:: paddle.v2.layer.scaling .. autofunction:: paddle.v2.layer.scaling
:noindex: :noindex:
clip clip
---- ----
.. autoclass:: paddle.v2.layer.clip .. autofunction:: paddle.v2.layer.clip
:noindex: :noindex:
resize resize
------ ------
.. autoclass:: paddle.v2.layer.resize .. autofunction:: paddle.v2.layer.resize
:noindex: :noindex:
slope_intercept slope_intercept
--------------- ---------------
.. autoclass:: paddle.v2.layer.slope_intercept .. autofunction:: paddle.v2.layer.slope_intercept
:noindex: :noindex:
tensor tensor
------ ------
.. autoclass:: paddle.v2.layer.tensor .. autofunction:: paddle.v2.layer.tensor
:noindex: :noindex:
.. _api_v2.layer_cos_sim: .. _api_v2.layer_cos_sim:
cos_sim cos_sim
------- -------
.. autoclass:: paddle.v2.layer.cos_sim .. autofunction:: paddle.v2.layer.cos_sim
:noindex: :noindex:
l2_distance l2_distance
----------- -----------
.. autoclass:: paddle.v2.layer.l2_distance .. autofunction:: paddle.v2.layer.l2_distance
:noindex: :noindex:
trans trans
----- -----
.. autoclass:: paddle.v2.layer.trans .. autofunction:: paddle.v2.layer.trans
:noindex: :noindex:
scale_shift scale_shift
----------- -----------
.. autoclass:: paddle.v2.layer.scale_shift .. autofunction:: paddle.v2.layer.scale_shift
:noindex: :noindex:
factorization_machine factorization_machine
--------------------- ---------------------
.. autoclass:: paddle.v2.layer.factorization_machine .. autofunction:: paddle.v2.layer.factorization_machine
:noindex: :noindex:
Sampling Layers Sampling Layers
...@@ -427,17 +422,17 @@ Sampling Layers ...@@ -427,17 +422,17 @@ Sampling Layers
maxid maxid
----- -----
.. autoclass:: paddle.v2.layer.max_id .. autofunction:: paddle.v2.layer.max_id
:noindex: :noindex:
sampling_id sampling_id
----------- -----------
.. autoclass:: paddle.v2.layer.sampling_id .. autofunction:: paddle.v2.layer.sampling_id
:noindex: :noindex:
multiplex multiplex
--------- ---------
.. autoclass:: paddle.v2.layer.multiplex .. autofunction:: paddle.v2.layer.multiplex
:noindex: :noindex:
.. _api_v2.layer_costs: .. _api_v2.layer_costs:
...@@ -447,97 +442,97 @@ Cost Layers ...@@ -447,97 +442,97 @@ Cost Layers
cross_entropy_cost cross_entropy_cost
------------------ ------------------
.. autoclass:: paddle.v2.layer.cross_entropy_cost .. autofunction:: paddle.v2.layer.cross_entropy_cost
:noindex: :noindex:
cross_entropy_with_selfnorm_cost cross_entropy_with_selfnorm_cost
-------------------------------- --------------------------------
.. autoclass:: paddle.v2.layer.cross_entropy_with_selfnorm_cost .. autofunction:: paddle.v2.layer.cross_entropy_with_selfnorm_cost
:noindex: :noindex:
multi_binary_label_cross_entropy_cost multi_binary_label_cross_entropy_cost
------------------------------------- -------------------------------------
.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost .. autofunction:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
:noindex: :noindex:
classification_cost classification_cost
------------------- -------------------
.. autoclass:: paddle.v2.layer.classification_cost .. autofunction:: paddle.v2.layer.classification_cost
:noindex: :noindex:
huber_regression_cost huber_regression_cost
------------------------- -------------------------
.. autoclass:: paddle.v2.layer.huber_regression_cost .. autofunction:: paddle.v2.layer.huber_regression_cost
:noindex: :noindex:
huber_classification_cost huber_classification_cost
------------------------- -------------------------
.. autoclass:: paddle.v2.layer.huber_classification_cost .. autofunction:: paddle.v2.layer.huber_classification_cost
:noindex: :noindex:
lambda_cost lambda_cost
----------- -----------
.. autoclass:: paddle.v2.layer.lambda_cost .. autofunction:: paddle.v2.layer.lambda_cost
:noindex: :noindex:
square_error_cost square_error_cost
----------------- -----------------
.. autoclass:: paddle.v2.layer.square_error_cost .. autofunction:: paddle.v2.layer.square_error_cost
:noindex: :noindex:
rank_cost rank_cost
--------- ---------
.. autoclass:: paddle.v2.layer.rank_cost .. autofunction:: paddle.v2.layer.rank_cost
:noindex: :noindex:
sum_cost sum_cost
--------- ---------
.. autoclass:: paddle.v2.layer.sum_cost .. autofunction:: paddle.v2.layer.sum_cost
:noindex: :noindex:
crf crf
--- ---
.. autoclass:: paddle.v2.layer.crf .. autofunction:: paddle.v2.layer.crf
:noindex: :noindex:
crf_decoding crf_decoding
------------ ------------
.. autoclass:: paddle.v2.layer.crf_decoding .. autofunction:: paddle.v2.layer.crf_decoding
:noindex: :noindex:
ctc ctc
--- ---
.. autoclass:: paddle.v2.layer.ctc .. autofunction:: paddle.v2.layer.ctc
:noindex: :noindex:
warp_ctc warp_ctc
-------- --------
.. autoclass:: paddle.v2.layer.warp_ctc .. autofunction:: paddle.v2.layer.warp_ctc
:noindex: :noindex:
nce nce
--- ---
.. autoclass:: paddle.v2.layer.nce .. autofunction:: paddle.v2.layer.nce
:noindex: :noindex:
hsigmoid hsigmoid
--------- ---------
.. autoclass:: paddle.v2.layer.hsigmoid .. autofunction:: paddle.v2.layer.hsigmoid
:noindex: :noindex:
smooth_l1_cost smooth_l1_cost
-------------- --------------
.. autoclass:: paddle.v2.layer.smooth_l1_cost .. autofunction:: paddle.v2.layer.smooth_l1_cost
:noindex: :noindex:
multibox_loss multibox_loss
-------------- --------------
.. autoclass:: paddle.v2.layer.multibox_loss .. autofunction:: paddle.v2.layer.multibox_loss
:noindex: :noindex:
detection_output detection_output
---------------- ----------------
.. autoclass:: paddle.v2.layer.detection_output .. autofunction:: paddle.v2.layer.detection_output
:noindex: :noindex:
Check Layer Check Layer
...@@ -545,7 +540,7 @@ Check Layer ...@@ -545,7 +540,7 @@ Check Layer
eos eos
--- ---
.. autoclass:: paddle.v2.layer.eos .. autofunction:: paddle.v2.layer.eos
:noindex: :noindex:
Activation Activation
...@@ -553,5 +548,5 @@ Activation ...@@ -553,5 +548,5 @@ Activation
prelu prelu
-------- --------
.. autoclass:: paddle.v2.layer.prelu .. autofunction:: paddle.v2.layer.prelu
:noindex: :noindex:
...@@ -8,4 +8,3 @@ API ...@@ -8,4 +8,3 @@ API
model_configs.rst model_configs.rst
data.rst data.rst
run_logic.rst run_logic.rst
fluid/index.rst
...@@ -60,6 +60,7 @@ paddlepaddle-gpu==0.11.0 使用CUDA 7.5和cuDNN 5编译的0.11.0版 ...@@ -60,6 +60,7 @@ paddlepaddle-gpu==0.11.0 使用CUDA 7.5和cuDNN 5编译的0.11.0版
"cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_" "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__" "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__" "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cuda9.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
.. _pip_dependency: .. _pip_dependency:
......
...@@ -63,6 +63,7 @@ If the links below shows up the login form, just click "Log in as guest" to star ...@@ -63,6 +63,7 @@ If the links below shows up the login form, just click "Log in as guest" to star
"cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`__" "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__" "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__" "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cuda9.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
.. _pip_dependency: .. _pip_dependency:
......
...@@ -104,7 +104,7 @@ no changes added to commit (use "git add" and/or "git commit -a") ...@@ -104,7 +104,7 @@ no changes added to commit (use "git add" and/or "git commit -a")
➜ docker run -it -v $(pwd):/paddle paddle:latest-dev bash -c "cd /paddle/build && ctest" ➜ docker run -it -v $(pwd):/paddle paddle:latest-dev bash -c "cd /paddle/build && ctest"
``` ```
关于构建和测试的更多信息,请参见[这篇文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_cn.rst) 关于构建和测试的更多信息,请参见[使用Docker安装运行](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/v2/build_and_install/docker_install_cn.rst)
## 提交(commit) ## 提交(commit)
......
...@@ -14,3 +14,4 @@ ...@@ -14,3 +14,4 @@
# #
add_subdirectory(inference) add_subdirectory(inference)
add_subdirectory(tape)
...@@ -17,48 +17,9 @@ if(APPLE) ...@@ -17,48 +17,9 @@ if(APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move")
endif(APPLE) endif(APPLE)
set(ANAKIN_INCLUDE "" CACHE STRING "root of Anakin header files")
set(ANAKIN_LIBRARY "" CACHE STRING "path of Anakin library")
set(inference_deps paddle_inference_api paddle_fluid_api) set(inference_deps paddle_inference_api paddle_fluid_api)
# if anakin is set enable anakin api implementation
if(ANAKIN_INCLUDE AND ANAKIN_LIBRARY)
set(ANAKIN_FOUND ON)
else()
set(ANAKIN_FOUND OFF)
endif()
function(fetch_include_recursively root_dir)
if (IS_DIRECTORY ${root_dir})
include_directories(${root_dir})
endif()
file(GLOB ALL_SUB RELATIVE ${root_dir} ${root_dir}/*)
foreach(sub ${ALL_SUB})
if (IS_DIRECTORY ${root_dir}/${sub})
fetch_include_recursively(${root_dir}/${sub})
endif()
endforeach()
endfunction()
if (ANAKIN_FOUND)
# Anakin's code style doesn't follow google c style.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp")
message(STATUS "Anakin for inference is enabled")
message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
fetch_include_recursively(${ANAKIN_INCLUDE})
link_directories(${ANAKIN_LIBRARY})
nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
target_link_libraries(inference_anakin_api anakin anakin_saber_common)
list(APPEND inference_deps inference_anakin_api)
endif()
function(inference_api_test TARGET_NAME) function(inference_api_test TARGET_NAME)
if (WITH_TESTING) if (WITH_TESTING)
set(options "") set(options "")
...@@ -79,7 +40,7 @@ function(inference_api_test TARGET_NAME) ...@@ -79,7 +40,7 @@ function(inference_api_test TARGET_NAME)
endfunction(inference_api_test) endfunction(inference_api_test)
cc_library(paddle_inference_api cc_library(paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
cc_test(test_paddle_inference_api cc_test(test_paddle_inference_api
...@@ -89,9 +50,17 @@ cc_test(test_paddle_inference_api ...@@ -89,9 +50,17 @@ cc_test(test_paddle_inference_api
inference_api_test(test_paddle_inference_api_impl inference_api_test(test_paddle_inference_api_impl
ARGS test_word2vec test_image_classification) ARGS test_word2vec test_image_classification)
if (ANAKIN_FOUND) if (WITH_ANAKIN AND WITH_TESTING) # only needed in CI
# Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's,
# so anakin library will not be merged to our official inference library. To use anakin prediction API, one need to
# compile the libinference_anakin_api.a and compile with anakin.so.
nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
target_compile_options(inference_anakin_api BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
target_link_libraries(inference_anakin_api anakin anakin_saber_common)
cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
DEPS ${inference_deps}) ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
DEPS inference_anakin_api)
target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
endif() endif()
if(WITH_TESTING) if(WITH_TESTING)
......
...@@ -12,9 +12,8 @@ ...@@ -12,9 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <cuda.h>
#include "paddle/contrib/inference/paddle_inference_api_anakin_engine.h" #include "paddle/contrib/inference/paddle_inference_api_anakin_engine.h"
#include <cuda.h>
namespace paddle { namespace paddle {
......
...@@ -19,10 +19,9 @@ limitations under the License. */ ...@@ -19,10 +19,9 @@ limitations under the License. */
#pragma once #pragma once
// NOTE This header file do not have namespace.
//#include <test/framework/net/paddle_api.h>
#include "paddle/contrib/inference/paddle_inference_api.h" #include "paddle/contrib/inference/paddle_inference_api.h"
// from anakin
#include "framework/core/net/net.h" #include "framework/core/net/net.h"
#include "saber/saber_types.h" #include "saber/saber_types.h"
......
...@@ -12,17 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,17 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h> #include <glog/logging.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "paddle/contrib/inference/paddle_inference_api.h" #include "paddle/contrib/inference/paddle_inference_api.h"
DEFINE_string(model, "", "Directory of the inference model.");
namespace paddle { namespace paddle {
AnakinConfig GetConfig() { AnakinConfig GetConfig() {
AnakinConfig config; AnakinConfig config;
config.model_file = "./mobilenet_v2.anakin.bin"; config.model_file = FLAGS_model;
config.device = 0; config.device = 0;
config.max_batch_size = 1; config.max_batch_size = 1;
return config; return config;
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
if(APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move")
endif(APPLE)
cc_library(tape_variable SRCS variable.cc DEPS ${FLUID_CORE_MODULES} device_context framework_proto proto_desc operator)
cc_library(tape SRCS tape.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} tape_variable)
cc_test(test_tape
SRCS test_tape.cc
DEPS tape tape_variable)
# Dynamic Graph on Fluid
PaddlePaddle Fluid is targeting the autodiff without tape, which, however, is very
challenging and we are still way from there. DyNet and PyTorch provide a good design
idea, the *tape*, that significantly eases the challenge. Also, DyNet provides
a C++ API that is as convenient as Python but with higher efficiency and could
conveniently integrate with industrial/production systems. This package, `tape`,
combines the good of
1. tape from PyTorch and DyNet
2. C++ API and core from DyNet
3. rich set of operators from PaddlePaddle
## Overview
We can implement Dynet-like Tape(See this [survey](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/survey/dynamic_graph.md))
by wrapping Paddle Fluid's `Operator` and `Variable`.
The user API is straight forward since
1. it is imperative. And it uses host language's control flow logic.
1. it avoids extra concepts such as `Scope` and `Executor`.
All of these benefits come at the cost of just adding one line `reset_global_tape`
at every iteration.
## Code Structure
In short, the `Tape` contains a vector of `OpHandle`s. And an `OpHandle` contains its
`type`, the pointers to the `Variable`s, and necessary attributes.
```c++
class Variable {
public:
VriableHandle Grad(); // returns its gradient variable
private:
framework::VarDesc desc_; // compile time infershape, necessary for lazy execution
framework::Variable var_; // run time variable, holds data memory
};
using VariableHandle = shared_ptr<Variable>;
struct OpHandle {
string type_;
map<string, vector<VariableHandle>> inputs_;
map<string, vector<VariableHandle>> outputs_;
AttributeMap attrs_;
};
class Tape {
public:
void AddOp(OpHandle); // add op
void Forward(); // execute the tape_
void Backward(); // execute the backward of the tape_
private:
vector<OpHandle> tape_;
};
```
We uses `Function` to indicate layers. It takes care of parameter
initialization and `AddOp` to the Tape when it is called.
```c++
class Linear {
public:
Linear(int in_dim, int out_dim, const std::string &act)
: w_(new Variable("LinearWeight")),
b_(new Variable("LinearBias")),
act_(act) {
Tape init_tape;
std::string initializer = "fill_constant";
framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{in_dim, out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs);
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs);
init_tape.Forward();
}
VariableHandle operator()(VariableHandle input) {
VariableHandle pre_bias(new Variable("linear"));
get_global_tape().AddOp("mul",
{{"X", {input}}, {"Y", {w_}}},
{{"Out", {pre_bias}}},
{{"x_num_col_dims", 1}, {"y_num_col_dims", 1}});
VariableHandle pre_act(new Variable("linear"));
get_global_tape().AddOp("elementwise_add",
{{"X", {pre_bias}}, {"Y", {b_}}},
{{"Out", {pre_act}}},
{{"axis", 1}});
VariableHandle post_act(new Variable("linear"));
get_global_tape().AddOp(act_,
{{"X", {pre_act}}},
{{"Out", {post_act}}},
{});
return post_act;
}
std::vector<VariableHandle> Params() { return {w_, b_}; }
private:
VariableHandle w_;
VariableHandle b_;
std::string act_;
};
```
## User API
```c++
// Model function
paddle::tape::Linear linear1(3, 3, "relu"); // init weight and bias
paddle::tape::Linear linear2(3, 3, "relu"); // init weight and bias
paddle::tape::Mean mean;
// Optimizer
paddle::tape::SGD sgd(0.001);
// Data Feeder
paddle::tape::Fill data_feeder(...);
VariableHandle input(new paddle::tape::Variable("input"));
VariableHandle label(new paddle::tape::Variable("label"));
for (int i = 0; i < 2; ++i) {
reset_global_tape();
data_feeder(input, label);
auto loss = softmax(linear2(linear1(input)), label); // compile time InferShape & InferVarType
LOG(INFO) << loss.value(); // Run forward up to loss
// Run backward, store gradient of w at w->Grad()
get_global_tape.Backward(loss);
// Update w
sgd(linear1.Params());
sgd(linear2.Params());
}
```
<details>
<summary></summary>
digraph G {
subgraph cluster_0 {
node [shape=record,style=filled];
style=filled;
color=lightgrey;
linear1 [label="{type: mul | {input | {<before_mul1>X: before_mul1 |<weight1> Y: weight1}} | {output |<before_bias1> Out: before_bias1}}"];
elementwise_add1 [label="{type: elementwise_add | {input | {<before_bias1>X: before_bias1 |<bias1> Y: bias1}} | {output |<before_act1> Out: before_act1}}"];
relu1 [label="{type: relu | {input | {<before_act1>X: before_act1 }} | {output |<after_act1> Out: after_act1}}"];
linear1 -> elementwise_add1->relu1;
label = "forward tape";
}
linear1:before_mul1->before_mul1
linear1:weight1->weight1
linear1:before_bias1->before_bias1
elementwise_add1:bias1->bias1
elementwise_add1:before_bias1->before_bias1
elementwise_add1:before_act1->before_act1
relu1:before_act1->before_act1
relu1:after_act1->after_act1
subgraph cluster_1 {
node [shape=record,style=filled];
style=filled;
color=lightgrey;
linear1_grad [label="{type: mul_grad | {input | {<before_mul1>X: before_mul1 |<weight1> Y: weight1|<before_bias1_grad> Out_grad: before_bias1_grad}} | {output |{<before_mul1_grad>X_grad: before_mul1_grad |<weight1_grad> Y_grad: weight1_grad}}}"];
elementwise_add1_grad [label="{type: elementwise_add_grad | {input | <before_act1_grad> Out_grad: before_act1_grad} | {output |{<before_bias1_grad>X_grad: before_bias1_grad |<bias1_grad> Y_grad: bias1_grad}}}"];
relu1_grad [label="{type: relu_grad | {input |<after_act1_grad> Out_grad: after_act1_grad} | {ouput | {<before_act1_grad>X_grad: before_act1_grad }}}"];
linear1_grad -> elementwise_add1_grad ->relu1_grad [dir=back];
label = "backward tape";
}
relu1_grad:after_act1_grad->after_act1_grad
relu1_grad:before_act1_grad->before_act1_grad
elementwise_add1_grad:before_act1_grad->before_act1_grad
elementwise_add1_grad:before_bias1_grad->before_bias1_grad
elementwise_add1_grad:bias1_grad->bias1_grad
linear1_grad:before_mul1->before_mul1
linear1_grad:weight1->weight1
linear1_grad:before_bias1_grad->before_bias1_grad
linear1_grad:before_mul1_grad->before_mul1_grad
linear1_grad:weight1_grad->weight1_grad
subgraph cluster_2 {
node [shape=record];
label = "Linear1";
weight1
bias1
}
weight1 -> weight1_grad [ label="Grad()", style="dashed" ];
bias1 -> bias1_grad [ label="Grad()", style="dashed"];
}
</details>
![Image](https://github.com/tonyyang-svail/Paddle/blob/cpp_tap/paddle/contrib/tape/computation_graph.png)
## Code Reuse
We want to stay close to Paddle Fluid as much as possible.
### Reuse All Operators
As all Ops are registered at `OpInfoMap`, the effort of adding a new `Function`
is about 10 lines of code, similar to expose an operator to Python.
### Reuse Compile Time InferShape and InferVarType
Note that all the symbolic information is stored at `tape::Varaible::desc_`, instead
of `ProgramDesc.block.vars`, we create a temporary `BlockDesc` to do `InferShape` and
`InferVarType` every time we `AddOp` to the tape.
### Reuse Operator::Run
We use smart pointer, instead of `Scope`, to manage memory. So we create a temporary
`Scope` for every `Operator::Run()`.
## Possible Feature
### Release Memory on Backward
We can release memory aggressively. During backward, we can delete the OpHandle once
we have finished its backward. Since all the variable is managed by smart pointer, the
memory is automatically released when its `ref_count` goes to 0.
### Kernel Fusion
As a symbolic representation of the Tape is constructed first before the actual
execution, it would be possible to perform graph optimization. One use case is kernel
fusion.
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/contrib/tape/tape.h"
#include "paddle/contrib/tape/variable.h"
#include "paddle/fluid/framework/type_defs.h"
namespace paddle {
namespace tape {
class Function {};
class Fill {
public:
Fill(const std::string &initializer, const framework::AttributeMap &attrs)
: initializer_(initializer), attrs_(attrs) {}
void operator()(VariableHandle var) {
get_global_tape().AddOp(initializer_, {}, {{"Out", {var}}}, attrs_);
}
private:
const std::string initializer_;
const framework::AttributeMap attrs_;
};
class Mean {
public:
VariableHandle operator()(VariableHandle var) {
VariableHandle out(new Variable("mean"));
get_global_tape().AddOp("mean", {{"X", {var}}}, {{"Out", {out}}}, {});
return out;
}
};
class Linear {
public:
Linear(int in_dim, int out_dim, const std::string &act)
: w_(new Variable("LinearWeight")),
b_(new Variable("LinearBias")),
act_(act) {
Tape init_tape;
std::string initializer = "fill_constant";
framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{in_dim, out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs);
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs);
init_tape.Forward();
}
VariableHandle operator()(VariableHandle input) {
VariableHandle pre_bias(new Variable("linear"));
get_global_tape().AddOp("mul",
{{"X", {input}}, {"Y", {w_}}},
{{"Out", {pre_bias}}},
{{"x_num_col_dims", 1}, {"y_num_col_dims", 1}});
VariableHandle pre_act(new Variable("linear"));
get_global_tape().AddOp("elementwise_add",
{{"X", {pre_bias}}, {"Y", {b_}}},
{{"Out", {pre_act}}},
{{"axis", 1}});
VariableHandle post_act(new Variable("linear"));
get_global_tape().AddOp(
act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {});
return post_act;
}
std::vector<VariableHandle> Params() { return {w_, b_}; }
private:
VariableHandle w_;
VariableHandle b_;
std::string act_;
};
class SGD {
public:
SGD(float learning_rate) : learning_rate_(new Variable("sgd")) {
Tape init_tape;
std::string initializer = "fill_constant";
framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{1};
attrs["value"] = learning_rate;
init_tape.AddOp(initializer, {}, {{"Out", {learning_rate_}}}, attrs);
init_tape.Forward();
}
void operator()(VariableHandle input) {
PADDLE_ENFORCE(get_global_tape().HasBeenBackwarded(),
"optimization must happen after the backward");
Tape temp_tape;
temp_tape.AddOp("sgd",
{{"Param", {input}},
{"LearningRate", {learning_rate_}},
{"Grad", {input->Grad()}}},
{{"ParamOut", {input}}},
{});
temp_tape.Forward();
}
private:
VariableHandle learning_rate_;
};
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/contrib/tape/tape.h"
#include <list>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/dim.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/pybind/pybind.h"
namespace paddle {
namespace tape {
// borrowed from
// https://stackoverflow.com/questions/874134/find-if-string-ends-with-another-string-in-c
inline bool ends_with(std::string const &value, std::string const &ending) {
if (ending.size() > value.size()) return false;
return std::equal(ending.rbegin(), ending.rend(), value.rbegin());
}
std::ostream &operator<<(std::ostream &os, const framework::VarDesc &var_desc) {
os << var_desc.Name();
os << "[" << var_desc.GetType() << "]";
os << "[" << var_desc.GetDataType() << "]";
os << "{";
for (auto &i : var_desc.GetShape()) {
os << i << ",";
}
os << "}";
return os;
}
std::string to_string(const std::string &type,
const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars,
const framework::AttributeMap &attrs) {
std::stringstream ss;
ss << type << " ";
for (auto &param_name : in_vars) {
for (auto &var : param_name.second) {
ss << param_name.first << ":(" << var->Desc() << ") ";
}
}
for (auto &param_name : out_vars) {
for (auto &var : param_name.second) {
ss << param_name.first << ":(" << var->Desc() << ") ";
}
}
return ss.str();
}
framework::OpDesc CreateOpDesc(const std::string &type,
const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars,
const framework::AttributeMap &attrs) {
framework::VariableNameMap inputs;
for (auto &param_name : in_vars) {
for (auto &var : param_name.second) {
inputs[param_name.first].emplace_back(var->Name());
}
}
framework::VariableNameMap outputs;
for (auto &param_name : out_vars) {
for (auto &var : param_name.second) {
outputs[param_name.first].emplace_back(var->Name());
}
}
return framework::OpDesc(type, inputs, outputs, attrs);
}
void InferShapeAndVarType(const std::string &type,
const VariableHandleMap &in_vars,
VariableHandleMap *out_vars,
const framework::AttributeMap &attrs) {
framework::OpDesc op_desc = CreateOpDesc(type, in_vars, *out_vars, attrs);
// Create a temporary block for compile-time
framework::ProgramDesc program_desc;
framework::BlockDesc *block_desc = program_desc.MutableBlock(0);
PADDLE_ENFORCE(block_desc);
for (auto &param_name : in_vars) {
for (auto &var : param_name.second) {
*block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto();
}
}
for (auto &param_name : *out_vars) {
for (auto &var : param_name.second) {
*block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto();
}
}
LOG(INFO) << "- " << to_string(type, in_vars, *out_vars, attrs);
op_desc.InferShape(*block_desc);
op_desc.InferVarType(block_desc);
for (auto &param_name : *out_vars) {
for (auto &var : param_name.second) {
*var->MutableDesc()->Proto() = *block_desc->Var(var->Name())->Proto();
}
}
LOG(INFO) << "+ " << to_string(type, in_vars, *out_vars, attrs);
}
void Tape::AddOp(const std::string &type,
const VariableHandleMap &in_vars,
VariableHandleMap out_vars,
const framework::AttributeMap &attrs) {
InferShapeAndVarType(type, in_vars, &out_vars, attrs);
tape_.emplace_back(type, in_vars, out_vars, attrs);
}
// Temporary Scope for Operator::Run()
class ScopeWrapper : public framework::Scope {
public:
ScopeWrapper(const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars) {
for (auto &v : in_vars) {
for (auto &vv : v.second) {
if (!vars_.count(vv->Name())) {
vars_[vv->Name()].reset(vv->Var());
}
}
}
for (auto &v : out_vars) {
for (auto &vv : v.second) {
if (!vars_.count(vv->Name())) {
vars_[vv->Name()].reset(vv->Var());
}
}
}
}
~ScopeWrapper() {
for (auto &pair : vars_) {
pair.second.release();
}
}
};
void Tape::Forward() {
LOG(INFO) << "Starting forward -------------------------";
PADDLE_ENFORCE(!has_been_backwarded_);
while (current_position_ < tape_.size()) {
OpHandle &op = tape_[current_position_];
// Create Output Tensor, this is only necessary for OpWithKernel
for (auto &param2var : op.outputs_) {
for (auto &var : param2var.second) {
var->InitializeVariable();
}
}
framework::OpDesc op_desc =
CreateOpDesc(op.type_, op.inputs_, op.outputs_, op.attrs_);
ScopeWrapper scope(op.inputs_, op.outputs_);
framework::OpRegistry::CreateOp(op_desc)->Run(scope, platform::CPUPlace());
current_position_++;
}
LOG(INFO) << "Finishing forward -------------------------";
}
void Tape::Backward(VariableHandle target) {
PADDLE_ENFORCE(!has_been_backwarded_);
Forward();
// TODO(tonyyang-svail): check output of last op is target
backward_tape_.reset(new Tape());
framework::AttributeMap attrs;
// FIXME(tonyyang-svail): Need to infer_data_type
attrs["dtype"] = framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{1};
attrs["value"] = 1.0f;
backward_tape_->AddOp(
"fill_constant", {}, {{"Out", {target->Grad()}}}, attrs);
for (auto it = tape_.rbegin(); it != tape_.rend(); ++it) {
framework::OpDesc op_desc =
CreateOpDesc(it->type_, it->inputs_, it->outputs_, it->attrs_);
std::unordered_map<std::string, std::string> grad_to_var;
std::vector<std::unique_ptr<framework::OpDesc>> grad_op_descs =
framework::OpInfoMap::Instance()
.Get(op_desc.Type())
.GradOpMaker()(op_desc, {}, &grad_to_var, {});
for (auto &op_desc : grad_op_descs) {
std::unordered_map<std::string, VariableHandle> name2var;
for (auto &param2vars : it->inputs_) {
for (auto &a : param2vars.second) {
name2var[a->Name()] = a;
}
}
for (auto &param2vars : it->outputs_) {
for (auto &a : param2vars.second) {
name2var[a->Name()] = a;
}
}
VariableHandleMap in_vars;
VariableHandleMap out_vars;
std::map<const framework::VariableNameMap *, VariableHandleMap *>
loop_over{{&op_desc->Inputs(), &in_vars},
{&op_desc->Outputs(), &out_vars}};
for (auto &each : loop_over) {
auto &vmp = *each.first;
auto &vhm = *each.second;
for (auto &p2a : vmp) {
for (auto &argu : p2a.second) {
if (name2var.count(argu)) {
vhm[p2a.first].push_back(name2var[argu]);
} else {
PADDLE_ENFORCE(ends_with(argu, framework::kGradVarSuffix),
argu.c_str());
std::string name = argu.substr(
0, argu.size() - std::strlen(framework::kGradVarSuffix));
PADDLE_ENFORCE(name2var.count(name), name.c_str());
vhm[p2a.first].push_back(name2var[name]->Grad());
}
}
}
}
backward_tape_->AddOp(
op_desc->Type(), in_vars, out_vars, op_desc->GetAttrMap());
}
// TODO(tonyyang-svail): how to fill empty grad?
// TODO(tonyyang-svail): Sum var grad is necessary
}
backward_tape_->Forward();
has_been_backwarded_ = true;
}
Tape &get_global_tape() {
static Tape T;
return T;
}
void reset_global_tape() { get_global_tape() = Tape(); }
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/contrib/tape/variable.h"
namespace paddle {
namespace tape {
using VariableHandleMap = std::map<std::string, std::vector<VariableHandle>>;
struct OpHandle {
OpHandle(const std::string &type,
const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars,
const framework::AttributeMap &attrs)
: type_(type), inputs_(in_vars), outputs_(out_vars), attrs_(attrs) {}
std::string type_;
VariableHandleMap inputs_;
VariableHandleMap outputs_;
framework::AttributeMap attrs_;
};
class Tape {
public:
void AddOp(const std::string &type,
const VariableHandleMap &in_vars,
VariableHandleMap out_vars,
const framework::AttributeMap &attrs);
void Forward();
void Backward(VariableHandle target);
bool HasBeenBackwarded() { return has_been_backwarded_; }
private:
bool has_been_backwarded_ = false;
size_t current_position_ = 0;
std::vector<OpHandle> tape_;
std::shared_ptr<Tape> backward_tape_;
};
Tape &get_global_tape();
void reset_global_tape();
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "paddle/contrib/tape/function.h"
using namespace paddle::tape;
TEST(Tape, TestMLP) {
LOG(INFO) << "TestMLP";
Linear linear1(3, 3, "relu");
Linear linear2(3, 3, "relu");
Mean mean;
SGD sgd(0.001);
std::string initializer = "fill_constant";
paddle::framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{3, 3};
attrs["value"] = 1.0f;
Fill filler(initializer, attrs);
for (int i = 0; i < 2; ++i) {
reset_global_tape();
VariableHandle input(new Variable("input"));
filler(input);
auto loss = mean(linear2(linear1(input)));
get_global_tape().Backward(loss);
for (auto w : linear1.Params()) {
sgd(w);
}
for (auto w : linear2.Params()) {
sgd(w);
}
}
}
int main(int argc, char** argv) {
std::vector<paddle::platform::Place> places;
places.emplace_back(paddle::platform::CPUPlace());
paddle::platform::DeviceContextPool::Init(places);
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/contrib/tape/variable.h"
namespace paddle {
namespace tape {
void Variable::InitializeVariable() {
LOG(INFO) << "Initialzing " << desc_.Name() << " as " << desc_.GetType();
framework::proto::VarType::Type var_type = desc_.GetType();
if (var_type == framework::proto::VarType::LOD_TENSOR) {
var_.GetMutable<framework::LoDTensor>();
} else if (var_type == framework::proto::VarType::SELECTED_ROWS) {
var_.GetMutable<framework::SelectedRows>();
} else {
PADDLE_THROW("Variable type %d is not in [LOD_TENSOR, SELECTED_ROWS]",
var_type);
}
}
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/framework/operator.h" // framework::kGradVarSuffix
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/variable.h"
namespace paddle {
namespace tape {
class Variable;
using VariableHandle = std::shared_ptr<Variable>;
/*
* Combination of
* framework::VarDesc desc_;
* framework::Variable var_;
*/
class Variable {
public:
Variable(const std::string pre_fix)
: desc_(pre_fix + std::to_string(count())) {}
Variable(const std::string pre_fix, bool is_grad)
: desc_(pre_fix + (is_grad ? framework::kGradVarSuffix
: std::to_string(count()))) {}
~Variable() { LOG(INFO) << "Deleting " << Name(); }
// Instantiate LoDTensor/SelectedRow
void InitializeVariable();
VariableHandle Grad() {
if (grad_.expired()) {
VariableHandle new_grad(new Variable(desc_.Name(), true));
grad_ = new_grad;
return new_grad;
} else {
return VariableHandle(grad_);
}
}
// Stochastic Gradient Descent with Momentum
// VariableHandle Momentum ();
// void init(const std::string& initializer,
// const framework::AttributeMap& attrs);
// void value() {};
const framework::VarDesc& Desc() const { return desc_; }
framework::VarDesc* MutableDesc() { return &desc_; }
// TODO(tonyyang-svail): No need to expose name
std::string Name() const { return desc_.Name(); }
framework::Variable* Var() { return &var_; }
private:
int count() {
static int counter = 0;
return counter++;
}
framework::VarDesc desc_;
framework::Variable var_;
std::weak_ptr<Variable> grad_;
};
}
}
...@@ -84,7 +84,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) ...@@ -84,7 +84,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr) cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr)
set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
else() else()
......
...@@ -330,8 +330,12 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, ...@@ -330,8 +330,12 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
} }
for (auto& op : ctx->ops_) { for (auto& op : ctx->ops_) {
VLOG(3) << place_ << " " << op->DebugStringEx(local_scope); VLOG(4) << place_ << " " << op->DebugStringEx(local_scope);
op->Run(*local_scope, place_); op->Run(*local_scope, place_);
// NOTE! Please do not delete this line, it's usefull because the debug
// string before and after op.run are different, after run the output
// will have right shape which is usefull for debug.
VLOG(3) << place_ << " " << op->DebugStringEx(local_scope);
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: " VLOG(2) << "Memory used after operator " + op->Type() + " running: "
...@@ -402,6 +406,9 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) { ...@@ -402,6 +406,9 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) {
} }
} }
} }
#else
LOG(WARNING)
<< "'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option";
#endif #endif
} }
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/init.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/piece.h" #include "paddle/fluid/string/piece.h"
...@@ -113,6 +114,9 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) { ...@@ -113,6 +114,9 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
} }
places.emplace_back(platform::CPUPlace()); places.emplace_back(platform::CPUPlace());
platform::DeviceContextPool::Init(places); platform::DeviceContextPool::Init(places);
#ifndef PADDLE_WITH_MKLDNN
operators::math::SetNumThreads(1);
#endif
} }
void InitGLOG(const std::string &prog_name) { void InitGLOG(const std::string &prog_name) {
......
...@@ -410,5 +410,38 @@ void LoDTensor::MergeLoDTensor( ...@@ -410,5 +410,38 @@ void LoDTensor::MergeLoDTensor(
} }
} }
LoD ConvertToLengthBasedLoD(const LoD &offset_lod) {
LoD length_lod;
length_lod.reserve(offset_lod.size());
for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) {
std::vector<size_t> level;
if (offset_lod[lvl].size() > 0) {
level.reserve(offset_lod[lvl].size() - 1);
}
for (size_t idx = 0; idx < offset_lod[lvl].size() - 1; ++idx) {
level.push_back(offset_lod[lvl][idx + 1] - offset_lod[lvl][idx]);
}
length_lod.push_back(level);
}
return length_lod;
}
LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
LoD offset_lod;
offset_lod.reserve(length_lod.size());
for (size_t lvl = 0; lvl < length_lod.size(); ++lvl) {
std::vector<size_t> level;
level.reserve(length_lod[lvl].size() + 1);
size_t tmp = 0;
level.push_back(tmp);
for (size_t idx = 0; idx < length_lod[lvl].size(); ++idx) {
tmp += length_lod[lvl][idx];
level.push_back(tmp);
}
offset_lod.push_back(level);
}
return offset_lod;
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -226,5 +226,19 @@ extern void WriteToRecordIO(recordio::Writer* writer, ...@@ -226,5 +226,19 @@ extern void WriteToRecordIO(recordio::Writer* writer,
extern std::vector<LoDTensor> ReadFromRecordIO( extern std::vector<LoDTensor> ReadFromRecordIO(
recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx); recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx);
/*
* Convert between length-based LoD and offset-based LoD.
* The implementation of LoDTensor class use offset-based LoD.
* However, we want to expose the more user-friendly length-based
* LoD to the Python side instead.
*
* Example:
* If offset_lod = [[0, 2, 3],[0, 3, 5, 9]]
* then length_lod = [[2, 1], [3, 2, 4]]
*/
LoD ConvertToLengthBasedLoD(const LoD& offset_lod);
LoD ConvertToOffsetBasedLoD(const LoD& length_lod);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -228,6 +228,38 @@ TEST(LoD, CheckAbsLoD) { ...@@ -228,6 +228,38 @@ TEST(LoD, CheckAbsLoD) {
ASSERT_FALSE(CheckAbsLoD(abs_lod0)); ASSERT_FALSE(CheckAbsLoD(abs_lod0));
} }
TEST(LoD, ConvertToLengthBasedLoD) {
LoD offset_lod;
offset_lod.push_back(std::vector<size_t>({0, 2}));
offset_lod.push_back(std::vector<size_t>({0, 1, 3}));
offset_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));
LoD length_lod = ConvertToLengthBasedLoD(offset_lod);
LoD expected;
expected.push_back(std::vector<size_t>({2}));
expected.push_back(std::vector<size_t>({1, 2}));
expected.push_back(std::vector<size_t>({2, 2, 1}));
EXPECT_EQ(length_lod, expected);
}
TEST(LoD, ConvertToOffsetBasedLoD) {
LoD length_lod;
length_lod.push_back(std::vector<size_t>({2}));
length_lod.push_back(std::vector<size_t>({1, 2}));
length_lod.push_back(std::vector<size_t>({2, 2, 1}));
LoD offset_lod = ConvertToOffsetBasedLoD(length_lod);
LoD expected;
expected.push_back(std::vector<size_t>({0, 2}));
expected.push_back(std::vector<size_t>({0, 1, 3}));
expected.push_back(std::vector<size_t>({0, 2, 4, 5}));
EXPECT_EQ(offset_lod, expected);
}
template <typename T> template <typename T>
static void TestRecordIO() { static void TestRecordIO() {
LoDTensor tensor; LoDTensor tensor;
......
...@@ -69,6 +69,19 @@ static DDim GetDims(const Scope& scope, const std::string& name, ...@@ -69,6 +69,19 @@ static DDim GetDims(const Scope& scope, const std::string& name,
} }
} }
static int GetRowSize(const Scope& scope, const std::string& name) {
Variable* var = scope.FindVar(name);
if (var == nullptr) {
return -1;
}
if (var->IsType<SelectedRows>()) {
return var->Get<SelectedRows>().rows().size();
}
return -1;
}
static LoD GetLoD(const Scope& scope, const std::string& name) { static LoD GetLoD(const Scope& scope, const std::string& name) {
Variable* var = scope.FindVar(name); Variable* var = scope.FindVar(name);
auto default_lod = LoD({{}}); auto default_lod = LoD({{}});
...@@ -85,6 +98,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { ...@@ -85,6 +98,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
} }
void OperatorBase::Run(const Scope& scope, const platform::Place& place) { void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
VLOG(10) << "- " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) { if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA #ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place); PADDLE_THROW("Cannot run operator on place %s", place);
...@@ -94,6 +108,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { ...@@ -94,6 +108,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
#endif #endif
} }
RunImpl(scope, place); RunImpl(scope, place);
VLOG(10) << "+ " << DebugStringEx(&scope);
} }
bool OperatorBase::HasInputs(const std::string& name) const { bool OperatorBase::HasInputs(const std::string& name) const {
...@@ -153,6 +168,10 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { ...@@ -153,6 +168,10 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
for (size_t i = 0; i < input.second.size(); ++i) { for (size_t i = 0; i < input.second.size(); ++i) {
ss << input.second[i]; ss << input.second[i];
if (scope) { if (scope) {
int row_size = GetRowSize(*scope, input.second[i]);
if (row_size >= 0) {
ss << "[row_size=" << row_size << "]";
}
ss << "[" << GetDims(*scope, input.second[i], true) << "]"; ss << "[" << GetDims(*scope, input.second[i], true) << "]";
ss << "(" << GetLoD(*scope, input.second[i]) << ")"; ss << "(" << GetLoD(*scope, input.second[i]) << ")";
} }
...@@ -173,6 +192,10 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { ...@@ -173,6 +192,10 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
for (size_t i = 0; i < output.second.size(); ++i) { for (size_t i = 0; i < output.second.size(); ++i) {
ss << output.second[i]; ss << output.second[i];
if (scope) { if (scope) {
int row_size = GetRowSize(*scope, output.second[i]);
if (row_size >= 0) {
ss << "[row_size=" << row_size << "]";
}
ss << "[" << GetDims(*scope, output.second[i], true) << "]"; ss << "[" << GetDims(*scope, output.second[i], true) << "]";
ss << "(" << GetLoD(*scope, output.second[i]) << ")"; ss << "(" << GetLoD(*scope, output.second[i]) << ")";
} }
......
...@@ -145,9 +145,9 @@ void ParallelExecutor::BCastParamsToGPUs( ...@@ -145,9 +145,9 @@ void ParallelExecutor::BCastParamsToGPUs(
auto &dims = main_tensor.dims(); auto &dims = main_tensor.dims();
if (paddle::platform::is_gpu_place(main_tensor.place())) { if (paddle::platform::is_gpu_place(main_tensor.place())) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
std::vector<void *> buffers;
size_t numel = main_tensor.numel(); size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type()); ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
platform::NCCLGroupGuard guard;
for (size_t i = 0; i < member_->places_.size(); ++i) { for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i]; auto place = member_->places_[i];
void *buffer; void *buffer;
...@@ -159,11 +159,21 @@ void ParallelExecutor::BCastParamsToGPUs( ...@@ -159,11 +159,21 @@ void ParallelExecutor::BCastParamsToGPUs(
t->Resize(dims); t->Resize(dims);
buffer = t->mutable_data(place, main_tensor.type()); buffer = t->mutable_data(place, main_tensor.type());
} }
auto &nccl_ctx = member_->nccl_ctxs_->at(place); buffers.push_back(buffer);
platform::dynload::ncclBcast(buffer, numel, data_type, 0,
nccl_ctx.comm_, nccl_ctx.stream());
} }
member_->nccl_ctxs_->WaitAll();
PADDLE_ENFORCE_EQ(member_->places_.size(), buffers.size(),
"variables' buffer size to bcast NOT equal to places");
{
platform::NCCLGroupGuard guard;
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto &nccl_ctx = member_->nccl_ctxs_->at(member_->places_[i]);
platform::dynload::ncclBcast(buffers[i], numel, data_type, 0,
nccl_ctx.comm_, nccl_ctx.stream());
}
member_->nccl_ctxs_->WaitAll();
}
#else #else
PADDLE_THROW("Not compiled with CUDA"); PADDLE_THROW("Not compiled with CUDA");
#endif #endif
......
...@@ -35,14 +35,15 @@ class ReaderBase { ...@@ -35,14 +35,15 @@ class ReaderBase {
class DecoratedReader : public ReaderBase { class DecoratedReader : public ReaderBase {
public: public:
explicit DecoratedReader(ReaderBase* reader) : ReaderBase(), reader_(reader) { explicit DecoratedReader(const std::shared_ptr<ReaderBase>& reader)
: ReaderBase(), reader_(reader) {
PADDLE_ENFORCE_NOT_NULL(reader_); PADDLE_ENFORCE_NOT_NULL(reader_);
} }
void ReInit() override { reader_->ReInit(); } void ReInit() override { reader_->ReInit(); }
protected: protected:
ReaderBase* reader_; std::shared_ptr<ReaderBase> reader_;
}; };
class FileReader : public ReaderBase { class FileReader : public ReaderBase {
...@@ -64,7 +65,7 @@ class ReaderHolder { ...@@ -64,7 +65,7 @@ class ReaderHolder {
public: public:
void Reset(ReaderBase* reader) { reader_.reset(reader); } void Reset(ReaderBase* reader) { reader_.reset(reader); }
ReaderBase* Get() const { return reader_.get(); } std::shared_ptr<ReaderBase> Get() const { return reader_; }
void ReadNext(std::vector<LoDTensor>* out) { void ReadNext(std::vector<LoDTensor>* out) {
PADDLE_ENFORCE_NOT_NULL(reader_); PADDLE_ENFORCE_NOT_NULL(reader_);
...@@ -76,7 +77,7 @@ class ReaderHolder { ...@@ -76,7 +77,7 @@ class ReaderHolder {
} }
private: private:
std::unique_ptr<ReaderBase> reader_; std::shared_ptr<ReaderBase> reader_;
}; };
} // namespace framework } // namespace framework
......
...@@ -43,48 +43,29 @@ Scope& Scope::NewScope() const { ...@@ -43,48 +43,29 @@ Scope& Scope::NewScope() const {
} }
Variable* Scope::Var(const std::string& name) { Variable* Scope::Var(const std::string& name) {
// acquire the lock when new var under this scope
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
auto* v = FindVarLocally(name); return VarInternal(name);
if (v != nullptr) return v;
v = new Variable();
vars_[name].reset(v);
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
} }
Variable* Scope::Var(std::string* name) { Variable* Scope::Var(std::string* name) {
auto var_name = string::Sprintf("%p.%d", this, vars_.size()); std::unique_lock<std::mutex> lock(mutex_);
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
if (name != nullptr) { if (name != nullptr) {
*name = var_name; *name = new_name;
} }
return Var(var_name); return VarInternal(new_name);
} }
Variable* Scope::FindVar(const std::string& name) const { Variable* Scope::FindVar(const std::string& name) const {
// acquire the lock when find var
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
return FindVarInternal(name); return FindVarInternal(name);
} }
Variable* Scope::FindVarInternal(const std::string& name) const {
auto var = FindVarLocally(name);
if (var != nullptr) {
return var;
}
return (parent_ == nullptr) ? nullptr : parent_->FindVarInternal(name);
}
const Scope* Scope::FindScope(const Variable* var) const { const Scope* Scope::FindScope(const Variable* var) const {
for (auto& kv : vars_) { std::unique_lock<std::mutex> lock(mutex_);
if (kv.second.get() == var) { return FindScopeInternal(var);
return this;
}
}
return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
} }
void Scope::DropKids() { void Scope::DropKids() {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
for (Scope* s : kids_) delete s; for (Scope* s : kids_) delete s;
...@@ -92,6 +73,7 @@ void Scope::DropKids() { ...@@ -92,6 +73,7 @@ void Scope::DropKids() {
} }
std::vector<std::string> Scope::LocalVarNames() const { std::vector<std::string> Scope::LocalVarNames() const {
std::unique_lock<std::mutex> lock(mutex_);
std::vector<std::string> known_vars; std::vector<std::string> known_vars;
known_vars.reserve(this->vars_.size()); known_vars.reserve(this->vars_.size());
for (auto& p : vars_) { for (auto& p : vars_) {
...@@ -127,6 +109,39 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) { ...@@ -127,6 +109,39 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name, void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const { const std::string& new_name) const {
std::unique_lock<std::mutex> lock(mutex_);
RenameInternal(origin_name, new_name);
}
std::string Scope::Rename(const std::string& origin_name) const {
std::unique_lock<std::mutex> lock(mutex_);
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
RenameInternal(origin_name, new_name);
return new_name;
}
Variable* Scope::VarInternal(const std::string& name) {
auto* v = FindVarLocally(name);
if (v != nullptr) return v;
v = new Variable();
vars_[name].reset(v);
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
}
const Scope* Scope::FindScopeInternal(const Variable* var) const {
for (auto& kv : vars_) {
if (kv.second.get() == var) {
return this;
}
}
return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
}
void Scope::RenameInternal(const std::string& origin_name,
const std::string& new_name) const {
auto origin_it = vars_.find(origin_name); auto origin_it = vars_.find(origin_name);
PADDLE_ENFORCE(origin_it != vars_.end(), PADDLE_ENFORCE(origin_it != vars_.end(),
"Cannot find original variable with name %s", origin_name); "Cannot find original variable with name %s", origin_name);
...@@ -137,10 +152,12 @@ void Scope::Rename(const std::string& origin_name, ...@@ -137,10 +152,12 @@ void Scope::Rename(const std::string& origin_name,
vars_.erase(origin_it); vars_.erase(origin_it);
} }
std::string Scope::Rename(const std::string& origin_name) const { Variable* Scope::FindVarInternal(const std::string& name) const {
auto var_name = string::Sprintf("%p.%d", this, vars_.size()); auto var = FindVarLocally(name);
Rename(origin_name, var_name); if (var != nullptr) {
return var_name; return var;
}
return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
} }
Variable* Scope::FindVarLocally(const std::string& name) const { Variable* Scope::FindVarLocally(const std::string& name) const {
......
...@@ -81,20 +81,29 @@ class Scope { ...@@ -81,20 +81,29 @@ class Scope {
// Rename variable to a new name and return the new name // Rename variable to a new name and return the new name
std::string Rename(const std::string& origin_name) const; std::string Rename(const std::string& origin_name) const;
protected:
mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
private: private:
// Call Scope::NewScope for a sub-scope. // Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const* parent) : parent_(parent) {} explicit Scope(Scope const* parent) : parent_(parent) {}
// Called by Var.
Variable* VarInternal(const std::string& name);
// Called by FindScope.
const Scope* FindScopeInternal(const Variable* var) const;
// Called by Rename.
void RenameInternal(const std::string& origin_name,
const std::string& new_name) const;
// Called by FindVar recursively. // Called by FindVar recursively.
// Caller doesn't own the returned Variable.
Variable* FindVarInternal(const std::string& name) const; Variable* FindVarInternal(const std::string& name) const;
// Called by FindVarInternal and Var. // Called by FindVarInternal and Var.
// Caller doesn't own the returned Variable.
Variable* FindVarLocally(const std::string& name) const; Variable* FindVarLocally(const std::string& name) const;
mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
// Scope in `kids_` are owned by this class. // Scope in `kids_` are owned by this class.
mutable std::list<Scope*> kids_; mutable std::list<Scope*> kids_;
Scope const* parent_{nullptr}; Scope const* parent_{nullptr};
......
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init) set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)
cc_library(analysis SRCS dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc fluid_to_data_flow_graph_pass.cc cc_library(analysis SRCS pass_manager.cc dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
DEPS paddle_fluid) fluid_to_data_flow_graph_pass.cc
data_flow_graph_to_fluid_pass.cc
tensorrt_subgraph_pass.cc
dfg_graphviz_draw_pass.cc
DEPS framework_proto)
cc_test(test_node SRCS node_tester.cc DEPS analysis) cc_test(test_node SRCS node_tester.cc DEPS analysis)
cc_test(test_dot SRCS dot_tester.cc DEPS analysis) cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests) set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
cc_test(test_data_flow_graph SRCS data_flow_graph_tester.cc DEPS analysis ${FLUID_CORE_MODULES} paddle_fluid function (inference_analysis_test TARGET)
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model) set(options "")
set_tests_properties(test_data_flow_graph PROPERTIES DEPENDS test_word2vec) set(oneValueArgs "")
set(multiValueArgs SRCS)
cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cc_test(test_subgraph_splitter cc_test(${TARGET}
SRCS subgraph_splitter_tester.cc SRCS "${analysis_test_SRCS}"
DEPS analysis paddle_fluid tensor DEPS analysis
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model) ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model --fraction_of_gpu_memory_to_use=0.5)
set_tests_properties(test_subgraph_splitter PROPERTIES DEPENDS test_word2vec) set_tests_properties(${TARGET} PROPERTIES DEPENDS test_word2vec)
endfunction(inference_analysis_test)
cc_test(test_dfg_graphviz_draw_pass inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
SRCS dfg_graphviz_draw_pass_tester.cc inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)
DEPS analysis inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc)
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model) inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc)
set_tests_properties(test_dfg_graphviz_draw_pass PROPERTIES DEPENDS test_word2vec) inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc)
#inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc)
inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/argument.h"
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* This file defines the class Argument, which is the input and output of the
* analysis module. All the fields that needed either by Passes or PassManagers
* are contained in Argument.
*
* TODO(Superjomn) Find some way better to contain the fields when it grow too
* big.
*/
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* The argument definition of both Pass and PassManagers.
*
* All the fields should be registered here for clearness.
*/
struct Argument {
// The graph that process by the Passes or PassManagers.
std::unique_ptr<DataFlowGraph> main_dfg;
// The original program desc.
std::unique_ptr<framework::proto::ProgramDesc> origin_program_desc;
};
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#define ANALYSIS_ARGUMENT_CHECK_FIELD(field__) \
if (!UNLIKELY(field__)) { \
LOG(ERROR) << "field " << #field__ << " should be set."; \
return false; \
}
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/dot.h" #include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
...@@ -57,19 +58,7 @@ std::string DataFlowGraph::DotString() const { ...@@ -57,19 +58,7 @@ std::string DataFlowGraph::DotString() const {
// Add nodes // Add nodes
for (size_t i = 0; i < nodes.size(); i++) { for (size_t i = 0; i < nodes.size(); i++) {
const Node &node = nodes.Get(i); const Node &node = nodes.Get(i);
switch (node.type()) { dot.AddNode(node.repr(), node.dot_attrs());
case Node::Type::kValue:
dot.AddNode(node.repr(), node.dot_attrs());
break;
case Node::Type::kFunction:
dot.AddNode(node.repr(), node.dot_attrs());
break;
case Node::Type::kFunctionBlock:
dot.AddNode(node.repr(), node.dot_attrs());
break;
default:
PADDLE_THROW("unsupported Node type %d", static_cast<int>(node.type()));
}
} }
// Add edges // Add edges
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/framework/proto_desc.h"
namespace paddle {
namespace inference {
namespace analysis {
bool DataFlowGraphToFluidPass::Initialize(Argument* argument) {
ANALYSIS_ARGUMENT_CHECK_FIELD(argument)
ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc)
desc_ = argument->origin_program_desc.get();
// Here some logic from program_desc.cc and will not add new interfaces into
// framework::ProgramDesc class, use some UT to assure the correctness.
auto* block = desc_->mutable_blocks()->Add();
block->set_idx(framework::kRootBlockIndex);
block->set_parent_idx(framework::kNoneBlockIndex);
return true;
}
bool DataFlowGraphToFluidPass::Finalize() { return true; }
void DataFlowGraphToFluidPass::Run(DataFlowGraph* graph) {
auto traits = GraphTraits<DataFlowGraph>(graph);
for (auto it = traits.nodes().begin(); it != traits.nodes().end(); ++it) {
if (it->deleted()) continue;
switch (it->type()) {
case Node::Type::kFunction:
LOG(INFO) << "add function " << it->name();
AddFluidOp(&(*it));
break;
case Node::Type::kFunctionBlock:
AddEngineOp(&(*it));
break;
default:
continue;
}
}
}
void DataFlowGraphToFluidPass::AddFluidOp(Node* node) {
LOG(INFO) << "processing func " << node->name();
auto* ori_op = static_cast<framework::proto::OpDesc*>(node->pb_desc());
// currently only the main block is analyzed.
auto* main_block = desc_->mutable_blocks(framework::kRootBlockIndex);
auto* op = main_block->add_ops();
LOG(INFO) << "to copy the op";
*op = *ori_op; // copy the attributes, by default, these will not be changed
// by analysis phrase.
// The inputs and outputs of the existing ops are not changed by tensorrt
// subgraph pass.
// NOTE It might be changed by other passes in the long run.
}
void DataFlowGraphToFluidPass::AddEngineOp(Node* node) {
// auto* ori_op = static_cast<framework::proto::OpDesc*>(node->extra_info());
// auto* main_block = desc_->mutable_blocks(framework::kRootBlockIndex);
// auto* op = main_block->add_ops();
// TODO(Superjomn) Here need to expose some arguments for default setting.
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file implements the transformation from fluid ProgramDesc to data flow
* graph.
*/
#pragma once
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/pass.h"
namespace paddle {
namespace inference {
namespace analysis {
class DataFlowGraphToFluidPass final : public DataFlowGraphPass {
public:
DataFlowGraphToFluidPass() = default;
bool Initialize(Argument *argument) override;
bool Finalize() override;
void Run(DataFlowGraph *graph) override;
std::string repr() const override { return "DFG to fluid"; }
std::string description() const override {
return "Transform a DFG to a Fluid ProgramDesc";
}
Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const override {
return nullptr;
}
protected:
// Add a Fluid Op into the ProgramDesc.
void AddFluidOp(Node *node);
// Add a EngineOp into the ProgramDesc.
void AddEngineOp(Node *node);
private:
framework::proto::ProgramDesc *desc_;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -27,13 +27,12 @@ namespace inference { ...@@ -27,13 +27,12 @@ namespace inference {
namespace analysis { namespace analysis {
TEST_F(DFG_Tester, Test) { TEST_F(DFG_Tester, Test) {
framework::proto::ProgramDesc new_desc;
DataFlowGraph graph; DataFlowGraph graph;
FluidToDataFlowGraphPass pass0; FluidToDataFlowGraphPass pass0;
DataFlowGraphToFluidPass pass1; DataFlowGraphToFluidPass pass1;
pass0.Initialize(desc); ASSERT_TRUE(pass0.Initialize(&argument));
pass1.Initialize(&new_desc); ASSERT_TRUE(pass1.Initialize(&argument));
pass0.Run(&graph); pass0.Run(&graph);
pass1.Run(&graph); pass1.Run(&graph);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
namespace paddle {
namespace inference {
namespace analysis {
void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) {
auto content = Draw(graph);
std::ofstream file(GenDotPath());
file.write(content.c_str(), content.size());
file.close();
LOG(INFO) << "draw dot to " << GenDotPath();
}
std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) {
Dot dot;
// Add nodes
for (size_t i = 0; i < graph->nodes.size(); i++) {
const Node &node = graph->nodes.Get(i);
if (config_.display_deleted_node || !node.deleted()) {
dot.AddNode(node.repr(), node.dot_attrs());
}
}
// Add edges
for (size_t i = 0; i < graph->nodes.size(); i++) {
const Node &node = graph->nodes.Get(i);
if (!config_.display_deleted_node && node.deleted()) continue;
for (auto &in : node.inlinks) {
if (!config_.display_deleted_node && in->deleted()) continue;
for (auto &in : node.inlinks) {
dot.AddEdge(in->repr(), node.repr(), {});
}
}
}
return dot.Build();
}
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -21,6 +21,7 @@ limitations under the License. */ ...@@ -21,6 +21,7 @@ limitations under the License. */
#include <fstream> #include <fstream>
#include <string> #include <string>
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/pass.h" #include "paddle/fluid/inference/analysis/pass.h"
namespace paddle { namespace paddle {
...@@ -32,35 +33,39 @@ namespace analysis { ...@@ -32,35 +33,39 @@ namespace analysis {
*/ */
class DFG_GraphvizDrawPass : public DataFlowGraphPass { class DFG_GraphvizDrawPass : public DataFlowGraphPass {
public: public:
DFG_GraphvizDrawPass(const std::string& dir, const std::string& id) struct Config {
: dir_(dir), id_(id) {} Config(const std::string &dir, const std::string &id,
bool display_deleted_node = false)
bool Initialize() override { return Pass::Initialize(); } : dir(dir), id(id), display_deleted_node(display_deleted_node) {}
void Run(DataFlowGraph* graph) override {
auto content = Draw(graph); // The directory to store the .dot or .png files.
std::ofstream file(GenDotPath()); const std::string dir;
file.write(content.c_str(), content.size()); // The identifier for this dot file.
file.close(); const std::string id;
LOG(INFO) << "draw dot to " << GenDotPath(); // Whether to display deleted nodes, default false.
} const bool display_deleted_node;
};
DFG_GraphvizDrawPass(const Config &config) : config_(config) {}
bool Initialize(Argument *argument) override { return true; }
void Run(DataFlowGraph *graph) override;
bool Finalize() override { return Pass::Finalize(); } bool Finalize() override { return Pass::Finalize(); }
Pass* CreatePrinterPass(std::ostream& os, std::string repr() const override { return "DFG graphviz drawer"; }
const std::string& banner) const override { std::string description() const override {
return nullptr; return "Debug a DFG by draw with graphviz";
} }
private: private:
// Path of the dot file to output. // Path of the dot file to output.
std::string GenDotPath() const { std::string GenDotPath() const {
return dir_ + "/" + "graph_" + id_ + ".dot"; return config_.dir + "/" + "graph_" + config_.id + ".dot";
} }
std::string Draw(DataFlowGraph* graph) { return graph->DotString(); } std::string Draw(DataFlowGraph *graph);
std::string dir_; Config config_;
std::string id_;
}; };
} // namespace analysis } // namespace analysis
......
...@@ -24,9 +24,10 @@ namespace inference { ...@@ -24,9 +24,10 @@ namespace inference {
namespace analysis { namespace analysis {
TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) { TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) {
auto dfg = ProgramDescToDFG(desc); auto dfg = ProgramDescToDFG(*argument.origin_program_desc);
DFG_GraphvizDrawPass pass("./", "test"); DFG_GraphvizDrawPass::Config config("./", "test");
pass.Initialize(); DFG_GraphvizDrawPass pass(config);
pass.Initialize(&argument);
pass.Run(&dfg); pass.Run(&dfg);
// test content // test content
...@@ -38,7 +39,8 @@ TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) { ...@@ -38,7 +39,8 @@ TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) {
while (std::getline(file, line)) { while (std::getline(file, line)) {
no++; no++;
} }
ASSERT_EQ(no, 82); // DFG is sensitive to ProgramDesc, be careful to change the existing models.
ASSERT_EQ(no, 112);
} }
} // namespace analysis } // namespace analysis
......
...@@ -21,19 +21,23 @@ namespace paddle { ...@@ -21,19 +21,23 @@ namespace paddle {
namespace inference { namespace inference {
namespace analysis { namespace analysis {
FluidToDataFlowGraphPass::FluidToDataFlowGraphPass() {} bool FluidToDataFlowGraphPass::Initialize(Argument *argument) {
ANALYSIS_ARGUMENT_CHECK_FIELD(argument);
bool FluidToDataFlowGraphPass::Initialize() { return Pass::Initialize(); } ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc);
PADDLE_ENFORCE(argument);
bool FluidToDataFlowGraphPass::Initialize( if (!argument->main_dfg) {
const framework::proto::ProgramDesc &desc) { LOG(INFO) << "Init DFG";
desc_ = &desc; argument->main_dfg.reset(new DataFlowGraph);
}
desc_ = argument->origin_program_desc.get();
return true; return true;
} }
bool FluidToDataFlowGraphPass::Finalize() { return Pass::Finalize(); } bool FluidToDataFlowGraphPass::Finalize() { return Pass::Finalize(); }
void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
PADDLE_ENFORCE(graph);
PADDLE_ENFORCE(desc_);
// insert vars // insert vars
std::unordered_map<std::string, size_t> var2id; std::unordered_map<std::string, size_t> var2id;
auto &main_block = desc_->blocks(framework::kRootBlockIndex); auto &main_block = desc_->blocks(framework::kRootBlockIndex);
...@@ -41,7 +45,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { ...@@ -41,7 +45,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
const auto &var = main_block.vars(i); const auto &var = main_block.vars(i);
auto *v = graph->nodes.Create(Node::Type::kValue); auto *v = graph->nodes.Create(Node::Type::kValue);
v->SetName(var.name()); v->SetName(var.name());
v->SetExtraInfo(const_cast<void *>(static_cast<const void *>(&var))); v->SetPbDesc(const_cast<void *>(static_cast<const void *>(&var)));
var2id[var.name()] = v->id(); var2id[var.name()] = v->id();
} }
for (int i = 0; i < main_block.ops_size(); i++) { for (int i = 0; i < main_block.ops_size(); i++) {
...@@ -51,7 +55,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { ...@@ -51,7 +55,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
static_cast<Function *>(o)->SetFuncType(op.type()); static_cast<Function *>(o)->SetFuncType(op.type());
// Link to the original protobuf message's memory, make it easier to // Link to the original protobuf message's memory, make it easier to
// generate from a data flow graph to fluid ProgramDesc. // generate from a data flow graph to fluid ProgramDesc.
o->SetExtraInfo(const_cast<void *>(static_cast<const void *>(&op))); o->SetPbDesc(const_cast<void *>(static_cast<const void *>(&op)));
// set inputs and outputs // set inputs and outputs
// TODO(Superjomn) make sure the InputNames is the real variable name. // TODO(Superjomn) make sure the InputNames is the real variable name.
for (int j = 0; j < op.inputs_size(); j++) { for (int j = 0; j < op.inputs_size(); j++) {
......
...@@ -34,13 +34,18 @@ namespace analysis { ...@@ -34,13 +34,18 @@ namespace analysis {
*/ */
class FluidToDataFlowGraphPass final : public DataFlowGraphPass { class FluidToDataFlowGraphPass final : public DataFlowGraphPass {
public: public:
FluidToDataFlowGraphPass(); FluidToDataFlowGraphPass() = default;
bool Initialize() override;
bool Initialize(const framework::proto::ProgramDesc &desc) override; bool Initialize(Argument *argument) override;
bool Finalize() override; bool Finalize() override;
void Run(DataFlowGraph *graph) override; void Run(DataFlowGraph *graph) override;
std::string repr() const override { return "fluid-to-data-flow-graph"; }
std::string description() const override {
return "transform a fluid ProgramDesc to a data flow graph.";
}
Pass *CreatePrinterPass(std::ostream &os, Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const override; const std::string &banner) const override;
......
...@@ -23,11 +23,11 @@ namespace analysis { ...@@ -23,11 +23,11 @@ namespace analysis {
TEST_F(DFG_Tester, Init) { TEST_F(DFG_Tester, Init) {
FluidToDataFlowGraphPass pass; FluidToDataFlowGraphPass pass;
pass.Initialize(); pass.Initialize(&argument);
pass.Initialize(desc);
DataFlowGraph graph; DataFlowGraph graph;
pass.Run(&graph); pass.Run(&graph);
ASSERT_GT(graph.nodes.size(), 0); // Analysis is sensitive to ProgramDesc, careful to change the original model.
ASSERT_EQ(graph.nodes.size(), 37);
pass.Finalize(); pass.Finalize();
LOG(INFO) << '\n' << graph.DotString(); LOG(INFO) << '\n' << graph.DotString();
} }
......
...@@ -62,6 +62,7 @@ struct DataTypeNamer { ...@@ -62,6 +62,7 @@ struct DataTypeNamer {
SET_TYPE(int); SET_TYPE(int);
SET_TYPE(bool); SET_TYPE(bool);
SET_TYPE(float); SET_TYPE(float);
SET_TYPE(void *);
} }
std::unordered_map<decltype(typeid(int).hash_code()), // NOLINT std::unordered_map<decltype(typeid(int).hash_code()), // NOLINT
......
...@@ -40,6 +40,9 @@ Node *NodeMap::Create(Node::Type type) { ...@@ -40,6 +40,9 @@ Node *NodeMap::Create(Node::Type type) {
case Node::Type::kValue: case Node::Type::kValue:
nodes_.emplace_back(new Value); nodes_.emplace_back(new Value);
break; break;
case Node::Type::kFunctionBlock:
nodes_.emplace_back(new FunctionBlock);
break;
default: default:
PADDLE_THROW("Not supported node type."); PADDLE_THROW("Not supported node type.");
} }
......
...@@ -71,12 +71,17 @@ class Node { ...@@ -71,12 +71,17 @@ class Node {
// Get an additional attribute and convert it to T data type. NOTE this will // Get an additional attribute and convert it to T data type. NOTE this will
// silently create a new attribute if not exists. // silently create a new attribute if not exists.
Attr &attr(const std::string &name) { return attrs_[name]; } Attr &attr(const std::string &name) const { return attrs_[name]; }
int id() const { return id_; } int id() const { return id_; }
bool deleted() const { return deleted_; } // The Protobuf description is set/get with a void* to decouple Node interface
// from a specific kind of Protobuf message.
void SetPbDesc(void *pb) { attr("pb_desc").Pointer() = pb; }
void *pb_desc() const { return attr("pb_desc").Pointer(); }
void SetDeleted() { deleted_ = true; } void SetDeleted() { deleted_ = true; }
bool deleted() const { return deleted_; }
void SetName(const std::string &name) { name_ = name; } void SetName(const std::string &name) { name_ = name; }
const std::string &name() const { return name_; } const std::string &name() const { return name_; }
...@@ -84,29 +89,25 @@ class Node { ...@@ -84,29 +89,25 @@ class Node {
void SetType(Type type) { type_ = type; } void SetType(Type type) { type_ = type; }
Type type() const { return type_; } Type type() const { return type_; }
void *extra_info() const { return extra_info_; }
void SetExtraInfo(void *extra_info) { extra_info_ = extra_info; }
// Input links. // Input links.
std::vector<Node *> inlinks; std::vector<Node *> inlinks;
// Output links. // Output links.
std::vector<Node *> outlinks; std::vector<Node *> outlinks;
// A helper class to maintain the status from Pass. // A helper class to maintain the status from Pass.
// TODO(superjomn) add a checker here to ensure the T is primary.
struct Attr { struct Attr {
// NOTE T should be a primary type or a struct combined by several primary // NOTE T should be a primary type or a struct combined by several primary
// types. // types.
// NOTE the STL containers should not use here. // NOTE the STL containers should not use here.
// Some usages // Some usages
// Attr attr; // Attr attr;
// T data; // attr.Bool() = true;
// attr.data.assign((char*)data, sizeof(data));
bool &Bool() { return As<bool>(); } bool &Bool() { return As<bool>(); }
float &Float() { return As<float>(); } float &Float() { return As<float>(); }
int32_t &Int32() { return As<int32_t>(); } int32_t &Int32() { return As<int32_t>(); }
int64_t &Int64() { return As<int64_t>(); } int64_t &Int64() { return As<int64_t>(); }
void *&Pointer() { return As<void *>(); }
private: private:
template <typename T> template <typename T>
...@@ -130,6 +131,7 @@ class Node { ...@@ -130,6 +131,7 @@ class Node {
size_t type_hash_{std::numeric_limits<size_t>::max()}; size_t type_hash_{std::numeric_limits<size_t>::max()};
}; };
// Type checks.
bool IsFunction() const { return type_ == Node::Type::kFunction; } bool IsFunction() const { return type_ == Node::Type::kFunction; }
bool IsValue() const { return type_ == Node::Type::kValue; } bool IsValue() const { return type_ == Node::Type::kValue; }
bool IsFunctionBlock() const { return type_ == Node::Type::kFunctionBlock; } bool IsFunctionBlock() const { return type_ == Node::Type::kFunctionBlock; }
...@@ -148,9 +150,6 @@ class Node { ...@@ -148,9 +150,6 @@ class Node {
Type type_{Type::kNone}; Type type_{Type::kNone};
// Mark this node is deleted by some pass. // Mark this node is deleted by some pass.
bool deleted_{false}; bool deleted_{false};
void *extra_info_;
mutable std::unordered_map<std::string, Attr> attrs_; mutable std::unordered_map<std::string, Attr> attrs_;
}; };
......
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include <string> #include <string>
#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/node.h" #include "paddle/fluid/inference/analysis/node.h"
...@@ -30,19 +31,24 @@ namespace analysis { ...@@ -30,19 +31,24 @@ namespace analysis {
class Pass { class Pass {
public: public:
Pass() = default; Pass() = default;
virtual ~Pass() {} virtual ~Pass() = default;
// Virtual method overridden by subclasses to do only necessary initialization // Virtual method overridden by subclasses to do only necessary initialization
// before any pass is run. // before any pass is run.
virtual bool Initialize() { return false; } // virtual bool Initialize() { return false; }
// There is some passes such as FlowToDataFlowGraphPass that needs a // There is some passes such as FlowToDataFlowGraphPass that needs a
// ProgramDesc. Here use the native ProgramDesc ProtoBuf message, so that it // ProgramDesc. Here use the native ProgramDesc ProtoBuf message, so that it
// only couple with the proto file. // only couple with the proto file.
virtual bool Initialize(const framework::proto::ProgramDesc &desc) { // virtual bool Initialize(const framework::proto::ProgramDesc &desc) { return
return false; // false; }
}
// There are some Passes such as DataFlowGraphToFluidPass that will output a // There are some Passes such as DataFlowGraphToFluidPass that will output a
// ProgramDesc. // ProgramDesc.
virtual bool Initialize(framework::proto::ProgramDesc *desc) { return false; } // virtual bool Initialize(framework::proto::ProgramDesc *desc) { return
// false; }
// Mutable Pass.
virtual bool Initialize(Argument *argument) { return false; }
// Readonly Pass.
virtual bool Initialize(const Argument &argument) { return false; }
// Virtual method overriden by subclasses to do any necessary clean up after // Virtual method overriden by subclasses to do any necessary clean up after
// all passes have run. // all passes have run.
...@@ -50,7 +56,9 @@ class Pass { ...@@ -50,7 +56,9 @@ class Pass {
// Get a Pass appropriate to print the Node this pass operates on. // Get a Pass appropriate to print the Node this pass operates on.
virtual Pass *CreatePrinterPass(std::ostream &os, virtual Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const = 0; const std::string &banner) const {
return nullptr;
}
// Run on a single Node. // Run on a single Node.
virtual void Run(Node *x) { LOG(FATAL) << "not valid"; } virtual void Run(Node *x) { LOG(FATAL) << "not valid"; }
...@@ -60,6 +68,11 @@ class Pass { ...@@ -60,6 +68,11 @@ class Pass {
virtual void Run(FunctionBlock *x) { LOG(FATAL) << "not valid"; } virtual void Run(FunctionBlock *x) { LOG(FATAL) << "not valid"; }
// Run on a single DataFlowGraph. // Run on a single DataFlowGraph.
virtual void Run(DataFlowGraph *x) { LOG(FATAL) << "not valid"; } virtual void Run(DataFlowGraph *x) { LOG(FATAL) << "not valid"; }
// Human-readable short representation.
virtual std::string repr() const = 0;
// Human-readable long description.
virtual std::string description() const = 0;
}; };
// NodePass process on any Node types. // NodePass process on any Node types.
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
namespace paddle {
namespace inference {
namespace analysis {
void DfgPassManager::RunAll() {
PADDLE_ENFORCE(argument_);
for (auto& pass : data_) {
VLOG(4) << "Running pass [" << pass->repr() << "]";
pass->Run(argument_->main_dfg.get());
}
}
void NodePassManager::RunAll() {
PADDLE_ENFORCE(argument_);
PADDLE_ENFORCE(argument_->main_dfg.get());
auto trait =
GraphTraits<DataFlowGraph>(argument_->main_dfg.get()).nodes_in_DFS();
for (auto& node : trait) {
for (auto& pass : data_) {
pass->Run(&node);
}
}
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the logic of pass management. The analysis for inference is
* a pipeline of Passes, a PassManager is a agency that helps to manage the
* executation of the Passes.
*
* There are two modes of Passes, the first one is called NodePass and takes
* an Node as input and output; the second one is called DFGPass and takes a
* DFG(Data Flow Graph) as input and output. It is hard to put all the passes in
* the same pipeline, there are two kinds of PassManagers, both takes a DFG as
* input and output a DFG, but the Passes inside are different:
*
* 1. NodePassManager: the passes inside are all NodePasses, it can have
* different graph trivial algorithm, for example, DFS_NodePassManager will
* trigger the passes in depth first order;
* 2. DfgPassManager: the passes inside are all DfgPasses.
*/
#pragma once
#include <string>
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/pass.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* PassManager is the base class for all pass managers, a pass manager has
* several Pass-es registered, and execute them in the linear order.
*/
class PassManager : public OrderedRegistry<Pass> {
public:
PassManager() = default;
// Call all the passes' Initialize methods. The desc and data_flow_graph are
// globally shared, so pass them as the arguemnts for all the pass managers.
virtual bool Initialize(const Argument& argument) { return false; }
virtual bool Initialize(Argument* argument) {
argument_ = argument;
for (auto& pass : data_) {
LOG(INFO) << "Initializing pass " << pass->repr();
if (!pass->Initialize(argument)) {
LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]";
return false;
}
}
return true;
}
// Call all the passes' Finalize methods.
virtual bool Finalize() {
for (auto& pass : data_) {
if (!pass->Finalize()) {
LOG(ERROR) << "Failed to finalize pass [" << pass->repr() << "]";
return false;
}
}
return true;
}
// Run all the passes.
virtual void RunAll() = 0;
// Short identifier.
virtual std::string repr() const = 0;
// Long description.
virtual std::string description() const = 0;
virtual ~PassManager() = default;
protected:
Argument* argument_{nullptr};
};
/*
* A pass manager that process a DFG.
*/
class DfgPassManager : public PassManager {
public:
DfgPassManager() = default;
void RunAll() override;
virtual ~DfgPassManager() = default;
};
/*
* A pass manager that process a Node each time.
*/
class NodePassManager : public PassManager {
public:
NodePassManager() = default;
void RunAll() override;
virtual ~NodePassManager() = default;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include <gtest/gtest.h>
namespace paddle {
namespace inference {
namespace analysis {
class TestDfgPassManager final : public DfgPassManager {
public:
TestDfgPassManager() = default;
virtual ~TestDfgPassManager() = default;
// Short identifier.
std::string repr() const override { return "test-pass-manager"; }
// Long description.
std::string description() const override { return "test doc"; }
};
class TestNodePassManager final : public NodePassManager {
public:
virtual ~TestNodePassManager() = default;
std::string repr() const override { return "test-node-pass-manager"; }
std::string description() const override { return "test doc"; }
};
class TestNodePass final : public NodePass {
public:
virtual ~TestNodePass() = default;
bool Initialize(Argument* argument) override { return true; }
void Run(Node* node) override {
LOG(INFO) << "- Processing node " << node->repr();
}
std::string repr() const override { return "test-node"; }
std::string description() const override { return "some doc"; }
};
TEST_F(DFG_Tester, DFG_pass_manager) {
TestDfgPassManager manager;
DFG_GraphvizDrawPass::Config config("./", "dfg.dot");
manager.Register("fluid-to-flow-graph", new FluidToDataFlowGraphPass);
manager.Register("graphviz", new DFG_GraphvizDrawPass(config));
manager.Register("dfg-to-fluid", new DataFlowGraphToFluidPass);
ASSERT_TRUE(manager.Initialize(&argument));
manager.RunAll();
}
TEST_F(DFG_Tester, Node_pass_manager) {
// Pre-process: initialize the DFG with the ProgramDesc first.
FluidToDataFlowGraphPass pass0;
pass0.Initialize(&argument);
pass0.Run(argument.main_dfg.get());
TestNodePassManager manager;
manager.Register("test-node-pass", new TestNodePass);
ASSERT_TRUE(manager.Initialize(&argument));
manager.RunAll();
}
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -19,22 +19,23 @@ namespace paddle { ...@@ -19,22 +19,23 @@ namespace paddle {
namespace inference { namespace inference {
namespace analysis { namespace analysis {
SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) {
if (node->type() != Node::Type::kFunction) return false;
const auto* func = static_cast<const Function*>(node);
if (func->func_type() == "elementwise_add" || func->func_type() == "relu" ||
func->func_type() == "conv2d" || func->func_type() == "mul" ||
func->func_type() == "sigmoid" || func->func_type() == "softmax") {
LOG(INFO) << "sub-graph marked " << node->repr();
return true;
}
return false;
};
TEST_F(DFG_Tester, Split) { TEST_F(DFG_Tester, Split) {
auto desc = LoadProgramDesc(); auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc); auto dfg = ProgramDescToDFG(desc);
LOG(INFO) << "spliter\n" << dfg.DotString(); LOG(INFO) << "spliter\n" << dfg.DotString();
SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) {
if (node->type() != Node::Type::kFunction) return false;
const auto* func = static_cast<const Function*>(node);
if (func->func_type() == "elementwise_add" || func->func_type() == "relu" ||
func->func_type() == "conv2d" || func->func_type() == "mul" ||
func->func_type() == "sigmoid" || func->func_type() == "softmax") {
LOG(INFO) << "sub-graph marked " << node->repr();
return true;
}
return false;
};
ASSERT_GT(dfg.nodes.size(), 5UL); ASSERT_GT(dfg.nodes.size(), 5UL);
auto subgraphs = SubGraphSplitter(&dfg, teller)(); auto subgraphs = SubGraphSplitter(&dfg, teller)();
...@@ -62,6 +63,28 @@ TEST_F(DFG_Tester, Split) { ...@@ -62,6 +63,28 @@ TEST_F(DFG_Tester, Split) {
ASSERT_EQ(subgraphs.back().size(), 6UL); ASSERT_EQ(subgraphs.back().size(), 6UL);
} }
TEST_F(DFG_Tester, Fuse) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
size_t count0 = dfg.nodes.size();
SubGraphFuse fuse(&dfg, teller);
fuse();
int count1 = 0;
for (auto& node : dfg.nodes.nodes()) {
if (node->deleted()) {
LOG(INFO) << "deleted " << node->repr();
}
count1 += node->deleted();
}
// At least one nodes should be deleted.
ASSERT_EQ(dfg.nodes.size(), count0 + 1); // added a new FunctionBlock
ASSERT_EQ(6UL, count1);
}
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
namespace paddle {
namespace inference {
namespace analysis {
TensorRTSubGraphPass::TensorRTSubGraphPass(
const TensorRTSubGraphPass::NodeInsideSubgraphTeller &teller)
: node_inside_subgraph_teller_(teller) {}
void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
SubGraphFuse(graph, node_inside_subgraph_teller_);
}
} // analysis
} // inference
} // paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/inference/analysis/node.h"
#include "paddle/fluid/inference/analysis/pass.h"
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* Parse the graph and replace TensorRT supported nodes with SubGraphNode
*/
class TensorRTSubGraphPass : public DataFlowGraphPass {
public:
// Tell whether to transform a sub-graph into TensorRT.
using NodeInsideSubgraphTeller = SubGraphFuse::NodeInsideSubgraphTeller;
TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller);
bool Initialize(Argument* argument) override { return true; }
// This class get a sub-graph as input and determine whether to transform this
// sub-graph into TensorRT.
void Run(DataFlowGraph* graph) override;
private:
NodeInsideSubgraphTeller node_inside_subgraph_teller_;
};
} // namespace analysis
} // namespace inference
} // paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
DEFINE_string(model_dir, "", "inference test model dir");
TEST(TensorRTSubGraph, single_pass) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) {
if (node->type() != Node::Type::kFunction) return false;
const auto* func = static_cast<const Function*>(node);
if (func->func_type() == "elementwise_add" || func->func_type() == "relu" ||
func->func_type() == "conv2d" || func->func_type() == "mul" ||
func->func_type() == "sigmoid" || func->func_type() == "softmax") {
LOG(INFO) << "sub-graph marked " << node->repr();
return true;
}
return false;
};
DFG_GraphvizDrawPass::Config config{"./", "test"};
DFG_GraphvizDrawPass dfg_pass(config);
dfg_pass.Initialize();
DFG_GraphvizDrawPass dfg_pass1(config);
dfg_pass1.Initialize();
dfg_pass.Run(&dfg);
TensorRTSubGraphPass trt_pass(std::move(teller));
trt_pass.Initialize();
trt_pass.Run(&dfg);
dfg_pass1.Run(&dfg);
// Check the TRT op's block desc
for (auto node : dfg.nodes.nodes()) {
if (node->IsFunctionBlock()) {
}
}
}
TEST(TensorRTSubGraph, pass_manager) {}
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -15,33 +15,46 @@ limitations under the License. */ ...@@ -15,33 +15,46 @@ limitations under the License. */
#pragma once #pragma once
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <fstream>
#include <string> #include <string>
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h" #include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/io.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
// Read ProgramDesc from a __model__ file, defined in io.cc
extern void ReadBinaryFile(const std::string& filename, std::string* contents);
namespace analysis { namespace analysis {
DEFINE_string(inference_model_dir, "", "inference test model dir"); DEFINE_string(inference_model_dir, "", "inference test model dir");
static framework::proto::ProgramDesc LoadProgramDesc( static framework::proto::ProgramDesc LoadProgramDesc(
const std::string& model_dir = FLAGS_inference_model_dir) { const std::string& model_dir = FLAGS_inference_model_dir) {
paddle::platform::CPUPlace place; std::string msg;
paddle::framework::Executor executor(place); std::string net_file = FLAGS_inference_model_dir + "/__model__";
paddle::framework::Scope scope; std::ifstream fin(net_file, std::ios::in | std::ios::binary);
auto program = Load(&executor, &scope, model_dir); PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s", net_file);
return *program->Proto(); fin.seekg(0, std::ios::end);
msg.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(msg.at(0)), msg.size());
fin.close();
framework::proto::ProgramDesc program_desc;
program_desc.ParseFromString(msg);
return program_desc;
} }
static DataFlowGraph ProgramDescToDFG( static DataFlowGraph ProgramDescToDFG(
const framework::proto::ProgramDesc& desc) { const framework::proto::ProgramDesc& desc) {
DataFlowGraph graph; DataFlowGraph graph;
FluidToDataFlowGraphPass pass; FluidToDataFlowGraphPass pass;
pass.Initialize(desc); Argument argument;
argument.origin_program_desc.reset(new framework::proto::ProgramDesc(desc));
pass.Initialize(&argument);
pass.Run(&graph); pass.Run(&graph);
pass.Finalize(); pass.Finalize();
return graph; return graph;
...@@ -49,9 +62,12 @@ static DataFlowGraph ProgramDescToDFG( ...@@ -49,9 +62,12 @@ static DataFlowGraph ProgramDescToDFG(
class DFG_Tester : public ::testing::Test { class DFG_Tester : public ::testing::Test {
protected: protected:
void SetUp() override { desc = LoadProgramDesc(FLAGS_inference_model_dir); } void SetUp() override {
auto desc = LoadProgramDesc(FLAGS_inference_model_dir);
argument.origin_program_desc.reset(new framework::proto::ProgramDesc(desc));
}
framework::proto::ProgramDesc desc; Argument argument;
}; };
} // namespace analysis } // namespace analysis
......
...@@ -20,16 +20,20 @@ limitations under the License. */ ...@@ -20,16 +20,20 @@ limitations under the License. */
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/pybind/pybind.h" #include "paddle/fluid/pybind/pybind.h"
DEFINE_string(devices, "", "The devices to be used which is joined by comma."); DEFINE_string(devices, "", "The devices to be used which is joined by comma.");
DEFINE_bool(init_p2p, false, "Whether to init p2p."); DEFINE_bool(init_p2p, false, "Whether to init p2p.");
DEFINE_int32(math_num_threads, 1,
"Number of threads used to run math functions.");
namespace paddle { namespace paddle {
namespace inference { namespace inference {
void Init(const std::vector<std::string> argv) { void Init(const std::vector<std::string> argv) {
framework::InitGflags(argv); framework::InitGflags(argv);
operators::math::SetNumThreads(FLAGS_math_num_threads);
// init devices // init devices
std::vector<int> devices; std::vector<int> devices;
std::string token; std::string token;
......
...@@ -64,7 +64,8 @@ class OpConverter { ...@@ -64,7 +64,8 @@ class OpConverter {
(*it)(op, scope, test_mode); (*it)(op, scope, test_mode);
} }
// convert fluid block to tensorrt network // Convert a fluid block to tensorrt network, NOTE it just convert operators,
// the INetwork's inputs and outputs should specified in some other modules.
void ConvertBlock(const framework::proto::BlockDesc& block, void ConvertBlock(const framework::proto::BlockDesc& block,
const std::unordered_set<std::string>& parameters, const std::unordered_set<std::string>& parameters,
const framework::Scope& scope, TensorRTEngine* engine) { const framework::Scope& scope, TensorRTEngine* engine) {
......
...@@ -51,11 +51,12 @@ class TensorRTEngine : public EngineBase { ...@@ -51,11 +51,12 @@ class TensorRTEngine : public EngineBase {
nvinfer1::Weights w_; nvinfer1::Weights w_;
}; };
TensorRTEngine(int max_batch, int max_workspace, cudaStream_t* stream, TensorRTEngine(int max_batch, int max_workspace,
cudaStream_t* stream = nullptr,
nvinfer1::ILogger& logger = NaiveLogger::Global()) nvinfer1::ILogger& logger = NaiveLogger::Global())
: max_batch_(max_batch), : max_batch_(max_batch),
max_workspace_(max_workspace), max_workspace_(max_workspace),
stream_(stream), stream_(stream ? stream : &default_stream_),
logger_(logger) {} logger_(logger) {}
virtual ~TensorRTEngine(); virtual ~TensorRTEngine();
...@@ -121,6 +122,8 @@ class TensorRTEngine : public EngineBase { ...@@ -121,6 +122,8 @@ class TensorRTEngine : public EngineBase {
// the max memory size the engine uses // the max memory size the engine uses
int max_workspace_; int max_workspace_;
cudaStream_t* stream_; cudaStream_t* stream_;
// If stream_ is not set from outside, hold its own stream.
cudaStream_t default_stream_;
nvinfer1::ILogger& logger_; nvinfer1::ILogger& logger_;
std::vector<Buffer> buffers_; std::vector<Buffer> buffers_;
...@@ -165,20 +168,31 @@ class TensorRTEngine : public EngineBase { ...@@ -165,20 +168,31 @@ class TensorRTEngine : public EngineBase {
*/ */
class TRT_EngineManager { class TRT_EngineManager {
public: public:
TensorRTEngine* Create(int max_batch, int max_workspace, bool HasEngine(const std::string& name) const {
cudaStream_t* stream) { return engines_.count(name) != 0;
engines_.emplace_back(new TensorRTEngine(max_batch, max_workspace, stream)); }
return engines_.back().get();
// Get an engine called `name`.
TensorRTEngine* Get(const std::string& name) const {
return engines_.at(name).get();
}
// Create or get an engine called `name`
TensorRTEngine* Create(int max_batch, int max_workspace, cudaStream_t* stream,
const std::string& name) {
auto* p = new TensorRTEngine(max_batch, max_workspace, stream);
engines_[name].reset(p);
return p;
} }
void DeleteALl() { void DeleteALl() {
for (auto& ptr : engines_) { for (auto& item : engines_) {
ptr.reset(nullptr); item.second.reset(nullptr);
} }
} }
private: private:
std::vector<std::unique_ptr<TensorRTEngine>> engines_; std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
}; };
} // namespace tensorrt } // namespace tensorrt
......
...@@ -29,6 +29,7 @@ DEFINE_string(data_file, "", "File of input index data."); ...@@ -29,6 +29,7 @@ DEFINE_string(data_file, "", "File of input index data.");
DEFINE_int32(repeat, 100, "Running the inference program repeat times"); DEFINE_int32(repeat, 100, "Running the inference program repeat times");
DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
DEFINE_int32(num_threads, 1, "Number of threads should be used"); DEFINE_int32(num_threads, 1, "Number of threads should be used");
DECLARE_bool(use_mkldnn);
inline double GetCurrentMs() { inline double GetCurrentMs() {
struct timeval time; struct timeval time;
...@@ -103,9 +104,9 @@ void ThreadRunInfer( ...@@ -103,9 +104,9 @@ void ThreadRunInfer(
const int tid, paddle::framework::Scope* scope, const int tid, paddle::framework::Scope* scope,
const std::vector<std::vector<const paddle::framework::LoDTensor*>>& jobs) { const std::vector<std::vector<const paddle::framework::LoDTensor*>>& jobs) {
// maybe framework:ProgramDesc is not thread-safe // maybe framework:ProgramDesc is not thread-safe
paddle::platform::CPUPlace place;
paddle::framework::Executor executor(place);
auto& sub_scope = scope->NewScope(); auto& sub_scope = scope->NewScope();
auto place = paddle::platform::CPUPlace();
auto executor = paddle::framework::Executor(place);
auto inference_program = auto inference_program =
paddle::inference::Load(&executor, scope, FLAGS_model_path); paddle::inference::Load(&executor, scope, FLAGS_model_path);
...@@ -182,8 +183,8 @@ TEST(inference, nlp) { ...@@ -182,8 +183,8 @@ TEST(inference, nlp) {
stop_ms = GetCurrentMs(); stop_ms = GetCurrentMs();
} else { } else {
// 1. Define place, executor, scope // 1. Define place, executor, scope
auto place = paddle::platform::CPUPlace(); paddle::platform::CPUPlace place;
auto executor = paddle::framework::Executor(place); paddle::framework::Executor executor(place);
// 2. Initialize the inference_program and load parameters // 2. Initialize the inference_program and load parameters
std::unique_ptr<paddle::framework::ProgramDesc> inference_program; std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
......
...@@ -43,14 +43,16 @@ void* CPUAllocator::Alloc(size_t* index, size_t size) { ...@@ -43,14 +43,16 @@ void* CPUAllocator::Alloc(size_t* index, size_t size) {
*index = 0; // unlock memory *index = 0; // unlock memory
void* p; void* p = nullptr;
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
// refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
// memory alignment // memory alignment
PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0); PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0, "Alloc %ld error!",
size);
#else #else
PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0); PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0, "Alloc %ld error!",
size);
#endif #endif
PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size); PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size);
......
...@@ -19,18 +19,18 @@ limitations under the License. */ ...@@ -19,18 +19,18 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ #define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker \ class OP_NAME##OpMaker \
: public ::paddle::framework::OpProtoAndCheckerMaker { \ : public ::paddle::framework::OpProtoAndCheckerMaker { \
public: \ public: \
void Make() override { \ void Make() override { \
AddInput("X", "Input of " #OP_NAME " operator"); \ AddInput("X", "Input of " #OP_NAME " operator"); \
AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \ AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \
AddAttr<bool>("use_mkldnn", \ AddAttr<bool>("use_mkldnn", \
"(bool, default false) Only used in mkldnn kernel") \ "(default false) Only used in mkldnn kernel") \
.SetDefault(false); \ .SetDefault(false); \
AddComment(OP_COMMENT); \ AddComment(OP_COMMENT); \
} \ } \
} }
#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \ #define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \
...@@ -112,7 +112,7 @@ $$out = \frac{1}{1 + e^{-x}}$$ ...@@ -112,7 +112,7 @@ $$out = \frac{1}{1 + e^{-x}}$$
__attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC( __attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC(
Logsigmoid Activation Operator Logsigmoid Activation Operator
$$out = \log \frac{1}{1 + e^{-x}}$$ $$out = \\log \\frac{1}{1 + e^{-x}}$$
)DOC"; )DOC";
...@@ -133,7 +133,7 @@ $out = \max(x, 0)$ ...@@ -133,7 +133,7 @@ $out = \max(x, 0)$
__attribute__((unused)) constexpr char TanhDoc[] = R"DOC( __attribute__((unused)) constexpr char TanhDoc[] = R"DOC(
Tanh Activation Operator. Tanh Activation Operator.
$$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC"; )DOC";
...@@ -196,7 +196,7 @@ $out = [x]$ ...@@ -196,7 +196,7 @@ $out = [x]$
__attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC( __attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC(
Reciprocal Activation Operator. Reciprocal Activation Operator.
$$out = \frac{1}{x}$$ $$out = \\frac{1}{x}$$
)DOC"; )DOC";
...@@ -252,15 +252,14 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -252,15 +252,14 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", "Output of Softshrink operator"); AddOutput("Out", "Output of Softshrink operator");
AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f); AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
AddComment(R"DOC( AddComment(R"DOC(
Softshrink Activation Operator. :strong:`Softshrink Activation Operator`
$$ .. math::
out = \begin{cases} out = \begin{cases}
x - \lambda, \text{if } x > \lambda \\ x - \lambda, \text{if } x > \lambda \\
x + \lambda, \text{if } x < -\lambda \\ x + \lambda, \text{if } x < -\lambda \\
0, \text{otherwise} 0, \text{otherwise}
\end{cases} \end{cases}
$$
)DOC"); )DOC");
} }
...@@ -271,18 +270,18 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -271,18 +270,18 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override { void Make() override {
AddInput("X", "Input of HardShrink operator"); AddInput("X", "Input of HardShrink operator");
AddOutput("Out", "Output of HardShrink operator"); AddOutput("Out", "Output of HardShrink operator");
AddAttr<float>("threshold", "The value of threshold for HardShrink") AddAttr<float>("threshold",
"The value of threshold for HardShrink. [default: 0.5]")
.SetDefault(0.5f); .SetDefault(0.5f);
AddComment(R"DOC( AddComment(R"DOC(
HardShrink Activation Operator. :strong:`HardShrink activation operator`
$$ .. math::
out = \begin{cases} out = \begin{cases}
x, \text{if } x > \lambda \\ x, \text{if } x > \lambda \\
x, \text{if } x < -\lambda \\ x, \text{if } x < -\lambda \\
0, \text{otherwise} 0, \text{otherwise}
\end{cases} \end{cases}
$$
)DOC"); )DOC");
} }
...@@ -394,18 +393,18 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -394,18 +393,18 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override { void Make() override {
AddInput("X", "Input of ThresholdedRelu operator"); AddInput("X", "Input of ThresholdedRelu operator");
AddOutput("Out", "Output of ThresholdedRelu operator"); AddOutput("Out", "Output of ThresholdedRelu operator");
AddAttr<float>("threshold", "The threshold location of activation") AddAttr<float>("threshold",
"The threshold location of activation. [default 1.0].")
.SetDefault(1.0f); .SetDefault(1.0f);
AddComment(R"DOC( AddComment(R"DOC(
ThresholdedRelu Activation Operator. :strong:`ThresholdedRelu activation operator`
$$ .. math::
out = \begin{cases}
x, \text{if } x > threshold \\
0, \text{otherwise}
\end{cases}
$$
out = \begin{cases}
x, \text{if } x > threshold \\
0, \text{otherwise}
\end{cases}
)DOC"); )DOC");
} }
}; };
...@@ -444,7 +443,7 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -444,7 +443,7 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC( AddComment(R"DOC(
Swish Activation Operator. Swish Activation Operator.
$$out = \frac{x}{1 + e^{- \beta x}}$$ $$out = \\frac{x}{1 + e^{- \beta x}}$$
)DOC"); )DOC");
} }
......
...@@ -19,10 +19,17 @@ limitations under the License. */ ...@@ -19,10 +19,17 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor; using batch_norm_bwd = mkldnn::batch_normalization_backward;
using batch_norm_fwd = mkldnn::batch_normalization_forward;
using framework::DataLayout;
using framework::Tensor;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using mkldnn::stream;
using paddle::platform::MKLDNNDeviceContext; using paddle::platform::MKLDNNDeviceContext;
using paddle::platform::MKLDNNMemDesc; using paddle::platform::MKLDNNMemDesc;
using mkldnn::memory; using platform::to_void_cast;
template <typename T> template <typename T>
using EigenArrayMap = using EigenArrayMap =
...@@ -64,21 +71,12 @@ void run_batch_norm_op(Args &&... args) { ...@@ -64,21 +71,12 @@ void run_batch_norm_op(Args &&... args) {
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
} }
template <typename T>
inline void *cast_const_to_void(const T *t) {
return static_cast<void *>(const_cast<T *>(t));
}
} // namespace } // namespace
template <typename T> template <typename T>
class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
auto data_layout_str = ctx.Attr<std::string>("data_layout");
auto data_layout = framework::StringToDataLayout(data_layout_str);
PADDLE_ENFORCE(data_layout == framework::DataLayout::kNCHW,
"MKLDNN batch normalization handles only NCHW data layout");
const float epsilon = ctx.Attr<float>("epsilon"); const float epsilon = ctx.Attr<float>("epsilon");
const float momentum = ctx.Attr<float>("momentum"); const float momentum = ctx.Attr<float>("momentum");
const bool is_test = ctx.Attr<bool>("is_test"); const bool is_test = ctx.Attr<bool>("is_test");
...@@ -99,41 +97,53 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -99,41 +97,53 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const auto *scale = ctx.Input<Tensor>("Scale"); const auto *scale = ctx.Input<Tensor>("Scale");
const auto *shift = ctx.Input<Tensor>("Bias"); const auto *shift = ctx.Input<Tensor>("Bias");
y->mutable_data<T>(ctx.GetPlace()); PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN &&
mean_out->mutable_data<T>(ctx.GetPlace()); x->format() != memory::format::format_undef,
variance_out->mutable_data<T>(ctx.GetPlace()); "Wrong layout/format set for Input x tensor");
const T *x_data = x->data<T>();
const T *mean_data = mean->data<T>();
const T *variance_data = variance->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
T *mean_out_data = mean_out->mutable_data<T>(ctx.GetPlace());
T *variance_out_data = variance_out->mutable_data<T>(ctx.GetPlace());
T *batch_mean_data = nullptr;
T *batch_variance_data = nullptr;
if (!is_test) { if (!is_test) {
batch_mean->mutable_data<T>(ctx.GetPlace()); batch_mean_data = batch_mean->mutable_data<T>(ctx.GetPlace());
batch_variance->mutable_data<T>(ctx.GetPlace()); batch_variance_data = batch_variance->mutable_data<T>(ctx.GetPlace());
} }
auto propagation = is_test == true ? mkldnn::prop_kind::forward_scoring auto propagation = is_test == true ? mkldnn::prop_kind::forward_scoring
: mkldnn::prop_kind::forward_training; : mkldnn::prop_kind::forward_training;
auto dims = paddle::framework::vectorize2int(x->dims()); auto src_tz = paddle::framework::vectorize2int(x->dims());
auto scale_tz = paddle::framework::vectorize2int(scale->dims());
auto src_md = PADDLE_ENFORCE(scale_tz.size() == 1, "Dims of scale tensor is NOT 1");
MKLDNNMemDesc(dims, memory::data_type::f32, memory::format::nchw); const unsigned int ic = scale_tz[0];
auto dst_md =
MKLDNNMemDesc(dims, memory::data_type::f32, memory::format::nchw);
auto src_pd = mkldnn::memory::primitive_desc{src_md, mkldnn_engine};
auto dst_pd = mkldnn::memory::primitive_desc{dst_md, mkldnn_engine};
auto src = mkldnn::memory{src_pd, cast_const_to_void(x->data<T>())};
auto dst = mkldnn::memory{dst_pd, y->data<T>()};
unsigned flags = mkldnn::use_scale_shift; unsigned flags = mkldnn::use_scale_shift;
if (is_test) flags |= mkldnn::use_global_stats; if (is_test) flags |= mkldnn::use_global_stats;
// create mkldnn memory from input x tensor
auto src_memory =
memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine},
to_void_cast(x_data));
// create primitive descriptor for batch norm forward
using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>; using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>;
auto batch_norm_fwd_desc = auto batch_norm_fwd_desc = bn_fwd_types::op_desc{
bn_fwd_types::op_desc{propagation, src_md, epsilon, flags}; propagation, src_memory.get_primitive_desc().desc(), epsilon, flags};
auto batch_norm_fwd_pd = std::shared_ptr<batch_norm_fwd::primitive_desc> batch_norm_fwd_pd =
bn_fwd_types::op_prim{batch_norm_fwd_desc, mkldnn_engine}; std::shared_ptr<batch_norm_fwd::primitive_desc>(
new batch_norm_fwd::primitive_desc(batch_norm_fwd_desc,
mkldnn_engine));
const unsigned int ic = dims[1]; // Save the pd to be used in backward pass
const std::string key = ctx.op().Output("SavedMean");
const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
dev_ctx.SetBlob(key_batch_norm_fwd_pd, batch_norm_fwd_pd);
// MKLDNN requires a single piece of memory for scale and shift/bias data // MKLDNN requires a single piece of memory for scale and shift/bias data
const size_t scaleshift_size = 2 * ic; const size_t scaleshift_size = 2 * ic;
...@@ -143,73 +153,58 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -143,73 +153,58 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
copy_to_weights(scale->data<T>(), scale->data<T>() + ic, shift->data<T>(), copy_to_weights(scale->data<T>(), scale->data<T>() + ic, shift->data<T>(),
shift->data<T>() + ic, &scaleshift_data); shift->data<T>() + ic, &scaleshift_data);
auto scaleshift_memory = mkldnn::memory{ // crate mkldnn memory for weights(scale/shift)
batch_norm_fwd_pd.weights_primitive_desc(), scaleshift_data.data()}; auto scaleshift_memory = memory(batch_norm_fwd_pd->weights_primitive_desc(),
scaleshift_data.data());
if (is_test) { // create mkldnn memory for output y tensor
auto mean_memory = mkldnn::memory{batch_norm_fwd_pd.mean_primitive_desc(), auto dst_memory = memory(batch_norm_fwd_pd->dst_primitive_desc(), y_data);
cast_const_to_void(mean->data<T>())};
if (is_test) {
// create mkldnn memory for stats (as input)
auto mean_memory = memory(batch_norm_fwd_pd->mean_primitive_desc(),
to_void_cast(mean_data));
auto variance_memory = auto variance_memory =
mkldnn::memory{batch_norm_fwd_pd.variance_primitive_desc(), memory(batch_norm_fwd_pd->variance_primitive_desc(),
cast_const_to_void(variance->data<T>())}; to_void_cast(variance_data));
run_batch_norm_op<typename bn_fwd_types::op_type>( run_batch_norm_op<typename bn_fwd_types::op_type>(
batch_norm_fwd_pd, src, (const mkldnn::primitive::at &)mean_memory, *batch_norm_fwd_pd, src_memory,
(const mkldnn::primitive::at &)mean_memory,
(const mkldnn::primitive::at &)variance_memory, scaleshift_memory, (const mkldnn::primitive::at &)variance_memory, scaleshift_memory,
dst); dst_memory);
} else { } else {
// create mkldnn memory for stats (as output)
auto mean_memory = auto mean_memory =
mkldnn::memory{batch_norm_fwd_pd.mean_primitive_desc(), memory(batch_norm_fwd_pd->mean_primitive_desc(), batch_mean_data);
cast_const_to_void(batch_mean->data<T>())}; auto variance_memory = memory(
batch_norm_fwd_pd->variance_primitive_desc(), batch_variance_data);
auto variance_memory =
mkldnn::memory{batch_norm_fwd_pd.variance_primitive_desc(),
cast_const_to_void(batch_variance->data<T>())};
run_batch_norm_op<bn_fwd_types::op_type>(batch_norm_fwd_pd, src, run_batch_norm_op<bn_fwd_types::op_type>(*batch_norm_fwd_pd, src_memory,
scaleshift_memory, dst, scaleshift_memory, dst_memory,
mean_memory, variance_memory); mean_memory, variance_memory);
} }
if (!is_test) { if (!is_test) {
const unsigned int in = dims[0]; // mkldnn only compute stats for current batch
const unsigned int sample_size = x->numel() / in / ic; // so we need compute momentum stats via Eigen lib
EigenVectorArrayMap<T> batch_mean_e(batch_mean_data, ic);
// saved_xx is use just in this batch of data EigenVectorArrayMap<T> batch_variance_e(batch_variance_data, ic);
EigenVectorArrayMap<T> saved_mean_e( ConstEigenVectorArrayMap<T> mean_e(mean_data, ic);
batch_mean->mutable_data<T>(ctx.GetPlace()), ic); ConstEigenVectorArrayMap<T> variance_e{variance_data, ic};
EigenVectorArrayMap<T> saved_variance_e(
batch_variance->mutable_data<T>(ctx.GetPlace()), ic); EigenVectorArrayMap<T> running_mean_e(mean_out_data, ic);
saved_mean_e.setZero(); EigenVectorArrayMap<T> running_variance_e(variance_out_data, ic);
saved_variance_e.setZero();
const unsigned int x_arr_size = in * ic;
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, x_arr_size);
for (unsigned int nc = 0; nc < x_arr_size; ++nc) {
saved_mean_e(nc % ic) += x_arr.col(nc).sum();
}
saved_mean_e /= in * sample_size;
for (unsigned int nc = 0; nc < x_arr_size; ++nc) {
saved_variance_e(nc % ic) +=
(x_arr.col(nc) - saved_mean_e(nc % ic)).matrix().squaredNorm();
}
saved_variance_e /= in * sample_size;
ConstEigenVectorArrayMap<T> mean_arr{mean->data<T>(), ic};
ConstEigenVectorArrayMap<T> variance_arr{variance->data<T>(), ic};
EigenVectorArrayMap<T> running_mean_arr(
mean_out->mutable_data<T>(ctx.GetPlace()), ic);
EigenVectorArrayMap<T> running_var_arr(
variance_out->mutable_data<T>(ctx.GetPlace()), ic);
auto one_minus_momentum = 1. - momentum; auto one_minus_momentum = 1. - momentum;
running_mean_arr = running_mean_e = mean_e * momentum + batch_mean_e * one_minus_momentum;
mean_arr * momentum + saved_mean_e * one_minus_momentum; running_variance_e =
running_var_arr = variance_e * momentum + batch_variance_e * one_minus_momentum;
variance_arr * momentum + saved_variance_e * one_minus_momentum;
} }
y->set_layout(DataLayout::kMKLDNN);
y->set_format(
(memory::format)dst_memory.get_primitive_desc().desc().data.format);
} }
}; };
...@@ -217,11 +212,6 @@ template <typename T> ...@@ -217,11 +212,6 @@ template <typename T>
class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
public: public:
void Compute(const paddle::framework::ExecutionContext &ctx) const override { void Compute(const paddle::framework::ExecutionContext &ctx) const override {
auto data_layout_str = ctx.Attr<std::string>("data_layout");
auto data_layout = framework::StringToDataLayout(data_layout_str);
PADDLE_ENFORCE(data_layout == framework::DataLayout::kNCHW,
"MKLDNN batch normalization handles only NCHW data layout");
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>(); auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
auto mkldnn_engine = dev_ctx.GetEngine(); auto mkldnn_engine = dev_ctx.GetEngine();
...@@ -238,88 +228,132 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -238,88 +228,132 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto *diff_scale = ctx.Output<Tensor>(framework::GradVarName("Scale")); auto *diff_scale = ctx.Output<Tensor>(framework::GradVarName("Scale"));
auto *diff_shift = ctx.Output<Tensor>(framework::GradVarName("Bias")); auto *diff_shift = ctx.Output<Tensor>(framework::GradVarName("Bias"));
diff_x->mutable_data<T>(ctx.GetPlace()); PADDLE_ENFORCE(diff_y->layout() == DataLayout::kMKLDNN &&
diff_scale->mutable_data<T>(ctx.GetPlace()); diff_y->format() != memory::format::format_undef,
diff_shift->mutable_data<T>(ctx.GetPlace()); "Wrong layout/format set for Input diff_y tensor");
const T *x_data = x->data<T>();
const T *diff_y_data = diff_y->data<T>();
const T *batch_mean_data = batch_mean->data<T>();
const T *batch_variance_data = batch_variance->data<T>();
const T *scale_data = scale->data<T>();
const T *shift_data = shift->data<T>();
T *diff_x_data = diff_x->mutable_data<T>(ctx.GetPlace());
T *diff_scale_data = diff_scale->mutable_data<T>(ctx.GetPlace());
T *diff_shift_data = diff_shift->mutable_data<T>(ctx.GetPlace());
auto src_tz = paddle::framework::vectorize2int(x->dims());
auto diff_src_tz = src_tz;
auto dst_tz = src_tz;
auto diff_dst_tz = dst_tz;
auto scale_tz = paddle::framework::vectorize2int(scale->dims());
PADDLE_ENFORCE(scale_tz.size() == 1, "Dims of scale tensor is NOT 1");
const unsigned int ic = scale_tz[0];
// Retrieve bn_fwd_pd from device context
const std::string key = ctx.op().Input("SavedMean");
const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
auto batch_norm_fwd_pd =
std::static_pointer_cast<batch_norm_fwd::primitive_desc>(
dev_ctx.GetBlob(key_batch_norm_fwd_pd));
PADDLE_ENFORCE(batch_norm_fwd_pd != nullptr,
"Fail to find batch_norm_fwd_pd in device context");
auto dims = paddle::framework::vectorize2int(x->dims()); using bn_bwd_types = bn_type_traits<mkldnn::batch_normalization_backward>;
unsigned flags = mkldnn::use_scale_shift | !mkldnn::use_global_stats;
auto src_md = // create mkldnn memory from input diff_y tensor
MKLDNNMemDesc(dims, memory::data_type::f32, memory::format::nchw); auto user_diff_dst_memory =
auto dst_md = memory({{{diff_dst_tz}, memory::data_type::f32, diff_y->format()},
MKLDNNMemDesc(dims, memory::data_type::f32, memory::format::nchw); mkldnn_engine},
auto diff_src_md = to_void_cast(diff_y_data));
MKLDNNMemDesc(dims, memory::data_type::f32, memory::format::nchw);
auto diff_dst_md =
MKLDNNMemDesc(dims, memory::data_type::f32, memory::format::nchw);
using bn_bwd_types = bn_type_traits<mkldnn::batch_normalization_backward>; // create mkldnn memory from input x tensor
using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>; auto src_memory =
memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine},
to_void_cast(x_data));
auto batch_norm_fwd_desc = bn_fwd_types::op_desc{ // for diff_dst, try to use same format as dst in forward pass
mkldnn::prop_kind::forward_training, src_md, epsilon, flags}; auto diff_dst_pd = batch_norm_fwd_pd.get()->dst_primitive_desc();
auto batch_norm_fwd_pd = auto diff_dst_md = diff_dst_pd.desc();
bn_fwd_types::op_prim{batch_norm_fwd_desc, mkldnn_engine};
// create primitive descriptor for batch norm backward
unsigned flags = mkldnn::use_scale_shift;
auto batch_norm_bwd_desc = bn_bwd_types::op_desc{ auto batch_norm_bwd_desc = bn_bwd_types::op_desc{
mkldnn::prop_kind::backward, diff_dst_md, dst_md, epsilon, flags}; mkldnn::prop_kind::backward, diff_dst_md,
src_memory.get_primitive_desc().desc(), epsilon, flags};
auto batch_norm_bwd_pd = bn_bwd_types::op_prim{ auto batch_norm_bwd_pd = bn_bwd_types::op_prim{
batch_norm_bwd_desc, mkldnn_engine, batch_norm_fwd_pd}; batch_norm_bwd_desc, mkldnn_engine, *batch_norm_fwd_pd};
auto src = mkldnn::memory{{src_md, mkldnn_engine}, // reorder user_diff_dst if it's not in preferred format
cast_const_to_void(x->data<T>())}; auto diff_dst_memory = user_diff_dst_memory;
primitive reorder_diff_dst;
auto mean = mkldnn::memory{batch_norm_bwd_pd.mean_primitive_desc(), bool is_diff_dst_reordered = false;
cast_const_to_void(batch_mean->data<T>())}; if (diff_dst_pd != user_diff_dst_memory.get_primitive_desc()) {
diff_dst_memory = memory(diff_dst_pd);
auto variance = reorder_diff_dst = reorder(user_diff_dst_memory, diff_dst_memory);
mkldnn::memory{batch_norm_bwd_pd.variance_primitive_desc(), is_diff_dst_reordered = true;
cast_const_to_void(batch_variance->data<T>())}; }
auto diff_dst = mkldnn::memory{{diff_dst_md, mkldnn_engine},
cast_const_to_void(diff_y->data<T>())};
const unsigned int ic = dims[1]; // create mkldnn memory for input tensors (src/mean/variance)
auto mean_memory = memory(batch_norm_bwd_pd.mean_primitive_desc(),
to_void_cast(batch_mean_data));
auto variance_memory = memory(batch_norm_bwd_pd.variance_primitive_desc(),
to_void_cast(batch_variance_data));
// MKLDNN requires a single piece of memory for scale and shift/bias data
const size_t scaleshift_size = 2 * ic; const size_t scaleshift_size = 2 * ic;
std::vector<T> scaleshift_data; std::vector<T> scaleshift_data;
scaleshift_data.reserve(scaleshift_size); scaleshift_data.reserve(scaleshift_size);
copy_to_weights(scale->data<T>(), scale->data<T>() + ic, shift->data<T>(), copy_to_weights(scale_data, scale_data + ic, shift_data, shift_data + ic,
shift->data<T>() + ic, &scaleshift_data); &scaleshift_data);
auto scaleshift_memory = mkldnn::memory{ // create mkldnn memory for input tensors (scale/shift)
batch_norm_bwd_pd.weights_primitive_desc(), scaleshift_data.data()}; auto scaleshift_memory = memory(batch_norm_bwd_pd.weights_primitive_desc(),
scaleshift_data.data());
// create mkldnn memory for output diff weights (combined scale/shift)
std::vector<T> diff_scaleshift_data; std::vector<T> diff_scaleshift_data;
diff_scaleshift_data.reserve(scaleshift_size); diff_scaleshift_data.reserve(scaleshift_size);
copy_to_weights(diff_scale->data<T>(), diff_scale->data<T>() + ic,
diff_shift->data<T>(), diff_shift->data<T>() + ic,
&diff_scaleshift_data);
auto diff_scaleshift_memory = auto diff_scaleshift_memory =
mkldnn::memory{batch_norm_bwd_pd.diff_weights_primitive_desc(), memory(batch_norm_bwd_pd.diff_weights_primitive_desc(),
diff_scaleshift_data.data()}; diff_scaleshift_data.data());
auto diff_src = mkldnn::memory{{diff_src_md, mkldnn_engine}, // here assume diff_src is in the same format of src
static_cast<void *>(diff_x->data<T>())}; auto diff_src_memory = memory(src_memory.get_primitive_desc(), diff_x_data);
run_batch_norm_op<bn_bwd_types::op_type>( // finally create batch_norm backward primitive
batch_norm_bwd_pd, src, mean, variance, diff_dst, scaleshift_memory, auto batch_norm_bwd_prim =
diff_src, diff_scaleshift_memory); batch_norm_bwd(batch_norm_bwd_pd, src_memory, mean_memory,
variance_memory, diff_dst_memory, scaleshift_memory,
diff_src_memory, diff_scaleshift_memory);
// execute optional reorder and batch_norm backward primitive
std::vector<primitive> pipeline;
if (is_diff_dst_reordered) pipeline.push_back(reorder_diff_dst);
pipeline.push_back(batch_norm_bwd_prim);
stream(stream::kind::eager).submit(pipeline).wait();
// copy back diff sacle/shift to output tensors (diff scale/shift)
diff_scaleshift_data.resize(scaleshift_size);
auto it = std::begin(diff_scaleshift_data); auto it = std::begin(diff_scaleshift_data);
std::copy(it, std::next(it, ic), diff_scale->data<T>()); std::copy(it, std::next(it, ic), diff_scale_data);
std::copy(std::next(it, ic), std::end(diff_scaleshift_data), std::copy(std::next(it, ic), std::end(diff_scaleshift_data),
diff_shift->data<T>()); diff_shift_data);
// set layout/format of output tensors
diff_x->set_layout(DataLayout::kMKLDNN);
diff_x->set_format((memory::format)diff_src_memory.get_primitive_desc()
.desc()
.data.format);
} }
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(batch_norm, MKLDNN, paddle::platform::CPUPlace, REGISTER_OP_KERNEL(batch_norm, MKLDNN, ::paddle::platform::CPUPlace,
ops::BatchNormMKLDNNOpKernel<float>); ops::BatchNormMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(batch_norm_grad, MKLDNN, paddle::platform::CPUPlace, REGISTER_OP_KERNEL(batch_norm_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::BatchNormMKLDNNGradOpKernel<float>); ops::BatchNormMKLDNNGradOpKernel<float>);
...@@ -110,19 +110,19 @@ class BatchNormOp : public framework::OperatorWithKernel { ...@@ -110,19 +110,19 @@ class BatchNormOp : public framework::OperatorWithKernel {
ctx.Input<Tensor>("Variance")->type()), ctx.Input<Tensor>("Variance")->type()),
"Variance input should be of float type"); "Variance input should be of float type");
framework::LibraryType library_{framework::LibraryType::kPlain};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework::LibraryType library = framework::LibraryType::kPlain;
framework::DataLayout layout = framework::DataLayout::kAnyLayout; framework::DataLayout layout = framework::DataLayout::kAnyLayout;
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
if (library_ == framework::LibraryType::kPlain && if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) { platform::CanMKLDNNBeUsed(ctx)) {
library_ = framework::LibraryType::kMKLDNN; library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN; layout = framework::DataLayout::kMKLDNN;
} }
#endif #endif
return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
library_); library);
} }
}; };
...@@ -370,19 +370,21 @@ class BatchNormGradOp : public framework::OperatorWithKernel { ...@@ -370,19 +370,21 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
PADDLE_THROW("can't find Y@GRAD"); PADDLE_THROW("can't find Y@GRAD");
} }
framework::LibraryType library_{framework::LibraryType::kPlain};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework::DataLayout layout_ = framework::DataLayout::kAnyLayout; framework::LibraryType library = framework::LibraryType::kPlain;
framework::DataLayout layout = framework::DataLayout::kAnyLayout;
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
if (library_ == framework::LibraryType::kPlain && if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) { platform::CanMKLDNNBeUsed(ctx)) {
library_ = framework::LibraryType::kMKLDNN; library = framework::LibraryType::kMKLDNN;
layout_ = framework::DataLayout::kMKLDNN; layout = framework::DataLayout::kMKLDNN;
} }
#endif #endif
return framework::OpKernelType( return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(), framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
layout_, library_); layout, library);
} }
}; };
......
...@@ -20,6 +20,7 @@ limitations under the License. */ ...@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/macros.h" #include "paddle/fluid/operators/detail/macros.h"
#include "paddle/fluid/operators/send_recv_util.h" #include "paddle/fluid/operators/send_recv_util.h"
#include "paddle/fluid/string/printf.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -36,12 +37,14 @@ class CheckpointNotifyOp : public framework::OperatorBase { ...@@ -36,12 +37,14 @@ class CheckpointNotifyOp : public framework::OperatorBase {
const platform::Place& place) const override { const platform::Place& place) const override {
std::vector<std::string> epmap = Attr<std::vector<std::string>>("epmap"); std::vector<std::string> epmap = Attr<std::vector<std::string>>("epmap");
std::string dir = Attr<std::string>("dir"); std::string dir = Attr<std::string>("dir");
std::string lookup_table_name = Attr<std::string>("lookup_table");
detail::RPCClient* rpc_client = detail::RPCClient* rpc_client =
detail::RPCClient::GetInstance<RPCCLIENT_T>(); detail::RPCClient::GetInstance<RPCCLIENT_T>();
for (size_t i = 0; i < epmap.size(); i++) { for (size_t i = 0; i < epmap.size(); i++) {
VLOG(3) << "sending to " << epmap[i] << " to checkpoint notify ... "; VLOG(3) << "sending " << dir <<" to " << epmap[i] << " to checkpoint notify ... ";
rpc_client->AsyncCheckpointNotify(epmap[i], dir); auto serial_looku_table = string::Sprintf("%s/%s.%d", dir, lookup_table_name, i);
rpc_client->AsyncCheckpointNotify(epmap[i], serial_looku_table);
} }
rpc_client->Wait(); rpc_client->Wait();
} }
...@@ -57,6 +60,8 @@ class CheckpointNotifyOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -57,6 +60,8 @@ class CheckpointNotifyOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault({"127.0.0.1:6164"}); .SetDefault({"127.0.0.1:6164"});
AddAttr<std::string>( AddAttr<std::string>(
"dir", "(string, default '') indicate the folder checkpoint will use"); "dir", "(string, default '') indicate the folder checkpoint will use");
AddAttr<std::string>(
"lookup_table", "(string, default '') the lookup table name");
AddComment(R"DOC( AddComment(R"DOC(
Prefetch operator Prefetch operator
......
...@@ -91,32 +91,31 @@ class ChunkEvalOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -91,32 +91,31 @@ class ChunkEvalOpMaker : public framework::OpProtoAndCheckerMaker {
"(int64_t). The number of chunks both in Inference and Label on the " "(int64_t). The number of chunks both in Inference and Label on the "
"given mini-batch."); "given mini-batch.");
AddAttr<int>("num_chunk_types", AddAttr<int>("num_chunk_types",
"(int). The number of chunk type. See below for details."); "The number of chunk type. See the description for details.");
AddAttr<std::string>( AddAttr<std::string>("chunk_scheme",
"chunk_scheme", "The labeling scheme indicating "
"(string, default IOB). The labeling scheme indicating " "how to encode the chunks. Must be IOB, IOE, IOBES or "
"how to encode the chunks. Must be IOB, IOE, IOBES or plain. See below " "plain. See the description"
"for details.") "for details.")
.SetDefault("IOB"); .SetDefault("IOB");
AddAttr<std::vector<int>>("excluded_chunk_types", AddAttr<std::vector<int>>("excluded_chunk_types",
"(list<int>) A list including chunk type ids " "A list including chunk type ids "
"indicating chunk types that are not counted. " "indicating chunk types that are not counted. "
"See below for details.") "See the description for details.")
.SetDefault(std::vector<int>{}); .SetDefault(std::vector<int>{});
AddComment(R"DOC( AddComment(R"DOC(
For some basics of chunking, please refer to For some basics of chunking, please refer to
‘Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>’. 'Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>'.
ChunkEvalOp computes the precision, recall, and F1-score of chunk detection,
CheckEvalOp computes the precision, recall, and F1-score of chunk detection,
and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes. and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes.
Here is a NER example of labeling for these tagging schemes: Here is a NER example of labeling for these tagging schemes:
Li Ming works at Agricultural Bank of China in Beijing. Li Ming works at Agricultural Bank of China in Beijing.
IO: I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC IO I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC
IOB: B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC IOB B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC
IOE: I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC IOE I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC
IOBES: B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC IOBES B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC
There are three chunk types(named entity types) including PER(person), ORG(organization) There are three chunk types(named entity types) including PER(person), ORG(organization)
and LOC(LOCATION), and we can see that the labels have the form <tag type>-<chunk type>. and LOC(LOCATION), and we can see that the labels have the form <tag type>-<chunk type>.
...@@ -124,31 +123,31 @@ and LOC(LOCATION), and we can see that the labels have the form <tag type>-<chun ...@@ -124,31 +123,31 @@ and LOC(LOCATION), and we can see that the labels have the form <tag type>-<chun
Since the calculations actually use label ids rather than labels, extra attention Since the calculations actually use label ids rather than labels, extra attention
should be paid when mapping labels to ids to make CheckEvalOp work. The key point should be paid when mapping labels to ids to make CheckEvalOp work. The key point
is that the listed equations are satisfied by ids. is that the listed equations are satisfied by ids.
tag_type = label % num_tag_type tag_type = label % num_tag_type
chunk_type = label / num_tag_type chunk_type = label / num_tag_type
where `num_tag_type` is the num of tag types in the tagging scheme, `num_chunk_type` where `num_tag_type` is the num of tag types in the tagging scheme, `num_chunk_type`
is the num of chunk types, and `tag_type` get its value from the following table. is the num of chunk types, and `tag_type` get its value from the following table.
Scheme Begin Inside End Single Scheme Begin Inside End Single
plain 0 - - - plain 0 - - -
IOB 0 1 - - IOB 0 1 - -
IOE - 0 1 - IOE - 0 1 -
IOBES 0 1 2 3 IOBES 0 1 2 3
Still use NER as example, assuming the tagging scheme is IOB while chunk types are ORG, Still use NER as example, assuming the tagging scheme is IOB while chunk types are ORG,
PER and LOC. To satisfy the above equations, the label map can be like this: PER and LOC. To satisfy the above equations, the label map can be like this:
B-ORG 0 B-ORG 0
I-ORG 1 I-ORG 1
B-PER 2 B-PER 2
I-PER 3 I-PER 3
B-LOC 4 B-LOC 4
I-LOC 5 I-LOC 5
O 6 O 6
Its not hard to verify the equations noting that the num of chunk types It's not hard to verify the equations noting that the num of chunk types
is 3 and the num of tag types in IOB scheme is 2. For example, the label is 3 and the num of tag types in IOB scheme is 2. For example, the label
id of I-LOC is 5, the tag type id of I-LOC is 1, and the chunk type id of id of I-LOC is 5, the tag type id of I-LOC is 1, and the chunk type id of
I-LOC is 2, which consistent with the results from the equations. I-LOC is 2, which consistent with the results from the equations.
......
...@@ -54,10 +54,19 @@ be linearly scaled to make the L2 norm of $Out$ equal to $max\_norm$, as ...@@ -54,10 +54,19 @@ be linearly scaled to make the L2 norm of $Out$ equal to $max\_norm$, as
shown in the following formula: shown in the following formula:
$$ $$
Out = \frac{max\_norm * X}{norm(X)}, Out = \\frac{max\\_norm * X}{norm(X)},
$$ $$
where $norm(X)$ represents the L2 norm of $X$. where $norm(X)$ represents the L2 norm of $X$.
Examples:
.. code-block:: python
data = fluid.layer.data(
name='data', shape=[2, 4, 6], dtype='float32')
reshaped = fluid.layers.clip_by_norm(
x=data, max_norm=0.5)
)DOC"); )DOC");
} }
}; };
......
...@@ -23,30 +23,26 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -23,30 +23,26 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
OpComment comment; OpComment comment;
AddInput("X", AddInput("X", string::Sprintf("the left hand operand of %s operator",
string::Sprintf("(LoDTensor) the left hand operand of %s operator", comment.type));
comment.type)); AddInput("Y", string::Sprintf("the right hand operand of %s operator",
AddInput("Y", string::Sprintf( comment.type));
"(LoDTensor) the right hand operand of %s operator",
comment.type));
AddAttr<bool>("force_cpu", AddAttr<bool>("force_cpu",
"(bool, default false) Force fill output variable to cpu " "Force fill output variable to cpu "
"memory. Otherwise, fill output variable to the running " "memory. Otherwise, fill output variable to the running "
"device") "device [default true].")
.SetDefault(false); .SetDefault(true);
AddOutput("Out", string::Sprintf( AddOutput("Out", string::Sprintf("n-dim bool tensor. Each element is %s",
"(LoDTensor) n-dim bool tensor. Each element is %s", comment.equation));
comment.equation)); AddComment(string::Sprintf(R"DOC(
AddComment(string::Sprintf(R"DOC(%s Operator
It operates element-wise on X and Y, and returns the Out. Each of them is a It operates element-wise on X and Y, and returns the Out. Each of them is a
N-dim tensor. X and Y could be any type. The each element of the Out tensor is N-dim tensor. X and Y could be any type. The each element of the Out tensor is
calculated by %s calculated by $%s$
)DOC", )DOC",
comment.type, comment.equation)); comment.equation));
AddAttr<int>("axis", AddAttr<int>(
"(int, default -1). The start dimension index " "axis",
"for broadcasting Y onto X.") "The start dimension index for broadcasting Y onto X. [default -1]")
.SetDefault(-1) .SetDefault(-1)
.EqualGreaterThan(-1); .EqualGreaterThan(-1);
} }
......
...@@ -107,7 +107,13 @@ REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker, ...@@ -107,7 +107,13 @@ REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker,
false> /* set false to disable empty grad */); false> /* set false to disable empty grad */);
REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad); REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
concat, ops::ConcatKernel<paddle::platform::CPUDeviceContext, float>); concat, ops::ConcatKernel<paddle::platform::CPUDeviceContext, double>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, float>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, int>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
concat_grad, concat_grad,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, float>); ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int>);
...@@ -15,7 +15,13 @@ limitations under the License. */ ...@@ -15,7 +15,13 @@ limitations under the License. */
#include "paddle/fluid/operators/concat_op.h" #include "paddle/fluid/operators/concat_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
concat, ops::ConcatKernel<paddle::platform::CUDADeviceContext, float>); concat, ops::ConcatKernel<paddle::platform::CUDADeviceContext, double>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, float>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, int>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
concat_grad, concat_grad,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, float>); ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int>);
...@@ -75,9 +75,8 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -75,9 +75,8 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
framework::OpKernelType ConvOp::GetExpectedKernelType( framework::OpKernelType ConvOp::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const { const framework::ExecutionContext& ctx) const {
framework::LibraryType library{framework::LibraryType::kPlain}; framework::LibraryType library{framework::LibraryType::kPlain};
std::string data_format = ctx.Attr<std::string>("data_format");
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout = framework::StringToDataLayout(data_format); framework::DataLayout layout = framework::StringToDataLayout(data_format);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
......
...@@ -156,7 +156,7 @@ Parameters(strides, paddings) are two elements. These two elements represent hei ...@@ -156,7 +156,7 @@ Parameters(strides, paddings) are two elements. These two elements represent hei
and width, respectively. and width, respectively.
The input(X) size and output(Out) size may be different. The input(X) size and output(Out) size may be different.
Example: For an example:
Input: Input:
Input shape: $(N, C_{in}, H_{in}, W_{in})$ Input shape: $(N, C_{in}, H_{in}, W_{in})$
Filter shape: $(C_{in}, C_{out}, H_f, W_f)$ Filter shape: $(C_{in}, C_{out}, H_f, W_f)$
......
...@@ -76,9 +76,9 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -76,9 +76,9 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
.AsIntermediate(); .AsIntermediate();
AddComment(R"DOC( AddComment(R"DOC(
Cosine Similarity Operator. **Cosine Similarity Operator**
$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$ $Out = \frac{X^T * Y}{(\sqrt{X^T * X} * \sqrt{Y^T * Y})}$
The input X and Y must have the same shape, except that the 1st dimension The input X and Y must have the same shape, except that the 1st dimension
of input Y could be just 1 (different from input X), which will be of input Y could be just 1 (different from input X), which will be
......
...@@ -53,21 +53,18 @@ sequence of observed tags. ...@@ -53,21 +53,18 @@ sequence of observed tags.
The output of this operator changes according to whether Input(Label) is given: The output of this operator changes according to whether Input(Label) is given:
1. Input(Label) is given: 1. Input(Label) is given:
This happens in training. This operator is used to co-work with the chunk_eval
This happens in training. This operator is used to co-work with the chunk_eval operator.
operator. When Input(Label) is given, the crf_decoding operator returns a row vector
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect
When Input(Label) is given, the crf_decoding operator returns a row vector prediction, or 1 indicating a tag is correctly predicted. Such an output is the
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect input to chunk_eval operator.
prediction, or 1 indicating a tag is correctly predicted. Such an output is the
input to chunk_eval operator.
2. Input(Label) is not given: 2. Input(Label) is not given:
This is the standard decoding process.
This is the standard decoding process.
The crf_decoding operator returns a row vector with shape [N x 1] whose values The crf_decoding operator returns a row vector with shape [N x 1] whose values
range from 0 to maximum tag number - 1. Each element indicates an index of a range from 0 to maximum tag number - 1, Each element indicates an index of a
predicted tag. predicted tag.
)DOC"); )DOC");
} }
......
...@@ -52,7 +52,7 @@ static std::vector<int> GetOffsets(const framework::ExecutionContext& ctx) { ...@@ -52,7 +52,7 @@ static std::vector<int> GetOffsets(const framework::ExecutionContext& ctx) {
} else { } else {
res = ctx.Attr<std::vector<int>>("offsets"); res = ctx.Attr<std::vector<int>>("offsets");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
rank, res.size(), rank, static_cast<int>(res.size()),
"Offsets size should be equal to dimension size of input tensor."); "Offsets size should be equal to dimension size of input tensor.");
} }
return res; return res;
......
...@@ -30,19 +30,19 @@ class CumOp : public framework::OperatorWithKernel { ...@@ -30,19 +30,19 @@ class CumOp : public framework::OperatorWithKernel {
class CumsumOpMaker : public framework::OpProtoAndCheckerMaker { class CumsumOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", "Input of Cumsum operator"); AddInput("X", "Input of cumsum operator");
AddOutput("Out", "Output of Cumsum operator"); AddOutput("Out", "Output of cumsum operator");
AddAttr<int>("axis", AddAttr<int>("axis",
"(int, default -1). The dimenstion to accumulate along. " "The dimenstion to accumulate along. -1 means the last "
"-1 means the last dimenstion") "dimenstion [default -1].")
.SetDefault(-1) .SetDefault(-1)
.EqualGreaterThan(-1); .EqualGreaterThan(-1);
AddAttr<bool>("exclusive", AddAttr<bool>("exclusive",
"bool, default false). Whether to perform exclusive cumsum") "Whether to perform exclusive cumsum. [default false].")
.SetDefault(false); .SetDefault(false);
AddAttr<bool>("reverse", AddAttr<bool>("reverse",
"bool, default false). If true, the cumsum is performed in " "If true, the cumsum is performed in the reversed direction. "
"the reversed direction") "[default false].")
.SetDefault(false); .SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
The cumulative sum of the elements along a given axis. The cumulative sum of the elements along a given axis.
......
...@@ -262,7 +262,7 @@ void GRPCClient::Proceed() { ...@@ -262,7 +262,7 @@ void GRPCClient::Proceed() {
if (c->status_.ok()) { if (c->status_.ok()) {
c->Process(); c->Process();
} else { } else {
LOG(ERROR) << "var: " << c->var_h_.String() LOG(FATAL) << "var: " << c->var_h_.String()
<< " grpc error:" << c->status_.error_message(); << " grpc error:" << c->status_.error_message();
} }
delete c; delete c;
......
...@@ -169,7 +169,8 @@ class RequestPrefetch final : public RequestBase { ...@@ -169,7 +169,8 @@ class RequestPrefetch final : public RequestBase {
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
auto invar = scope->FindVar(in_var_name); auto invar = scope->FindVar(in_var_name);
framework::Variable* outvar = scope->FindVar(out_var_name); // out var must be created in local scope!
framework::Variable* outvar = scope->Var(out_var_name);
request_handler_->Handle(in_var_name, scope, invar, &outvar, out_var_name); request_handler_->Handle(in_var_name, scope, invar, &outvar, out_var_name);
...@@ -207,7 +208,8 @@ class RequestCheckpointNotify final : public RequestBase { ...@@ -207,7 +208,8 @@ class RequestCheckpointNotify final : public RequestBase {
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
std::string checkpoint_notify = request_->Varname(); std::string checkpoint_notify = request_->Varname();
std::string checkpoint_dir = request_->Varname(); std::string checkpoint_dir = request_->OutVarname();
framework::Variable* invar = nullptr; framework::Variable* invar = nullptr;
framework::Variable* outvar = nullptr; framework::Variable* outvar = nullptr;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/detail/request_handler_impl.h" #include "paddle/fluid/operators/detail/request_handler_impl.h"
#include "paddle/fluid/operators/detail/rpc_server.h" #include "paddle/fluid/operators/detail/rpc_server.h"
#include "paddle/fluid/string/printf.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -124,6 +125,11 @@ bool RequestCheckpointHandler::Handle(const std::string& varname, ...@@ -124,6 +125,11 @@ bool RequestCheckpointHandler::Handle(const std::string& varname,
framework::Variable* invar, framework::Variable* invar,
framework::Variable** outvar, framework::Variable** outvar,
const std::string& out_var_name) { const std::string& out_var_name) {
auto *lt_var = scope->FindVar("loopup_table_path")->GetMutable<std::string>();
lt_var->clear();
lt_var->append(out_var_name);
VLOG(4) << "RequestCheckpointHandler update loopup_table_path to: " << out_var_name;
executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope); executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope);
return true; return true;
} }
......
...@@ -106,23 +106,36 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -106,23 +106,36 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
"and M represents the number of deocded boxes."); "and M represents the number of deocded boxes.");
AddComment(R"DOC( AddComment(R"DOC(
Bounding Box Coder Operator.
Bounding Box Coder.
Encode/Decode the target bounding box with the priorbox information. Encode/Decode the target bounding box with the priorbox information.
The Encoding schema described below: The Encoding schema described below:
ox = (tx - px) / pw / pxv
oy = (ty - py) / ph / pyv ox = (tx - px) / pw / pxv
ow = log(abs(tw / pw)) / pwv
oh = log(abs(th / ph)) / phv oy = (ty - py) / ph / pyv
ow = log(abs(tw / pw)) / pwv
oh = log(abs(th / ph)) / phv
The Decoding schema described below: The Decoding schema described below:
ox = (pw * pxv * tx * + px) - tw / 2
oy = (ph * pyv * ty * + py) - th / 2 ox = (pw * pxv * tx * + px) - tw / 2
ow = exp(pwv * tw) * pw + tw / 2
oh = exp(phv * th) * ph + th / 2 oy = (ph * pyv * ty * + py) - th / 2
where tx, ty, tw, th denote the target box's center coordinates, width and
height respectively. Similarly, px, py, pw, ph denote the priorbox's(anchor) ow = exp(pwv * tw) * pw + tw / 2
center coordinates, width and height. pxv, pyv, pwv, phv denote the variance
of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates, oh = exp(phv * th) * ph + th / 2
width and height.
where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width
and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the
priorbox's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`,
`phv` denote the variance of the priorbox and `ox`, `oy`, `ow`, `oh` denote the
encoded/decoded coordinates, width and height.
)DOC"); )DOC");
} }
}; };
......
...@@ -83,11 +83,13 @@ class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -83,11 +83,13 @@ class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC( AddComment(R"DOC(
PolygonBoxTransform Operator. PolygonBoxTransform Operator.
PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate.
The input is the final geometry output in detection network. The input is the final geometry output in detection network.
We use 2*n numbers to denote the coordinate shift from n corner vertices of We use 2*n numbers to denote the coordinate shift from n corner vertices of
the polygon_box to the pixel location. As each distance offset contains two numbers (xi, yi), the polygon_box to the pixel location. As each distance offset contains two numbers (xi, yi),
the geometry output contains 2*n channels. the geometry output contains 2*n channels.
PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate.
)DOC"); )DOC");
} }
}; };
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册