提交 d0a8eea2 编写于 作者: F fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into expose_Parameter_2

...@@ -76,7 +76,8 @@ RUN easy_install -U pip && \ ...@@ -76,7 +76,8 @@ RUN easy_install -U pip && \
pip install sphinx-rtd-theme==0.1.9 recommonmark pip install sphinx-rtd-theme==0.1.9 recommonmark
RUN pip install pre-commit 'ipython==5.3.0' && \ RUN pip install pre-commit 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip install opencv-python
#For docstring checker #For docstring checker
RUN pip install pylint pytest astroid isort RUN pip install pylint pytest astroid isort
......
...@@ -40,12 +40,12 @@ ExternalProject_Add( ...@@ -40,12 +40,12 @@ ExternalProject_Add(
# NOTE(wuyi): # NOTE(wuyi):
# this package is generated by following steps: # this package is generated by following steps:
# 1. git clone -b v1.8.x https://github.com/grpc/grpc.git # 1. git clone -b v1.8.x https://github.com/grpc/grpc.git
# 2. submodule update --init # 2. git submodule update --init
# 3. keep only zlib, cares, protobuf, boringssl under "third_party", # 3. keep only zlib, cares, protobuf, boringssl under "third_party",
# checkout and clean other dirs under third_party # checkout and clean other dirs under third_party
# 4. remove .git, and package the directory. # 4. remove .git, and package the directory.
URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.8.x.tar.gz" URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.10.x.tar.gz"
URL_MD5 "c9c58ee7d0e8929a63155af6a2ecdbd0" URL_MD5 "1f268a2aff6759839dccd256adcc91cf"
PREFIX ${GRPC_SOURCES_DIR} PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -54,7 +54,7 @@ ExternalProject_Add( ...@@ -54,7 +54,7 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${MKLDNN_DEPENDS} DEPENDS ${MKLDNN_DEPENDS}
GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
GIT_TAG "db3424ad44901513c03a1ea31ccaacdf633fbe9f" GIT_TAG "a29d8487a63afca3d5b8c5bbdbb473cf8ccc6e51"
PREFIX ${MKLDNN_SOURCES_DIR} PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
========= =============
evaluator fluid.average
========= =============
.. _api_fluid_average_WeightedAverage:
WeightedAverage
---------------
.. autoclass:: paddle.fluid.average.WeightedAverage
:members:
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
==============
fluid.backward
==============
.. _api_fluid_backward_append_backward:
append_backward
---------------
.. autofunction:: paddle.fluid.backward.append_backward
:noindex:
.. _api_fluid_backward_calc_gradient:
calc_gradient
-------------
.. autofunction:: paddle.fluid.backward.calc_gradient
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
==== ==========
clip fluid.clip
==== ==========
.. _api_fluid_clip_ErrorClipByValue:
ErrorClipByValue ErrorClipByValue
---------------- ----------------
...@@ -12,6 +14,8 @@ ErrorClipByValue ...@@ -12,6 +14,8 @@ ErrorClipByValue
:members: :members:
:noindex: :noindex:
.. _api_fluid_clip_GradientClipByValue:
GradientClipByValue GradientClipByValue
------------------- -------------------
...@@ -19,6 +23,8 @@ GradientClipByValue ...@@ -19,6 +23,8 @@ GradientClipByValue
:members: :members:
:noindex: :noindex:
.. _api_fluid_clip_GradientClipByNorm:
GradientClipByNorm GradientClipByNorm
------------------ ------------------
...@@ -26,6 +32,8 @@ GradientClipByNorm ...@@ -26,6 +32,8 @@ GradientClipByNorm
:members: :members:
:noindex: :noindex:
.. _api_fluid_clip_GradientClipByGlobalNorm:
GradientClipByGlobalNorm GradientClipByGlobalNorm
------------------------ ------------------------
...@@ -33,15 +41,3 @@ GradientClipByGlobalNorm ...@@ -33,15 +41,3 @@ GradientClipByGlobalNorm
:members: :members:
:noindex: :noindex:
append_gradient_clip_ops
------------------------
.. autofunction:: paddle.fluid.clip.append_gradient_clip_ops
:noindex:
error_clip_callback
-------------------
.. autofunction:: paddle.fluid.clip.error_clip_callback
:noindex:
==================================
Data Reader Interface and DataSets
==================================
.. toctree::
:maxdepth: 1
data/data_reader.rst
data/image.rst
data/dataset.rst
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
=========== =================
data_feeder fluid.data_feeder
=========== =================
.. _api_fluid_data_feeder_DataFeeder:
DataFeeder DataFeeder
---------- ----------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
======== ==============
executor fluid.executor
======== ==============
.. _api_fluid_executor_Executor:
Executor Executor
-------- --------
...@@ -12,24 +14,32 @@ Executor ...@@ -12,24 +14,32 @@ Executor
:members: :members:
:noindex: :noindex:
.. _api_fluid_executor_global_scope:
global_scope global_scope
------------ ------------
.. autofunction:: paddle.fluid.executor.global_scope .. autofunction:: paddle.fluid.executor.global_scope
:noindex: :noindex:
.. _api_fluid_executor_scope_guard:
scope_guard scope_guard
----------- -----------
.. autofunction:: paddle.fluid.executor.scope_guard .. autofunction:: paddle.fluid.executor.scope_guard
:noindex: :noindex:
switch_scope .. _api_fluid_executor__switch_scope:
------------
_switch_scope
-------------
.. autofunction:: paddle.fluid.executor.switch_scope .. autofunction:: paddle.fluid.executor._switch_scope
:noindex: :noindex:
.. _api_fluid_executor_fetch_var:
fetch_var fetch_var
--------- ---------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=====
fluid
=====
.. _api_fluid_Block:
Block
-----
.. autoclass:: paddle.fluid.Block
:members:
:noindex:
.. _api_fluid_Variable:
Variable
--------
.. autoclass:: paddle.fluid.Variable
:members:
:noindex:
.. _api_fluid_Program:
Program
-------
.. autoclass:: paddle.fluid.Program
:members:
:noindex:
.. _api_fluid_Operator:
Operator
--------
.. autoclass:: paddle.fluid.Operator
:members:
:noindex:
.. _api_fluid_default_startup_program:
default_startup_program
-----------------------
.. autofunction:: paddle.fluid.default_startup_program
:noindex:
.. _api_fluid_default_main_program:
default_main_program
--------------------
.. autofunction:: paddle.fluid.default_main_program
:noindex:
.. _api_fluid_program_guard:
program_guard
-------------
.. autofunction:: paddle.fluid.program_guard
:noindex:
.. _api_fluid_get_var:
get_var
-------
.. autofunction:: paddle.fluid.get_var
:noindex:
.. _api_fluid_Executor:
Executor
--------
.. autoclass:: paddle.fluid.Executor
:members:
:noindex:
.. _api_fluid_global_scope:
global_scope
------------
.. autofunction:: paddle.fluid.global_scope
:noindex:
.. _api_fluid_scope_guard:
scope_guard
-----------
.. autofunction:: paddle.fluid.scope_guard
:noindex:
.. _api_fluid__switch_scope:
_switch_scope
-------------
.. autofunction:: paddle.fluid._switch_scope
:noindex:
.. _api_fluid_fetch_var:
fetch_var
---------
.. autofunction:: paddle.fluid.fetch_var
:noindex:
.. _api_fluid_Go:
Go
--
.. autoclass:: paddle.fluid.Go
:members:
:noindex:
.. _api_fluid_make_channel:
make_channel
------------
.. autofunction:: paddle.fluid.make_channel
:noindex:
.. _api_fluid_channel_send:
channel_send
------------
.. autofunction:: paddle.fluid.channel_send
:noindex:
.. _api_fluid_channel_recv:
channel_recv
------------
.. autofunction:: paddle.fluid.channel_recv
:noindex:
.. _api_fluid_channel_close:
channel_close
-------------
.. autofunction:: paddle.fluid.channel_close
:noindex:
.. _api_fluid_Select:
Select
------
.. autoclass:: paddle.fluid.Select
:members:
:noindex:
.. _api_fluid_Trainer:
Trainer
-------
.. autoclass:: paddle.fluid.Trainer
:members:
:noindex:
.. _api_fluid_BeginEpochEvent:
BeginEpochEvent
---------------
.. autoclass:: paddle.fluid.BeginEpochEvent
:members:
:noindex:
.. _api_fluid_EndEpochEvent:
EndEpochEvent
-------------
.. autoclass:: paddle.fluid.EndEpochEvent
:members:
:noindex:
.. _api_fluid_BeginStepEvent:
BeginStepEvent
--------------
.. autoclass:: paddle.fluid.BeginStepEvent
:members:
:noindex:
.. _api_fluid_EndStepEvent:
EndStepEvent
------------
.. autoclass:: paddle.fluid.EndStepEvent
:members:
:noindex:
.. _api_fluid_CheckpointConfig:
CheckpointConfig
----------------
.. autoclass:: paddle.fluid.CheckpointConfig
:members:
:noindex:
.. _api_fluid_Inferencer:
Inferencer
----------
.. autoclass:: paddle.fluid.Inferencer
:members:
:noindex:
.. _api_fluid_DistributeTranspiler:
DistributeTranspiler
--------------------
.. autoclass:: paddle.fluid.DistributeTranspiler
:members:
:noindex:
.. _api_fluid_memory_optimize:
memory_optimize
---------------
.. autofunction:: paddle.fluid.memory_optimize
:noindex:
.. _api_fluid_release_memory:
release_memory
--------------
.. autofunction:: paddle.fluid.release_memory
:noindex:
.. _api_fluid_ParallelExecutor:
ParallelExecutor
----------------
.. autoclass:: paddle.fluid.ParallelExecutor
:members:
:noindex:
.. _api_fluid_ExecutionStrategy:
ExecutionStrategy
-----------------
.. autoclass:: paddle.fluid.ExecutionStrategy
:members:
:noindex:
.. _api_fluid_BuildStrategy:
BuildStrategy
-------------
.. autoclass:: paddle.fluid.BuildStrategy
:members:
:noindex:
.. _api_fluid_create_lod_tensor:
create_lod_tensor
-----------------
.. autofunction:: paddle.fluid.create_lod_tensor
:noindex:
.. _api_fluid_create_random_int_lodtensor:
create_random_int_lodtensor
---------------------------
.. autofunction:: paddle.fluid.create_random_int_lodtensor
:noindex:
.. _api_fluid_LoDTensor:
LoDTensor
---------
.. autoclass:: paddle.fluid.LoDTensor
:members:
:noindex:
.. _api_fluid_CPUPlace:
CPUPlace
--------
.. autoclass:: paddle.fluid.CPUPlace
:members:
:noindex:
.. _api_fluid_CUDAPlace:
CUDAPlace
---------
.. autoclass:: paddle.fluid.CUDAPlace
:members:
:noindex:
.. _api_fluid_CUDAPinnedPlace:
CUDAPinnedPlace
---------------
.. autoclass:: paddle.fluid.CUDAPinnedPlace
:members:
:noindex:
.. _api_fluid_Tensor:
Tensor
------
.. autoclass:: paddle.fluid.Tensor
:members:
:noindex:
.. _api_fluid_ParamAttr:
ParamAttr
---------
.. autoclass:: paddle.fluid.ParamAttr
:members:
:noindex:
.. _api_fluid_WeightNormParamAttr:
WeightNormParamAttr
-------------------
.. autoclass:: paddle.fluid.WeightNormParamAttr
:members:
:noindex:
.. _api_fluid_DataFeeder:
DataFeeder
----------
.. autoclass:: paddle.fluid.DataFeeder
:members:
:noindex:
.. _api_fluid_Scope:
Scope
-----
.. autoclass:: paddle.fluid.Scope
:members:
:noindex:
...@@ -29,19 +29,27 @@ def parse_arg(): ...@@ -29,19 +29,27 @@ def parse_arg():
class DocGenerator(object): class DocGenerator(object):
def __init__(self, module_name, stream=sys.stdout): def __init__(self, module_name=None, stream=sys.stdout):
if module_name == "":
module_name = None
self.stream = stream self.stream = stream
self.module_name = module_name if module_name is None:
if not hasattr(fluid, module_name): self.module_name = "fluid"
raise ValueError("Cannot find fluid.{0}".format(module_name))
else: else:
self.module = getattr(fluid, module_name) self.module_name = "fluid." + module_name
if module_name is None:
self.module = fluid
else:
if not hasattr(fluid, module_name):
raise ValueError("Cannot find fluid.{0}".format(module_name))
else:
self.module = getattr(fluid, module_name)
self.stream.write('''.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` self.stream.write('''.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
''') ''')
self._print_header_(module_name, dot='=', is_title=True) self._print_header_(self.module_name, dot='=', is_title=True)
def print_submodule(self, submodule_name): def print_submodule(self, submodule_name):
submodule = getattr(self.module, submodule_name) submodule = getattr(self.module, submodule_name)
...@@ -60,25 +68,29 @@ class DocGenerator(object): ...@@ -60,25 +68,29 @@ class DocGenerator(object):
self._print_header_(name, dot='=', is_title=False) self._print_header_(name, dot='=', is_title=False)
def print_item(self, name): def print_item(self, name):
item = getattr(self.module, name) item = getattr(self.module, name, None)
if item is None:
return
if isinstance(item, types.TypeType): if isinstance(item, types.TypeType):
self.print_class(name) self.print_class(name)
elif isinstance(item, types.FunctionType): elif isinstance(item, types.FunctionType):
self.print_method(name) self.print_method(name)
else: else:
raise RuntimeError("Unsupported item {0}".format(name)) pass
def print_class(self, name): def print_class(self, name):
self._print_ref_(name)
self._print_header_(name, dot='-', is_title=False) self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autoclass:: paddle.fluid.{0}.{1} self.stream.write('''.. autoclass:: paddle.{0}.{1}
:members: :members:
:noindex: :noindex:
'''.format(self.module_name, name)) '''.format(self.module_name, name))
def print_method(self, name): def print_method(self, name):
self._print_ref_(name)
self._print_header_(name, dot='-', is_title=False) self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autofunction:: paddle.fluid.{0}.{1} self.stream.write('''.. autofunction:: paddle.{0}.{1}
:noindex: :noindex:
'''.format(self.module_name, name)) '''.format(self.module_name, name))
...@@ -94,6 +106,10 @@ class DocGenerator(object): ...@@ -94,6 +106,10 @@ class DocGenerator(object):
self.stream.write('\n') self.stream.write('\n')
self.stream.write('\n') self.stream.write('\n')
def _print_ref_(self, name):
self.stream.write(".. _api_{0}_{1}:\n\n".format("_".join(
self.module_name.split(".")), name))
def main(): def main():
args = parse_arg() args = parse_arg()
......
#!/bin/bash #!/bin/bash
python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler metric > layers.rst python gen_doc.py layers --submodules control_flow device io nn ops tensor learning_rate_scheduler detection metric_op tensor > layers.rst
for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer transpiler for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer transpiler recordio_writer backward average profiler
do do
python gen_doc.py ${module} > ${module}.rst python gen_doc.py ${module} > ${module}.rst
done done
python gen_doc.py "" > fluid.rst
====================== =============
Fluid API Reference
====================== =============
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
fluid.rst
layers.rst layers.rst
data_feeder.rst data_feeder.rst
executor.rst executor.rst
...@@ -18,3 +19,8 @@ Fluid ...@@ -18,3 +19,8 @@ Fluid
regularizer.rst regularizer.rst
io.rst io.rst
data.rst data.rst
transpiler.rst
recordio_writer.rst
backward.rst
average.rst
profiler.rst
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
=========== =================
initializer fluid.initializer
=========== =================
.. _api_fluid_initializer_Constant:
Constant Constant
-------- --------
...@@ -12,6 +14,8 @@ Constant ...@@ -12,6 +14,8 @@ Constant
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_Uniform:
Uniform Uniform
------- -------
...@@ -19,6 +23,8 @@ Uniform ...@@ -19,6 +23,8 @@ Uniform
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_Normal:
Normal Normal
------ ------
...@@ -26,6 +32,8 @@ Normal ...@@ -26,6 +32,8 @@ Normal
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_Xavier:
Xavier Xavier
------ ------
...@@ -33,6 +41,8 @@ Xavier ...@@ -33,6 +41,8 @@ Xavier
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_Bilinear:
Bilinear Bilinear
-------- --------
...@@ -40,18 +50,33 @@ Bilinear ...@@ -40,18 +50,33 @@ Bilinear
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_MSRA:
MSRA
----
.. autoclass:: paddle.fluid.initializer.MSRA
:members:
:noindex:
.. _api_fluid_initializer_force_init_on_cpu:
force_init_on_cpu force_init_on_cpu
----------------- -----------------
.. autofunction:: paddle.fluid.initializer.force_init_on_cpu .. autofunction:: paddle.fluid.initializer.force_init_on_cpu
:noindex: :noindex:
.. _api_fluid_initializer_init_on_cpu:
init_on_cpu init_on_cpu
----------- -----------
.. autofunction:: paddle.fluid.initializer.init_on_cpu .. autofunction:: paddle.fluid.initializer.init_on_cpu
:noindex: :noindex:
.. _api_fluid_initializer_ConstantInitializer:
ConstantInitializer ConstantInitializer
------------------- -------------------
...@@ -59,6 +84,8 @@ ConstantInitializer ...@@ -59,6 +84,8 @@ ConstantInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_UniformInitializer:
UniformInitializer UniformInitializer
------------------ ------------------
...@@ -66,6 +93,8 @@ UniformInitializer ...@@ -66,6 +93,8 @@ UniformInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_NormalInitializer:
NormalInitializer NormalInitializer
----------------- -----------------
...@@ -73,6 +102,8 @@ NormalInitializer ...@@ -73,6 +102,8 @@ NormalInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_XavierInitializer:
XavierInitializer XavierInitializer
----------------- -----------------
...@@ -80,6 +111,8 @@ XavierInitializer ...@@ -80,6 +111,8 @@ XavierInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_BilinearInitializer:
BilinearInitializer BilinearInitializer
------------------- -------------------
...@@ -87,3 +120,12 @@ BilinearInitializer ...@@ -87,3 +120,12 @@ BilinearInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_MSRAInitializer:
MSRAInitializer
---------------
.. autoclass:: paddle.fluid.initializer.MSRAInitializer
:members:
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
== ========
io fluid.io
== ========
.. _api_fluid_io_save_vars:
save_vars save_vars
--------- ---------
...@@ -11,84 +13,112 @@ save_vars ...@@ -11,84 +13,112 @@ save_vars
.. autofunction:: paddle.fluid.io.save_vars .. autofunction:: paddle.fluid.io.save_vars
:noindex: :noindex:
.. _api_fluid_io_save_params:
save_params save_params
----------- -----------
.. autofunction:: paddle.fluid.io.save_params .. autofunction:: paddle.fluid.io.save_params
:noindex: :noindex:
.. _api_fluid_io_save_persistables:
save_persistables save_persistables
----------------- -----------------
.. autofunction:: paddle.fluid.io.save_persistables .. autofunction:: paddle.fluid.io.save_persistables
:noindex: :noindex:
.. _api_fluid_io_load_vars:
load_vars load_vars
--------- ---------
.. autofunction:: paddle.fluid.io.load_vars .. autofunction:: paddle.fluid.io.load_vars
:noindex: :noindex:
.. _api_fluid_io_load_params:
load_params load_params
----------- -----------
.. autofunction:: paddle.fluid.io.load_params .. autofunction:: paddle.fluid.io.load_params
:noindex: :noindex:
.. _api_fluid_io_load_persistables:
load_persistables load_persistables
----------------- -----------------
.. autofunction:: paddle.fluid.io.load_persistables .. autofunction:: paddle.fluid.io.load_persistables
:noindex: :noindex:
.. _api_fluid_io_save_inference_model:
save_inference_model save_inference_model
-------------------- --------------------
.. autofunction:: paddle.fluid.io.save_inference_model .. autofunction:: paddle.fluid.io.save_inference_model
:noindex: :noindex:
.. _api_fluid_io_load_inference_model:
load_inference_model load_inference_model
-------------------- --------------------
.. autofunction:: paddle.fluid.io.load_inference_model .. autofunction:: paddle.fluid.io.load_inference_model
:noindex: :noindex:
.. _api_fluid_io_get_inference_program:
get_inference_program get_inference_program
--------------------- ---------------------
.. autofunction:: paddle.fluid.io.get_inference_program .. autofunction:: paddle.fluid.io.get_inference_program
:noindex: :noindex:
.. _api_fluid_io_save_checkpoint:
save_checkpoint save_checkpoint
--------------- ---------------
.. autofunction:: paddle.fluid.io.save_checkpoint .. autofunction:: paddle.fluid.io.save_checkpoint
:noindex: :noindex:
.. _api_fluid_io_load_checkpoint:
load_checkpoint load_checkpoint
--------------- ---------------
.. autofunction:: paddle.fluid.io.load_checkpoint .. autofunction:: paddle.fluid.io.load_checkpoint
:noindex: :noindex:
.. _api_fluid_io_clean_checkpoint:
clean_checkpoint clean_checkpoint
---------------- ----------------
.. autofunction:: paddle.fluid.io.clean_checkpoint .. autofunction:: paddle.fluid.io.clean_checkpoint
:noindex: :noindex:
.. _api_fluid_io_load_persist_vars_without_grad:
load_persist_vars_without_grad load_persist_vars_without_grad
------------------------------ ------------------------------
.. autofunction:: paddle.fluid.io.load_persist_vars_without_grad .. autofunction:: paddle.fluid.io.load_persist_vars_without_grad
:noindex: :noindex:
.. _api_fluid_io_save_persist_vars_without_grad:
save_persist_vars_without_grad save_persist_vars_without_grad
------------------------------ ------------------------------
.. autofunction:: paddle.fluid.io.save_persist_vars_without_grad .. autofunction:: paddle.fluid.io.save_persist_vars_without_grad
:noindex: :noindex:
.. _api_fluid_io_get_latest_checkpoint_serial:
get_latest_checkpoint_serial get_latest_checkpoint_serial
---------------------------- ----------------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
====== ============
layers fluid.layers
====== ============
control_flow control_flow
============ ============
.. _api_fluid_layers_split_lod_tensor:
split_lod_tensor split_lod_tensor
---------------- ----------------
.. autofunction:: paddle.fluid.layers.split_lod_tensor .. autofunction:: paddle.fluid.layers.split_lod_tensor
:noindex: :noindex:
.. _api_fluid_layers_merge_lod_tensor:
merge_lod_tensor merge_lod_tensor
---------------- ----------------
.. autofunction:: paddle.fluid.layers.merge_lod_tensor .. autofunction:: paddle.fluid.layers.merge_lod_tensor
:noindex: :noindex:
.. _api_fluid_layers_BlockGuard:
BlockGuard BlockGuard
---------- ----------
...@@ -27,6 +33,8 @@ BlockGuard ...@@ -27,6 +33,8 @@ BlockGuard
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_BlockGuardWithCompletion:
BlockGuardWithCompletion BlockGuardWithCompletion
------------------------ ------------------------
...@@ -34,12 +42,7 @@ BlockGuardWithCompletion ...@@ -34,12 +42,7 @@ BlockGuardWithCompletion
:members: :members:
:noindex: :noindex:
StaticRNNMemoryLink .. _api_fluid_layers_WhileGuard:
-------------------
.. autoclass:: paddle.fluid.layers.StaticRNNMemoryLink
:members:
:noindex:
WhileGuard WhileGuard
---------- ----------
...@@ -48,6 +51,8 @@ WhileGuard ...@@ -48,6 +51,8 @@ WhileGuard
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_While:
While While
----- -----
...@@ -55,6 +60,8 @@ While ...@@ -55,6 +60,8 @@ While
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_Switch:
Switch Switch
------ ------
...@@ -62,78 +69,104 @@ Switch ...@@ -62,78 +69,104 @@ Switch
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_lod_rank_table:
lod_rank_table lod_rank_table
-------------- --------------
.. autofunction:: paddle.fluid.layers.lod_rank_table .. autofunction:: paddle.fluid.layers.lod_rank_table
:noindex: :noindex:
.. _api_fluid_layers_max_sequence_len:
max_sequence_len max_sequence_len
---------------- ----------------
.. autofunction:: paddle.fluid.layers.max_sequence_len .. autofunction:: paddle.fluid.layers.max_sequence_len
:noindex: :noindex:
.. _api_fluid_layers_lod_tensor_to_array:
lod_tensor_to_array lod_tensor_to_array
------------------- -------------------
.. autofunction:: paddle.fluid.layers.lod_tensor_to_array .. autofunction:: paddle.fluid.layers.lod_tensor_to_array
:noindex: :noindex:
.. _api_fluid_layers_array_to_lod_tensor:
array_to_lod_tensor array_to_lod_tensor
------------------- -------------------
.. autofunction:: paddle.fluid.layers.array_to_lod_tensor .. autofunction:: paddle.fluid.layers.array_to_lod_tensor
:noindex: :noindex:
.. _api_fluid_layers_increment:
increment increment
--------- ---------
.. autofunction:: paddle.fluid.layers.increment .. autofunction:: paddle.fluid.layers.increment
:noindex: :noindex:
.. _api_fluid_layers_array_write:
array_write array_write
----------- -----------
.. autofunction:: paddle.fluid.layers.array_write .. autofunction:: paddle.fluid.layers.array_write
:noindex: :noindex:
.. _api_fluid_layers_create_array:
create_array create_array
------------ ------------
.. autofunction:: paddle.fluid.layers.create_array .. autofunction:: paddle.fluid.layers.create_array
:noindex: :noindex:
.. _api_fluid_layers_less_than:
less_than less_than
--------- ---------
.. autofunction:: paddle.fluid.layers.less_than .. autofunction:: paddle.fluid.layers.less_than
:noindex: :noindex:
.. _api_fluid_layers_equal:
equal equal
----- -----
.. autofunction:: paddle.fluid.layers.equal .. autofunction:: paddle.fluid.layers.equal
:noindex: :noindex:
.. _api_fluid_layers_array_read:
array_read array_read
---------- ----------
.. autofunction:: paddle.fluid.layers.array_read .. autofunction:: paddle.fluid.layers.array_read
:noindex: :noindex:
.. _api_fluid_layers_shrink_memory:
shrink_memory shrink_memory
------------- -------------
.. autofunction:: paddle.fluid.layers.shrink_memory .. autofunction:: paddle.fluid.layers.shrink_memory
:noindex: :noindex:
.. _api_fluid_layers_array_length:
array_length array_length
------------ ------------
.. autofunction:: paddle.fluid.layers.array_length .. autofunction:: paddle.fluid.layers.array_length
:noindex: :noindex:
.. _api_fluid_layers_IfElse:
IfElse IfElse
------ ------
...@@ -141,6 +174,8 @@ IfElse ...@@ -141,6 +174,8 @@ IfElse
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_DynamicRNN:
DynamicRNN DynamicRNN
---------- ----------
...@@ -148,6 +183,8 @@ DynamicRNN ...@@ -148,6 +183,8 @@ DynamicRNN
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_ConditionalBlock:
ConditionalBlock ConditionalBlock
---------------- ----------------
...@@ -155,6 +192,8 @@ ConditionalBlock ...@@ -155,6 +192,8 @@ ConditionalBlock
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_StaticRNN:
StaticRNN StaticRNN
--------- ---------
...@@ -162,12 +201,16 @@ StaticRNN ...@@ -162,12 +201,16 @@ StaticRNN
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_reorder_lod_tensor_by_rank:
reorder_lod_tensor_by_rank reorder_lod_tensor_by_rank
-------------------------- --------------------------
.. autofunction:: paddle.fluid.layers.reorder_lod_tensor_by_rank .. autofunction:: paddle.fluid.layers.reorder_lod_tensor_by_rank
:noindex: :noindex:
.. _api_fluid_layers_ParallelDo:
ParallelDo ParallelDo
---------- ----------
...@@ -175,12 +218,16 @@ ParallelDo ...@@ -175,12 +218,16 @@ ParallelDo
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_Print:
Print Print
----- -----
.. autofunction:: paddle.fluid.layers.Print .. autofunction:: paddle.fluid.layers.Print
:noindex: :noindex:
.. _api_fluid_layers_is_empty:
is_empty is_empty
-------- --------
...@@ -190,6 +237,8 @@ is_empty ...@@ -190,6 +237,8 @@ is_empty
device device
====== ======
.. _api_fluid_layers_get_places:
get_places get_places
---------- ----------
...@@ -199,12 +248,16 @@ get_places ...@@ -199,12 +248,16 @@ get_places
io io
== ==
.. _api_fluid_layers_data:
data data
---- ----
.. autofunction:: paddle.fluid.layers.data .. autofunction:: paddle.fluid.layers.data
:noindex: :noindex:
.. _api_fluid_layers_BlockGuardServ:
BlockGuardServ BlockGuardServ
-------------- --------------
...@@ -212,6 +265,8 @@ BlockGuardServ ...@@ -212,6 +265,8 @@ BlockGuardServ
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_ListenAndServ:
ListenAndServ ListenAndServ
------------- -------------
...@@ -219,60 +274,80 @@ ListenAndServ ...@@ -219,60 +274,80 @@ ListenAndServ
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_Send:
Send Send
---- ----
.. autofunction:: paddle.fluid.layers.Send .. autofunction:: paddle.fluid.layers.Send
:noindex: :noindex:
.. _api_fluid_layers_Recv:
Recv Recv
---- ----
.. autofunction:: paddle.fluid.layers.Recv .. autofunction:: paddle.fluid.layers.Recv
:noindex: :noindex:
.. _api_fluid_layers_open_recordio_file:
open_recordio_file open_recordio_file
------------------ ------------------
.. autofunction:: paddle.fluid.layers.open_recordio_file .. autofunction:: paddle.fluid.layers.open_recordio_file
:noindex: :noindex:
.. _api_fluid_layers_open_files:
open_files open_files
---------- ----------
.. autofunction:: paddle.fluid.layers.open_files .. autofunction:: paddle.fluid.layers.open_files
:noindex: :noindex:
.. _api_fluid_layers_read_file:
read_file read_file
--------- ---------
.. autofunction:: paddle.fluid.layers.read_file .. autofunction:: paddle.fluid.layers.read_file
:noindex: :noindex:
.. _api_fluid_layers_shuffle:
shuffle shuffle
------- -------
.. autofunction:: paddle.fluid.layers.shuffle .. autofunction:: paddle.fluid.layers.shuffle
:noindex: :noindex:
.. _api_fluid_layers_batch:
batch batch
----- -----
.. autofunction:: paddle.fluid.layers.batch .. autofunction:: paddle.fluid.layers.batch
:noindex: :noindex:
.. _api_fluid_layers_double_buffer:
double_buffer double_buffer
------------- -------------
.. autofunction:: paddle.fluid.layers.double_buffer .. autofunction:: paddle.fluid.layers.double_buffer
:noindex: :noindex:
.. _api_fluid_layers_random_data_generator:
random_data_generator random_data_generator
--------------------- ---------------------
.. autofunction:: paddle.fluid.layers.random_data_generator .. autofunction:: paddle.fluid.layers.random_data_generator
:noindex: :noindex:
.. _api_fluid_layers_Preprocessor:
Preprocessor Preprocessor
------------ ------------
...@@ -280,6 +355,8 @@ Preprocessor ...@@ -280,6 +355,8 @@ Preprocessor
:members: :members:
:noindex: :noindex:
.. _api_fluid_layers_load:
load load
---- ----
...@@ -289,584 +366,802 @@ load ...@@ -289,584 +366,802 @@ load
nn nn
== ==
.. _api_fluid_layers_fc:
fc fc
-- --
.. autofunction:: paddle.fluid.layers.fc .. autofunction:: paddle.fluid.layers.fc
:noindex: :noindex:
.. _api_fluid_layers_embedding:
embedding embedding
--------- ---------
.. autofunction:: paddle.fluid.layers.embedding .. autofunction:: paddle.fluid.layers.embedding
:noindex: :noindex:
.. _api_fluid_layers_dynamic_lstm:
dynamic_lstm dynamic_lstm
------------ ------------
.. autofunction:: paddle.fluid.layers.dynamic_lstm .. autofunction:: paddle.fluid.layers.dynamic_lstm
:noindex: :noindex:
.. _api_fluid_layers_dynamic_lstmp:
dynamic_lstmp dynamic_lstmp
------------- -------------
.. autofunction:: paddle.fluid.layers.dynamic_lstmp .. autofunction:: paddle.fluid.layers.dynamic_lstmp
:noindex: :noindex:
.. _api_fluid_layers_dynamic_gru:
dynamic_gru dynamic_gru
----------- -----------
.. autofunction:: paddle.fluid.layers.dynamic_gru .. autofunction:: paddle.fluid.layers.dynamic_gru
:noindex: :noindex:
.. _api_fluid_layers_gru_unit:
gru_unit gru_unit
-------- --------
.. autofunction:: paddle.fluid.layers.gru_unit .. autofunction:: paddle.fluid.layers.gru_unit
:noindex: :noindex:
.. _api_fluid_layers_linear_chain_crf:
linear_chain_crf linear_chain_crf
---------------- ----------------
.. autofunction:: paddle.fluid.layers.linear_chain_crf .. autofunction:: paddle.fluid.layers.linear_chain_crf
:noindex: :noindex:
.. _api_fluid_layers_crf_decoding:
crf_decoding crf_decoding
------------ ------------
.. autofunction:: paddle.fluid.layers.crf_decoding .. autofunction:: paddle.fluid.layers.crf_decoding
:noindex: :noindex:
.. _api_fluid_layers_cos_sim:
cos_sim cos_sim
------- -------
.. autofunction:: paddle.fluid.layers.cos_sim .. autofunction:: paddle.fluid.layers.cos_sim
:noindex: :noindex:
.. _api_fluid_layers_cross_entropy:
cross_entropy cross_entropy
------------- -------------
.. autofunction:: paddle.fluid.layers.cross_entropy .. autofunction:: paddle.fluid.layers.cross_entropy
:noindex: :noindex:
.. _api_fluid_layers_square_error_cost:
square_error_cost square_error_cost
----------------- -----------------
.. autofunction:: paddle.fluid.layers.square_error_cost .. autofunction:: paddle.fluid.layers.square_error_cost
:noindex: :noindex:
.. _api_fluid_layers_chunk_eval:
chunk_eval chunk_eval
---------- ----------
.. autofunction:: paddle.fluid.layers.chunk_eval .. autofunction:: paddle.fluid.layers.chunk_eval
:noindex: :noindex:
.. _api_fluid_layers_sequence_conv:
sequence_conv sequence_conv
------------- -------------
.. autofunction:: paddle.fluid.layers.sequence_conv .. autofunction:: paddle.fluid.layers.sequence_conv
:noindex: :noindex:
.. _api_fluid_layers_conv2d:
conv2d conv2d
------ ------
.. autofunction:: paddle.fluid.layers.conv2d .. autofunction:: paddle.fluid.layers.conv2d
:noindex: :noindex:
.. _api_fluid_layers_conv3d:
conv3d conv3d
------ ------
.. autofunction:: paddle.fluid.layers.conv3d .. autofunction:: paddle.fluid.layers.conv3d
:noindex: :noindex:
.. _api_fluid_layers_sequence_pool:
sequence_pool sequence_pool
------------- -------------
.. autofunction:: paddle.fluid.layers.sequence_pool .. autofunction:: paddle.fluid.layers.sequence_pool
:noindex: :noindex:
.. _api_fluid_layers_sequence_softmax:
sequence_softmax sequence_softmax
---------------- ----------------
.. autofunction:: paddle.fluid.layers.sequence_softmax .. autofunction:: paddle.fluid.layers.sequence_softmax
:noindex: :noindex:
.. _api_fluid_layers_softmax:
softmax softmax
------- -------
.. autofunction:: paddle.fluid.layers.softmax .. autofunction:: paddle.fluid.layers.softmax
:noindex: :noindex:
.. _api_fluid_layers_pool2d:
pool2d pool2d
------ ------
.. autofunction:: paddle.fluid.layers.pool2d .. autofunction:: paddle.fluid.layers.pool2d
:noindex: :noindex:
.. _api_fluid_layers_pool3d:
pool3d pool3d
------ ------
.. autofunction:: paddle.fluid.layers.pool3d .. autofunction:: paddle.fluid.layers.pool3d
:noindex: :noindex:
.. _api_fluid_layers_batch_norm:
batch_norm batch_norm
---------- ----------
.. autofunction:: paddle.fluid.layers.batch_norm .. autofunction:: paddle.fluid.layers.batch_norm
:noindex: :noindex:
.. _api_fluid_layers_beam_search_decode:
beam_search_decode beam_search_decode
------------------ ------------------
.. autofunction:: paddle.fluid.layers.beam_search_decode .. autofunction:: paddle.fluid.layers.beam_search_decode
:noindex: :noindex:
.. _api_fluid_layers_conv2d_transpose:
conv2d_transpose conv2d_transpose
---------------- ----------------
.. autofunction:: paddle.fluid.layers.conv2d_transpose .. autofunction:: paddle.fluid.layers.conv2d_transpose
:noindex: :noindex:
.. _api_fluid_layers_conv3d_transpose:
conv3d_transpose conv3d_transpose
---------------- ----------------
.. autofunction:: paddle.fluid.layers.conv3d_transpose .. autofunction:: paddle.fluid.layers.conv3d_transpose
:noindex: :noindex:
.. _api_fluid_layers_sequence_expand:
sequence_expand sequence_expand
--------------- ---------------
.. autofunction:: paddle.fluid.layers.sequence_expand .. autofunction:: paddle.fluid.layers.sequence_expand
:noindex: :noindex:
.. _api_fluid_layers_lstm_unit:
lstm_unit lstm_unit
--------- ---------
.. autofunction:: paddle.fluid.layers.lstm_unit .. autofunction:: paddle.fluid.layers.lstm_unit
:noindex: :noindex:
.. _api_fluid_layers_reduce_sum:
reduce_sum reduce_sum
---------- ----------
.. autofunction:: paddle.fluid.layers.reduce_sum .. autofunction:: paddle.fluid.layers.reduce_sum
:noindex: :noindex:
.. _api_fluid_layers_reduce_mean:
reduce_mean reduce_mean
----------- -----------
.. autofunction:: paddle.fluid.layers.reduce_mean .. autofunction:: paddle.fluid.layers.reduce_mean
:noindex: :noindex:
.. _api_fluid_layers_reduce_max:
reduce_max reduce_max
---------- ----------
.. autofunction:: paddle.fluid.layers.reduce_max .. autofunction:: paddle.fluid.layers.reduce_max
:noindex: :noindex:
.. _api_fluid_layers_reduce_min:
reduce_min reduce_min
---------- ----------
.. autofunction:: paddle.fluid.layers.reduce_min .. autofunction:: paddle.fluid.layers.reduce_min
:noindex: :noindex:
.. _api_fluid_layers_reduce_prod:
reduce_prod reduce_prod
----------- -----------
.. autofunction:: paddle.fluid.layers.reduce_prod .. autofunction:: paddle.fluid.layers.reduce_prod
:noindex: :noindex:
.. _api_fluid_layers_sequence_first_step:
sequence_first_step sequence_first_step
------------------- -------------------
.. autofunction:: paddle.fluid.layers.sequence_first_step .. autofunction:: paddle.fluid.layers.sequence_first_step
:noindex: :noindex:
.. _api_fluid_layers_sequence_last_step:
sequence_last_step sequence_last_step
------------------ ------------------
.. autofunction:: paddle.fluid.layers.sequence_last_step .. autofunction:: paddle.fluid.layers.sequence_last_step
:noindex: :noindex:
.. _api_fluid_layers_dropout:
dropout dropout
------- -------
.. autofunction:: paddle.fluid.layers.dropout .. autofunction:: paddle.fluid.layers.dropout
:noindex: :noindex:
.. _api_fluid_layers_split:
split split
----- -----
.. autofunction:: paddle.fluid.layers.split .. autofunction:: paddle.fluid.layers.split
:noindex: :noindex:
.. _api_fluid_layers_ctc_greedy_decoder:
ctc_greedy_decoder ctc_greedy_decoder
------------------ ------------------
.. autofunction:: paddle.fluid.layers.ctc_greedy_decoder .. autofunction:: paddle.fluid.layers.ctc_greedy_decoder
:noindex: :noindex:
.. _api_fluid_layers_edit_distance:
edit_distance edit_distance
------------- -------------
.. autofunction:: paddle.fluid.layers.edit_distance .. autofunction:: paddle.fluid.layers.edit_distance
:noindex: :noindex:
.. _api_fluid_layers_l2_normalize:
l2_normalize l2_normalize
------------ ------------
.. autofunction:: paddle.fluid.layers.l2_normalize .. autofunction:: paddle.fluid.layers.l2_normalize
:noindex: :noindex:
.. _api_fluid_layers_matmul:
matmul matmul
------ ------
.. autofunction:: paddle.fluid.layers.matmul .. autofunction:: paddle.fluid.layers.matmul
:noindex: :noindex:
.. _api_fluid_layers_topk:
topk topk
---- ----
.. autofunction:: paddle.fluid.layers.topk .. autofunction:: paddle.fluid.layers.topk
:noindex: :noindex:
.. _api_fluid_layers_warpctc:
warpctc warpctc
------- -------
.. autofunction:: paddle.fluid.layers.warpctc .. autofunction:: paddle.fluid.layers.warpctc
:noindex: :noindex:
.. _api_fluid_layers_sequence_reshape:
sequence_reshape sequence_reshape
---------------- ----------------
.. autofunction:: paddle.fluid.layers.sequence_reshape .. autofunction:: paddle.fluid.layers.sequence_reshape
:noindex: :noindex:
.. _api_fluid_layers_transpose:
transpose transpose
--------- ---------
.. autofunction:: paddle.fluid.layers.transpose .. autofunction:: paddle.fluid.layers.transpose
:noindex: :noindex:
.. _api_fluid_layers_im2sequence:
im2sequence im2sequence
----------- -----------
.. autofunction:: paddle.fluid.layers.im2sequence .. autofunction:: paddle.fluid.layers.im2sequence
:noindex: :noindex:
.. _api_fluid_layers_nce:
nce nce
--- ---
.. autofunction:: paddle.fluid.layers.nce .. autofunction:: paddle.fluid.layers.nce
:noindex: :noindex:
.. _api_fluid_layers_beam_search:
beam_search beam_search
----------- -----------
.. autofunction:: paddle.fluid.layers.beam_search .. autofunction:: paddle.fluid.layers.beam_search
:noindex: :noindex:
.. _api_fluid_layers_row_conv:
row_conv row_conv
-------- --------
.. autofunction:: paddle.fluid.layers.row_conv .. autofunction:: paddle.fluid.layers.row_conv
:noindex: :noindex:
.. _api_fluid_layers_multiplex:
multiplex multiplex
--------- ---------
.. autofunction:: paddle.fluid.layers.multiplex .. autofunction:: paddle.fluid.layers.multiplex
:noindex: :noindex:
.. _api_fluid_layers_layer_norm:
layer_norm layer_norm
---------- ----------
.. autofunction:: paddle.fluid.layers.layer_norm .. autofunction:: paddle.fluid.layers.layer_norm
:noindex: :noindex:
.. _api_fluid_layers_softmax_with_cross_entropy:
softmax_with_cross_entropy softmax_with_cross_entropy
-------------------------- --------------------------
.. autofunction:: paddle.fluid.layers.softmax_with_cross_entropy .. autofunction:: paddle.fluid.layers.softmax_with_cross_entropy
:noindex: :noindex:
.. _api_fluid_layers_smooth_l1:
smooth_l1 smooth_l1
--------- ---------
.. autofunction:: paddle.fluid.layers.smooth_l1 .. autofunction:: paddle.fluid.layers.smooth_l1
:noindex: :noindex:
.. _api_fluid_layers_one_hot:
one_hot one_hot
------- -------
.. autofunction:: paddle.fluid.layers.one_hot .. autofunction:: paddle.fluid.layers.one_hot
:noindex: :noindex:
.. _api_fluid_layers_autoincreased_step_counter:
autoincreased_step_counter autoincreased_step_counter
-------------------------- --------------------------
.. autofunction:: paddle.fluid.layers.autoincreased_step_counter .. autofunction:: paddle.fluid.layers.autoincreased_step_counter
:noindex: :noindex:
.. _api_fluid_layers_reshape:
reshape reshape
------- -------
.. autofunction:: paddle.fluid.layers.reshape .. autofunction:: paddle.fluid.layers.reshape
:noindex: :noindex:
.. _api_fluid_layers_lod_reset:
lod_reset lod_reset
--------- ---------
.. autofunction:: paddle.fluid.layers.lod_reset .. autofunction:: paddle.fluid.layers.lod_reset
:noindex: :noindex:
.. _api_fluid_layers_lrn:
lrn lrn
--- ---
.. autofunction:: paddle.fluid.layers.lrn .. autofunction:: paddle.fluid.layers.lrn
:noindex: :noindex:
.. _api_fluid_layers_pad:
pad pad
--- ---
.. autofunction:: paddle.fluid.layers.pad .. autofunction:: paddle.fluid.layers.pad
:noindex: :noindex:
.. _api_fluid_layers_label_smooth:
label_smooth label_smooth
------------ ------------
.. autofunction:: paddle.fluid.layers.label_smooth .. autofunction:: paddle.fluid.layers.label_smooth
:noindex: :noindex:
.. _api_fluid_layers_roi_pool:
roi_pool roi_pool
-------- --------
.. autofunction:: paddle.fluid.layers.roi_pool .. autofunction:: paddle.fluid.layers.roi_pool
:noindex: :noindex:
.. _api_fluid_layers_dice_loss:
dice_loss dice_loss
--------- ---------
.. autofunction:: paddle.fluid.layers.dice_loss .. autofunction:: paddle.fluid.layers.dice_loss
:noindex: :noindex:
.. _api_fluid_layers_image_resize:
image_resize image_resize
------------ ------------
.. autofunction:: paddle.fluid.layers.image_resize .. autofunction:: paddle.fluid.layers.image_resize
:noindex: :noindex:
.. _api_fluid_layers_image_resize_short:
image_resize_short image_resize_short
------------------ ------------------
.. autofunction:: paddle.fluid.layers.image_resize_short .. autofunction:: paddle.fluid.layers.image_resize_short
:noindex: :noindex:
.. _api_fluid_layers_resize_bilinear:
resize_bilinear resize_bilinear
--------------- ---------------
.. autofunction:: paddle.fluid.layers.resize_bilinear .. autofunction:: paddle.fluid.layers.resize_bilinear
:noindex: :noindex:
.. _api_fluid_layers_gather:
gather gather
------ ------
.. autofunction:: paddle.fluid.layers.gather .. autofunction:: paddle.fluid.layers.gather
:noindex: :noindex:
.. _api_fluid_layers_random_crop:
random_crop random_crop
----------- -----------
.. autofunction:: paddle.fluid.layers.random_crop .. autofunction:: paddle.fluid.layers.random_crop
:noindex: :noindex:
.. _api_fluid_layers_mean_iou:
mean_iou mean_iou
-------- --------
.. autofunction:: paddle.fluid.layers.mean_iou .. autofunction:: paddle.fluid.layers.mean_iou
:noindex: :noindex:
.. _api_fluid_layers_relu:
relu
----
.. autofunction:: paddle.fluid.layers.relu
:noindex:
.. _api_fluid_layers_log:
log
---
.. autofunction:: paddle.fluid.layers.log
:noindex:
.. _api_fluid_layers_crop:
crop
----
.. autofunction:: paddle.fluid.layers.crop
:noindex:
ops ops
=== ===
.. _api_fluid_layers_mean:
mean mean
---- ----
.. autofunction:: paddle.fluid.layers.mean .. autofunction:: paddle.fluid.layers.mean
:noindex: :noindex:
.. _api_fluid_layers_mul:
mul mul
--- ---
.. autofunction:: paddle.fluid.layers.mul .. autofunction:: paddle.fluid.layers.mul
:noindex: :noindex:
.. _api_fluid_layers_scale:
scale scale
----- -----
.. autofunction:: paddle.fluid.layers.scale .. autofunction:: paddle.fluid.layers.scale
:noindex: :noindex:
.. _api_fluid_layers_sigmoid_cross_entropy_with_logits:
sigmoid_cross_entropy_with_logits sigmoid_cross_entropy_with_logits
--------------------------------- ---------------------------------
.. autofunction:: paddle.fluid.layers.sigmoid_cross_entropy_with_logits .. autofunction:: paddle.fluid.layers.sigmoid_cross_entropy_with_logits
:noindex: :noindex:
.. _api_fluid_layers_elementwise_add:
elementwise_add elementwise_add
--------------- ---------------
.. autofunction:: paddle.fluid.layers.elementwise_add .. autofunction:: paddle.fluid.layers.elementwise_add
:noindex: :noindex:
.. _api_fluid_layers_elementwise_div:
elementwise_div elementwise_div
--------------- ---------------
.. autofunction:: paddle.fluid.layers.elementwise_div .. autofunction:: paddle.fluid.layers.elementwise_div
:noindex: :noindex:
.. _api_fluid_layers_elementwise_sub:
elementwise_sub elementwise_sub
--------------- ---------------
.. autofunction:: paddle.fluid.layers.elementwise_sub .. autofunction:: paddle.fluid.layers.elementwise_sub
:noindex: :noindex:
.. _api_fluid_layers_elementwise_mul:
elementwise_mul elementwise_mul
--------------- ---------------
.. autofunction:: paddle.fluid.layers.elementwise_mul .. autofunction:: paddle.fluid.layers.elementwise_mul
:noindex: :noindex:
.. _api_fluid_layers_elementwise_max:
elementwise_max elementwise_max
--------------- ---------------
.. autofunction:: paddle.fluid.layers.elementwise_max .. autofunction:: paddle.fluid.layers.elementwise_max
:noindex: :noindex:
.. _api_fluid_layers_elementwise_min:
elementwise_min elementwise_min
--------------- ---------------
.. autofunction:: paddle.fluid.layers.elementwise_min .. autofunction:: paddle.fluid.layers.elementwise_min
:noindex: :noindex:
.. _api_fluid_layers_elementwise_pow:
elementwise_pow elementwise_pow
--------------- ---------------
.. autofunction:: paddle.fluid.layers.elementwise_pow .. autofunction:: paddle.fluid.layers.elementwise_pow
:noindex: :noindex:
.. _api_fluid_layers_clip:
clip clip
---- ----
.. autofunction:: paddle.fluid.layers.clip .. autofunction:: paddle.fluid.layers.clip
:noindex: :noindex:
.. _api_fluid_layers_clip_by_norm:
clip_by_norm clip_by_norm
------------ ------------
.. autofunction:: paddle.fluid.layers.clip_by_norm .. autofunction:: paddle.fluid.layers.clip_by_norm
:noindex: :noindex:
.. _api_fluid_layers_logical_and:
logical_and logical_and
----------- -----------
.. autofunction:: paddle.fluid.layers.logical_and .. autofunction:: paddle.fluid.layers.logical_and
:noindex: :noindex:
.. _api_fluid_layers_logical_or:
logical_or logical_or
---------- ----------
.. autofunction:: paddle.fluid.layers.logical_or .. autofunction:: paddle.fluid.layers.logical_or
:noindex: :noindex:
.. _api_fluid_layers_logical_xor:
logical_xor logical_xor
----------- -----------
.. autofunction:: paddle.fluid.layers.logical_xor .. autofunction:: paddle.fluid.layers.logical_xor
:noindex: :noindex:
.. _api_fluid_layers_logical_not:
logical_not logical_not
----------- -----------
.. autofunction:: paddle.fluid.layers.logical_not .. autofunction:: paddle.fluid.layers.logical_not
:noindex: :noindex:
.. _api_fluid_layers_uniform_random_batch_size_like:
uniform_random_batch_size_like uniform_random_batch_size_like
------------------------------ ------------------------------
.. autofunction:: paddle.fluid.layers.uniform_random_batch_size_like .. autofunction:: paddle.fluid.layers.uniform_random_batch_size_like
:noindex: :noindex:
.. _api_fluid_layers_gaussian_random:
gaussian_random gaussian_random
--------------- ---------------
.. autofunction:: paddle.fluid.layers.gaussian_random .. autofunction:: paddle.fluid.layers.gaussian_random
:noindex: :noindex:
.. _api_fluid_layers_gaussian_random_batch_size_like:
gaussian_random_batch_size_like gaussian_random_batch_size_like
------------------------------- -------------------------------
.. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like .. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like
:noindex: :noindex:
.. _api_fluid_layers_scatter:
scatter scatter
------- -------
.. autofunction:: paddle.fluid.layers.scatter .. autofunction:: paddle.fluid.layers.scatter
:noindex: :noindex:
.. _api_fluid_layers_sum:
sum sum
--- ---
.. autofunction:: paddle.fluid.layers.sum .. autofunction:: paddle.fluid.layers.sum
:noindex: :noindex:
.. _api_fluid_layers_slice:
slice slice
----- -----
.. autofunction:: paddle.fluid.layers.slice .. autofunction:: paddle.fluid.layers.slice
:noindex: :noindex:
.. _api_fluid_layers_polygon_box_transform:
polygon_box_transform polygon_box_transform
--------------------- ---------------------
.. autofunction:: paddle.fluid.layers.polygon_box_transform .. autofunction:: paddle.fluid.layers.polygon_box_transform
:noindex: :noindex:
.. _api_fluid_layers_shape:
shape shape
----- -----
.. autofunction:: paddle.fluid.layers.shape .. autofunction:: paddle.fluid.layers.shape
:noindex: :noindex:
.. _api_fluid_layers_iou_similarity:
iou_similarity
--------------
.. autofunction:: paddle.fluid.layers.iou_similarity
:noindex:
.. _api_fluid_layers_maxout:
maxout maxout
------ ------
.. autofunction:: paddle.fluid.layers.maxout .. autofunction:: paddle.fluid.layers.maxout
:noindex: :noindex:
.. _api_fluid_layers_sigmoid:
sigmoid sigmoid
------- -------
.. autofunction:: paddle.fluid.layers.sigmoid .. autofunction:: paddle.fluid.layers.sigmoid
:noindex: :noindex:
.. _api_fluid_layers_logsigmoid:
logsigmoid logsigmoid
---------- ----------
.. autofunction:: paddle.fluid.layers.logsigmoid .. autofunction:: paddle.fluid.layers.logsigmoid
:noindex: :noindex:
.. _api_fluid_layers_exp:
exp exp
--- ---
.. autofunction:: paddle.fluid.layers.exp .. autofunction:: paddle.fluid.layers.exp
:noindex: :noindex:
relu .. _api_fluid_layers_tanh:
----
.. autofunction:: paddle.fluid.layers.relu
:noindex:
tanh tanh
---- ----
...@@ -874,71 +1169,87 @@ tanh ...@@ -874,71 +1169,87 @@ tanh
.. autofunction:: paddle.fluid.layers.tanh .. autofunction:: paddle.fluid.layers.tanh
:noindex: :noindex:
.. _api_fluid_layers_tanh_shrink:
tanh_shrink tanh_shrink
----------- -----------
.. autofunction:: paddle.fluid.layers.tanh_shrink .. autofunction:: paddle.fluid.layers.tanh_shrink
:noindex: :noindex:
.. _api_fluid_layers_softshrink:
softshrink softshrink
---------- ----------
.. autofunction:: paddle.fluid.layers.softshrink .. autofunction:: paddle.fluid.layers.softshrink
:noindex: :noindex:
.. _api_fluid_layers_sqrt:
sqrt sqrt
---- ----
.. autofunction:: paddle.fluid.layers.sqrt .. autofunction:: paddle.fluid.layers.sqrt
:noindex: :noindex:
.. _api_fluid_layers_abs:
abs abs
--- ---
.. autofunction:: paddle.fluid.layers.abs .. autofunction:: paddle.fluid.layers.abs
:noindex: :noindex:
.. _api_fluid_layers_ceil:
ceil ceil
---- ----
.. autofunction:: paddle.fluid.layers.ceil .. autofunction:: paddle.fluid.layers.ceil
:noindex: :noindex:
.. _api_fluid_layers_floor:
floor floor
----- -----
.. autofunction:: paddle.fluid.layers.floor .. autofunction:: paddle.fluid.layers.floor
:noindex: :noindex:
.. _api_fluid_layers_cos:
cos cos
--- ---
.. autofunction:: paddle.fluid.layers.cos .. autofunction:: paddle.fluid.layers.cos
:noindex: :noindex:
.. _api_fluid_layers_sin:
sin sin
--- ---
.. autofunction:: paddle.fluid.layers.sin .. autofunction:: paddle.fluid.layers.sin
:noindex: :noindex:
.. _api_fluid_layers_round:
round round
----- -----
.. autofunction:: paddle.fluid.layers.round .. autofunction:: paddle.fluid.layers.round
:noindex: :noindex:
.. _api_fluid_layers_reciprocal:
reciprocal reciprocal
---------- ----------
.. autofunction:: paddle.fluid.layers.reciprocal .. autofunction:: paddle.fluid.layers.reciprocal
:noindex: :noindex:
log .. _api_fluid_layers_square:
---
.. autofunction:: paddle.fluid.layers.log
:noindex:
square square
------ ------
...@@ -946,90 +1257,120 @@ square ...@@ -946,90 +1257,120 @@ square
.. autofunction:: paddle.fluid.layers.square .. autofunction:: paddle.fluid.layers.square
:noindex: :noindex:
.. _api_fluid_layers_softplus:
softplus softplus
-------- --------
.. autofunction:: paddle.fluid.layers.softplus .. autofunction:: paddle.fluid.layers.softplus
:noindex: :noindex:
.. _api_fluid_layers_softsign:
softsign softsign
-------- --------
.. autofunction:: paddle.fluid.layers.softsign .. autofunction:: paddle.fluid.layers.softsign
:noindex: :noindex:
.. _api_fluid_layers_brelu:
brelu brelu
----- -----
.. autofunction:: paddle.fluid.layers.brelu .. autofunction:: paddle.fluid.layers.brelu
:noindex: :noindex:
.. _api_fluid_layers_leaky_relu:
leaky_relu leaky_relu
---------- ----------
.. autofunction:: paddle.fluid.layers.leaky_relu .. autofunction:: paddle.fluid.layers.leaky_relu
:noindex: :noindex:
.. _api_fluid_layers_soft_relu:
soft_relu soft_relu
--------- ---------
.. autofunction:: paddle.fluid.layers.soft_relu .. autofunction:: paddle.fluid.layers.soft_relu
:noindex: :noindex:
.. _api_fluid_layers_elu:
elu elu
--- ---
.. autofunction:: paddle.fluid.layers.elu .. autofunction:: paddle.fluid.layers.elu
:noindex: :noindex:
.. _api_fluid_layers_relu6:
relu6 relu6
----- -----
.. autofunction:: paddle.fluid.layers.relu6 .. autofunction:: paddle.fluid.layers.relu6
:noindex: :noindex:
.. _api_fluid_layers_pow:
pow pow
--- ---
.. autofunction:: paddle.fluid.layers.pow .. autofunction:: paddle.fluid.layers.pow
:noindex: :noindex:
.. _api_fluid_layers_stanh:
stanh stanh
----- -----
.. autofunction:: paddle.fluid.layers.stanh .. autofunction:: paddle.fluid.layers.stanh
:noindex: :noindex:
.. _api_fluid_layers_hard_sigmoid:
hard_sigmoid hard_sigmoid
------------ ------------
.. autofunction:: paddle.fluid.layers.hard_sigmoid .. autofunction:: paddle.fluid.layers.hard_sigmoid
:noindex: :noindex:
.. _api_fluid_layers_swish:
swish swish
----- -----
.. autofunction:: paddle.fluid.layers.swish .. autofunction:: paddle.fluid.layers.swish
:noindex: :noindex:
.. _api_fluid_layers_uniform_random:
uniform_random uniform_random
-------------- --------------
.. autofunction:: paddle.fluid.layers.uniform_random .. autofunction:: paddle.fluid.layers.uniform_random
:noindex: :noindex:
.. _api_fluid_layers_hard_shrink:
hard_shrink hard_shrink
----------- -----------
.. autofunction:: paddle.fluid.layers.hard_shrink .. autofunction:: paddle.fluid.layers.hard_shrink
:noindex: :noindex:
.. _api_fluid_layers_cumsum:
cumsum cumsum
------ ------
.. autofunction:: paddle.fluid.layers.cumsum .. autofunction:: paddle.fluid.layers.cumsum
:noindex: :noindex:
.. _api_fluid_layers_thresholded_relu:
thresholded_relu thresholded_relu
---------------- ----------------
...@@ -1039,192 +1380,383 @@ thresholded_relu ...@@ -1039,192 +1380,383 @@ thresholded_relu
tensor tensor
====== ======
.. _api_fluid_layers_create_tensor:
create_tensor create_tensor
------------- -------------
.. autofunction:: paddle.fluid.layers.create_tensor .. autofunction:: paddle.fluid.layers.create_tensor
:noindex: :noindex:
.. _api_fluid_layers_create_parameter:
create_parameter create_parameter
---------------- ----------------
.. autofunction:: paddle.fluid.layers.create_parameter .. autofunction:: paddle.fluid.layers.create_parameter
:noindex: :noindex:
.. _api_fluid_layers_create_global_var:
create_global_var create_global_var
----------------- -----------------
.. autofunction:: paddle.fluid.layers.create_global_var .. autofunction:: paddle.fluid.layers.create_global_var
:noindex: :noindex:
.. _api_fluid_layers_cast:
cast cast
---- ----
.. autofunction:: paddle.fluid.layers.cast .. autofunction:: paddle.fluid.layers.cast
:noindex: :noindex:
.. _api_fluid_layers_concat:
concat concat
------ ------
.. autofunction:: paddle.fluid.layers.concat .. autofunction:: paddle.fluid.layers.concat
:noindex: :noindex:
.. _api_fluid_layers_sums:
sums sums
---- ----
.. autofunction:: paddle.fluid.layers.sums .. autofunction:: paddle.fluid.layers.sums
:noindex: :noindex:
.. _api_fluid_layers_assign:
assign assign
------ ------
.. autofunction:: paddle.fluid.layers.assign .. autofunction:: paddle.fluid.layers.assign
:noindex: :noindex:
.. _api_fluid_layers_fill_constant_batch_size_like:
fill_constant_batch_size_like fill_constant_batch_size_like
----------------------------- -----------------------------
.. autofunction:: paddle.fluid.layers.fill_constant_batch_size_like .. autofunction:: paddle.fluid.layers.fill_constant_batch_size_like
:noindex: :noindex:
.. _api_fluid_layers_fill_constant:
fill_constant fill_constant
------------- -------------
.. autofunction:: paddle.fluid.layers.fill_constant .. autofunction:: paddle.fluid.layers.fill_constant
:noindex: :noindex:
.. _api_fluid_layers_argmin:
argmin argmin
------ ------
.. autofunction:: paddle.fluid.layers.argmin .. autofunction:: paddle.fluid.layers.argmin
:noindex: :noindex:
.. _api_fluid_layers_argmax:
argmax argmax
------ ------
.. autofunction:: paddle.fluid.layers.argmax .. autofunction:: paddle.fluid.layers.argmax
:noindex: :noindex:
.. _api_fluid_layers_ones:
ones ones
---- ----
.. autofunction:: paddle.fluid.layers.ones .. autofunction:: paddle.fluid.layers.ones
:noindex: :noindex:
.. _api_fluid_layers_zeros:
zeros zeros
----- -----
.. autofunction:: paddle.fluid.layers.zeros .. autofunction:: paddle.fluid.layers.zeros
:noindex: :noindex:
.. _api_fluid_layers_reverse:
reverse
-------
.. autofunction:: paddle.fluid.layers.reverse
:noindex:
learning_rate_scheduler
=======================
.. _api_fluid_layers_exponential_decay:
exponential_decay
-----------------
.. autofunction:: paddle.fluid.layers.exponential_decay
:noindex:
.. _api_fluid_layers_natural_exp_decay:
natural_exp_decay
-----------------
.. autofunction:: paddle.fluid.layers.natural_exp_decay
:noindex:
.. _api_fluid_layers_inverse_time_decay:
inverse_time_decay
------------------
.. autofunction:: paddle.fluid.layers.inverse_time_decay
:noindex:
.. _api_fluid_layers_polynomial_decay:
polynomial_decay
----------------
.. autofunction:: paddle.fluid.layers.polynomial_decay
:noindex:
.. _api_fluid_layers_piecewise_decay:
piecewise_decay
---------------
.. autofunction:: paddle.fluid.layers.piecewise_decay
:noindex:
.. _api_fluid_layers_noam_decay:
noam_decay
----------
.. autofunction:: paddle.fluid.layers.noam_decay
:noindex:
.. _api_fluid_layers_append_LARS:
append_LARS
-----------
.. autofunction:: paddle.fluid.layers.append_LARS
:noindex:
detection detection
========= =========
.. _api_fluid_layers_prior_box:
prior_box prior_box
--------- ---------
.. autofunction:: paddle.fluid.layers.prior_box .. autofunction:: paddle.fluid.layers.prior_box
:noindex: :noindex:
.. _api_fluid_layers_multi_box_head:
multi_box_head multi_box_head
-------------- --------------
.. autofunction:: paddle.fluid.layers.multi_box_head .. autofunction:: paddle.fluid.layers.multi_box_head
:noindex: :noindex:
.. _api_fluid_layers_bipartite_match:
bipartite_match bipartite_match
--------------- ---------------
.. autofunction:: paddle.fluid.layers.bipartite_match .. autofunction:: paddle.fluid.layers.bipartite_match
:noindex: :noindex:
.. _api_fluid_layers_target_assign:
target_assign target_assign
------------- -------------
.. autofunction:: paddle.fluid.layers.target_assign .. autofunction:: paddle.fluid.layers.target_assign
:noindex: :noindex:
.. _api_fluid_layers_detection_output:
detection_output detection_output
---------------- ----------------
.. autofunction:: paddle.fluid.layers.detection_output .. autofunction:: paddle.fluid.layers.detection_output
:noindex: :noindex:
.. _api_fluid_layers_ssd_loss:
ssd_loss ssd_loss
-------- --------
.. autofunction:: paddle.fluid.layers.ssd_loss .. autofunction:: paddle.fluid.layers.ssd_loss
:noindex: :noindex:
.. _api_fluid_layers_detection_map:
detection_map detection_map
------------- -------------
.. autofunction:: paddle.fluid.layers.detection_map .. autofunction:: paddle.fluid.layers.detection_map
:noindex: :noindex:
.. _api_fluid_layers_iou_similarity:
iou_similarity iou_similarity
-------------- --------------
.. autofunction:: paddle.fluid.layers.iou_similarity .. autofunction:: paddle.fluid.layers.iou_similarity
:noindex: :noindex:
.. _api_fluid_layers_box_coder:
box_coder box_coder
--------- ---------
.. autofunction:: paddle.fluid.layers.box_coder .. autofunction:: paddle.fluid.layers.box_coder
:noindex: :noindex:
learning_rate_scheduler metric_op
======================= =========
exponential_decay .. _api_fluid_layers_accuracy:
-----------------
.. autofunction:: paddle.fluid.layers.exponential_decay accuracy
--------
.. autofunction:: paddle.fluid.layers.accuracy
:noindex: :noindex:
natural_exp_decay .. _api_fluid_layers_auc:
-----------------
.. autofunction:: paddle.fluid.layers.natural_exp_decay auc
---
.. autofunction:: paddle.fluid.layers.auc
:noindex: :noindex:
inverse_time_decay tensor
------------------ ======
.. autofunction:: paddle.fluid.layers.inverse_time_decay .. _api_fluid_layers_create_tensor:
create_tensor
-------------
.. autofunction:: paddle.fluid.layers.create_tensor
:noindex: :noindex:
polynomial_decay .. _api_fluid_layers_create_parameter:
create_parameter
---------------- ----------------
.. autofunction:: paddle.fluid.layers.polynomial_decay .. autofunction:: paddle.fluid.layers.create_parameter
:noindex: :noindex:
piecewise_decay .. _api_fluid_layers_create_global_var:
---------------
.. autofunction:: paddle.fluid.layers.piecewise_decay create_global_var
-----------------
.. autofunction:: paddle.fluid.layers.create_global_var
:noindex: :noindex:
noam_decay .. _api_fluid_layers_cast:
----------
.. autofunction:: paddle.fluid.layers.noam_decay cast
----
.. autofunction:: paddle.fluid.layers.cast
:noindex: :noindex:
metric .. _api_fluid_layers_concat:
======
accuracy concat
-------- ------
.. autofunction:: paddle.fluid.layers.accuracy .. autofunction:: paddle.fluid.layers.concat
:noindex: :noindex:
auc .. _api_fluid_layers_sums:
---
.. autofunction:: paddle.fluid.layers.auc sums
----
.. autofunction:: paddle.fluid.layers.sums
:noindex:
.. _api_fluid_layers_assign:
assign
------
.. autofunction:: paddle.fluid.layers.assign
:noindex:
.. _api_fluid_layers_fill_constant_batch_size_like:
fill_constant_batch_size_like
-----------------------------
.. autofunction:: paddle.fluid.layers.fill_constant_batch_size_like
:noindex:
.. _api_fluid_layers_fill_constant:
fill_constant
-------------
.. autofunction:: paddle.fluid.layers.fill_constant
:noindex:
.. _api_fluid_layers_argmin:
argmin
------
.. autofunction:: paddle.fluid.layers.argmin
:noindex:
.. _api_fluid_layers_argmax:
argmax
------
.. autofunction:: paddle.fluid.layers.argmax
:noindex:
.. _api_fluid_layers_ones:
ones
----
.. autofunction:: paddle.fluid.layers.ones
:noindex:
.. _api_fluid_layers_zeros:
zeros
-----
.. autofunction:: paddle.fluid.layers.zeros
:noindex:
.. _api_fluid_layers_reverse:
reverse
-------
.. autofunction:: paddle.fluid.layers.reverse
:noindex: :noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
======= =============
metrics fluid.metrics
======= =============
.. _api_fluid_metrics_MetricBase:
MetricBase MetricBase
---------- ----------
...@@ -12,6 +14,8 @@ MetricBase ...@@ -12,6 +14,8 @@ MetricBase
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_CompositeMetric:
CompositeMetric CompositeMetric
--------------- ---------------
...@@ -19,6 +23,26 @@ CompositeMetric ...@@ -19,6 +23,26 @@ CompositeMetric
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_Precision:
Precision
---------
.. autoclass:: paddle.fluid.metrics.Precision
:members:
:noindex:
.. _api_fluid_metrics_Recall:
Recall
------
.. autoclass:: paddle.fluid.metrics.Recall
:members:
:noindex:
.. _api_fluid_metrics_Accuracy:
Accuracy Accuracy
-------- --------
...@@ -26,6 +50,8 @@ Accuracy ...@@ -26,6 +50,8 @@ Accuracy
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_ChunkEvaluator:
ChunkEvaluator ChunkEvaluator
-------------- --------------
...@@ -33,6 +59,8 @@ ChunkEvaluator ...@@ -33,6 +59,8 @@ ChunkEvaluator
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_EditDistance:
EditDistance EditDistance
------------ ------------
...@@ -40,6 +68,8 @@ EditDistance ...@@ -40,6 +68,8 @@ EditDistance
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_DetectionMAP:
DetectionMAP DetectionMAP
------------ ------------
...@@ -47,6 +77,8 @@ DetectionMAP ...@@ -47,6 +77,8 @@ DetectionMAP
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_Auc:
Auc Auc
--- ---
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
==== ==========
nets fluid.nets
==== ==========
.. _api_fluid_nets_simple_img_conv_pool:
simple_img_conv_pool simple_img_conv_pool
-------------------- --------------------
...@@ -11,18 +13,24 @@ simple_img_conv_pool ...@@ -11,18 +13,24 @@ simple_img_conv_pool
.. autofunction:: paddle.fluid.nets.simple_img_conv_pool .. autofunction:: paddle.fluid.nets.simple_img_conv_pool
:noindex: :noindex:
.. _api_fluid_nets_sequence_conv_pool:
sequence_conv_pool sequence_conv_pool
------------------ ------------------
.. autofunction:: paddle.fluid.nets.sequence_conv_pool .. autofunction:: paddle.fluid.nets.sequence_conv_pool
:noindex: :noindex:
.. _api_fluid_nets_glu:
glu glu
--- ---
.. autofunction:: paddle.fluid.nets.glu .. autofunction:: paddle.fluid.nets.glu
:noindex: :noindex:
.. _api_fluid_nets_scaled_dot_product_attention:
scaled_dot_product_attention scaled_dot_product_attention
---------------------------- ----------------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
========= ===============
optimizer fluid.optimizer
========= ===============
.. _api_fluid_optimizer_SGD:
SGD SGD
--- ---
...@@ -12,6 +14,8 @@ SGD ...@@ -12,6 +14,8 @@ SGD
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Momentum:
Momentum Momentum
-------- --------
...@@ -19,6 +23,8 @@ Momentum ...@@ -19,6 +23,8 @@ Momentum
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Adagrad:
Adagrad Adagrad
------- -------
...@@ -26,6 +32,8 @@ Adagrad ...@@ -26,6 +32,8 @@ Adagrad
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Adam:
Adam Adam
---- ----
...@@ -33,6 +41,8 @@ Adam ...@@ -33,6 +41,8 @@ Adam
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Adamax:
Adamax Adamax
------ ------
...@@ -40,6 +50,8 @@ Adamax ...@@ -40,6 +50,8 @@ Adamax
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_DecayedAdagrad:
DecayedAdagrad DecayedAdagrad
-------------- --------------
...@@ -47,6 +59,17 @@ DecayedAdagrad ...@@ -47,6 +59,17 @@ DecayedAdagrad
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Ftrl:
Ftrl
----
.. autoclass:: paddle.fluid.optimizer.Ftrl
:members:
:noindex:
.. _api_fluid_optimizer_SGDOptimizer:
SGDOptimizer SGDOptimizer
------------ ------------
...@@ -54,6 +77,8 @@ SGDOptimizer ...@@ -54,6 +77,8 @@ SGDOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_MomentumOptimizer:
MomentumOptimizer MomentumOptimizer
----------------- -----------------
...@@ -61,6 +86,8 @@ MomentumOptimizer ...@@ -61,6 +86,8 @@ MomentumOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_AdagradOptimizer:
AdagradOptimizer AdagradOptimizer
---------------- ----------------
...@@ -68,6 +95,8 @@ AdagradOptimizer ...@@ -68,6 +95,8 @@ AdagradOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_AdamOptimizer:
AdamOptimizer AdamOptimizer
------------- -------------
...@@ -75,6 +104,8 @@ AdamOptimizer ...@@ -75,6 +104,8 @@ AdamOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_AdamaxOptimizer:
AdamaxOptimizer AdamaxOptimizer
--------------- ---------------
...@@ -82,6 +113,8 @@ AdamaxOptimizer ...@@ -82,6 +113,8 @@ AdamaxOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_DecayedAdagradOptimizer:
DecayedAdagradOptimizer DecayedAdagradOptimizer
----------------------- -----------------------
...@@ -89,6 +122,8 @@ DecayedAdagradOptimizer ...@@ -89,6 +122,8 @@ DecayedAdagradOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_RMSPropOptimizer:
RMSPropOptimizer RMSPropOptimizer
---------------- ----------------
...@@ -96,6 +131,17 @@ RMSPropOptimizer ...@@ -96,6 +131,17 @@ RMSPropOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_FtrlOptimizer:
FtrlOptimizer
-------------
.. autoclass:: paddle.fluid.optimizer.FtrlOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_Adadelta:
Adadelta Adadelta
-------- --------
...@@ -103,6 +149,8 @@ Adadelta ...@@ -103,6 +149,8 @@ Adadelta
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_ModelAverage:
ModelAverage ModelAverage
------------ ------------
...@@ -110,6 +158,8 @@ ModelAverage ...@@ -110,6 +158,8 @@ ModelAverage
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Optimizer:
Optimizer Optimizer
--------- ---------
...@@ -117,3 +167,12 @@ Optimizer ...@@ -117,3 +167,12 @@ Optimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_RMSPropOptimizer:
RMSPropOptimizer
----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
========== ================
param_attr fluid.param_attr
========== ================
.. _api_fluid_param_attr_ParamAttr:
ParamAttr ParamAttr
--------- ---------
...@@ -12,6 +14,8 @@ ParamAttr ...@@ -12,6 +14,8 @@ ParamAttr
:members: :members:
:noindex: :noindex:
.. _api_fluid_param_attr_WeightNormParamAttr:
WeightNormParamAttr WeightNormParamAttr
------------------- -------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
======== ==============
profiler fluid.profiler
======== ==============
.. _api_fluid_profiler_cuda_profiler:
cuda_profiler cuda_profiler
------------- -------------
...@@ -11,24 +13,32 @@ cuda_profiler ...@@ -11,24 +13,32 @@ cuda_profiler
.. autofunction:: paddle.fluid.profiler.cuda_profiler .. autofunction:: paddle.fluid.profiler.cuda_profiler
:noindex: :noindex:
.. _api_fluid_profiler_reset_profiler:
reset_profiler reset_profiler
-------------- --------------
.. autofunction:: paddle.fluid.profiler.reset_profiler .. autofunction:: paddle.fluid.profiler.reset_profiler
:noindex: :noindex:
.. _api_fluid_profiler_profiler:
profiler profiler
-------- --------
.. autofunction:: paddle.fluid.profiler.profiler .. autofunction:: paddle.fluid.profiler.profiler
:noindex: :noindex:
.. _api_fluid_profiler_start_profiler:
start_profiler start_profiler
-------------- --------------
.. autofunction:: paddle.fluid.profiler.start_profiler .. autofunction:: paddle.fluid.profiler.start_profiler
:noindex: :noindex:
.. _api_fluid_profiler_stop_profiler:
stop_profiler stop_profiler
------------- -------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=====================
fluid.recordio_writer
=====================
.. _api_fluid_recordio_writer_convert_reader_to_recordio_file:
convert_reader_to_recordio_file
-------------------------------
.. autofunction:: paddle.fluid.recordio_writer.convert_reader_to_recordio_file
:noindex:
.. _api_fluid_recordio_writer_convert_reader_to_recordio_files:
convert_reader_to_recordio_files
--------------------------------
.. autofunction:: paddle.fluid.recordio_writer.convert_reader_to_recordio_files
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
=========== =================
regularizer fluid.regularizer
=========== =================
.. _api_fluid_regularizer_append_regularization_ops:
append_regularization_ops append_regularization_ops
------------------------- -------------------------
...@@ -11,12 +13,7 @@ append_regularization_ops ...@@ -11,12 +13,7 @@ append_regularization_ops
.. autofunction:: paddle.fluid.regularizer.append_regularization_ops .. autofunction:: paddle.fluid.regularizer.append_regularization_ops
:noindex: :noindex:
WeightDecayRegularizer .. _api_fluid_regularizer_L1Decay:
----------------------
.. autoclass:: paddle.fluid.regularizer.WeightDecayRegularizer
:members:
:noindex:
L1Decay L1Decay
------- -------
...@@ -25,6 +22,8 @@ L1Decay ...@@ -25,6 +22,8 @@ L1Decay
:members: :members:
:noindex: :noindex:
.. _api_fluid_regularizer_L2Decay:
L2Decay L2Decay
------- -------
...@@ -32,6 +31,8 @@ L2Decay ...@@ -32,6 +31,8 @@ L2Decay
:members: :members:
:noindex: :noindex:
.. _api_fluid_regularizer_L1DecayRegularizer:
L1DecayRegularizer L1DecayRegularizer
------------------ ------------------
...@@ -39,6 +40,8 @@ L1DecayRegularizer ...@@ -39,6 +40,8 @@ L1DecayRegularizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_regularizer_L2DecayRegularizer:
L2DecayRegularizer L2DecayRegularizer
------------------ ------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
========== ================
transpiler fluid.transpiler
========== ================
.. _api_fluid_transpiler_DistributeTranspiler:
DistributeTranspiler DistributeTranspiler
-------------------- --------------------
...@@ -12,12 +14,7 @@ DistributeTranspiler ...@@ -12,12 +14,7 @@ DistributeTranspiler
:members: :members:
:noindex: :noindex:
InferenceTranspiler .. _api_fluid_transpiler_memory_optimize:
-------------------
.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler
:members:
:noindex:
memory_optimize memory_optimize
--------------- ---------------
...@@ -25,12 +22,16 @@ memory_optimize ...@@ -25,12 +22,16 @@ memory_optimize
.. autofunction:: paddle.fluid.transpiler.memory_optimize .. autofunction:: paddle.fluid.transpiler.memory_optimize
:noindex: :noindex:
.. _api_fluid_transpiler_release_memory:
release_memory release_memory
-------------- --------------
.. autofunction:: paddle.fluid.transpiler.release_memory .. autofunction:: paddle.fluid.transpiler.release_memory
:noindex: :noindex:
.. _api_fluid_transpiler_HashName:
HashName HashName
-------- --------
...@@ -38,9 +39,12 @@ HashName ...@@ -38,9 +39,12 @@ HashName
:members: :members:
:noindex: :noindex:
.. _api_fluid_transpiler_RoundRobin:
RoundRobin RoundRobin
---------- ----------
.. autoclass:: paddle.fluid.transpiler.RoundRobin .. autoclass:: paddle.fluid.transpiler.RoundRobin
:members: :members:
:noindex: :noindex:
...@@ -213,3 +213,12 @@ virtualenv本身也是Python的一个包,可以用pip进行安装: ...@@ -213,3 +213,12 @@ virtualenv本身也是Python的一个包,可以用pip进行安装:
保存并关闭文件。 保存并关闭文件。
这样,每次打开终端时就会自动启动名为‘paddle’的Python环境了。 这样,每次打开终端时就会自动启动名为‘paddle’的Python环境了。
10. 通过pip安装的PaddlePaddle在 :code:`import paddle.fluid` 报找不到 :code:`libmkldnn.so` 或 :code:`libmklml_intel.so`
------------------------------------------------------------------------------------------
出现这种问题的原因是在导入 :code:`paddle.fluid` 时需要加载 :code:`libmkldnn.so` 和 :code:`libmklml_intel.so`,
但是系统没有找到该文件。一般通过pip安装PaddlePaddle时会将 :code:`libmkldnn.so` 和 :code:`libmklml_intel.so`
拷贝到 :code:`/usr/local/lib` 路径下,所以解决办法是将该路径加到 :code:`LD_LIBRARY_PATH` 环境变量下,
即: :code:`export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH` 。
**注意**:如果是在虚拟环境中安装PaddlePaddle, :code:`libmkldnn.so` 和 :code:`libmklml_intel.so` 可能不在 :code:`/usr/local/lib` 路径下。
\ No newline at end of file
...@@ -147,10 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, ...@@ -147,10 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
"Input tensor type is not supported: ", in.type().name()); "Input tensor type is not supported: ", in.type().name());
memory::data_type out_type = in_type; memory::data_type out_type = in_type;
memory::format in_format = auto in_format = MKLDNNFormatForSize(in_tz.size(), in.format());
in_tz.size() == 2 ? memory::format::nc : in.format(); auto out_format =
memory::format out_format = MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout));
out_tz.size() == 2 ? memory::format::nc : ToMKLDNNFormat(out_layout);
void* in_data = GetDataFromTensor(in, in_type); void* in_data = GetDataFromTensor(in, in_type);
......
...@@ -61,6 +61,13 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) { ...@@ -61,6 +61,13 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) {
if (iter != dict.end()) return iter->second; if (iter != dict.end()) return iter->second;
return MKLDNNDataType::data_undef; return MKLDNNDataType::data_undef;
} }
// Chooses an MKLDNN memory format from the number of tensor dimensions.
// One-dimensional tensors map to format `x`, two-dimensional tensors map to
// format `nc`, and every other dimension count keeps `default_format`.
inline MKLDNNFormat MKLDNNFormatForSize(size_t dims_size,
                                        MKLDNNFormat default_format) {
  switch (dims_size) {
    case 1:
      return mkldnn::memory::format::x;
    case 2:
      return mkldnn::memory::format::nc;
    default:
      // Any other rank: the caller's format is passed through unchanged.
      return default_format;
  }
}
#endif #endif
void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
......
...@@ -47,9 +47,13 @@ void DataTransform(const OpKernelType& expected_kernel_type, ...@@ -47,9 +47,13 @@ void DataTransform(const OpKernelType& expected_kernel_type,
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Just set layout/format. No real transform occurs // Just set layout/format. No real transform occurs
auto out_format =
MKLDNNFormatForSize(in.dims().size(), ToMKLDNNFormat(lin));
out.ShareDataWith(input_tensor); out.ShareDataWith(input_tensor);
out.set_layout(DataLayout::kMKLDNN); out.set_layout(DataLayout::kMKLDNN);
out.set_format(ToMKLDNNFormat(lin)); out.set_format(out_format);
#endif #endif
} else { } else {
// Case2 - transform from MKLDNN OPKernel to Non-MKLDNN OPKernel // Case2 - transform from MKLDNN OPKernel to Non-MKLDNN OPKernel
......
...@@ -103,50 +103,23 @@ void BroadcastOpHandle::RunImpl() { ...@@ -103,50 +103,23 @@ void BroadcastOpHandle::RunImpl() {
}); });
} }
// FIXME(zcd): a temporary fix for some language model that has sparse this->RunAndRecordEvent([&] {
// parameter. {
bool use_mutex = true; platform::NCCLGroupGuard guard;
if (in_var->IsType<paddle::framework::SelectedRows>()) { for (auto &call : broadcast_calls) {
use_mutex = false; call();
}
if (use_mutex) {
this->RunAndRecordEvent([&] {
{
platform::NCCLGroupGuard guard;
for (auto &call : broadcast_calls) {
call();
}
}
if (!out_handle->IsTheSameVar(*in_var_handle)) {
auto out_var = var_scopes.at(in_var_handle->scope_idx_)
->FindVar(out_var_handles[0]->name_);
paddle::framework::TensorCopy(
in_tensor, in_var_handle->place_,
*(dev_ctxes_.at(in_var_handle->place_)),
&VariableVisitor::GetMutableTensor(out_var));
}
});
} else {
this->RunAndRecordEventNoMutex([&] {
{
platform::NCCLGroupGuard guard;
for (auto &call : broadcast_calls) {
call();
}
}
if (!out_handle->IsTheSameVar(*in_var_handle)) {
auto out_var = var_scopes.at(in_var_handle->scope_idx_)
->FindVar(out_var_handles[0]->name_);
paddle::framework::TensorCopy(
in_tensor, in_var_handle->place_,
*(dev_ctxes_.at(in_var_handle->place_)),
&VariableVisitor::GetMutableTensor(out_var));
} }
}); }
}
if (!out_handle->IsTheSameVar(*in_var_handle)) {
auto out_var = var_scopes.at(in_var_handle->scope_idx_)
->FindVar(out_var_handles[0]->name_);
paddle::framework::TensorCopy(
in_tensor, in_var_handle->place_,
*(dev_ctxes_.at(in_var_handle->place_)),
&VariableVisitor::GetMutableTensor(out_var));
}
});
#else #else
PADDLE_THROW("CUDA is not enabled."); PADDLE_THROW("CUDA is not enabled.");
#endif #endif
......
...@@ -470,7 +470,7 @@ void MultiDevSSAGraphBuilder::ConnectOp(SSAGraph *result, OpHandleBase *op, ...@@ -470,7 +470,7 @@ void MultiDevSSAGraphBuilder::ConnectOp(SSAGraph *result, OpHandleBase *op,
void MultiDevSSAGraphBuilder::CreateDistTrainOp(SSAGraph *result, void MultiDevSSAGraphBuilder::CreateDistTrainOp(SSAGraph *result,
const OpDesc &op) const { const OpDesc &op) const {
int op_dev_id = -1; int op_dev_id = -1;
if (op.Type() == "split_byref") { if (op.Type() == "split_byref" || op.Type() == "split_selected_rows") {
op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]); op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) { if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames()); op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames());
......
...@@ -47,7 +47,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { ...@@ -47,7 +47,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
#endif #endif
std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override; std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
int GetVarDeviceID(const std::string &varname) const; int GetVarDeviceID(const std::string &varname) const override;
private: private:
void CreateOpHandleIOs(SSAGraph *result, const OpDesc &op, void CreateOpHandleIOs(SSAGraph *result, const OpDesc &op,
......
...@@ -11,8 +11,8 @@ ...@@ -11,8 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/op_handle_base.h"
#include <map>
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -122,35 +122,17 @@ void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) { ...@@ -122,35 +122,17 @@ void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (!events_.empty()) { // Use event if (!events_.empty()) { // Use event
std::function<void()> method = callback; std::function<void()> method = callback;
// NOTE(zcd): device context must be ordered here because RecordEvent
// will use a mutex to ensure thread safety.
std::map<platform::DeviceContext *, platform::Place> ordered_ctxes;
for (auto &p : dev_ctxes_) { for (auto &p : dev_ctxes_) {
method = [method, p, this]() { ordered_ctxes.emplace(p.second, p.first);
static_cast<platform::CUDADeviceContext *>(p.second)->RecordEvent(
events_.at(boost::get<platform::CUDAPlace>(p.first).device),
method);
};
} }
method(); for (auto &p : ordered_ctxes) {
} else {
#endif
callback();
#ifdef PADDLE_WITH_CUDA
}
#endif
}
void OpHandleBase::RunAndRecordEventNoMutex(
const std::function<void()> &callback) {
#ifdef PADDLE_WITH_CUDA
if (!events_.empty()) { // Use event
std::function<void()> method = callback;
for (auto &p : dev_ctxes_) {
method = [method, p, this]() { method = [method, p, this]() {
static_cast<platform::CUDADeviceContext *>(p.second) static_cast<platform::CUDADeviceContext *>(p.first)->RecordEvent(
->RecordEventNoMutex( events_.at(boost::get<platform::CUDAPlace>(p.second).device),
events_.at(boost::get<platform::CUDAPlace>(p.first).device), method);
method);
}; };
} }
method(); method();
......
...@@ -85,10 +85,6 @@ class OpHandleBase { ...@@ -85,10 +85,6 @@ class OpHandleBase {
protected: protected:
void RunAndRecordEvent(const std::function<void()> &callback); void RunAndRecordEvent(const std::function<void()> &callback);
// FIXME(zcd): A temporary fix for some language model that has sparse
// parameter.
void RunAndRecordEventNoMutex(const std::function<void()> &callback);
void RunAndRecordEvent(platform::Place p, void RunAndRecordEvent(platform::Place p,
const std::function<void()> &callback); const std::function<void()> &callback);
......
...@@ -80,9 +80,7 @@ void ReduceOpHandle::RunImpl() { ...@@ -80,9 +80,7 @@ void ReduceOpHandle::RunImpl() {
} }
if (pre_in_var->IsType<framework::SelectedRows>()) { if (pre_in_var->IsType<framework::SelectedRows>()) {
// FIXME(zcd): A temporary fix for some language model that has sparse this->RunAndRecordEvent([&] {
// parameter.
this->RunAndRecordEventNoMutex([&] {
std::vector<const SelectedRows *> in_selected_rows = std::vector<const SelectedRows *> in_selected_rows =
GetInputValues<SelectedRows>(in_var_handles, var_scopes); GetInputValues<SelectedRows>(in_var_handles, var_scopes);
GatherSelectedRows(in_selected_rows, in_places, dev_ctxes_, t_out_p, GatherSelectedRows(in_selected_rows, in_places, dev_ctxes_, t_out_p,
......
...@@ -27,6 +27,7 @@ enum AttrType { ...@@ -27,6 +27,7 @@ enum AttrType {
BOOLEANS = 7; BOOLEANS = 7;
BLOCK = 8; BLOCK = 8;
LONG = 9; LONG = 9;
BLOCKS = 10;
} }
// OpDesc describes an instance of a C++ framework::OperatorBase // OpDesc describes an instance of a C++ framework::OperatorBase
...@@ -46,6 +47,7 @@ message OpDesc { ...@@ -46,6 +47,7 @@ message OpDesc {
repeated bool bools = 11; repeated bool bools = 11;
optional int32 block_idx = 12; optional int32 block_idx = 12;
optional int64 l = 13; optional int64 l = 13;
repeated int32 blocks_idx = 14;
}; };
message Var { message Var {
......
...@@ -51,8 +51,6 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) { ...@@ -51,8 +51,6 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) {
} }
std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
PADDLE_ENFORCE(t.type().hash_code() == typeid(float).hash_code());
if (!platform::is_cpu_place(t.place())) { if (!platform::is_cpu_place(t.place())) {
LoDTensor tt; LoDTensor tt;
framework::TensorCopy(t, platform::CPUPlace(), &tt); framework::TensorCopy(t, platform::CPUPlace(), &tt);
...@@ -70,7 +68,13 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { ...@@ -70,7 +68,13 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
// only print first ten elements // only print first ten elements
int64_t size = t.numel() < 10 ? t.numel() : 10; int64_t size = t.numel() < 10 ? t.numel() : 10;
for (int64_t i = 0; i < size; ++i) { for (int64_t i = 0; i < size; ++i) {
os << t.data<float>()[i] << " "; if (t.type().hash_code() == typeid(float).hash_code()) {
os << t.data<float>()[i] << " ";
} else if (t.type().hash_code() == typeid(int64_t).hash_code()) {
os << t.data<int64_t>()[i] << " ";
} else {
PADDLE_THROW("LoDTensor data type not in [float, int64_t]");
}
} }
return os; return os;
......
...@@ -26,6 +26,20 @@ ...@@ -26,6 +26,20 @@
namespace paddle { namespace paddle {
namespace framework { namespace framework {
// Smoke test for operator<<(std::ostream&, const LoDTensor&): streams one
// float tensor and one int64_t tensor, each holding two elements.
TEST(LoD, PrintLoDTensor) {
  LoDTensor tensor1;
  // Give the tensor a shape before allocating: mutable_data() sizes the
  // buffer from the current dims, so without this the element writes below
  // would land outside the allocation and nothing would be printed.
  tensor1.Resize({2});
  tensor1.mutable_data<float>(platform::CPUPlace());
  tensor1.data<float>()[0] = 0.2;
  tensor1.data<float>()[1] = 0.5;
  LOG(INFO) << tensor1;

  LoDTensor tensor2;
  // Same reasoning as above: shape first, then allocate two int64_t slots.
  tensor2.Resize({2});
  tensor2.mutable_data<int64_t>(platform::CPUPlace());
  tensor2.data<int64_t>()[0] = 1;
  tensor2.data<int64_t>()[1] = 2;
  LOG(INFO) << tensor2;
}
TEST(LoD, data) { TEST(LoD, data) {
LoD lod{{0, 1, 2}}; LoD lod{{0, 1, 2}};
lod.push_back({0, 2, 4, 5}); lod.push_back({0, 2, 4, 5});
...@@ -37,7 +51,7 @@ TEST(LoD, data) { ...@@ -37,7 +51,7 @@ TEST(LoD, data) {
} }
} }
TEST(LodExpand, test) { TEST(LoD, ExpandLoD) {
LoD lod{{0, 2}}; LoD lod{{0, 2}};
LoDTensor tensor; LoDTensor tensor;
tensor.set_lod(lod); tensor.set_lod(lod);
......
...@@ -211,6 +211,12 @@ void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) { ...@@ -211,6 +211,12 @@ void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) {
need_update_ = true; need_update_ = true;
} }
void OpDesc::SetBlocksAttr(const std::string &name,
std::vector<BlockDesc *> blocks) {
this->attrs_[name] = blocks;
need_update_ = true;
}
void OpDesc::SetAttrMap( void OpDesc::SetAttrMap(
const std::unordered_map<std::string, Attribute> &attr_map) { const std::unordered_map<std::string, Attribute> &attr_map) {
attrs_ = attr_map; attrs_ = attr_map;
...@@ -305,6 +311,13 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> { ...@@ -305,6 +311,13 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
void operator()(const std::vector<bool> &v) const { void operator()(const std::vector<bool> &v) const {
VectorToRepeated(v, attr_->mutable_bools()); VectorToRepeated(v, attr_->mutable_bools());
} }
// Writes a list of blocks into the attribute's repeated `blocks_idx` field.
// Each block is recorded by the value of its ID(), in the same order as `v`.
void operator()(const std::vector<BlockDesc *> &v) const {
  std::vector<int> ids;
  ids.reserve(v.size());
  for (BlockDesc *block : v) {
    ids.push_back(block->ID());
  }
  VectorToRepeated(ids, attr_->mutable_blocks_idx());
}
void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); } void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); }
void operator()(int64_t v) const { attr_->set_l(v); } void operator()(int64_t v) const { attr_->set_l(v); }
void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
......
...@@ -77,6 +77,8 @@ class OpDesc { ...@@ -77,6 +77,8 @@ class OpDesc {
void SetBlockAttr(const std::string &name, BlockDesc *block); void SetBlockAttr(const std::string &name, BlockDesc *block);
void SetBlocksAttr(const std::string &name, std::vector<BlockDesc *> blocks);
Attribute GetAttr(const std::string &name) const; Attribute GetAttr(const std::string &name) const;
Attribute GetNullableAttr(const std::string &name) const; Attribute GetNullableAttr(const std::string &name) const;
......
...@@ -121,7 +121,7 @@ ParallelExecutor::ParallelExecutor( ...@@ -121,7 +121,7 @@ ParallelExecutor::ParallelExecutor(
#endif #endif
} }
builder_ = std::move(builder_factory.Create()); builder_ = builder_factory.Create();
member_->executor_.reset(new details::ThreadedSSAGraphExecutor( member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
exec_strategy, member_->local_scopes_, places, exec_strategy, member_->local_scopes_, places,
builder_->Build(main_program))); builder_->Build(main_program)));
......
...@@ -35,7 +35,8 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>; ...@@ -35,7 +35,8 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using Attribute = using Attribute =
boost::variant<boost::blank, int, float, std::string, std::vector<int>, boost::variant<boost::blank, int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>, bool, std::vector<float>, std::vector<std::string>, bool,
std::vector<bool>, BlockDesc*, int64_t>; std::vector<bool>, BlockDesc*, int64_t,
std::vector<BlockDesc*>>;
using AttributeMap = std::unordered_map<std::string, Attribute>; using AttributeMap = std::unordered_map<std::string, Attribute>;
......
...@@ -70,6 +70,7 @@ $$Out = values$$ ...@@ -70,6 +70,7 @@ $$Out = values$$
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(assign_value, ops::AssignValueOp, ops::AssignValueOpMaker); REGISTER_OPERATOR(assign_value, ops::AssignValueOp, ops::AssignValueOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(assign_value, ops::AssignValueKernel<int>, REGISTER_OP_CPU_KERNEL(assign_value, ops::AssignValueKernel<int>,
ops::AssignValueKernel<float>); ops::AssignValueKernel<float>);
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <limits> #include <limits>
#include "glog/logging.h" // For VLOG
#include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/operators/distributed/request_handler.h" #include "paddle/fluid/operators/distributed/request_handler.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
...@@ -75,6 +76,9 @@ bool GRPCClient::AsyncSendVar(const std::string& ep, ...@@ -75,6 +76,9 @@ bool GRPCClient::AsyncSendVar(const std::string& ep,
var_h.scope = p_scope; var_h.scope = p_scope;
var_h.name = var_name_val; var_h.name = var_name_val;
var_h.ctx = p_ctx; var_h.ctx = p_ctx;
var_h.method = "Send";
VLOG(3) << var_h.String() << " begin";
// stub context // stub context
SendProcessor* s = new SendProcessor(ch); SendProcessor* s = new SendProcessor(ch);
...@@ -129,6 +133,9 @@ bool GRPCClient::AsyncGetVar(const std::string& ep, ...@@ -129,6 +133,9 @@ bool GRPCClient::AsyncGetVar(const std::string& ep,
var_h.scope = p_scope; var_h.scope = p_scope;
var_h.name = var_name_val; var_h.name = var_name_val;
var_h.ctx = p_ctx; var_h.ctx = p_ctx;
var_h.method = "Get";
VLOG(3) << var_h.String() << " begin";
// stub context // stub context
GetProcessor* s = new GetProcessor(ch); GetProcessor* s = new GetProcessor(ch);
...@@ -172,6 +179,9 @@ bool GRPCClient::AsyncPrefetchVar(const std::string& ep, ...@@ -172,6 +179,9 @@ bool GRPCClient::AsyncPrefetchVar(const std::string& ep,
var_h.scope = p_scope; var_h.scope = p_scope;
var_h.name = out_var_name_val; var_h.name = out_var_name_val;
var_h.ctx = p_ctx; var_h.ctx = p_ctx;
var_h.method = "Prefetch";
VLOG(3) << var_h.String() << " begin";
// stub context // stub context
GetProcessor* s = new GetProcessor(ch); GetProcessor* s = new GetProcessor(ch);
...@@ -243,10 +253,11 @@ void GRPCClient::Proceed() { ...@@ -243,10 +253,11 @@ void GRPCClient::Proceed() {
GPR_ASSERT(ok); GPR_ASSERT(ok);
PADDLE_ENFORCE(c); PADDLE_ENFORCE(c);
if (c->status_.ok()) { if (c->status_.ok()) {
VLOG(3) << c->var_h_.String() << " process";
c->Process(); c->Process();
} else { } else {
LOG(FATAL) << "var: " << c->var_h_.String() LOG(FATAL) << c->var_h_.String()
<< " grpc error:" << c->status_.error_message(); << " meets grpc error:" << c->status_.error_message();
} }
delete c; delete c;
{ {
...@@ -258,14 +269,15 @@ void GRPCClient::Proceed() { ...@@ -258,14 +269,15 @@ void GRPCClient::Proceed() {
} }
std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) { std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
// TODO(Yancey1989): make grpc client completely thread-safe
std::lock_guard<std::mutex> guard(chan_mutex_); std::lock_guard<std::mutex> guard(chan_mutex_);
auto it = channels_.find(ep); auto it = channels_.find(ep);
if (it != channels_.end()) { if (it != channels_.end()) {
return it->second; return it->second;
} }
// Channel configurations:
grpc::ChannelArguments args; grpc::ChannelArguments args;
args.SetInt(GRPC_ARG_MAX_RECONNECT_BACKOFF_MS, 2000);
args.SetCompressionAlgorithm(GRPC_COMPRESS_NONE); args.SetCompressionAlgorithm(GRPC_COMPRESS_NONE);
args.SetMaxSendMessageSize(std::numeric_limits<int>::max()); args.SetMaxSendMessageSize(std::numeric_limits<int>::max());
args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max()); args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
......
...@@ -47,14 +47,18 @@ namespace operators { ...@@ -47,14 +47,18 @@ namespace operators {
namespace distributed { namespace distributed {
struct VarHandle { struct VarHandle {
// RPC endpoint.
std::string ep; std::string ep;
const platform::DeviceContext* ctx; const platform::DeviceContext* ctx;
const framework::Scope* scope; const framework::Scope* scope;
// Variable name.
std::string name; std::string name;
// RPC method name.
std::string method;
std::string String() const { std::string String() const {
std::ostringstream s; std::ostringstream s;
s << "name:[" << name << "] ep:[" << ep << "]"; s << method << " name:[" << name << "], ep:[" << ep << "]";
return s.str(); return s.str();
} }
}; };
...@@ -72,6 +76,7 @@ class BaseProcessor { ...@@ -72,6 +76,7 @@ class BaseProcessor {
virtual void Prepare(const VarHandle& var_info, int64_t time_out) { virtual void Prepare(const VarHandle& var_info, int64_t time_out) {
context_.reset(new grpc::ClientContext()); context_.reset(new grpc::ClientContext());
var_h_ = var_info; var_h_ = var_info;
context_->set_wait_for_ready(true);
std::chrono::system_clock::time_point deadline = std::chrono::system_clock::time_point deadline =
std::chrono::system_clock::now() + std::chrono::milliseconds(time_out); std::chrono::system_clock::now() + std::chrono::milliseconds(time_out);
...@@ -81,6 +86,7 @@ class BaseProcessor { ...@@ -81,6 +86,7 @@ class BaseProcessor {
virtual void Prepare(int64_t time_out) { virtual void Prepare(int64_t time_out) {
context_.reset(new grpc::ClientContext()); context_.reset(new grpc::ClientContext());
context_->set_wait_for_ready(true);
std::chrono::system_clock::time_point deadline = std::chrono::system_clock::time_point deadline =
std::chrono::system_clock::now() + std::chrono::milliseconds(time_out); std::chrono::system_clock::now() + std::chrono::milliseconds(time_out);
...@@ -172,26 +178,24 @@ class GRPCClient : public RPCClient { ...@@ -172,26 +178,24 @@ class GRPCClient : public RPCClient {
bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx, bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx,
const framework::Scope& scope, const std::string& var_name, const framework::Scope& scope, const std::string& var_name,
int64_t time_out = RPCClient::rpc_time_out) override; int64_t time_out = FLAGS_grpc_deadline) override;
bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx, bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx,
const framework::Scope& scope, const std::string& var_name, const framework::Scope& scope, const std::string& var_name,
int64_t time_out = RPCClient::rpc_time_out) override; int64_t time_out = FLAGS_grpc_deadline) override;
bool AsyncPrefetchVar(const std::string& ep, bool AsyncPrefetchVar(const std::string& ep,
const platform::DeviceContext& ctx, const platform::DeviceContext& ctx,
const framework::Scope& scope, const framework::Scope& scope,
const std::string& in_var_name, const std::string& in_var_name,
const std::string& out_var_name, const std::string& out_var_name,
int64_t time_out = RPCClient::rpc_time_out) override; int64_t time_out = FLAGS_grpc_deadline) override;
void AsyncSendBatchBarrier( void AsyncSendBatchBarrier(const std::string& ep,
const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) override;
int64_t time_out = RPCClient::rpc_time_out) override;
void AsyncSendFetchBarrier( void AsyncSendFetchBarrier(const std::string& ep,
const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) override;
int64_t time_out = RPCClient::rpc_time_out) override;
void Wait() override; void Wait() override;
...@@ -207,7 +211,7 @@ class GRPCClient : public RPCClient { ...@@ -207,7 +211,7 @@ class GRPCClient : public RPCClient {
void Proceed(); void Proceed();
void AsyncSendComplete(const std::string& ep, void AsyncSendComplete(const std::string& ep,
int64_t time_out = RPCClient::rpc_time_out); int64_t time_out = FLAGS_grpc_deadline);
std::shared_ptr<grpc::Channel> GetChannel(const std::string& ep); std::shared_ptr<grpc::Channel> GetChannel(const std::string& ep);
......
...@@ -41,6 +41,19 @@ class RequestBase { ...@@ -41,6 +41,19 @@ class RequestBase {
virtual ~RequestBase() {} virtual ~RequestBase() {}
virtual void Process() = 0; virtual void Process() = 0;
std::string Status2String(const std::string& method) {
std::string status = "Process";
if (status_ == FINISH) {
status = "Finish";
}
std::ostringstream s;
s << method << " name:[" << GetReqName() << "]"
<< ", ep:[" << ctx_.peer() << "]"
<< " " << status << " using req_id:" << req_id_;
return s.str();
}
CallStatus Status() const { CallStatus Status() const {
std::lock_guard<std::mutex> l(status_mu_); std::lock_guard<std::mutex> l(status_mu_);
return status_; return status_;
...@@ -84,7 +97,7 @@ class RequestSend final : public RequestBase { ...@@ -84,7 +97,7 @@ class RequestSend final : public RequestBase {
void Process() override { void Process() override {
std::string varname = GetReqName(); std::string varname = GetReqName();
VLOG(3) << "RequestSend var_name:" << varname; VLOG(4) << "RequestSend var_name:" << varname;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
auto invar = request_->GetVar(); auto invar = request_->GetVar();
...@@ -119,7 +132,7 @@ class RequestGet final : public RequestBase { ...@@ -119,7 +132,7 @@ class RequestGet final : public RequestBase {
void Process() override { void Process() override {
// proc request. // proc request.
std::string varname = request_.varname(); std::string varname = request_.varname();
VLOG(3) << "RequestGet " << varname; VLOG(4) << "RequestGet " << varname;
auto scope = request_handler_->scope(); auto scope = request_handler_->scope();
auto invar = scope->FindVar(varname); auto invar = scope->FindVar(varname);
...@@ -165,7 +178,7 @@ class RequestPrefetch final : public RequestBase { ...@@ -165,7 +178,7 @@ class RequestPrefetch final : public RequestBase {
// prefetch process... // prefetch process...
std::string in_var_name = request_->Varname(); std::string in_var_name = request_->Varname();
std::string out_var_name = request_->OutVarname(); std::string out_var_name = request_->OutVarname();
VLOG(3) << "RequestPrefetch, in_var_name: " << in_var_name VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name; << " out_var_name: " << out_var_name;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
...@@ -188,10 +201,10 @@ class RequestPrefetch final : public RequestBase { ...@@ -188,10 +201,10 @@ class RequestPrefetch final : public RequestBase {
}; };
void AsyncGRPCServer::WaitServerReady() { void AsyncGRPCServer::WaitServerReady() {
VLOG(3) << "AsyncGRPCServer is wait server ready"; VLOG(4) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_); std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; }); condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(3) << "AsyncGRPCServer WaitSeverReady"; VLOG(4) << "AsyncGRPCServer WaitSeverReady";
} }
void AsyncGRPCServer::StartServer() { void AsyncGRPCServer::StartServer() {
...@@ -230,7 +243,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -230,7 +243,7 @@ void AsyncGRPCServer::StartServer() {
for (int i = 0; i < threadnum; i++) { for (int i = 0; i < threadnum; i++) {
rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind( rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
&AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f))); &AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
VLOG(3) << t.first << " creates threads!"; VLOG(4) << t.first << " creates threads!";
} }
} }
...@@ -247,7 +260,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -247,7 +260,7 @@ void AsyncGRPCServer::StartServer() {
auto& threads = t.second; auto& threads = t.second;
for (size_t i = 0; i < threads.size(); ++i) { for (size_t i = 0; i < threads.size(); ++i) {
threads[i]->join(); threads[i]->join();
VLOG(3) << t.first << " threads ends!"; VLOG(4) << t.first << " threads ends!";
} }
} }
} }
...@@ -255,7 +268,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -255,7 +268,7 @@ void AsyncGRPCServer::StartServer() {
void AsyncGRPCServer::ShutdownQueue() { void AsyncGRPCServer::ShutdownQueue() {
for (auto& t : rpc_cq_) { for (auto& t : rpc_cq_) {
t.second->Shutdown(); t.second->Shutdown();
VLOG(3) << t.first << " shutdown!"; VLOG(4) << t.first << " queue shutdown!";
} }
} }
...@@ -264,7 +277,7 @@ void AsyncGRPCServer::ShutDownImpl() { ...@@ -264,7 +277,7 @@ void AsyncGRPCServer::ShutDownImpl() {
is_shut_down_ = true; is_shut_down_ = true;
ShutdownQueue(); ShutdownQueue();
VLOG(3) << "server_ shutdown!"; VLOG(4) << "server_ shutdown!";
server_->Shutdown(); server_->Shutdown();
} }
...@@ -272,7 +285,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, ...@@ -272,7 +285,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
int req_id) { int req_id) {
std::unique_lock<std::mutex> lock(cq_mutex_); std::unique_lock<std::mutex> lock(cq_mutex_);
if (is_shut_down_) { if (is_shut_down_) {
VLOG(3) << "shutdown, do not TryToRegisterNewSendOne"; VLOG(4) << "shutdown, do not TryToRegisterNewSendOne";
return; return;
} }
...@@ -306,14 +319,14 @@ void AsyncGRPCServer::HandleRequest( ...@@ -306,14 +319,14 @@ void AsyncGRPCServer::HandleRequest(
bool ok = false; bool ok = false;
while (true) { while (true) {
VLOG(3) << "HandleRequest " << rpc_name << " wait next"; VLOG(4) << "HandleRequest " << rpc_name << " wait next";
if (!cq->Next(&tag, &ok)) { if (!cq->Next(&tag, &ok)) {
LOG(INFO) << "CompletionQueue " << rpc_name << " shutdown!"; LOG(INFO) << "CompletionQueue " << rpc_name << " shutdown!";
break; break;
} }
int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag)); int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
VLOG(3) << "HandleRequest " << rpc_name << ", req_id:" << req_id VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id
<< " get next"; << " get next";
auto& reqs = rpc_reqs_[rpc_name]; auto& reqs = rpc_reqs_[rpc_name];
...@@ -324,22 +337,21 @@ void AsyncGRPCServer::HandleRequest( ...@@ -324,22 +337,21 @@ void AsyncGRPCServer::HandleRequest(
base = reqs[req_id]; base = reqs[req_id];
} }
VLOG(3) << base->Status2String(rpc_name);
// reference: // reference:
// https://github.com/tensorflow/tensorflow/issues/5596 // https://github.com/tensorflow/tensorflow/issues/5596
// https://groups.google.com/forum/#!topic/grpc-io/xftlRy-IQwM // https://groups.google.com/forum/#!topic/grpc-io/xftlRy-IQwM
// https://groups.google.com/forum/#!topic/grpc-io/ywATt88Ef_I // https://groups.google.com/forum/#!topic/grpc-io/ywATt88Ef_I
if (!ok) { if (!ok) {
LOG(WARNING) << "completion queue:" << rpc_name LOG(WARNING) << "completion queue:" << rpc_name
<< " recv no regular event:argument name[" << " recv no regular event"
<< base->GetReqName() << "]"; << " context:" << base->Status2String(rpc_name);
TryToRegisterNewOne(rpc_name, req_id); TryToRegisterNewOne(rpc_name, req_id);
delete base; delete base;
continue; continue;
} }
VLOG(3) << "queue id:" << rpc_name << ", req_id:" << req_id
<< ", status:" << base->Status();
switch (base->Status()) { switch (base->Status()) {
case PROCESS: { case PROCESS: {
base->Process(); base->Process();
......
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/operators/distributed/rpc_client.h" #include "paddle/fluid/operators/distributed/rpc_client.h"
#include "gflags/gflags.h"
// default to 3min to avoid temporary network failures.
DEFINE_int32(grpc_deadline, 180000, "deadline timeouts for grpc");
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -15,11 +15,14 @@ ...@@ -15,11 +15,14 @@
#pragma once #pragma once
#include <string> #include <string>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
DECLARE_int32(grpc_deadline);
namespace paddle { namespace paddle {
namespace operators { namespace operators {
namespace distributed { namespace distributed {
...@@ -32,26 +35,26 @@ class RPCClient { ...@@ -32,26 +35,26 @@ class RPCClient {
const platform::DeviceContext& ctx, const platform::DeviceContext& ctx,
const framework::Scope& scope, const framework::Scope& scope,
const std::string& var_name, const std::string& var_name,
int64_t time_out = rpc_time_out) = 0; int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual bool AsyncGetVar(const std::string& ep, virtual bool AsyncGetVar(const std::string& ep,
const platform::DeviceContext& ctx, const platform::DeviceContext& ctx,
const framework::Scope& scope, const framework::Scope& scope,
const std::string& var_name, const std::string& var_name,
int64_t time_out = rpc_time_out) = 0; int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual bool AsyncPrefetchVar(const std::string& ep, virtual bool AsyncPrefetchVar(const std::string& ep,
const platform::DeviceContext& ctx, const platform::DeviceContext& ctx,
const framework::Scope& scope, const framework::Scope& scope,
const std::string& in_var_name, const std::string& in_var_name,
const std::string& out_var_name, const std::string& out_var_name,
int64_t time_out = rpc_time_out) = 0; int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual void AsyncSendBatchBarrier(const std::string& ep, virtual void AsyncSendBatchBarrier(
int64_t time_out = rpc_time_out) = 0; const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual void AsyncSendFetchBarrier(const std::string& ep, virtual void AsyncSendFetchBarrier(
int64_t time_out = rpc_time_out) = 0; const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) = 0;
// SendComplete tells all the server that current trainer have no more data // SendComplete tells all the server that current trainer have no more data
// to train, so that the pserver can reduce it's barrier count, and continue // to train, so that the pserver can reduce it's barrier count, and continue
...@@ -60,8 +63,6 @@ class RPCClient { ...@@ -60,8 +63,6 @@ class RPCClient {
virtual void Wait() = 0; virtual void Wait() = 0;
static constexpr int64_t rpc_time_out = 120 * 1000;
template <typename T> template <typename T>
static RPCClient* GetInstance() { static RPCClient* GetInstance() {
std::call_once(init_flag_, &RPCClient::Init<T>); std::call_once(init_flag_, &RPCClient::Init<T>);
......
...@@ -47,11 +47,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) { ...@@ -47,11 +47,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
return (barrier_counter_[rpc_name] >= client_num_ || exit_flag_.load()); return (barrier_counter_[rpc_name] >= client_num_ || exit_flag_.load());
}); });
VLOG(3) << "batch_barrier_:" << barrier_counter_[rpc_name]; VLOG(3) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name];
} }
void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) { void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
VLOG(3) << "RPCServer begin IncreaseBatchBarrier " << rpc_name; VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
int b = 0; int b = 0;
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
b = ++barrier_counter_[rpc_name]; b = ++barrier_counter_[rpc_name];
...@@ -100,7 +101,7 @@ void RPCServer::SetCond(const std::string& rpc_name) { ...@@ -100,7 +101,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
} }
void RPCServer::WaitCond(const std::string& rpc_name) { void RPCServer::WaitCond(const std::string& rpc_name) {
VLOG(3) << "RPCServer WaitCond " << rpc_name; VLOG(4) << "RPCServer WaitCond " << rpc_name;
int cond = 0; int cond = 0;
{ {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
......
...@@ -76,6 +76,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -76,6 +76,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input,
if (total_written + size_to_write > length) { if (total_written + size_to_write > length) {
size_to_write = length - total_written; size_to_write = length - total_written;
} }
// This log is useful to see how large each internal rpc block is.
VLOG(7) << "copy " << size_to_write << " data to CUDAPlace";
memory::Copy(boost::get<platform::CUDAPlace>(place), memory::Copy(boost::get<platform::CUDAPlace>(place),
reinterpret_cast<void*>(p), cpu, data, size_to_write, reinterpret_cast<void*>(p), cpu, data, size_to_write,
gpu_dev_ctx.stream()); gpu_dev_ctx.stream());
...@@ -103,6 +105,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -103,6 +105,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input,
} }
// TODO(gongwb): can we avoid copy? // TODO(gongwb): can we avoid copy?
platform::CPUPlace cpu; platform::CPUPlace cpu;
// This log is useful to see how large each internal rpc block is.
VLOG(7) << "copy " << size_to_write << " data to CPUPlace";
memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write); memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
p += size_to_write; p += size_to_write;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/elementwise_add_op.h"
#include "paddle/fluid/operators/elementwise_op_function.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using framework::DataLayout;
using framework::Tensor;
using mkldnn::memory;
using mkldnn::reorder;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::sum;
// MKLDNN forward kernel for elementwise_add: Out = X + Y.
//
// When X and Y have identical dims the addition is executed through an MKLDNN
// `sum` primitive. When the dims differ (Y has to be broadcast onto X) the
// kernel falls back to the generic CPU TransformFunctor path.
//
// NOTE: the MKLDNN memory descriptors below hard-code memory::data_type::f32
// regardless of T.
template <typename T>
class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
 public:
  // Reads inputs "X" and "Y" and attribute "axis" from `ctx`, and writes the
  // result into output "Out". The output is always tagged with the kMKLDNN
  // layout; its format is X's format on the broadcast path and the format
  // chosen by the sum primitive otherwise.
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& dev_ctx =
        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();

    auto* x = ctx.Input<Tensor>("X");
    auto* y = ctx.Input<Tensor>("Y");
    auto* z = ctx.Output<Tensor>("Out");
    const T* x_data = x->data<T>();
    const T* y_data = y->data<T>();
    T* z_data = z->mutable_data<T>(ctx.GetPlace());

    int axis = ctx.Attr<int>("axis");

    auto x_dims = x->dims();
    auto y_dims = y->dims();
    auto z_dims = z->dims();

    // Execute default elementwise_add operator when
    // broadcast operations need to performed.
    if (x_dims != y_dims) {
      auto sum_func = [](T a, T b) -> T { return a + b; };

      TransformFunctor<decltype(sum_func), T,
                       paddle::platform::CPUDeviceContext, T>
          functor(
              x, y, z,
              ctx.template device_context<paddle::platform::CPUDeviceContext>(),
              sum_func);

      // axis == -1 means "align Y with the trailing dimensions of X".
      axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
      PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
                     "Axis should be in range [0, x_dims)");

      trim_trailing_singular_dims(&y_dims);
      axis = (y_dims.size() == 0) ? x_dims.size() : axis;

      // get_mid_dims fills pre / n / post, which select the broadcast
      // strategy below.
      int pre, n, post;
      get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);

      if (post == 1) {
        functor.RunRowWise(n, pre);
      } else {
        functor.RunMidWise(n, pre, post);
      }
      z->set_layout(DataLayout::kMKLDNN);
      z->set_format(x->format());
    } else {
      // Both inputs must already carry a concrete MKLDNN format, because the
      // formats are used to build the source memory descriptors.
      PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN &&
                         x->format() != memory::format::format_undef,
                     "Wrong layout/format set for X tensor");
      // Fixed: this check is about Y, so the message must name Y (it used to
      // say "X tensor", pointing users at the wrong input).
      PADDLE_ENFORCE(y->layout() == DataLayout::kMKLDNN &&
                         y->format() != memory::format::format_undef,
                     "Wrong layout/format set for Y tensor");

      std::vector<int> src_x_tz = framework::vectorize2int(x_dims);
      std::vector<int> src_y_tz = framework::vectorize2int(y_dims);
      std::vector<int> dst_tz = framework::vectorize2int(z_dims);

      std::vector<memory::primitive_desc> srcs_pd;
      std::vector<memory> srcs;
      // Both sources contribute with weight 1, i.e. a plain sum.
      std::vector<float> scales = {1.0f, 1.0f};

      auto src_x_pd = memory::primitive_desc(
          {{src_x_tz}, memory::data_type::f32, x->format()}, mkldnn_engine);
      auto src_y_pd = memory::primitive_desc(
          {{src_y_tz}, memory::data_type::f32, y->format()}, mkldnn_engine);
      auto src_x_memory =
          memory(src_x_pd, paddle::platform::to_void_cast(x_data));
      auto src_y_memory =
          memory(src_y_pd, paddle::platform::to_void_cast(y_data));

      srcs_pd.push_back(src_x_pd);
      srcs_pd.push_back(src_y_pd);
      srcs.push_back(src_x_memory);
      srcs.push_back(src_y_memory);

      // format::any leaves the destination format to the primitive.
      auto dst_md =
          memory::desc({dst_tz}, memory::data_type::f32, memory::format::any);

      // create primitive descriptor for sum
      auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd);

      // create mkldnn memory for dst
      memory dst_memory = memory(sum_pd.dst_primitive_desc(), z_data);

      std::vector<primitive::at> inputs;
      inputs.push_back(srcs[0]);
      inputs.push_back(srcs[1]);

      // create sum primitive
      auto sum_prim = sum(sum_pd, inputs, dst_memory);

      std::vector<primitive> pipeline;
      pipeline.push_back(sum_prim);
      stream(stream::kind::eager).submit(pipeline).wait();

      // Record the format actually used for the destination memory.
      z->set_layout(DataLayout::kMKLDNN);
      z->set_format(
          (memory::format)dst_memory.get_primitive_desc().desc().data.format);
    }
  }
};
// MKLDNN backward kernel for elementwise_add.
//
// When X and Y have identical dims, every requested gradient (dX and/or dY)
// is a plain copy of dOut. Otherwise the generic CPU ElemwiseGradCompute
// routine is used.
template <typename T>
class EltwiseAddMKLDNNGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using Tensor = framework::Tensor;

    auto* x = ctx.Input<Tensor>("X");
    auto* y = ctx.Input<Tensor>("Y");
    auto* out = ctx.Input<Tensor>("Out");
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
    int axis = ctx.Attr<int>("axis");

    // Execute default kernel when broadcast is needed
    if (x->dims() != y->dims()) {
      ElemwiseGradCompute<paddle::platform::CPUDeviceContext, T,
                          IdentityGrad<T>, IdentityGrad<T>>(
          ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad<T>(),
          IdentityGrad<T>());
      return;
    }

    auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);

    // Copies dOut into `grad` (if that gradient was requested) and tags it
    // with the MKLDNN layout and dOut's format.
    auto copy_from_dout = [&](Tensor* grad) {
      if (!grad) {
        return;
      }
      blas.VCOPY(dout->numel(), dout->data<T>(),
                 grad->mutable_data<T>(ctx.GetPlace()));
      grad->set_layout(DataLayout::kMKLDNN);
      grad->set_format(dout->format());
    };

    copy_from_dout(dx);
    copy_from_dout(dy);
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(elementwise_add, MKLDNN, ::paddle::platform::CPUPlace,
ops::EltwiseAddMKLDNNKernel<float>)
REGISTER_OP_KERNEL(elementwise_add_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::EltwiseAddMKLDNNGradKernel<float>)
...@@ -14,8 +14,12 @@ limitations under the License. */ ...@@ -14,8 +14,12 @@ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -40,6 +44,21 @@ class ElementwiseOp : public framework::OperatorWithKernel { ...@@ -40,6 +44,21 @@ class ElementwiseOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("Out", x_dim); ctx->SetOutputDim("Out", x_dim);
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
// Selects the kernel for this op. The kernel data type is taken from input
// "X". When built with PADDLE_WITH_MKLDNN and platform::CanMKLDNNBeUsed(ctx)
// holds, the MKLDNN layout/library kernel is requested; otherwise the default
// kernel for the current place is used.
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const override {
  const auto x_data_type =
      framework::ToDataType(ctx.Input<Tensor>("X")->type());

#ifdef PADDLE_WITH_MKLDNN
  const bool use_mkldnn = platform::CanMKLDNNBeUsed(ctx);
  if (use_mkldnn) {
    return framework::OpKernelType(x_data_type, ctx.GetPlace(),
                                   framework::DataLayout::kMKLDNN,
                                   framework::LibraryType::kMKLDNN);
  }
#endif
  return framework::OpKernelType(x_data_type, ctx.GetPlace());
}
}; };
class ElementwiseOpInferVarType : public framework::VarTypeInference { class ElementwiseOpInferVarType : public framework::VarTypeInference {
...@@ -65,6 +84,8 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -65,6 +84,8 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
"for broadcasting Y onto X.") "for broadcasting Y onto X.")
.SetDefault(-1) .SetDefault(-1)
.EqualGreaterThan(-1); .EqualGreaterThan(-1);
AddAttr<bool>("use_mkldnn", "(bool, default false). Used by MKLDNN.")
.SetDefault(false);
AddComment(string::Sprintf(R"DOC( AddComment(string::Sprintf(R"DOC(
Limited Elementwise %s Operator Limited Elementwise %s Operator
...@@ -138,6 +159,21 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { ...@@ -138,6 +159,21 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
ctx->SetOutputDim(y_grad_name, y_dims); ctx->SetOutputDim(y_grad_name, y_dims);
} }
} }
// Selects the kernel for the gradient op. The kernel data type is taken from
// input "X". When built with PADDLE_WITH_MKLDNN and
// platform::CanMKLDNNBeUsed(ctx) holds, the MKLDNN layout/library kernel is
// requested; otherwise the default kernel for the current place is used.
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const override {
  const auto x_data_type =
      framework::ToDataType(ctx.Input<Tensor>("X")->type());

#ifdef PADDLE_WITH_MKLDNN
  const bool use_mkldnn = platform::CanMKLDNNBeUsed(ctx);
  if (use_mkldnn) {
    return framework::OpKernelType(x_data_type, ctx.GetPlace(),
                                   framework::DataLayout::kMKLDNN,
                                   framework::LibraryType::kMKLDNN);
  }
#endif
  return framework::OpKernelType(x_data_type, ctx.GetPlace());
}
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -101,17 +101,16 @@ void ListenAndServOp::RunSyncLoop( ...@@ -101,17 +101,16 @@ void ListenAndServOp::RunSyncLoop(
framework::Scope *recv_scope, framework::Scope *recv_scope,
const std::vector<int> &prefetch_block_id_list) const { const std::vector<int> &prefetch_block_id_list) const {
size_t num_blocks = program->Size(); size_t num_blocks = program->Size();
auto optimize_blocks =
Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
PADDLE_ENFORCE_GE(num_blocks, 2, PADDLE_ENFORCE_GE(num_blocks, 2,
"server program should have at least 2 blocks"); "server program should have at least 2 blocks");
std::vector<int> optimize_block_id_list; std::vector<int> optimize_blocks_idx;
for (int blkid = 1; blkid < num_blocks; ++blkid) { for (auto blk : optimize_blocks) {
if (std::find(prefetch_block_id_list.begin(), prefetch_block_id_list.end(), optimize_blocks_idx.push_back(blk->ID());
blkid) == prefetch_block_id_list.end()) {
optimize_block_id_list.push_back(blkid);
}
} }
auto optimize_prepared = executor->Prepare(*program, optimize_block_id_list); auto optimize_prepared = executor->Prepare(*program, optimize_blocks_idx);
// Insert placeholder for block0 which holds current op itself. // Insert placeholder for block0 which holds current op itself.
optimize_prepared.insert( optimize_prepared.insert(
optimize_prepared.begin(), optimize_prepared.begin(),
...@@ -134,14 +133,14 @@ void ListenAndServOp::RunSyncLoop( ...@@ -134,14 +133,14 @@ void ListenAndServOp::RunSyncLoop(
// and this will still work. // and this will still work.
// The optimize blocks which have the same parent ID would run parallel // The optimize blocks which have the same parent ID would run parallel
// TODO(Yancey1989): need to use ParallelExecutor for future // TODO(Yancey1989): need to use ParallelExecutor for future
int32_t last_parent_blkid = program->Block(1).Parent(); int32_t last_parent_blkid = optimize_blocks[0]->Parent();
std::vector<size_t> parallel_blkids; std::vector<size_t> parallel_blkids;
parallel_blkids.push_back(1); parallel_blkids.push_back(optimize_blocks[0]->ID());
double ts = GetTimestamp(); double ts = GetTimestamp();
for (size_t i = 1; i < optimize_block_id_list.size(); ++i) { for (size_t i = 1; i < optimize_blocks.size(); ++i) {
// skip the first optimize block because it is already in the // skip the first optimize block because it is already in the
// parallel_blkids. // parallel_blkids.
int blkid = optimize_block_id_list[i]; int blkid = optimize_blocks[i]->ID();
if (program->Block(blkid).Parent() != last_parent_blkid) { if (program->Block(blkid).Parent() != last_parent_blkid) {
ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared,
program, recv_scope); program, recv_scope);
...@@ -164,8 +163,8 @@ void ListenAndServOp::RunSyncLoop( ...@@ -164,8 +163,8 @@ void ListenAndServOp::RunSyncLoop(
} }
void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
framework::ProgramDesc *program) const { framework::ProgramDesc *program,
VLOG(3) << "RunAsyncLoop in"; framework::Scope *recv_scope) const {
// grad name to block id // grad name to block id
std::unordered_map<std::string, int32_t> grad_to_block_id; std::unordered_map<std::string, int32_t> grad_to_block_id;
std::unordered_map<int32_t, std::string> id_to_grad; std::unordered_map<int32_t, std::string> id_to_grad;
...@@ -192,6 +191,10 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -192,6 +191,10 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
block_list.push_back(blkid); block_list.push_back(blkid);
} }
auto optimize_prepared = executor->Prepare(*program, block_list); auto optimize_prepared = executor->Prepare(*program, block_list);
// execute global block if needed
if (block_list[0] == 1 && id_to_grad.count(1) == 0) {
executor->RunPreparedContext(optimize_prepared[0].get(), recv_scope);
}
std::unordered_map<std::string, std::unordered_map<std::string,
std::shared_ptr<framework::ExecutorPrepareContext>> std::shared_ptr<framework::ExecutorPrepareContext>>
grad_to_prepared_ctx; grad_to_prepared_ctx;
...@@ -203,7 +206,6 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -203,7 +206,6 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
request_get_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx); request_get_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
request_prefetch_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx); request_prefetch_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
VLOG(3) << "RunAsyncLoop into while";
while (true) { while (true) {
if (rpc_service_->IsExit()) { if (rpc_service_->IsExit()) {
LOG(INFO) << "get exit!rpc_processor break!"; LOG(INFO) << "get exit!rpc_processor break!";
...@@ -261,8 +263,11 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -261,8 +263,11 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
rpc_service_->RegisterRPC(distributed::kRequestPrefetch, rpc_service_->RegisterRPC(distributed::kRequestPrefetch,
request_prefetch_handler_.get()); request_prefetch_handler_.get());
auto *optimize_block = Attr<framework::BlockDesc *>(kOptimizeBlock); auto optimize_blocks =
auto *program = optimize_block->Program(); Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
PADDLE_ENFORCE(optimize_blocks.size() >= 1,
"optimize blocks should be 1 at least on the pserver side.");
auto *program = optimize_blocks[0]->Program();
framework::Executor executor(dev_place); framework::Executor executor(dev_place);
// prepare for prefetch // prepare for prefetch
...@@ -317,7 +322,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -317,7 +322,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
if (sync_mode) { if (sync_mode) {
RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list); RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list);
} else { } else {
RunAsyncLoop(&executor, program); RunAsyncLoop(&executor, program, &recv_scope);
} }
} }
...@@ -339,8 +344,9 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -339,8 +344,9 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker {
"a map from grad name to it's optimize block id") "a map from grad name to it's optimize block id")
.SetDefault({}); .SetDefault({});
AddAttr<bool>("sync_mode", "if works at sync_mode or not").SetDefault(true); AddAttr<bool>("sync_mode", "if works at sync_mode or not").SetDefault(true);
AddAttr<framework::BlockDesc *>(kOptimizeBlock, AddAttr<std::vector<framework::BlockDesc *>>(
"BlockID to run on server side."); kOptimizeBlocks, "Optimize blocks to run on server side.")
.SetDefault({});
AddAttr<std::vector<std::string>>(kPrefetchVarNameToBlockId, AddAttr<std::vector<std::string>>(kPrefetchVarNameToBlockId,
"prefetch blocks to run on server side.") "prefetch blocks to run on server side.")
.SetDefault({}); .SetDefault({});
......
...@@ -30,7 +30,7 @@ limitations under the License. */ ...@@ -30,7 +30,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
constexpr char kOptimizeBlock[] = "OptimizeBlock"; constexpr char kOptimizeBlocks[] = "optimize_blocks";
constexpr char kPrefetchVarNameToBlockId[] = "prefetch_var_name_to_block_id"; constexpr char kPrefetchVarNameToBlockId[] = "prefetch_var_name_to_block_id";
void RunServer(std::shared_ptr<distributed::RPCServer> service); void RunServer(std::shared_ptr<distributed::RPCServer> service);
...@@ -50,7 +50,8 @@ class ListenAndServOp : public framework::OperatorBase { ...@@ -50,7 +50,8 @@ class ListenAndServOp : public framework::OperatorBase {
const std::vector<int>& prefetch_block_id_list) const; const std::vector<int>& prefetch_block_id_list) const;
void RunAsyncLoop(framework::Executor* executor, void RunAsyncLoop(framework::Executor* executor,
framework::ProgramDesc* program) const; framework::ProgramDesc* program,
framework::Scope* recv_scope) const;
void SavePort() const; void SavePort() const;
......
...@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase { ...@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}}, "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
framework::AttributeMap{}); framework::AttributeMap{{"use_mkldnn", {false}}});
VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]); VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
sum_op->Run(*sub_scopes[0], places[0]); sum_op->Run(*sub_scopes[0], places[0]);
WaitOnPlace(places[0]); WaitOnPlace(places[0]);
......
...@@ -37,6 +37,11 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -37,6 +37,11 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("SeedOut", "The random seed after random cropping.") AddOutput("SeedOut", "The random seed after random cropping.")
.AsIntermediate(); .AsIntermediate();
AddAttr<std::vector<int>>("shape", "The shape of a cropped instance."); AddAttr<std::vector<int>>("shape", "The shape of a cropped instance.");
AddAttr<int>("startup_seed",
"If the input 'Seed' is not initialized, the 'startup_seed' "
"will be used to replace it. Even so, the seed after random "
"crop will also be outputed to the 'SeedOut'.")
.SetDefault(0);
AddComment(R"DOC( AddComment(R"DOC(
This operator takes a batch of instance, and do random cropping on each instance. This operator takes a batch of instance, and do random cropping on each instance.
It means that cropping positions differs on each instance, which is determined It means that cropping positions differs on each instance, which is determined
...@@ -49,8 +54,6 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -49,8 +54,6 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker {
class RandomCropOpInferShape : public framework::InferShapeBase { class RandomCropOpInferShape : public framework::InferShapeBase {
public: public:
void operator()(framework::InferShapeContext* ctx) const override { void operator()(framework::InferShapeContext* ctx) const override {
auto seed_dim = ctx->GetInputDim("Seed");
PADDLE_ENFORCE(seed_dim.size() == 1 && seed_dim[0] == 1);
auto shape = ctx->Attrs().Get<std::vector<int>>("shape"); auto shape = ctx->Attrs().Get<std::vector<int>>("shape");
auto x_dim = ctx->GetInputDim("X"); auto x_dim = ctx->GetInputDim("X");
PADDLE_ENFORCE_GT(x_dim.size(), static_cast<int64_t>(shape.size())); PADDLE_ENFORCE_GT(x_dim.size(), static_cast<int64_t>(shape.size()));
...@@ -62,7 +65,6 @@ class RandomCropOpInferShape : public framework::InferShapeBase { ...@@ -62,7 +65,6 @@ class RandomCropOpInferShape : public framework::InferShapeBase {
out_dim[x_i] = shape[shape_i]; out_dim[x_i] = shape[shape_i];
} }
ctx->SetOutputDim("Out", framework::make_ddim(out_dim)); ctx->SetOutputDim("Out", framework::make_ddim(out_dim));
ctx->SetOutputDim("SeedOut", framework::make_ddim({1}));
} }
}; };
......
...@@ -142,16 +142,22 @@ template <typename DeviceContext, typename T> ...@@ -142,16 +142,22 @@ template <typename DeviceContext, typename T>
class RandomCropKernel : public framework::OpKernel<T> { class RandomCropKernel : public framework::OpKernel<T> {
public: public:
virtual void Compute(const framework::ExecutionContext& ctx) const { virtual void Compute(const framework::ExecutionContext& ctx) const {
auto& seed_tensor = detail::Ref(ctx.Input<framework::LoDTensor>("Seed"));
int64_t seed = 0; int64_t seed = 0;
if (platform::is_cpu_place(seed_tensor.place())) { auto& seed_tensor = detail::Ref(ctx.Input<framework::LoDTensor>("Seed"));
seed = *seed_tensor.data<int64_t>(); if (seed_tensor.IsInitialized()) {
if (platform::is_cpu_place(seed_tensor.place())) {
seed = *seed_tensor.data<int64_t>();
} else {
LOG(WARNING) << "It is slow to place seed in GPU memory. Please verify "
"your program";
framework::LoDTensor cpu_seed;
framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed);
seed = *cpu_seed.data<int64_t>();
}
} else { } else {
LOG(WARNING) << "It is slow to place seed in GPU memory. Please verify " VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute "
"your program"; "'startup_seed' instead.";
framework::LoDTensor cpu_seed; seed = ctx.Attr<int>("startup_seed");
framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed);
seed = *cpu_seed.data<int64_t>();
} }
auto shape = ctx.Attr<std::vector<int>>("shape"); auto shape = ctx.Attr<std::vector<int>>("shape");
auto& x = detail::Ref(ctx.Input<framework::LoDTensor>("X")); auto& x = detail::Ref(ctx.Input<framework::LoDTensor>("X"));
...@@ -171,7 +177,7 @@ class RandomCropKernel : public framework::OpKernel<T> { ...@@ -171,7 +177,7 @@ class RandomCropKernel : public framework::OpKernel<T> {
engine.discard(functor.prod_batchsize_dims_ * engine.discard(functor.prod_batchsize_dims_ *
(functor.rank_ - functor.num_batchsize_dims_)); (functor.rank_ - functor.num_batchsize_dims_));
*ctx.Output<framework::LoDTensor>("SeedOut")->mutable_data<int64_t>( *ctx.Output<framework::LoDTensor>("SeedOut")->mutable_data<int64_t>(
platform::CPUPlace()) = engine(); framework::make_ddim({1}), platform::CPUPlace()) = engine();
} }
}; };
......
...@@ -39,6 +39,7 @@ class CustomReader : public framework::DecoratedReader { ...@@ -39,6 +39,7 @@ class CustomReader : public framework::DecoratedReader {
const framework::ProgramDesc program_; const framework::ProgramDesc program_;
int sub_block_id_; int sub_block_id_;
framework::Executor exe_; framework::Executor exe_;
framework::Scope scope_;
std::vector<std::string> source_var_names_; std::vector<std::string> source_var_names_;
std::vector<std::string> sink_var_names_; std::vector<std::string> sink_var_names_;
...@@ -158,23 +159,24 @@ void CustomReader::ReadNext(std::vector<framework::LoDTensor>* out) { ...@@ -158,23 +159,24 @@ void CustomReader::ReadNext(std::vector<framework::LoDTensor>* out) {
// The scope for CustomReader's sub-block should be independent and shouldn't // The scope for CustomReader's sub-block should be independent and shouldn't
// be any other computation scope's child. Otherwise, data preprocessing and // be any other computation scope's child. Otherwise, data preprocessing and
// compution cannot be concurrent. // compution cannot be concurrent.
framework::Scope scope; framework::Scope* exe_scope = &scope_.NewScope();
// 1. Copy LoDTensors from underlying reader's output to source variables. // 1. Copy LoDTensors from underlying reader's output to source variables.
for (size_t i = 0; i < source_var_names_.size(); ++i) { for (size_t i = 0; i < source_var_names_.size(); ++i) {
framework::Variable* var = scope.Var(source_var_names_[i]); framework::Variable* var = exe_scope->Var(source_var_names_[i]);
framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>(); framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
tensor->ShareDataWith(underlying_outs[i]); tensor->ShareDataWith(underlying_outs[i]);
tensor->set_lod(underlying_outs[i].lod()); tensor->set_lod(underlying_outs[i].lod());
} }
// 2. Run the sub-block. // 2. Run the sub-block.
exe_.Run(program_, &scope, sub_block_id_, false, true); exe_.Run(program_, exe_scope, sub_block_id_, false, true);
// 3. Copy LoDTensors from sink variables to out. // 3. Copy LoDTensors from sink variables to out.
out->resize(sink_var_names_.size()); out->resize(sink_var_names_.size());
for (size_t i = 0; i < sink_var_names_.size(); ++i) { for (size_t i = 0; i < sink_var_names_.size(); ++i) {
const auto& tensor = detail::Ref(scope.FindVar(sink_var_names_[i])) const auto& tensor = detail::Ref(exe_scope->FindVar(sink_var_names_[i]))
.Get<framework::LoDTensor>(); .Get<framework::LoDTensor>();
framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]); framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]);
} }
scope_.DeleteScope(exe_scope);
} }
} // namespace reader } // namespace reader
......
...@@ -23,13 +23,13 @@ namespace reader { ...@@ -23,13 +23,13 @@ namespace reader {
// 'Double buffer' means we shall maintain two batches of input data at the same // 'Double buffer' means we shall maintain two batches of input data at the same
// time. So the kCacheSize should be at least 2. // time. So the kCacheSize should be at least 2.
static constexpr size_t kCacheSize = 3; static constexpr size_t kCacheSize = 5;
// There will be two batches out of the channel during training: // There will be two batches out of the channel during training:
// 1. the one waiting to be sent to the channel // 1. the one waiting to be sent to the channel
// 2. the one just received from the channel, which is also being used by // 2. the one just received from the channel, which is also being used by
// subsequent operators. // subsequent operators.
// So the channel size should be kCacheSize - 2 // So the channel size should be kCacheSize - 2
static constexpr size_t kChannelSize = 1; // kCacheSize - 2 static constexpr size_t kChannelSize = 3; // kCacheSize - 2
class DoubleBufferReader : public framework::DecoratedReader { class DoubleBufferReader : public framework::DecoratedReader {
public: public:
......
...@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase {
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}}, "sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); {{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, place); sum_op->Run(cur_scope, place);
cur_scope.Rename(new_inside_name, inside_grad_name); cur_scope.Rename(new_inside_name, inside_grad_name);
......
...@@ -129,7 +129,10 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) { ...@@ -129,7 +129,10 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
// sub program run in listen_and_serv_op, for simple test we use sum // sub program run in listen_and_serv_op, for simple test we use sum
f::ProgramDesc program; f::ProgramDesc program;
const auto &root_block = program.Block(0); const auto &root_block = program.Block(0);
std::vector<framework::BlockDesc *> optimize_blocks;
auto *optimize_block = program.AppendBlock(root_block); auto *optimize_block = program.AppendBlock(root_block);
optimize_blocks.push_back(optimize_block);
auto *prefetch_block = program.AppendBlock(root_block); auto *prefetch_block = program.AppendBlock(root_block);
// X for server side tensors, RX for received tensors, must be of same shape. // X for server side tensors, RX for received tensors, must be of same shape.
AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block, AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block,
...@@ -139,7 +142,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) { ...@@ -139,7 +142,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
attrs.insert({"Fanin", 1}); attrs.insert({"Fanin", 1});
attrs.insert({"ParamList", std::vector<std::string>({"Out"})}); attrs.insert({"ParamList", std::vector<std::string>({"Out"})});
attrs.insert({"GradList", std::vector<std::string>({"x1"})}); attrs.insert({"GradList", std::vector<std::string>({"x1"})});
attrs.insert({"OptimizeBlock", optimize_block}); attrs.insert({"optimize_blocks", optimize_blocks});
attrs.insert({"PrefetchBlock", prefetch_block}); attrs.insert({"PrefetchBlock", prefetch_block});
attrs.insert({"grad_to_block_id", std::vector<std::string>({""})}); attrs.insert({"grad_to_block_id", std::vector<std::string>({""})});
attrs.insert({"sync_mode", true}); attrs.insert({"sync_mode", true});
......
...@@ -27,8 +27,81 @@ using paddle::platform::MKLDNNMemDesc; ...@@ -27,8 +27,81 @@ using paddle::platform::MKLDNNMemDesc;
using mkldnn::memory; // Note: paddle has also "memory" namespace using mkldnn::memory; // Note: paddle has also "memory" namespace
using mkldnn::primitive; using mkldnn::primitive;
using mkldnn::softmax_forward; using mkldnn::softmax_forward;
using mkldnn::softmax_backward;
using mkldnn::prop_kind; using mkldnn::prop_kind;
using mkldnn::stream; using mkldnn::stream;
using platform::to_void_cast;
class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
public:
SoftmaxMKLDNNHandler(
std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd,
const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
const std::string& base_key)
: platform::MKLDNNHandler(dev_ctx, engine, base_key),
softmax_pd_(softmax_pd) {}
SoftmaxMKLDNNHandler(
std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd,
std::shared_ptr<mkldnn::softmax_backward::primitive_desc> softmax_bwd_pd,
const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
const std::string& base_key)
: platform::MKLDNNHandler(dev_ctx, engine, base_key),
softmax_pd_(softmax_pd),
softmax_bwd_pd_(softmax_bwd_pd) {
// If we are in Grad operatgor then update a key with BWD suffix to
// distinguish from FWD memory primitives
key_ += "-BWD";
}
std::shared_ptr<mkldnn::softmax_forward> AcquireSoftmax(
std::shared_ptr<mkldnn::memory> dst_memory_p,
std::shared_ptr<mkldnn::memory> src_memory_p) {
/*Generate key*/
auto prim_key = key_ + "@softmax_p";
auto softmax_p = std::static_pointer_cast<mkldnn::softmax_forward>(
dev_ctx_.GetBlob(prim_key));
PADDLE_ENFORCE((softmax_p != nullptr) || (is_reusing_ == false),
"Fail to find softmax primitive in device context");
if (softmax_p == nullptr) {
softmax_p = std::make_shared<mkldnn::softmax_forward>(
*(softmax_pd_.get()),
*(static_cast<mkldnn::memory*>(src_memory_p.get())),
*(static_cast<mkldnn::memory*>(dst_memory_p.get())));
dev_ctx_.SetBlob(prim_key, softmax_p);
} else {
is_reusing_ = true;
}
return softmax_p;
}
std::shared_ptr<mkldnn::softmax_backward> AcquireSoftmaxBackward(
std::shared_ptr<mkldnn::memory> dst_memory_p,
std::shared_ptr<mkldnn::memory> diff_dst_memory_p,
std::shared_ptr<mkldnn::memory> diff_src_memory_p) {
auto prim_key = key_ + "@softmax_bwd_p";
auto softmax_bwd_p = std::static_pointer_cast<mkldnn::softmax_backward>(
dev_ctx_.GetBlob(prim_key));
PADDLE_ENFORCE((softmax_bwd_p != nullptr) || (is_reusing_ == false),
"Fail to find softmax backward primitive in device context");
if (softmax_bwd_p == nullptr) {
softmax_bwd_p = std::make_shared<mkldnn::softmax_backward>(
*softmax_bwd_pd_, *(dst_memory_p.get()), *(diff_dst_memory_p.get()),
*(diff_src_memory_p.get()));
dev_ctx_.SetBlob(prim_key, softmax_bwd_p);
} else {
is_reusing_ = true;
}
return softmax_bwd_p;
}
private:
std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd_;
std::shared_ptr<mkldnn::softmax_backward::primitive_desc> softmax_bwd_pd_;
};
template <typename T> template <typename T>
class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> { class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
...@@ -54,56 +127,27 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> { ...@@ -54,56 +127,27 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
// Same memory descriptor to be used for input and output // Same memory descriptor to be used for input and output
memory::dims softmax_tz = {src_tz[0], src_tz[1]}; memory::dims softmax_tz = {src_tz[0], src_tz[1]};
// Generate keys for storing/retriving primitives for this operator // Generate keys for storing/retriving primitives for this operator
// TODO(jczaja): Each MKLDNN operator may have diffrent hashing function const std::string key =
auto gethash = [](memory::dims& operand_dims) { platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Output("Out"));
return std::string(std::to_string(operand_dims[0]) + "-" + const std::string key_softmax_pd = key + "@softmax_pd";
std::to_string(operand_dims[1]));
}; // Currently only NC data format is supported
const std::string key = gethash(softmax_tz); auto softmax_md = MKLDNNMemDesc(
const std::string key_softmax_p = key + "@softmax_p"; {softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);
const std::string key_softmax_src_mem_p = key + "@softmax_src_mem_p"; // Normalization is made after innermost dimension eg. C out of NC
const std::string key_softmax_dst_mem_p = key + "@softmax_dst_mem_p"; auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring,
softmax_md, 1 /*dim: C*/);
std::shared_ptr<void> softmax_p = dev_ctx.GetBlob(key_softmax_p); auto softmax_pd = std::make_shared<mkldnn::softmax_forward::primitive_desc>(
if (softmax_p == nullptr) { softmax_desc, mkldnn_engine);
// Currently only NC data format is supported dev_ctx.SetBlob(key_softmax_pd, softmax_pd);
auto softmax_md =
MKLDNNMemDesc({softmax_tz}, memory::f32, memory::format::nc); SoftmaxMKLDNNHandler handler(softmax_pd, dev_ctx, mkldnn_engine, key);
// Normalization is made after innermost dimension eg. C out of NC auto softmax_src_memory_p =
auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring, handler.AcquireSrcMemory(softmax_md, to_void_cast<T>(input_data));
softmax_md, 1 /*dim: C*/); auto softmax_dst_memory_p =
// create memory primitives handler.AcquireDstMemory(softmax_md, to_void_cast<T>(output_data));
auto softmax_src_memory_p = std::make_shared<memory>( auto softmax_p =
memory::primitive_desc{softmax_md, mkldnn_engine}, handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p);
static_cast<void*>(const_cast<T*>(input_data)));
dev_ctx.SetBlob(key_softmax_src_mem_p, softmax_src_memory_p);
auto softmax_dst_memory_p = std::make_shared<memory>(
memory::primitive_desc{softmax_md, mkldnn_engine},
static_cast<void*>(output_data));
dev_ctx.SetBlob(key_softmax_dst_mem_p, softmax_dst_memory_p);
auto softmax_forward_pd =
std::make_shared<softmax_forward::primitive_desc>(softmax_desc,
mkldnn_engine);
softmax_p = std::make_shared<softmax_forward>(
*(softmax_forward_pd.get()),
*(static_cast<memory*>(softmax_src_memory_p.get())),
*(static_cast<memory*>(softmax_dst_memory_p.get())));
dev_ctx.SetBlob(key_softmax_p, softmax_p);
} else {
// Primitives already exist
auto src_memory_p = std::static_pointer_cast<memory>(
dev_ctx.GetBlob(key_softmax_src_mem_p));
PADDLE_ENFORCE(src_memory_p != nullptr,
"Fail to find softmax src mem_p in device context");
auto dst_memory_p = std::static_pointer_cast<memory>(
dev_ctx.GetBlob(key_softmax_dst_mem_p));
PADDLE_ENFORCE(dst_memory_p != nullptr,
"Fail to find softmax dst mem_p in device context");
src_memory_p->set_data_handle(
reinterpret_cast<void*>(const_cast<T*>(input_data)));
dst_memory_p->set_data_handle(output_data);
}
std::vector<primitive> pipeline{ std::vector<primitive> pipeline{
*(static_cast<softmax_forward::primitive*>(softmax_p.get()))}; *(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
...@@ -120,6 +164,77 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> { ...@@ -120,6 +164,77 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
} }
}; };
// MKL-DNN kernel for softmax_grad. It reads "Out" and the gradient of "Out",
// writes the gradient of "X", and requires "Out" to be a 2D tensor.
template <typename T>
class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
 public:
  // Builds a softmax backward primitive on top of the forward primitive
  // descriptor found in the device context and runs it on an eager stream.
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                   "It must use CPUPlace.");

    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
    auto mkldnn_engine = dev_ctx.GetEngine();

    // Forward output (softmax result).
    const Tensor* out = ctx.Input<Tensor>("Out");
    const T* out_data = out->data<T>();

    // Incoming gradient with respect to the forward output.
    const Tensor* out_grad =
        ctx.template Input<Tensor>(framework::GradVarName("Out"));
    const T* out_grad_data = out_grad->template data<T>();

    // Gradient with respect to the forward input; allocated on this place.
    Tensor* in_grad =
        ctx.template Output<framework::Tensor>(framework::GradVarName("X"));
    T* in_grad_data = in_grad->template mutable_data<T>(ctx.GetPlace());

    std::vector<int> dst_tz = paddle::framework::vectorize2int(out->dims());
    std::vector<int> src_tz(dst_tz);

    PADDLE_ENFORCE(out->dims().size() == 2UL,
                   "The input of softmax op must be a 2D matrix.");
    // MKL-DNN supports softmax over a selected axis. For a 2D tensor the
    // normalization is done over the last axis, i.e. axis 1.
    // NOTE(review): src_tz is a copy of dst_tz, so as written this check
    // always holds.
    PADDLE_ENFORCE(((src_tz[0] == dst_tz[0]) && (src_tz[1] == dst_tz[1])),
                   "Softmax input and output dimensions should match");

    // The same dimensions describe the input and the output.
    memory::dims softmax_tz = {src_tz[0], src_tz[1]};

    // Look up the softmax forward primitive descriptor in the device context.
    // The key is built from the dimensions and the name of the "Out" input.
    const std::string key =
        platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Input("Out"));
    const std::string key_softmax_pd = key + "@softmax_pd";

    auto softmax_pd =
        std::static_pointer_cast<mkldnn::softmax_forward::primitive_desc>(
            dev_ctx.GetBlob(key_softmax_pd));
    PADDLE_ENFORCE(softmax_pd != nullptr,
                   "Fail to find softmax_pd in device context");

    // TODO(jczaja): Add layouts support when there is a need to do so.
    // Only the NC data format is used for the two dimensional softmax.
    auto data_softmax_md = MKLDNNMemDesc(
        {softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);
    auto diff_softmax_md = MKLDNNMemDesc(
        {softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);

    // Normalization runs over the innermost dimension, i.e. C out of NC.
    auto softmax_bwd_desc =
        softmax_backward::desc(diff_softmax_md, data_softmax_md, 1 /* dim: C*/);
    auto softmax_bwd_pd =
        std::make_shared<mkldnn::softmax_backward::primitive_desc>(
            softmax_bwd_desc, mkldnn_engine, *softmax_pd);

    SoftmaxMKLDNNHandler handler(softmax_pd, softmax_bwd_pd, dev_ctx,
                                 mkldnn_engine, key);

    auto dst_memory_p =
        handler.AcquireDstMemory(data_softmax_md, to_void_cast<T>(out_data));
    auto diff_dst_memory_p = handler.AcquireDiffDstMemory(
        diff_softmax_md, to_void_cast<T>(out_grad_data));
    auto diff_src_memory_p = handler.AcquireDiffSrcMemory(
        diff_softmax_md, to_void_cast<T>(in_grad_data));

    // Get the backward primitive through the handler.
    auto softmax_bwd_p = handler.AcquireSoftmaxBackward(
        dst_memory_p, diff_dst_memory_p, diff_src_memory_p);

    std::vector<primitive> pipeline;
    pipeline.push_back(*softmax_bwd_p);
    stream(stream::kind::eager).submit(pipeline).wait();
  }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -127,3 +242,5 @@ namespace ops = paddle::operators; ...@@ -127,3 +242,5 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(softmax, MKLDNN, ::paddle::platform::CPUPlace, REGISTER_OP_KERNEL(softmax, MKLDNN, ::paddle::platform::CPUPlace,
ops::SoftmaxMKLDNNKernel<float>); ops::SoftmaxMKLDNNKernel<float>);
REGISTER_OP_KERNEL(softmax_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::SoftmaxMKLDNNGradKernel<float>);
...@@ -145,16 +145,30 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { ...@@ -145,16 +145,30 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
// choose cudnn kernel if the runtime supported. // choose cudnn kernel if the runtime supported.
framework::LibraryType library_{framework::LibraryType::kPlain}; framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (platform::CanCUDNNBeUsed(ctx)) { if (platform::CanCUDNNBeUsed(ctx)) {
library_ = framework::LibraryType::kCUDNN; library_ = framework::LibraryType::kCUDNN;
} }
#endif #endif
std::string data_format = ctx.Attr<std::string>("data_format"); #ifdef PADDLE_WITH_MKLDNN
return framework::OpKernelType( if (library_ == framework::LibraryType::kPlain &&
framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(), platform::CanMKLDNNBeUsed(ctx)) {
framework::StringToDataLayout(data_format), library_); library_ = framework::LibraryType::kMKLDNN;
layout_ = framework::DataLayout::kMKLDNN;
}
#endif
auto input_data_type =
framework::ToDataType(ctx.Input<Tensor>("X")->type());
if (input_data_type == framework::proto::VarType::FP16) {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"float16 can only be used on GPU place");
}
return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_,
library_);
} }
}; };
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*Licensed under the Apache License, Version 2.0(the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/sum_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
using paddle::platform::MKLDNNDeviceContext;
using paddle::platform::CPUDeviceContext;
using framework::DataLayout;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::sum;
using mkldnn::reorder;
using platform::to_void_cast;
template <typename T>
class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
auto in_vars = ctx.MultiInputVar("X");
const int N = in_vars.size();
auto out_var = ctx.OutputVar("Out");
bool in_place = out_var == in_vars[0];
if (out_var->IsType<framework::LoDTensor>()) {
LoDTensor* output = ctx.Output<LoDTensor>("Out");
T* output_data = output->mutable_data<T>(ctx.GetPlace());
std::vector<int> dst_tz = framework::vectorize2int(output->dims());
auto src_tz = dst_tz;
memory::format output_format{memory::format::format_undef};
std::vector<float> scales;
std::vector<memory::primitive_desc> srcs_mpd;
std::vector<mkldnn::memory> srcs_mem;
PADDLE_ENFORCE(in_vars[0]->IsType<LoDTensor>(),
"Input[0] must be LoDTensors");
auto& input0 = in_vars[0]->Get<LoDTensor>();
PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN &&
input0.format() != memory::format::format_undef,
"Wrong layout/format for inputs[0]");
memory::format input_format = input0.format();
if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::x;
}
if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::nc;
}
for (int i = in_place ? 1 : 0; i < N; i++) {
PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
"all inputs must be all LoDTensors");
auto& input = in_vars[i]->Get<LoDTensor>();
PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN &&
input.format() != memory::format::format_undef,
"Wrong layout/format for inputs");
if (input.numel() == 0) {
continue;
}
const T* input_data = input.data<T>();
auto src_md =
memory::desc(src_tz, memory::data_type::f32, input_format);
auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine);
auto src_mem = memory(src_mpd, to_void_cast(input_data));
srcs_mpd.push_back(src_mpd);
srcs_mem.push_back(src_mem);
scales.push_back(1.0);
}
auto dst_md =
memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
std::shared_ptr<memory> dst_mem;
if (in_place) {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
} else {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
}
std::vector<mkldnn::primitive::at> inputs;
for (size_t i = 0; i < srcs_mem.size(); ++i) {
inputs.push_back(srcs_mem[i]);
}
auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem);
output_format = (memory::format)platform::GetMKLDNNFormat(sum_pd);
primitive reorder_prim;
std::shared_ptr<memory> target_mem;
if (in_place) {
output_format = input_format;
target_mem.reset(new memory(
{{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine},
output_data));
reorder_prim = reorder(*dst_mem, *target_mem);
}
std::vector<primitive> pipeline;
pipeline.push_back(sum_prim);
if (in_place) pipeline.push_back(reorder_prim);
stream(stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN);
output->set_format(output_format);
} else if (out_var->IsType<framework::SelectedRows>()) {
// TODO(@mozga-intel) Add MKLDNN SelectedRows support
std::unique_ptr<framework::SelectedRows> in0;
if (in_place) {
// If is in_place, we store the input[0] to in0
auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
auto& rows = in_sel0.rows();
in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
in0->mutable_value()->ShareDataWith(in_sel0.value());
}
auto get_selected_row = [&](size_t i) -> const SelectedRows& {
if (i == 0 && in0) {
return *in0.get();
} else {
return in_vars[i]->Get<SelectedRows>();
}
};
auto* out = ctx.Output<SelectedRows>("Out");
out->mutable_rows()->clear();
auto* out_value = out->mutable_value();
// Runtime InferShape
size_t first_dim = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
first_dim += sel_row.rows().size();
}
auto in_dim =
framework::vectorize(get_selected_row(N - 1).value().dims());
in_dim[0] = static_cast<int64_t>(first_dim);
out_value->Resize(framework::make_ddim(in_dim));
// if all the input sparse vars are empty, no need to
// merge these vars.
if (first_dim == 0UL) {
return;
}
out_value->mutable_data<T>(ctx.GetPlace());
math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
int64_t offset = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
if (sel_row.rows().size() == 0) {
continue;
}
PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
offset, out);
offset += sel_row.value().numel();
}
} else if (out_var->IsType<framework::LoDTensorArray>()) {
// TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
"Only support all inputs are TensorArray");
auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();
for (size_t i = 0; i < in_array.size(); ++i) {
if (in_array[i].numel() != 0) {
if (i >= out_array.size()) {
out_array.resize(i + 1);
}
if (out_array[i].numel() == 0) {
framework::TensorCopy(in_array[i], in_array[i].place(),
ctx.device_context(), &out_array[i]);
out_array[i].set_lod(in_array[i].lod());
} else {
PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod());
auto in = EigenVector<T>::Flatten(in_array[i]);
auto result = EigenVector<T>::Flatten(out_array[i]);
result.device(*ctx.template device_context<MKLDNNDeviceContext>()
.eigen_device()) = result + in;
}
}
}
}
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
out_var->Type().name());
}
}
};
} // namespace operators
} // namespace paddle
// Register SumMKLDNNOpKernel<float> as the kernel of the `sum` operator for
// the MKLDNN library on CPUPlace. Only the float instantiation is registered
// here.
REGISTER_OP_KERNEL(sum, MKLDNN, ::paddle::platform::CPUPlace,
paddle::operators::SumMKLDNNOpKernel<float>);
...@@ -18,6 +18,10 @@ limitations under the License. */ ...@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/operators/detail/safe_ref.h" #include "paddle/fluid/operators/detail/safe_ref.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using framework::Tensor; using framework::Tensor;
...@@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
auto x_vars = ctx.MultiInputVar("X"); auto x_vars = ctx.MultiInputVar("X");
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
#endif
if (x_vars[0]->IsType<framework::LoDTensor>()) { if (x_vars[0]->IsType<framework::LoDTensor>()) {
int dtype = -1; int dtype = -1;
for (auto& x_var : x_vars) { for (auto& x_var : x_vars) {
...@@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel {
"Sum operator should have at least one tensor"); "Sum operator should have at least one tensor");
return framework::OpKernelType( return framework::OpKernelType(
static_cast<framework::proto::VarType::Type>(dtype), static_cast<framework::proto::VarType::Type>(dtype), ctx.GetPlace(),
ctx.device_context()); layout, library);
} else if (x_vars[0]->IsType<framework::SelectedRows>()) { } else if (x_vars[0]->IsType<framework::SelectedRows>()) {
for (auto& var : x_vars) { for (auto& var : x_vars) {
auto& value = var->Get<framework::SelectedRows>().value(); auto& value = var->Get<framework::SelectedRows>().value();
if (value.IsInitialized()) { if (value.IsInitialized()) {
return framework::OpKernelType(framework::ToDataType(value.type()), return framework::OpKernelType(framework::ToDataType(value.type()),
ctx.device_context()); ctx.device_context(), layout, library);
} }
} }
// if input sparse vars are not initialized, use an default kernel type. // if input sparse vars are not initialized, use an default kernel type.
return framework::OpKernelType(framework::proto::VarType::FP32, return framework::OpKernelType(framework::proto::VarType::FP32,
ctx.device_context()); ctx.device_context(), layout, library);
} else if (x_vars[0]->IsType<framework::LoDTensorArray>()) { } else if (x_vars[0]->IsType<framework::LoDTensorArray>()) {
for (auto& x_var : x_vars) { for (auto& x_var : x_vars) {
auto& array = x_var->Get<framework::LoDTensorArray>(); auto& array = x_var->Get<framework::LoDTensorArray>();
for (auto& each : array) { for (auto& each : array) {
if (each.numel() != 0) { if (each.numel() != 0) {
return framework::OpKernelType(framework::ToDataType(each.type()), return framework::OpKernelType(framework::ToDataType(each.type()),
ctx.device_context()); ctx.device_context(), layout,
library);
} }
} }
} }
...@@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "(vector<Tensor>) The input tensors of sum operator.") AddInput("X", "(vector<Tensor>) The input tensors of sum operator.")
.AsDuplicable(); .AsDuplicable();
AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X"); AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
Sum operator. Sum operator.
...@@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference { ...@@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
framework::BlockDesc* block) const override { framework::BlockDesc* block) const override {
auto& inputs = op_desc.Input("X"); auto& inputs = op_desc.Input("X");
auto var_type = framework::proto::VarType::SELECTED_ROWS; auto var_type = framework::proto::VarType::SELECTED_ROWS;
for (auto& name : op_desc.Input("X")) { for (auto& name : op_desc.Input("X")) {
VLOG(10) << name << " " VLOG(10) << name << " "
<< block->FindRecursiveOrCreateVar(name).GetType(); << block->FindRecursiveOrCreateVar(name).GetType();
...@@ -206,6 +225,7 @@ namespace ops = paddle::operators; ...@@ -206,6 +225,7 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker, REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker,
ops::SumOpVarTypeInference); ops::SumOpVarTypeInference);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
sum, ops::SumKernel<paddle::platform::CPUDeviceContext, float>, sum, ops::SumKernel<paddle::platform::CPUDeviceContext, float>,
ops::SumKernel<paddle::platform::CPUDeviceContext, double>, ops::SumKernel<paddle::platform::CPUDeviceContext, double>,
......
...@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase { ...@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase {
->set_lod(inside_tensor.lod()); ->set_lod(inside_tensor.lod());
} }
} }
auto new_inside_name = cur_scope.Rename(inside_grad_name); auto new_inside_name = cur_scope.Rename(inside_grad_name);
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}}, "sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); {{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, dev_place); sum_op->Run(cur_scope, dev_place);
cur_scope.Rename(new_inside_name, inside_grad_name); cur_scope.Rename(new_inside_name, inside_grad_name);
} }
......
...@@ -106,14 +106,6 @@ class CUDADeviceContext : public DeviceContext { ...@@ -106,14 +106,6 @@ class CUDADeviceContext : public DeviceContext {
PADDLE_ENFORCE(cudaEventRecord(ev, stream_)); PADDLE_ENFORCE(cudaEventRecord(ev, stream_));
} }
// FIXME(zcd): A temporary fix for some language model that has sparse
// parameter.
template <typename Callback>
void RecordEventNoMutex(cudaEvent_t ev, Callback callback) {
callback();
PADDLE_ENFORCE(cudaEventRecord(ev, stream_));
}
private: private:
CUDAPlace place_; CUDAPlace place_;
......
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)
list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc) list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc)
# There is no macOS version of NCCL.
if (NOT APPLE)
list(APPEND CUDA_SRCS nccl.cc)
endif()
if (TENSORRT_FOUND) if (TENSORRT_FOUND)
list(APPEND CUDA_SRCS tensorrt.cc) list(APPEND CUDA_SRCS tensorrt.cc)
endif() endif()
configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h) configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h)
if (CUPTI_FOUND) if (CUPTI_FOUND)
list(APPEND CUDA_SRCS cupti.cc) list(APPEND CUDA_SRCS cupti.cc)
......
...@@ -44,8 +44,10 @@ limitations under the License. */ ...@@ -44,8 +44,10 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/curand.h" #include "paddle/fluid/platform/dynload/curand.h"
#ifndef __APPLE__
#include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/dynload/nccl.h"
#endif #endif // __APPLE__
#endif // PADDLE_WITH_CUDA
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -174,6 +176,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error( ...@@ -174,6 +176,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
throw std::runtime_error(err + string::Sprintf(args...)); throw std::runtime_error(err + string::Sprintf(args...));
} }
#ifndef __APPLE__
template <typename... Args> template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error( inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
ncclResult_t stat, const Args&... args) { ncclResult_t stat, const Args&... args) {
...@@ -184,7 +187,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error( ...@@ -184,7 +187,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
string::Sprintf(args...)); string::Sprintf(args...));
} }
} }
#endif // __APPLE__
#endif // PADDLE_WITH_CUDA #endif // PADDLE_WITH_CUDA
template <typename T> template <typename T>
......
...@@ -99,5 +99,143 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) { ...@@ -99,5 +99,143 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) {
memory.get_primitive_desc().desc().data.format); memory.get_primitive_desc().desc().data.format);
} }
// Overload for sum primitives: returns the memory format recorded in the
// destination descriptor of the given sum primitive descriptor.
inline mkldnn::memory::format GetMKLDNNFormat(
    const mkldnn::sum::primitive_desc& sum_pd) {
  const auto dst_desc = sum_pd.dst_primitive_desc().desc();
  return static_cast<mkldnn::memory::format>(dst_desc.data.format);
}
// Helper that creates MKLDNN memory objects (and the reorder primitives that
// feed them) once and stores them as blobs in the device context, so later
// calls made with the same key can reuse them instead of rebuilding them.
//
// Every blob is stored under `base_key + suffix`. The handler tracks whether
// any object was found in the device context (`is_reusing_`); once one object
// has been reused, every subsequent lookup is required to hit as well.
class MKLDNNHandler {
 public:
  // dev_ctx:  device context whose blob storage holds the cached objects.
  // engine:   engine used when a memory primitive descriptor must be built.
  // base_key: prefix shared by all blob keys generated by this handler.
  MKLDNNHandler(const MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
                const std::string& base_key)
      : dev_ctx_(dev_ctx),
        engine_(engine),
        key_(base_key),
        is_reusing_(false) {}

  // The Acquire*Memory wrappers below differ only in the suffix under which
  // the memory object is cached; see AcquireMemory(md, ptr, suffix).
  std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
      const mkldnn::memory::desc& md, void* ptr) {
    return this->AcquireMemory(md, ptr, "@user_src_mem_p");
  }

  std::shared_ptr<mkldnn::memory> AcquireWeightsMemory(
      const mkldnn::memory::desc& md, void* ptr) {
    return this->AcquireMemory(md, ptr, "@user_weights_mem_p");
  }

  std::shared_ptr<mkldnn::memory> AcquireDstMemory(
      const mkldnn::memory::desc& md, void* ptr) {
    return this->AcquireMemory(md, ptr, "@user_dst_mem_p");
  }

  std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory(
      const mkldnn::memory::desc& md, void* ptr) {
    return this->AcquireMemory(md, ptr, "@user_diff_dst_mem_p");
  }

  std::shared_ptr<mkldnn::memory> AcquireDiffSrcMemory(
      const mkldnn::memory::desc& md, void* ptr) {
    return this->AcquireMemory(md, ptr, "@user_diff_src_mem_p");
  }

  // Returns the memory object cached under `key_ + suffix`, pointing it at
  // `ptr`. If nothing is cached yet, a new memory object is created from the
  // ready-made primitive descriptor `mdp` and stored in the device context.
  // Enforces that a cache miss never follows an earlier cache hit.
  std::shared_ptr<mkldnn::memory> AcquireMemoryFromPrimitive(
      mkldnn::memory::primitive_desc mdp, void* ptr,
      const std::string& suffix) {
    auto local_key = key_ + suffix;
    auto mem_p =
        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
                   "Fail to find mem primitive in device context");
    if (mem_p == nullptr) {
      mem_p = std::make_shared<mkldnn::memory>(mdp, ptr);
      dev_ctx_.SetBlob(local_key, mem_p);
    } else {
      mem_p->set_data_handle(ptr);
      // Mark that reusing happened. All primitives from operator instance
      // should be reused or none of them. So we check consistency
      is_reusing_ = true;
    }
    return mem_p;
  }

  // Same contract as AcquireMemoryFromPrimitive, but takes a memory
  // descriptor; the primitive descriptor is built from `md` and `engine_`
  // only when the object is not cached yet.
  std::shared_ptr<mkldnn::memory> AcquireMemory(const mkldnn::memory::desc& md,
                                                void* ptr,
                                                const std::string& suffix) {
    /*Generate key*/
    auto local_key = key_ + suffix;
    auto mem_p =
        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
                   "Fail to find mem primitive in device context");
    if (mem_p == nullptr) {
      mem_p = std::make_shared<mkldnn::memory>(
          mkldnn::memory::primitive_desc{md, engine_}, ptr);
      dev_ctx_.SetBlob(local_key, mem_p);
    } else {
      mem_p->set_data_handle(ptr);
      // Mark that reusing happened. All primitives from operator instance
      // should be reused or none of them. So we check consistency
      is_reusing_ = true;
    }
    return mem_p;
  }

  // Returns memory laid out as `mpd`, reordering from the user's memory when
  // the user's primitive descriptor `user_mpd` differs.
  //
  // First call for a key: if the descriptors match, the user memory itself is
  // cached and returned; otherwise a new target memory and a reorder
  // (user -> target) are created, both are cached, and the reorder is
  // appended to `pipeline`.
  // Later calls: the cached target memory is returned and the cached reorder,
  // if one exists, is appended to `pipeline` again.
  std::shared_ptr<mkldnn::memory> AcquireMemory(
      mkldnn::memory::primitive_desc& mpd,
      mkldnn::memory::primitive_desc& user_mpd,
      const std::shared_ptr<mkldnn::memory> user_memory_p,
      const std::string& suffix, std::vector<mkldnn::primitive>& pipeline) {
    // create reorder primitive if the input format is not the preferred one
    auto local_key = key_ + suffix;
    auto key_reorder_p = key_ + suffix + "reorder_p";

    auto target_memory_p =
        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
    PADDLE_ENFORCE((target_memory_p != nullptr) || (is_reusing_ == false),
                   "Fail to find mem primitive in device context");
    if (target_memory_p == nullptr) {
      target_memory_p = user_memory_p;
      if (mpd != user_mpd) {
        target_memory_p = std::make_shared<mkldnn::memory>(mpd);
        auto reorder_p =
            std::make_shared<mkldnn::reorder>(*user_memory_p, *target_memory_p);
        dev_ctx_.SetBlob(key_reorder_p, reorder_p);
        pipeline.push_back(*reorder_p);
      }
      dev_ctx_.SetBlob(local_key, target_memory_p);
    } else {
      // Make reorder if needed
      auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
          dev_ctx_.GetBlob(key_reorder_p));
      if (reorder_p != nullptr) {
        pipeline.push_back(*reorder_p);
      }
      is_reusing_ = true;
    }
    return target_memory_p;
  }

  // Builds a key string of the form "<d0>-<d1>-...-<dn>-<suffix>" from the
  // operand dimensions followed by `suffix`.
  static std::string GetHash(mkldnn::memory::dims& operand_dims,
                             const std::string& suffix) {
    auto dims2str = [](const mkldnn::memory::dims& dims) {
      std::string dstr = "";
      for (const auto dim : dims) {
        dstr += std::to_string(dim) + "-";
      }
      return dstr;
    };
    return dims2str(operand_dims) + suffix;
  }

 protected:
  const MKLDNNDeviceContext& dev_ctx_;  // blob storage for cached objects
  mkldnn::engine engine_;               // engine for new primitive descs
  std::string key_;                     // common prefix of all blob keys
  bool is_reusing_;                     // true once any cached object was hit
};
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -268,7 +268,8 @@ void BindOpDesc(pybind11::module *m) { ...@@ -268,7 +268,8 @@ void BindOpDesc(pybind11::module *m) {
.value("STRINGS", pd::proto::AttrType::STRINGS) .value("STRINGS", pd::proto::AttrType::STRINGS)
.value("BOOL", pd::proto::AttrType::BOOLEAN) .value("BOOL", pd::proto::AttrType::BOOLEAN)
.value("BOOLS", pd::proto::AttrType::BOOLEANS) .value("BOOLS", pd::proto::AttrType::BOOLEANS)
.value("BLOCK", pd::proto::AttrType::BLOCK); .value("BLOCK", pd::proto::AttrType::BLOCK)
.value("BLOCKS", pd::proto::AttrType::BLOCKS);
pybind11::class_<pd::OpDesc> op_desc(*m, "OpDesc", ""); pybind11::class_<pd::OpDesc> op_desc(*m, "OpDesc", "");
op_desc op_desc
...@@ -293,6 +294,7 @@ void BindOpDesc(pybind11::module *m) { ...@@ -293,6 +294,7 @@ void BindOpDesc(pybind11::module *m) {
.def("set_attr", &pd::OpDesc::SetAttr) .def("set_attr", &pd::OpDesc::SetAttr)
.def("attr", &pd::OpDesc::GetAttr) .def("attr", &pd::OpDesc::GetAttr)
.def("set_block_attr", &pd::OpDesc::SetBlockAttr) .def("set_block_attr", &pd::OpDesc::SetBlockAttr)
.def("set_blocks_attr", &pd::OpDesc::SetBlocksAttr)
.def("set_serialized_attr", .def("set_serialized_attr",
[](pd::OpDesc &self, const std::string &name, [](pd::OpDesc &self, const std::string &name,
const pybind11::bytes &seriralized) { const pybind11::bytes &seriralized) {
......
...@@ -167,9 +167,6 @@ PYBIND11_PLUGIN(core) { ...@@ -167,9 +167,6 @@ PYBIND11_PLUGIN(core) {
.def("set_lod", .def("set_lod",
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) { [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
// the input lod is offset-based level-of-detail info // the input lod is offset-based level-of-detail info
LOG(WARNING)
<< "set_lod is deprecated and will be removed by 9.2018, "
"please switch to set_recursive_sequence_lengths.";
LoD new_lod; LoD new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
...@@ -196,8 +193,6 @@ PYBIND11_PLUGIN(core) { ...@@ -196,8 +193,6 @@ PYBIND11_PLUGIN(core) {
.def("lod", .def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> { [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the offset-based lod info // output the offset-based lod info
LOG(WARNING) << "lod is deprecated and will be removed by 9.2018, "
"please switch to recursive_sequence_lengths.";
LoD lod = self.lod(); LoD lod = self.lod();
std::vector<std::vector<size_t>> new_lod; std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(lod.size());
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
function print_usage() { function print_usage() {
echo -e "\n${RED}Usage${NONE}: echo -e "\n${RED}Usage${NONE}:
${BOLD}${SCRIPT_NAME}${NONE} [OPTION]" ${BOLD}${SCRIPT_NAME}${NONE} [OPTION]"
echo -e "\n${RED}Options${NONE}: echo -e "\n${RED}Options${NONE}:
${BLUE}build${NONE}: run build for x86 platform ${BLUE}build${NONE}: run build for x86 platform
${BLUE}build_android${NONE}: run build for android platform ${BLUE}build_android${NONE}: run build for android platform
...@@ -133,7 +133,7 @@ EOF ...@@ -133,7 +133,7 @@ EOF
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \ -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} \ -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
-DWITH_ANAKIN=ON -DWITH_ANAKIN=${WITH_ANAKIN:-ON}
} }
function abort(){ function abort(){
...@@ -198,7 +198,7 @@ function build_android() { ...@@ -198,7 +198,7 @@ function build_android() {
fi fi
ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API
cat <<EOF cat <<EOF
============================================ ============================================
Generating the standalone toolchain ... Generating the standalone toolchain ...
...@@ -212,13 +212,13 @@ EOF ...@@ -212,13 +212,13 @@ EOF
--arch=$ANDROID_ARCH \ --arch=$ANDROID_ARCH \
--platform=android-$ANDROID_API \ --platform=android-$ANDROID_API \
--install-dir=$ANDROID_STANDALONE_TOOLCHAIN --install-dir=$ANDROID_STANDALONE_TOOLCHAIN
BUILD_ROOT=${PADDLE_ROOT}/build_android BUILD_ROOT=${PADDLE_ROOT}/build_android
DEST_ROOT=${PADDLE_ROOT}/install_android DEST_ROOT=${PADDLE_ROOT}/install_android
mkdir -p $BUILD_ROOT mkdir -p $BUILD_ROOT
cd $BUILD_ROOT cd $BUILD_ROOT
if [ $ANDROID_ABI == "armeabi-v7a" ]; then if [ $ANDROID_ABI == "armeabi-v7a" ]; then
cmake -DCMAKE_SYSTEM_NAME=Android \ cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \
...@@ -286,7 +286,7 @@ function build_ios() { ...@@ -286,7 +286,7 @@ function build_ios() {
-DWITH_TESTING=OFF \ -DWITH_TESTING=OFF \
-DWITH_SWIG_PY=OFF \ -DWITH_SWIG_PY=OFF \
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release
make -j 2 make -j 2
} }
...@@ -331,14 +331,14 @@ EOF ...@@ -331,14 +331,14 @@ EOF
function bind_test() { function bind_test() {
# the number of process to run tests # the number of process to run tests
NUM_PROC=6 NUM_PROC=6
# calculate and set the memory usage for each process # calculate and set the memory usage for each process
MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`) MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`)
export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE
# get the CUDA device count # get the CUDA device count
CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l) CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
for (( i = 0; i < $NUM_PROC; i++ )); do for (( i = 0; i < $NUM_PROC; i++ )); do
cuda_list=() cuda_list=()
for (( j = 0; j < $CUDA_DEVICE_COUNT; j++ )); do for (( j = 0; j < $CUDA_DEVICE_COUNT; j++ )); do
......
...@@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs): ...@@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs):
for idx, op_desc in enumerate(op_descs): for idx, op_desc in enumerate(op_descs):
for var_name in op_desc.input_arg_names(): for var_name in op_desc.input_arg_names():
if len(renamed_vars[var_name]) > 1: if len(renamed_vars[var_name]) > 1:
pending_sum_ops.append( pending_sum_ops.append((_create_op_desc_(
(_create_op_desc_("sum", {"X": renamed_vars[var_name]}, "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]},
{"Out": [var_name]}, {}), idx)) {"use_mkldnn": False}), idx))
renamed_vars[var_name] = [var_name] renamed_vars[var_name] = [var_name]
for var_name in op_desc.output_arg_names(): for var_name in op_desc.output_arg_names():
if var_name == core.empty_var_name( if var_name == core.empty_var_name(
...@@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs): ...@@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs):
renamed_vars[var_name].append(new_name) renamed_vars[var_name].append(new_name)
for var_name, inputs in renamed_vars.iteritems(): for var_name, inputs in renamed_vars.iteritems():
if len(inputs) > 1: if len(inputs) > 1:
pending_sum_ops.append((_create_op_desc_( pending_sum_ops.append(
"sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs))) (_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]},
{"use_mkldnn": False}), len(op_descs)))
# sum_op descs are sorted according to their insert position # sum_op descs are sorted according to their insert position
for p in reversed(pending_sum_ops): for p in reversed(pending_sum_ops):
op_descs.insert(p[1], p[0]) op_descs.insert(p[1], p[0])
......
...@@ -78,6 +78,8 @@ def as_numpy(tensor): ...@@ -78,6 +78,8 @@ def as_numpy(tensor):
Returns: Returns:
numpy.ndarray numpy.ndarray
""" """
if isinstance(tensor, core.LoDTensorArray):
return [as_numpy(t) for t in tensor]
if isinstance(tensor, list): if isinstance(tensor, list):
return [as_numpy(t) for t in tensor] return [as_numpy(t) for t in tensor]
assert isinstance(tensor, core.LoDTensor) assert isinstance(tensor, core.LoDTensor)
......
...@@ -559,15 +559,9 @@ class Operator(object): ...@@ -559,15 +559,9 @@ class Operator(object):
if (attr_name not in self.attrs) or ( if (attr_name not in self.attrs) or (
self.attrs[attr_name] is None): self.attrs[attr_name] is None):
continue continue
if isinstance(self.attrs[attr_name], Block): attr_val = self.attrs[attr_name]
self.desc.set_block_attr(attr_name, self._update_desc_attr(attr_name, attr_val)
self.attrs[attr_name].desc)
elif isinstance(self.attrs[attr_name], core.BlockDesc) or \
isinstance(self.attrs[attr_name], core.ProgramDesc):
self.desc.set_serialized_attr(
attr_name, self.attrs[attr_name].serialize_to_string())
else:
self.desc.set_attr(attr_name, self.attrs[attr_name])
self.desc.check_attrs() self.desc.check_attrs()
if self.has_kernel(type): if self.has_kernel(type):
self.desc.infer_var_type(self.block.desc) self.desc.infer_var_type(self.block.desc)
...@@ -714,8 +708,24 @@ class Operator(object): ...@@ -714,8 +708,24 @@ class Operator(object):
ValueError: If the type of value doesn't match with desc.attr_type(name). ValueError: If the type of value doesn't match with desc.attr_type(name).
""" """
self.attrs[name] = val self.attrs[name] = val
self._update_desc_attr(name, val)
def _update_desc_attr(self, name, val):
"""
Update the value of desc's attribute by attribute's name.
Args:
name(str): the attribute name.
val(bool|int|str|float|list): the value of the attribute.
Raises:
ValueError: If the type of value doesn't match with desc.attr_type(name).
"""
if isinstance(val, Block): if isinstance(val, Block):
self.desc.set_block_attr(name, val.desc) self.desc.set_block_attr(name, val.desc)
elif isinstance(val, list) and val and all(
isinstance(v, Block) for v in val):
self.desc.set_blocks_attr(name, [v.desc for v in val])
elif isinstance(val, core.BlockDesc) or \ elif isinstance(val, core.BlockDesc) or \
isinstance(val, core.ProgramDesc): isinstance(val, core.ProgramDesc):
self.desc.set_serialized_attr(name, val.serialize_to_string()) self.desc.set_serialized_attr(name, val.serialize_to_string())
...@@ -1388,7 +1398,11 @@ class Program(object): ...@@ -1388,7 +1398,11 @@ class Program(object):
* Set for_test to True when we want to clone the program for testing. * Set for_test to True when we want to clone the program for testing.
Notes: This API DOES NOT prune any operator. Use Notes: This API DOES NOT prune any operator. Use
:code:`clone(for_test=True)` before backward and optimization please. :code:`clone(for_test=True)` before backward and optimization please. e.g.
>>> test_program = fluid.default_main_program().clone(for_test=True)
>>> optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
>>> optimizer.minimize()
Args: Args:
for_test(bool): True if change the :code:`is_test` attribute of for_test(bool): True if change the :code:`is_test` attribute of
......
...@@ -110,7 +110,7 @@ class BlockGuardServ(BlockGuard): ...@@ -110,7 +110,7 @@ class BlockGuardServ(BlockGuard):
class ListenAndServ(object): class ListenAndServ(object):
""" """
**ListenAndServ Layer** **ListenAndServ Layer**
ListenAndServ is used to create a rpc server bind and listen ListenAndServ is used to create a rpc server bind and listen
on specific TCP port, this server will run the sub-block when on specific TCP port, this server will run the sub-block when
received variables from clients. received variables from clients.
...@@ -186,7 +186,6 @@ class ListenAndServ(object): ...@@ -186,7 +186,6 @@ class ListenAndServ(object):
main_program = self.helper.main_program main_program = self.helper.main_program
current_block = main_program.current_block() current_block = main_program.current_block()
parent_block = self.parent_block() parent_block = self.parent_block()
empty_block = Program().global_block()
parent_block.append_op( parent_block.append_op(
type='listen_and_serv', type='listen_and_serv',
...@@ -195,8 +194,9 @@ class ListenAndServ(object): ...@@ -195,8 +194,9 @@ class ListenAndServ(object):
attrs={ attrs={
'endpoint': self.endpoint, 'endpoint': self.endpoint,
'Fanin': self.fan_in, 'Fanin': self.fan_in,
'OptimizeBlock': current_block, 'optimize_blocks': [
'PrefetchBlock': empty_block, current_block
], # did not support multiple optimize blocks in layers
'sync_mode': True, # did not support async now in layers 'sync_mode': True, # did not support async now in layers
'grad_to_block_id': [""] 'grad_to_block_id': [""]
}) })
...@@ -212,7 +212,7 @@ def Send(endpoints, send_vars, sync=True): ...@@ -212,7 +212,7 @@ def Send(endpoints, send_vars, sync=True):
of send_vars to send of send_vars to send
send_vars (list): variables to send to server send_vars (list): variables to send to server
sync (bool): whether to wait the request finish sync (bool): whether to wait the request finish
""" """
assert (type(send_vars) == list) assert (type(send_vars) == list)
...@@ -469,10 +469,13 @@ def open_files(filenames, ...@@ -469,10 +469,13 @@ def open_files(filenames,
lod_levels(list): List of ints which declaring data lod_level. lod_levels(list): List of ints which declaring data lod_level.
dtypes(list): List of strs which declaring data type. dtypes(list): List of strs which declaring data type.
thread_num(int): The maximal concurrent prefetch thread number. thread_num(int): The maximal concurrent prefetch thread number.
buffer_size(int): The size of prefetch buffer. buffer_size(int|None): The size of prefetch buffer. If it is set to None,
buffer size will be thread_num * 3.
Default: None
pass_num(int): Number of passes to run. pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel. subsequent operators in parallel.
Default: True
Returns: Returns:
Variable: A Reader Variable via which we can get file data. Variable: A Reader Variable via which we can get file data.
...@@ -492,7 +495,7 @@ def open_files(filenames, ...@@ -492,7 +495,7 @@ def open_files(filenames,
image, label = fluid.layers.io.read_file(reader) image, label = fluid.layers.io.read_file(reader)
""" """
if buffer_size is None: if buffer_size is None:
buffer_size = thread_num buffer_size = thread_num * 3
if isinstance(filenames, basestring): if isinstance(filenames, basestring):
filenames = [filenames] filenames = [filenames]
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes] dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
All layers just related to the neural network. All layers just related to the neural network.
""" """
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
...@@ -23,6 +23,7 @@ from layer_function_generator import autodoc, templatedoc ...@@ -23,6 +23,7 @@ from layer_function_generator import autodoc, templatedoc
from tensor import concat from tensor import concat
import utils import utils
import random import random
from .. import unique_name
__all__ = [ __all__ = [
'fc', 'fc',
...@@ -109,14 +110,14 @@ def fc(input, ...@@ -109,14 +110,14 @@ def fc(input,
""" """
**Fully Connected Layer** **Fully Connected Layer**
This function creates a fully connected layer in the network. It can take This function creates a fully connected layer in the network. It can take
multiple tensors as its inputs. It creates a variable called weights for multiple tensors as its inputs. It creates a variable called weights for
each input tensor, which represents a fully connected weight matrix from each input tensor, which represents a fully connected weight matrix from
each input unit to each output unit. The fully connected layer multiplies each input unit to each output unit. The fully connected layer multiplies
each input tensor with its corresponding weight to produce an output Tensor. each input tensor with its corresponding weight to produce an output Tensor.
If multiple input tensors are given, the results of multiple multiplications If multiple input tensors are given, the results of multiple multiplications
will be summed up. If bias_attr is not None, a bias variable will be created will be summed up. If bias_attr is not None, a bias variable will be created
and added to the output. Finally, if activation is not None, it will be applied and added to the output. Finally, if activation is not None, it will be applied
to the output as well. to the output as well.
This process can be formulated as follows: This process can be formulated as follows:
...@@ -198,7 +199,10 @@ def fc(input, ...@@ -198,7 +199,10 @@ def fc(input,
else: else:
pre_bias = helper.create_tmp_variable(dtype) pre_bias = helper.create_tmp_variable(dtype)
helper.append_op( helper.append_op(
type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias}) type="sum",
inputs={"X": mul_results},
outputs={"Out": pre_bias},
attrs={"use_mkldnn": use_mkldnn})
# add bias # add bias
pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims) pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims)
# add activation # add activation
...@@ -847,7 +851,7 @@ def crf_decoding(input, param_attr, label=None): ...@@ -847,7 +851,7 @@ def crf_decoding(input, param_attr, label=None):
Returns: Returns:
Variable: ${viterbi_path_comment} Variable: ${viterbi_path_comment}
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1085,7 +1089,7 @@ def chunk_eval(input, ...@@ -1085,7 +1089,7 @@ def chunk_eval(input,
Here is a NER example of labeling for these tagging schemes: Here is a NER example of labeling for these tagging schemes:
.. code-block:: python .. code-block:: python
====== ====== ====== ===== == ============ ===== ===== ===== == ========= ====== ====== ====== ===== == ============ ===== ===== ===== == =========
Li Ming works at Agricultural Bank of China in Beijing. Li Ming works at Agricultural Bank of China in Beijing.
====== ====== ====== ===== == ============ ===== ===== ===== == ========= ====== ====== ====== ===== == ============ ===== ===== ===== == =========
...@@ -1111,7 +1115,7 @@ def chunk_eval(input, ...@@ -1111,7 +1115,7 @@ def chunk_eval(input,
is the num of chunk types, and `tag_type` get its value from the following table. is the num of chunk types, and `tag_type` get its value from the following table.
.. code-block:: python .. code-block:: python
Scheme Begin Inside End Single Scheme Begin Inside End Single
plain 0 - - - plain 0 - - -
IOB 0 1 - - IOB 0 1 - -
...@@ -1147,7 +1151,7 @@ def chunk_eval(input, ...@@ -1147,7 +1151,7 @@ def chunk_eval(input,
tuple: tuple containing: precision, recall, f1_score, tuple: tuple containing: precision, recall, f1_score,
num_infer_chunks, num_label_chunks, num_infer_chunks, num_label_chunks,
num_correct_chunks num_correct_chunks
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1247,7 +1251,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): ...@@ -1247,7 +1251,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
""" """
This function computes the softmax activation among all time-steps for each This function computes the softmax activation among all time-steps for each
sequence. The dimension of each time-step should be 1. Thus, the shape of sequence. The dimension of each time-step should be 1. Thus, the shape of
input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N` input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N`
is the sum of the length of all sequences. is the sum of the length of all sequences.
For i-th sequence in a mini-batch: For i-th sequence in a mini-batch:
...@@ -1267,7 +1271,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): ...@@ -1267,7 +1271,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
param_attr (ParamAttr|None): attributes for parameter param_attr (ParamAttr|None): attributes for parameter
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \ use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \
library is installed. Default: True library is installed. Default: True
Returns: Returns:
Variable: output of sequence_softmax Variable: output of sequence_softmax
...@@ -1828,11 +1832,11 @@ def pool2d(input, ...@@ -1828,11 +1832,11 @@ def pool2d(input,
${comment} ${comment}
Args: Args:
input (Variable): The input tensor of pooling operator. The format of input (Variable): The input tensor of pooling operator. The format of
input tensor is NCHW, where N is batch size, C is input tensor is NCHW, where N is batch size, C is
the number of channels, H is the height of the the number of channels, H is the height of the
feature, and W is the width of the feature. feature, and W is the width of the feature.
pool_size (int): The side length of pooling windows. All pooling pool_size (int): The side length of pooling windows. All pooling
windows are squares with pool_size on a side. windows are squares with pool_size on a side.
pool_type: ${pooling_type_comment} pool_type: ${pooling_type_comment}
pool_stride (int): stride of the pooling layer. pool_stride (int): stride of the pooling layer.
...@@ -1841,7 +1845,7 @@ def pool2d(input, ...@@ -1841,7 +1845,7 @@ def pool2d(input,
use_cudnn: ${use_cudnn_comment} use_cudnn: ${use_cudnn_comment}
ceil_mode: ${ceil_mode_comment} ceil_mode: ${ceil_mode_comment}
use_mkldnn: ${use_mkldnn_comment} use_mkldnn: ${use_mkldnn_comment}
name (str|None): A name for this layer(optional). If set None, the name (str|None): A name for this layer(optional). If set None, the
layer will be named automatically. layer will be named automatically.
Returns: Returns:
...@@ -1859,10 +1863,10 @@ def pool2d(input, ...@@ -1859,10 +1863,10 @@ def pool2d(input,
data = fluid.layers.data( data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32') name='data', shape=[3, 32, 32], dtype='float32')
conv2d = fluid.layers.pool2d( conv2d = fluid.layers.pool2d(
input=data, input=data,
pool_size=2, pool_size=2,
pool_type='max', pool_type='max',
pool_stride=1, pool_stride=1,
global_pooling=False) global_pooling=False)
""" """
if pool_type not in ["max", "avg"]: if pool_type not in ["max", "avg"]:
...@@ -2227,14 +2231,14 @@ def beam_search_decode(ids, scores, name=None): ...@@ -2227,14 +2231,14 @@ def beam_search_decode(ids, scores, name=None):
This layer is to pack the output of beam search layer into sentences and This layer is to pack the output of beam search layer into sentences and
associated scores. It is usually called after the beam search layer. associated scores. It is usually called after the beam search layer.
Typically, the output of beam search layer is a tensor of selected ids, with Typically, the output of beam search layer is a tensor of selected ids, with
a tensor of the score of each id. Beam search layer's output ids, however, a tensor of the score of each id. Beam search layer's output ids, however,
are generated directly during the tree search, and they are stacked by each are generated directly during the tree search, and they are stacked by each
level of the search tree. Thus we need to reorganize them into sentences, level of the search tree. Thus we need to reorganize them into sentences,
based on the score of each id. This layer takes the output of beam search based on the score of each id. This layer takes the output of beam search
layer as input and repack them into sentences. layer as input and repack them into sentences.
Args: Args:
ids (Variable): The selected ids, output of beam search layer. ids (Variable): The selected ids, output of beam search layer.
scores (Variable): The associated scores of the ids, output of beam scores (Variable): The associated scores of the ids, output of beam
search layer. search layer.
name (str): The name of this layer. It is optional. name (str): The name of this layer. It is optional.
...@@ -2242,7 +2246,7 @@ def beam_search_decode(ids, scores, name=None): ...@@ -2242,7 +2246,7 @@ def beam_search_decode(ids, scores, name=None):
Returns: Returns:
tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores. tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores.
sentence_ids is a tensor with shape [size, length], where size is the sentence_ids is a tensor with shape [size, length], where size is the
beam size of beam search, and length is the length of each sentence. beam size of beam search, and length is the length of each sentence.
Note that the length of sentences may vary. Note that the length of sentences may vary.
sentence_scores is a tensor with the same shape as sentence_ids. sentence_scores is a tensor with the same shape as sentence_ids.
...@@ -2919,7 +2923,7 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None): ...@@ -2919,7 +2923,7 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None):
`None`, compute the mean over all elements of :attr:`input` `None`, compute the mean over all elements of :attr:`input`
and return a variable with a single element, otherwise it and return a variable with a single element, otherwise it
must be in the range :math:`[-rank(input), rank(input))`. If must be in the range :math:`[-rank(input), rank(input))`. If
:math:`dim[i] < 0`, the dimension to reduce is :math:`dim[i] < 0`, the dimension to reduce is
:math:`rank(input) + dim[i]`. :math:`rank(input) + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension output Tensor. The result tensor will have one fewer dimension
...@@ -3390,16 +3394,16 @@ def topk(input, k, name=None): ...@@ -3390,16 +3394,16 @@ def topk(input, k, name=None):
Args: Args:
input(Variable): The input variable which can be a vector or Tensor with input(Variable): The input variable which can be a vector or Tensor with
higher rank. higher rank.
k(int): The number of top elements to look for along the last dimension k(int): The number of top elements to look for along the last dimension
of input. of input.
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically. will be named automatically.
Default: None Default: None
Returns: Returns:
Tuple[Variable]: A tuple with two elements. Each element is a Variable. Tuple[Variable]: A tuple with two elements. Each element is a Variable.
The first one is k largest elements along each last The first one is k largest elements along each last
dimensional slice. The second one is indices of values dimensional slice. The second one is indices of values
within the last dimension of input. within the last dimension of input.
Raises: Raises:
...@@ -3594,15 +3598,15 @@ def warpctc(input, label, blank=0, norm_by_times=False): ...@@ -3594,15 +3598,15 @@ def warpctc(input, label, blank=0, norm_by_times=False):
It's shape is [Lp, num_classes + 1], where Lp is the sum of all input It's shape is [Lp, num_classes + 1], where Lp is the sum of all input
sequences' length and num_classes is the true number of classes. sequences' length and num_classes is the true number of classes.
(not including the blank label). (not including the blank label).
label (Variable): The ground truth of variable-length sequence, label (Variable): The ground truth of variable-length sequence,
which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1], which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1],
where Lg is the sum of all labels' length. where Lg is the sum of all labels' length.
blank (int, default 0): The blank label index of Connectionist blank (int, default 0): The blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the Temporal Classification (CTC) loss, which is in the
half-opened interval [0, num_classes + 1). half-opened interval [0, num_classes + 1).
norm_by_times(bool, default false): Whether to normalize the gradients norm_by_times(bool, default false): Whether to normalize the gradients
by the number of time-step, which is also the sequence's length. by the number of time-step, which is also the sequence's length.
There is no need to normalize the gradients if warpctc layer was There is no need to normalize the gradients if warpctc layer was
followed by a mean_op. followed by a mean_op.
Returns: Returns:
...@@ -3708,8 +3712,8 @@ def nce(input, ...@@ -3708,8 +3712,8 @@ def nce(input,
input (Variable): input variable. input (Variable): input variable.
label (Variable): label. label (Variable): label.
num_total_classes (int):${num_total_classes_comment} num_total_classes (int):${num_total_classes_comment}
sample_weight (Variable|None): A Variable of shape [batch_size, 1] sample_weight (Variable|None): A Variable of shape [batch_size, 1]
storing a weight for each sample. The default weight for each storing a weight for each sample. The default weight for each
sample is 1.0. sample is 1.0.
param_attr (ParamAttr|None): attributes for parameter param_attr (ParamAttr|None): attributes for parameter
bias_attr (ParamAttr|None): attributes for bias bias_attr (ParamAttr|None): attributes for bias
...@@ -4099,7 +4103,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): ...@@ -4099,7 +4103,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`. This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`.
It takes the first dimension of :attr:`x` and :attr:`y` as batch size. It takes the first dimension of :attr:`x` and :attr:`y` as batch size.
For each instance, it computes the smooth L1 loss element by element first For each instance, it computes the smooth L1 loss element by element first
and then sums all the losses. So the shape of output Variable is and then sums all the losses. So the shape of output Variable is
[batch_size, 1]. [batch_size, 1].
Args: Args:
...@@ -4108,14 +4112,14 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): ...@@ -4108,14 +4112,14 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
y (Variable): A tensor with rank at least 2. The target value of smooth y (Variable): A tensor with rank at least 2. The target value of smooth
L1 loss op with same shape as :attr:`x`. L1 loss op with same shape as :attr:`x`.
inside_weight (Variable|None): A tensor with rank at least 2. This inside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with :attr:`x`. If input is optional and should have same shape with :attr:`x`. If
provided, the result of (:attr:`x` - :attr:`y`) will be multiplied provided, the result of (:attr:`x` - :attr:`y`) will be multiplied
by this tensor element by element. by this tensor element by element.
outside_weight (Variable|None): A tensor with rank at least 2. This outside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with :attr:`x`. If input is optional and should have same shape with :attr:`x`. If
provided, the out smooth L1 loss will be multiplied by this tensor provided, the out smooth L1 loss will be multiplied by this tensor
element by element. element by element.
sigma (float|None): Hyper parameter of smooth L1 loss layer. A float sigma (float|None): Hyper parameter of smooth L1 loss layer. A float
scalar with default value 1.0. scalar with default value 1.0.
Returns: Returns:
...@@ -4161,7 +4165,7 @@ def one_hot(input, depth): ...@@ -4161,7 +4165,7 @@ def one_hot(input, depth):
Examples: Examples:
.. code-block:: python .. code-block:: python
label = layers.data(name="label", shape=[1], dtype="float32") label = layers.data(name="label", shape=[1], dtype="float32")
one_hot_label = layers.one_hot(input=label, depth=10) one_hot_label = layers.one_hot(input=label, depth=10)
""" """
...@@ -4263,14 +4267,18 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -4263,14 +4267,18 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
say :attr:`actual_shape` has a higher priority say :attr:`actual_shape` has a higher priority
than :attr:`shape`. than :attr:`shape`.
act (str): The non-linear activation to be applied to output variable. act (str): The non-linear activation to be applied to output variable.
inplace(bool): If this flag is set true, a new output tensor is created inplace(bool): If this flag is set true, the output
whose data is copied from input x, otherwise the output shares data with input without copying, otherwise
shares data with input without copying. a new output tensor is created
whose data is copied from input x.
name (str): The name of this layer. It is optional. name (str): The name of this layer. It is optional.
Returns: Returns:
Variable: The output tensor. Variable: The output tensor.
Raises:
TypeError: if actual_shape is neither Variable nor None.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -4282,6 +4290,11 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -4282,6 +4290,11 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
if not (isinstance(shape, list) or isinstance(shape, tuple)): if not (isinstance(shape, list) or isinstance(shape, tuple)):
raise ValueError("Input shape must be a python lsit or tuple.") raise ValueError("Input shape must be a python lsit or tuple.")
inputs = {"X": x}
if isinstance(actual_shape, Variable):
inputs["Shape"] = actual_shape
elif actual_shape is not None:
raise TypeError("actual_shape should either be Variable or None")
# Validate the shape # Validate the shape
unk_dim_idx = -1 unk_dim_idx = -1
...@@ -4302,9 +4315,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -4302,9 +4315,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
reshaped = helper.create_tmp_variable(dtype=x.dtype) reshaped = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op( helper.append_op(
type="reshape", type="reshape",
inputs={"X": x, inputs=inputs,
"Shape": actual_shape}
if isinstance(actual_shape, Variable) else {"X": x},
attrs={"shape": shape, attrs={"shape": shape,
"inplace": inplace}, "inplace": inplace},
outputs={"Out": reshaped}) outputs={"Out": reshaped})
...@@ -4315,10 +4326,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -4315,10 +4326,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
def lod_reset(x, y=None, target_lod=None): def lod_reset(x, y=None, target_lod=None):
""" """
Set LoD of :attr:`x` to a new one specified by :attr:`y` or Set LoD of :attr:`x` to a new one specified by :attr:`y` or
:attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be :attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be
considered as target LoD first, otherwise :attr:`y.data` would be considered as target LoD first, otherwise :attr:`y.data` would be
considered as target LoD. If :attr:`y` is not provided, target LoD should considered as target LoD. If :attr:`y` is not provided, target LoD should
be specified by :attr:`target_lod`. If target LoD is specified by be specified by :attr:`target_lod`. If target LoD is specified by
:attr:`Y.data` or :attr:`target_lod`, only one level LoD is supported. :attr:`Y.data` or :attr:`target_lod`, only one level LoD is supported.
.. code-block:: text .. code-block:: text
...@@ -4372,7 +4383,7 @@ def lod_reset(x, y=None, target_lod=None): ...@@ -4372,7 +4383,7 @@ def lod_reset(x, y=None, target_lod=None):
Args: Args:
x (Variable): Input variable which could be a Tensor or LodTensor. x (Variable): Input variable which could be a Tensor or LodTensor.
y (Variable|None): If provided, output's LoD would be derived y (Variable|None): If provided, output's LoD would be derived
from :attr:`y`. from :attr:`y`.
target_lod (list|tuple|None): One level LoD which should be considered target_lod (list|tuple|None): One level LoD which should be considered
as target LoD when :attr:`y` not provided. as target LoD when :attr:`y` not provided.
...@@ -4688,7 +4699,7 @@ def image_resize(input, ...@@ -4688,7 +4699,7 @@ def image_resize(input,
""" """
**Resize a Batch of Images** **Resize a Batch of Images**
The input must be a tensor of the shape (num_batches, channels, in_h, in_w), The input must be a tensor of the shape (num_batches, channels, in_h, in_w),
and the resizing only applies on the last two dimensions(height and width). and the resizing only applies on the last two dimensions(height and width).
Supporting resample methods: Supporting resample methods:
...@@ -4784,9 +4795,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): ...@@ -4784,9 +4795,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
def image_resize_short(input, out_short_len, resample='BILINEAR'): def image_resize_short(input, out_short_len, resample='BILINEAR'):
""" """
Resize a batch of images. The short edge of input images will be Resize a batch of images. The short edge of input images will be
resized to the given 'out_short_len'. The long edge of input images resized to the given 'out_short_len'. The long edge of input images
will be resized proportionately to make images' length-width ratio will be resized proportionately to make images' length-width ratio
constant. constant.
Args: Args:
...@@ -4819,7 +4830,7 @@ def gather(input, index): ...@@ -4819,7 +4830,7 @@ def gather(input, index):
""" """
**Gather Layer** **Gather Layer**
Output is obtained by gathering entries of the outer-most dimension Output is obtained by gathering entries of the outer-most dimension
of X indexed by `index` and concatenate them together. of X indexed by `index` and concatenate them together.
.. math:: .. math::
...@@ -4844,7 +4855,7 @@ def gather(input, index): ...@@ -4844,7 +4855,7 @@ def gather(input, index):
[5, 6]] [5, 6]]
Args: Args:
input (Variable): The source input with rank>=1. input (Variable): The source input with rank>=1.
index (Variable): The index input with rank=1. index (Variable): The index input with rank=1.
Returns: Returns:
...@@ -4880,40 +4891,32 @@ def random_crop(x, shape, seed=None): ...@@ -4880,40 +4891,32 @@ def random_crop(x, shape, seed=None):
Returns: Returns:
${out_comment} ${out_comment}
Examples: Examples:
>>> img = fluid.layers.data("img", [3, 256, 256]) >>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224]) >>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
""" """
helper = LayerHelper("random_crop", **locals()) helper = LayerHelper("random_crop", **locals())
dtype = helper.input_dtype() dtype = x.dtype
out = helper.create_tmp_variable(dtype) out = helper.create_tmp_variable(dtype)
if seed is None: if seed is None:
seed = random.randint(-65536, 65535) seed = random.randint(-65536, 65535)
op_attrs = {"shape": shape}
if isinstance(seed, int): if isinstance(seed, int):
seed_value = seed op_attrs["startup_seed"] = seed
seed = helper.create_tmp_variable(dtype="int64") seed = helper.create_variable(
helper.append_op( name=unique_name.generate("random_crop_seed"),
type="fill_constant", dtype="int64",
inputs={}, persistable=True)
outputs={"Out": seed},
attrs={
"dtype": seed.dtype,
"shape": [1],
"value": float(seed_value),
"force_cpu": True
})
elif not isinstance(seed, Variable): elif not isinstance(seed, Variable):
raise ValueError("'seed' must be a Variable or an int.") raise ValueError("'seed' must be a Variable or an int.")
seed_out = helper.create_tmp_variable(dtype="int64")
helper.append_op( helper.append_op(
type="random_crop", type="random_crop",
inputs={"X": x, inputs={"X": x,
"Seed": seed}, "Seed": seed},
outputs={"Out": out, outputs={"Out": out,
"SeedOut": seed_out}, "SeedOut": seed},
attrs={"shape": shape}) attrs=op_attrs)
return out return out
...@@ -4926,7 +4929,7 @@ def log(x): ...@@ -4926,7 +4929,7 @@ def log(x):
Out = \\ln(x) Out = \\ln(x)
Args: Args:
x (Variable): Input tensor. x (Variable): Input tensor.
Returns: Returns:
Variable: The natural log of the input tensor computed element-wise. Variable: The natural log of the input tensor computed element-wise.
...@@ -4955,7 +4958,7 @@ def relu(x): ...@@ -4955,7 +4958,7 @@ def relu(x):
Out = \\max(0, x) Out = \\max(0, x)
Args: Args:
x (Variable): The input tensor. x (Variable): The input tensor.
Returns: Returns:
Variable: The output tensor with the same shape as input. Variable: The output tensor with the same shape as input.
...@@ -4976,15 +4979,15 @@ def relu(x): ...@@ -4976,15 +4979,15 @@ def relu(x):
def mean_iou(input, label, num_classes): def mean_iou(input, label, num_classes):
""" """
Mean Intersection-Over-Union is a common evaluation metric for Mean Intersection-Over-Union is a common evaluation metric for
semantic image segmentation, which first computes the IOU for each semantic image segmentation, which first computes the IOU for each
semantic class and then computes the average over classes. semantic class and then computes the average over classes.
IOU is defined as follows: IOU is defined as follows:
.. math:: .. math::
IOU = \\frac{true\_positive}{(true\_positive + false\_positive + false\_negative)}. IOU = \\frac{true\_positive}{(true\_positive + false\_positive + false\_negative)}.
The predictions are accumulated in a confusion matrix and mean-IOU The predictions are accumulated in a confusion matrix and mean-IOU
is then calculated from it. is then calculated from it.
...@@ -4997,12 +5000,12 @@ def mean_iou(input, label, num_classes): ...@@ -4997,12 +5000,12 @@ def mean_iou(input, label, num_classes):
Returns: Returns:
mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1]. mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1].
out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class. out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class.
out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class. out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class.
Examples: Examples:
.. code-block:: python .. code-block:: python
iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes) iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes)
""" """
helper = LayerHelper('mean_iou', **locals()) helper = LayerHelper('mean_iou', **locals())
......
...@@ -155,7 +155,7 @@ def cast(x, dtype): ...@@ -155,7 +155,7 @@ def cast(x, dtype):
Examples: Examples:
.. code-block:: python .. code-block:: python
data = fluid.layers.data(name='x', shape=[13], dtype='float32') data = fluid.layers.data(name='x', shape=[13], dtype='float32')
result = fluid.layers.cast(x=data, dtype='float64') result = fluid.layers.cast(x=data, dtype='float64')
""" """
...@@ -188,7 +188,7 @@ def concat(input, axis=0, name=None): ...@@ -188,7 +188,7 @@ def concat(input, axis=0, name=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth]) out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
""" """
helper = LayerHelper('concat', **locals()) helper = LayerHelper('concat', **locals())
...@@ -230,11 +230,15 @@ def sums(input, out=None): ...@@ -230,11 +230,15 @@ def sums(input, out=None):
helper = LayerHelper('sum', **locals()) helper = LayerHelper('sum', **locals())
if out is None: if out is None:
out = helper.create_tmp_variable(dtype=helper.input_dtype()) out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out}) helper.append_op(
type='sum',
inputs={'X': input},
outputs={'Out': out},
attrs={'use_mkldnn': False})
return out return out
def assign(input, output): def assign(input, output=None):
""" """
**Assign** **Assign**
...@@ -242,7 +246,7 @@ def assign(input, output): ...@@ -242,7 +246,7 @@ def assign(input, output):
Args: Args:
input(Variable|numpy.ndarray): The source variable input(Variable|numpy.ndarray): The source variable
output(Variable): The destination variable output(Variable|None): The destination variable
Returns: Returns:
Variable: The destination variable that was supplied as the *output*. Variable: The destination variable that was supplied as the *output*.
...@@ -255,6 +259,8 @@ def assign(input, output): ...@@ -255,6 +259,8 @@ def assign(input, output):
fluid.layers.assign(hidden, out) fluid.layers.assign(hidden, out)
""" """
helper = LayerHelper('assign', **locals()) helper = LayerHelper('assign', **locals())
if output is None:
output = helper.create_tmp_variable(dtype=input.dtype)
if isinstance(input, Variable): if isinstance(input, Variable):
helper.append_op( helper.append_op(
type='assign', inputs={'X': [input]}, outputs={'Out': [output]}) type='assign', inputs={'X': [input]}, outputs={'Out': [output]})
...@@ -380,7 +386,7 @@ def argmin(x, axis=0): ...@@ -380,7 +386,7 @@ def argmin(x, axis=0):
""" """
**argmin** **argmin**
This function computes the indices of the min elements This function computes the indices of the min elements
of the input tensor's element along the provided axis. of the input tensor's element along the provided axis.
Args: Args:
...@@ -395,7 +401,7 @@ def argmin(x, axis=0): ...@@ -395,7 +401,7 @@ def argmin(x, axis=0):
.. code-block:: python .. code-block:: python
out = fluid.layers.argmin(x=in, axis=0) out = fluid.layers.argmin(x=in, axis=0)
out = fluid.layers.argmin(x=in, axis=-1) out = fluid.layers.argmin(x=in, axis=-1)
""" """
helper = LayerHelper("arg_min", **locals()) helper = LayerHelper("arg_min", **locals())
out = helper.create_tmp_variable(VarDesc.VarType.INT64) out = helper.create_tmp_variable(VarDesc.VarType.INT64)
...@@ -411,7 +417,7 @@ def argmax(x, axis=0): ...@@ -411,7 +417,7 @@ def argmax(x, axis=0):
""" """
**argmax** **argmax**
This function computes the indices of the max elements This function computes the indices of the max elements
of the input tensor's element along the provided axis. of the input tensor's element along the provided axis.
Args: Args:
...@@ -426,7 +432,7 @@ def argmax(x, axis=0): ...@@ -426,7 +432,7 @@ def argmax(x, axis=0):
.. code-block:: python .. code-block:: python
out = fluid.layers.argmax(x=in, axis=0) out = fluid.layers.argmax(x=in, axis=0)
out = fluid.layers.argmax(x=in, axis=-1) out = fluid.layers.argmax(x=in, axis=-1)
""" """
helper = LayerHelper("arg_max", **locals()) helper = LayerHelper("arg_max", **locals())
out = helper.create_tmp_variable(VarDesc.VarType.INT64) out = helper.create_tmp_variable(VarDesc.VarType.INT64)
...@@ -495,9 +501,9 @@ def reverse(x, axis): ...@@ -495,9 +501,9 @@ def reverse(x, axis):
Args: Args:
x(Variable): the input to be reversed. x(Variable): the input to be reversed.
axis(int|tuple|list): Axis that along which order of elements axis(int|tuple|list): Axis that along which order of elements
is reversed. If it is a tuple or a list, reversing is reversed. If it is a tuple or a list, reversing
will be apply on each axis in the tuple or list. will be apply on each axis in the tuple or list.
Returns: Returns:
Variable: The reversed tensor. Variable: The reversed tensor.
...@@ -528,9 +534,9 @@ def save(x, file_path, overwrite=True): ...@@ -528,9 +534,9 @@ def save(x, file_path, overwrite=True):
Args: Args:
x(variable): The Tensor/LoDTensor to be saved. x(variable): The Tensor/LoDTensor to be saved.
file_path(str): The file path where the variable will be saved. file_path(str): The file path where the variable will be saved.
overwrite(bool): Whether or not cover the given file when it has already overwrite(bool): Whether or not cover the given file when it has already
existed. If it's set 'False' and the file is existed, a runtime existed. If it's set 'False' and the file is existed, a runtime
error will be thrown. error will be thrown.
""" """
helper = LayerHelper("save", **locals()) helper = LayerHelper("save", **locals())
helper.append_op( helper.append_op(
...@@ -550,8 +556,8 @@ def save_combine(x, file_path, overwrite=True): ...@@ -550,8 +556,8 @@ def save_combine(x, file_path, overwrite=True):
a single file. a single file.
file_path(str): The file path where variables will be saved. file_path(str): The file path where variables will be saved.
overwrite(bool): Whether or not cover the given file when it has already overwrite(bool): Whether or not cover the given file when it has already
existed. If it's set 'False' and the file is existed, a runtime existed. If it's set 'False' and the file is existed, a runtime
error will be thrown. error will be thrown.
Returns: Returns:
There is no return value. There is no return value.
......
...@@ -596,12 +596,12 @@ class Auc(MetricBase): ...@@ -596,12 +596,12 @@ class Auc(MetricBase):
tp, fn, tn, fp = 0, 0, 0, 0 tp, fn, tn, fp = 0, 0, 0, 0
for i, lbl in enumerate(labels): for i, lbl in enumerate(labels):
if lbl: if lbl:
if predictions[i, 1] >= thresh: if preds[i, 1] >= thresh:
tp += 1 tp += 1
else: else:
fn += 1 fn += 1
else: else:
if predictions[i, 1] >= thresh: if preds[i, 1] >= thresh:
fp += 1 fp += 1
else: else:
tn += 1 tn += 1
......
...@@ -160,7 +160,7 @@ class ParallelExecutor(object): ...@@ -160,7 +160,7 @@ class ParallelExecutor(object):
build_strategy, num_trainers, trainer_id) build_strategy, num_trainers, trainer_id)
self.scope = scope self.scope = scope
def run(self, fetch_list, feed=None, feed_dict=None): def run(self, fetch_list, feed=None, feed_dict=None, return_numpy=False):
""" """
Run a parallel executor with fetch_list. Run a parallel executor with fetch_list.
...@@ -196,6 +196,8 @@ class ParallelExecutor(object): ...@@ -196,6 +196,8 @@ class ParallelExecutor(object):
to each device. Default None. to each device. Default None.
feed_dict: Alias for feed parameter, for backward compatibility. feed_dict: Alias for feed parameter, for backward compatibility.
This parameter has been deprecated. Default None. This parameter has been deprecated. Default None.
return_numpy(bool): Whether to convert the fetched tensors to numpy.
Default: False.
Returns: Returns:
List: The fetched result list. List: The fetched result list.
...@@ -270,6 +272,9 @@ class ParallelExecutor(object): ...@@ -270,6 +272,9 @@ class ParallelExecutor(object):
if self.is_dist: if self.is_dist:
self.bcast_params() self.bcast_params()
if return_numpy:
return executor.as_numpy(arr)
return [arr[i] for i in range(len(arr))] return [arr[i] for i in range(len(arr))]
def bcast_params(self): def bcast_params(self):
......
...@@ -15,7 +15,7 @@ if(NOT WITH_DISTRIBUTE) ...@@ -15,7 +15,7 @@ if(NOT WITH_DISTRIBUTE)
endif(NOT WITH_DISTRIBUTE) endif(NOT WITH_DISTRIBUTE)
list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184 list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184
list(REMOVE_ITEM TEST_OPS test_lstm_unit_op) # # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185 list(REMOVE_ITEM TEST_OPS test_lstm_unit_op) # # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185
list(REMOVE_ITEM TEST_OPS test_nce) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/7778 list(REMOVE_ITEM TEST_OPS test_nce) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/7778
list(REMOVE_ITEM TEST_OPS test_recurrent_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/6152 list(REMOVE_ITEM TEST_OPS test_recurrent_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/6152
...@@ -43,8 +43,6 @@ list(REMOVE_ITEM TEST_OPS test_warpctc_op) ...@@ -43,8 +43,6 @@ list(REMOVE_ITEM TEST_OPS test_warpctc_op)
list(REMOVE_ITEM TEST_OPS test_dist_train) list(REMOVE_ITEM TEST_OPS test_dist_train)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed) list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed)
# TODO(wuyi): this test hungs on CI, will add it back later
list(REMOVE_ITEM TEST_OPS test_listen_and_serv_op)
foreach(TEST_OP ${TEST_OPS}) foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP}) py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP) endforeach(TEST_OP)
...@@ -52,3 +50,4 @@ py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=$ ...@@ -52,3 +50,4 @@ py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=$
py_test_modules(test_dist_train MODULES test_dist_train SERIAL) py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL) py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL)
py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
from test_elementwise_add_op import *
'''
Some tests differ from the tests defined in test_elementwise_add_op.py
because MKLDNN does not support tensors with 3 dimensions.
Such dimensions cause exceptions in MKLDNN reorder primitive.
'''
class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp):
    """Element-wise add of two same-shaped 4-D tensors with use_mkldnn set."""

    def init_kernel_type(self):
        # Read back by the base-class setUp when it builds the op attrs.
        self.use_mkldnn = True

    def init_input_output(self):
        # Both operands share one 4-D shape and are drawn from [0.1, 1).
        shape = [2, 3, 4, 5]
        low, high = 0.1, 1
        self.x = np.random.uniform(low, high, shape).astype(self.dtype)
        self.y = np.random.uniform(low, high, shape).astype(self.dtype)
        # Reference result the op output is compared against.
        self.out = self.x + self.y
class TestMKLDNNElementwiseAddOp_scalar(TestElementwiseAddOp_scalar):
    """Adds a one-element tensor to a 4-D tensor with use_mkldnn set."""

    def init_kernel_type(self):
        self.use_mkldnn = True

    def init_input_output(self):
        x_shape = (2, 3, 4, 5)
        y_shape = (1, )
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # NumPy broadcasting stretches the single y value over all of x.
        self.out = np.add(self.x, self.y)
class TestMKLDNNElementwiseAddOp_scalar2(TestElementwiseAddOp_scalar2):
    """Adds a (1, 1)-shaped tensor to a 4-D tensor with use_mkldnn set."""

    def init_kernel_type(self):
        self.use_mkldnn = True

    def init_input_output(self):
        x_shape = (2, 3, 4, 5)
        y_shape = (1, 1)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # The lone y element is broadcast against every element of x.
        self.out = np.add(self.x, self.y)
class TestMKLDNNElementwiseAddOp_Vector(TestElementwiseAddOp_Vector):
    """Vector case with use_mkldnn set; inputs are inherited unchanged."""

    def init_kernel_type(self):
        # Only the kernel flag differs from the parent test case.
        self.use_mkldnn = True
class TesMKLDNNtElementwiseAddOp_broadcast_0(TestElementwiseAddOp_broadcast_0):
    """Broadcasts a length-2 vector along dim 0 of a 4-D tensor (MKLDNN).

    NOTE(review): the class name looks like a typo for
    ``TestMKLDNNElementwiseAddOp_broadcast_0``. It is kept as is so the
    identifier does not change; a runner that collects classes by a ``Test``
    name prefix would not pick this case up.
    """

    def init_kernel_type(self):
        self.use_mkldnn = True

    def init_input_output(self):
        self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
        self.y = np.random.rand(2).astype(self.dtype)
        # Line y up with the leading dimension of x before adding.
        y_aligned = self.y.reshape(2, 1, 1, 1)
        self.out = np.add(self.x, y_aligned)
class TestMKLDNNElementwiseAddOp_broadcast_1(TestElementwiseAddOp_broadcast_1):
    """Broadcasts a length-3 vector along dim 1 of a 4-D tensor (MKLDNN)."""

    def init_kernel_type(self):
        self.use_mkldnn = True

    def init_input_output(self):
        self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
        self.y = np.random.rand(3).astype(self.dtype)
        # Line y up with the second dimension of x before adding.
        y_aligned = self.y.reshape(1, 3, 1, 1)
        self.out = np.add(self.x, y_aligned)
class TestMKLDNNElementwiseAddOp_broadcast_2(TestElementwiseAddOp_broadcast_2):
    """Broadcasts a length-4 vector along the last dim of a 4-D tensor."""

    def init_kernel_type(self):
        self.use_mkldnn = True

    def init_input_output(self):
        self.x = np.random.rand(2, 2, 3, 4).astype(self.dtype)
        self.y = np.random.rand(4).astype(self.dtype)
        # Line y up with the trailing dimension of x before adding.
        y_aligned = self.y.reshape(1, 1, 1, 4)
        self.out = np.add(self.x, y_aligned)
class TestMKLDNNElementwiseAddOp_broadcast_3(TestElementwiseAddOp_broadcast_3):
    """broadcast_3 case with use_mkldnn set; inputs are inherited unchanged."""

    def init_kernel_type(self):
        # Only the kernel flag differs from the parent test case.
        self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_broadcast_4(TestElementwiseAddOp_broadcast_4):
    """broadcast_4 case with use_mkldnn set; inputs are inherited unchanged."""

    def init_kernel_type(self):
        # Only the kernel flag differs from the parent test case.
        self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_rowwise_add_0(
        TestElementwiseAddOp_rowwise_add_0):
    """Adds a (3, 4) tensor across dims 1-2 of a 4-D tensor (MKLDNN)."""

    def init_kernel_type(self):
        self.use_mkldnn = True

    def init_input_output(self):
        self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
        self.y = np.random.rand(3, 4).astype(self.dtype)
        # Pad y with unit dims so it matches the two middle dims of x.
        y_aligned = self.y.reshape(1, 3, 4, 1)
        self.out = np.add(self.x, y_aligned)
class TestMKLDNNElementwiseAddOp_rowwise_add_1(
        TestElementwiseAddOp_rowwise_add_1):
    """rowwise_add_1 case with use_mkldnn set; inputs are inherited unchanged."""

    def init_kernel_type(self):
        # Only the kernel flag differs from the parent test case.
        self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_channelwise_add(
        TestElementwiseAddOp_channelwise_add):
    """Adds a (3, 1, 1, 1) tensor to a (3, 5, 20, 20) tensor (MKLDNN)."""

    def init_kernel_type(self):
        self.use_mkldnn = True

    def init_input_output(self):
        x_shape = (3, 5, 20, 20)
        y_shape = (3, 1, 1, 1)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # y already carries unit dims, so plain broadcasting gives the result.
        self.out = np.add(self.x, self.y)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -18,19 +18,23 @@ from op_test import OpTest ...@@ -18,19 +18,23 @@ from op_test import OpTest
class TestElementwiseAddOp(OpTest): class TestElementwiseAddOp(OpTest):
def init_kernel_type(self):
self.use_mkldnn = False
def setUp(self): def setUp(self):
self.op_type = "elementwise_add" self.op_type = "elementwise_add"
self.dtype = np.float32 self.dtype = np.float32
self.axis = -1 self.axis = -1
self.init_dtype() self.init_dtype()
self.init_input_output() self.init_input_output()
self.init_kernel_type()
self.init_axis() self.init_axis()
self.inputs = { self.inputs = {
'X': OpTest.np_dtype_to_fluid_dtype(self.x), 'X': OpTest.np_dtype_to_fluid_dtype(self.x),
'Y': OpTest.np_dtype_to_fluid_dtype(self.y) 'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
} }
self.attrs = {'axis': self.axis} self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
self.outputs = {'Out': self.out} self.outputs = {'Out': self.out}
def test_check_output(self): def test_check_output(self):
......
...@@ -94,7 +94,7 @@ class TestListenAndServOp(OpTest): ...@@ -94,7 +94,7 @@ class TestListenAndServOp(OpTest):
self._wait_ps_ready(p1.pid) self._wait_ps_ready(p1.pid)
# raise SIGTERM to pserver # raise SIGTERM to pserver
os.kill(p1.pid, signal.SIGKILL) os.kill(p1.pid, signal.SIGINT)
p1.join() p1.join()
# run pserver on CPU in async mode # run pserver on CPU in async mode
...@@ -102,7 +102,7 @@ class TestListenAndServOp(OpTest): ...@@ -102,7 +102,7 @@ class TestListenAndServOp(OpTest):
self._wait_ps_ready(p2.pid) self._wait_ps_ready(p2.pid)
# raise SIGTERM to pserver # raise SIGTERM to pserver
os.kill(p2.pid, signal.SIGKILL) os.kill(p2.pid, signal.SIGTERM)
p2.join() p2.join()
......
...@@ -75,7 +75,9 @@ class TestFetchOp(unittest.TestCase): ...@@ -75,7 +75,9 @@ class TestFetchOp(unittest.TestCase):
fetch_list.append(k) fetch_list.append(k)
for data in train_inputs: for data in train_inputs:
ret = pe.run(fetch_list, feed=feeder.feed(data)) ret = pe.run(fetch_list,
feed=feeder.feed(data),
return_numpy=True)
for i in range(len(fetch_list)): for i in range(len(fetch_list)):
assert not math.isnan(np.sum(ret[i])) and \ assert not math.isnan(np.sum(ret[i])) and \
not math.isinf(np.sum(ret[i])) not math.isinf(np.sum(ret[i]))
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from test_sum_op import TestSumOp
class TestMKLDNN(TestSumOp):
    """Sum-op test with use_mkldnn set; everything else comes from TestSumOp."""

    def init_kernel_type(self):
        # TestSumOp.setUp defaults use_mkldnn to False, calls this hook, then
        # copies the flag into the op attrs.
        self.use_mkldnn = True
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -20,12 +20,15 @@ from op_test import OpTest ...@@ -20,12 +20,15 @@ from op_test import OpTest
class TestSumOp(OpTest): class TestSumOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "sum" self.op_type = "sum"
self.use_mkldnn = False
self.init_kernel_type()
x0 = np.random.random((3, 4)).astype('float32') x0 = np.random.random((3, 4)).astype('float32')
x1 = np.random.random((3, 4)).astype('float32') x1 = np.random.random((3, 4)).astype('float32')
x2 = np.random.random((3, 4)).astype('float32') x2 = np.random.random((3, 4)).astype('float32')
self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]} self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
y = x0 + x1 + x2 y = x0 + x1 + x2
self.outputs = {'Out': y} self.outputs = {'Out': y}
self.attrs = {'use_mkldnn': self.use_mkldnn}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
...@@ -33,6 +36,9 @@ class TestSumOp(OpTest): ...@@ -33,6 +36,9 @@ class TestSumOp(OpTest):
def test_check_grad(self): def test_check_grad(self):
self.check_grad(['x0'], 'Out') self.check_grad(['x0'], 'Out')
def init_kernel_type(self):
pass
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -396,7 +396,7 @@ class DistributeTranspiler(object): ...@@ -396,7 +396,7 @@ class DistributeTranspiler(object):
return varname return varname
return "" return ""
def __clone_lr_op_sub_block__(op, program, new_block): def __clone_lr_op_sub_block__(op, program, lr_block):
if not op.has_attr('sub_block'): if not op.has_attr('sub_block'):
return return
...@@ -405,36 +405,41 @@ class DistributeTranspiler(object): ...@@ -405,36 +405,41 @@ class DistributeTranspiler(object):
assert isinstance(origin_block, Block) assert isinstance(origin_block, Block)
# we put the new sub block to new block to follow the block # we put the new sub block to new block to follow the block
# hierarchy of the original blocks # hierarchy of the original blocks
new_sub_block = program.create_block(new_block.idx) new_sub_block = program.create_block(lr_block.idx)
# clone vars # clone vars
for var in origin_block.vars: for var in origin_block.vars:
new_sub_block.clone_variable(var) new_sub_block.clone_variable(var)
# clone ops # clone ops
for op in origin_block.ops: for origin_op in origin_block.ops:
self._clone_lr_op(program, new_sub_block, op) cloned_op = self._clone_lr_op(program, new_sub_block, origin_op)
# clone sub_block of op # clone sub_block of op
__clone_lr_op_sub_block__(op, program, new_sub_block) __clone_lr_op_sub_block__(cloned_op, program, new_sub_block)
# reset the block of op # reset the block of op
op.set_attr('sub_block', new_sub_block) op.set_attr('sub_block', new_sub_block)
# append lr decay ops to the child block if exists # append lr decay ops to the child block if exists
lr_ops = self._get_lr_ops() lr_ops = self._get_lr_ops()
# record optimize blocks and we can run them on pserver parallel
optimize_blocks = []
if len(lr_ops) > 0: if len(lr_ops) > 0:
lr_decay_block = pserver_program.create_block( lr_decay_block = pserver_program.create_block(
pserver_program.num_blocks - 1) pserver_program.num_blocks - 1)
optimize_blocks.append(lr_decay_block)
for _, op in enumerate(lr_ops): for _, op in enumerate(lr_ops):
self._append_pserver_non_opt_ops(lr_decay_block, op) cloned_op = self._append_pserver_non_opt_ops(lr_decay_block, op)
# append sub blocks to pserver_program in lr_decay_op # append sub blocks to pserver_program in lr_decay_op
__clone_lr_op_sub_block__(op, pserver_program, lr_decay_block) __clone_lr_op_sub_block__(cloned_op, pserver_program,
lr_decay_block)
# append op to the current block # append op to the current block
grad_to_block_id = [] grad_to_block_id = []
pre_block_idx = pserver_program.num_blocks - 1 pre_block_idx = pserver_program.num_blocks - 1
for idx, opt_op in enumerate(opt_op_on_pserver): for idx, opt_op in enumerate(opt_op_on_pserver):
per_opt_block = pserver_program.create_block(pre_block_idx) per_opt_block = pserver_program.create_block(pre_block_idx)
optimize_blocks.append(per_opt_block)
# append grad merging ops before clip and weight decay # append grad merging ops before clip and weight decay
for _, op in enumerate(self.optimize_ops): for _, op in enumerate(self.optimize_ops):
# find the origin @GRAD var before clipping # find the origin @GRAD var before clipping
...@@ -453,6 +458,7 @@ class DistributeTranspiler(object): ...@@ -453,6 +458,7 @@ class DistributeTranspiler(object):
if global_ops: if global_ops:
opt_state_block = pserver_program.create_block( opt_state_block = pserver_program.create_block(
pserver_program.num_blocks - 1) pserver_program.num_blocks - 1)
optimize_blocks.append(opt_state_block)
for glb_op in global_ops: for glb_op in global_ops:
__append_optimize_op__(glb_op, opt_state_block, __append_optimize_op__(glb_op, opt_state_block,
grad_to_block_id, None) grad_to_block_id, None)
...@@ -474,11 +480,11 @@ class DistributeTranspiler(object): ...@@ -474,11 +480,11 @@ class DistributeTranspiler(object):
assert len(prefetch_var_name_to_block_id) == 0 assert len(prefetch_var_name_to_block_id) == 0
attrs = { attrs = {
"OptimizeBlock": pserver_program.block(1), "optimize_blocks": optimize_blocks,
"endpoint": endpoint, "endpoint": endpoint,
"Fanin": self.trainer_num, "Fanin": self.trainer_num,
"sync_mode": self.sync_mode, "sync_mode": self.sync_mode,
"grad_to_block_id": grad_to_block_id "grad_to_block_id": grad_to_block_id,
} }
if len(prefetch_var_name_to_block_id) > 0: if len(prefetch_var_name_to_block_id) > 0:
attrs['prefetch_var_name_to_block_id'] \ attrs['prefetch_var_name_to_block_id'] \
...@@ -872,7 +878,8 @@ class DistributeTranspiler(object): ...@@ -872,7 +878,8 @@ class DistributeTranspiler(object):
table_opt_block.append_op( table_opt_block.append_op(
type="sum", type="sum",
inputs={"X": pserver_side_table_grad_list}, inputs={"X": pserver_side_table_grad_list},
outputs={"Out": [grad_var]}) outputs={"Out": [grad_var]},
attrs={"use_mkldnn": False})
else: else:
# in async_mode, for table gradient, it also need to be splited to each parameter server # in async_mode, for table gradient, it also need to be splited to each parameter server
origin_grad_name = grad_var.name origin_grad_name = grad_var.name
...@@ -1104,7 +1111,8 @@ class DistributeTranspiler(object): ...@@ -1104,7 +1111,8 @@ class DistributeTranspiler(object):
optimize_block.append_op( optimize_block.append_op(
type="sum", type="sum",
inputs={"X": vars2merge}, inputs={"X": vars2merge},
outputs={"Out": merged_var}) outputs={"Out": merged_var},
attrs={"use_mkldnn": False})
# TODO(panyx0718): What if it's SELECTED_ROWS. # TODO(panyx0718): What if it's SELECTED_ROWS.
if not merged_var.type == core.VarDesc.VarType.SELECTED_ROWS: if not merged_var.type == core.VarDesc.VarType.SELECTED_ROWS:
optimize_block.append_op( optimize_block.append_op(
...@@ -1209,7 +1217,7 @@ class DistributeTranspiler(object): ...@@ -1209,7 +1217,7 @@ class DistributeTranspiler(object):
if var not in program.global_block().vars: if var not in program.global_block().vars:
block.clone_variable(var) block.clone_variable(var)
block.append_op( return block.append_op(
type=op.type, inputs=inputs, outputs=outputs, attrs=op.attrs) type=op.type, inputs=inputs, outputs=outputs, attrs=op.attrs)
def _append_pserver_non_opt_ops(self, optimize_block, opt_op): def _append_pserver_non_opt_ops(self, optimize_block, opt_op):
...@@ -1247,7 +1255,7 @@ class DistributeTranspiler(object): ...@@ -1247,7 +1255,7 @@ class DistributeTranspiler(object):
elif not program.global_block().vars.has_key(var.name): elif not program.global_block().vars.has_key(var.name):
program.global_block().clone_variable(var) program.global_block().clone_variable(var)
optimize_block.append_op( return optimize_block.append_op(
type=opt_op.type, type=opt_op.type,
inputs=inputs, inputs=inputs,
outputs=outputs, outputs=outputs,
...@@ -1291,16 +1299,6 @@ class DistributeTranspiler(object): ...@@ -1291,16 +1299,6 @@ class DistributeTranspiler(object):
ufind.union(op1, op2) ufind.union(op1, op2)
return ufind return ufind
def _is_opt_role_op(self, op):
# NOTE: depend on oprole to find out whether this op is for
# optimize
op_maker = core.op_proto_and_checker_maker
optimize_role = core.op_proto_and_checker_maker.OpRole.Optimize
if op_maker.kOpRoleAttrName() in op.attrs and \
int(op.attrs[op_maker.kOpRoleAttrName()]) == int(optimize_role):
return True
return False
def _is_optimizer_op(self, op): def _is_optimizer_op(self, op):
if "Param" in op.input_names and \ if "Param" in op.input_names and \
"LearningRate" in op.input_names: "LearningRate" in op.input_names:
...@@ -1391,7 +1389,10 @@ class DistributeTranspiler(object): ...@@ -1391,7 +1389,10 @@ class DistributeTranspiler(object):
params_grads = [] params_grads = []
origin_var_dict = self.origin_program.global_block().vars origin_var_dict = self.origin_program.global_block().vars
for op in block.ops: for op in block.ops:
if self._is_opt_role_op(op): # NOTE(Yancey1989): we can not use op role to distinguish an optimizer op
# or not, because all ops in optimizer sub-graph would
# sign the optimizer op role
if self._is_optimizer_op(op):
opt_ops.append(op) opt_ops.append(op)
# HACK(wuyi): if we find grad vars from input of optimize # HACK(wuyi): if we find grad vars from input of optimize
# ops, we may get the output of clip op. Use syntax "@GRAD" # ops, we may get the output of clip op. Use syntax "@GRAD"
......
...@@ -336,7 +336,7 @@ def _buf2lines(buf, line_break="\n"): ...@@ -336,7 +336,7 @@ def _buf2lines(buf, line_break="\n"):
class PipeReader: class PipeReader:
""" """
PipeReader read data by stream from a command, take it's PipeReader read data by stream from a command, take it's
stdout into a pipe buffer and redirect it to the parser to stdout into a pipe buffer and redirect it to the parser to
parse, then yield data as your desired format. parse, then yield data as your desired format.
...@@ -352,7 +352,7 @@ class PipeReader: ...@@ -352,7 +352,7 @@ class PipeReader:
An example: An example:
.. code-block:: python .. code-block:: python
def example_reader(): def example_reader():
for f in myfiles: for f in myfiles:
pr = PipeReader("cat %s"%f) pr = PipeReader("cat %s"%f)
......
...@@ -43,7 +43,7 @@ CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz' ...@@ -43,7 +43,7 @@ CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz'
CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85' CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'
def reader_creator(filename, sub_name): def reader_creator(filename, sub_name, cycle=False):
def read_batch(batch): def read_batch(batch):
data = batch['data'] data = batch['data']
labels = batch.get('labels', batch.get('fine_labels', None)) labels = batch.get('labels', batch.get('fine_labels', None))
...@@ -56,10 +56,13 @@ def reader_creator(filename, sub_name): ...@@ -56,10 +56,13 @@ def reader_creator(filename, sub_name):
names = (each_item.name for each_item in f names = (each_item.name for each_item in f
if sub_name in each_item.name) if sub_name in each_item.name)
for name in names: while True:
batch = cPickle.load(f.extractfile(name)) for name in names:
for item in read_batch(batch): batch = cPickle.load(f.extractfile(name))
yield item for item in read_batch(batch):
yield item
if not cycle:
break
return reader return reader
...@@ -94,34 +97,40 @@ def test100(): ...@@ -94,34 +97,40 @@ def test100():
'test') 'test')
def train10(): def train10(cycle=False):
""" """
CIFAR-10 training set creator. CIFAR-10 training set creator.
It returns a reader creator, each sample in the reader is image pixels in It returns a reader creator, each sample in the reader is image pixels in
[0, 1] and label in [0, 9]. [0, 1] and label in [0, 9].
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: Training reader creator :return: Training reader creator
:rtype: callable :rtype: callable
""" """
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'data_batch') 'data_batch',
cycle=cycle)
def test10(): def test10(cycle=False):
""" """
CIFAR-10 test set creator. CIFAR-10 test set creator.
It returns a reader creator, each sample in the reader is image pixels in It returns a reader creator, each sample in the reader is image pixels in
[0, 1] and label in [0, 9]. [0, 1] and label in [0, 9].
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: Test reader creator. :return: Test reader creator.
:rtype: callable :rtype: callable
""" """
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'test_batch') 'test_batch',
cycle=cycle)
def fetch(): def fetch():
......
...@@ -76,7 +76,8 @@ def reader_creator(data_file, ...@@ -76,7 +76,8 @@ def reader_creator(data_file,
dataset_name, dataset_name,
mapper, mapper,
buffered_size=1024, buffered_size=1024,
use_xmap=True): use_xmap=True,
cycle=False):
''' '''
1. read images from tar file and 1. read images from tar file and
merge images into batch files in 102flowers.tgz_batch/ merge images into batch files in 102flowers.tgz_batch/
...@@ -96,6 +97,8 @@ def reader_creator(data_file, ...@@ -96,6 +97,8 @@ def reader_creator(data_file,
:type mapper: callable :type mapper: callable
:param buffered_size: the size of buffer used to process images :param buffered_size: the size of buffer used to process images
:type buffered_size: int :type buffered_size: int
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: data reader :return: data reader
:rtype: callable :rtype: callable
''' '''
...@@ -108,15 +111,18 @@ def reader_creator(data_file, ...@@ -108,15 +111,18 @@ def reader_creator(data_file,
file_list = batch_images_from_tar(data_file, dataset_name, img2label) file_list = batch_images_from_tar(data_file, dataset_name, img2label)
def reader(): def reader():
for file in open(file_list): while True:
file = file.strip() for file in open(file_list):
batch = None file = file.strip()
with open(file, 'r') as f: batch = None
batch = cPickle.load(f) with open(file, 'r') as f:
data = batch['data'] batch = cPickle.load(f)
labels = batch['label'] data = batch['data']
for sample, label in itertools.izip(data, batch['label']): labels = batch['label']
yield sample, int(label) - 1 for sample, label in itertools.izip(data, batch['label']):
yield sample, int(label) - 1
if not cycle:
break
if use_xmap: if use_xmap:
cpu_num = int(os.environ.get('CPU_NUM', cpu_count())) cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
...@@ -125,7 +131,7 @@ def reader_creator(data_file, ...@@ -125,7 +131,7 @@ def reader_creator(data_file,
return map_readers(mapper, reader) return map_readers(mapper, reader)
def train(mapper=train_mapper, buffered_size=1024, use_xmap=True): def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False):
''' '''
Create flowers training set reader. Create flowers training set reader.
It returns a reader, each sample in the reader is It returns a reader, each sample in the reader is
...@@ -138,17 +144,23 @@ def train(mapper=train_mapper, buffered_size=1024, use_xmap=True): ...@@ -138,17 +144,23 @@ def train(mapper=train_mapper, buffered_size=1024, use_xmap=True):
:type mapper: callable :type mapper: callable
:param buffered_size: the size of buffer used to process images :param buffered_size: the size of buffer used to process images
:type buffered_size: int :type buffered_size: int
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: train data reader :return: train data reader
:rtype: callable :rtype: callable
''' '''
return reader_creator( return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5), download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5), download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), TRAIN_FLAG, mapper, download(SETID_URL, 'flowers', SETID_MD5),
buffered_size, use_xmap) TRAIN_FLAG,
mapper,
buffered_size,
use_xmap,
cycle=cycle)
def test(mapper=test_mapper, buffered_size=1024, use_xmap=True): def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False):
''' '''
Create flowers test set reader. Create flowers test set reader.
It returns a reader, each sample in the reader is It returns a reader, each sample in the reader is
...@@ -161,14 +173,20 @@ def test(mapper=test_mapper, buffered_size=1024, use_xmap=True): ...@@ -161,14 +173,20 @@ def test(mapper=test_mapper, buffered_size=1024, use_xmap=True):
:type mapper: callable :type mapper: callable
:param buffered_size: the size of buffer used to process images :param buffered_size: the size of buffer used to process images
:type buffered_size: int :type buffered_size: int
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: test data reader :return: test data reader
:rtype: callable :rtype: callable
''' '''
return reader_creator( return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5), download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5), download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), TEST_FLAG, mapper, download(SETID_URL, 'flowers', SETID_MD5),
buffered_size, use_xmap) TEST_FLAG,
mapper,
buffered_size,
use_xmap,
cycle=cycle)
def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True): def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册