提交 d0a8eea2 编写于 作者: F fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into expose_Parameter_2

...@@ -76,7 +76,8 @@ RUN easy_install -U pip && \ ...@@ -76,7 +76,8 @@ RUN easy_install -U pip && \
pip install sphinx-rtd-theme==0.1.9 recommonmark pip install sphinx-rtd-theme==0.1.9 recommonmark
RUN pip install pre-commit 'ipython==5.3.0' && \ RUN pip install pre-commit 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip install opencv-python
#For docstring checker #For docstring checker
RUN pip install pylint pytest astroid isort RUN pip install pylint pytest astroid isort
......
...@@ -40,12 +40,12 @@ ExternalProject_Add( ...@@ -40,12 +40,12 @@ ExternalProject_Add(
# NOTE(wuyi): # NOTE(wuyi):
# this package is generated by following steps: # this package is generated by following steps:
# 1. git clone -b v1.8.x https://github.com/grpc/grpc.git # 1. git clone -b v1.8.x https://github.com/grpc/grpc.git
# 2. submodule update --init # 2. git submodule update --init
# 3. keep only zlib, cares, protobuf, boringssl under "third_party", # 3. keep only zlib, cares, protobuf, boringssl under "third_party",
# checkout and clean other dirs under third_party # checkout and clean other dirs under third_party
# 4. remove .git, and package the directory. # 4. remove .git, and package the directory.
URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.8.x.tar.gz" URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.10.x.tar.gz"
URL_MD5 "c9c58ee7d0e8929a63155af6a2ecdbd0" URL_MD5 "1f268a2aff6759839dccd256adcc91cf"
PREFIX ${GRPC_SOURCES_DIR} PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -54,7 +54,7 @@ ExternalProject_Add( ...@@ -54,7 +54,7 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${MKLDNN_DEPENDS} DEPENDS ${MKLDNN_DEPENDS}
GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
GIT_TAG "db3424ad44901513c03a1ea31ccaacdf633fbe9f" GIT_TAG "a29d8487a63afca3d5b8c5bbdbb473cf8ccc6e51"
PREFIX ${MKLDNN_SOURCES_DIR} PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
========= =============
evaluator fluid.average
========= =============
.. _api_fluid_average_WeightedAverage:
WeightedAverage
---------------
.. autoclass:: paddle.fluid.average.WeightedAverage
:members:
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
==============
fluid.backward
==============
.. _api_fluid_backward_append_backward:
append_backward
---------------
.. autofunction:: paddle.fluid.backward.append_backward
:noindex:
.. _api_fluid_backward_calc_gradient:
calc_gradient
-------------
.. autofunction:: paddle.fluid.backward.calc_gradient
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
==== ==========
clip fluid.clip
==== ==========
.. _api_fluid_clip_ErrorClipByValue:
ErrorClipByValue ErrorClipByValue
---------------- ----------------
...@@ -12,6 +14,8 @@ ErrorClipByValue ...@@ -12,6 +14,8 @@ ErrorClipByValue
:members: :members:
:noindex: :noindex:
.. _api_fluid_clip_GradientClipByValue:
GradientClipByValue GradientClipByValue
------------------- -------------------
...@@ -19,6 +23,8 @@ GradientClipByValue ...@@ -19,6 +23,8 @@ GradientClipByValue
:members: :members:
:noindex: :noindex:
.. _api_fluid_clip_GradientClipByNorm:
GradientClipByNorm GradientClipByNorm
------------------ ------------------
...@@ -26,6 +32,8 @@ GradientClipByNorm ...@@ -26,6 +32,8 @@ GradientClipByNorm
:members: :members:
:noindex: :noindex:
.. _api_fluid_clip_GradientClipByGlobalNorm:
GradientClipByGlobalNorm GradientClipByGlobalNorm
------------------------ ------------------------
...@@ -33,15 +41,3 @@ GradientClipByGlobalNorm ...@@ -33,15 +41,3 @@ GradientClipByGlobalNorm
:members: :members:
:noindex: :noindex:
append_gradient_clip_ops
------------------------
.. autofunction:: paddle.fluid.clip.append_gradient_clip_ops
:noindex:
error_clip_callback
-------------------
.. autofunction:: paddle.fluid.clip.error_clip_callback
:noindex:
==================================
Data Reader Interface and DataSets
==================================
.. toctree::
:maxdepth: 1
data/data_reader.rst
data/image.rst
data/dataset.rst
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
=========== =================
data_feeder fluid.data_feeder
=========== =================
.. _api_fluid_data_feeder_DataFeeder:
DataFeeder DataFeeder
---------- ----------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
======== ==============
executor fluid.executor
======== ==============
.. _api_fluid_executor_Executor:
Executor Executor
-------- --------
...@@ -12,24 +14,32 @@ Executor ...@@ -12,24 +14,32 @@ Executor
:members: :members:
:noindex: :noindex:
.. _api_fluid_executor_global_scope:
global_scope global_scope
------------ ------------
.. autofunction:: paddle.fluid.executor.global_scope .. autofunction:: paddle.fluid.executor.global_scope
:noindex: :noindex:
.. _api_fluid_executor_scope_guard:
scope_guard scope_guard
----------- -----------
.. autofunction:: paddle.fluid.executor.scope_guard .. autofunction:: paddle.fluid.executor.scope_guard
:noindex: :noindex:
switch_scope .. _api_fluid_executor__switch_scope:
------------
_switch_scope
-------------
.. autofunction:: paddle.fluid.executor.switch_scope .. autofunction:: paddle.fluid.executor._switch_scope
:noindex: :noindex:
.. _api_fluid_executor_fetch_var:
fetch_var fetch_var
--------- ---------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=====
fluid
=====
.. _api_fluid_Block:
Block
-----
.. autoclass:: paddle.fluid.Block
:members:
:noindex:
.. _api_fluid_Variable:
Variable
--------
.. autoclass:: paddle.fluid.Variable
:members:
:noindex:
.. _api_fluid_Program:
Program
-------
.. autoclass:: paddle.fluid.Program
:members:
:noindex:
.. _api_fluid_Operator:
Operator
--------
.. autoclass:: paddle.fluid.Operator
:members:
:noindex:
.. _api_fluid_default_startup_program:
default_startup_program
-----------------------
.. autofunction:: paddle.fluid.default_startup_program
:noindex:
.. _api_fluid_default_main_program:
default_main_program
--------------------
.. autofunction:: paddle.fluid.default_main_program
:noindex:
.. _api_fluid_program_guard:
program_guard
-------------
.. autofunction:: paddle.fluid.program_guard
:noindex:
.. _api_fluid_get_var:
get_var
-------
.. autofunction:: paddle.fluid.get_var
:noindex:
.. _api_fluid_Executor:
Executor
--------
.. autoclass:: paddle.fluid.Executor
:members:
:noindex:
.. _api_fluid_global_scope:
global_scope
------------
.. autofunction:: paddle.fluid.global_scope
:noindex:
.. _api_fluid_scope_guard:
scope_guard
-----------
.. autofunction:: paddle.fluid.scope_guard
:noindex:
.. _api_fluid__switch_scope:
_switch_scope
-------------
.. autofunction:: paddle.fluid._switch_scope
:noindex:
.. _api_fluid_fetch_var:
fetch_var
---------
.. autofunction:: paddle.fluid.fetch_var
:noindex:
.. _api_fluid_Go:
Go
--
.. autoclass:: paddle.fluid.Go
:members:
:noindex:
.. _api_fluid_make_channel:
make_channel
------------
.. autofunction:: paddle.fluid.make_channel
:noindex:
.. _api_fluid_channel_send:
channel_send
------------
.. autofunction:: paddle.fluid.channel_send
:noindex:
.. _api_fluid_channel_recv:
channel_recv
------------
.. autofunction:: paddle.fluid.channel_recv
:noindex:
.. _api_fluid_channel_close:
channel_close
-------------
.. autofunction:: paddle.fluid.channel_close
:noindex:
.. _api_fluid_Select:
Select
------
.. autoclass:: paddle.fluid.Select
:members:
:noindex:
.. _api_fluid_Trainer:
Trainer
-------
.. autoclass:: paddle.fluid.Trainer
:members:
:noindex:
.. _api_fluid_BeginEpochEvent:
BeginEpochEvent
---------------
.. autoclass:: paddle.fluid.BeginEpochEvent
:members:
:noindex:
.. _api_fluid_EndEpochEvent:
EndEpochEvent
-------------
.. autoclass:: paddle.fluid.EndEpochEvent
:members:
:noindex:
.. _api_fluid_BeginStepEvent:
BeginStepEvent
--------------
.. autoclass:: paddle.fluid.BeginStepEvent
:members:
:noindex:
.. _api_fluid_EndStepEvent:
EndStepEvent
------------
.. autoclass:: paddle.fluid.EndStepEvent
:members:
:noindex:
.. _api_fluid_CheckpointConfig:
CheckpointConfig
----------------
.. autoclass:: paddle.fluid.CheckpointConfig
:members:
:noindex:
.. _api_fluid_Inferencer:
Inferencer
----------
.. autoclass:: paddle.fluid.Inferencer
:members:
:noindex:
.. _api_fluid_DistributeTranspiler:
DistributeTranspiler
--------------------
.. autoclass:: paddle.fluid.DistributeTranspiler
:members:
:noindex:
.. _api_fluid_memory_optimize:
memory_optimize
---------------
.. autofunction:: paddle.fluid.memory_optimize
:noindex:
.. _api_fluid_release_memory:
release_memory
--------------
.. autofunction:: paddle.fluid.release_memory
:noindex:
.. _api_fluid_ParallelExecutor:
ParallelExecutor
----------------
.. autoclass:: paddle.fluid.ParallelExecutor
:members:
:noindex:
.. _api_fluid_ExecutionStrategy:
ExecutionStrategy
-----------------
.. autoclass:: paddle.fluid.ExecutionStrategy
:members:
:noindex:
.. _api_fluid_BuildStrategy:
BuildStrategy
-------------
.. autoclass:: paddle.fluid.BuildStrategy
:members:
:noindex:
.. _api_fluid_create_lod_tensor:
create_lod_tensor
-----------------
.. autofunction:: paddle.fluid.create_lod_tensor
:noindex:
.. _api_fluid_create_random_int_lodtensor:
create_random_int_lodtensor
---------------------------
.. autofunction:: paddle.fluid.create_random_int_lodtensor
:noindex:
.. _api_fluid_LoDTensor:
LoDTensor
---------
.. autoclass:: paddle.fluid.LoDTensor
:members:
:noindex:
.. _api_fluid_CPUPlace:
CPUPlace
--------
.. autoclass:: paddle.fluid.CPUPlace
:members:
:noindex:
.. _api_fluid_CUDAPlace:
CUDAPlace
---------
.. autoclass:: paddle.fluid.CUDAPlace
:members:
:noindex:
.. _api_fluid_CUDAPinnedPlace:
CUDAPinnedPlace
---------------
.. autoclass:: paddle.fluid.CUDAPinnedPlace
:members:
:noindex:
.. _api_fluid_Tensor:
Tensor
------
.. autoclass:: paddle.fluid.Tensor
:members:
:noindex:
.. _api_fluid_ParamAttr:
ParamAttr
---------
.. autoclass:: paddle.fluid.ParamAttr
:members:
:noindex:
.. _api_fluid_WeightNormParamAttr:
WeightNormParamAttr
-------------------
.. autoclass:: paddle.fluid.WeightNormParamAttr
:members:
:noindex:
.. _api_fluid_DataFeeder:
DataFeeder
----------
.. autoclass:: paddle.fluid.DataFeeder
:members:
:noindex:
.. _api_fluid_Scope:
Scope
-----
.. autoclass:: paddle.fluid.Scope
:members:
:noindex:
...@@ -29,19 +29,27 @@ def parse_arg(): ...@@ -29,19 +29,27 @@ def parse_arg():
class DocGenerator(object): class DocGenerator(object):
def __init__(self, module_name, stream=sys.stdout): def __init__(self, module_name=None, stream=sys.stdout):
if module_name == "":
module_name = None
self.stream = stream self.stream = stream
self.module_name = module_name if module_name is None:
if not hasattr(fluid, module_name): self.module_name = "fluid"
raise ValueError("Cannot find fluid.{0}".format(module_name))
else: else:
self.module = getattr(fluid, module_name) self.module_name = "fluid." + module_name
if module_name is None:
self.module = fluid
else:
if not hasattr(fluid, module_name):
raise ValueError("Cannot find fluid.{0}".format(module_name))
else:
self.module = getattr(fluid, module_name)
self.stream.write('''.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` self.stream.write('''.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
''') ''')
self._print_header_(module_name, dot='=', is_title=True) self._print_header_(self.module_name, dot='=', is_title=True)
def print_submodule(self, submodule_name): def print_submodule(self, submodule_name):
submodule = getattr(self.module, submodule_name) submodule = getattr(self.module, submodule_name)
...@@ -60,25 +68,29 @@ class DocGenerator(object): ...@@ -60,25 +68,29 @@ class DocGenerator(object):
self._print_header_(name, dot='=', is_title=False) self._print_header_(name, dot='=', is_title=False)
def print_item(self, name): def print_item(self, name):
item = getattr(self.module, name) item = getattr(self.module, name, None)
if item is None:
return
if isinstance(item, types.TypeType): if isinstance(item, types.TypeType):
self.print_class(name) self.print_class(name)
elif isinstance(item, types.FunctionType): elif isinstance(item, types.FunctionType):
self.print_method(name) self.print_method(name)
else: else:
raise RuntimeError("Unsupported item {0}".format(name)) pass
def print_class(self, name): def print_class(self, name):
self._print_ref_(name)
self._print_header_(name, dot='-', is_title=False) self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autoclass:: paddle.fluid.{0}.{1} self.stream.write('''.. autoclass:: paddle.{0}.{1}
:members: :members:
:noindex: :noindex:
'''.format(self.module_name, name)) '''.format(self.module_name, name))
def print_method(self, name): def print_method(self, name):
self._print_ref_(name)
self._print_header_(name, dot='-', is_title=False) self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autofunction:: paddle.fluid.{0}.{1} self.stream.write('''.. autofunction:: paddle.{0}.{1}
:noindex: :noindex:
'''.format(self.module_name, name)) '''.format(self.module_name, name))
...@@ -94,6 +106,10 @@ class DocGenerator(object): ...@@ -94,6 +106,10 @@ class DocGenerator(object):
self.stream.write('\n') self.stream.write('\n')
self.stream.write('\n') self.stream.write('\n')
def _print_ref_(self, name):
self.stream.write(".. _api_{0}_{1}:\n\n".format("_".join(
self.module_name.split(".")), name))
def main(): def main():
args = parse_arg() args = parse_arg()
......
#!/bin/bash #!/bin/bash
python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler metric > layers.rst python gen_doc.py layers --submodules control_flow device io nn ops tensor learning_rate_scheduler detection metric_op tensor > layers.rst
for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer transpiler for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer transpiler recordio_writer backward average profiler
do do
python gen_doc.py ${module} > ${module}.rst python gen_doc.py ${module} > ${module}.rst
done done
python gen_doc.py "" > fluid.rst
====================== =============
Fluid API Reference
====================== =============
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
fluid.rst
layers.rst layers.rst
data_feeder.rst data_feeder.rst
executor.rst executor.rst
...@@ -18,3 +19,8 @@ Fluid ...@@ -18,3 +19,8 @@ Fluid
regularizer.rst regularizer.rst
io.rst io.rst
data.rst data.rst
transpiler.rst
recordio_writer.rst
backward.rst
average.rst
profiler.rst
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
=========== =================
initializer fluid.initializer
=========== =================
.. _api_fluid_initializer_Constant:
Constant Constant
-------- --------
...@@ -12,6 +14,8 @@ Constant ...@@ -12,6 +14,8 @@ Constant
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_Uniform:
Uniform Uniform
------- -------
...@@ -19,6 +23,8 @@ Uniform ...@@ -19,6 +23,8 @@ Uniform
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_Normal:
Normal Normal
------ ------
...@@ -26,6 +32,8 @@ Normal ...@@ -26,6 +32,8 @@ Normal
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_Xavier:
Xavier Xavier
------ ------
...@@ -33,6 +41,8 @@ Xavier ...@@ -33,6 +41,8 @@ Xavier
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_Bilinear:
Bilinear Bilinear
-------- --------
...@@ -40,18 +50,33 @@ Bilinear ...@@ -40,18 +50,33 @@ Bilinear
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_MSRA:
MSRA
----
.. autoclass:: paddle.fluid.initializer.MSRA
:members:
:noindex:
.. _api_fluid_initializer_force_init_on_cpu:
force_init_on_cpu force_init_on_cpu
----------------- -----------------
.. autofunction:: paddle.fluid.initializer.force_init_on_cpu .. autofunction:: paddle.fluid.initializer.force_init_on_cpu
:noindex: :noindex:
.. _api_fluid_initializer_init_on_cpu:
init_on_cpu init_on_cpu
----------- -----------
.. autofunction:: paddle.fluid.initializer.init_on_cpu .. autofunction:: paddle.fluid.initializer.init_on_cpu
:noindex: :noindex:
.. _api_fluid_initializer_ConstantInitializer:
ConstantInitializer ConstantInitializer
------------------- -------------------
...@@ -59,6 +84,8 @@ ConstantInitializer ...@@ -59,6 +84,8 @@ ConstantInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_UniformInitializer:
UniformInitializer UniformInitializer
------------------ ------------------
...@@ -66,6 +93,8 @@ UniformInitializer ...@@ -66,6 +93,8 @@ UniformInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_NormalInitializer:
NormalInitializer NormalInitializer
----------------- -----------------
...@@ -73,6 +102,8 @@ NormalInitializer ...@@ -73,6 +102,8 @@ NormalInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_XavierInitializer:
XavierInitializer XavierInitializer
----------------- -----------------
...@@ -80,6 +111,8 @@ XavierInitializer ...@@ -80,6 +111,8 @@ XavierInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_BilinearInitializer:
BilinearInitializer BilinearInitializer
------------------- -------------------
...@@ -87,3 +120,12 @@ BilinearInitializer ...@@ -87,3 +120,12 @@ BilinearInitializer
:members: :members:
:noindex: :noindex:
.. _api_fluid_initializer_MSRAInitializer:
MSRAInitializer
---------------
.. autoclass:: paddle.fluid.initializer.MSRAInitializer
:members:
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
== ========
io fluid.io
== ========
.. _api_fluid_io_save_vars:
save_vars save_vars
--------- ---------
...@@ -11,84 +13,112 @@ save_vars ...@@ -11,84 +13,112 @@ save_vars
.. autofunction:: paddle.fluid.io.save_vars .. autofunction:: paddle.fluid.io.save_vars
:noindex: :noindex:
.. _api_fluid_io_save_params:
save_params save_params
----------- -----------
.. autofunction:: paddle.fluid.io.save_params .. autofunction:: paddle.fluid.io.save_params
:noindex: :noindex:
.. _api_fluid_io_save_persistables:
save_persistables save_persistables
----------------- -----------------
.. autofunction:: paddle.fluid.io.save_persistables .. autofunction:: paddle.fluid.io.save_persistables
:noindex: :noindex:
.. _api_fluid_io_load_vars:
load_vars load_vars
--------- ---------
.. autofunction:: paddle.fluid.io.load_vars .. autofunction:: paddle.fluid.io.load_vars
:noindex: :noindex:
.. _api_fluid_io_load_params:
load_params load_params
----------- -----------
.. autofunction:: paddle.fluid.io.load_params .. autofunction:: paddle.fluid.io.load_params
:noindex: :noindex:
.. _api_fluid_io_load_persistables:
load_persistables load_persistables
----------------- -----------------
.. autofunction:: paddle.fluid.io.load_persistables .. autofunction:: paddle.fluid.io.load_persistables
:noindex: :noindex:
.. _api_fluid_io_save_inference_model:
save_inference_model save_inference_model
-------------------- --------------------
.. autofunction:: paddle.fluid.io.save_inference_model .. autofunction:: paddle.fluid.io.save_inference_model
:noindex: :noindex:
.. _api_fluid_io_load_inference_model:
load_inference_model load_inference_model
-------------------- --------------------
.. autofunction:: paddle.fluid.io.load_inference_model .. autofunction:: paddle.fluid.io.load_inference_model
:noindex: :noindex:
.. _api_fluid_io_get_inference_program:
get_inference_program get_inference_program
--------------------- ---------------------
.. autofunction:: paddle.fluid.io.get_inference_program .. autofunction:: paddle.fluid.io.get_inference_program
:noindex: :noindex:
.. _api_fluid_io_save_checkpoint:
save_checkpoint save_checkpoint
--------------- ---------------
.. autofunction:: paddle.fluid.io.save_checkpoint .. autofunction:: paddle.fluid.io.save_checkpoint
:noindex: :noindex:
.. _api_fluid_io_load_checkpoint:
load_checkpoint load_checkpoint
--------------- ---------------
.. autofunction:: paddle.fluid.io.load_checkpoint .. autofunction:: paddle.fluid.io.load_checkpoint
:noindex: :noindex:
.. _api_fluid_io_clean_checkpoint:
clean_checkpoint clean_checkpoint
---------------- ----------------
.. autofunction:: paddle.fluid.io.clean_checkpoint .. autofunction:: paddle.fluid.io.clean_checkpoint
:noindex: :noindex:
.. _api_fluid_io_load_persist_vars_without_grad:
load_persist_vars_without_grad load_persist_vars_without_grad
------------------------------ ------------------------------
.. autofunction:: paddle.fluid.io.load_persist_vars_without_grad .. autofunction:: paddle.fluid.io.load_persist_vars_without_grad
:noindex: :noindex:
.. _api_fluid_io_save_persist_vars_without_grad:
save_persist_vars_without_grad save_persist_vars_without_grad
------------------------------ ------------------------------
.. autofunction:: paddle.fluid.io.save_persist_vars_without_grad .. autofunction:: paddle.fluid.io.save_persist_vars_without_grad
:noindex: :noindex:
.. _api_fluid_io_get_latest_checkpoint_serial:
get_latest_checkpoint_serial get_latest_checkpoint_serial
---------------------------- ----------------------------
......
此差异已折叠。
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
======= =============
metrics fluid.metrics
======= =============
.. _api_fluid_metrics_MetricBase:
MetricBase MetricBase
---------- ----------
...@@ -12,6 +14,8 @@ MetricBase ...@@ -12,6 +14,8 @@ MetricBase
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_CompositeMetric:
CompositeMetric CompositeMetric
--------------- ---------------
...@@ -19,6 +23,26 @@ CompositeMetric ...@@ -19,6 +23,26 @@ CompositeMetric
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_Precision:
Precision
---------
.. autoclass:: paddle.fluid.metrics.Precision
:members:
:noindex:
.. _api_fluid_metrics_Recall:
Recall
------
.. autoclass:: paddle.fluid.metrics.Recall
:members:
:noindex:
.. _api_fluid_metrics_Accuracy:
Accuracy Accuracy
-------- --------
...@@ -26,6 +50,8 @@ Accuracy ...@@ -26,6 +50,8 @@ Accuracy
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_ChunkEvaluator:
ChunkEvaluator ChunkEvaluator
-------------- --------------
...@@ -33,6 +59,8 @@ ChunkEvaluator ...@@ -33,6 +59,8 @@ ChunkEvaluator
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_EditDistance:
EditDistance EditDistance
------------ ------------
...@@ -40,6 +68,8 @@ EditDistance ...@@ -40,6 +68,8 @@ EditDistance
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_DetectionMAP:
DetectionMAP DetectionMAP
------------ ------------
...@@ -47,6 +77,8 @@ DetectionMAP ...@@ -47,6 +77,8 @@ DetectionMAP
:members: :members:
:noindex: :noindex:
.. _api_fluid_metrics_Auc:
Auc Auc
--- ---
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
==== ==========
nets fluid.nets
==== ==========
.. _api_fluid_nets_simple_img_conv_pool:
simple_img_conv_pool simple_img_conv_pool
-------------------- --------------------
...@@ -11,18 +13,24 @@ simple_img_conv_pool ...@@ -11,18 +13,24 @@ simple_img_conv_pool
.. autofunction:: paddle.fluid.nets.simple_img_conv_pool .. autofunction:: paddle.fluid.nets.simple_img_conv_pool
:noindex: :noindex:
.. _api_fluid_nets_sequence_conv_pool:
sequence_conv_pool sequence_conv_pool
------------------ ------------------
.. autofunction:: paddle.fluid.nets.sequence_conv_pool .. autofunction:: paddle.fluid.nets.sequence_conv_pool
:noindex: :noindex:
.. _api_fluid_nets_glu:
glu glu
--- ---
.. autofunction:: paddle.fluid.nets.glu .. autofunction:: paddle.fluid.nets.glu
:noindex: :noindex:
.. _api_fluid_nets_scaled_dot_product_attention:
scaled_dot_product_attention scaled_dot_product_attention
---------------------------- ----------------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
========= ===============
optimizer fluid.optimizer
========= ===============
.. _api_fluid_optimizer_SGD:
SGD SGD
--- ---
...@@ -12,6 +14,8 @@ SGD ...@@ -12,6 +14,8 @@ SGD
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Momentum:
Momentum Momentum
-------- --------
...@@ -19,6 +23,8 @@ Momentum ...@@ -19,6 +23,8 @@ Momentum
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Adagrad:
Adagrad Adagrad
------- -------
...@@ -26,6 +32,8 @@ Adagrad ...@@ -26,6 +32,8 @@ Adagrad
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Adam:
Adam Adam
---- ----
...@@ -33,6 +41,8 @@ Adam ...@@ -33,6 +41,8 @@ Adam
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Adamax:
Adamax Adamax
------ ------
...@@ -40,6 +50,8 @@ Adamax ...@@ -40,6 +50,8 @@ Adamax
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_DecayedAdagrad:
DecayedAdagrad DecayedAdagrad
-------------- --------------
...@@ -47,6 +59,17 @@ DecayedAdagrad ...@@ -47,6 +59,17 @@ DecayedAdagrad
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Ftrl:
Ftrl
----
.. autoclass:: paddle.fluid.optimizer.Ftrl
:members:
:noindex:
.. _api_fluid_optimizer_SGDOptimizer:
SGDOptimizer SGDOptimizer
------------ ------------
...@@ -54,6 +77,8 @@ SGDOptimizer ...@@ -54,6 +77,8 @@ SGDOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_MomentumOptimizer:
MomentumOptimizer MomentumOptimizer
----------------- -----------------
...@@ -61,6 +86,8 @@ MomentumOptimizer ...@@ -61,6 +86,8 @@ MomentumOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_AdagradOptimizer:
AdagradOptimizer AdagradOptimizer
---------------- ----------------
...@@ -68,6 +95,8 @@ AdagradOptimizer ...@@ -68,6 +95,8 @@ AdagradOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_AdamOptimizer:
AdamOptimizer AdamOptimizer
------------- -------------
...@@ -75,6 +104,8 @@ AdamOptimizer ...@@ -75,6 +104,8 @@ AdamOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_AdamaxOptimizer:
AdamaxOptimizer AdamaxOptimizer
--------------- ---------------
...@@ -82,6 +113,8 @@ AdamaxOptimizer ...@@ -82,6 +113,8 @@ AdamaxOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_DecayedAdagradOptimizer:
DecayedAdagradOptimizer DecayedAdagradOptimizer
----------------------- -----------------------
...@@ -89,6 +122,8 @@ DecayedAdagradOptimizer ...@@ -89,6 +122,8 @@ DecayedAdagradOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_RMSPropOptimizer:
RMSPropOptimizer RMSPropOptimizer
---------------- ----------------
...@@ -96,6 +131,17 @@ RMSPropOptimizer ...@@ -96,6 +131,17 @@ RMSPropOptimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_FtrlOptimizer:
FtrlOptimizer
-------------
.. autoclass:: paddle.fluid.optimizer.FtrlOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_Adadelta:
Adadelta Adadelta
-------- --------
...@@ -103,6 +149,8 @@ Adadelta ...@@ -103,6 +149,8 @@ Adadelta
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_ModelAverage:
ModelAverage ModelAverage
------------ ------------
...@@ -110,6 +158,8 @@ ModelAverage ...@@ -110,6 +158,8 @@ ModelAverage
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_Optimizer:
Optimizer Optimizer
--------- ---------
...@@ -117,3 +167,12 @@ Optimizer ...@@ -117,3 +167,12 @@ Optimizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_optimizer_RMSPropOptimizer:
RMSPropOptimizer
----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
========== ================
param_attr fluid.param_attr
========== ================
.. _api_fluid_param_attr_ParamAttr:
ParamAttr ParamAttr
--------- ---------
...@@ -12,6 +14,8 @@ ParamAttr ...@@ -12,6 +14,8 @@ ParamAttr
:members: :members:
:noindex: :noindex:
.. _api_fluid_param_attr_WeightNormParamAttr:
WeightNormParamAttr WeightNormParamAttr
------------------- -------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
======== ==============
profiler fluid.profiler
======== ==============
.. _api_fluid_profiler_cuda_profiler:
cuda_profiler cuda_profiler
------------- -------------
...@@ -11,24 +13,32 @@ cuda_profiler ...@@ -11,24 +13,32 @@ cuda_profiler
.. autofunction:: paddle.fluid.profiler.cuda_profiler .. autofunction:: paddle.fluid.profiler.cuda_profiler
:noindex: :noindex:
.. _api_fluid_profiler_reset_profiler:
reset_profiler reset_profiler
-------------- --------------
.. autofunction:: paddle.fluid.profiler.reset_profiler .. autofunction:: paddle.fluid.profiler.reset_profiler
:noindex: :noindex:
.. _api_fluid_profiler_profiler:
profiler profiler
-------- --------
.. autofunction:: paddle.fluid.profiler.profiler .. autofunction:: paddle.fluid.profiler.profiler
:noindex: :noindex:
.. _api_fluid_profiler_start_profiler:
start_profiler start_profiler
-------------- --------------
.. autofunction:: paddle.fluid.profiler.start_profiler .. autofunction:: paddle.fluid.profiler.start_profiler
:noindex: :noindex:
.. _api_fluid_profiler_stop_profiler:
stop_profiler stop_profiler
------------- -------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=====================
fluid.recordio_writer
=====================
.. _api_fluid_recordio_writer_convert_reader_to_recordio_file:
convert_reader_to_recordio_file
-------------------------------
.. autofunction:: paddle.fluid.recordio_writer.convert_reader_to_recordio_file
:noindex:
.. _api_fluid_recordio_writer_convert_reader_to_recordio_files:
convert_reader_to_recordio_files
--------------------------------
.. autofunction:: paddle.fluid.recordio_writer.convert_reader_to_recordio_files
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
=========== =================
regularizer fluid.regularizer
=========== =================
.. _api_fluid_regularizer_append_regularization_ops:
append_regularization_ops append_regularization_ops
------------------------- -------------------------
...@@ -11,12 +13,7 @@ append_regularization_ops ...@@ -11,12 +13,7 @@ append_regularization_ops
.. autofunction:: paddle.fluid.regularizer.append_regularization_ops .. autofunction:: paddle.fluid.regularizer.append_regularization_ops
:noindex: :noindex:
WeightDecayRegularizer .. _api_fluid_regularizer_L1Decay:
----------------------
.. autoclass:: paddle.fluid.regularizer.WeightDecayRegularizer
:members:
:noindex:
L1Decay L1Decay
------- -------
...@@ -25,6 +22,8 @@ L1Decay ...@@ -25,6 +22,8 @@ L1Decay
:members: :members:
:noindex: :noindex:
.. _api_fluid_regularizer_L2Decay:
L2Decay L2Decay
------- -------
...@@ -32,6 +31,8 @@ L2Decay ...@@ -32,6 +31,8 @@ L2Decay
:members: :members:
:noindex: :noindex:
.. _api_fluid_regularizer_L1DecayRegularizer:
L1DecayRegularizer L1DecayRegularizer
------------------ ------------------
...@@ -39,6 +40,8 @@ L1DecayRegularizer ...@@ -39,6 +40,8 @@ L1DecayRegularizer
:members: :members:
:noindex: :noindex:
.. _api_fluid_regularizer_L2DecayRegularizer:
L2DecayRegularizer L2DecayRegularizer
------------------ ------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY! !DO NOT EDIT THIS FILE MANUALLY!
========== ================
transpiler fluid.transpiler
========== ================
.. _api_fluid_transpiler_DistributeTranspiler:
DistributeTranspiler DistributeTranspiler
-------------------- --------------------
...@@ -12,12 +14,7 @@ DistributeTranspiler ...@@ -12,12 +14,7 @@ DistributeTranspiler
:members: :members:
:noindex: :noindex:
InferenceTranspiler .. _api_fluid_transpiler_memory_optimize:
-------------------
.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler
:members:
:noindex:
memory_optimize memory_optimize
--------------- ---------------
...@@ -25,12 +22,16 @@ memory_optimize ...@@ -25,12 +22,16 @@ memory_optimize
.. autofunction:: paddle.fluid.transpiler.memory_optimize .. autofunction:: paddle.fluid.transpiler.memory_optimize
:noindex: :noindex:
.. _api_fluid_transpiler_release_memory:
release_memory release_memory
-------------- --------------
.. autofunction:: paddle.fluid.transpiler.release_memory .. autofunction:: paddle.fluid.transpiler.release_memory
:noindex: :noindex:
.. _api_fluid_transpiler_HashName:
HashName HashName
-------- --------
...@@ -38,9 +39,12 @@ HashName ...@@ -38,9 +39,12 @@ HashName
:members: :members:
:noindex: :noindex:
.. _api_fluid_transpiler_RoundRobin:
RoundRobin RoundRobin
---------- ----------
.. autoclass:: paddle.fluid.transpiler.RoundRobin .. autoclass:: paddle.fluid.transpiler.RoundRobin
:members: :members:
:noindex: :noindex:
...@@ -213,3 +213,12 @@ virtualenv本身也是Python的一个包,可以用pip进行安装: ...@@ -213,3 +213,12 @@ virtualenv本身也是Python的一个包,可以用pip进行安装:
保存并关闭文件。 保存并关闭文件。
这样,每次打开终端时就会自动启动名为‘paddle’的Python环境了。 这样,每次打开终端时就会自动启动名为‘paddle’的Python环境了。
10. 通过pip安装的PaddlePaddle在 :code:`import paddle.fluid` 报找不到 :code:`libmkldnn.so` 或 :code:`libmklml_intel.so`
------------------------------------------------------------------------------------------
出现这种问题的原因是在导入 :code:`paddle.fluid` 时需要加载 :code:`libmkldnn.so` 和 :code:`libmklml_intel.so`,
但是系统没有找到该文件。一般通过pip安装PaddlePaddle时会将 :code:`libmkldnn.so` 和 :code:`libmklml_intel.so`
拷贝到 :code:`/usr/local/lib` 路径下,所以解决办法是将该路径加到 :code:`LD_LIBRARY_PATH` 环境变量下,
即: :code:`export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH` 。
**注意**:如果是在虚拟环境中安装PaddlePaddle, :code:`libmkldnn.so` 和 :code:`libmklml_intel.so` 可能不在 :code:`/usr/local/lib` 路径下。
\ No newline at end of file
...@@ -147,10 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, ...@@ -147,10 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
"Input tensor type is not supported: ", in.type().name()); "Input tensor type is not supported: ", in.type().name());
memory::data_type out_type = in_type; memory::data_type out_type = in_type;
memory::format in_format = auto in_format = MKLDNNFormatForSize(in_tz.size(), in.format());
in_tz.size() == 2 ? memory::format::nc : in.format(); auto out_format =
memory::format out_format = MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout));
out_tz.size() == 2 ? memory::format::nc : ToMKLDNNFormat(out_layout);
void* in_data = GetDataFromTensor(in, in_type); void* in_data = GetDataFromTensor(in, in_type);
......
...@@ -61,6 +61,13 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) { ...@@ -61,6 +61,13 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) {
if (iter != dict.end()) return iter->second; if (iter != dict.end()) return iter->second;
return MKLDNNDataType::data_undef; return MKLDNNDataType::data_undef;
} }
inline MKLDNNFormat MKLDNNFormatForSize(size_t dims_size,
MKLDNNFormat default_format) {
return (dims_size == 1
? mkldnn::memory::format::x
: dims_size == 2 ? mkldnn::memory::format::nc : default_format);
}
#endif #endif
void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
......
...@@ -47,9 +47,13 @@ void DataTransform(const OpKernelType& expected_kernel_type, ...@@ -47,9 +47,13 @@ void DataTransform(const OpKernelType& expected_kernel_type,
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Just set layout/format. No real transform occur // Just set layout/format. No real transform occur
auto out_format =
MKLDNNFormatForSize(in.dims().size(), ToMKLDNNFormat(lin));
out.ShareDataWith(input_tensor); out.ShareDataWith(input_tensor);
out.set_layout(DataLayout::kMKLDNN); out.set_layout(DataLayout::kMKLDNN);
out.set_format(ToMKLDNNFormat(lin)); out.set_format(out_format);
#endif #endif
} else { } else {
// Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel
......
...@@ -103,50 +103,23 @@ void BroadcastOpHandle::RunImpl() { ...@@ -103,50 +103,23 @@ void BroadcastOpHandle::RunImpl() {
}); });
} }
// FIXME(zcd): a temporary fix for some language model that has sparse this->RunAndRecordEvent([&] {
// parameter. {
bool use_mutex = true; platform::NCCLGroupGuard guard;
if (in_var->IsType<paddle::framework::SelectedRows>()) { for (auto &call : broadcast_calls) {
use_mutex = false; call();
}
if (use_mutex) {
this->RunAndRecordEvent([&] {
{
platform::NCCLGroupGuard guard;
for (auto &call : broadcast_calls) {
call();
}
}
if (!out_handle->IsTheSameVar(*in_var_handle)) {
auto out_var = var_scopes.at(in_var_handle->scope_idx_)
->FindVar(out_var_handles[0]->name_);
paddle::framework::TensorCopy(
in_tensor, in_var_handle->place_,
*(dev_ctxes_.at(in_var_handle->place_)),
&VariableVisitor::GetMutableTensor(out_var));
}
});
} else {
this->RunAndRecordEventNoMutex([&] {
{
platform::NCCLGroupGuard guard;
for (auto &call : broadcast_calls) {
call();
}
}
if (!out_handle->IsTheSameVar(*in_var_handle)) {
auto out_var = var_scopes.at(in_var_handle->scope_idx_)
->FindVar(out_var_handles[0]->name_);
paddle::framework::TensorCopy(
in_tensor, in_var_handle->place_,
*(dev_ctxes_.at(in_var_handle->place_)),
&VariableVisitor::GetMutableTensor(out_var));
} }
}); }
}
if (!out_handle->IsTheSameVar(*in_var_handle)) {
auto out_var = var_scopes.at(in_var_handle->scope_idx_)
->FindVar(out_var_handles[0]->name_);
paddle::framework::TensorCopy(
in_tensor, in_var_handle->place_,
*(dev_ctxes_.at(in_var_handle->place_)),
&VariableVisitor::GetMutableTensor(out_var));
}
});
#else #else
PADDLE_THROW("CUDA is not enabled."); PADDLE_THROW("CUDA is not enabled.");
#endif #endif
......
...@@ -470,7 +470,7 @@ void MultiDevSSAGraphBuilder::ConnectOp(SSAGraph *result, OpHandleBase *op, ...@@ -470,7 +470,7 @@ void MultiDevSSAGraphBuilder::ConnectOp(SSAGraph *result, OpHandleBase *op,
void MultiDevSSAGraphBuilder::CreateDistTrainOp(SSAGraph *result, void MultiDevSSAGraphBuilder::CreateDistTrainOp(SSAGraph *result,
const OpDesc &op) const { const OpDesc &op) const {
int op_dev_id = -1; int op_dev_id = -1;
if (op.Type() == "split_byref") { if (op.Type() == "split_byref" || op.Type() == "split_selected_rows") {
op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]); op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) { if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames()); op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames());
......
...@@ -47,7 +47,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { ...@@ -47,7 +47,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
#endif #endif
std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override; std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
int GetVarDeviceID(const std::string &varname) const; int GetVarDeviceID(const std::string &varname) const override;
private: private:
void CreateOpHandleIOs(SSAGraph *result, const OpDesc &op, void CreateOpHandleIOs(SSAGraph *result, const OpDesc &op,
......
...@@ -11,8 +11,8 @@ ...@@ -11,8 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/op_handle_base.h"
#include <map>
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -122,35 +122,17 @@ void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) { ...@@ -122,35 +122,17 @@ void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (!events_.empty()) { // Use event if (!events_.empty()) { // Use event
std::function<void()> method = callback; std::function<void()> method = callback;
// NOTE(zcd): device context must be ordered here because RecordEvent
// will use a mutex to ensure the safe of multi-threads.
std::map<platform::DeviceContext *, platform::Place> ordered_ctxes;
for (auto &p : dev_ctxes_) { for (auto &p : dev_ctxes_) {
method = [method, p, this]() { ordered_ctxes.emplace(p.second, p.first);
static_cast<platform::CUDADeviceContext *>(p.second)->RecordEvent(
events_.at(boost::get<platform::CUDAPlace>(p.first).device),
method);
};
} }
method(); for (auto &p : ordered_ctxes) {
} else {
#endif
callback();
#ifdef PADDLE_WITH_CUDA
}
#endif
}
void OpHandleBase::RunAndRecordEventNoMutex(
const std::function<void()> &callback) {
#ifdef PADDLE_WITH_CUDA
if (!events_.empty()) { // Use event
std::function<void()> method = callback;
for (auto &p : dev_ctxes_) {
method = [method, p, this]() { method = [method, p, this]() {
static_cast<platform::CUDADeviceContext *>(p.second) static_cast<platform::CUDADeviceContext *>(p.first)->RecordEvent(
->RecordEventNoMutex( events_.at(boost::get<platform::CUDAPlace>(p.second).device),
events_.at(boost::get<platform::CUDAPlace>(p.first).device), method);
method);
}; };
} }
method(); method();
......
...@@ -85,10 +85,6 @@ class OpHandleBase { ...@@ -85,10 +85,6 @@ class OpHandleBase {
protected: protected:
void RunAndRecordEvent(const std::function<void()> &callback); void RunAndRecordEvent(const std::function<void()> &callback);
// FIXME(zcd): A temporary fix for some language model that has sparse
// parameter.
void RunAndRecordEventNoMutex(const std::function<void()> &callback);
void RunAndRecordEvent(platform::Place p, void RunAndRecordEvent(platform::Place p,
const std::function<void()> &callback); const std::function<void()> &callback);
......
...@@ -80,9 +80,7 @@ void ReduceOpHandle::RunImpl() { ...@@ -80,9 +80,7 @@ void ReduceOpHandle::RunImpl() {
} }
if (pre_in_var->IsType<framework::SelectedRows>()) { if (pre_in_var->IsType<framework::SelectedRows>()) {
// FIXME(zcd): A temporary fix for some language model that has sparse this->RunAndRecordEvent([&] {
// parameter.
this->RunAndRecordEventNoMutex([&] {
std::vector<const SelectedRows *> in_selected_rows = std::vector<const SelectedRows *> in_selected_rows =
GetInputValues<SelectedRows>(in_var_handles, var_scopes); GetInputValues<SelectedRows>(in_var_handles, var_scopes);
GatherSelectedRows(in_selected_rows, in_places, dev_ctxes_, t_out_p, GatherSelectedRows(in_selected_rows, in_places, dev_ctxes_, t_out_p,
......
...@@ -27,6 +27,7 @@ enum AttrType { ...@@ -27,6 +27,7 @@ enum AttrType {
BOOLEANS = 7; BOOLEANS = 7;
BLOCK = 8; BLOCK = 8;
LONG = 9; LONG = 9;
BLOCKS = 10;
} }
// OpDesc describes an instance of a C++ framework::OperatorBase // OpDesc describes an instance of a C++ framework::OperatorBase
...@@ -46,6 +47,7 @@ message OpDesc { ...@@ -46,6 +47,7 @@ message OpDesc {
repeated bool bools = 11; repeated bool bools = 11;
optional int32 block_idx = 12; optional int32 block_idx = 12;
optional int64 l = 13; optional int64 l = 13;
repeated int32 blocks_idx = 14;
}; };
message Var { message Var {
......
...@@ -51,8 +51,6 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) { ...@@ -51,8 +51,6 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) {
} }
std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
PADDLE_ENFORCE(t.type().hash_code() == typeid(float).hash_code());
if (!platform::is_cpu_place(t.place())) { if (!platform::is_cpu_place(t.place())) {
LoDTensor tt; LoDTensor tt;
framework::TensorCopy(t, platform::CPUPlace(), &tt); framework::TensorCopy(t, platform::CPUPlace(), &tt);
...@@ -70,7 +68,13 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { ...@@ -70,7 +68,13 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
// only print first ten elements // only print first ten elements
int64_t size = t.numel() < 10 ? t.numel() : 10; int64_t size = t.numel() < 10 ? t.numel() : 10;
for (int64_t i = 0; i < size; ++i) { for (int64_t i = 0; i < size; ++i) {
os << t.data<float>()[i] << " "; if (t.type().hash_code() == typeid(float).hash_code()) {
os << t.data<float>()[i] << " ";
} else if (t.type().hash_code() == typeid(int64_t).hash_code()) {
os << t.data<int64_t>()[i] << " ";
} else {
PADDLE_THROW("LoDTensor data type not in [float, int64_t]");
}
} }
return os; return os;
......
...@@ -26,6 +26,20 @@ ...@@ -26,6 +26,20 @@
namespace paddle { namespace paddle {
namespace framework { namespace framework {
TEST(LoD, PrintLoDTensor) {
LoDTensor tensor1;
tensor1.mutable_data<float>(platform::CPUPlace());
tensor1.data<float>()[0] = 0.2;
tensor1.data<float>()[1] = 0.5;
LOG(INFO) << tensor1;
LoDTensor tensor2;
tensor2.mutable_data<int64_t>(platform::CPUPlace());
tensor2.data<int64_t>()[0] = 1;
tensor2.data<int64_t>()[1] = 2;
LOG(INFO) << tensor2;
}
TEST(LoD, data) { TEST(LoD, data) {
LoD lod{{0, 1, 2}}; LoD lod{{0, 1, 2}};
lod.push_back({0, 2, 4, 5}); lod.push_back({0, 2, 4, 5});
...@@ -37,7 +51,7 @@ TEST(LoD, data) { ...@@ -37,7 +51,7 @@ TEST(LoD, data) {
} }
} }
TEST(LodExpand, test) { TEST(LoD, ExpandLoD) {
LoD lod{{0, 2}}; LoD lod{{0, 2}};
LoDTensor tensor; LoDTensor tensor;
tensor.set_lod(lod); tensor.set_lod(lod);
......
...@@ -211,6 +211,12 @@ void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) { ...@@ -211,6 +211,12 @@ void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) {
need_update_ = true; need_update_ = true;
} }
void OpDesc::SetBlocksAttr(const std::string &name,
std::vector<BlockDesc *> blocks) {
this->attrs_[name] = blocks;
need_update_ = true;
}
void OpDesc::SetAttrMap( void OpDesc::SetAttrMap(
const std::unordered_map<std::string, Attribute> &attr_map) { const std::unordered_map<std::string, Attribute> &attr_map) {
attrs_ = attr_map; attrs_ = attr_map;
...@@ -305,6 +311,13 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> { ...@@ -305,6 +311,13 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
void operator()(const std::vector<bool> &v) const { void operator()(const std::vector<bool> &v) const {
VectorToRepeated(v, attr_->mutable_bools()); VectorToRepeated(v, attr_->mutable_bools());
} }
void operator()(const std::vector<BlockDesc *> &v) const {
std::vector<int> blocks_idx;
for (auto blk : v) {
blocks_idx.push_back(blk->ID());
}
VectorToRepeated(blocks_idx, attr_->mutable_blocks_idx());
}
void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); } void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); }
void operator()(int64_t v) const { attr_->set_l(v); } void operator()(int64_t v) const { attr_->set_l(v); }
void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
......
...@@ -77,6 +77,8 @@ class OpDesc { ...@@ -77,6 +77,8 @@ class OpDesc {
void SetBlockAttr(const std::string &name, BlockDesc *block); void SetBlockAttr(const std::string &name, BlockDesc *block);
void SetBlocksAttr(const std::string &name, std::vector<BlockDesc *> blocks);
Attribute GetAttr(const std::string &name) const; Attribute GetAttr(const std::string &name) const;
Attribute GetNullableAttr(const std::string &name) const; Attribute GetNullableAttr(const std::string &name) const;
......
...@@ -121,7 +121,7 @@ ParallelExecutor::ParallelExecutor( ...@@ -121,7 +121,7 @@ ParallelExecutor::ParallelExecutor(
#endif #endif
} }
builder_ = std::move(builder_factory.Create()); builder_ = builder_factory.Create();
member_->executor_.reset(new details::ThreadedSSAGraphExecutor( member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
exec_strategy, member_->local_scopes_, places, exec_strategy, member_->local_scopes_, places,
builder_->Build(main_program))); builder_->Build(main_program)));
......
...@@ -35,7 +35,8 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>; ...@@ -35,7 +35,8 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using Attribute = using Attribute =
boost::variant<boost::blank, int, float, std::string, std::vector<int>, boost::variant<boost::blank, int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>, bool, std::vector<float>, std::vector<std::string>, bool,
std::vector<bool>, BlockDesc*, int64_t>; std::vector<bool>, BlockDesc*, int64_t,
std::vector<BlockDesc*>>;
using AttributeMap = std::unordered_map<std::string, Attribute>; using AttributeMap = std::unordered_map<std::string, Attribute>;
......
...@@ -70,6 +70,7 @@ $$Out = values$$ ...@@ -70,6 +70,7 @@ $$Out = values$$
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(assign_value, ops::AssignValueOp, ops::AssignValueOpMaker); REGISTER_OPERATOR(assign_value, ops::AssignValueOp, ops::AssignValueOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(assign_value, ops::AssignValueKernel<int>, REGISTER_OP_CPU_KERNEL(assign_value, ops::AssignValueKernel<int>,
ops::AssignValueKernel<float>); ops::AssignValueKernel<float>);
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <limits> #include <limits>
#include "glog/logging.h" // For VLOG
#include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/operators/distributed/request_handler.h" #include "paddle/fluid/operators/distributed/request_handler.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
...@@ -75,6 +76,9 @@ bool GRPCClient::AsyncSendVar(const std::string& ep, ...@@ -75,6 +76,9 @@ bool GRPCClient::AsyncSendVar(const std::string& ep,
var_h.scope = p_scope; var_h.scope = p_scope;
var_h.name = var_name_val; var_h.name = var_name_val;
var_h.ctx = p_ctx; var_h.ctx = p_ctx;
var_h.method = "Send";
VLOG(3) << var_h.String() << " begin";
// stub context // stub context
SendProcessor* s = new SendProcessor(ch); SendProcessor* s = new SendProcessor(ch);
...@@ -129,6 +133,9 @@ bool GRPCClient::AsyncGetVar(const std::string& ep, ...@@ -129,6 +133,9 @@ bool GRPCClient::AsyncGetVar(const std::string& ep,
var_h.scope = p_scope; var_h.scope = p_scope;
var_h.name = var_name_val; var_h.name = var_name_val;
var_h.ctx = p_ctx; var_h.ctx = p_ctx;
var_h.method = "Get";
VLOG(3) << var_h.String() << " begin";
// stub context // stub context
GetProcessor* s = new GetProcessor(ch); GetProcessor* s = new GetProcessor(ch);
...@@ -172,6 +179,9 @@ bool GRPCClient::AsyncPrefetchVar(const std::string& ep, ...@@ -172,6 +179,9 @@ bool GRPCClient::AsyncPrefetchVar(const std::string& ep,
var_h.scope = p_scope; var_h.scope = p_scope;
var_h.name = out_var_name_val; var_h.name = out_var_name_val;
var_h.ctx = p_ctx; var_h.ctx = p_ctx;
var_h.method = "Prefetch";
VLOG(3) << var_h.String() << " begin";
// stub context // stub context
GetProcessor* s = new GetProcessor(ch); GetProcessor* s = new GetProcessor(ch);
...@@ -243,10 +253,11 @@ void GRPCClient::Proceed() { ...@@ -243,10 +253,11 @@ void GRPCClient::Proceed() {
GPR_ASSERT(ok); GPR_ASSERT(ok);
PADDLE_ENFORCE(c); PADDLE_ENFORCE(c);
if (c->status_.ok()) { if (c->status_.ok()) {
VLOG(3) << c->var_h_.String() << " process";
c->Process(); c->Process();
} else { } else {
LOG(FATAL) << "var: " << c->var_h_.String() LOG(FATAL) << c->var_h_.String()
<< " grpc error:" << c->status_.error_message(); << " meets grpc error:" << c->status_.error_message();
} }
delete c; delete c;
{ {
...@@ -258,14 +269,15 @@ void GRPCClient::Proceed() { ...@@ -258,14 +269,15 @@ void GRPCClient::Proceed() {
} }
std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) { std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
// TODO(Yancey1989): make grpc client completely thread-safe
std::lock_guard<std::mutex> guard(chan_mutex_); std::lock_guard<std::mutex> guard(chan_mutex_);
auto it = channels_.find(ep); auto it = channels_.find(ep);
if (it != channels_.end()) { if (it != channels_.end()) {
return it->second; return it->second;
} }
// Channel configurations:
grpc::ChannelArguments args; grpc::ChannelArguments args;
args.SetInt(GRPC_ARG_MAX_RECONNECT_BACKOFF_MS, 2000);
args.SetCompressionAlgorithm(GRPC_COMPRESS_NONE); args.SetCompressionAlgorithm(GRPC_COMPRESS_NONE);
args.SetMaxSendMessageSize(std::numeric_limits<int>::max()); args.SetMaxSendMessageSize(std::numeric_limits<int>::max());
args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max()); args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
......
...@@ -47,14 +47,18 @@ namespace operators { ...@@ -47,14 +47,18 @@ namespace operators {
namespace distributed { namespace distributed {
struct VarHandle { struct VarHandle {
// RPC endpoint.
std::string ep; std::string ep;
const platform::DeviceContext* ctx; const platform::DeviceContext* ctx;
const framework::Scope* scope; const framework::Scope* scope;
// Variable name.
std::string name; std::string name;
// RPC method name.
std::string method;
std::string String() const { std::string String() const {
std::ostringstream s; std::ostringstream s;
s << "name:[" << name << "] ep:[" << ep << "]"; s << method << " name:[" << name << "], ep:[" << ep << "]";
return s.str(); return s.str();
} }
}; };
...@@ -72,6 +76,7 @@ class BaseProcessor { ...@@ -72,6 +76,7 @@ class BaseProcessor {
virtual void Prepare(const VarHandle& var_info, int64_t time_out) { virtual void Prepare(const VarHandle& var_info, int64_t time_out) {
context_.reset(new grpc::ClientContext()); context_.reset(new grpc::ClientContext());
var_h_ = var_info; var_h_ = var_info;
context_->set_wait_for_ready(true);
std::chrono::system_clock::time_point deadline = std::chrono::system_clock::time_point deadline =
std::chrono::system_clock::now() + std::chrono::milliseconds(time_out); std::chrono::system_clock::now() + std::chrono::milliseconds(time_out);
...@@ -81,6 +86,7 @@ class BaseProcessor { ...@@ -81,6 +86,7 @@ class BaseProcessor {
virtual void Prepare(int64_t time_out) { virtual void Prepare(int64_t time_out) {
context_.reset(new grpc::ClientContext()); context_.reset(new grpc::ClientContext());
context_->set_wait_for_ready(true);
std::chrono::system_clock::time_point deadline = std::chrono::system_clock::time_point deadline =
std::chrono::system_clock::now() + std::chrono::milliseconds(time_out); std::chrono::system_clock::now() + std::chrono::milliseconds(time_out);
...@@ -172,26 +178,24 @@ class GRPCClient : public RPCClient { ...@@ -172,26 +178,24 @@ class GRPCClient : public RPCClient {
bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx, bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx,
const framework::Scope& scope, const std::string& var_name, const framework::Scope& scope, const std::string& var_name,
int64_t time_out = RPCClient::rpc_time_out) override; int64_t time_out = FLAGS_grpc_deadline) override;
bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx, bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx,
const framework::Scope& scope, const std::string& var_name, const framework::Scope& scope, const std::string& var_name,
int64_t time_out = RPCClient::rpc_time_out) override; int64_t time_out = FLAGS_grpc_deadline) override;
bool AsyncPrefetchVar(const std::string& ep, bool AsyncPrefetchVar(const std::string& ep,
const platform::DeviceContext& ctx, const platform::DeviceContext& ctx,
const framework::Scope& scope, const framework::Scope& scope,
const std::string& in_var_name, const std::string& in_var_name,
const std::string& out_var_name, const std::string& out_var_name,
int64_t time_out = RPCClient::rpc_time_out) override; int64_t time_out = FLAGS_grpc_deadline) override;
void AsyncSendBatchBarrier( void AsyncSendBatchBarrier(const std::string& ep,
const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) override;
int64_t time_out = RPCClient::rpc_time_out) override;
void AsyncSendFetchBarrier( void AsyncSendFetchBarrier(const std::string& ep,
const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) override;
int64_t time_out = RPCClient::rpc_time_out) override;
void Wait() override; void Wait() override;
...@@ -207,7 +211,7 @@ class GRPCClient : public RPCClient { ...@@ -207,7 +211,7 @@ class GRPCClient : public RPCClient {
void Proceed(); void Proceed();
void AsyncSendComplete(const std::string& ep, void AsyncSendComplete(const std::string& ep,
int64_t time_out = RPCClient::rpc_time_out); int64_t time_out = FLAGS_grpc_deadline);
std::shared_ptr<grpc::Channel> GetChannel(const std::string& ep); std::shared_ptr<grpc::Channel> GetChannel(const std::string& ep);
......
...@@ -41,6 +41,19 @@ class RequestBase { ...@@ -41,6 +41,19 @@ class RequestBase {
virtual ~RequestBase() {} virtual ~RequestBase() {}
virtual void Process() = 0; virtual void Process() = 0;
std::string Status2String(const std::string& method) {
std::string status = "Process";
if (status_ == FINISH) {
status = "Finish";
}
std::ostringstream s;
s << method << " name:[" << GetReqName() << "]"
<< ", ep:[" << ctx_.peer() << "]"
<< " " << status << " using req_id:" << req_id_;
return s.str();
}
CallStatus Status() const { CallStatus Status() const {
std::lock_guard<std::mutex> l(status_mu_); std::lock_guard<std::mutex> l(status_mu_);
return status_; return status_;
...@@ -84,7 +97,7 @@ class RequestSend final : public RequestBase { ...@@ -84,7 +97,7 @@ class RequestSend final : public RequestBase {
void Process() override { void Process() override {
std::string varname = GetReqName(); std::string varname = GetReqName();
VLOG(3) << "RequestSend var_name:" << varname; VLOG(4) << "RequestSend var_name:" << varname;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
auto invar = request_->GetVar(); auto invar = request_->GetVar();
...@@ -119,7 +132,7 @@ class RequestGet final : public RequestBase { ...@@ -119,7 +132,7 @@ class RequestGet final : public RequestBase {
void Process() override { void Process() override {
// proc request. // proc request.
std::string varname = request_.varname(); std::string varname = request_.varname();
VLOG(3) << "RequestGet " << varname; VLOG(4) << "RequestGet " << varname;
auto scope = request_handler_->scope(); auto scope = request_handler_->scope();
auto invar = scope->FindVar(varname); auto invar = scope->FindVar(varname);
...@@ -165,7 +178,7 @@ class RequestPrefetch final : public RequestBase { ...@@ -165,7 +178,7 @@ class RequestPrefetch final : public RequestBase {
// prefetch process... // prefetch process...
std::string in_var_name = request_->Varname(); std::string in_var_name = request_->Varname();
std::string out_var_name = request_->OutVarname(); std::string out_var_name = request_->OutVarname();
VLOG(3) << "RequestPrefetch, in_var_name: " << in_var_name VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name; << " out_var_name: " << out_var_name;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
...@@ -188,10 +201,10 @@ class RequestPrefetch final : public RequestBase { ...@@ -188,10 +201,10 @@ class RequestPrefetch final : public RequestBase {
}; };
void AsyncGRPCServer::WaitServerReady() { void AsyncGRPCServer::WaitServerReady() {
VLOG(3) << "AsyncGRPCServer is wait server ready"; VLOG(4) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_); std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; }); condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(3) << "AsyncGRPCServer WaitSeverReady"; VLOG(4) << "AsyncGRPCServer WaitSeverReady";
} }
void AsyncGRPCServer::StartServer() { void AsyncGRPCServer::StartServer() {
...@@ -230,7 +243,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -230,7 +243,7 @@ void AsyncGRPCServer::StartServer() {
for (int i = 0; i < threadnum; i++) { for (int i = 0; i < threadnum; i++) {
rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind( rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
&AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f))); &AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
VLOG(3) << t.first << " creates threads!"; VLOG(4) << t.first << " creates threads!";
} }
} }
...@@ -247,7 +260,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -247,7 +260,7 @@ void AsyncGRPCServer::StartServer() {
auto& threads = t.second; auto& threads = t.second;
for (size_t i = 0; i < threads.size(); ++i) { for (size_t i = 0; i < threads.size(); ++i) {
threads[i]->join(); threads[i]->join();
VLOG(3) << t.first << " threads ends!"; VLOG(4) << t.first << " threads ends!";
} }
} }
} }
...@@ -255,7 +268,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -255,7 +268,7 @@ void AsyncGRPCServer::StartServer() {
void AsyncGRPCServer::ShutdownQueue() { void AsyncGRPCServer::ShutdownQueue() {
for (auto& t : rpc_cq_) { for (auto& t : rpc_cq_) {
t.second->Shutdown(); t.second->Shutdown();
VLOG(3) << t.first << " shutdown!"; VLOG(4) << t.first << " queue shutdown!";
} }
} }
...@@ -264,7 +277,7 @@ void AsyncGRPCServer::ShutDownImpl() { ...@@ -264,7 +277,7 @@ void AsyncGRPCServer::ShutDownImpl() {
is_shut_down_ = true; is_shut_down_ = true;
ShutdownQueue(); ShutdownQueue();
VLOG(3) << "server_ shutdown!"; VLOG(4) << "server_ shutdown!";
server_->Shutdown(); server_->Shutdown();
} }
...@@ -272,7 +285,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, ...@@ -272,7 +285,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
int req_id) { int req_id) {
std::unique_lock<std::mutex> lock(cq_mutex_); std::unique_lock<std::mutex> lock(cq_mutex_);
if (is_shut_down_) { if (is_shut_down_) {
VLOG(3) << "shutdown, do not TryToRegisterNewSendOne"; VLOG(4) << "shutdown, do not TryToRegisterNewSendOne";
return; return;
} }
...@@ -306,14 +319,14 @@ void AsyncGRPCServer::HandleRequest( ...@@ -306,14 +319,14 @@ void AsyncGRPCServer::HandleRequest(
bool ok = false; bool ok = false;
while (true) { while (true) {
VLOG(3) << "HandleRequest " << rpc_name << " wait next"; VLOG(4) << "HandleRequest " << rpc_name << " wait next";
if (!cq->Next(&tag, &ok)) { if (!cq->Next(&tag, &ok)) {
LOG(INFO) << "CompletionQueue " << rpc_name << " shutdown!"; LOG(INFO) << "CompletionQueue " << rpc_name << " shutdown!";
break; break;
} }
int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag)); int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
VLOG(3) << "HandleRequest " << rpc_name << ", req_id:" << req_id VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id
<< " get next"; << " get next";
auto& reqs = rpc_reqs_[rpc_name]; auto& reqs = rpc_reqs_[rpc_name];
...@@ -324,22 +337,21 @@ void AsyncGRPCServer::HandleRequest( ...@@ -324,22 +337,21 @@ void AsyncGRPCServer::HandleRequest(
base = reqs[req_id]; base = reqs[req_id];
} }
VLOG(3) << base->Status2String(rpc_name);
// reference: // reference:
// https://github.com/tensorflow/tensorflow/issues/5596 // https://github.com/tensorflow/tensorflow/issues/5596
// https://groups.google.com/forum/#!topic/grpc-io/xftlRy-IQwM // https://groups.google.com/forum/#!topic/grpc-io/xftlRy-IQwM
// https://groups.google.com/forum/#!topic/grpc-io/ywATt88Ef_I // https://groups.google.com/forum/#!topic/grpc-io/ywATt88Ef_I
if (!ok) { if (!ok) {
LOG(WARNING) << "completion queue:" << rpc_name LOG(WARNING) << "completion queue:" << rpc_name
<< " recv no regular event:argument name[" << " recv no regular event"
<< base->GetReqName() << "]"; << " context:" << base->Status2String(rpc_name);
TryToRegisterNewOne(rpc_name, req_id); TryToRegisterNewOne(rpc_name, req_id);
delete base; delete base;
continue; continue;
} }
VLOG(3) << "queue id:" << rpc_name << ", req_id:" << req_id
<< ", status:" << base->Status();
switch (base->Status()) { switch (base->Status()) {
case PROCESS: { case PROCESS: {
base->Process(); base->Process();
......
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/operators/distributed/rpc_client.h" #include "paddle/fluid/operators/distributed/rpc_client.h"
#include "gflags/gflags.h"
// default to 3min to avoid temprary network failures.
DEFINE_int32(grpc_deadline, 180000, "deadline timeouts for grpc");
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -15,11 +15,14 @@ ...@@ -15,11 +15,14 @@
#pragma once #pragma once
#include <string> #include <string>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
DECLARE_int32(grpc_deadline);
namespace paddle { namespace paddle {
namespace operators { namespace operators {
namespace distributed { namespace distributed {
...@@ -32,26 +35,26 @@ class RPCClient { ...@@ -32,26 +35,26 @@ class RPCClient {
const platform::DeviceContext& ctx, const platform::DeviceContext& ctx,
const framework::Scope& scope, const framework::Scope& scope,
const std::string& var_name, const std::string& var_name,
int64_t time_out = rpc_time_out) = 0; int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual bool AsyncGetVar(const std::string& ep, virtual bool AsyncGetVar(const std::string& ep,
const platform::DeviceContext& ctx, const platform::DeviceContext& ctx,
const framework::Scope& scope, const framework::Scope& scope,
const std::string& var_name, const std::string& var_name,
int64_t time_out = rpc_time_out) = 0; int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual bool AsyncPrefetchVar(const std::string& ep, virtual bool AsyncPrefetchVar(const std::string& ep,
const platform::DeviceContext& ctx, const platform::DeviceContext& ctx,
const framework::Scope& scope, const framework::Scope& scope,
const std::string& in_var_name, const std::string& in_var_name,
const std::string& out_var_name, const std::string& out_var_name,
int64_t time_out = rpc_time_out) = 0; int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual void AsyncSendBatchBarrier(const std::string& ep, virtual void AsyncSendBatchBarrier(
int64_t time_out = rpc_time_out) = 0; const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual void AsyncSendFetchBarrier(const std::string& ep, virtual void AsyncSendFetchBarrier(
int64_t time_out = rpc_time_out) = 0; const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) = 0;
// SendComplete tells all the server that current trainer have no more data // SendComplete tells all the server that current trainer have no more data
// to train, so that the pserver can reduce it's barrier count, and continue // to train, so that the pserver can reduce it's barrier count, and continue
...@@ -60,8 +63,6 @@ class RPCClient { ...@@ -60,8 +63,6 @@ class RPCClient {
virtual void Wait() = 0; virtual void Wait() = 0;
static constexpr int64_t rpc_time_out = 120 * 1000;
template <typename T> template <typename T>
static RPCClient* GetInstance() { static RPCClient* GetInstance() {
std::call_once(init_flag_, &RPCClient::Init<T>); std::call_once(init_flag_, &RPCClient::Init<T>);
......
...@@ -47,11 +47,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) { ...@@ -47,11 +47,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
return (barrier_counter_[rpc_name] >= client_num_ || exit_flag_.load()); return (barrier_counter_[rpc_name] >= client_num_ || exit_flag_.load());
}); });
VLOG(3) << "batch_barrier_:" << barrier_counter_[rpc_name]; VLOG(3) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name];
} }
void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) { void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
VLOG(3) << "RPCServer begin IncreaseBatchBarrier " << rpc_name; VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
int b = 0; int b = 0;
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
b = ++barrier_counter_[rpc_name]; b = ++barrier_counter_[rpc_name];
...@@ -100,7 +101,7 @@ void RPCServer::SetCond(const std::string& rpc_name) { ...@@ -100,7 +101,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
} }
void RPCServer::WaitCond(const std::string& rpc_name) { void RPCServer::WaitCond(const std::string& rpc_name) {
VLOG(3) << "RPCServer WaitCond " << rpc_name; VLOG(4) << "RPCServer WaitCond " << rpc_name;
int cond = 0; int cond = 0;
{ {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
......
...@@ -76,6 +76,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -76,6 +76,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input,
if (total_written + size_to_write > length) { if (total_written + size_to_write > length) {
size_to_write = length - total_written; size_to_write = length - total_written;
} }
// This log is useful to see how long a internal block size is of rpc.
VLOG(7) << "copy " << size_to_write << " data to CUDAPlace";
memory::Copy(boost::get<platform::CUDAPlace>(place), memory::Copy(boost::get<platform::CUDAPlace>(place),
reinterpret_cast<void*>(p), cpu, data, size_to_write, reinterpret_cast<void*>(p), cpu, data, size_to_write,
gpu_dev_ctx.stream()); gpu_dev_ctx.stream());
...@@ -103,6 +105,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -103,6 +105,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input,
} }
// TODO(gongwb): can we avoid copy? // TODO(gongwb): can we avoid copy?
platform::CPUPlace cpu; platform::CPUPlace cpu;
// This log is useful to see how long a internal block size is of rpc.
VLOG(7) << "copy " << size_to_write << " data to CPUPlace";
memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write); memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
p += size_to_write; p += size_to_write;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/elementwise_add_op.h"
#include "paddle/fluid/operators/elementwise_op_function.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using framework::DataLayout;
using framework::Tensor;
using mkldnn::memory;
using mkldnn::reorder;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::sum;
template <typename T>
class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx =
ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* z = ctx.Output<Tensor>("Out");
const T* x_data = x->data<T>();
const T* y_data = y->data<T>();
T* z_data = z->mutable_data<T>(ctx.GetPlace());
int axis = ctx.Attr<int>("axis");
auto x_dims = x->dims();
auto y_dims = y->dims();
auto z_dims = z->dims();
// Execute default elementwise_add operator when
// broadcast operations need to performed.
if (x_dims != y_dims) {
auto sum_func = [](T a, T b) -> T { return a + b; };
TransformFunctor<decltype(sum_func), T,
paddle::platform::CPUDeviceContext, T>
functor(
x, y, z,
ctx.template device_context<paddle::platform::CPUDeviceContext>(),
sum_func);
axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");
trim_trailing_singular_dims(&y_dims);
axis = (y_dims.size() == 0) ? x_dims.size() : axis;
int pre, n, post;
get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
if (post == 1) {
functor.RunRowWise(n, pre);
} else {
functor.RunMidWise(n, pre, post);
}
z->set_layout(DataLayout::kMKLDNN);
z->set_format(x->format());
} else {
PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN &&
x->format() != memory::format::format_undef,
"Wrong layout/format set for X tensor");
PADDLE_ENFORCE(y->layout() == DataLayout::kMKLDNN &&
y->format() != memory::format::format_undef,
"Wrong layout/format set for X tensor");
std::vector<int> src_x_tz = framework::vectorize2int(x_dims);
std::vector<int> src_y_tz = framework::vectorize2int(y_dims);
std::vector<int> dst_tz = framework::vectorize2int(z_dims);
std::vector<memory::primitive_desc> srcs_pd;
std::vector<memory> srcs;
std::vector<float> scales = {1.0f, 1.0f};
auto src_x_pd = memory::primitive_desc(
{{src_x_tz}, memory::data_type::f32, x->format()}, mkldnn_engine);
auto src_y_pd = memory::primitive_desc(
{{src_y_tz}, memory::data_type::f32, y->format()}, mkldnn_engine);
auto src_x_memory =
memory(src_x_pd, paddle::platform::to_void_cast(x_data));
auto src_y_memory =
memory(src_y_pd, paddle::platform::to_void_cast(y_data));
srcs_pd.push_back(src_x_pd);
srcs_pd.push_back(src_y_pd);
srcs.push_back(src_x_memory);
srcs.push_back(src_y_memory);
auto dst_md =
memory::desc({dst_tz}, memory::data_type::f32, memory::format::any);
// create primitive descriptor for sum
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd);
// create mkldnn memory for dst
memory dst_memory = memory(sum_pd.dst_primitive_desc(), z_data);
std::vector<primitive::at> inputs;
inputs.push_back(srcs[0]);
inputs.push_back(srcs[1]);
// create sum primitive
auto sum_prim = sum(sum_pd, inputs, dst_memory);
std::vector<primitive> pipeline;
pipeline.push_back(sum_prim);
stream(stream::kind::eager).submit(pipeline).wait();
z->set_layout(DataLayout::kMKLDNN);
z->set_format(
(memory::format)dst_memory.get_primitive_desc().desc().data.format);
}
}
};
template <typename T>
class EltwiseAddMKLDNNGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
using Tensor = framework::Tensor;
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* out = ctx.Input<Tensor>("Out");
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
int axis = ctx.Attr<int>("axis");
auto set_mkldnn_format = [](Tensor* in, const Tensor* out) {
in->set_layout(DataLayout::kMKLDNN);
in->set_format(out->format());
};
if (x->dims() == y->dims()) {
auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
if (dx) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dx->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dx, dout);
}
if (dy) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dy->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dy, dout);
}
} else {
// Execute default kernel when broadcast is needed
ElemwiseGradCompute<paddle::platform::CPUDeviceContext, T,
IdentityGrad<T>, IdentityGrad<T>>(
ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad<T>(),
IdentityGrad<T>());
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(elementwise_add, MKLDNN, ::paddle::platform::CPUPlace,
ops::EltwiseAddMKLDNNKernel<float>)
REGISTER_OP_KERNEL(elementwise_add_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::EltwiseAddMKLDNNGradKernel<float>)
...@@ -14,8 +14,12 @@ limitations under the License. */ ...@@ -14,8 +14,12 @@ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -40,6 +44,21 @@ class ElementwiseOp : public framework::OperatorWithKernel { ...@@ -40,6 +44,21 @@ class ElementwiseOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("Out", x_dim); ctx->SetOutputDim("Out", x_dim);
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto input_data_type =
framework::ToDataType(ctx.Input<Tensor>("X")->type());
#ifdef PADDLE_WITH_MKLDNN
if (platform::CanMKLDNNBeUsed(ctx)) {
return framework::OpKernelType(input_data_type, ctx.GetPlace(),
framework::DataLayout::kMKLDNN,
framework::LibraryType::kMKLDNN);
}
#endif
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
}; };
class ElementwiseOpInferVarType : public framework::VarTypeInference { class ElementwiseOpInferVarType : public framework::VarTypeInference {
...@@ -65,6 +84,8 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -65,6 +84,8 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
"for broadcasting Y onto X.") "for broadcasting Y onto X.")
.SetDefault(-1) .SetDefault(-1)
.EqualGreaterThan(-1); .EqualGreaterThan(-1);
AddAttr<bool>("use_mkldnn", "(bool, default false). Used by MKLDNN.")
.SetDefault(false);
AddComment(string::Sprintf(R"DOC( AddComment(string::Sprintf(R"DOC(
Limited Elementwise %s Operator Limited Elementwise %s Operator
...@@ -138,6 +159,21 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { ...@@ -138,6 +159,21 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
ctx->SetOutputDim(y_grad_name, y_dims); ctx->SetOutputDim(y_grad_name, y_dims);
} }
} }
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto input_data_type =
framework::ToDataType(ctx.Input<Tensor>("X")->type());
#ifdef PADDLE_WITH_MKLDNN
if (platform::CanMKLDNNBeUsed(ctx)) {
return framework::OpKernelType(input_data_type, ctx.GetPlace(),
framework::DataLayout::kMKLDNN,
framework::LibraryType::kMKLDNN);
}
#endif
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -101,17 +101,16 @@ void ListenAndServOp::RunSyncLoop( ...@@ -101,17 +101,16 @@ void ListenAndServOp::RunSyncLoop(
framework::Scope *recv_scope, framework::Scope *recv_scope,
const std::vector<int> &prefetch_block_id_list) const { const std::vector<int> &prefetch_block_id_list) const {
size_t num_blocks = program->Size(); size_t num_blocks = program->Size();
auto optimize_blocks =
Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
PADDLE_ENFORCE_GE(num_blocks, 2, PADDLE_ENFORCE_GE(num_blocks, 2,
"server program should have at least 2 blocks"); "server program should have at least 2 blocks");
std::vector<int> optimize_block_id_list; std::vector<int> optimize_blocks_idx;
for (int blkid = 1; blkid < num_blocks; ++blkid) { for (auto blk : optimize_blocks) {
if (std::find(prefetch_block_id_list.begin(), prefetch_block_id_list.end(), optimize_blocks_idx.push_back(blk->ID());
blkid) == prefetch_block_id_list.end()) {
optimize_block_id_list.push_back(blkid);
}
} }
auto optimize_prepared = executor->Prepare(*program, optimize_block_id_list); auto optimize_prepared = executor->Prepare(*program, optimize_blocks_idx);
// Insert placeholder for block0 which holds current op itself. // Insert placeholder for block0 which holds current op itself.
optimize_prepared.insert( optimize_prepared.insert(
optimize_prepared.begin(), optimize_prepared.begin(),
...@@ -134,14 +133,14 @@ void ListenAndServOp::RunSyncLoop( ...@@ -134,14 +133,14 @@ void ListenAndServOp::RunSyncLoop(
// and this will still work. // and this will still work.
// The optimize blocks which have the same parent ID would run parallel // The optimize blocks which have the same parent ID would run parallel
// TODO(Yancey1989): need to use ParallelExecutor for future // TODO(Yancey1989): need to use ParallelExecutor for future
int32_t last_parent_blkid = program->Block(1).Parent(); int32_t last_parent_blkid = optimize_blocks[0]->Parent();
std::vector<size_t> parallel_blkids; std::vector<size_t> parallel_blkids;
parallel_blkids.push_back(1); parallel_blkids.push_back(optimize_blocks[0]->ID());
double ts = GetTimestamp(); double ts = GetTimestamp();
for (size_t i = 1; i < optimize_block_id_list.size(); ++i) { for (size_t i = 1; i < optimize_blocks.size(); ++i) {
// skip the first optimize block because it is already in the // skip the first optimize block because it is already in the
// parallel_blkids. // parallel_blkids.
int blkid = optimize_block_id_list[i]; int blkid = optimize_blocks[i]->ID();
if (program->Block(blkid).Parent() != last_parent_blkid) { if (program->Block(blkid).Parent() != last_parent_blkid) {
ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared,
program, recv_scope); program, recv_scope);
...@@ -164,8 +163,8 @@ void ListenAndServOp::RunSyncLoop( ...@@ -164,8 +163,8 @@ void ListenAndServOp::RunSyncLoop(
} }
void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
framework::ProgramDesc *program) const { framework::ProgramDesc *program,
VLOG(3) << "RunAsyncLoop in"; framework::Scope *recv_scope) const {
// grad name to block id // grad name to block id
std::unordered_map<std::string, int32_t> grad_to_block_id; std::unordered_map<std::string, int32_t> grad_to_block_id;
std::unordered_map<int32_t, std::string> id_to_grad; std::unordered_map<int32_t, std::string> id_to_grad;
...@@ -192,6 +191,10 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -192,6 +191,10 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
block_list.push_back(blkid); block_list.push_back(blkid);
} }
auto optimize_prepared = executor->Prepare(*program, block_list); auto optimize_prepared = executor->Prepare(*program, block_list);
// execute global block if needed
if (block_list[0] == 1 && id_to_grad.count(1) == 0) {
executor->RunPreparedContext(optimize_prepared[0].get(), recv_scope);
}
std::unordered_map<std::string, std::unordered_map<std::string,
std::shared_ptr<framework::ExecutorPrepareContext>> std::shared_ptr<framework::ExecutorPrepareContext>>
grad_to_prepared_ctx; grad_to_prepared_ctx;
...@@ -203,7 +206,6 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -203,7 +206,6 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
request_get_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx); request_get_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
request_prefetch_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx); request_prefetch_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
VLOG(3) << "RunAsyncLoop into while";
while (true) { while (true) {
if (rpc_service_->IsExit()) { if (rpc_service_->IsExit()) {
LOG(INFO) << "get exit!rpc_processor break!"; LOG(INFO) << "get exit!rpc_processor break!";
...@@ -261,8 +263,11 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -261,8 +263,11 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
rpc_service_->RegisterRPC(distributed::kRequestPrefetch, rpc_service_->RegisterRPC(distributed::kRequestPrefetch,
request_prefetch_handler_.get()); request_prefetch_handler_.get());
auto *optimize_block = Attr<framework::BlockDesc *>(kOptimizeBlock); auto optimize_blocks =
auto *program = optimize_block->Program(); Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
PADDLE_ENFORCE(optimize_blocks.size() >= 1,
"optimize blocks should be 1 at least on the pserver side.");
auto *program = optimize_blocks[0]->Program();
framework::Executor executor(dev_place); framework::Executor executor(dev_place);
// prepare for prefetch // prepare for prefetch
...@@ -317,7 +322,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -317,7 +322,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
if (sync_mode) { if (sync_mode) {
RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list); RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list);
} else { } else {
RunAsyncLoop(&executor, program); RunAsyncLoop(&executor, program, &recv_scope);
} }
} }
...@@ -339,8 +344,9 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -339,8 +344,9 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker {
"a map from grad name to it's optimize block id") "a map from grad name to it's optimize block id")
.SetDefault({}); .SetDefault({});
AddAttr<bool>("sync_mode", "if works at sync_mode or not").SetDefault(true); AddAttr<bool>("sync_mode", "if works at sync_mode or not").SetDefault(true);
AddAttr<framework::BlockDesc *>(kOptimizeBlock, AddAttr<std::vector<framework::BlockDesc *>>(
"BlockID to run on server side."); kOptimizeBlocks, "Optimize blocks to run on server side.")
.SetDefault({});
AddAttr<std::vector<std::string>>(kPrefetchVarNameToBlockId, AddAttr<std::vector<std::string>>(kPrefetchVarNameToBlockId,
"prefetch blocks to run on server side.") "prefetch blocks to run on server side.")
.SetDefault({}); .SetDefault({});
......
...@@ -30,7 +30,7 @@ limitations under the License. */ ...@@ -30,7 +30,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
constexpr char kOptimizeBlock[] = "OptimizeBlock"; constexpr char kOptimizeBlocks[] = "optimize_blocks";
constexpr char kPrefetchVarNameToBlockId[] = "prefetch_var_name_to_block_id"; constexpr char kPrefetchVarNameToBlockId[] = "prefetch_var_name_to_block_id";
void RunServer(std::shared_ptr<distributed::RPCServer> service); void RunServer(std::shared_ptr<distributed::RPCServer> service);
...@@ -50,7 +50,8 @@ class ListenAndServOp : public framework::OperatorBase { ...@@ -50,7 +50,8 @@ class ListenAndServOp : public framework::OperatorBase {
const std::vector<int>& prefetch_block_id_list) const; const std::vector<int>& prefetch_block_id_list) const;
void RunAsyncLoop(framework::Executor* executor, void RunAsyncLoop(framework::Executor* executor,
framework::ProgramDesc* program) const; framework::ProgramDesc* program,
framework::Scope* recv_scope) const;
void SavePort() const; void SavePort() const;
......
...@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase { ...@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}}, "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
framework::AttributeMap{}); framework::AttributeMap{{"use_mkldnn", {false}}});
VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]); VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
sum_op->Run(*sub_scopes[0], places[0]); sum_op->Run(*sub_scopes[0], places[0]);
WaitOnPlace(places[0]); WaitOnPlace(places[0]);
......
...@@ -37,6 +37,11 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -37,6 +37,11 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("SeedOut", "The random seed after random cropping.") AddOutput("SeedOut", "The random seed after random cropping.")
.AsIntermediate(); .AsIntermediate();
AddAttr<std::vector<int>>("shape", "The shape of a cropped instance."); AddAttr<std::vector<int>>("shape", "The shape of a cropped instance.");
AddAttr<int>("startup_seed",
"If the input 'Seed' is not initialized, the 'startup_seed' "
"will be used to replace it. Even so, the seed after random "
"crop will also be outputed to the 'SeedOut'.")
.SetDefault(0);
AddComment(R"DOC( AddComment(R"DOC(
This operator takes a batch of instance, and do random cropping on each instance. This operator takes a batch of instance, and do random cropping on each instance.
It means that cropping positions differs on each instance, which is determined It means that cropping positions differs on each instance, which is determined
...@@ -49,8 +54,6 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -49,8 +54,6 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker {
class RandomCropOpInferShape : public framework::InferShapeBase { class RandomCropOpInferShape : public framework::InferShapeBase {
public: public:
void operator()(framework::InferShapeContext* ctx) const override { void operator()(framework::InferShapeContext* ctx) const override {
auto seed_dim = ctx->GetInputDim("Seed");
PADDLE_ENFORCE(seed_dim.size() == 1 && seed_dim[0] == 1);
auto shape = ctx->Attrs().Get<std::vector<int>>("shape"); auto shape = ctx->Attrs().Get<std::vector<int>>("shape");
auto x_dim = ctx->GetInputDim("X"); auto x_dim = ctx->GetInputDim("X");
PADDLE_ENFORCE_GT(x_dim.size(), static_cast<int64_t>(shape.size())); PADDLE_ENFORCE_GT(x_dim.size(), static_cast<int64_t>(shape.size()));
...@@ -62,7 +65,6 @@ class RandomCropOpInferShape : public framework::InferShapeBase { ...@@ -62,7 +65,6 @@ class RandomCropOpInferShape : public framework::InferShapeBase {
out_dim[x_i] = shape[shape_i]; out_dim[x_i] = shape[shape_i];
} }
ctx->SetOutputDim("Out", framework::make_ddim(out_dim)); ctx->SetOutputDim("Out", framework::make_ddim(out_dim));
ctx->SetOutputDim("SeedOut", framework::make_ddim({1}));
} }
}; };
......
...@@ -142,16 +142,22 @@ template <typename DeviceContext, typename T> ...@@ -142,16 +142,22 @@ template <typename DeviceContext, typename T>
class RandomCropKernel : public framework::OpKernel<T> { class RandomCropKernel : public framework::OpKernel<T> {
public: public:
virtual void Compute(const framework::ExecutionContext& ctx) const { virtual void Compute(const framework::ExecutionContext& ctx) const {
auto& seed_tensor = detail::Ref(ctx.Input<framework::LoDTensor>("Seed"));
int64_t seed = 0; int64_t seed = 0;
if (platform::is_cpu_place(seed_tensor.place())) { auto& seed_tensor = detail::Ref(ctx.Input<framework::LoDTensor>("Seed"));
seed = *seed_tensor.data<int64_t>(); if (seed_tensor.IsInitialized()) {
if (platform::is_cpu_place(seed_tensor.place())) {
seed = *seed_tensor.data<int64_t>();
} else {
LOG(WARNING) << "It is slow to place seed in GPU memory. Please verify "
"your program";
framework::LoDTensor cpu_seed;
framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed);
seed = *cpu_seed.data<int64_t>();
}
} else { } else {
LOG(WARNING) << "It is slow to place seed in GPU memory. Please verify " VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute "
"your program"; "'startup_seed' instead.";
framework::LoDTensor cpu_seed; seed = ctx.Attr<int>("startup_seed");
framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed);
seed = *cpu_seed.data<int64_t>();
} }
auto shape = ctx.Attr<std::vector<int>>("shape"); auto shape = ctx.Attr<std::vector<int>>("shape");
auto& x = detail::Ref(ctx.Input<framework::LoDTensor>("X")); auto& x = detail::Ref(ctx.Input<framework::LoDTensor>("X"));
...@@ -171,7 +177,7 @@ class RandomCropKernel : public framework::OpKernel<T> { ...@@ -171,7 +177,7 @@ class RandomCropKernel : public framework::OpKernel<T> {
engine.discard(functor.prod_batchsize_dims_ * engine.discard(functor.prod_batchsize_dims_ *
(functor.rank_ - functor.num_batchsize_dims_)); (functor.rank_ - functor.num_batchsize_dims_));
*ctx.Output<framework::LoDTensor>("SeedOut")->mutable_data<int64_t>( *ctx.Output<framework::LoDTensor>("SeedOut")->mutable_data<int64_t>(
platform::CPUPlace()) = engine(); framework::make_ddim({1}), platform::CPUPlace()) = engine();
} }
}; };
......
...@@ -39,6 +39,7 @@ class CustomReader : public framework::DecoratedReader { ...@@ -39,6 +39,7 @@ class CustomReader : public framework::DecoratedReader {
const framework::ProgramDesc program_; const framework::ProgramDesc program_;
int sub_block_id_; int sub_block_id_;
framework::Executor exe_; framework::Executor exe_;
framework::Scope scope_;
std::vector<std::string> source_var_names_; std::vector<std::string> source_var_names_;
std::vector<std::string> sink_var_names_; std::vector<std::string> sink_var_names_;
...@@ -158,23 +159,24 @@ void CustomReader::ReadNext(std::vector<framework::LoDTensor>* out) { ...@@ -158,23 +159,24 @@ void CustomReader::ReadNext(std::vector<framework::LoDTensor>* out) {
// The scope for CustomReader's sub-block should be independent and shouldn't // The scope for CustomReader's sub-block should be independent and shouldn't
// be any other computation scope's child. Otherwise, data preprocessing and // be any other computation scope's child. Otherwise, data preprocessing and
// compution cannot be concurrent. // compution cannot be concurrent.
framework::Scope scope; framework::Scope* exe_scope = &scope_.NewScope();
// 1. Copy LoDTensors from underlying reader's output to source variables. // 1. Copy LoDTensors from underlying reader's output to source variables.
for (size_t i = 0; i < source_var_names_.size(); ++i) { for (size_t i = 0; i < source_var_names_.size(); ++i) {
framework::Variable* var = scope.Var(source_var_names_[i]); framework::Variable* var = exe_scope->Var(source_var_names_[i]);
framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>(); framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
tensor->ShareDataWith(underlying_outs[i]); tensor->ShareDataWith(underlying_outs[i]);
tensor->set_lod(underlying_outs[i].lod()); tensor->set_lod(underlying_outs[i].lod());
} }
// 2. Run the sub-block. // 2. Run the sub-block.
exe_.Run(program_, &scope, sub_block_id_, false, true); exe_.Run(program_, exe_scope, sub_block_id_, false, true);
// 3. Copy LoDTensors from sink variables to out. // 3. Copy LoDTensors from sink variables to out.
out->resize(sink_var_names_.size()); out->resize(sink_var_names_.size());
for (size_t i = 0; i < sink_var_names_.size(); ++i) { for (size_t i = 0; i < sink_var_names_.size(); ++i) {
const auto& tensor = detail::Ref(scope.FindVar(sink_var_names_[i])) const auto& tensor = detail::Ref(exe_scope->FindVar(sink_var_names_[i]))
.Get<framework::LoDTensor>(); .Get<framework::LoDTensor>();
framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]); framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]);
} }
scope_.DeleteScope(exe_scope);
} }
} // namespace reader } // namespace reader
......
...@@ -23,13 +23,13 @@ namespace reader { ...@@ -23,13 +23,13 @@ namespace reader {
// 'Double buffer' means we shall maintain two batches of input data at the same // 'Double buffer' means we shall maintain two batches of input data at the same
// time. So the kCacheSize shoul be at least 2. // time. So the kCacheSize shoul be at least 2.
static constexpr size_t kCacheSize = 3; static constexpr size_t kCacheSize = 5;
// There will be two bacthes out of the channel during training: // There will be two bacthes out of the channel during training:
// 1. the one waiting to be sent to the channel // 1. the one waiting to be sent to the channel
// 2. the one just be received from the channel, which is also being used by // 2. the one just be received from the channel, which is also being used by
// subsequent operators. // subsequent operators.
// So the channel size should be kChacheSize - 2 // So the channel size should be kChacheSize - 2
static constexpr size_t kChannelSize = 1; // kCacheSize - 2 static constexpr size_t kChannelSize = 3; // kCacheSize - 2
class DoubleBufferReader : public framework::DecoratedReader { class DoubleBufferReader : public framework::DecoratedReader {
public: public:
......
...@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase {
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}}, "sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); {{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, place); sum_op->Run(cur_scope, place);
cur_scope.Rename(new_inside_name, inside_grad_name); cur_scope.Rename(new_inside_name, inside_grad_name);
......
...@@ -129,7 +129,10 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) { ...@@ -129,7 +129,10 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
// sub program run in listen_and_serv_op, for simple test we use sum // sub program run in listen_and_serv_op, for simple test we use sum
f::ProgramDesc program; f::ProgramDesc program;
const auto &root_block = program.Block(0); const auto &root_block = program.Block(0);
std::vector<framework::BlockDesc *> optimize_blocks;
auto *optimize_block = program.AppendBlock(root_block); auto *optimize_block = program.AppendBlock(root_block);
optimize_blocks.push_back(optimize_block);
auto *prefetch_block = program.AppendBlock(root_block); auto *prefetch_block = program.AppendBlock(root_block);
// X for server side tensors, RX for received tensors, must be of same shape. // X for server side tensors, RX for received tensors, must be of same shape.
AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block, AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block,
...@@ -139,7 +142,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) { ...@@ -139,7 +142,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
attrs.insert({"Fanin", 1}); attrs.insert({"Fanin", 1});
attrs.insert({"ParamList", std::vector<std::string>({"Out"})}); attrs.insert({"ParamList", std::vector<std::string>({"Out"})});
attrs.insert({"GradList", std::vector<std::string>({"x1"})}); attrs.insert({"GradList", std::vector<std::string>({"x1"})});
attrs.insert({"OptimizeBlock", optimize_block}); attrs.insert({"optimize_blocks", optimize_blocks});
attrs.insert({"PrefetchBlock", prefetch_block}); attrs.insert({"PrefetchBlock", prefetch_block});
attrs.insert({"grad_to_block_id", std::vector<std::string>({""})}); attrs.insert({"grad_to_block_id", std::vector<std::string>({""})});
attrs.insert({"sync_mode", true}); attrs.insert({"sync_mode", true});
......
...@@ -27,8 +27,81 @@ using paddle::platform::MKLDNNMemDesc; ...@@ -27,8 +27,81 @@ using paddle::platform::MKLDNNMemDesc;
using mkldnn::memory; // Note: paddle has also "memory" namespace using mkldnn::memory; // Note: paddle has also "memory" namespace
using mkldnn::primitive; using mkldnn::primitive;
using mkldnn::softmax_forward; using mkldnn::softmax_forward;
using mkldnn::softmax_backward;
using mkldnn::prop_kind; using mkldnn::prop_kind;
using mkldnn::stream; using mkldnn::stream;
using platform::to_void_cast;
class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
public:
SoftmaxMKLDNNHandler(
std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd,
const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
const std::string& base_key)
: platform::MKLDNNHandler(dev_ctx, engine, base_key),
softmax_pd_(softmax_pd) {}
SoftmaxMKLDNNHandler(
std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd,
std::shared_ptr<mkldnn::softmax_backward::primitive_desc> softmax_bwd_pd,
const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
const std::string& base_key)
: platform::MKLDNNHandler(dev_ctx, engine, base_key),
softmax_pd_(softmax_pd),
softmax_bwd_pd_(softmax_bwd_pd) {
// If we are in Grad operatgor then update a key with BWD suffix to
// distinguish from FWD memory primitives
key_ += "-BWD";
}
std::shared_ptr<mkldnn::softmax_forward> AcquireSoftmax(
std::shared_ptr<mkldnn::memory> dst_memory_p,
std::shared_ptr<mkldnn::memory> src_memory_p) {
/*Generate key*/
auto prim_key = key_ + "@softmax_p";
auto softmax_p = std::static_pointer_cast<mkldnn::softmax_forward>(
dev_ctx_.GetBlob(prim_key));
PADDLE_ENFORCE((softmax_p != nullptr) || (is_reusing_ == false),
"Fail to find softmax primitive in device context");
if (softmax_p == nullptr) {
softmax_p = std::make_shared<mkldnn::softmax_forward>(
*(softmax_pd_.get()),
*(static_cast<mkldnn::memory*>(src_memory_p.get())),
*(static_cast<mkldnn::memory*>(dst_memory_p.get())));
dev_ctx_.SetBlob(prim_key, softmax_p);
} else {
is_reusing_ = true;
}
return softmax_p;
}
std::shared_ptr<mkldnn::softmax_backward> AcquireSoftmaxBackward(
std::shared_ptr<mkldnn::memory> dst_memory_p,
std::shared_ptr<mkldnn::memory> diff_dst_memory_p,
std::shared_ptr<mkldnn::memory> diff_src_memory_p) {
auto prim_key = key_ + "@softmax_bwd_p";
auto softmax_bwd_p = std::static_pointer_cast<mkldnn::softmax_backward>(
dev_ctx_.GetBlob(prim_key));
PADDLE_ENFORCE((softmax_bwd_p != nullptr) || (is_reusing_ == false),
"Fail to find softmax backward primitive in device context");
if (softmax_bwd_p == nullptr) {
softmax_bwd_p = std::make_shared<mkldnn::softmax_backward>(
*softmax_bwd_pd_, *(dst_memory_p.get()), *(diff_dst_memory_p.get()),
*(diff_src_memory_p.get()));
dev_ctx_.SetBlob(prim_key, softmax_bwd_p);
} else {
is_reusing_ = true;
}
return softmax_bwd_p;
}
private:
std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd_;
std::shared_ptr<mkldnn::softmax_backward::primitive_desc> softmax_bwd_pd_;
};
template <typename T> template <typename T>
class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> { class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
...@@ -54,56 +127,27 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> { ...@@ -54,56 +127,27 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
// Same memory descriptor to be used for input and output // Same memory descriptor to be used for input and output
memory::dims softmax_tz = {src_tz[0], src_tz[1]}; memory::dims softmax_tz = {src_tz[0], src_tz[1]};
// Generate keys for storing/retriving primitives for this operator // Generate keys for storing/retriving primitives for this operator
// TODO(jczaja): Each MKLDNN operator may have diffrent hashing function const std::string key =
auto gethash = [](memory::dims& operand_dims) { platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Output("Out"));
return std::string(std::to_string(operand_dims[0]) + "-" + const std::string key_softmax_pd = key + "@softmax_pd";
std::to_string(operand_dims[1]));
}; // Currently only NC data format is supported
const std::string key = gethash(softmax_tz); auto softmax_md = MKLDNNMemDesc(
const std::string key_softmax_p = key + "@softmax_p"; {softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);
const std::string key_softmax_src_mem_p = key + "@softmax_src_mem_p"; // Normalization is made after innermost dimension eg. C out of NC
const std::string key_softmax_dst_mem_p = key + "@softmax_dst_mem_p"; auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring,
softmax_md, 1 /*dim: C*/);
std::shared_ptr<void> softmax_p = dev_ctx.GetBlob(key_softmax_p); auto softmax_pd = std::make_shared<mkldnn::softmax_forward::primitive_desc>(
if (softmax_p == nullptr) { softmax_desc, mkldnn_engine);
// Currently only NC data format is supported dev_ctx.SetBlob(key_softmax_pd, softmax_pd);
auto softmax_md =
MKLDNNMemDesc({softmax_tz}, memory::f32, memory::format::nc); SoftmaxMKLDNNHandler handler(softmax_pd, dev_ctx, mkldnn_engine, key);
// Normalization is made after innermost dimension eg. C out of NC auto softmax_src_memory_p =
auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring, handler.AcquireSrcMemory(softmax_md, to_void_cast<T>(input_data));
softmax_md, 1 /*dim: C*/); auto softmax_dst_memory_p =
// create memory primitives handler.AcquireDstMemory(softmax_md, to_void_cast<T>(output_data));
auto softmax_src_memory_p = std::make_shared<memory>( auto softmax_p =
memory::primitive_desc{softmax_md, mkldnn_engine}, handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p);
static_cast<void*>(const_cast<T*>(input_data)));
dev_ctx.SetBlob(key_softmax_src_mem_p, softmax_src_memory_p);
auto softmax_dst_memory_p = std::make_shared<memory>(
memory::primitive_desc{softmax_md, mkldnn_engine},
static_cast<void*>(output_data));
dev_ctx.SetBlob(key_softmax_dst_mem_p, softmax_dst_memory_p);
auto softmax_forward_pd =
std::make_shared<softmax_forward::primitive_desc>(softmax_desc,
mkldnn_engine);
softmax_p = std::make_shared<softmax_forward>(
*(softmax_forward_pd.get()),
*(static_cast<memory*>(softmax_src_memory_p.get())),
*(static_cast<memory*>(softmax_dst_memory_p.get())));
dev_ctx.SetBlob(key_softmax_p, softmax_p);
} else {
// Primitives already exist
auto src_memory_p = std::static_pointer_cast<memory>(
dev_ctx.GetBlob(key_softmax_src_mem_p));
PADDLE_ENFORCE(src_memory_p != nullptr,
"Fail to find softmax src mem_p in device context");
auto dst_memory_p = std::static_pointer_cast<memory>(
dev_ctx.GetBlob(key_softmax_dst_mem_p));
PADDLE_ENFORCE(dst_memory_p != nullptr,
"Fail to find softmax dst mem_p in device context");
src_memory_p->set_data_handle(
reinterpret_cast<void*>(const_cast<T*>(input_data)));
dst_memory_p->set_data_handle(output_data);
}
std::vector<primitive> pipeline{ std::vector<primitive> pipeline{
*(static_cast<softmax_forward::primitive*>(softmax_p.get()))}; *(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
...@@ -120,6 +164,77 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> { ...@@ -120,6 +164,77 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
} }
}; };
template <typename T>
class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
auto mkldnn_engine = dev_ctx.GetEngine();
const Tensor* output = ctx.Input<Tensor>("Out");
const T* dst_data = output->data<T>();
auto* dout = ctx.template Input<Tensor>(framework::GradVarName("Out"));
const auto* diff_dst_ptr = dout->template data<T>();
auto* dx =
ctx.template Output<framework::Tensor>(framework::GradVarName("X"));
T* diff_src_ptr = dx->template mutable_data<T>(ctx.GetPlace());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
std::vector<int> src_tz(dst_tz);
PADDLE_ENFORCE(output->dims().size() == 2UL,
"The input of softmax op must be a 2D matrix.");
// MKL-DNN does support softmax over selected axis. Having 2D Tensor,
// we will make normalization after final eg. axis: 1
PADDLE_ENFORCE(((src_tz[0] == dst_tz[0]) && (src_tz[1] == dst_tz[1])),
"Softmax input and output dimensions should match");
// Same memory descriptor to be used for input and output
memory::dims softmax_tz = {src_tz[0], src_tz[1]};
// Currently only supports NC data format
// retrieve eltwise primitive desc from device context
const std::string key =
platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Input("Out"));
const std::string key_softmax_pd = key + "@softmax_pd";
auto softmax_pd =
std::static_pointer_cast<mkldnn::softmax_forward::primitive_desc>(
dev_ctx.GetBlob(key_softmax_pd));
PADDLE_ENFORCE(softmax_pd != nullptr,
"Fail to find softmax_pd in device context");
// TODO(jczaja): Add layouts support when there is a need to do so
// Two dimensional softmax does support NC format
auto data_softmax_md = MKLDNNMemDesc(
{softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);
auto diff_softmax_md = MKLDNNMemDesc(
{softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);
// Normalization is made after innermost dimension eg. C out of NC
auto softmax_bwd_desc =
softmax_backward::desc(diff_softmax_md, data_softmax_md, 1 /* dim: C*/);
auto softmax_bwd_pd =
std::make_shared<mkldnn::softmax_backward::primitive_desc>(
softmax_bwd_desc, mkldnn_engine, *softmax_pd);
SoftmaxMKLDNNHandler handler(softmax_pd, softmax_bwd_pd, dev_ctx,
mkldnn_engine, key);
auto dst_memory_p =
handler.AcquireDstMemory(data_softmax_md, to_void_cast<T>(dst_data));
auto diff_dst_memory_p = handler.AcquireDiffDstMemory(
diff_softmax_md, to_void_cast<T>(diff_dst_ptr));
auto diff_src_memory_p = handler.AcquireDiffSrcMemory(
diff_softmax_md, to_void_cast<T>(diff_src_ptr));
// Get primitve from device context
auto softmax_bwd_p = handler.AcquireSoftmaxBackward(
dst_memory_p, diff_dst_memory_p, diff_src_memory_p);
std::vector<primitive> pipeline{*softmax_bwd_p};
stream(stream::kind::eager).submit(pipeline).wait();
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -127,3 +242,5 @@ namespace ops = paddle::operators; ...@@ -127,3 +242,5 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(softmax, MKLDNN, ::paddle::platform::CPUPlace, REGISTER_OP_KERNEL(softmax, MKLDNN, ::paddle::platform::CPUPlace,
ops::SoftmaxMKLDNNKernel<float>); ops::SoftmaxMKLDNNKernel<float>);
REGISTER_OP_KERNEL(softmax_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::SoftmaxMKLDNNGradKernel<float>);
...@@ -145,16 +145,30 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { ...@@ -145,16 +145,30 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
// choose cudnn kernel if the runtime supported. // choose cudnn kernel if the runtime supported.
framework::LibraryType library_{framework::LibraryType::kPlain}; framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (platform::CanCUDNNBeUsed(ctx)) { if (platform::CanCUDNNBeUsed(ctx)) {
library_ = framework::LibraryType::kCUDNN; library_ = framework::LibraryType::kCUDNN;
} }
#endif #endif
std::string data_format = ctx.Attr<std::string>("data_format"); #ifdef PADDLE_WITH_MKLDNN
return framework::OpKernelType( if (library_ == framework::LibraryType::kPlain &&
framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(), platform::CanMKLDNNBeUsed(ctx)) {
framework::StringToDataLayout(data_format), library_); library_ = framework::LibraryType::kMKLDNN;
layout_ = framework::DataLayout::kMKLDNN;
}
#endif
auto input_data_type =
framework::ToDataType(ctx.Input<Tensor>("X")->type());
if (input_data_type == framework::proto::VarType::FP16) {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"float16 can only be used on GPU place");
}
return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_,
library_);
} }
}; };
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*Licensed under the Apache License, Version 2.0(the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/sum_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
using paddle::platform::MKLDNNDeviceContext;
using paddle::platform::CPUDeviceContext;
using framework::DataLayout;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::sum;
using mkldnn::reorder;
using platform::to_void_cast;
template <typename T>
class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
auto in_vars = ctx.MultiInputVar("X");
const int N = in_vars.size();
auto out_var = ctx.OutputVar("Out");
bool in_place = out_var == in_vars[0];
if (out_var->IsType<framework::LoDTensor>()) {
LoDTensor* output = ctx.Output<LoDTensor>("Out");
T* output_data = output->mutable_data<T>(ctx.GetPlace());
std::vector<int> dst_tz = framework::vectorize2int(output->dims());
auto src_tz = dst_tz;
memory::format output_format{memory::format::format_undef};
std::vector<float> scales;
std::vector<memory::primitive_desc> srcs_mpd;
std::vector<mkldnn::memory> srcs_mem;
PADDLE_ENFORCE(in_vars[0]->IsType<LoDTensor>(),
"Input[0] must be LoDTensors");
auto& input0 = in_vars[0]->Get<LoDTensor>();
PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN &&
input0.format() != memory::format::format_undef,
"Wrong layout/format for inputs[0]");
memory::format input_format = input0.format();
if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::x;
}
if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::nc;
}
for (int i = in_place ? 1 : 0; i < N; i++) {
PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
"all inputs must be all LoDTensors");
auto& input = in_vars[i]->Get<LoDTensor>();
PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN &&
input.format() != memory::format::format_undef,
"Wrong layout/format for inputs");
if (input.numel() == 0) {
continue;
}
const T* input_data = input.data<T>();
auto src_md =
memory::desc(src_tz, memory::data_type::f32, input_format);
auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine);
auto src_mem = memory(src_mpd, to_void_cast(input_data));
srcs_mpd.push_back(src_mpd);
srcs_mem.push_back(src_mem);
scales.push_back(1.0);
}
auto dst_md =
memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
std::shared_ptr<memory> dst_mem;
if (in_place) {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
} else {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
}
std::vector<mkldnn::primitive::at> inputs;
for (size_t i = 0; i < srcs_mem.size(); ++i) {
inputs.push_back(srcs_mem[i]);
}
auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem);
output_format = (memory::format)platform::GetMKLDNNFormat(sum_pd);
primitive reorder_prim;
std::shared_ptr<memory> target_mem;
if (in_place) {
output_format = input_format;
target_mem.reset(new memory(
{{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine},
output_data));
reorder_prim = reorder(*dst_mem, *target_mem);
}
std::vector<primitive> pipeline;
pipeline.push_back(sum_prim);
if (in_place) pipeline.push_back(reorder_prim);
stream(stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN);
output->set_format(output_format);
} else if (out_var->IsType<framework::SelectedRows>()) {
// TODO(@mozga-intel) Add MKLDNN SelectedRows support
std::unique_ptr<framework::SelectedRows> in0;
if (in_place) {
// If is in_place, we store the input[0] to in0
auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
auto& rows = in_sel0.rows();
in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
in0->mutable_value()->ShareDataWith(in_sel0.value());
}
auto get_selected_row = [&](size_t i) -> const SelectedRows& {
if (i == 0 && in0) {
return *in0.get();
} else {
return in_vars[i]->Get<SelectedRows>();
}
};
auto* out = ctx.Output<SelectedRows>("Out");
out->mutable_rows()->clear();
auto* out_value = out->mutable_value();
// Runtime InferShape
size_t first_dim = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
first_dim += sel_row.rows().size();
}
auto in_dim =
framework::vectorize(get_selected_row(N - 1).value().dims());
in_dim[0] = static_cast<int64_t>(first_dim);
out_value->Resize(framework::make_ddim(in_dim));
// if all the input sparse vars are empty, no need to
// merge these vars.
if (first_dim == 0UL) {
return;
}
out_value->mutable_data<T>(ctx.GetPlace());
math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
int64_t offset = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
if (sel_row.rows().size() == 0) {
continue;
}
PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
offset, out);
offset += sel_row.value().numel();
}
} else if (out_var->IsType<framework::LoDTensorArray>()) {
// TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
"Only support all inputs are TensorArray");
auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();
for (size_t i = 0; i < in_array.size(); ++i) {
if (in_array[i].numel() != 0) {
if (i >= out_array.size()) {
out_array.resize(i + 1);
}
if (out_array[i].numel() == 0) {
framework::TensorCopy(in_array[i], in_array[i].place(),
ctx.device_context(), &out_array[i]);
out_array[i].set_lod(in_array[i].lod());
} else {
PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod());
auto in = EigenVector<T>::Flatten(in_array[i]);
auto result = EigenVector<T>::Flatten(out_array[i]);
result.device(*ctx.template device_context<MKLDNNDeviceContext>()
.eigen_device()) = result + in;
}
}
}
}
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
out_var->Type().name());
}
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_KERNEL(sum, MKLDNN, ::paddle::platform::CPUPlace,
paddle::operators::SumMKLDNNOpKernel<float>);
此差异已折叠。
...@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase { ...@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase {
->set_lod(inside_tensor.lod()); ->set_lod(inside_tensor.lod());
} }
} }
auto new_inside_name = cur_scope.Rename(inside_grad_name); auto new_inside_name = cur_scope.Rename(inside_grad_name);
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}}, "sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); {{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, dev_place); sum_op->Run(cur_scope, dev_place);
cur_scope.Rename(new_inside_name, inside_grad_name); cur_scope.Rename(new_inside_name, inside_grad_name);
} }
......
...@@ -106,14 +106,6 @@ class CUDADeviceContext : public DeviceContext { ...@@ -106,14 +106,6 @@ class CUDADeviceContext : public DeviceContext {
PADDLE_ENFORCE(cudaEventRecord(ev, stream_)); PADDLE_ENFORCE(cudaEventRecord(ev, stream_));
} }
// FIXME(zcd): A temporary fix for some language model that has sparse
// parameter.
template <typename Callback>
void RecordEventNoMutex(cudaEvent_t ev, Callback callback) {
callback();
PADDLE_ENFORCE(cudaEventRecord(ev, stream_));
}
private: private:
CUDAPlace place_; CUDAPlace place_;
......
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)
list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc) list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc)
# There is no macOS version of NCCL.
if (NOT APPLE)
list(APPEND CUDA_SRCS nccl.cc)
endif()
if (TENSORRT_FOUND) if (TENSORRT_FOUND)
list(APPEND CUDA_SRCS tensorrt.cc) list(APPEND CUDA_SRCS tensorrt.cc)
endif() endif()
configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h) configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h)
if (CUPTI_FOUND) if (CUPTI_FOUND)
list(APPEND CUDA_SRCS cupti.cc) list(APPEND CUDA_SRCS cupti.cc)
......
此差异已折叠。
...@@ -268,7 +268,8 @@ void BindOpDesc(pybind11::module *m) { ...@@ -268,7 +268,8 @@ void BindOpDesc(pybind11::module *m) {
.value("STRINGS", pd::proto::AttrType::STRINGS) .value("STRINGS", pd::proto::AttrType::STRINGS)
.value("BOOL", pd::proto::AttrType::BOOLEAN) .value("BOOL", pd::proto::AttrType::BOOLEAN)
.value("BOOLS", pd::proto::AttrType::BOOLEANS) .value("BOOLS", pd::proto::AttrType::BOOLEANS)
.value("BLOCK", pd::proto::AttrType::BLOCK); .value("BLOCK", pd::proto::AttrType::BLOCK)
.value("BLOCKS", pd::proto::AttrType::BLOCKS);
pybind11::class_<pd::OpDesc> op_desc(*m, "OpDesc", ""); pybind11::class_<pd::OpDesc> op_desc(*m, "OpDesc", "");
op_desc op_desc
...@@ -293,6 +294,7 @@ void BindOpDesc(pybind11::module *m) { ...@@ -293,6 +294,7 @@ void BindOpDesc(pybind11::module *m) {
.def("set_attr", &pd::OpDesc::SetAttr) .def("set_attr", &pd::OpDesc::SetAttr)
.def("attr", &pd::OpDesc::GetAttr) .def("attr", &pd::OpDesc::GetAttr)
.def("set_block_attr", &pd::OpDesc::SetBlockAttr) .def("set_block_attr", &pd::OpDesc::SetBlockAttr)
.def("set_blocks_attr", &pd::OpDesc::SetBlocksAttr)
.def("set_serialized_attr", .def("set_serialized_attr",
[](pd::OpDesc &self, const std::string &name, [](pd::OpDesc &self, const std::string &name,
const pybind11::bytes &seriralized) { const pybind11::bytes &seriralized) {
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册