提交 b6e63554 编写于 作者: T tangwei12

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into ckpt_m2

......@@ -40,12 +40,12 @@ ExternalProject_Add(
# NOTE(wuyi):
# this package is generated by following steps:
# 1. git clone -b v1.8.x https://github.com/grpc/grpc.git
# 2. submodule update --init
# 2. git submodule update --init
# 3. keep only zlib, cares, protobuf, boringssl under "third_party",
# checkout and clean other dirs under third_party
# 4. remove .git, and package the directory.
URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.8.x.tar.gz"
URL_MD5 "c9c58ee7d0e8929a63155af6a2ecdbd0"
URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.10.x.tar.gz"
URL_MD5 "1f268a2aff6759839dccd256adcc91cf"
PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=========
evaluator
=========
=============
fluid.average
=============
.. _api_fluid_average_WeightedAverage:
WeightedAverage
---------------
.. autoclass:: paddle.fluid.average.WeightedAverage
:members:
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
==============
fluid.backward
==============
.. _api_fluid_backward_append_backward:
append_backward
---------------
.. autofunction:: paddle.fluid.backward.append_backward
:noindex:
.. _api_fluid_backward_calc_gradient:
calc_gradient
-------------
.. autofunction:: paddle.fluid.backward.calc_gradient
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
====
clip
====
==========
fluid.clip
==========
.. _api_fluid_clip_ErrorClipByValue:
ErrorClipByValue
----------------
......@@ -12,6 +14,8 @@ ErrorClipByValue
:members:
:noindex:
.. _api_fluid_clip_GradientClipByValue:
GradientClipByValue
-------------------
......@@ -19,6 +23,8 @@ GradientClipByValue
:members:
:noindex:
.. _api_fluid_clip_GradientClipByNorm:
GradientClipByNorm
------------------
......@@ -26,6 +32,8 @@ GradientClipByNorm
:members:
:noindex:
.. _api_fluid_clip_GradientClipByGlobalNorm:
GradientClipByGlobalNorm
------------------------
......@@ -33,15 +41,3 @@ GradientClipByGlobalNorm
:members:
:noindex:
append_gradient_clip_ops
------------------------
.. autofunction:: paddle.fluid.clip.append_gradient_clip_ops
:noindex:
error_clip_callback
-------------------
.. autofunction:: paddle.fluid.clip.error_clip_callback
:noindex:
==================================
Data Reader Interface and DataSets
==================================
.. toctree::
:maxdepth: 1
data/data_reader.rst
data/image.rst
data/dataset.rst
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
===========
data_feeder
===========
=================
fluid.data_feeder
=================
.. _api_fluid_data_feeder_DataFeeder:
DataFeeder
----------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
========
executor
========
==============
fluid.executor
==============
.. _api_fluid_executor_Executor:
Executor
--------
......@@ -12,24 +14,32 @@ Executor
:members:
:noindex:
.. _api_fluid_executor_global_scope:
global_scope
------------
.. autofunction:: paddle.fluid.executor.global_scope
:noindex:
.. _api_fluid_executor_scope_guard:
scope_guard
-----------
.. autofunction:: paddle.fluid.executor.scope_guard
:noindex:
switch_scope
------------
.. _api_fluid_executor__switch_scope:
_switch_scope
-------------
.. autofunction:: paddle.fluid.executor.switch_scope
.. autofunction:: paddle.fluid.executor._switch_scope
:noindex:
.. _api_fluid_executor_fetch_var:
fetch_var
---------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=====
fluid
=====
.. _api_fluid_Block:
Block
-----
.. autoclass:: paddle.fluid.Block
:members:
:noindex:
.. _api_fluid_Variable:
Variable
--------
.. autoclass:: paddle.fluid.Variable
:members:
:noindex:
.. _api_fluid_Program:
Program
-------
.. autoclass:: paddle.fluid.Program
:members:
:noindex:
.. _api_fluid_Operator:
Operator
--------
.. autoclass:: paddle.fluid.Operator
:members:
:noindex:
.. _api_fluid_default_startup_program:
default_startup_program
-----------------------
.. autofunction:: paddle.fluid.default_startup_program
:noindex:
.. _api_fluid_default_main_program:
default_main_program
--------------------
.. autofunction:: paddle.fluid.default_main_program
:noindex:
.. _api_fluid_program_guard:
program_guard
-------------
.. autofunction:: paddle.fluid.program_guard
:noindex:
.. _api_fluid_get_var:
get_var
-------
.. autofunction:: paddle.fluid.get_var
:noindex:
.. _api_fluid_Executor:
Executor
--------
.. autoclass:: paddle.fluid.Executor
:members:
:noindex:
.. _api_fluid_global_scope:
global_scope
------------
.. autofunction:: paddle.fluid.global_scope
:noindex:
.. _api_fluid_scope_guard:
scope_guard
-----------
.. autofunction:: paddle.fluid.scope_guard
:noindex:
.. _api_fluid__switch_scope:
_switch_scope
-------------
.. autofunction:: paddle.fluid._switch_scope
:noindex:
.. _api_fluid_fetch_var:
fetch_var
---------
.. autofunction:: paddle.fluid.fetch_var
:noindex:
.. _api_fluid_Go:
Go
--
.. autoclass:: paddle.fluid.Go
:members:
:noindex:
.. _api_fluid_make_channel:
make_channel
------------
.. autofunction:: paddle.fluid.make_channel
:noindex:
.. _api_fluid_channel_send:
channel_send
------------
.. autofunction:: paddle.fluid.channel_send
:noindex:
.. _api_fluid_channel_recv:
channel_recv
------------
.. autofunction:: paddle.fluid.channel_recv
:noindex:
.. _api_fluid_channel_close:
channel_close
-------------
.. autofunction:: paddle.fluid.channel_close
:noindex:
.. _api_fluid_Select:
Select
------
.. autoclass:: paddle.fluid.Select
:members:
:noindex:
.. _api_fluid_Trainer:
Trainer
-------
.. autoclass:: paddle.fluid.Trainer
:members:
:noindex:
.. _api_fluid_BeginEpochEvent:
BeginEpochEvent
---------------
.. autoclass:: paddle.fluid.BeginEpochEvent
:members:
:noindex:
.. _api_fluid_EndEpochEvent:
EndEpochEvent
-------------
.. autoclass:: paddle.fluid.EndEpochEvent
:members:
:noindex:
.. _api_fluid_BeginStepEvent:
BeginStepEvent
--------------
.. autoclass:: paddle.fluid.BeginStepEvent
:members:
:noindex:
.. _api_fluid_EndStepEvent:
EndStepEvent
------------
.. autoclass:: paddle.fluid.EndStepEvent
:members:
:noindex:
.. _api_fluid_CheckpointConfig:
CheckpointConfig
----------------
.. autoclass:: paddle.fluid.CheckpointConfig
:members:
:noindex:
.. _api_fluid_Inferencer:
Inferencer
----------
.. autoclass:: paddle.fluid.Inferencer
:members:
:noindex:
.. _api_fluid_DistributeTranspiler:
DistributeTranspiler
--------------------
.. autoclass:: paddle.fluid.DistributeTranspiler
:members:
:noindex:
.. _api_fluid_memory_optimize:
memory_optimize
---------------
.. autofunction:: paddle.fluid.memory_optimize
:noindex:
.. _api_fluid_release_memory:
release_memory
--------------
.. autofunction:: paddle.fluid.release_memory
:noindex:
.. _api_fluid_ParallelExecutor:
ParallelExecutor
----------------
.. autoclass:: paddle.fluid.ParallelExecutor
:members:
:noindex:
.. _api_fluid_ExecutionStrategy:
ExecutionStrategy
-----------------
.. autoclass:: paddle.fluid.ExecutionStrategy
:members:
:noindex:
.. _api_fluid_BuildStrategy:
BuildStrategy
-------------
.. autoclass:: paddle.fluid.BuildStrategy
:members:
:noindex:
.. _api_fluid_create_lod_tensor:
create_lod_tensor
-----------------
.. autofunction:: paddle.fluid.create_lod_tensor
:noindex:
.. _api_fluid_create_random_int_lodtensor:
create_random_int_lodtensor
---------------------------
.. autofunction:: paddle.fluid.create_random_int_lodtensor
:noindex:
.. _api_fluid_LoDTensor:
LoDTensor
---------
.. autoclass:: paddle.fluid.LoDTensor
:members:
:noindex:
.. _api_fluid_CPUPlace:
CPUPlace
--------
.. autoclass:: paddle.fluid.CPUPlace
:members:
:noindex:
.. _api_fluid_CUDAPlace:
CUDAPlace
---------
.. autoclass:: paddle.fluid.CUDAPlace
:members:
:noindex:
.. _api_fluid_CUDAPinnedPlace:
CUDAPinnedPlace
---------------
.. autoclass:: paddle.fluid.CUDAPinnedPlace
:members:
:noindex:
.. _api_fluid_Tensor:
Tensor
------
.. autoclass:: paddle.fluid.Tensor
:members:
:noindex:
.. _api_fluid_ParamAttr:
ParamAttr
---------
.. autoclass:: paddle.fluid.ParamAttr
:members:
:noindex:
.. _api_fluid_WeightNormParamAttr:
WeightNormParamAttr
-------------------
.. autoclass:: paddle.fluid.WeightNormParamAttr
:members:
:noindex:
.. _api_fluid_DataFeeder:
DataFeeder
----------
.. autoclass:: paddle.fluid.DataFeeder
:members:
:noindex:
.. _api_fluid_Scope:
Scope
-----
.. autoclass:: paddle.fluid.Scope
:members:
:noindex:
......@@ -29,19 +29,27 @@ def parse_arg():
class DocGenerator(object):
def __init__(self, module_name, stream=sys.stdout):
def __init__(self, module_name=None, stream=sys.stdout):
if module_name == "":
module_name = None
self.stream = stream
self.module_name = module_name
if not hasattr(fluid, module_name):
raise ValueError("Cannot find fluid.{0}".format(module_name))
if module_name is None:
self.module_name = "fluid"
else:
self.module = getattr(fluid, module_name)
self.module_name = "fluid." + module_name
if module_name is None:
self.module = fluid
else:
if not hasattr(fluid, module_name):
raise ValueError("Cannot find fluid.{0}".format(module_name))
else:
self.module = getattr(fluid, module_name)
self.stream.write('''.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
''')
self._print_header_(module_name, dot='=', is_title=True)
self._print_header_(self.module_name, dot='=', is_title=True)
def print_submodule(self, submodule_name):
submodule = getattr(self.module, submodule_name)
......@@ -60,25 +68,29 @@ class DocGenerator(object):
self._print_header_(name, dot='=', is_title=False)
def print_item(self, name):
item = getattr(self.module, name)
item = getattr(self.module, name, None)
if item is None:
return
if isinstance(item, types.TypeType):
self.print_class(name)
elif isinstance(item, types.FunctionType):
self.print_method(name)
else:
raise RuntimeError("Unsupported item {0}".format(name))
pass
def print_class(self, name):
self._print_ref_(name)
self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autoclass:: paddle.fluid.{0}.{1}
self.stream.write('''.. autoclass:: paddle.{0}.{1}
:members:
:noindex:
'''.format(self.module_name, name))
def print_method(self, name):
self._print_ref_(name)
self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autofunction:: paddle.fluid.{0}.{1}
self.stream.write('''.. autofunction:: paddle.{0}.{1}
:noindex:
'''.format(self.module_name, name))
......@@ -94,6 +106,10 @@ class DocGenerator(object):
self.stream.write('\n')
self.stream.write('\n')
def _print_ref_(self, name):
self.stream.write(".. _api_{0}_{1}:\n\n".format("_".join(
self.module_name.split(".")), name))
def main():
args = parse_arg()
......
#!/bin/bash
python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler metric > layers.rst
python gen_doc.py layers --submodules control_flow device io nn ops tensor learning_rate_scheduler detection metric_op tensor > layers.rst
for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer transpiler
for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer transpiler recordio_writer backward average profiler
do
python gen_doc.py ${module} > ${module}.rst
done
python gen_doc.py "" > fluid.rst
======================
Fluid
======================
=============
API Reference
=============
.. toctree::
:maxdepth: 1
fluid.rst
layers.rst
data_feeder.rst
executor.rst
......@@ -18,3 +19,8 @@ Fluid
regularizer.rst
io.rst
data.rst
transpiler.rst
recordio_writer.rst
backward.rst
average.rst
profiler.rst
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
===========
initializer
===========
=================
fluid.initializer
=================
.. _api_fluid_initializer_Constant:
Constant
--------
......@@ -12,6 +14,8 @@ Constant
:members:
:noindex:
.. _api_fluid_initializer_Uniform:
Uniform
-------
......@@ -19,6 +23,8 @@ Uniform
:members:
:noindex:
.. _api_fluid_initializer_Normal:
Normal
------
......@@ -26,6 +32,8 @@ Normal
:members:
:noindex:
.. _api_fluid_initializer_Xavier:
Xavier
------
......@@ -33,6 +41,8 @@ Xavier
:members:
:noindex:
.. _api_fluid_initializer_Bilinear:
Bilinear
--------
......@@ -40,18 +50,33 @@ Bilinear
:members:
:noindex:
.. _api_fluid_initializer_MSRA:
MSRA
----
.. autoclass:: paddle.fluid.initializer.MSRA
:members:
:noindex:
.. _api_fluid_initializer_force_init_on_cpu:
force_init_on_cpu
-----------------
.. autofunction:: paddle.fluid.initializer.force_init_on_cpu
:noindex:
.. _api_fluid_initializer_init_on_cpu:
init_on_cpu
-----------
.. autofunction:: paddle.fluid.initializer.init_on_cpu
:noindex:
.. _api_fluid_initializer_ConstantInitializer:
ConstantInitializer
-------------------
......@@ -59,6 +84,8 @@ ConstantInitializer
:members:
:noindex:
.. _api_fluid_initializer_UniformInitializer:
UniformInitializer
------------------
......@@ -66,6 +93,8 @@ UniformInitializer
:members:
:noindex:
.. _api_fluid_initializer_NormalInitializer:
NormalInitializer
-----------------
......@@ -73,6 +102,8 @@ NormalInitializer
:members:
:noindex:
.. _api_fluid_initializer_XavierInitializer:
XavierInitializer
-----------------
......@@ -80,6 +111,8 @@ XavierInitializer
:members:
:noindex:
.. _api_fluid_initializer_BilinearInitializer:
BilinearInitializer
-------------------
......@@ -87,3 +120,12 @@ BilinearInitializer
:members:
:noindex:
.. _api_fluid_initializer_MSRAInitializer:
MSRAInitializer
---------------
.. autoclass:: paddle.fluid.initializer.MSRAInitializer
:members:
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
==
io
==
========
fluid.io
========
.. _api_fluid_io_save_vars:
save_vars
---------
......@@ -11,84 +13,112 @@ save_vars
.. autofunction:: paddle.fluid.io.save_vars
:noindex:
.. _api_fluid_io_save_params:
save_params
-----------
.. autofunction:: paddle.fluid.io.save_params
:noindex:
.. _api_fluid_io_save_persistables:
save_persistables
-----------------
.. autofunction:: paddle.fluid.io.save_persistables
:noindex:
.. _api_fluid_io_load_vars:
load_vars
---------
.. autofunction:: paddle.fluid.io.load_vars
:noindex:
.. _api_fluid_io_load_params:
load_params
-----------
.. autofunction:: paddle.fluid.io.load_params
:noindex:
.. _api_fluid_io_load_persistables:
load_persistables
-----------------
.. autofunction:: paddle.fluid.io.load_persistables
:noindex:
.. _api_fluid_io_save_inference_model:
save_inference_model
--------------------
.. autofunction:: paddle.fluid.io.save_inference_model
:noindex:
.. _api_fluid_io_load_inference_model:
load_inference_model
--------------------
.. autofunction:: paddle.fluid.io.load_inference_model
:noindex:
.. _api_fluid_io_get_inference_program:
get_inference_program
---------------------
.. autofunction:: paddle.fluid.io.get_inference_program
:noindex:
.. _api_fluid_io_save_checkpoint:
save_checkpoint
---------------
.. autofunction:: paddle.fluid.io.save_checkpoint
:noindex:
.. _api_fluid_io_load_checkpoint:
load_checkpoint
---------------
.. autofunction:: paddle.fluid.io.load_checkpoint
:noindex:
.. _api_fluid_io_clean_checkpoint:
clean_checkpoint
----------------
.. autofunction:: paddle.fluid.io.clean_checkpoint
:noindex:
.. _api_fluid_io_load_persist_vars_without_grad:
load_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.load_persist_vars_without_grad
:noindex:
.. _api_fluid_io_save_persist_vars_without_grad:
save_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.save_persist_vars_without_grad
:noindex:
.. _api_fluid_io_get_latest_checkpoint_serial:
get_latest_checkpoint_serial
----------------------------
......
此差异已折叠。
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=======
metrics
=======
=============
fluid.metrics
=============
.. _api_fluid_metrics_MetricBase:
MetricBase
----------
......@@ -12,6 +14,8 @@ MetricBase
:members:
:noindex:
.. _api_fluid_metrics_CompositeMetric:
CompositeMetric
---------------
......@@ -19,6 +23,26 @@ CompositeMetric
:members:
:noindex:
.. _api_fluid_metrics_Precision:
Precision
---------
.. autoclass:: paddle.fluid.metrics.Precision
:members:
:noindex:
.. _api_fluid_metrics_Recall:
Recall
------
.. autoclass:: paddle.fluid.metrics.Recall
:members:
:noindex:
.. _api_fluid_metrics_Accuracy:
Accuracy
--------
......@@ -26,6 +50,8 @@ Accuracy
:members:
:noindex:
.. _api_fluid_metrics_ChunkEvaluator:
ChunkEvaluator
--------------
......@@ -33,6 +59,8 @@ ChunkEvaluator
:members:
:noindex:
.. _api_fluid_metrics_EditDistance:
EditDistance
------------
......@@ -40,6 +68,8 @@ EditDistance
:members:
:noindex:
.. _api_fluid_metrics_DetectionMAP:
DetectionMAP
------------
......@@ -47,6 +77,8 @@ DetectionMAP
:members:
:noindex:
.. _api_fluid_metrics_Auc:
Auc
---
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
====
nets
====
==========
fluid.nets
==========
.. _api_fluid_nets_simple_img_conv_pool:
simple_img_conv_pool
--------------------
......@@ -11,18 +13,24 @@ simple_img_conv_pool
.. autofunction:: paddle.fluid.nets.simple_img_conv_pool
:noindex:
.. _api_fluid_nets_sequence_conv_pool:
sequence_conv_pool
------------------
.. autofunction:: paddle.fluid.nets.sequence_conv_pool
:noindex:
.. _api_fluid_nets_glu:
glu
---
.. autofunction:: paddle.fluid.nets.glu
:noindex:
.. _api_fluid_nets_scaled_dot_product_attention:
scaled_dot_product_attention
----------------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=========
optimizer
=========
===============
fluid.optimizer
===============
.. _api_fluid_optimizer_SGD:
SGD
---
......@@ -12,6 +14,8 @@ SGD
:members:
:noindex:
.. _api_fluid_optimizer_Momentum:
Momentum
--------
......@@ -19,6 +23,8 @@ Momentum
:members:
:noindex:
.. _api_fluid_optimizer_Adagrad:
Adagrad
-------
......@@ -26,6 +32,8 @@ Adagrad
:members:
:noindex:
.. _api_fluid_optimizer_Adam:
Adam
----
......@@ -33,6 +41,8 @@ Adam
:members:
:noindex:
.. _api_fluid_optimizer_Adamax:
Adamax
------
......@@ -40,6 +50,8 @@ Adamax
:members:
:noindex:
.. _api_fluid_optimizer_DecayedAdagrad:
DecayedAdagrad
--------------
......@@ -47,6 +59,17 @@ DecayedAdagrad
:members:
:noindex:
.. _api_fluid_optimizer_Ftrl:
Ftrl
----
.. autoclass:: paddle.fluid.optimizer.Ftrl
:members:
:noindex:
.. _api_fluid_optimizer_SGDOptimizer:
SGDOptimizer
------------
......@@ -54,6 +77,8 @@ SGDOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_MomentumOptimizer:
MomentumOptimizer
-----------------
......@@ -61,6 +86,8 @@ MomentumOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_AdagradOptimizer:
AdagradOptimizer
----------------
......@@ -68,6 +95,8 @@ AdagradOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_AdamOptimizer:
AdamOptimizer
-------------
......@@ -75,6 +104,8 @@ AdamOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_AdamaxOptimizer:
AdamaxOptimizer
---------------
......@@ -82,6 +113,8 @@ AdamaxOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_DecayedAdagradOptimizer:
DecayedAdagradOptimizer
-----------------------
......@@ -89,6 +122,8 @@ DecayedAdagradOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_RMSPropOptimizer:
RMSPropOptimizer
----------------
......@@ -96,6 +131,17 @@ RMSPropOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_FtrlOptimizer:
FtrlOptimizer
-------------
.. autoclass:: paddle.fluid.optimizer.FtrlOptimizer
:members:
:noindex:
.. _api_fluid_optimizer_Adadelta:
Adadelta
--------
......@@ -103,6 +149,8 @@ Adadelta
:members:
:noindex:
.. _api_fluid_optimizer_ModelAverage:
ModelAverage
------------
......@@ -110,6 +158,8 @@ ModelAverage
:members:
:noindex:
.. _api_fluid_optimizer_Optimizer:
Optimizer
---------
......@@ -117,3 +167,12 @@ Optimizer
:members:
:noindex:
.. _api_fluid_optimizer_RMSPropOptimizer:
RMSPropOptimizer
----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
==========
param_attr
==========
================
fluid.param_attr
================
.. _api_fluid_param_attr_ParamAttr:
ParamAttr
---------
......@@ -12,6 +14,8 @@ ParamAttr
:members:
:noindex:
.. _api_fluid_param_attr_WeightNormParamAttr:
WeightNormParamAttr
-------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
========
profiler
========
==============
fluid.profiler
==============
.. _api_fluid_profiler_cuda_profiler:
cuda_profiler
-------------
......@@ -11,24 +13,32 @@ cuda_profiler
.. autofunction:: paddle.fluid.profiler.cuda_profiler
:noindex:
.. _api_fluid_profiler_reset_profiler:
reset_profiler
--------------
.. autofunction:: paddle.fluid.profiler.reset_profiler
:noindex:
.. _api_fluid_profiler_profiler:
profiler
--------
.. autofunction:: paddle.fluid.profiler.profiler
:noindex:
.. _api_fluid_profiler_start_profiler:
start_profiler
--------------
.. autofunction:: paddle.fluid.profiler.start_profiler
:noindex:
.. _api_fluid_profiler_stop_profiler:
stop_profiler
-------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=====================
fluid.recordio_writer
=====================
.. _api_fluid_recordio_writer_convert_reader_to_recordio_file:
convert_reader_to_recordio_file
-------------------------------
.. autofunction:: paddle.fluid.recordio_writer.convert_reader_to_recordio_file
:noindex:
.. _api_fluid_recordio_writer_convert_reader_to_recordio_files:
convert_reader_to_recordio_files
--------------------------------
.. autofunction:: paddle.fluid.recordio_writer.convert_reader_to_recordio_files
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
===========
regularizer
===========
=================
fluid.regularizer
=================
.. _api_fluid_regularizer_append_regularization_ops:
append_regularization_ops
-------------------------
......@@ -11,12 +13,7 @@ append_regularization_ops
.. autofunction:: paddle.fluid.regularizer.append_regularization_ops
:noindex:
WeightDecayRegularizer
----------------------
.. autoclass:: paddle.fluid.regularizer.WeightDecayRegularizer
:members:
:noindex:
.. _api_fluid_regularizer_L1Decay:
L1Decay
-------
......@@ -25,6 +22,8 @@ L1Decay
:members:
:noindex:
.. _api_fluid_regularizer_L2Decay:
L2Decay
-------
......@@ -32,6 +31,8 @@ L2Decay
:members:
:noindex:
.. _api_fluid_regularizer_L1DecayRegularizer:
L1DecayRegularizer
------------------
......@@ -39,6 +40,8 @@ L1DecayRegularizer
:members:
:noindex:
.. _api_fluid_regularizer_L2DecayRegularizer:
L2DecayRegularizer
------------------
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
==========
transpiler
==========
================
fluid.transpiler
================
.. _api_fluid_transpiler_DistributeTranspiler:
DistributeTranspiler
--------------------
......@@ -12,12 +14,7 @@ DistributeTranspiler
:members:
:noindex:
InferenceTranspiler
-------------------
.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler
:members:
:noindex:
.. _api_fluid_transpiler_memory_optimize:
memory_optimize
---------------
......@@ -25,12 +22,16 @@ memory_optimize
.. autofunction:: paddle.fluid.transpiler.memory_optimize
:noindex:
.. _api_fluid_transpiler_release_memory:
release_memory
--------------
.. autofunction:: paddle.fluid.transpiler.release_memory
:noindex:
.. _api_fluid_transpiler_HashName:
HashName
--------
......@@ -38,9 +39,12 @@ HashName
:members:
:noindex:
.. _api_fluid_transpiler_RoundRobin:
RoundRobin
----------
.. autoclass:: paddle.fluid.transpiler.RoundRobin
:members:
:noindex:
......@@ -147,10 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
"Input tensor type is not supported: ", in.type().name());
memory::data_type out_type = in_type;
memory::format in_format =
in_tz.size() == 2 ? memory::format::nc : in.format();
memory::format out_format =
out_tz.size() == 2 ? memory::format::nc : ToMKLDNNFormat(out_layout);
auto in_format = MKLDNNFormatForSize(in_tz.size(), in.format());
auto out_format =
MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout));
void* in_data = GetDataFromTensor(in, in_type);
......
......@@ -61,6 +61,13 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) {
if (iter != dict.end()) return iter->second;
return MKLDNNDataType::data_undef;
}
inline MKLDNNFormat MKLDNNFormatForSize(size_t dims_size,
MKLDNNFormat default_format) {
return (dims_size == 1
? mkldnn::memory::format::x
: dims_size == 2 ? mkldnn::memory::format::nc : default_format);
}
#endif
void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
......
......@@ -47,9 +47,13 @@ void DataTransform(const OpKernelType& expected_kernel_type,
#ifdef PADDLE_WITH_MKLDNN
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Just set layout/format. No real transform occur
auto out_format =
MKLDNNFormatForSize(in.dims().size(), ToMKLDNNFormat(lin));
out.ShareDataWith(input_tensor);
out.set_layout(DataLayout::kMKLDNN);
out.set_format(ToMKLDNNFormat(lin));
out.set_format(out_format);
#endif
} else {
// Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel
......
......@@ -470,7 +470,7 @@ void MultiDevSSAGraphBuilder::ConnectOp(SSAGraph *result, OpHandleBase *op,
void MultiDevSSAGraphBuilder::CreateDistTrainOp(SSAGraph *result,
const OpDesc &op) const {
int op_dev_id = -1;
if (op.Type() == "split_byref") {
if (op.Type() == "split_byref" || op.Type() == "split_selected_rows") {
op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames());
......
......@@ -70,6 +70,7 @@ $$Out = values$$
namespace ops = paddle::operators;
REGISTER_OPERATOR(assign_value, ops::AssignValueOp, ops::AssignValueOpMaker);
REGISTER_OPERATOR(assign_value, ops::AssignValueOp, ops::AssignValueOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(assign_value, ops::AssignValueKernel<int>,
ops::AssignValueKernel<float>);
......@@ -286,14 +286,15 @@ void GRPCClient::Proceed() {
}
std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
// TODO(Yancey1989): make grpc client completely thread-safe
std::lock_guard<std::mutex> guard(chan_mutex_);
auto it = channels_.find(ep);
if (it != channels_.end()) {
return it->second;
}
// Channel configurations:
grpc::ChannelArguments args;
args.SetInt(GRPC_ARG_MAX_RECONNECT_BACKOFF_MS, 2000);
args.SetCompressionAlgorithm(GRPC_COMPRESS_NONE);
args.SetMaxSendMessageSize(std::numeric_limits<int>::max());
args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
......
......@@ -76,6 +76,7 @@ class BaseProcessor {
virtual void Prepare(const VarHandle& var_info, int64_t time_out) {
context_.reset(new grpc::ClientContext());
var_h_ = var_info;
context_->set_wait_for_ready(true);
std::chrono::system_clock::time_point deadline =
std::chrono::system_clock::now() + std::chrono::milliseconds(time_out);
......@@ -85,6 +86,7 @@ class BaseProcessor {
virtual void Prepare(int64_t time_out) {
context_.reset(new grpc::ClientContext());
context_->set_wait_for_ready(true);
std::chrono::system_clock::time_point deadline =
std::chrono::system_clock::now() + std::chrono::milliseconds(time_out);
......@@ -190,26 +192,24 @@ class GRPCClient : public RPCClient {
bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx,
const framework::Scope& scope, const std::string& var_name,
int64_t time_out = RPCClient::rpc_time_out) override;
int64_t time_out = FLAGS_grpc_deadline) override;
bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx,
const framework::Scope& scope, const std::string& var_name,
int64_t time_out = RPCClient::rpc_time_out) override;
int64_t time_out = FLAGS_grpc_deadline) override;
bool AsyncPrefetchVar(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& in_var_name,
const std::string& out_var_name,
int64_t time_out = RPCClient::rpc_time_out) override;
int64_t time_out = FLAGS_grpc_deadline) override;
void AsyncSendBatchBarrier(
const std::string& ep,
int64_t time_out = RPCClient::rpc_time_out) override;
void AsyncSendBatchBarrier(const std::string& ep,
int64_t time_out = FLAGS_grpc_deadline) override;
void AsyncSendFetchBarrier(
const std::string& ep,
int64_t time_out = RPCClient::rpc_time_out) override;
void AsyncSendFetchBarrier(const std::string& ep,
int64_t time_out = FLAGS_grpc_deadline) override;
void AsyncCheckpointNotify(
const std::string& ep, const std::string& dir,
......@@ -229,7 +229,7 @@ class GRPCClient : public RPCClient {
void Proceed();
void AsyncSendComplete(const std::string& ep,
int64_t time_out = RPCClient::rpc_time_out);
int64_t time_out = FLAGS_grpc_deadline);
std::shared_ptr<grpc::Channel> GetChannel(const std::string& ep);
......
......@@ -97,7 +97,7 @@ class RequestSend final : public RequestBase {
void Process() override {
std::string varname = GetReqName();
VLOG(3) << "RequestSend var_name:" << varname;
VLOG(4) << "RequestSend var_name:" << varname;
auto scope = request_->GetMutableLocalScope();
auto invar = request_->GetVar();
......@@ -132,7 +132,7 @@ class RequestGet final : public RequestBase {
void Process() override {
// proc request.
std::string varname = request_.varname();
VLOG(3) << "RequestGet " << varname;
VLOG(4) << "RequestGet " << varname;
auto scope = request_handler_->scope();
auto invar = scope->FindVar(varname);
......@@ -178,7 +178,7 @@ class RequestPrefetch final : public RequestBase {
// prefetch process...
std::string in_var_name = request_->Varname();
std::string out_var_name = request_->OutVarname();
VLOG(3) << "RequestPrefetch, in_var_name: " << in_var_name
VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name;
auto scope = request_->GetMutableLocalScope();
......@@ -240,10 +240,10 @@ class RequestCheckpointNotify final : public RequestBase {
};
void AsyncGRPCServer::WaitServerReady() {
VLOG(3) << "AsyncGRPCServer is wait server ready";
VLOG(4) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(3) << "AsyncGRPCServer WaitSeverReady";
VLOG(4) << "AsyncGRPCServer WaitSeverReady";
}
void AsyncGRPCServer::StartServer() {
......@@ -283,7 +283,7 @@ void AsyncGRPCServer::StartServer() {
for (int i = 0; i < threadnum; i++) {
rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
&AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
VLOG(3) << t.first << " creates threads!";
VLOG(4) << t.first << " creates threads!";
}
}
......@@ -300,7 +300,7 @@ void AsyncGRPCServer::StartServer() {
auto& threads = t.second;
for (size_t i = 0; i < threads.size(); ++i) {
threads[i]->join();
VLOG(3) << t.first << " threads ends!";
VLOG(4) << t.first << " threads ends!";
}
}
}
......@@ -308,7 +308,7 @@ void AsyncGRPCServer::StartServer() {
void AsyncGRPCServer::ShutdownQueue() {
for (auto& t : rpc_cq_) {
t.second->Shutdown();
VLOG(3) << t.first << " shutdown!";
VLOG(4) << t.first << " queue shutdown!";
}
}
......@@ -317,7 +317,7 @@ void AsyncGRPCServer::ShutDownImpl() {
is_shut_down_ = true;
ShutdownQueue();
VLOG(3) << "server_ shutdown!";
VLOG(4) << "server_ shutdown!";
server_->Shutdown();
}
......@@ -325,7 +325,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
int req_id) {
std::unique_lock<std::mutex> lock(cq_mutex_);
if (is_shut_down_) {
LOG(WARNING) << "shutdown, do not TryToRegisterNewSendOne";
VLOG(4) << "shutdown, do not TryToRegisterNewSendOne";
return;
}
......
......@@ -13,6 +13,10 @@
// limitations under the License.
#include "paddle/fluid/operators/distributed/rpc_client.h"
#include "gflags/gflags.h"
// default to 3min to avoid temprary network failures.
DEFINE_int32(grpc_deadline, 180000, "deadline timeouts for grpc");
namespace paddle {
namespace operators {
......
......@@ -15,11 +15,14 @@
#pragma once
#include <string>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
DECLARE_int32(grpc_deadline);
namespace paddle {
namespace operators {
namespace distributed {
......@@ -32,26 +35,26 @@ class RPCClient {
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& var_name,
int64_t time_out = rpc_time_out) = 0;
int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual bool AsyncGetVar(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& var_name,
int64_t time_out = rpc_time_out) = 0;
int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual bool AsyncPrefetchVar(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& in_var_name,
const std::string& out_var_name,
int64_t time_out = rpc_time_out) = 0;
int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual void AsyncSendBatchBarrier(const std::string& ep,
int64_t time_out = rpc_time_out) = 0;
virtual void AsyncSendBatchBarrier(
const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual void AsyncSendFetchBarrier(const std::string& ep,
int64_t time_out = rpc_time_out) = 0;
virtual void AsyncSendFetchBarrier(
const std::string& ep, int64_t time_out = FLAGS_grpc_deadline) = 0;
virtual void AsyncCheckpointNotify(const std::string& ep,
const std::string& dir,
......@@ -64,8 +67,6 @@ class RPCClient {
virtual void Wait() = 0;
static constexpr int64_t rpc_time_out = 120 * 1000;
template <typename T>
static RPCClient* GetInstance() {
std::call_once(init_flag_, &RPCClient::Init<T>);
......
......@@ -47,11 +47,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
return (barrier_counter_[rpc_name] >= client_num_ || exit_flag_.load());
});
VLOG(3) << "batch_barrier_:" << barrier_counter_[rpc_name];
VLOG(3) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name];
}
void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
VLOG(3) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
int b = 0;
std::unique_lock<std::mutex> lock(mutex_);
b = ++barrier_counter_[rpc_name];
......@@ -100,7 +101,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
}
void RPCServer::WaitCond(const std::string& rpc_name) {
VLOG(3) << "RPCServer WaitCond " << rpc_name;
VLOG(4) << "RPCServer WaitCond " << rpc_name;
int cond = 0;
{
std::unique_lock<std::mutex> lock(mutex_);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/elementwise_add_op.h"
#include "paddle/fluid/operators/elementwise_op_function.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using framework::DataLayout;
using framework::Tensor;
using mkldnn::memory;
using mkldnn::reorder;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::sum;
template <typename T>
class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx =
ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* z = ctx.Output<Tensor>("Out");
const T* x_data = x->data<T>();
const T* y_data = y->data<T>();
T* z_data = z->mutable_data<T>(ctx.GetPlace());
int axis = ctx.Attr<int>("axis");
auto x_dims = x->dims();
auto y_dims = y->dims();
auto z_dims = z->dims();
// Execute default elementwise_add operator when
// broadcast operations need to performed.
if (x_dims != y_dims) {
auto sum_func = [](T a, T b) -> T { return a + b; };
TransformFunctor<decltype(sum_func), T,
paddle::platform::CPUDeviceContext, T>
functor(
x, y, z,
ctx.template device_context<paddle::platform::CPUDeviceContext>(),
sum_func);
axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");
trim_trailing_singular_dims(&y_dims);
axis = (y_dims.size() == 0) ? x_dims.size() : axis;
int pre, n, post;
get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
if (post == 1) {
functor.RunRowWise(n, pre);
} else {
functor.RunMidWise(n, pre, post);
}
z->set_layout(DataLayout::kMKLDNN);
z->set_format(x->format());
} else {
PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN &&
x->format() != memory::format::format_undef,
"Wrong layout/format set for X tensor");
PADDLE_ENFORCE(y->layout() == DataLayout::kMKLDNN &&
y->format() != memory::format::format_undef,
"Wrong layout/format set for X tensor");
std::vector<int> src_x_tz = framework::vectorize2int(x_dims);
std::vector<int> src_y_tz = framework::vectorize2int(y_dims);
std::vector<int> dst_tz = framework::vectorize2int(z_dims);
std::vector<memory::primitive_desc> srcs_pd;
std::vector<memory> srcs;
std::vector<float> scales = {1.0f, 1.0f};
auto src_x_pd = memory::primitive_desc(
{{src_x_tz}, memory::data_type::f32, x->format()}, mkldnn_engine);
auto src_y_pd = memory::primitive_desc(
{{src_y_tz}, memory::data_type::f32, y->format()}, mkldnn_engine);
auto src_x_memory =
memory(src_x_pd, paddle::platform::to_void_cast(x_data));
auto src_y_memory =
memory(src_y_pd, paddle::platform::to_void_cast(y_data));
srcs_pd.push_back(src_x_pd);
srcs_pd.push_back(src_y_pd);
srcs.push_back(src_x_memory);
srcs.push_back(src_y_memory);
auto dst_md =
memory::desc({dst_tz}, memory::data_type::f32, memory::format::any);
// create primitive descriptor for sum
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd);
// create mkldnn memory for dst
memory dst_memory = memory(sum_pd.dst_primitive_desc(), z_data);
std::vector<primitive::at> inputs;
inputs.push_back(srcs[0]);
inputs.push_back(srcs[1]);
// create sum primitive
auto sum_prim = sum(sum_pd, inputs, dst_memory);
std::vector<primitive> pipeline;
pipeline.push_back(sum_prim);
stream(stream::kind::eager).submit(pipeline).wait();
z->set_layout(DataLayout::kMKLDNN);
z->set_format(
(memory::format)dst_memory.get_primitive_desc().desc().data.format);
}
}
};
template <typename T>
class EltwiseAddMKLDNNGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
using Tensor = framework::Tensor;
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* out = ctx.Input<Tensor>("Out");
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
int axis = ctx.Attr<int>("axis");
auto set_mkldnn_format = [](Tensor* in, const Tensor* out) {
in->set_layout(DataLayout::kMKLDNN);
in->set_format(out->format());
};
if (x->dims() == y->dims()) {
auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
if (dx) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dx->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dx, dout);
}
if (dy) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dy->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dy, dout);
}
} else {
// Execute default kernel when broadcast is needed
ElemwiseGradCompute<paddle::platform::CPUDeviceContext, T,
IdentityGrad<T>, IdentityGrad<T>>(
ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad<T>(),
IdentityGrad<T>());
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(elementwise_add, MKLDNN, ::paddle::platform::CPUPlace,
ops::EltwiseAddMKLDNNKernel<float>)
REGISTER_OP_KERNEL(elementwise_add_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::EltwiseAddMKLDNNGradKernel<float>)
......@@ -14,8 +14,12 @@ limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
......@@ -40,6 +44,21 @@ class ElementwiseOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("Out", x_dim);
ctx->ShareLoD("X", /*->*/ "Out");
}
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto input_data_type =
framework::ToDataType(ctx.Input<Tensor>("X")->type());
#ifdef PADDLE_WITH_MKLDNN
if (platform::CanMKLDNNBeUsed(ctx)) {
return framework::OpKernelType(input_data_type, ctx.GetPlace(),
framework::DataLayout::kMKLDNN,
framework::LibraryType::kMKLDNN);
}
#endif
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
};
class ElementwiseOpInferVarType : public framework::VarTypeInference {
......@@ -65,6 +84,8 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
"for broadcasting Y onto X.")
.SetDefault(-1)
.EqualGreaterThan(-1);
AddAttr<bool>("use_mkldnn", "(bool, default false). Used by MKLDNN.")
.SetDefault(false);
AddComment(string::Sprintf(R"DOC(
Limited Elementwise %s Operator
......@@ -138,6 +159,21 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
ctx->SetOutputDim(y_grad_name, y_dims);
}
}
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto input_data_type =
framework::ToDataType(ctx.Input<Tensor>("X")->type());
#ifdef PADDLE_WITH_MKLDNN
if (platform::CanMKLDNNBeUsed(ctx)) {
return framework::OpKernelType(input_data_type, ctx.GetPlace(),
framework::DataLayout::kMKLDNN,
framework::LibraryType::kMKLDNN);
}
#endif
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
......
......@@ -164,8 +164,8 @@ void ListenAndServOp::RunSyncLoop(
}
void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
framework::ProgramDesc *program) const {
VLOG(3) << "RunAsyncLoop in";
framework::ProgramDesc *program,
framework::Scope *recv_scope) const {
// grad name to block id
std::unordered_map<std::string, int32_t> grad_to_block_id;
std::unordered_map<int32_t, std::string> id_to_grad;
......@@ -192,6 +192,10 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
block_list.push_back(blkid);
}
auto optimize_prepared = executor->Prepare(*program, block_list);
// execute global block if needed
if (block_list[0] == 1 && id_to_grad.count(1) == 0) {
executor->RunPreparedContext(optimize_prepared[0].get(), recv_scope);
}
std::unordered_map<std::string,
std::shared_ptr<framework::ExecutorPrepareContext>>
grad_to_prepared_ctx;
......@@ -203,7 +207,6 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
request_get_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
request_prefetch_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
VLOG(3) << "RunAsyncLoop into while";
while (true) {
if (rpc_service_->IsExit()) {
VLOG(4) << "get exit!rpc_processor break!";
......@@ -338,7 +341,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list,
checkpoint_block_id);
} else {
RunAsyncLoop(&executor, program);
RunAsyncLoop(&executor, program, &recv_scope);
}
}
......
......@@ -52,7 +52,8 @@ class ListenAndServOp : public framework::OperatorBase {
const int checkpoint_point_block_id) const;
void RunAsyncLoop(framework::Executor* executor,
framework::ProgramDesc* program) const;
framework::ProgramDesc* program,
framework::Scope* recv_scope) const;
void SavePort() const;
......
......@@ -37,6 +37,11 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("SeedOut", "The random seed after random cropping.")
.AsIntermediate();
AddAttr<std::vector<int>>("shape", "The shape of a cropped instance.");
AddAttr<int>("startup_seed",
"If the input 'Seed' is not initialized, the 'startup_seed' "
"will be used to replace it. Even so, the seed after random "
"crop will also be outputed to the 'SeedOut'.")
.SetDefault(0);
AddComment(R"DOC(
This operator takes a batch of instance, and do random cropping on each instance.
It means that cropping positions differs on each instance, which is determined
......@@ -49,8 +54,6 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker {
class RandomCropOpInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext* ctx) const override {
auto seed_dim = ctx->GetInputDim("Seed");
PADDLE_ENFORCE(seed_dim.size() == 1 && seed_dim[0] == 1);
auto shape = ctx->Attrs().Get<std::vector<int>>("shape");
auto x_dim = ctx->GetInputDim("X");
PADDLE_ENFORCE_GT(x_dim.size(), static_cast<int64_t>(shape.size()));
......@@ -62,7 +65,6 @@ class RandomCropOpInferShape : public framework::InferShapeBase {
out_dim[x_i] = shape[shape_i];
}
ctx->SetOutputDim("Out", framework::make_ddim(out_dim));
ctx->SetOutputDim("SeedOut", framework::make_ddim({1}));
}
};
......
......@@ -142,16 +142,22 @@ template <typename DeviceContext, typename T>
class RandomCropKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& ctx) const {
auto& seed_tensor = detail::Ref(ctx.Input<framework::LoDTensor>("Seed"));
int64_t seed = 0;
if (platform::is_cpu_place(seed_tensor.place())) {
seed = *seed_tensor.data<int64_t>();
auto& seed_tensor = detail::Ref(ctx.Input<framework::LoDTensor>("Seed"));
if (seed_tensor.IsInitialized()) {
if (platform::is_cpu_place(seed_tensor.place())) {
seed = *seed_tensor.data<int64_t>();
} else {
LOG(WARNING) << "It is slow to place seed in GPU memory. Please verify "
"your program";
framework::LoDTensor cpu_seed;
framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed);
seed = *cpu_seed.data<int64_t>();
}
} else {
LOG(WARNING) << "It is slow to place seed in GPU memory. Please verify "
"your program";
framework::LoDTensor cpu_seed;
framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed);
seed = *cpu_seed.data<int64_t>();
VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute "
"'startup_seed' instead.";
seed = ctx.Attr<int>("startup_seed");
}
auto shape = ctx.Attr<std::vector<int>>("shape");
auto& x = detail::Ref(ctx.Input<framework::LoDTensor>("X"));
......@@ -171,7 +177,7 @@ class RandomCropKernel : public framework::OpKernel<T> {
engine.discard(functor.prod_batchsize_dims_ *
(functor.rank_ - functor.num_batchsize_dims_));
*ctx.Output<framework::LoDTensor>("SeedOut")->mutable_data<int64_t>(
platform::CPUPlace()) = engine();
framework::make_ddim({1}), platform::CPUPlace()) = engine();
}
};
......
......@@ -39,6 +39,7 @@ class CustomReader : public framework::DecoratedReader {
const framework::ProgramDesc program_;
int sub_block_id_;
framework::Executor exe_;
framework::Scope scope_;
std::vector<std::string> source_var_names_;
std::vector<std::string> sink_var_names_;
......@@ -158,23 +159,24 @@ void CustomReader::ReadNext(std::vector<framework::LoDTensor>* out) {
// The scope for CustomReader's sub-block should be independent and shouldn't
// be any other computation scope's child. Otherwise, data preprocessing and
// compution cannot be concurrent.
framework::Scope scope;
framework::Scope* exe_scope = &scope_.NewScope();
// 1. Copy LoDTensors from underlying reader's output to source variables.
for (size_t i = 0; i < source_var_names_.size(); ++i) {
framework::Variable* var = scope.Var(source_var_names_[i]);
framework::Variable* var = exe_scope->Var(source_var_names_[i]);
framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
tensor->ShareDataWith(underlying_outs[i]);
tensor->set_lod(underlying_outs[i].lod());
}
// 2. Run the sub-block.
exe_.Run(program_, &scope, sub_block_id_, false, true);
exe_.Run(program_, exe_scope, sub_block_id_, false, true);
// 3. Copy LoDTensors from sink variables to out.
out->resize(sink_var_names_.size());
for (size_t i = 0; i < sink_var_names_.size(); ++i) {
const auto& tensor = detail::Ref(scope.FindVar(sink_var_names_[i]))
const auto& tensor = detail::Ref(exe_scope->FindVar(sink_var_names_[i]))
.Get<framework::LoDTensor>();
framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]);
}
scope_.DeleteScope(exe_scope);
}
} // namespace reader
......
......@@ -23,13 +23,13 @@ namespace reader {
// 'Double buffer' means we shall maintain two batches of input data at the same
// time. So the kCacheSize shoul be at least 2.
static constexpr size_t kCacheSize = 3;
static constexpr size_t kCacheSize = 5;
// There will be two bacthes out of the channel during training:
// 1. the one waiting to be sent to the channel
// 2. the one just be received from the channel, which is also being used by
// subsequent operators.
// So the channel size should be kChacheSize - 2
static constexpr size_t kChannelSize = 1; // kCacheSize - 2
static constexpr size_t kChannelSize = 3; // kCacheSize - 2
class DoubleBufferReader : public framework::DecoratedReader {
public:
......
......@@ -559,19 +559,8 @@ class Operator(object):
self.attrs[attr_name] is None):
continue
attr_val = self.attrs[attr_name]
if isinstance(attr_val, Block):
self.desc.set_block_attr(attr_name,
self.attrs[attr_name].desc)
elif isinstance(attr_val, list) and attr_val and \
all(isinstance(v, Block) for v in attr_val):
self.desc.set_blocks_attr(attr_name,
[v.desc for v in attr_val])
elif isinstance(attr_val, core.BlockDesc) or \
isinstance(attr_val, core.ProgramDesc):
self.desc.set_serialized_attr(
attr_name, attr_val.serialize_to_string())
else:
self.desc.set_attr(attr_name, attr_val)
self._update_desc_attr(attr_name, attr_val)
self.desc.check_attrs()
if self.has_kernel(type):
self.desc.infer_var_type(self.block.desc)
......@@ -718,6 +707,19 @@ class Operator(object):
ValueError: If the type of value doesn't match with desc.attr_type(name).
"""
self.attrs[name] = val
self._update_desc_attr(name, val)
def _update_desc_attr(self, name, val):
"""
Update the value of desc's attribute by attribute's name.
Args:
name(str): the attribute name.
val(bool|int|str|float|list): the value of the attribute.
Raises:
ValueError: If the type of value doesn't match with desc.attr_type(name).
"""
if isinstance(val, Block):
self.desc.set_block_attr(name, val.desc)
elif isinstance(val, list) and val and all(
......
......@@ -110,7 +110,7 @@ class BlockGuardServ(BlockGuard):
class ListenAndServ(object):
"""
**ListenAndServ Layer**
ListenAndServ is used to create a rpc server bind and listen
on specific TCP port, this server will run the sub-block when
received variables from clients.
......@@ -212,7 +212,7 @@ def Send(endpoints, send_vars, sync=True):
of send_vars to send
send_vars (list): variables to send to server
sync (bool): whether to wait the request finish
"""
assert (type(send_vars) == list)
......@@ -469,10 +469,13 @@ def open_files(filenames,
lod_levels(list): List of ints which declaring data lod_level.
dtypes(list): List of strs which declaring data type.
thread_num(int): The maximal concurrent prefetch thread number.
buffer_size(int): The size of prefetch buffer.
buffer_size(int|None): The size of prefetch buffer. If it is setted None,
buffer size will be thread_num * 3.
Default: None
pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel.
Default: True
Returns:
Variable: A Reader Variable via which we can get file data.
......@@ -492,7 +495,7 @@ def open_files(filenames,
image, label = fluid.layers.io.read_file(reader)
"""
if buffer_size is None:
buffer_size = thread_num
buffer_size = thread_num * 3
if isinstance(filenames, basestring):
filenames = [filenames]
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
......
......@@ -23,6 +23,7 @@ from layer_function_generator import autodoc, templatedoc
from tensor import concat
import utils
import random
from .. import unique_name
__all__ = [
'fc',
......@@ -4266,14 +4267,18 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
say :attr:`actual_shape` has a higher priority
than :attr:`shape`.
act (str): The non-linear activation to be applied to output variable.
inplace(bool): If this flag is set true, a new output tensor is created
whose data is copied from input x, otherwise the output
shares data with input without copying.
inplace(bool): If this flag is set true, the output
shares data with input without copying, otherwise
a new output tensor is created
whose data is copied from input x.
name (str): The name of this layer. It is optional.
Returns:
Variable: The output tensor.
Raises:
TypeError: if actual_shape is neither Variable nor None.
Examples:
.. code-block:: python
......@@ -4285,6 +4290,11 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
if not (isinstance(shape, list) or isinstance(shape, tuple)):
raise ValueError("Input shape must be a python lsit or tuple.")
inputs = {"X": x}
if isinstance(actual_shape, Variable):
inputs["Shape"] = actual_shape
elif actual_shape is not None:
raise TypeError("actual_shape should either be Variable or None")
# Validate the shape
unk_dim_idx = -1
......@@ -4305,9 +4315,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
reshaped = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type="reshape",
inputs={"X": x,
"Shape": actual_shape}
if isinstance(actual_shape, Variable) else {"X": x},
inputs=inputs,
attrs={"shape": shape,
"inplace": inplace},
outputs={"Out": reshaped})
......@@ -4889,47 +4897,39 @@ def random_crop(x, shape, seed=None):
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
"""
helper = LayerHelper("random_crop", **locals())
dtype = helper.input_dtype()
dtype = x.dtype
out = helper.create_tmp_variable(dtype)
if seed is None:
seed = random.randint(-65536, 65535)
op_attrs = {"shape": shape}
if isinstance(seed, int):
seed_value = seed
seed = helper.create_tmp_variable(dtype="int64")
helper.append_op(
type="fill_constant",
inputs={},
outputs={"Out": seed},
attrs={
"dtype": seed.dtype,
"shape": [1],
"value": float(seed_value),
"force_cpu": True
})
op_attrs["startup_seed"] = seed
seed = helper.create_variable(
name=unique_name.generate("random_crop_seed"),
dtype="int64",
persistable=True)
elif not isinstance(seed, Variable):
raise ValueError("'seed' must be a Variable or an int.")
seed_out = helper.create_tmp_variable(dtype="int64")
helper.append_op(
type="random_crop",
inputs={"X": x,
"Seed": seed},
outputs={"Out": out,
"SeedOut": seed_out},
attrs={"shape": shape})
"SeedOut": seed},
attrs=op_attrs)
return out
def log(input):
def log(x):
"""
Calculates the natural log of the given input tensor, element-wise.
.. math::
Out = \\ln(input)
Out = \\ln(x)
Args:
input (Variable): Input tensor.
x (Variable): Input tensor.
Returns:
Variable: The natural log of the input tensor computed element-wise.
......@@ -4938,7 +4938,7 @@ def log(input):
.. code-block:: python
output = fluid.layers.log(input)
output = fluid.layers.log(x)
"""
helper = LayerHelper('log', **locals())
dtype = helper.input_dtype(input_param_name='x')
......@@ -4947,18 +4947,18 @@ def log(input):
return out
def relu(input):
def relu(x):
"""
Relu takes one input data (Tensor) and produces one output data (Tensor)
where the rectified linear function, y = max(0, input), is applied to
where the rectified linear function, y = max(0, x), is applied to
the tensor elementwise.
.. math::
Out = \\max(0, input)
Out = \\max(0, x)
Args:
input (Variable): The input tensor.
x (Variable): The input tensor.
Returns:
Variable: The output tensor with the same shape as input.
......@@ -4967,7 +4967,7 @@ def relu(input):
.. code-block:: python
output = fluid.layers.relu(input)
output = fluid.layers.relu(x)
"""
helper = LayerHelper('relu', **locals())
dtype = helper.input_dtype(input_param_name='x')
......
......@@ -155,7 +155,7 @@ def cast(x, dtype):
Examples:
.. code-block:: python
data = fluid.layers.data(name='x', shape=[13], dtype='float32')
result = fluid.layers.cast(x=data, dtype='float64')
"""
......@@ -188,7 +188,7 @@ def concat(input, axis=0, name=None):
Examples:
.. code-block:: python
out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
"""
helper = LayerHelper('concat', **locals())
......@@ -238,7 +238,7 @@ def sums(input, out=None):
return out
def assign(input, output):
def assign(input, output=None):
"""
**Assign**
......@@ -246,7 +246,7 @@ def assign(input, output):
Args:
input(Variable|numpy.ndarray): The source variable
output(Variable): The destination variable
output(Variable|None): The destination variable
Returns:
Variable: The destination variable that was supplied as the *output*.
......@@ -259,6 +259,8 @@ def assign(input, output):
fluid.layers.assign(hidden, out)
"""
helper = LayerHelper('assign', **locals())
if output is None:
output = helper.create_tmp_variable(dtype=input.dtype)
if isinstance(input, Variable):
helper.append_op(
type='assign', inputs={'X': [input]}, outputs={'Out': [output]})
......
......@@ -596,12 +596,12 @@ class Auc(MetricBase):
tp, fn, tn, fp = 0, 0, 0, 0
for i, lbl in enumerate(labels):
if lbl:
if predictions[i, 1] >= thresh:
if preds[i, 1] >= thresh:
tp += 1
else:
fn += 1
else:
if predictions[i, 1] >= thresh:
if preds[i, 1] >= thresh:
fp += 1
else:
tn += 1
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
from test_elementwise_add_op import *
'''
Some tests differ from the tests defined in test_elementwise_add_op.py
because MKLDNN does not support tensors of number of dimensions 3.
Such dimensions cause exceptions in MKLDNN reorder primitive.
'''
class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
self.out = np.add(self.x, self.y)
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_scalar(TestElementwiseAddOp_scalar):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
self.y = np.random.rand(1).astype(self.dtype)
self.out = self.x + self.y
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_scalar2(TestElementwiseAddOp_scalar2):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
self.y = np.random.rand(1, 1).astype(self.dtype)
self.out = self.x + self.y
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_Vector(TestElementwiseAddOp_Vector):
def init_kernel_type(self):
self.use_mkldnn = True
class TesMKLDNNtElementwiseAddOp_broadcast_0(TestElementwiseAddOp_broadcast_0):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
self.y = np.random.rand(2).astype(self.dtype)
self.out = self.x + self.y.reshape(2, 1, 1, 1)
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_broadcast_1(TestElementwiseAddOp_broadcast_1):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
self.y = np.random.rand(3).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 3, 1, 1)
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_broadcast_2(TestElementwiseAddOp_broadcast_2):
def init_input_output(self):
self.x = np.random.rand(2, 2, 3, 4).astype(self.dtype)
self.y = np.random.rand(4).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 1, 1, 4)
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_broadcast_3(TestElementwiseAddOp_broadcast_3):
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_broadcast_4(TestElementwiseAddOp_broadcast_4):
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_rowwise_add_0(
TestElementwiseAddOp_rowwise_add_0):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
self.y = np.random.rand(3, 4).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 3, 4, 1)
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_rowwise_add_1(
TestElementwiseAddOp_rowwise_add_1):
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_channelwise_add(
TestElementwiseAddOp_channelwise_add):
def init_input_output(self):
self.x = np.random.rand(3, 5, 20, 20).astype(self.dtype)
self.y = np.random.rand(3, 1, 1, 1).astype(self.dtype)
self.out = self.x + self.y
def init_kernel_type(self):
self.use_mkldnn = True
if __name__ == '__main__':
unittest.main()
......@@ -18,19 +18,23 @@ from op_test import OpTest
class TestElementwiseAddOp(OpTest):
def init_kernel_type(self):
self.use_mkldnn = False
def setUp(self):
self.op_type = "elementwise_add"
self.dtype = np.float32
self.axis = -1
self.init_dtype()
self.init_input_output()
self.init_kernel_type()
self.init_axis()
self.inputs = {
'X': OpTest.np_dtype_to_fluid_dtype(self.x),
'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
}
self.attrs = {'axis': self.axis}
self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
self.outputs = {'Out': self.out}
def test_check_output(self):
......
......@@ -1323,16 +1323,6 @@ class DistributeTranspiler(object):
ufind.union(op1, op2)
return ufind
def _is_opt_role_op(self, op):
# NOTE: depend on oprole to find out whether this op is for
# optimize
op_maker = core.op_proto_and_checker_maker
optimize_role = core.op_proto_and_checker_maker.OpRole.Optimize
if op_maker.kOpRoleAttrName() in op.attrs and \
int(op.attrs[op_maker.kOpRoleAttrName()]) == int(optimize_role):
return True
return False
def _is_optimizer_op(self, op):
if "Param" in op.input_names and \
"LearningRate" in op.input_names:
......@@ -1423,7 +1413,10 @@ class DistributeTranspiler(object):
params_grads = []
origin_var_dict = self.origin_program.global_block().vars
for op in block.ops:
if self._is_opt_role_op(op):
# NOTE(Yancey1989): we can not use op role to distinguish an optimizer op
# or not, because all ops in optimizer sub-graph would
# sign the optimizer op role
if self._is_optimizer_op(op):
opt_ops.append(op)
# HACK(wuyi): if we find grad vars from input of optimize
# ops, we may get the output of clip op. Use syntax "@GRAD"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册