Commit 3ca3620c authored by wangyanfei01

ISSUE=4587061 tiny modification on remote updater code doc

git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1411 1ad973e4-5ce8-4261-8a94-b56d1f490c56
Parent f0637523
@@ -24,11 +24,6 @@ limitations under the License. */
 namespace paddle {
-/**
- * this module call ParameterClient to exchange parameters among all
- * parameters servers.
- */
 // TODO(yanfei):
 // I think that the biggest feature of rdma is packet lossless control
 // feature instead of high bandwiths, zero copy and gpu-direct rdma in
@@ -41,21 +36,21 @@ namespace paddle {
 //
 /**
- * normal remote parameter updater for dense parameters.
+ * Normal remote parameter updater for dense parameters.
  *
- * it first packs all parameters for all pservers using ParameterClient
- * module, then wait for return merged parameters data from all pservers.
- * the synchronization pattern specified by sync-sgd or async-sgd is
+ * It first packs all parameters for all pservers using ParameterClient
+ * module, then wait for merged parameters data from all pservers.
+ * The synchronization pattern specified by sync-sgd or async-sgd is
  * achieved by all pservers with the help of the controller within this
  * remote parameter updater.
- * this module indeedly bridges the gradient machines and parameter servers.
- * it help to transfer the parameters from acceleration device to cpu end
- * for network. it contains additional parameters copy buffers for
+ * This module indeedly bridges the gradient machines and parameter servers.
+ * It helps to transfer the parameters from acceleration device to cpu end
+ * for network. It contains additional parameters copy buffers for
  * acceleration devices at cpu end, such as gpu, otherwise it will
- * directly use original parameters data to launching.
+ * directly use original parameters data to update pservers.
  *
- * this remote parameter updater do not use pipeline mechanism to hide
- * copy latency from gpu to cpu buffer, as well as the overlapped between
+ * This remote parameter updater does not use pipeline mechanism to hide
+ * copy latency from gpu to cpu buffer. In addition the overlapped between
  * backward and communication is not supported.
  */
 class RemoteParameterUpdater : public ParameterUpdater {
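The class comment above describes a fully blocking round trip: gradients are copied from the accelerator into a cpu-side buffer, pushed to all pservers, and the merged parameters are copied back before training continues. Below is a minimal sketch of that flow; `FakeParameterClient`, `sendAndReceive`, and the buffer handling are invented stand-ins for illustration, not the real ParameterClient interface.

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for the parameter-client round trip: push local
// gradients, get back the merged parameters from all pservers.
struct FakeParameterClient {
  std::vector<float> sendAndReceive(const std::vector<float>& gradients) {
    std::vector<float> merged(gradients.size());
    for (std::size_t i = 0; i < gradients.size(); ++i) {
      merged[i] = -0.01f * gradients[i];  // pretend the pservers applied sgd
    }
    return merged;
  }
};

// Sketch of one dense remote update step: device -> host copy buffer,
// blocking network exchange, host -> device copy.  Nothing overlaps,
// matching the "does not use pipeline mechanism" note above.
int main() {
  FakeParameterClient client;
  std::vector<float> deviceGrad = {1.0f, 2.0f, 3.0f};  // gradients on the gpu
  std::vector<float> deviceParam(3, 0.0f);             // parameters on the gpu

  std::vector<float> hostBuf = deviceGrad;                     // gpu -> cpu copy buffer
  std::vector<float> merged = client.sendAndReceive(hostBuf);  // network round trip
  deviceParam = merged;                                        // cpu buffer -> gpu

  std::printf("updated parameter[0] = %f\n", deviceParam[0]);
  return 0;
}
```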
@@ -74,9 +69,10 @@ public:
   */
  virtual void init(std::vector<ParameterPtr>& parameters);
  /**
-   * start batch
-   * one batch training exhibits stateful feature to help
-   * to do performance tuning, sgd optimization if necessary.
+   * @brief start batch
+   *
+   * @note one batch training exhibits stateful feature to help
+   * to do performance tuning, sgd optimization if necessary.
   */
  virtual PassType startBatch(int64_t batchSize) {
    if (localUpdater_) {
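The snippet above shows `startBatch` forwarding to an optional `localUpdater_`. The sketch below mirrors only that delegation pattern, assuming simplified stand-in types (`PassType`, `LocalUpdaterSketch`, `RemoteUpdaterSketch`) rather than the actual ParameterUpdater hierarchy.

```cpp
#include <cstdint>
#include <cstdio>
#include <memory>

// Simplified stand-ins: these types only mirror the per-batch delegation
// visible in startBatch(); they are not the real updater interfaces.
enum PassType { PASS_TRAIN };

struct LocalUpdaterSketch {
  PassType startBatch(int64_t batchSize) {
    std::printf("local sgd updater: startBatch(%lld)\n", (long long)batchSize);
    return PASS_TRAIN;
  }
};

struct RemoteUpdaterSketch {
  std::unique_ptr<LocalUpdaterSketch> localUpdater_;  // optional, may be null

  PassType startBatch(int64_t batchSize) {
    if (localUpdater_) {
      // keep the wrapped local updater's per-batch state in sync
      localUpdater_->startBatch(batchSize);
    }
    return PASS_TRAIN;
  }
};

int main() {
  RemoteUpdaterSketch updater;
  updater.localUpdater_.reset(new LocalUpdaterSketch());
  updater.startBatch(128);
  return 0;
}
```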
@@ -170,16 +166,16 @@ protected:
 // to really hide pserver latency in backward computation.
 //
 /**
- * this updater add additional optimization for overlapping synchronization
+ * This updater add additional optimization for overlapping synchronization
  * from pservers with backward computation.
  *
- * parameter can be sent to pservers when related backward stage is finished.
- * this concurrent udpater does data copy from acceleration device to host
+ * Parameter can be sent to pservers when related backward stage is finished.
+ * This concurrent udpater does data copy from acceleration device to host
  * memory aynchronously. In addition internal parameter client reads data in
  * host memory and send them to all pservers in next stage. So this class
  * help to pipeline device-to-host copy and host-to-network to hide network
  * latency in backward stage.
- * it contains separate send and recv thread for pipeline usage.
+ * It contains separate send and recv thread for pipeline usage.
  */
 class ConcurrentRemoteParameterUpdater : public RemoteParameterUpdater {
 public:
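The comment above is about overlapping communication with backward computation: each parameter is handed off for sending as soon as its own backward stage finishes, so the network transfer runs while the rest of backward continues. The sketch below illustrates that producer/consumer pipeline with a toy blocking queue and a dedicated send thread; it is a simplified model under assumed names, not the ConcurrentRemoteParameterUpdater implementation.

```cpp
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>

// Toy blocking queue used to hand finished gradient blocks to a send thread,
// mimicking "send parameters when their backward stage is finished".
template <typename T>
class BlockingQueue {
public:
  void push(T v) {
    { std::lock_guard<std::mutex> g(m_); q_.push(std::move(v)); }
    cv_.notify_one();
  }
  T pop() {
    std::unique_lock<std::mutex> l(m_);
    cv_.wait(l, [this] { return !q_.empty(); });
    T v = std::move(q_.front());
    q_.pop();
    return v;
  }
private:
  std::mutex m_;
  std::condition_variable cv_;
  std::queue<T> q_;
};

int main() {
  BlockingQueue<int> sendQueue;   // parameter ids whose gradients are ready
  const int kNumParams = 4;
  const int kDone = -1;

  // "send thread": would copy host buffers out to the pservers.
  std::thread sender([&] {
    for (int id = sendQueue.pop(); id != kDone; id = sendQueue.pop()) {
      std::printf("sending gradients of parameter %d to pservers\n", id);
    }
  });

  // "backward": as each parameter finishes its backward stage, enqueue it so
  // communication overlaps with the remaining backward computation.
  for (int id = kNumParams - 1; id >= 0; --id) {
    std::printf("backward finished for parameter %d\n", id);
    sendQueue.push(id);
  }
  sendQueue.push(kDone);
  sender.join();
  return 0;
}
```

The same idea extends to a separate recv thread that copies merged parameters back into device buffers while the trainer moves on, which is what the "separate send and recv thread" line refers to.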
@@ -246,15 +242,15 @@ private:
 // the synchronization between sparse and dense udpater. it could also
 // reduce the threads for managing all connections.
 /**
- * this class is specified for updating sparse parameters.
+ * This class is specified for updating sparse parameters.
  *
- * it allows part of parameter to be exchanged with all pservers.
- * if sparse input assigned, part gradients of first hidden layer
+ * It allows part of parameter to be exchanged with all pservers.
+ * If sparse input assigned, part gradients of first hidden layer
  * could remained zero which can not need to be exchanged within
- * all pservers. this is the key optimization point for this updater
+ * all pservers. This is the key optimization point for this updater
  *
- * for updating sparse parameters, all latest parameters are stored
- * in pservers instead of keeping full copy at train end, so need
+ * For updating sparse parameters, all latest parameters are stored
+ * in pservers instead of keeping full copy at train end, so need to
  * prefetch parameters weight value which can be changed in next-batch
  * before doing next forwardbackward. Also, with above fact that the
  * parameters can be stored in pserver instead of trainer, we can
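The comment above explains the sparse strategy: the trainer keeps no full copy of the sparse parameter, prefetches only the rows the next batch touches, and exchanges gradients only for those rows. The toy sketch below models that idea with a `FakeSparsePserver` backed by a `std::map`; all names here are hypothetical and not part of the actual updater or parameter-server API.

```cpp
#include <cstdio>
#include <map>
#include <set>
#include <vector>

// Toy model of the sparse exchange: the "pserver" holds the authoritative
// rows, the trainer prefetches and pushes back only the touched subset.
struct FakeSparsePserver {
  std::map<int, std::vector<float>> rows;  // rowId -> weights

  std::map<int, std::vector<float>> prefetch(const std::set<int>& rowIds) {
    std::map<int, std::vector<float>> out;
    for (int id : rowIds) out[id] = rows[id];  // missing rows start empty
    return out;
  }
  void pushGradients(const std::map<int, std::vector<float>>& grads, float lr) {
    for (const auto& kv : grads) {
      auto& w = rows[kv.first];
      w.resize(kv.second.size(), 0.0f);
      for (std::size_t i = 0; i < kv.second.size(); ++i) w[i] -= lr * kv.second[i];
    }
  }
};

int main() {
  FakeSparsePserver pserver;
  std::set<int> touched = {3, 42};  // row ids used by the next batch

  // prefetch the weights the next forward/backward will read
  auto localRows = pserver.prefetch(touched);
  std::printf("prefetched %zu rows\n", localRows.size());

  // pretend backward produced gradients only for the touched rows
  std::map<int, std::vector<float>> grads = {{3, {0.1f, 0.2f}}, {42, {0.3f, 0.4f}}};
  pserver.pushGradients(grads, /*lr=*/0.01f);  // exchange only the sparse part
  return 0;
}
```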
@@ -329,10 +325,10 @@ protected:
 };
 /**
- * class for supporting normal updater and sparse updater
+ * Class for supporting normal updater and sparse updater
  *
- * not all parts of one model are sparse, so it exists dense updater
- * for normal layers which sparse updater is for sparse layers.
+ * Not all parts of one model are sparse, so it exists dense updater
+ * for normal layers while sparse updater is for sparse layers.
  *
  * it directly call internal dense and sparse udpater individually.
  */
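The comment above describes a composite that drives a dense updater for normal layers and a sparse updater for sparse layers side by side. The sketch below shows that delegation with an assumed minimal `UpdaterIface`; it is illustrative only, not the real composite updater's interface.

```cpp
#include <cstdio>
#include <memory>
#include <vector>

// Assumed minimal updater interface, used only to show the composite pattern.
struct UpdaterIface {
  virtual ~UpdaterIface() = default;
  virtual void startBatch(long batchSize) = 0;
  virtual void finishBatch() = 0;
};

// Composite: forwards every call to each internal (dense, sparse) updater.
struct CompositeUpdaterSketch : UpdaterIface {
  std::vector<std::unique_ptr<UpdaterIface>> updaters;

  void startBatch(long batchSize) override {
    for (auto& u : updaters) u->startBatch(batchSize);
  }
  void finishBatch() override {
    for (auto& u : updaters) u->finishBatch();
  }
};

// A leaf updater that just logs, standing in for the dense/sparse members.
struct PrintingUpdater : UpdaterIface {
  const char* name;
  explicit PrintingUpdater(const char* n) : name(n) {}
  void startBatch(long batchSize) override {
    std::printf("%s: startBatch(%ld)\n", name, batchSize);
  }
  void finishBatch() override { std::printf("%s: finishBatch()\n", name); }
};

int main() {
  CompositeUpdaterSketch composite;
  composite.updaters.push_back(std::unique_ptr<UpdaterIface>(new PrintingUpdater("dense")));
  composite.updaters.push_back(std::unique_ptr<UpdaterIface>(new PrintingUpdater("sparse")));
  composite.startBatch(128);
  composite.finishBatch();
  return 0;
}
```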