Parameter

Weight
namespace paddle

class Weight

Public Functions

- Weight(size_t height, size_t width, ParameterPtr parameter)
- Weight(size_t height, size_t width, ParameterPtr parameter, size_t offset)
- const ParameterPtr &getParameterPtr()
- void incUpdate(const UpdateCallback &callback)
- void setParameterPtr(ParameterPtr param)
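A minimal usage sketch (not taken from the Paddle sources; the header path and the 256 x 512 shape are assumptions): a Weight views an existing parameter as a matrix, and incUpdate forwards the update notification to the wrapped parameter::

    #include "paddle/parameter/Weight.h"  // assumed header path

    using namespace paddle;

    void notifyAfterBackward(ParameterPtr param, const UpdateCallback& callback) {
      // View the parameter's buffer as a 256 x 512 weight matrix (offset 0).
      Weight weight(/*height=*/256, /*width=*/512, param);

      // The wrapped parameter stays reachable for bookkeeping.
      const ParameterPtr& wrapped = weight.getParameterPtr();
      (void)wrapped;

      // Signal that this weight's gradient has been produced; the call is
      // forwarded to the underlying parameter's update machinery.
      weight.incUpdate(callback);
    }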
Regularizer
namespace paddle

class Regularizer

Subclassed by paddle::L1L2LrRegularizer, paddle::L1L2Regularizer, paddle::L1LrRegularizer, paddle::L1Regularizer, paddle::L2LrRegularizer, paddle::L2Regularizer

Public Functions

- virtual void update(const VectorPtr vecs[], const ParameterConfig &paraConfig, real learningRate, int t0, int t) const = 0
- virtual ~Regularizer()

Public Static Functions

- Regularizer *get(const std::vector<ParameterType> &types, const ParameterConfig &paraConfig)
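A hedged sketch of how the two entry points above combine (the surrounding setup is an assumption, not part of this listing): get() selects the subclass that matches the parameter's decay configuration, and update() applies the decay for the interval from t0 to t::

    #include "paddle/parameter/Regularizer.h"  // assumed header path

    using namespace paddle;

    void applyDecay(const std::vector<ParameterType>& types,
                    const ParameterConfig& paraConfig,
                    const VectorPtr vecs[],
                    real learningRate, int t0, int t) {
      // get() presumably returns nullptr when no regularization is configured.
      Regularizer* reg = Regularizer::get(types, paraConfig);
      if (reg) {
        // Passing t0 (last update time) and t (current time) lets sparse
        // updates apply the decay for all skipped steps at once.
        reg->update(vecs, paraConfig, learningRate, t0, t);
      }
    }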
class L1Regularizer

Inherits from paddle::Regularizer

Private Functions

- virtual void update(const VectorPtr vecs[], const ParameterConfig &paraConfig, real learningRate, int t0, int t) const

class L1LrRegularizer

Inherits from paddle::Regularizer

Private Functions

- virtual void update(const VectorPtr vecs[], const ParameterConfig &paraConfig, real learningRate, int t0, int t) const

class L2Regularizer

Inherits from paddle::Regularizer

Private Functions

- virtual void update(const VectorPtr vecs[], const ParameterConfig &paraConfig, real learningRate, int t0, int t) const

class L2LrRegularizer

Inherits from paddle::Regularizer

Private Functions

- virtual void update(const VectorPtr vecs[], const ParameterConfig &paraConfig, real learningRate, int t0, int t) const

class L1L2Regularizer

Inherits from paddle::Regularizer

Private Functions

- virtual void update(const VectorPtr vecs[], const ParameterConfig &paraConfig, real learningRate, int t0, int t) const

class L1L2LrRegularizer

Inherits from paddle::Regularizer

Private Functions

- virtual void update(const VectorPtr vecs[], const ParameterConfig &paraConfig, real learningRate, int t0, int t) const
Parameter
namespace paddle

Typedefs

- typedef std::shared_ptr<std::vector<void *>> UserDefinedVectorPtr
- typedef std::shared_ptr<std::vector<std::string>> SVectorPtr
struct Argument

Public Functions

- Argument()
- ~Argument()
- void countIncrement()
- int getAllCount() const
- void waitValueReady() const
- void notifyValueReady() const
- void waitGradReady() const
- void notifyGradReady() const
- int64_t getBatchSize() const
- size_t getFrameHeight() const
- size_t getFrameWidth() const
- void setFrameHeight(size_t h)
- void setFrameWidth(size_t w)
- int64_t getNumSequences() const
- int64_t getNumSubSequences() const
- bool hasSubseq() const
- const int *getCpuStartPositions() const
- void subArgFrom(const Argument &input, size_t offset, size_t height, size_t width, bool useGpu, bool trans = false, bool seqFlag = false, size_t seqStart = 0, size_t seqSize = 0)

  (value, ids, grad, sequenceStartPositions) of the output are a subset of the input's. Note that the output shares the same memory as the input.

  Parameters:
  - input[in]: input
  - offset[in]: offset in terms of rows
  - height[in]: height of output.value
  - width[in]: width of output.value
  - useGpu[in]: whether the data lives on the GPU
  - trans[in]: whether input.value is transposed
  - seqFlag[in]: whether input has sequenceStartPositions
  - seqStart[in]: offset into input.sequenceStartPositions
  - seqSize[in]: length of output.sequenceStartPositions
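For example, a hedged sketch of slicing the leading rows out of a larger Argument (the function name and row counts are illustrative; only subArgFrom's signature comes from this listing)::

    #include "paddle/parameter/Argument.h"  // assumed header path

    using namespace paddle;

    Argument firstRows(const Argument& input, size_t height, size_t width,
                       bool useGpu) {
      Argument out;
      // Rows [0, height) of input.value (and ids/grad when present); the
      // output shares the input's memory rather than copying it.
      out.subArgFrom(input, /*offset=*/0, height, width, useGpu);
      return out;
    }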
- int32_t resizeAndCopyFrom(const Argument &src, int32_t startSeq, int32_t copySize, bool useGpu, hl_stream_t stream)
- int32_t resizeAndCopyFrom(const Argument &src, int32_t startSeq, int32_t copySize, bool useGpu = FLAGS_use_gpu)
- void resizeAndCopyFrom(const Argument &src, bool useGpu, hl_stream_t stream)
- void concat(const std::vector<Argument> &args, const std::vector<int> &selectRows, const std::vector<int> &seqStartPos, bool useGpu, hl_stream_t stream, PassType passType)
- void concat(const std::vector<Argument> &src, bool useGpu = FLAGS_use_gpu, hl_stream_t stream = HPPL_STREAM_DEFAULT, PassType passType = PASS_TEST)
- void checkSubset() const
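A hedged sketch of the copy helpers above (the helper names and the copy window are illustrative): the short concat overload merges several Arguments with the documented defaults, and resizeAndCopyFrom copies a window of sequences::

    #include "paddle/parameter/Argument.h"  // assumed header path
    #include <vector>

    using namespace paddle;

    Argument mergeOutputs(const std::vector<Argument>& outputs) {
      Argument merged;
      // Uses the defaults FLAGS_use_gpu, HPPL_STREAM_DEFAULT and PASS_TEST.
      merged.concat(outputs);
      return merged;
    }

    Argument copyWindow(const Argument& src) {
      Argument window;
      // Copy 4 sequences starting at sequence 0, staying on the CPU.
      window.resizeAndCopyFrom(src, /*startSeq=*/0, /*copySize=*/4,
                               /*useGpu=*/false);
      return window;
    }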
Public Members

- IVectorPtr ids
- SVectorPtr strs
- size_t frameHeight
- size_t frameWidth
- ICpuGpuVectorPtr sequenceStartPositions
- ICpuGpuVectorPtr subSequenceStartPositions
- IVectorPtr cpuSequenceDims
- int deviceId
- int allCount
- int valueCount
- int gradCount
- LockedCondition valueReadyCond
- LockedCondition gradReadyCond
- int dataId
struct SeqInfo
namespace paddle

Typedefs

- typedef std::map<std::string, ParameterPtr> ParameterMap

struct Segment
class Parameter

Public Types

- enum MatType

  Values:

  - MAT_NORMAL
  - MAT_NORMAL_SHARED: both value and grad are shared
  - MAT_VALUE_SHARED: now used in BatchNorm in CPU mode
  - MAT_SPARSE_ROW_IDS: sparse matrix which has a full-size parameter
  - MAT_SPARSE_ROW_AUTO_GROW: sparse matrix whose parameter size scales with the sparsity rate
  - MAT_CACHE_ROW
  - MAT_SPARSE_ROW
  - MAT_SPARSE_ROW_PREFETCH: sparse matrix for prefetching parameters from the pserver
  - MAT_SPARSE_ROW_PREFETCH_FULL_SIZE: same as above, but the parameter is kept at full size for saving locally
Public Functions

- Parameter(const ParameterConfig &config, bool useGpu, bool doInit = true)
- const std::string &getName() const
- size_t getSize() const
- bool isFullSize() const
- bool useGpu() const
- int getDeviceId() const
- void setDevice(int deviceId)
- size_t getID() const
  The ID ranges from 0 to the_total_number_of_parameters - 1.
- void setID(size_t id)
  The ID is an implicit value, assigned only when the neural network is built.
- bool isStatic() const
- void enableSparseParameter()
- void enableType(ParameterType type, MatType matType = MAT_NORMAL)
  Allocate a buffer for the given type.
- void enableIntType(ParameterType type, size_t intStoreSize = 0)
  For batchGradientMachine: blockNum is the number of partitions of the matrix.
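A hedged sketch of typical buffer setup (the factory function is an assumption; enableType's signature, the MatType values and the PARAMETER_VALUE/PARAMETER_GRADIENT slots appear elsewhere in this listing)::

    #include "paddle/parameter/Parameter.h"  // assumed header path
    #include <memory>

    using namespace paddle;

    ParameterPtr makeDenseParameter(const ParameterConfig& config, bool useGpu) {
      auto param = std::make_shared<Parameter>(config, useGpu);
      // Dense value and gradient buffers; a MAT_SPARSE_ROW* type here would
      // enable sparse updates instead.
      param->enableType(PARAMETER_VALUE, Parameter::MAT_NORMAL);
      param->enableType(PARAMETER_GRADIENT, Parameter::MAT_NORMAL);
      return param;
    }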
- bool isGradSparseUpdate() const
- bool isSparseRemoteUpdate() const
- const ParameterConfig &getConfig() const
- ParameterConfig &getConfig()
- bool hasType(ParameterType pType) const
- const IVectorPtr &getIntBuf(ParameterType pType)
- void setIntBuf(ParameterType pType, const IVectorPtr &iVec)
- SparsePrefetchRowCpuMatrix *getPrefetchMatrix()
- float getLearnRate() const
- float getInitMean() const
- float getInitStandardDeviation() const
- void setValueUpdated()
- void clearValueUpdated()
- bool isValueUpdated() const
- void updateWithGradient(real learningRate)
  Update bufs_[PARAMETER_VALUE] using bufs_[PARAMETER_GRADIENT].
- void updateWithGradient(real learningRate, MatrixPtr gradMat, IVectorPtr t0, int currentTime, bool fini = false)
  Update bufs_[PARAMETER_VALUE] using a sparse row gradient matrix. See SparseRowCpuMatrix::sgdUpdate for more information.
- void updateWithGradient(real learningRate, VectorPtr grad, bool normalUpdate = true)
  This function is used for multi-GPU calculation, but only as a candidate.
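A hedged sketch of one dense update step built from the members listed here (the call order is an assumption)::

    #include "paddle/parameter/Parameter.h"  // assumed header path

    using namespace paddle;

    void denseSgdStep(Parameter& param, real learningRate) {
      // The documented dense update of bufs_[PARAMETER_VALUE] from
      // bufs_[PARAMETER_GRADIENT].
      param.updateWithGradient(learningRate);
      param.clearGradient();    // start the next step from a zero gradient
      param.setValueUpdated();  // mark the value as changed
    }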
- bool save(const std::string &filename) const
  Save the parameter value to a file.
- bool save(std::ostream &s) const
  Save the parameter to an ostream.
- bool load(const std::string &filename)
  Load the parameter value from a file.
- bool load(std::istream &is)
  Load the parameter from an istream.
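For example, a hedged round-trip sketch (the file name and error handling are illustrative; the bool return values follow the signatures above)::

    #include "paddle/parameter/Parameter.h"  // assumed header path
    #include <iostream>
    #include <string>

    using namespace paddle;

    bool checkpointRoundTrip(Parameter& param) {
      const std::string file = param.getName() + ".bin";  // illustrative name
      if (!param.save(file)) {
        std::cerr << "failed to save " << param.getName() << "\n";
        return false;
      }
      return param.load(file);  // reload to verify the round trip
    }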
- void incUpdate(const UpdateCallback &callbacks = NULL)
  After one of the parameter's gradients has been merged, you should call this function to do some additional processing.
- void clearGradient()
- void initialize()
- void randomize()
  Initialize the value according to config_: initial_mean, initial_std and initial_strategy.
- void zeroMem()
  Initialize the value to 0.
- void updateHook()
  Parameter update hook: runs before ParameterUpdater::updateImpl and may modify the gradient/momentum/etc. here, such as dropping some gradients.
- void initHook()
  Initialize all updater hooks. This method should be invoked in ParameterUpdater::init() only.
- void singleUpdate(void *data)
- bool isSparse()
- SparseFormat getFormat()
- void exec(ExecFunc func)
Public Static Functions

- void randomize(const VectorPtr &value, const ParameterConfig &config)
- VectorPtr *getTlsTempBufs()
Public Static Attributes

- const int kFormatVersion
- const std::string kMissParameterFail
- const std::string kMissParameterRand
- const std::string kMissParameterZero
Protected Functions

- void setMat(ParameterType pType, int matType)
  Create the matrix for matType. Used by gradient machines that need to specify the matrix type, instead of creating it in weights.cpp.
  Note: pType should already be enabled.
- bool isUpdatable()
- void clearUpdate()
Protected Attributes

- ParameterConfig config_
- bool useGpu_
- int deviceId_
- VectorPtr bufs_[NUM_PARAMETER_TYPES]
  bufs_ stores the parameter value and gradient. A layer should use bufs_[PARAMETER_VALUE] to form the weight matrix for calculation and store the gradient in bufs_[PARAMETER_GRADIENT].
- MatrixPtr mats_[NUM_PARAMETER_TYPES]
  Weight matrices for bufs_. Helpful when a parameter is shared by multiple layers. Callers should check whether a matrix already exists and not create it again.
- IVectorPtr intBufs_[NUM_PARAMETER_TYPES]
  Int vectors, used in some user-defined parameter types.
- int updateCounter_
- bool updated_
- SparseFormat format_
- std::vector<std::shared_ptr<IParameterUpdaterHook>> updaterHooks_

Protected Static Attributes

- ThreadLocal<std::vector<VectorPtr>> tlsTempBufs_
struct Header

#include <Parameter.h>

File header structure.
ParallelParameter

namespace paddle

Typedefs

- typedef std::shared_ptr<ParallelParameter> ParallelParameterPtr
- typedef void (ParallelParameter::*UpdateFunction)(real learnRate)
- typedef std::map<std::string, ParallelParameterPtr> ParallelParameterMap
Enums

- enum TrainerRole

  TrainerRole denotes the role of the current trainer; different roles have different jobs.

  control, major, and minor are three roles that support multi-GPU parallel SGD training. The GPU cards are organized in groups, each consisting of a major and a minor.

  Parameters:
  - single: single GPU card, single thread training.
  - control: the parameter is updated via the control role, which does not participate in the actual training. The control role is responsible for merging all majors' gradients and updating the parameter value.
  - major: a major participates in the actual training; when its local gradient is ready, it merges its corresponding minor's gradient and notifies the controller that this group's gradient is ready.
  - minor: a minor participates in the actual training; when its local gradient is ready, it only notifies its corresponding major. To apportion the work evenly, after the controller updates the parameter value, each group's minor is responsible for dispatching the latest model to itself and its major.

  Values:

  - TRAINER_ROLE_SINGLE
  - TRAINER_ROLE_CONTROL
  - TRAINER_ROLE_MAJOR
  - TRAINER_ROLE_MINOR
  - TRAINER_ROLE_MASTER
  - TRAINER_ROLE_SLAVE
Variables

- const int UPDATE_TYPE_NUM
class ParallelParameter

Subclassed by paddle::AsyncParameter, paddle::SyncParameter

Public Functions

- ParallelParameter(TrainerRole role, ParameterPtr localParam)
- virtual ~ParallelParameter()
- ParameterPtr getLocalParameter()
- bool timeWaitGradReady(int sec)
- void waitGradReady()
- void postValueReady()
- void syncUpdate(TrainerRole role, real learnRate)
- virtual void synchronizeParamter() = 0
- virtual void singleUpdate(real learnRate)
  For synchronous training.
- virtual void controlUpdate(const UpdateCallback &callback)
- virtual void majorUpdate(real learnRate)
- virtual void minorUpdate(real learnRate)
- virtual void slaveUpdate(real learnRate)
  For asynchronous training.

Public Static Functions

- ParallelParameterPtr create(TrainerRole role, ParameterPtr localParam, int asyncCount = 1)
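A hedged sketch of wrapping every local parameter for a given role (the map population is an assumption; create()'s signature and the ParameterMap/ParallelParameterMap typedefs come from this listing)::

    #include "paddle/parameter/ParallelParameter.h"  // assumed header path

    using namespace paddle;

    void wrapForRole(const ParameterMap& params, TrainerRole role,
                     ParallelParameterMap* wrapped) {
      for (const auto& kv : params) {
        // create() presumably picks the subclass that matches the role;
        // asyncCount keeps its default of 1 here.
        (*wrapped)[kv.first] = ParallelParameter::create(role, kv.second);
      }
    }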
class SyncParameter

#include <ParallelParameter.h>

This class is designed for multi-threaded training.

"Synchronous" means that multiple GPUs each calculate a fraction (e.g. 1/4) of a mini-batch, but only one merged gradient is produced.

Inherits from paddle::ParallelParameter

Public Functions

- SyncParameter(TrainerRole role, ParameterPtr localParam)
- ~SyncParameter()
- void attachControlParam(ParallelParameterPtr controler)
- void attachMajorParam(ParallelParameterPtr partner)
- void attachMinorParam(ParallelParameterPtr partner, int deviceId)
- void waitAllMajorGradReady()
- virtual void synchronizeParamter()
- virtual void singleUpdate(real learnRate)
  For synchronous training.
- virtual void controlUpdate(const UpdateCallback &callback)
- virtual void majorUpdate(real learnRate)
- virtual void minorUpdate(real learnRate)
- std::vector<ParallelParameterPtr> &getMajorPartners()
- std::vector<ParallelParameterPtr> &getMinorPartners()

Private Members

- ParameterPtr partnerParam_
- std::vector<ParallelParameterPtr> majorPartners_
- std::vector<ParallelParameterPtr> minorPartners_
- std::vector<int> minorDeviceIds_
- ParallelParameterPtr controlParam_
class AsyncParameter

Inherits from paddle::ParallelParameter

Public Functions

- AsyncParameter(TrainerRole role, int asyncCount, ParameterPtr localParam)
- void clearCounter()
- virtual void synchronizeParamter()
- virtual void slaveUpdate(real learnRate)
  In asynchronous training, the update strategy involves a slave and a master. Slave: within asyncCount steps it updates itself; beyond asyncCount it waits for the master to update.
- bool masterUpdate(ParallelParameterPtr slaveParam, const UpdateCallback &callback)
  In asynchronous training, the update strategy involves a slave and a master. Master: it only polls the slaves and does not train on data. When a slave's gradient is ready, it fetches it, updates the master's parameter, and then copies it back into the corresponding slave.
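A hedged sketch of the master side of asynchronous training, following the description above (the polling loop and names are assumptions)::

    #include "paddle/parameter/ParallelParameter.h"  // assumed header path
    #include <vector>

    using namespace paddle;

    void masterPollOnce(AsyncParameter& master,
                        const std::vector<ParallelParameterPtr>& slaves,
                        const UpdateCallback& callback) {
      for (const auto& slave : slaves) {
        // When this slave's gradient is ready, masterUpdate fetches it,
        // updates the master's parameter, and copies the new value back
        // into the slave.
        master.masterUpdate(slave, callback);
      }
    }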