Commit 66be6fed authored by qijun

add some source code comments

ISSUE=4592951

git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1447 1ad973e4-5ce8-4261-8a94-b56d1f490c56
Parent ff496cd4
 Activations
 =============
-.. doxygenfile:: paddle/gserver/activations/ActivationFunction.h
-.. doxygenfile:: paddle/gserver/activations/ActivationFunction.cpp
+.. doxygenclass:: paddle::ActivationFunction
+   :members:
@@ -28,8 +28,17 @@ limitations under the License. */
 namespace paddle {
 static ClassRegistrar<ActivationFunction> gActivationRegistrar;
+/**
+ * @def ACTIVATION_CLASS_NAME
+ * @brief Macro for getting a derived activation class name
+ * @note ACTIVATION_CLASS_NAME(softmax) softmax_;
+ *       means softmaxActivation softmax_;
+ */
 #define ACTIVATION_CLASS_NAME(ACTIVATION_NAME) ACTIVATION_NAME##Activation
+/**
+ * @def BEGIN_DEFINE_ACTIVATION
+ * @brief Macro for defining a derived activation class
+ */
 #define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME) \
 class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \
 private: \
@@ -37,7 +46,10 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
 \
 public: \
 const std::string& getName() const { return name; }
+/**
+ * @def END_DEFINE_ACTIVATION
+ * @brief Macro for registering a derived activation class
+ */
 #define END_DEFINE_ACTIVATION(ACTIVATION_NAME) \
 }; \
 const std::string ACTIVATION_CLASS_NAME(ACTIVATION_NAME)::name = \
@@ -66,9 +78,10 @@ static InitFunction __reg_activation__identity([] {
 });
 /**
- * SigmoidActivation
- *
+ * @brief Sigmoid Activation
+ * \f[
  * f(z) = \frac{1}{1+exp(-z)}
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(sigmoid)
 void forward(Argument& act) { act.value->sigmoid(*act.value); }
@@ -76,8 +89,10 @@ void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
 END_DEFINE_ACTIVATION(sigmoid)
 /**
- * Do Softmax activation for all sample.
+ * @brief Softmax Activation
+ * \f[
  * P(y=j|x) = \frac{e^{x^Tw_j}}{\sum^K_{k=1}e^{x^Tw_k}}
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(softmax)
 private:
@@ -115,8 +130,12 @@ void backward(Argument& act) {
 }
 END_DEFINE_ACTIVATION(softmax)
-/// Softmax on all frames of one sequence.
-/// Width of frame must be one.
+/**
+ * @brief Sequence_softmax Activation
+ * @note Softmax on all frames of one sequence.
+ *       Width of frame must be one.
+ */
 BEGIN_DEFINE_ACTIVATION(sequence_softmax)
 private:
 ACTIVATION_CLASS_NAME(softmax) softmax_;
@@ -156,8 +175,7 @@ void backward(Argument& act) {
 END_DEFINE_ACTIVATION(sequence_softmax)
 /**
- * Relu Activation.
- *
+ * @brief Relu Activation.
  * forward. y = max(0, z)
  *
  * derivative of relu is:
@@ -173,7 +191,7 @@ void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(relu)
 /**
- * BRelu Activation.
+ * @brief BRelu Activation.
  *
  * forward. y = min(24, max(0, z))
  *
@@ -192,9 +210,10 @@ void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(brelu)
 /**
- * tanh activation.
- *
+ * @brief Tanh Activation.
+ * \f[
  * f(z) = tanh(z)=\frac{e^z-e^{-z}}{e^z+e^{-z}}
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(tanh)
 void forward(Argument& act) { act.value->tanh(*act.value); }
@@ -203,9 +222,10 @@ void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
 END_DEFINE_ACTIVATION(tanh)
 /**
- * Scaled Tanh Activation
- *
+ * @brief Scaled Tanh Activation
+ * \f[
  * f(z) = 1.7159 * tanh(2/3*z)
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(stanh)
 private:
@@ -221,9 +241,10 @@ void backward(Argument& act) {
 END_DEFINE_ACTIVATION(stanh)
 /**
- * Soft relu activation.
- *
+ * @brief Soft Relu Activation.
+ * \f[
  * f(z) = ln(1+e^z)
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(softrelu)
 void forward(Argument& act) { act.value->softrelu(*act.value); }
@@ -232,8 +253,7 @@ void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(softrelu)
 /**
- * Abs Activation.
- *
+ * @brief Abs Activation.
  * Forward: f(z) = abs(z)
  *
  * Derivative:
@@ -258,9 +278,10 @@ void backward(Argument& act) { act.grad->absDerivative(*act.in); }
 END_DEFINE_ACTIVATION(abs)
 /**
- * Square Activation.
- *
+ * @brief Square Activation.
+ * \f[
  * f(z) = z^2.
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(square)
 void forward(Argument& act) {
@@ -274,7 +295,12 @@ void forward(Argument& act) {
 void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
 END_DEFINE_ACTIVATION(square)
+/**
+ * @brief Exponential Activation.
+ * \f[
+ * f(z) = e^z
+ * \f]
+ */
 BEGIN_DEFINE_ACTIVATION(exponential)
 void forward(Argument& act) { act.value->exp(*act.value); }
...
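
A sketch of what the macro pair above generates, using the sigmoid definition as the example. The private member and the registration statement are elided by the hunks, so their shape here is an assumption rather than a literal copy of the source:

    // Approximate expansion of
    //   BEGIN_DEFINE_ACTIVATION(sigmoid) ... END_DEFINE_ACTIVATION(sigmoid)
    // (the static name member and the registrar call are assumed):
    class sigmoidActivation : public ActivationFunction {
    private:
      static const std::string name;  // assumed to be the elided private member

    public:
      const std::string& getName() const { return name; }
      void forward(Argument& act) { act.value->sigmoid(*act.value); }
      void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
    };
    const std::string sigmoidActivation::name = "sigmoid";

Note that backward() is handed act.value, the already-computed output y: for sigmoid the derivative can be written in terms of the output as f'(z) = f(z)(1 - f(z)), and for tanh as f'(z) = 1 - f(z)^2, so the forward result is all the *Derivative() helpers need.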
@@ -17,7 +17,18 @@ limitations under the License. */
 #include <string>
 namespace paddle {
 struct Argument;
+/**
+ * @brief An activation function transforms a set of input signals into an
+ *        output signal. The purpose of the activation function is to
+ *        introduce non-linearity into the network.
+ *
+ * @note Common activation functions are provided, including linear,
+ *       sigmoid, softmax, sequence_softmax, relu, brelu, tanh, stanh,
+ *       softrelu, abs, square, exponential.
+ */
 class ActivationFunction {
 public:
 static ActivationFunction* create(const std::string& type);
@@ -26,16 +37,25 @@ public:
 virtual ~ActivationFunction() {}
-// act.value <- f(act.value),
-// where f is the activation function.
-// Suppose that before calling forward(), act.value is x and
-// after forward() is called, act.value is y, then y = f(x),
-// Usually, act is Layer::output_
+/**
+ * @brief Forward propagation
+ *
+ * act.value <- f(act.value),
+ * where f is the activation function.
+ * Suppose that before calling forward(), act.value is x and
+ * after forward() is called, act.value is y, then y = f(x).
+ *
+ * Usually, act is Layer::output_
+ */
 virtual void forward(Argument& act) = 0;
-// x and y are defined in the above comment for forward().
-// Before calling backward(), act.grad = dE / dy, where E is the error/cost.
-// After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
+/**
+ * @brief Backward propagation
+ *
+ * x and y are defined in the above comment for forward().
+ * - Before calling backward(), act.grad = dE / dy, where E is the error/cost
+ * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
+ */
 virtual void backward(Argument& act) = 0;
 virtual const std::string& getName() const = 0;
...
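
A minimal usage sketch of this interface, assuming only the signatures shown above (the caller is hypothetical, e.g. code inside a layer's forward/backward pass):

    // Assumes create("sigmoid") looks up the class registered in the .cpp
    // and returns a heap-allocated object, as its signature suggests.
    std::unique_ptr<ActivationFunction> act(
        ActivationFunction::create("sigmoid"));

    act->forward(output_);   // output_.value <- f(output_.value), i.e. y = f(x)
    // ... the cost function fills output_.grad with dE/dy ...
    act->backward(output_);  // output_.grad <- dE/dx = (dE/dy) * (dy/dx)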
@@ -41,7 +41,8 @@ limitations under the License. */
 namespace paddle {
 /**
- * @brief Macro for registering a data provider.
+ * @def REGISTER_DATA_PROVIDER
+ * @brief Macro for registering a data provider
  */
 #define REGISTER_DATA_PROVIDER(__type_name, __class_name) \
 static InitFunction __reg_type_##__type_name([]() { \
@@ -52,37 +53,68 @@ class DataBatch;
 class BufferBatch;
 typedef std::shared_ptr<DataBatch> DataBatchPtr;
 typedef std::shared_ptr<BufferBatch> BufferBatchPtr;
+/**
+ * @brief Data for batch training a neural network
+ */
 class DataBatch {
 public:
 DataBatch() : size_(0) { data_.clear(); }
+/**
+ * @brief Get batch size
+ * @return batch size
+ */
 int64_t getSize() const { return size_; }
+/**
+ * @brief Get the number of sequences of sequence data
+ * @return number of sequences
+ */
 int64_t getNumSequences() const {
 if (data_.empty()) return size_;
 return data_[0].sequenceStartPositions
 ? data_[0].sequenceStartPositions->getSize() - 1
 : size_;
 }
+/**
+ * @brief Set batch size
+ * @param[in] size batch size
+ */
 void setSize(int64_t size) { size_ = size; }
+/**
+ * @brief Get the size of the argument vector
+ * @return size of the argument vector
+ * @note For usual supervised learning, both input data and a label are
+ *       needed, so there will be two arguments.
+ */
 int64_t getNumStreams() const { return data_.size(); }
+/**
+ * @brief Get the argument at index i
+ * @param[in] i index in the argument vector
+ * @return the argument at index i
+ */
 const Argument& getStream(int i) const { return data_[i]; }
+/**
+ * @brief Get all arguments
+ * @return the argument vector
+ */
 std::vector<Argument>& getStreams() { return data_; }
+/**
+ * @brief Get all arguments (const)
+ * @return a copy of the argument vector
+ */
 std::vector<Argument> getStreams() const { return data_; }
+/**
+ * @brief Clear the DataBatch
+ */
 void clear() {
 data_.clear();
 size_ = 0;
 }
 /**
- * The order in which each data stream is appended must match the order
- * specified in stream_names of DataConfig. The stream_names can be obtained
- * using DataProvider::getStreamNames().
+ * @brief Append data to DataBatch
+ * @param[in] data matrix data
+ * @note The order in which each data stream is appended must match the order
+ *       specified in stream_names of DataConfig. The stream_names can be
+ *       obtained using DataProvider::getStreamNames().
  */
@@ -93,7 +125,10 @@ public:
 }
 /**
- * The order in which each data stream is appended must match the order
- * specified in stream_names of DataConfig. The stream_names can be obtained
- * using DataProvider::getStreamNames().
+ * @brief Append sequence data to DataBatch
+ * @param[in] data matrix data
+ * @param[in] sequenceStartPositions sequence data
+ * @note The order in which each data stream is appended must match the order
+ *       specified in stream_names of DataConfig. The stream_names can be
+ *       obtained using DataProvider::getStreamNames().
  */
@@ -104,24 +139,32 @@ public:
 argu.sequenceStartPositions = sequenceStartPositions;
 data_.push_back(argu);
 }
+/**
+ * @brief Append label data
+ * @param[in] label label data
+ * @param[in] value matrix data, default null
+ */
 void appendLabel(IVectorPtr label, MatrixPtr value = nullptr) {
 Argument argu;
 argu.ids = label;
 argu.value = value;
 data_.push_back(argu);
 }
+/**
+ * @brief Append user-defined data
+ * @param[in] ptr user-defined data
+ */
 void appendUserDefinedPtr(UserDefinedVectorPtr ptr) {
 Argument argu;
 argu.udp = ptr;
 data_.push_back(argu);
 }
 /**
- * @param argus: DataBatch.getStreams()
- * @param size: DataBatch.getSize()
- * @param dataId: sub dataprovider id (in MultiDataProvider)
+ * @brief Append arguments
+ * @param[in] argus DataBatch.getStreams()
+ * @param[in] size DataBatch.getSize()
+ * @param[in] dataId sub dataprovider id (in MultiDataProvider)
  */
 void appendArguments(const std::vector<Argument>& argus, int size,
 int dataId) {
@@ -133,7 +176,14 @@ public:
 }
 protected:
+/**
+ * @brief batch size
+ */
 int64_t size_;
+/**
+ * @brief A batch of data consists of an Argument vector; each argument
+ *        corresponds to one type of input data.
+ */
 std::vector<Argument> data_;
 };
@@ -228,8 +278,8 @@ protected:
 };
 /**
- * DataProvider supplies data for training
- * It can supplies multiple streams of data.
+ * @brief Base class for DataProvider, which supplies data for training
+ * @note It can supply multiple streams of data.
  * For typical supervised training, there are two streams:
  * one is for input, one is for label.
  */
@@ -253,16 +303,23 @@ public:
 const DataConfig& getConfig() const { return config_; }
 void setSkipShuffle() { skipShuffle_ = true; }
+/**
+ * @brief Get the next batch of training samples
+ * @param[in] size size of training samples to get
+ * @param[out] batch a batch of training samples
+ * @return actual size of the obtained training samples
+ */
 int64_t getNextBatch(int64_t size, DataBatch* batch);
 /**
- * Shuffle the data set
+ * @brief Shuffle the data set
  */
 virtual void shuffle() = 0;
 /**
- * reset() must be called before any calls to getNextBatch()
- * reset all the value of index
+ * @brief reset all the values of index
+ * @note reset() must be called before any calls to getNextBatch().
  * IMPORTANT: subclass reset() should always call the base class reset()
  * at the end of the function
  */
@@ -274,10 +331,17 @@ public:
 }
 /**
- * return the number of training samples in the data set.
- * return -1 to indicate unlimited number of samples.
+ * @brief Get the number of training samples
+ * @return the number of training samples in the data set
+ * @note returns -1 to indicate an unlimited number of samples
  */
 virtual int64_t getSize() = 0;
+/**
+ * @brief Get the next batch of training samples internally
+ * @param[in] size size of training samples to get
+ * @param[out] batch a batch of training samples
+ * @return actual size of the obtained training samples
+ */
 virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0;
@@ -288,7 +352,12 @@ protected:
 bool useGpu_;
 std::unique_ptr<DoubleBuffer> doubleBuffer_;
 ThreadLocal<std::vector<MatrixPtr>> constantSlots_;
+/**
+ * @brief Get the next batch of training samples from the buffer
+ * @param[in] size size of training samples to get
+ * @param[out] batch a batch of training samples
+ * @return actual size of the obtained training samples
+ */
 int64_t getNextBatchFromBuffer(int64_t size, DataBatch* batch);
 void initAsyncLoader();
...
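
Taken together, the DataBatch accessors and DataProvider::getNextBatch() imply a consumption loop along these lines. This is a sketch from the signatures above only; the two-stream layout and the assumption that a non-positive return value ends the pass are conventions, not guarantees from this header:

    provider->reset();               // must precede any getNextBatch() call
    DataBatch batch;
    const int64_t kBatchSize = 128;  // illustrative value
    while (provider->getNextBatch(kBatchSize, &batch) > 0) {
      // For typical supervised data, getNumStreams() == 2:
      const Argument& input = batch.getStream(0);  // appended via appendData()
      const Argument& label = batch.getStream(1);  // appended via appendLabel()
      // ... run the forward/backward pass on input and label ...
    }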
@@ -26,8 +26,10 @@ limitations under the License. */
 namespace paddle {
 /**
- * @brief Data file with each sample specified by proto message
- * DataSample defined in DataFormat.proto.
+ * @brief Provide data from a protobuf data file, with each sample
+ *        specified by a proto message
+ *
+ * DataSample defined in DataFormat.proto.
  *
  * The file format is
  *
@@ -68,19 +70,38 @@ public:
 virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
 protected:
+/**
+ * @brief load protobuf data from a list of files
+ * @param[in] fileName file name of a file which contains
+ *            a list of file names
+ */
 void loadData(const std::string& fileName);
+/**
+ * @brief load protobuf data from a file
+ * @param[in] fileName data file name
+ */
 void loadDataFile(const std::string& fileName);
+/**
+ * @brief check the data header of each data sample
+ * @param[in] header data header read from the protobuf data
+ */
 void checkDataHeader(const DataHeader& header);
+/**
+ * @brief fill protobuf data into slot_,
+ *        slot_ is a vector of ProtoSlot in memory.
+ * @param[in] sample data sample read from the protobuf data
+ */
 void fillSlots(const DataSample& sample);
 /**
- * return true if each sample is one sequence, i.e., independent
+ * @brief return true if each sample is one sequence, i.e., independent
  * of other samples.
  */
 inline bool iidData() const { return sequenceStartPositions_.empty(); }
-/// check that sample is consistent with header_
+/**
+ * @brief check that the sample is consistent with header_
+ */
 void checkSample(const DataSample& sample);
 template <class Op>
@@ -129,20 +150,21 @@ protected:
 int64_t currentSequenceIndex_;
-/// The size should be the number of sequences.
+// The size should be the number of sequences.
 std::vector<size_t> shuffledSequenceIds_;
 ThreadLocalD<DataBatch> cpuBatch_;
 ThreadLocalD<DataBatch> gpuBatch_;
 RWLock lock_;
-// stats for number of none-zeros entries
-std::vector<StatPtr> nnzStats_;
+std::vector<StatPtr> nnzStats_;  // stats for the number of non-zero entries
 };
 /**
- * Special use for Proto data: instances should contain sparse-non-value slots
- * and label. ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
+ * @brief Special use for Proto data: instances should contain sparse-non-value
+ *        slots and label.
+ *
+ * @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
  */
 class ProtoSequenceDataProvider : public ProtoDataProvider {
 public:
...
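
The loadData()/loadDataFile() split above suggests the usual list-file convention: loadData() is given a plain-text file naming the actual protobuf data files, one per line, and presumably calls loadDataFile() on each entry, with checkDataHeader()/checkSample() validating every file against header_ and fillSlots() copying each DataSample into slot_. A hypothetical list file (the paths are illustrative):

    data/train-00000.bin
    data/train-00001.bin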
@@ -33,7 +33,11 @@ void Evaluator::eval(const NeuralNetwork& nn) {
 totalScore_ += score;
 updateSamplesNum(arguments);
 }
+/**
+ * @brief classification error Evaluator
+ *
+ * The config file api is classification_error_evaluator.
+ */
 class ClassificationErrorEvaluator : public Evaluator {
 public:
 virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
@@ -99,8 +103,11 @@ public:
 }
 };
-// sequence level classification error stats:
-// if any frame in one sequence has error, the sequence is error
+/**
+ * @brief sequence classification error Evaluator
+ * @note sequence-level classification error stats:
+ *       if any frame in one sequence has an error, the sequence has an error
+ */
 class SequenceClassificationErrorEvaluator
 : public ClassificationErrorEvaluator {
 public:
@@ -135,7 +142,12 @@ public:
 };
 REGISTER_EVALUATOR(seq_classification_error,
 SequenceClassificationErrorEvaluator);
+/**
+ * @brief sum Evaluator
+ *
+ * Calculates the sum of the output or label.
+ *
+ * The config file api is sum_evaluator.
+ */
 class SumEvaluator : public Evaluator {
 public:
 SumEvaluator() : cpuLabel_(nullptr), cpuWeight_(nullptr) {}
@@ -218,13 +230,18 @@ private:
 IVectorPtr cpuLabel_;
 MatrixPtr cpuWeight_;
 };
+/**
+ * @brief column sum Evaluator
+ * @note column sum for the colIdx-th column:
+ *       - colIdx = 0: the 0-th column.
+ *       - colIdx > 0: the colIdx-th column.
+ *       - colIdx < 0: the last colIdx-th column.
+ *
+ * The config file api is column_sum_evaluator.
+ */
 class ColumnSumEvaluator : public Evaluator {
 public:
-// column sum for the colIdx-th column
-// colIdx = 0: the 0-th column
-// > 0: the colIdx-th column
-// < 0: the last colIdx-th column
 explicit ColumnSumEvaluator(int32_t colIdx)
 : colIdx_(colIdx), colNum_(0), sum_(nullptr) {}
@@ -845,7 +862,11 @@ Evaluator* Evaluator::create(const EvaluatorConfig& config) {
 evaluator->init(config);
 return evaluator;
 }
+/**
+ * @brief print the value of each layer.
+ *
+ * The config file api is value_printer_evaluator.
+ */
 class ValuePrinter : public Evaluator {
 public:
 ValuePrinter() {}
@@ -882,7 +903,11 @@ public:
 virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(value_printer, ValuePrinter);
+/**
+ * @brief print the gradient of each layer.
+ *
+ * The config file api is gradient_printer_evaluator.
+ */
 class GradientPrinter : public Evaluator {
 public:
 GradientPrinter() {}
@@ -908,7 +933,11 @@ public:
 virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(gradient_printer, GradientPrinter);
+/**
+ * @brief print the row-max id vector of each layer
+ *
+ * The config file api is maxid_printer_evaluator.
+ */
 class MaxIdPrinter : public Evaluator {
 private:
 IVectorPtr maxIds_;
@@ -946,7 +975,11 @@ public:
 virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(max_id_printer, MaxIdPrinter);
+/**
+ * @brief print the sequence max frames of each layer
+ *
+ * The config file api is maxframe_printer_evaluator.
+ */
 class MaxFramePrinter : public Evaluator {
 private:
 IVectorPtr maxIds_;
@@ -998,30 +1031,29 @@ public:
 REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
 /**
- * Sequence text printer will print text according to index matrix and a
- * dictionary. There can be multiple input to this layer:
+ * @brief print text according to an index matrix and a dictionary.
  *
- * 1) If there is only one input, the input must be a matrix containing
+ * There can be multiple inputs to this layer:
+ * - If there is only one input, the input must be a matrix containing
  * the sequence of indices;
- *
- * 2) If there are more than one input, the first input should be ids,
+ * - If there are more than one input, the first input should be ids,
  * and are interpreted as sample ids.
  *
  * The output format will be:
  *
- * 1) sequence without sub-sequence, and there is probability.
+ * - sequence without sub-sequence, and there is a probability.
  *
  * @code
 * id \t prob space_seperated_tokens_from_dictionary_according_to_seq
 * @endcode
 *
- * 2) sequence without sub-sequence, and there is not probability.
+ * - sequence without sub-sequence, and there is no probability.
 *
 * @code
 * id \t space_seperated_tokens_from_dictionary_according_to_seq
 * @endcode
 *
- * 3) sequence with sub-sequence, and there is not probability.
 *
+ * - sequence with sub-sequence, and there is no probability.
 *
 * @code
 * id \t space_seperated_tokens_from_dictionary_according_to_sub_seq
@@ -1032,6 +1064,8 @@ REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
 * Typically SequenceTextPrinter layer takes output of maxid or RecurrentGroup
 * with maxid (when generating) as an input.
 *
+ * The config file api is seqtext_printer_evaluator.
+ *
 */
 class SequenceTextPrinter : public Evaluator {
 private:
@@ -1172,7 +1206,11 @@ public:
 }
 };
 REGISTER_EVALUATOR(seq_text_printer, SequenceTextPrinter);
+/**
+ * @brief print classification error.
+ *
+ * The config file api is classification_error_printer_evaluator.
+ */
 class ClassificationErrorPrinter : public ClassificationErrorEvaluator {
 public:
 virtual void updateSamplesNum(const std::vector<Argument>& arguments) {}
...
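
To make the sequence-level stats concrete: if a pass contains 10 sequences and 3 of them have at least one misclassified frame, SequenceClassificationErrorEvaluator counts 3 sequence errors regardless of how many frames inside each of those sequences were wrong, whereas the frame-level ClassificationErrorEvaluator it derives from would count every wrong frame individually.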
@@ -24,12 +24,21 @@ limitations under the License. */
 namespace paddle {
 class NeuralNetwork;
+/**
+ * @def REGISTER_EVALUATOR
+ * @brief Macro for registering an evaluator class
+ */
 #define REGISTER_EVALUATOR(__type_name, __class_name) \
 static InitFunction __reg_type_##__type_name([]() { \
 Evaluator::registrar_.registerClass<__class_name>(#__type_name); \
 })
+/**
+ * @brief Base class for Evaluator
+ *
+ * Evaluating the performance of a model is very important. An evaluator
+ * indicates how well a trained model scores (predicts) on a dataset.
+ */
 class Evaluator {
 public:
 static Evaluator* create(const EvaluatorConfig& config);
@@ -41,7 +50,7 @@ public:
 virtual void init(const EvaluatorConfig& config) { config_ = config; }
 /**
- * start to evaluate some data
+ * @brief start to evaluate some data
  */
 virtual void start() {
 numSamples_ = 0;
@@ -49,20 +58,21 @@ public:
 }
 /**
- * Process a batch of data.
+ * @brief Process a batch of data.
  */
 virtual void eval(const NeuralNetwork& nn);
 /**
- * Process a batch of data.
- * return the score for the batch if it make sense to sum the score across
- * batches. Otherwise evaluator should return 0 and override finish() and
+ * @brief Process a batch of data.
+ * @return the score for the batch if it makes sense to sum the score across
+ * batches.
+ * @note Otherwise the evaluator should return 0 and override finish() and
 * printStats() to do the right calculation.
 */
 virtual real evalImp(std::vector<Argument>& arguments) = 0;
 /**
- * Update the number of processed samples
+ * @brief Update the number of processed samples
 */
 virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
 numSamples_ += arguments[0].getBatchSize();
@@ -81,11 +91,14 @@ public:
 }
 /**
- * finish the evaluation.
+ * @brief finish the evaluation.
 */
 virtual void finish() {}
-/// finish() should be called before printStats
+/**
+ * @brief print the statistics of the evaluation result
+ * @note finish() should be called before printStats
+ */
 virtual void printStats(std::ostream& os) {
 os << config_.name() << "="
 << (numSamples_ ? totalScore_ / numSamples_ : 0);
@@ -124,17 +137,23 @@ public:
 virtual void finish() {}
 virtual void printStats(std::ostream&) {}
 };
+/**
+ * @brief evaluate AUC using the colIdx-th column as the prediction.
+ *
+ * The AUC (Area Under the Curve) is a common evaluation metric
+ * for binary classification problems. It computes the area under
+ * the receiver operating characteristic (ROC) curve.
+ *
+ * @note the colIdx-th column:
+ * - colIdx = 0: the 0-th column.
+ * - colIdx > 0: the colIdx-th column.
+ * - colIdx < 0: the last colIdx-th column.
+ *
+ * The config file api is auc_evaluator.
+ */
 class AucEvaluator : public Evaluator {
 public:
-/**
- * @brief evaluate AUC using colIdx-th column as prediction.
- *
- * - colIdx = 0: the 0-th column.
- * - colIdx > 0: the colIdx-th column.
- * - colIdx < 0: the last colIdx-th column.
- *
- */
 AucEvaluator(int32_t colIdx)
 : colIdx_(colIdx),
 realColumnIdx_(0),
@@ -174,13 +193,11 @@ private:
 };
 /**
- * @brief RankAucEvaluator calculates the AUC of each list
- * (i.e., titles under the same query), and averages them.
- *
- * Each list should be organized as a sequence.
- * The inputs of this evaluator is [output, click, pv].
- * If pv is not provided, it will be set to 1.
- * The types of click and pv are dense value.
+ * @brief RankAucEvaluator calculates the AUC of each list (i.e., titles
+ * under the same query), and averages them. Each list should be organized
+ * as a sequence. The inputs of this evaluator are [output, click, pv]. If pv
+ * is not provided, it will be set to 1. The types of click and pv are
+ * dense values.
 */
 class RankAucEvaluator : public Evaluator {
 public:
@@ -204,7 +221,16 @@ private:
 double calcRankAuc(real* outputData, real* clickData, real* pvData,
 size_t size);
 };
+/**
+ * @brief precision, recall and F1-score Evaluator
+ * \f[
+ * precision = \frac{tp}{tp+fp} \\
+ * recall = \frac{tp}{tp+fn} \\
+ * f1 = 2*\frac{precision*recall}{precision+recall}
+ * \f]
+ *
+ * The config file api is precision_recall_evaluator.
+ */
 class PrecisionRecallEvaluator : public Evaluator {
 public:
 // Evaluate precision, recall and F1 score
@@ -274,8 +300,10 @@ private:
 }
 };
 /**
- * Positive-negative pair rate Evaluator
+ * @brief positive-negative pair rate Evaluator
+ *
+ * The config file api is pnpair_evaluator.
 */
 class PnpairEvaluator : public Evaluator {
 public:
...
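
The start()/eval()/finish()/printStats() ordering documented above gives the evaluator lifecycle. The driver loop here is hypothetical; the trailing comment applies the precision/recall formulas to concrete counts:

    evaluator->start();                // resets numSamples_ (and related totals)
    while (/* more batches in the test pass */) {
      evaluator->eval(nn);             // accumulates evalImp() over each batch
    }
    evaluator->finish();               // must be called before printStats()
    evaluator->printStats(std::cout);  // prints "<name>=<totalScore_/numSamples_>"

    // Worked PrecisionRecallEvaluator numbers for tp = 8, fp = 2, fn = 4:
    //   precision = 8 / (8 + 2)              = 0.8
    //   recall    = 8 / (8 + 4)              = 0.667
    //   f1        = 2 * 0.8 * 0.667 / 1.467  = 0.727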
@@ -20,7 +20,19 @@ limitations under the License. */
 #include "paddle/utils/ThreadLocal.h"
 namespace paddle {
+/**
+ * @brief A layer for calculating the cosine similarity between two vectors
+ * \f[
+ * f(x,y)=scale\frac{x_1y_1+x_2y_2+...+x_ny_n}{\sqrt{x_1^2+x_2^2+...
+ * +x_n^2}\sqrt{y_1^2+y_2^2+...+y_n^2}}
+ * \f]
+ *
+ * - Input1: A vector (batchSize * dataDim)
+ * - Input2: A vector (batchSize * dataDim) or (1 * dataDim)
+ * - Output: A vector (batchSize * 1)
+ *
+ * The config file api is cos_sim.
+ */
 class CosSimLayer : public Layer {
 public:
 explicit CosSimLayer(const LayerConfig& config)
...
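
A quick numeric check of the cosine formula with scale = 1: for x = (3, 4) and y = (4, 3), f(x, y) = (3·4 + 4·3) / (√(3²+4²) · √(4²+3²)) = 24 / 25 = 0.96, and the layer emits one such scalar per sample in the batch.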
@@ -21,13 +21,16 @@ limitations under the License. */
 namespace paddle {
 /**
- * A layer for computing cosine similarity between a vector an each row of a
- * matrix,
+ * @brief A layer for computing cosine similarity between a vector
+ * and each row of a matrix:
 * out[i] = cos_scale * cos(in1, in2(i,:));
- * which is used in NEURAL TURING MACHINE
- * Input: a vector (batchSize x dataDim) and a matrix in vec form (batchSize x
- * (weightDim*dataDim))
- * Output: a vector (batchSize x weightDim)
+ * @note used in NEURAL TURING MACHINE
+ *
+ * Input1: a vector (batchSize * dataDim)
+ *
+ * Input2: a matrix in vector form (batchSize * (weightDim*dataDim))
+ *
+ * Output: a vector (batchSize * weightDim)
 */
 class CosSimVecMatLayer : public Layer {
...
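
Shape example: with batchSize = 16, dataDim = 4 and weightDim = 3, Input1 is 16 × 4, Input2 is 16 × 12 (each row storing a 3 × 4 matrix in vector form), and the output is 16 × 3, out[i] holding the scaled cosine of in1 against each of the three stored rows.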
@@ -22,18 +22,18 @@ limitations under the License. */
 namespace paddle {
 /**
- * A layer for data normalization
- * Input: One and only one input layer is accepted. The input layer must
+ * @brief A layer for data normalization
+ * - Input: One and only one input layer is accepted. The input layer must
 * be DataLayer with dense data type.
- * Output: The normalization of the input data
+ * - Output: The normalization of the input data
 *
 * Reference:
 * LA Shalabi, Z Shaaban, B Kasasbeh. Data mining: A preprocessing engine
 *
 * Three data normalization methods are considered:
- * z-score: y = (x-mean)/std
- * min-max: y = (x-min)/(max-min)
- * decimal-scaling: y = x/10^j, where j is the smallest integer such that
- * max(|y|)<1
+ * - z-score: y = (x-mean)/std
+ * - min-max: y = (x-min)/(max-min)
+ * - decimal-scaling: y = x/10^j, where j is the smallest integer such that
+ *   max(|y|)<1
 */
...
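
Worked numbers for the three methods on x = (2, 4, 6, 8): the mean is 5 and the (population) std is √5 ≈ 2.24, so z-score gives ≈ (−1.34, −0.45, 0.45, 1.34); min-max gives (0, 1/3, 2/3, 1); and decimal scaling picks j = 1 (the smallest j with max(|y|) < 1, since max(|x|) = 8), giving (0.2, 0.4, 0.6, 0.8).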
@@ -23,8 +23,9 @@ limitations under the License. */
 namespace paddle {
 /**
- * @brief basic parent layer of normalization
- * Normalize the input in local region
+ * @brief Basic parent layer of normalization
+ *
+ * @note Normalizes the input in a local region
 */
 class NormLayer : public Layer {
 public:
@@ -35,7 +36,9 @@ public:
 return true;
 }
-// create norm layer by norm_type
+/**
+ * @brief create a norm layer by norm_type
+ */
 static Layer* create(const LayerConfig& config);
 };
...
@@ -21,10 +21,11 @@ limitations under the License. */
 namespace paddle {
 /**
- * A layer for computing the outer product of two vectors,
- * which is used in NEURAL TURING MACHINE
- * Input: two vectors: batchSize x dim1, batchSize x dim2
- * Output: a matrix: (batchSize x (dim1*dim2))
+ * @brief A layer for computing the outer product of two vectors
+ * @note used in NEURAL TURING MACHINE
+ * Input1: vector (batchSize * dim1)
+ * Input2: vector (batchSize * dim2)
+ * Output: a matrix: (batchSize * (dim1*dim2))
 */
 class OuterProdLayer : public Layer {
...
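
For instance, with dim1 = 2 and dim2 = 3, a sample u = (u1, u2) and v = (v1, v2, v3) produce the 6-element row (u1v1, u1v2, u1v3, u2v1, u2v2, u2v3), i.e. the outer product u·vᵀ flattened (assuming row-major flattening, which the comment does not pin down).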
@@ -22,7 +22,7 @@ limitations under the License. */
 namespace paddle {
 /**
- * @brief basic parent layer of pooling
+ * @brief Basic parent layer of pooling
 * Pools the input within regions
 */
 class PoolLayer : public Layer {
@@ -41,7 +41,9 @@ protected:
 public:
 explicit PoolLayer(const LayerConfig& config) : Layer(config) {}
-// create pooling layer by pool_type
+/**
+ * @brief create a pooling layer by pool_type
+ */
 static Layer* create(const LayerConfig& config);
 virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
...
@@ -20,7 +20,9 @@ limitations under the License. */
 #include <vector>
 namespace paddle {
+/**
+ * @brief Basic parent layer of different kinds of pooling
+ */
 class PoolProjectionLayer : public PoolLayer {
 protected:
 size_t imgSizeH_, imgSizeW_;
@@ -30,7 +32,9 @@ public:
 size_t getSize();
 explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {}
 };
+/**
+ * @brief A layer for max pooling
+ */
 class MaxPoolProjectionLayer : public PoolProjectionLayer {
 public:
 explicit MaxPoolProjectionLayer(const LayerConfig& config)
@@ -41,7 +45,9 @@ public:
 virtual void forward(PassType passType);
 virtual void backward(const UpdateCallback& callback = nullptr);
 };
+/**
+ * @brief A layer for average pooling
+ */
 class AvgPoolProjectionLayer : public PoolProjectionLayer {
 public:
 explicit AvgPoolProjectionLayer(const LayerConfig& config)
...
@@ -18,7 +18,12 @@ limitations under the License. */
 #include "paddle/math/BaseMatrix.h"
 namespace paddle {
-/* resize a minibatch matrix h*w to h'*w' */
+/**
+ * @brief A layer for resizing a minibatch matrix h*w to h'*w'
+ * @note
+ *       origin matrix: (height * width)
+ *       resized matrix: (height * width / size) * size
+ */
 class ResizeLayer : public Layer {
 public:
 explicit ResizeLayer(const LayerConfig& config) : Layer(config) {}
...
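
As arithmetic: resizing a 4 × 6 minibatch matrix with size = 8 gives (4 · 6 / 8) × 8 = 3 × 8; the element count is preserved, so height · width must be divisible by the configured size.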