diff --git a/doc/source/gserver/activations/index.rst b/doc/source/gserver/activations/index.rst
index 6ceab41ccb3282ebe474bea74873b3d29b16de9e..ccdae41128cd6b4edddda0ac44a825082d7495c9 100644
--- a/doc/source/gserver/activations/index.rst
+++ b/doc/source/gserver/activations/index.rst
@@ -1,5 +1,5 @@
 Activations
 =============
 
-.. doxygenfile:: paddle/gserver/activations/ActivationFunction.h
-.. doxygenfile:: paddle/gserver/activations/ActivationFunction.cpp
+.. doxygenclass:: paddle::ActivationFunction
+   :members:
diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index cf4fe5966b3a6d1c8d07aad04fb71fb4b684c248..9918d20d9082ae6c07684ce05eba68c4989dd5d5 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -28,8 +28,25 @@ limitations under the License. */
 namespace paddle {
 
 static ClassRegistrar<ActivationFunction> gActivationRegistrar;
+/**
+ * @def ACTIVATION_CLASS_NAME
+ * @brief Macro for getting the derived activation class name
+ * @note ACTIVATION_CLASS_NAME(softmax) softmax_;
+ * means softmaxActivation softmax_;
+ */
 #define ACTIVATION_CLASS_NAME(ACTIVATION_NAME) ACTIVATION_NAME##Activation
-
+/**
+ * @def BEGIN_DEFINE_ACTIVATION
+ * @brief Macro for defining a derived activation class
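+ * @note A usage sketch, paired with END_DEFINE_ACTIVATION below (this is
+ * exactly how the sigmoid activation in this file is defined):
+ * @code
+ * BEGIN_DEFINE_ACTIVATION(sigmoid)
+ * void forward(Argument& act) { act.value->sigmoid(*act.value); }
+ * void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
+ * END_DEFINE_ACTIVATION(sigmoid)
+ * @endcode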
+ */
 #define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME)                             \
   class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \
   private:                                                                   \
@@ -37,7 +46,10 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
                                                        \
 public:                                                \
   const std::string& getName() const { return name; }
-
+/**
+ * @def END_DEFINE_ACTIVATION
+ * @brief Macro for registering a derived activation class
+ */
 #define END_DEFINE_ACTIVATION(ACTIVATION_NAME)                      \
   };                                                                \
   const std::string ACTIVATION_CLASS_NAME(ACTIVATION_NAME)::name = \
@@ -66,9 +78,10 @@ static InitFunction __reg_activation__identity([] {
 });
 
 /**
- * SigmoidActivation
- *
+ * @brief Sigmoid Activation
+ * \f[
  * f(z) = \frac{1}{1+exp(-z)}
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(sigmoid)
 void forward(Argument& act) { act.value->sigmoid(*act.value); }
@@ -76,8 +89,10 @@ void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
 END_DEFINE_ACTIVATION(sigmoid)
 
 /**
- * Do Softmax activation for all sample.
+ * @brief Softmax Activation
+ * \f[
  * P(y=j|x) = \frac{e^{x^Tw_j}}{\sum^K_{k=1}e^{x^Tw_k}}
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(softmax)
 private:
@@ -115,8 +130,12 @@ void backward(Argument& act) {
 }
 END_DEFINE_ACTIVATION(softmax)
 
-/// Softmax on all frames of one sequence.
-/// Width of frame must be one.
+
+/**
+ * @brief Sequence_softmax Activation
+ * @note Softmax on all frames of one sequence.
+ * Width of frame must be one.
+ */
 BEGIN_DEFINE_ACTIVATION(sequence_softmax)
 private:
 ACTIVATION_CLASS_NAME(softmax) softmax_;
@@ -156,8 +175,7 @@ void backward(Argument& act) {
 END_DEFINE_ACTIVATION(sequence_softmax)
 
 /**
- * Relu Activation.
- *
+ * @brief Relu Activation.
  * forward. y = max(0, z)
  *
  * derivative of relu is:
@@ -173,7 +191,7 @@ void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(relu)
 
 /**
- * BRelu Activation.
+ * @brief BRelu Activation.
  *
  * forward. y = min(24, max(0, z))
  *
@@ -192,9 +210,10 @@ void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(brelu)
 
 /**
- * tanh activation.
- *
+ * @brief Tanh Activation.
+ * \f[
  * f(z) = tanh(z)=\frac{e^z-e^{-z}}{e^z+e^{-z}}
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(tanh)
 void forward(Argument& act) { act.value->tanh(*act.value); }
@@ -203,9 +222,10 @@ void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
 END_DEFINE_ACTIVATION(tanh)
 
 /**
- * Scaled Tanh Activation
- *
+ * @brief Scaled Tanh Activation
+ * \f[
  * f(z) = 1.7159 * tanh(2/3*z)
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(stanh)
 private:
@@ -221,9 +241,10 @@ void backward(Argument& act) {
 END_DEFINE_ACTIVATION(stanh)
 
 /**
- * Soft relu activation.
- *
+ * @brief Soft Relu Activation.
+ * \f[
  * f(z) = ln(1+e^z)
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(softrelu)
 void forward(Argument& act) { act.value->softrelu(*act.value); }
@@ -232,8 +253,7 @@ void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(softrelu)
 
 /**
- * Abs Activation.
- *
+ * @brief Abs Activation.
  * Forward: f(z) = abs(z)
  *
  * Derivative:
@@ -258,9 +278,10 @@ void backward(Argument& act) { act.grad->absDerivative(*act.in); }
 END_DEFINE_ACTIVATION(abs)
 
 /**
- * Square Activation.
- *
+ * @brief Square Activation.
+ * \f[
  * f(z) = z^2.
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(square)
 void forward(Argument& act) {
@@ -274,7 +295,12 @@ void forward(Argument& act) {
 
 void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
 END_DEFINE_ACTIVATION(square)
-
+/**
+ * @brief Exponential Activation.
+ * \f[
+ * f(z) = e^z
+ * \f]
+ */
 BEGIN_DEFINE_ACTIVATION(exponential)
 void forward(Argument& act) { act.value->exp(*act.value); }
diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h
index 0c5eddfc8ab84f7ab05a4fdd0a970a3189432ffb..29860b4a736c37dee70c56731820a4197ea4cdbe 100644
--- a/paddle/gserver/activations/ActivationFunction.h
+++ b/paddle/gserver/activations/ActivationFunction.h
@@ -17,7 +17,26 @@ limitations under the License. */
 
 #include <string>
 
 namespace paddle {
+
 struct Argument;
+/**
+ * @brief An activation function transforms a set of input signals into an
+ * output signal. The purpose of the activation function is to introduce
+ * non-linearity into the network.
+ *
+ * @note Common activation functions are provided, including linear,
+ * sigmoid, softmax, sequence_softmax, relu, brelu, tanh, stanh,
+ * softrelu, abs, square, exponential.
+ *
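+ * @note A usage sketch (hypothetical caller code; `output` stands for an
+ * Argument such as Layer::output_):
+ * @code
+ * ActivationFunction* act = ActivationFunction::create("sigmoid");
+ * act->forward(output);   // output.value <- f(output.value)
+ * act->backward(output);  // output.grad <- dE/dx
+ * @endcode
+ *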
+ */
 class ActivationFunction {
 public:
   static ActivationFunction* create(const std::string& type);
@@ -26,16 +37,25 @@ public:
 
   virtual ~ActivationFunction() {}
 
-  // act.value <- f(act.value),
-  // where f is the activation function.
-  // Suppose that before calling forward(), act.value is x and
-  // after forward() is called, act.value is y, then y = f(x),
-  // Usually, act is Layer::output_
+  /**
+   * @brief Forward propagation
+   *
+   * act.value <- f(act.value),
+   * where f is the activation function.
+   * Suppose that before calling forward(), act.value is x and
+   * after forward() is called, act.value is y, then y = f(x).
+   *
+   * Usually, act is Layer::output_
+   */
   virtual void forward(Argument& act) = 0;
 
-  // x and y are defined in the above comment for forward().
-  // Before calling backward(), act.grad = dE / dy, where E is the error/cost.
-  // After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
+  /**
+   * @brief Backward propagation
+   *
+   * x and y are defined in the above comment for forward().
+   * - Before calling backward(), act.grad = dE / dy, where E is the error/cost
+   * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
+   */
   virtual void backward(Argument& act) = 0;
 
   virtual const std::string& getName() const = 0;
diff --git a/paddle/gserver/dataproviders/DataProvider.h b/paddle/gserver/dataproviders/DataProvider.h
index fb404405fbeb0e2da4785c09e3cce4ef7da71320..aab5d93fcaa1e7286db7c2aeb60c6d10695a5ced 100644
--- a/paddle/gserver/dataproviders/DataProvider.h
+++ b/paddle/gserver/dataproviders/DataProvider.h
@@ -41,7 +41,8 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * @brief Macro for registering a data provider.
+ * @def REGISTER_DATA_PROVIDER
+ * @brief Macro for registering a data provider
  */
 #define REGISTER_DATA_PROVIDER(__type_name, __class_name) \
   static InitFunction __reg_type_##__type_name([]() {     \
@@ -52,37 +53,68 @@ class DataBatch;
 class BufferBatch;
 typedef std::shared_ptr<DataBatch> DataBatchPtr;
 typedef std::shared_ptr<BufferBatch> BufferBatchPtr;
-
+/**
+ * @brief A batch of data used to train a neural network
+ */
 class DataBatch {
 public:
   DataBatch() : size_(0) { data_.clear(); }
-
+  /**
+   * @brief Get batch size
+   * @return batch size
+   */
   int64_t getSize() const { return size_; }
-
+  /**
+   * @brief Get the number of sequences of sequence data
+   * @return number of sequences
+   */
   int64_t getNumSequences() const {
     if (data_.empty()) return size_;
     return data_[0].sequenceStartPositions
                ? data_[0].sequenceStartPositions->getSize() - 1
               : size_;
   }
-
+  /**
+   * @brief Set batch size
+   * @param[in] size batch size
+   */
   void setSize(int64_t size) { size_ = size; }
-
+  /**
+   * @brief Get size of the argument vector
+   * @return size of the argument vector
+   * @note For usual supervised learning, both input data and labels are
+   * needed, so there will be two arguments.
+   */
   int64_t getNumStreams() const { return data_.size(); }
 
+  /**
+   * @brief Get an argument with index i
+   * @param[in] i index in the argument vector
+   * @return the argument with index i
+   */
   const Argument& getStream(int i) const { return data_[i]; }
-
+  /**
+   * @brief Get all arguments
+   * @return an argument vector
+   */
   std::vector<Argument>& getStreams() { return data_; }
-
+  /**
+   * @brief Get all arguments (const)
+   * @return an argument vector
+   */
   std::vector<Argument> getStreams() const { return data_; }
-
+  /**
+   * @brief Clear DataBatch
+   */
   void clear() {
     data_.clear();
     size_ = 0;
  }
 
   /**
-   * The order in which each data stream is appended must match the order
+   * @brief Append data to DataBatch
+   * @param[in] data matrix data
+   * @note The order in which each data stream is appended must match the order
    * specified in stream_names of DataConfig. The stream_names can be obtained
    * using DataProvider::getStreamNames().
    */
@@ -93,7 +125,18 @@ public:
   }
 
   /**
-   * The order in which each data stream is appended must match the order
+   * @brief Append sequence data to DataBatch
+   * @param[in] data matrix data
+   * @param[in] sequenceStartPositions sequence data
+   * @note The order in which each data stream is appended must match the order
    * specified in stream_names of DataConfig. The stream_names can be obtained
    * using DataProvider::getStreamNames().
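+   * @note A hedged sketch of filling a batch with one input stream and one
+   * label stream (variable names are hypothetical):
+   * @code
+   * DataBatch batch;
+   * batch.setSize(numSamples);
+   * batch.appendData(inputValue, sequenceStartPositions);
+   * batch.appendLabel(labelIds);
+   * @endcode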
    */
@@ -104,24 +139,32 @@ public:
     argu.sequenceStartPositions = sequenceStartPositions;
     data_.push_back(argu);
   }
-
+  /**
+   * @brief Append label data
+   * @param[in] label label data
+   * @param[in] value matrix data, default null
+   */
   void appendLabel(IVectorPtr label, MatrixPtr value = nullptr) {
     Argument argu;
     argu.ids = label;
     argu.value = value;
     data_.push_back(argu);
   }
-
+  /**
+   * @brief Append user defined data
+   * @param[in] ptr user defined data
+   */
   void appendUserDefinedPtr(UserDefinedVectorPtr ptr) {
     Argument argu;
     argu.udp = ptr;
     data_.push_back(argu);
   }
 
-  /**
-   * @param argus: DataBatch.getStreams()
-   * @param size: DataBatch.getSize()
-   * @param dataId: sub dataprovider id (in MultiDataProvider)
+  /**
+   * @brief Append arguments
+   * @param[in] argus DataBatch.getStreams()
+   * @param[in] size DataBatch.getSize()
+   * @param[in] dataId sub dataprovider id (in MultiDataProvider)
    */
   void appendArguments(const std::vector<Argument>& argus,
                        int size, int dataId) {
@@ -133,7 +176,14 @@ public:
   }
 
 protected:
+  /**
+   * @brief batch size
+   */
   int64_t size_;
+  /**
+   * @brief A batch is a vector of Argument; each Argument corresponds to
+   * one type of input data.
+   */
   std::vector<Argument> data_;
 };
 
@@ -228,8 +278,8 @@ protected:
 };
 
 /**
- * DataProvider supplies data for training
- * It can supplies multiple streams of data.
+ * @brief Base class for DataProvider, which supplies data for training
+ * @note It can supply multiple streams of data.
  * For typical supervised training, there are two streams:
  * one is for input, one is for label.
  */
@@ -253,16 +303,32 @@ public:
   const DataConfig& getConfig() const { return config_; }
 
   void setSkipShuffle() { skipShuffle_ = true; }
+
+  /**
+   * @brief Get next batch of training samples
+   * @param[in] size number of training samples to get
+   * @param[out] batch a batch of training samples
+   * @return the actual number of training samples obtained
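+   * @note A hedged sketch of a consumer loop (assumes the returned size is
+   * 0 once a pass over the data is finished; trainOneBatch is hypothetical):
+   * @code
+   * dataProvider->reset();
+   * DataBatch batch;
+   * while (dataProvider->getNextBatch(batchSize, &batch) > 0) {
+   *   trainOneBatch(batch);
+   * }
+   * @endcode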
+   */
   int64_t getNextBatch(int64_t size, DataBatch* batch);
 
   /**
-   * Shuffle the data set
+   * @brief Shuffle the data set
    */
   virtual void shuffle() = 0;
 
   /**
-   * reset() must be called before any calls to getNextBatch()
-   * reset all the value of index
+   * @brief reset all the value of index
+   * @note reset() must be called before any calls to getNextBatch()
    * IMPORTANT: subclass reset() should always call the base class reset()
    * at the end of the function
    */
@@ -274,10 +331,17 @@ public:
   }
 
   /**
-   * return the number of training samples in the data set.
-   * return -1 to indicate unlimited number of samples.
+   * @brief Get the number of training samples
+   * @return the number of training samples in the data set.
+   * @note return -1 to indicate an unlimited number of samples.
    */
   virtual int64_t getSize() = 0;
+  /**
+   * @brief Get next batch of training samples internally
+   * @param[in] size number of training samples to get
+   * @param[out] batch a batch of training samples
+   * @return the actual number of training samples obtained
+   */
   virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0;
 
@@ -288,7 +352,12 @@ protected:
   bool useGpu_;
   std::unique_ptr<DoubleBuffer> doubleBuffer_;
   ThreadLocal<std::vector<MatrixPtr>> constantSlots_;
-
+  /**
+   * @brief Get next batch of training samples from the buffer
+   * @param[in] size number of training samples to get
+   * @param[out] batch a batch of training samples
+   * @return the actual number of training samples obtained
+   */
   int64_t getNextBatchFromBuffer(int64_t size, DataBatch* batch);
 
   void initAsyncLoader();
diff --git a/paddle/gserver/dataproviders/ProtoDataProvider.h b/paddle/gserver/dataproviders/ProtoDataProvider.h
index 02db5f870db74be7237d96d187f134b94192933e..846dd7673abe8b836be1b728bb690daa0e8acc20 100644
--- a/paddle/gserver/dataproviders/ProtoDataProvider.h
+++ b/paddle/gserver/dataproviders/ProtoDataProvider.h
@@ -26,8 +26,10 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * @brief Data file with each sample specified by proto message
- * DataSample defined in DataFormat.proto.
+ * @brief Provide data from a protobuf data file with each sample
+ * specified by proto message
+ *
+ * DataSample defined in DataFormat.proto.
  *
  * The file format is
  *
@@ -68,19 +70,38 @@ public:
   virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
 
 protected:
+  /**
+   * @brief load protobuf data from a list of files
+   * @param[in] fileName file name of a file which contains
+   * a list of file names
+   */
   void loadData(const std::string& fileName);
-  void loadDataFile(const std::string& fileName);
+  /**
+   * @brief load protobuf data from a file
+   * @param[in] fileName data file name
+   */
+  void loadDataFile(const std::string& fileName);
 
+  /** @brief check the data header of each data sample
+   * @param[in] header data header read from protobuf data
+   */
   void checkDataHeader(const DataHeader& header);
+  /**
+   * @brief fill protobuf data into slot_,
+   * slot_ is a vector of ProtoSlot in memory.
+   * @param[in] sample data sample read from protobuf data
+   */
   void fillSlots(const DataSample& sample);
 
   /**
-   * return true if each sample is one sequence, i.e., independent
+   * @brief return true if each sample is one sequence, i.e., independent
    * of other samples.
    */
   inline bool iidData() const { return sequenceStartPositions_.empty(); }
 
-  /// check that sample is consistent with header_
+  /**
+   * @brief check that sample is consistent with header_
+   */
   void checkSample(const DataSample& sample);
 
   template <class Op>
@@ -129,20 +150,21 @@ protected:
   int64_t currentSequenceIndex_;
 
-  /// The size should be the number of sequences.
+  // The size should be the number of sequences.
   std::vector<size_t> shuffledSequenceIds_;
 
   ThreadLocalD<DataBatch> cpuBatch_;
   ThreadLocalD<DataBatch> gpuBatch_;
 
   RWLock lock_;
-  // stats for number of none-zeros entries
-  std::vector<StatPtr> nnzStats_;
+  std::vector<StatPtr> nnzStats_;  // stats for number of non-zero entries
 };
 
 /**
- * Special use for Proto data: instances should contain sparse-non-value slots
- * and label. ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
+ * @brief Special use for Proto data: instances should contain
+ * sparse-non-value slots and label.
+ *
+ * @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
  */
 class ProtoSequenceDataProvider : public ProtoDataProvider {
 public:
diff --git a/paddle/gserver/evaluators/Evaluator.cpp b/paddle/gserver/evaluators/Evaluator.cpp
index a50eecdbb758ada0184a51cdb4546efe2f000d71..7bdcdaae53c638c93e567a2943586dcc27d75ded 100644
--- a/paddle/gserver/evaluators/Evaluator.cpp
+++ b/paddle/gserver/evaluators/Evaluator.cpp
@@ -33,7 +33,11 @@ void Evaluator::eval(const NeuralNetwork& nn) {
   totalScore_ += score;
   updateSamplesNum(arguments);
 }
-
+/**
+ * @brief classification error Evaluator
+ *
+ * The config file api is classification_error_evaluator.
+ */
 class ClassificationErrorEvaluator : public Evaluator {
 public:
   virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
@@ -99,8 +103,11 @@ public:
   }
 };
 
-// sequence level classification error stats:
-// if any frame in one sequence has error, the sequence is error
+/**
+ * @brief sequence classification error Evaluator
+ * @note sequence level classification error stats:
+ * if any frame in one sequence has an error, the sequence is an error
+ */
 class SequenceClassificationErrorEvaluator
     : public ClassificationErrorEvaluator {
 public:
@@ -135,7 +142,12 @@ public:
 };
 REGISTER_EVALUATOR(seq_classification_error,
                    SequenceClassificationErrorEvaluator);
-
+/**
+ * @brief sum Evaluator
+ * Calculate the sum of output or label
+ *
+ * The config file api is sum_evaluator.
+ */
 class SumEvaluator : public Evaluator {
 public:
   SumEvaluator() : cpuLabel_(nullptr), cpuWeight_(nullptr) {}
@@ -218,13 +230,18 @@ private:
   IVectorPtr cpuLabel_;
   MatrixPtr cpuWeight_;
 };
-
+/**
+ * @brief column sum Evaluator
+ * @note column sum for the colIdx-th column
+ * - colIdx = 0: the 0-th column.
+ * - colIdx > 0: the colIdx-th column.
+ * - colIdx < 0: the |colIdx|-th column from the end.
+ *
+ * The config file api is column_sum_evaluator.
+ *
+ */
 class ColumnSumEvaluator : public Evaluator {
 public:
-  // column sum for the colIdx-th column
-  // colIdx = 0: the 0-th column
-  //          > 0: the colIdx-th column
-  //          < 0: the last colIdx-th column
   explicit ColumnSumEvaluator(int32_t colIdx)
       : colIdx_(colIdx), colNum_(0), sum_(nullptr) {}
@@ -845,7 +862,11 @@ Evaluator* Evaluator::create(const EvaluatorConfig& config) {
   evaluator->init(config);
   return evaluator;
 }
-
+/**
+ * @brief print the value of each layer.
+ *
+ * The config file api is value_printer_evaluator.
+ */
 class ValuePrinter : public Evaluator {
 public:
   ValuePrinter() {}
@@ -882,7 +903,11 @@ public:
   virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(value_printer, ValuePrinter);
-
+/**
+ * @brief print the gradient of each layer.
+ *
+ * The config file api is gradient_printer_evaluator.
+ */
 class GradientPrinter : public Evaluator {
 public:
   GradientPrinter() {}
@@ -908,7 +933,11 @@ public:
   virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(gradient_printer, GradientPrinter);
-
+/**
+ * @brief print the row-max id vector of each layer
+ *
+ * The config file api is maxid_printer_evaluator.
+ */
 class MaxIdPrinter : public Evaluator {
 private:
   IVectorPtr maxIds_;
@@ -946,7 +975,11 @@ public:
   virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(max_id_printer, MaxIdPrinter);
-
+/**
+ * @brief print sequence max frames of each layer
+ *
+ * The config file api is maxframe_printer_evaluator.
+ */
 class MaxFramePrinter : public Evaluator {
 private:
   IVectorPtr maxIds_;
@@ -998,30 +1031,29 @@ public:
 };
 REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
 
 /**
- * Sequence text printer will print text according to index matrix and a
- * dictionary. There can be multiple input to this layer:
+ * @brief print text according to an index matrix and a dictionary.
  *
- * 1) If there is only one input, the input must be a matrix containing
+ * There can be multiple inputs to this layer:
+ * - If there is only one input, the input must be a matrix containing
  * the sequence of indices;
-
  *
- * 2) If there are more than one input, the first input should be ids,
+ * - If there is more than one input, the first input should be ids,
  * and are interpreted as sample ids.
  *
  * The output format will be:
  *
- * 1) sequence without sub-sequence, and there is probability.
+ * - sequence without sub-sequence, and there is probability.
  *
  * @code
 * id \t prob space_seperated_tokens_from_dictionary_according_to_seq
 * @endcode
 *
- * 2) sequence without sub-sequence, and there is not probability.
+ * - sequence without sub-sequence, and there is no probability.
 *
 * @code
 * id \t space_seperated_tokens_from_dictionary_according_to_seq
 * @endcode
 *
- * 3) sequence with sub-sequence, and there is not probability.
+ * - sequence with sub-sequence, and there is no probability.
 *
 * @code
 * id \t space_seperated_tokens_from_dictionary_according_to_sub_seq
@@ -1032,6 +1064,8 @@ REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
 * Typically SequenceTextPrinter layer takes output of maxid or RecurrentGroup
 * with maxid (when generating) as an input.
 *
+ * The config file api is seqtext_printer_evaluator.
+ *
 */
 class SequenceTextPrinter : public Evaluator {
 private:
@@ -1172,7 +1206,11 @@ public:
   }
 };
 REGISTER_EVALUATOR(seq_text_printer, SequenceTextPrinter);
-
+/**
+ * @brief print classification error.
+ *
+ * The config file api is classification_error_printer_evaluator.
+ */
 class ClassificationErrorPrinter : public ClassificationErrorEvaluator {
 public:
   virtual void updateSamplesNum(const std::vector<Argument>& arguments) {}
diff --git a/paddle/gserver/evaluators/Evaluator.h b/paddle/gserver/evaluators/Evaluator.h
index eee785e0e3a092995c9e152ad2dd75027706a6fc..b79a539384e9f7620c118d14b915c3f76a9a43af 100644
--- a/paddle/gserver/evaluators/Evaluator.h
+++ b/paddle/gserver/evaluators/Evaluator.h
@@ -24,12 +24,31 @@ limitations under the License. */
 namespace paddle {
 
 class NeuralNetwork;
+/**
+ * @def REGISTER_EVALUATOR
+ * @brief Macro for registering an evaluator class
+ */
 #define REGISTER_EVALUATOR(__type_name, __class_name)                \
   static InitFunction __reg_type_##__type_name([]() {                \
     Evaluator::registrar_.registerClass<__class_name>(#__type_name); \
   })
-
+/**
+ * @brief Base class for Evaluator
+ * Evaluating the performance of a model is very important.
+ * An evaluator measures how well the scores (predictions) produced by a
+ * trained model match a dataset's labels.
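+ *
+ * @note A hedged sketch of the evaluation protocol (see start(), eval(),
+ * finish() and printStats() declared below):
+ * @code
+ * evaluator->start();
+ * // ... for every batch in the evaluation data:
+ * evaluator->eval(network);
+ * evaluator->finish();
+ * evaluator->printStats(std::cout);
+ * @endcode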
+ */
 class Evaluator {
 public:
   static Evaluator* create(const EvaluatorConfig& config);
@@ -41,7 +50,7 @@ public:
   virtual void init(const EvaluatorConfig& config) { config_ = config; }
 
   /**
-   * start to evaluate some data
+   * @brief start to evaluate some data
    */
   virtual void start() {
     numSamples_ = 0;
@@ -49,20 +58,21 @@ public:
   }
 
   /**
-   * Process a batch of data.
+   * @brief Process a batch of data.
    */
   virtual void eval(const NeuralNetwork& nn);
 
   /**
-   * Process a batch of data.
-   * return the score for the batch if it make sense to sum the score across
-   * batches.  Otherwise evaluator should return 0 and override finish() and
+   * @brief Process a batch of data.
+   * @return the score for the batch if it makes sense to sum the score across
+   * batches.
+   * @note Otherwise the evaluator should return 0 and override finish() and
    * printStats() to do the right calculation.
    */
   virtual real evalImp(std::vector<Argument>& arguments) = 0;
 
   /**
-   * Update the number of processed samples
+   * @brief Update the number of processed samples
    */
   virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
     numSamples_ += arguments[0].getBatchSize();
@@ -81,11 +91,14 @@ public:
   }
 
   /**
-   * finish the evaluation.
+   * @brief finish the evaluation.
    */
   virtual void finish() {}
 
-  /// finish() should be called before printStats
+  /**
+   * @brief print the statistics of the evaluation result
+   * @note finish() should be called before printStats
+   */
   virtual void printStats(std::ostream& os) {
     os << config_.name() << "="
        << (numSamples_ ? totalScore_ / numSamples_ : 0);
@@ -124,17 +137,23 @@ public:
   virtual void finish() {}
   virtual void printStats(std::ostream&) {}
 };
-
+/**
+ * @brief evaluate AUC using the colIdx-th column as prediction.
+ * The AUC (Area Under the Curve) is a common evaluation metric
+ * for binary classification problems. It computes the area under
+ * the receiver operating characteristic (ROC) curve.
+ *
+ * @note Interpretation of colIdx:
+ * - colIdx = 0: the 0-th column.
+ * - colIdx > 0: the colIdx-th column.
+ * - colIdx < 0: the |colIdx|-th column from the end.
+ *
+ * The config file api is auc_evaluator.
+ *
+ */
 class AucEvaluator : public Evaluator {
 public:
-  /**
-   * @brief evaluate AUC using colIdx-th column as prediction.
-   *
-   * - colIdx = 0: the 0-th column.
-   * - colIdx > 0: the colIdx-th column.
-   * - colIdx < 0: the last colIdx-th column.
-   *
-   */
   AucEvaluator(int32_t colIdx)
       : colIdx_(colIdx),
         realColumnIdx_(0),
@@ -174,13 +193,11 @@ private:
 };
 
 /**
- * @brief RankAucEvaluator calculates the AUC of each list
- * (i.e., titles under the same query), and averages them.
- *
- * Each list should be organized as a sequence.
- * The inputs of this evaluator is [output, click, pv].
- * If pv is not provided, it will be set to 1.
- * The types of click and pv are dense value.
+ * @brief RankAucEvaluator calculates the AUC of each list (i.e., titles
+ * under the same query), and averages them. Each list should be organized
+ * as a sequence. The inputs of this evaluator are [output, click, pv]. If pv
+ * is not provided, it will be set to 1. The types of click and pv are
+ * dense value.
 */
 class RankAucEvaluator : public Evaluator {
 public:
@@ -204,7 +221,16 @@ private:
   double calcRankAuc(real* outputData, real* clickData, real* pvData,
                      size_t size);
 };
-
+/**
+ * @brief precision, recall and f1 score Evaluator
+ * \f[
+ * precision = \frac{tp}{tp+fp} \\
+ * recall = \frac{tp}{tp+fn} \\
+ * f1 = 2\frac{precision*recall}{precision+recall}
+ * \f]
+ *
+ * The config file api is precision_recall_evaluator.
+ */
 class PrecisionRecallEvaluator : public Evaluator {
 public:
   // Evaluate precision, recall and F1 score
@@ -274,8 +300,10 @@ private:
   }
 };
 
-/**
- * Positive-negative pair rate Evaluator
+/**
+ * @brief positive-negative pair rate Evaluator
+ *
+ * The config file api is pnpair_evaluator.
 */
 class PnpairEvaluator : public Evaluator {
 public:
diff --git a/paddle/gserver/layers/CosSimLayer.h b/paddle/gserver/layers/CosSimLayer.h
index 65c6fa8280b5831557caf75fb054661ed991408d..9b0e53335b2503513ce11a4ab19f2199acfee499 100644
--- a/paddle/gserver/layers/CosSimLayer.h
+++ b/paddle/gserver/layers/CosSimLayer.h
@@ -20,7 +20,19 @@ limitations under the License. */
 #include "paddle/utils/ThreadLocal.h"
 
 namespace paddle {
-
+/**
+ * @brief A layer for calculating cosine similarity between two vectors
+ * \f[
+ * f(x,y)=scale\frac{x_1y_1+x_2y_2+...+x_ny_n}{\sqrt{x_1^2+x_2^2+...
+ * +x_n^2}\sqrt{y_1^2+y_2^2+...+y_n^2}}
+ * \f]
+ *
+ * - Input1: A vector (batchSize * dataDim)
+ * - Input2: A vector (batchSize * dataDim) or (1 * dataDim)
+ * - Output: A vector (batchSize * 1)
+ *
+ * The config file api is cos_sim.
+ */
 class CosSimLayer : public Layer {
 public:
   explicit CosSimLayer(const LayerConfig& config)
diff --git a/paddle/gserver/layers/CosSimVecMatLayer.cpp b/paddle/gserver/layers/CosSimVecMatLayer.cpp
index 773d35c0f059bb12719ce2e690809b75f9e8cce2..7d251ace6fdfde2506e4890b276db5b0d08d51f5 100644
--- a/paddle/gserver/layers/CosSimVecMatLayer.cpp
+++ b/paddle/gserver/layers/CosSimVecMatLayer.cpp
@@ -21,13 +21,16 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * A layer for computing cosine similarity between a vector an each row of a
- * matrix,
+ * @brief A layer for computing cosine similarity between a vector
+ * and each row of a matrix
  * out[i] = cos_scale * cos(in1, in2(i,:));
- * which is used in NEURAL TURING MACHINE
- * Input: a vector (batchSize x dataDim) and a matrix in vec form (batchSize x
- * (weightDim*dataDim))
- * Output: a vector (batchSize x weightDim)
+ * @note used in NEURAL TURING MACHINE
+ *
+ * Input1: a vector (batchSize * dataDim)
+ *
+ * Input2: a matrix in vector form (batchSize * (weightDim*dataDim))
+ *
+ * Output: a vector (batchSize * weightDim)
  */
 class CosSimVecMatLayer : public Layer {
diff --git a/paddle/gserver/layers/DataNormLayer.h b/paddle/gserver/layers/DataNormLayer.h
index 31497367684d493f64dd62c305ed8e531f374181..232c73f0346a12d59fa0dc316ef510be75e6b2b1 100644
--- a/paddle/gserver/layers/DataNormLayer.h
+++ b/paddle/gserver/layers/DataNormLayer.h
@@ -22,18 +22,18 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * A layer for data normalization
- * Input: One and only one input layer is accepted. The input layer must
+ * @brief A layer for data normalization
+ * - Input: One and only one input layer is accepted. The input layer must
  * be DataLayer with dense data type.
- * Output: The normalization of the input data
+ * - Output: The normalization of the input data
  *
  * Reference:
  * LA Shalabi, Z Shaaban, B Kasasbeh. Data mining: A preprocessing engine
  *
  * Three data normalization methoeds are considered
- * z-score: y = (x-mean)/std
- * min-max: y = (x-min)/(max-min)
- * decimal-scaling: y = x/10^j, where j is the smallest integer such that
+ * - z-score: y = (x-mean)/std
+ * - min-max: y = (x-min)/(max-min)
+ * - decimal-scaling: y = x/10^j, where j is the smallest integer such that
 *max(|y|)<1
 */
diff --git a/paddle/gserver/layers/NormLayer.h b/paddle/gserver/layers/NormLayer.h
index 89bd23dae1bedcb1f1000f7af820cc6df61282bc..2b05be6fcb44fc3f61f9be4e464b2100284bf5c6 100644
--- a/paddle/gserver/layers/NormLayer.h
+++ b/paddle/gserver/layers/NormLayer.h
@@ -23,8 +23,9 @@ limitations under the License. */
 
 namespace paddle {
 
 /**
- * @brief basic parent layer of normalization
- * Normalize the input in local region
+ * @brief Basic parent layer of normalization
+ *
+ * @note Normalize the input in a local region
 */
 class NormLayer : public Layer {
 public:
@@ -35,7 +36,9 @@ public:
     return true;
   }
 
-  // create norm layer by norm_type
+  /**
+   * @brief create norm layer by norm_type
+   */
   static Layer* create(const LayerConfig& config);
 };
diff --git a/paddle/gserver/layers/OuterProdLayer.cpp b/paddle/gserver/layers/OuterProdLayer.cpp
index 307b70dc1a455b2dba326ebb5ba451593c54e722..708c901ba9e9d2a5421fc64789f4ac174b365dc1 100644
--- a/paddle/gserver/layers/OuterProdLayer.cpp
+++ b/paddle/gserver/layers/OuterProdLayer.cpp
@@ -21,10 +21,11 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * A layer for computing the outer product of two vectors,
- * which is used in NEURAL TURING MACHINE
- * Input: two vectors: batchSize x dim1, batchSize x dim2
- * Output: a matrix: (batchSize x (dim1*dim2))
+ * @brief A layer for computing the outer product of two vectors
+ * @note used in NEURAL TURING MACHINE
+ * Input1: vector (batchSize * dim1)
+ * Input2: vector (batchSize * dim2)
+ * Output: a matrix: (batchSize * (dim1*dim2))
  */
 class OuterProdLayer : public Layer {
diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/gserver/layers/PoolLayer.h
index 1c649bc66aeacf0cfd8c23aa6808c91a9b793a44..b7a1dfd7632f91bc3935f0f19ebfbd44258dcf7b 100644
--- a/paddle/gserver/layers/PoolLayer.h
+++ b/paddle/gserver/layers/PoolLayer.h
@@ -22,7 +22,7 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * @brief basic parent layer of pooling
+ * @brief Basic parent layer of pooling
  * Pools the input within regions
  */
 class PoolLayer : public Layer {
@@ -41,7 +41,9 @@ protected:
 public:
   explicit PoolLayer(const LayerConfig& config) : Layer(config) {}
 
-  // create pooling layer by pool_type
+  /**
+   * @brief create pooling layer by pool_type
+   */
   static Layer* create(const LayerConfig& config);
 
   virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
diff --git a/paddle/gserver/layers/PoolProjectionLayer.h b/paddle/gserver/layers/PoolProjectionLayer.h
index ce321946b1e853d873bf1028baed40f35a275e02..42bbc83c62246dfc8e69aa0b427b27819a701eb6 100644
--- a/paddle/gserver/layers/PoolProjectionLayer.h
+++ b/paddle/gserver/layers/PoolProjectionLayer.h
@@ -20,7 +20,9 @@ limitations under the License. */
 #include <vector>
 
 namespace paddle {
-
+/**
+ * @brief Basic parent layer of different kinds of pooling
+ */
 class PoolProjectionLayer : public PoolLayer {
 protected:
   size_t imgSizeH_, imgSizeW_;
@@ -30,7 +32,9 @@ public:
   size_t getSize();
   explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {}
 };
-
+/**
+ * @brief A layer for max pooling
+ */
 class MaxPoolProjectionLayer : public PoolProjectionLayer {
 public:
   explicit MaxPoolProjectionLayer(const LayerConfig& config)
@@ -41,7 +45,9 @@ public:
   virtual void forward(PassType passType);
   virtual void backward(const UpdateCallback& callback = nullptr);
 };
-
+/**
+ * @brief A layer for average pooling
+ */
 class AvgPoolProjectionLayer : public PoolProjectionLayer {
 public:
   explicit AvgPoolProjectionLayer(const LayerConfig& config)
diff --git a/paddle/gserver/layers/ResizeLayer.cpp b/paddle/gserver/layers/ResizeLayer.cpp
index df3a7fb1263ff9168b64599839895e95850ffbef..dc573e838f71623e6985b19a4ae2cba6109ef6b5 100644
--- a/paddle/gserver/layers/ResizeLayer.cpp
+++ b/paddle/gserver/layers/ResizeLayer.cpp
@@ -18,7 +18,13 @@ limitations under the License. */
 #include "paddle/math/BaseMatrix.h"
 
 namespace paddle {
-/* resize a minibatch matrix h*w to h'*w' */
+/**
+ * @brief A layer for resizing a minibatch matrix h*w to h'*w'
+ * @note
+ * origin matrix: (height * width)
+ * resized matrix: (height * width / size) * size
+ * e.g. with size = 4, a 2*6 matrix is resized to a 3*4 matrix
+ */
 class ResizeLayer : public Layer {
 public:
   explicit ResizeLayer(const LayerConfig& config) : Layer(config) {}