From 66be6fed3458c26a679062b5689357c070de39e7 Mon Sep 17 00:00:00 2001
From: qijun <qijun@1ad973e4-5ce8-4261-8a94-b56d1f490c56>
Date: Wed, 31 Aug 2016 06:34:57 +0000
Subject: [PATCH] add some source code comments ISSUE=4592951

git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1447 1ad973e4-5ce8-4261-8a94-b56d1f490c56
---
 doc/source/gserver/activations/index.rst      |   4 +-
 .../activations/ActivationFunction.cpp        |  68 ++++++----
 .../gserver/activations/ActivationFunction.h  |  36 ++++--
 paddle/gserver/dataproviders/DataProvider.h   | 119 ++++++++++++++----
 .../gserver/dataproviders/ProtoDataProvider.h |  42 +++++--
 paddle/gserver/evaluators/Evaluator.cpp       |  82 ++++++++----
 paddle/gserver/evaluators/Evaluator.h         |  84 ++++++++-----
 paddle/gserver/layers/CosSimLayer.h           |  14 ++-
 paddle/gserver/layers/CosSimVecMatLayer.cpp   |  15 ++-
 paddle/gserver/layers/DataNormLayer.h         |  12 +-
 paddle/gserver/layers/NormLayer.h             |   9 +-
 paddle/gserver/layers/OuterProdLayer.cpp      |   9 +-
 paddle/gserver/layers/PoolLayer.h             |   6 +-
 paddle/gserver/layers/PoolProjectionLayer.h   |  12 +-
 paddle/gserver/layers/ResizeLayer.cpp         |   7 +-
 15 files changed, 377 insertions(+), 142 deletions(-)

diff --git a/doc/source/gserver/activations/index.rst b/doc/source/gserver/activations/index.rst
index 6ceab41ccb..ccdae41128 100644
--- a/doc/source/gserver/activations/index.rst
+++ b/doc/source/gserver/activations/index.rst
@@ -1,5 +1,5 @@
 Activations
 =============
 
-..  doxygenfile:: paddle/gserver/activations/ActivationFunction.h
-..  doxygenfile:: paddle/gserver/activations/ActivationFunction.cpp
+..  doxygenclass:: paddle::ActivationFunction
+    :members:
diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index cf4fe5966b..9918d20d90 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -28,8 +28,17 @@ limitations under the License. */
 namespace paddle {
 
 static ClassRegistrar<ActivationFunction> gActivationRegistrar;
+/**
+ * @def ACTIVATION_CLASS_NAME
+ * @brief Macro for getting derived activation class name
+ * @note ACTIVATION_CLASS_NAME(softmax) softmax_;
+ * means softmaxActivation softmax_;
+ */
 #define ACTIVATION_CLASS_NAME(ACTIVATION_NAME) ACTIVATION_NAME##Activation
-
+/**
+ * @def BEGIN_DEFINE_ACTIVATION
+ * @brief Macro for defining a derived activation class
+ */
 #define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME)                             \
   class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \
   private:                                                                   \
@@ -37,7 +46,10 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
                                                                              \
   public:                                                                    \
     const std::string& getName() const { return name; }
-
+/**
+ * @def END_DEFINE_ACTIVATION
+ * @brief Macro for registering a derived activation class
+ */
 #define END_DEFINE_ACTIVATION(ACTIVATION_NAME)                     \
   };                                                               \
   const std::string ACTIVATION_CLASS_NAME(ACTIVATION_NAME)::name = \
@@ -66,9 +78,10 @@ static InitFunction __reg_activation__identity([] {
 });
 
 /**
- * SigmoidActivation
- *
+ * @brief Sigmoid Activation
+ * \f[
  * f(z) = \frac{1}{1+exp(-z)}
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(sigmoid)
 void forward(Argument& act) { act.value->sigmoid(*act.value); }
@@ -76,8 +89,10 @@ void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
 END_DEFINE_ACTIVATION(sigmoid)
 
 /**
- * Do Softmax activation for all sample.
+ * @brief Softmax Activation
+ * \f[
  * P(y=j|x) = \frac{e^{x^Tw_j}}{\sum^K_{k=1}e^{x^Tw_k}}
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(softmax)
 private:
@@ -115,8 +130,12 @@ void backward(Argument& act) {
 }
 END_DEFINE_ACTIVATION(softmax)
 
-/// Softmax on all frames of one sequence.
-/// Width of frame must be one.
+
+/**
+ * @brief Sequence_softmax Activation
+ * @note Softmax on all frames of one sequence.
+ * Width of frame must be one.
+ */
 BEGIN_DEFINE_ACTIVATION(sequence_softmax)
 private:
 ACTIVATION_CLASS_NAME(softmax) softmax_;
@@ -156,8 +175,7 @@ void backward(Argument& act) {
 END_DEFINE_ACTIVATION(sequence_softmax)
 
 /**
- * Relu Activation.
- *
+ * @brief Relu Activation.
  * forward. y = max(0, z)
  *
  * derivative of relu is:
@@ -173,7 +191,7 @@ void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(relu)
 
 /**
- * BRelu Activation.
+ * @brief BRelu Activation.
  *
  * forward. y = min(24, max(0, z))
  *
@@ -192,9 +210,10 @@ void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(brelu)
 
 /**
- * tanh activation.
- *
+ * @brief Tanh Activation.
+ * \f[
  * f(z) = tanh(z)=\frac{e^z-e^{-z}}{e^z+e^{-z}}
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(tanh)
 void forward(Argument& act) { act.value->tanh(*act.value); }
@@ -203,9 +222,10 @@ void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
 END_DEFINE_ACTIVATION(tanh)
 
 /**
- * Scaled Tanh Activation
- *
+ * @brief Scaled Tanh Activation
+ * \f[
  * f(z) = 1.7159 * tanh(2/3*z)
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(stanh)
 private:
@@ -221,9 +241,10 @@ void backward(Argument& act) {
 END_DEFINE_ACTIVATION(stanh)
 
 /**
- * Soft relu activation.
- *
+ * @brief Soft Relu Activation.
+ * \f[
  * f(z) = ln(1+e^z)
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(softrelu)
 void forward(Argument& act) { act.value->softrelu(*act.value); }
@@ -232,8 +253,7 @@ void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(softrelu)
 
 /**
- * Abs Activation.
- *
+ * @brief Abs Activation.
  * Forward: f(z) = abs(z)
  *
  * Derivative:
@@ -258,9 +278,10 @@ void backward(Argument& act) { act.grad->absDerivative(*act.in); }
 END_DEFINE_ACTIVATION(abs)
 
 /**
- * Square Activation.
- *
+ * @brief Square Activation.
+ * \f[
  * f(z) = z^2.
+ * \f]
  */
 BEGIN_DEFINE_ACTIVATION(square)
 void forward(Argument& act) {
@@ -274,7 +295,12 @@ void forward(Argument& act) {
 
 void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
 END_DEFINE_ACTIVATION(square)
-
+/**
+ * @brief Exponential Activation.
+ * \f[
+ * f(z) = e^z
+ * \f]
+ */
 BEGIN_DEFINE_ACTIVATION(exponential)
 void forward(Argument& act) { act.value->exp(*act.value); }
 
diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h
index 0c5eddfc8a..29860b4a73 100644
--- a/paddle/gserver/activations/ActivationFunction.h
+++ b/paddle/gserver/activations/ActivationFunction.h
@@ -17,7 +17,18 @@ limitations under the License. */
 #include <string>
 
 namespace paddle {
+
 struct Argument;
+/**
+ * @brief Activation function is a function that transforms a set of input
+ * signals into an output signal. The purpose of the activation function
+ * is to introduce non-linearity into the network.
+ *
+ * @note Common activation functions are provided, including linear,
+ * sigmoid, softmax, sequence_softmax, relu, brelu, tanh, stanh,
+ * softrelu, abs, square, exponential.
+ *
+ */
 class ActivationFunction {
 public:
   static ActivationFunction* create(const std::string& type);
@@ -26,16 +37,25 @@ public:
 
   virtual ~ActivationFunction() {}
 
-  // act.value <- f(act.value),
-  // where f is the activation function.
-  // Suppose that before calling forward(), act.value is x and
-  // after forward() is called, act.value is y, then y = f(x),
-  // Usually, act is Layer::output_
+  /**
+   * @brief Forward propagation
+   *
+   * act.value <- f(act.value),
+   * where f is the activation function.
+   * Suppose that before calling forward(), act.value is x and
+   * after forward() is called, act.value is y, then y = f(x).
+   *
+   * Usually, act is Layer::output_
+   */
   virtual void forward(Argument& act) = 0;
 
-  // x and y are defined in the above comment for forward().
-  // Before calling backward(), act.grad = dE / dy, where E is the error/cost.
-  // After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
+  /**
+   * @brief Backward propagation
+   *
+   * x and y are defined in the above comment for forward().
+   * - Before calling backward(), act.grad = dE / dy, where E is the error/cost
+   * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
+   */
   virtual void backward(Argument& act) = 0;
 
   virtual const std::string& getName() const = 0;
diff --git a/paddle/gserver/dataproviders/DataProvider.h b/paddle/gserver/dataproviders/DataProvider.h
index fb404405fb..aab5d93fca 100644
--- a/paddle/gserver/dataproviders/DataProvider.h
+++ b/paddle/gserver/dataproviders/DataProvider.h
@@ -41,7 +41,8 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * @brief Macro for registering a data provider.
+ * @def REGISTER_DATA_PROVIDER
+ * @brief Macro for registering a data provider
  */
 #define REGISTER_DATA_PROVIDER(__type_name, __class_name)               \
   static InitFunction __reg_type_##__type_name([]() {                   \
@@ -52,37 +53,68 @@ class DataBatch;
 class BufferBatch;
 typedef std::shared_ptr<DataBatch> DataBatchPtr;
 typedef std::shared_ptr<BufferBatch> BufferBatchPtr;
-
+/**
+ * @brief Data for batch training a neural network
+ */
 class DataBatch {
 public:
   DataBatch() : size_(0) { data_.clear(); }
-
+  /**
+   * @brief Get batch size
+   * @return batch size
+   */
   int64_t getSize() const { return size_; }
-
+  /**
+   * @brief Get num of sequences of sequence data
+   * @return num of sequences
+   */
   int64_t getNumSequences() const {
     if (data_.empty()) return size_;
     return data_[0].sequenceStartPositions
                ? data_[0].sequenceStartPositions->getSize() - 1
                : size_;
   }
-
+  /**
+   * @brief Set batch size
+   * @param[in] size size
+   */
   void setSize(int64_t size) { size_ = size; }
-
+  /**
+   * @brief Get size of argument vector
+   * @return size of argument vector
+   * @note For usual supervised learning, input data and label are needed,
+   * so there will be two arguments.
+   */
   int64_t getNumStreams() const { return data_.size(); }
 
+  /**
+   * @brief Get an argument with index i
+   * @param[in] i index in argument vector
+   * @return an argument with index i
+   */
   const Argument& getStream(int i) const { return data_[i]; }
-
+  /**
+   * @brief Get all arguments
+   * @return an argument vector
+   */
   std::vector<Argument>& getStreams() { return data_; }
-
+  /**
+   * @brief Get a copy of all arguments (const overload)
+   * @return an argument vector
+   */
   std::vector<Argument> getStreams() const { return data_; }
-
+  /**
+   * @brief Clear DataBatch
+   */
   void clear() {
     data_.clear();
     size_ = 0;
   }
 
   /**
-   * The order in which each data stream is appended must match the order
+   * @brief Append data to DataBatch
+   * @param[in] data  matrix data
+   * @note The order in which each data stream is appended must match the order
    * specified in stream_names of DataConfig. The stream_names can be obtained
    * using DataProvider::getStreamNames().
    */
@@ -93,7 +125,10 @@ public:
   }
 
   /**
-   * The order in which each data stream is appended must match the order
+   * @brief Append sequence data to DataBatch
+   * @param[in] data                      matrix data
+   * @param[in] sequenceStartPositions    sequence data
+   * @note The order in which each data stream is appended must match the order
    * specified in stream_names of DataConfig. The stream_names can be obtained
    * using DataProvider::getStreamNames().
    */
@@ -104,24 +139,32 @@ public:
     argu.sequenceStartPositions = sequenceStartPositions;
     data_.push_back(argu);
   }
-
+  /**
+   * @brief Append label data
+   * @param[in]  label    label data
+   * @param[in]  value    matrix data, default null
+   */
   void appendLabel(IVectorPtr label, MatrixPtr value = nullptr) {
     Argument argu;
     argu.ids = label;
     argu.value = value;
     data_.push_back(argu);
   }
-
+  /**
+   * @brief Append user defined data
+   * @param[in]  ptr     user defined data
+   */
   void appendUserDefinedPtr(UserDefinedVectorPtr ptr) {
     Argument argu;
     argu.udp = ptr;
     data_.push_back(argu);
   }
 
-  /**
-   * @param argus: DataBatch.getStreams()
-   * @param size: DataBatch.getSize()
-   * @param dataId: sub dataprovider id (in MultiDataProvider)
+  /**
+   * @brief Append argument
+   * @param[in]  argus   DataBatch.getStreams()
+   * @param[in]  size    DataBatch.getSize()
+   * @param[in]  dataId  sub dataprovider id (in MultiDataProvider)
    */
   void appendArguments(const std::vector<Argument>& argus, int size,
                        int dataId) {
@@ -133,7 +176,14 @@ public:
   }
 
 protected:
+  /**
+   * @brief batch size
+   */
   int64_t size_;
+  /**
+   * @brief A batch of data consists of an Argument vector;
+   * each argument corresponds to a type of input data.
+   */
   std::vector<Argument> data_;
 };
 
@@ -228,8 +278,8 @@ protected:
 };
 
 /**
- * DataProvider supplies data for training
- * It can supplies multiple streams of data.
+ * @brief Base class for DataProvider, which supplies data for training
+ * @note It can supply multiple streams of data.
  * For typical supervised training, there are two streams:
  * one is for input, one is for label.
  */
@@ -253,16 +303,23 @@ public:
   const DataConfig& getConfig() const { return config_; }
 
   void setSkipShuffle() { skipShuffle_ = true; }
+
+  /**
+   * @brief Get next batch of training samples
+   * @param[in]    size    size of training samples to get
+   * @param[out]   batch   a batch of training samples
+   * @return actual size of obtained training samples
+   */
   int64_t getNextBatch(int64_t size, DataBatch* batch);
 
   /**
-   * Shuffle the data set
+   * @brief Shuffle the data set
    */
   virtual void shuffle() = 0;
 
   /**
-   * reset() must be called before any calls to getNextBatch()
-   * reset all the value of index
+   * @brief reset all the value of index
+   * @note reset() must be called before any calls to getNextBatch()
    * IMPORTANT: subclass reset() should always call the base class reset()
    * at the end of the function
    */
@@ -274,10 +331,17 @@ public:
   }
 
   /**
-   * return the number of training samples in the data set.
-   * return -1 to indicate unlimited number of samples.
+   * @brief Get the size of training samples
+   * @return the number of training samples in the data set.
+   * @note return -1 to indicate unlimited number of samples.
    */
   virtual int64_t getSize() = 0;
+  /**
+   * @brief Get next batch training samples internally
+   * @param[in]    size      size of training samples to get
+   * @param[out]   batch     a batch of training samples
+   * @return actual size of obtained training samples
+   */
 
   virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0;
 
@@ -288,7 +352,12 @@ protected:
   bool useGpu_;
   std::unique_ptr<DoubleBuffer> doubleBuffer_;
   ThreadLocal<std::vector<MatrixPtr>> constantSlots_;
-
+  /**
+   * @brief Get next batch training samples from buffer
+   * @param[in]    size      size of training samples to get
+   * @param[out]   batch     a batch of training samples
+   * @return actual size of obtained training samples
+   */
   int64_t getNextBatchFromBuffer(int64_t size, DataBatch* batch);
 
   void initAsyncLoader();
diff --git a/paddle/gserver/dataproviders/ProtoDataProvider.h b/paddle/gserver/dataproviders/ProtoDataProvider.h
index 02db5f870d..846dd7673a 100644
--- a/paddle/gserver/dataproviders/ProtoDataProvider.h
+++ b/paddle/gserver/dataproviders/ProtoDataProvider.h
@@ -26,8 +26,10 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * @brief  Data file with each sample specified by proto message
- *         DataSample defined in DataFormat.proto.
+ * @brief Provides data from a protobuf data file with each sample
+ * specified by proto message
+ *
+ * DataSample defined in DataFormat.proto.
  *
  * The file format is
  *
@@ -68,19 +70,38 @@ public:
   virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
 
 protected:
+  /**
+   * @brief load protobuf data from a list of files
+   * @param[in]  fileName  file name of a file which contains
+   * a list of file names
+   */
   void loadData(const std::string& fileName);
-  void loadDataFile(const std::string& fileName);
 
+  /**
+   * @brief load protobuf data from file
+   * @param[in]  fileName   data file name
+   */
+  void loadDataFile(const std::string& fileName);
+  /** @brief check data header of each data sample
+   *  @param[in] header     data header read from protobuf data
+   */
   void checkDataHeader(const DataHeader& header);
+  /**
+   * @brief fill protobuf data into slot_,
+   * slot_ is a vector of ProtoSlot in memory.
+   * @param[in]  sample     data sample read from protobuf data
+   */
   void fillSlots(const DataSample& sample);
 
   /**
-   * return true if each sample is one sequence, i.e., independent
+   * @brief return true if each sample is one sequence, i.e., independent
    * of other samples.
    */
   inline bool iidData() const { return sequenceStartPositions_.empty(); }
 
-  /// check that sample is consistent with header_
+  /**
+   * @brief check that sample is consistent with header_
+   */
   void checkSample(const DataSample& sample);
 
   template <class Op>
@@ -129,20 +150,21 @@ protected:
 
   int64_t currentSequenceIndex_;
 
-  /// The size should be the number of sequences.
+  // The size should be the number of sequences.
   std::vector<size_t> shuffledSequenceIds_;
 
   ThreadLocalD<DataBatch> cpuBatch_;
   ThreadLocalD<DataBatch> gpuBatch_;
 
   RWLock lock_;
-  // stats for number of none-zeros entries
-  std::vector<StatPtr> nnzStats_;
+  std::vector<StatPtr> nnzStats_;  // stats for number of non-zero entries
 };
 
 /**
- * Special use for Proto data: instances should contain sparse-non-value slots
- * and label. ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
+ * @brief Special use for Proto data: instances should contain
+ * sparse-non-value slots and label.
+ *
+ * @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
  */
 class ProtoSequenceDataProvider : public ProtoDataProvider {
 public:
diff --git a/paddle/gserver/evaluators/Evaluator.cpp b/paddle/gserver/evaluators/Evaluator.cpp
index a50eecdbb7..7bdcdaae53 100644
--- a/paddle/gserver/evaluators/Evaluator.cpp
+++ b/paddle/gserver/evaluators/Evaluator.cpp
@@ -33,7 +33,11 @@ void Evaluator::eval(const NeuralNetwork& nn) {
   totalScore_ += score;
   updateSamplesNum(arguments);
 }
-
+/**
+ * @brief classification error Evaluator
+ *
+ * The config file api is classification_error_evaluator.
+ */
 class ClassificationErrorEvaluator : public Evaluator {
 public:
   virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
@@ -99,8 +103,11 @@ public:
   }
 };
 
-// sequence level classification error stats:
-//   if any frame in one sequence has error, the sequence is error
+/**
+ * @brief sequence classification error Evaluator
+ * @note sequence level classification error stats,
+ * if any frame in one sequence has error, the sequence is error
+ */
 class SequenceClassificationErrorEvaluator
     : public ClassificationErrorEvaluator {
 public:
@@ -135,7 +142,12 @@ public:
 };
 REGISTER_EVALUATOR(seq_classification_error,
                    SequenceClassificationErrorEvaluator);
-
+/**
+ * @brief sum Evaluator
+ * Calculate the sum of output or label
+ *
+ * The config file api is sum_evaluator.
+ */
 class SumEvaluator : public Evaluator {
 public:
   SumEvaluator() : cpuLabel_(nullptr), cpuWeight_(nullptr) {}
@@ -218,13 +230,18 @@ private:
   IVectorPtr cpuLabel_;
   MatrixPtr cpuWeight_;
 };
-
+/**
+ * @brief column sum Evaluator
+ * @note column sum for the colIdx-th column
+ * - colIdx = 0: the 0-th column.
+ * - colIdx > 0: the colIdx-th column.
+ * - colIdx < 0: the last colIdx-th column.
+ *
+ * The config file api is column_sum_evaluator.
+ *
+ */
 class ColumnSumEvaluator : public Evaluator {
 public:
-  // column sum for the colIdx-th column
-  // colIdx = 0: the 0-th column
-  //         > 0: the colIdx-th column
-  //         < 0: the last colIdx-th column
   explicit ColumnSumEvaluator(int32_t colIdx)
       : colIdx_(colIdx), colNum_(0), sum_(nullptr) {}
 
@@ -845,7 +862,11 @@ Evaluator* Evaluator::create(const EvaluatorConfig& config) {
   evaluator->init(config);
   return evaluator;
 }
-
+/**
+ * @brief print value of each layer.
+ *
+ * The config file api is value_printer_evaluator.
+ */
 class ValuePrinter : public Evaluator {
 public:
   ValuePrinter() {}
@@ -882,7 +903,11 @@ public:
   virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(value_printer, ValuePrinter);
-
+/**
+ * @brief print gradient of each layer.
+ *
+ * The config file api is gradient_printer_evaluator.
+ */
 class GradientPrinter : public Evaluator {
 public:
   GradientPrinter() {}
@@ -908,7 +933,11 @@ public:
   virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(gradient_printer, GradientPrinter);
-
+/**
+ * @brief print row max id vector of each layer
+ *
+ * The config file api is maxid_printer_evaluator.
+ */
 class MaxIdPrinter : public Evaluator {
 private:
   IVectorPtr maxIds_;
@@ -946,7 +975,11 @@ public:
   virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(max_id_printer, MaxIdPrinter);
-
+/**
+ * @brief print sequence max frames of each layer
+ *
+ * The config file api is maxframe_printer_evaluator.
+ */
 class MaxFramePrinter : public Evaluator {
 private:
   IVectorPtr maxIds_;
@@ -998,30 +1031,29 @@ public:
 REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
 
 /**
- * Sequence text printer will print text according to index matrix and a
- * dictionary. There can be multiple input to this layer:
+ * @brief print text according to index matrix and a dictionary.
  *
- *   1) If there is only one input, the input must be a matrix containing
+ * There can be multiple input to this layer:
+ * - If there is only one input, the input must be a matrix containing
  *      the sequence of indices;
- *
- *   2) If there are more than one input, the first input should be ids,
+ * - If there are more than one input, the first input should be ids,
  *      and are interpreted as sample ids.
  *
  * The output format will be:
  *
- *   1) sequence without sub-sequence, and there is probability.
+ * - sequence without sub-sequence, and there is probability.
  *
  *     @code
  *      id \t prob space_seperated_tokens_from_dictionary_according_to_seq
  *     @endcode
  *
- *   2) sequence without sub-sequence, and there is not probability.
+ * - sequence without sub-sequence, and there is not probability.
  *
  *     @code
  *      id \t space_seperated_tokens_from_dictionary_according_to_seq
  *     @endcode
  *
- *   3) sequence with sub-sequence, and there is not probability.
+ * - sequence with sub-sequence, and there is not probability.
  *
  *     @code
  *      id \t space_seperated_tokens_from_dictionary_according_to_sub_seq
@@ -1032,6 +1064,8 @@ REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
  * Typically SequenceTextPrinter layer takes output of maxid or RecurrentGroup
  * with maxid (when generating) as an input.
  *
+ * The config file api is seqtext_printer_evaluator.
+ *
  */
 class SequenceTextPrinter : public Evaluator {
 private:
@@ -1172,7 +1206,11 @@ public:
   }
 };
 REGISTER_EVALUATOR(seq_text_printer, SequenceTextPrinter);
-
+/**
+ * @brief print classification error.
+ *
+ * The config file api is classification_error_printer_evaluator.
+ */
 class ClassificationErrorPrinter : public ClassificationErrorEvaluator {
 public:
   virtual void updateSamplesNum(const std::vector<Argument>& arguments) {}
diff --git a/paddle/gserver/evaluators/Evaluator.h b/paddle/gserver/evaluators/Evaluator.h
index eee785e0e3..b79a539384 100644
--- a/paddle/gserver/evaluators/Evaluator.h
+++ b/paddle/gserver/evaluators/Evaluator.h
@@ -24,12 +24,21 @@ limitations under the License. */
 namespace paddle {
 
 class NeuralNetwork;
+/**
+ * @def REGISTER_EVALUATOR
+ * @brief Macro for registering evaluator class
+ */
 
 #define REGISTER_EVALUATOR(__type_name, __class_name)                \
   static InitFunction __reg_type_##__type_name([]() {                \
     Evaluator::registrar_.registerClass<__class_name>(#__type_name); \
   })
-
+/**
+ * @brief Base class for Evaluator
+ * Evaluating the performance of a model is very important.
+ * It indicates how successful the scores (predictions) of a dataset
+ * have been by a trained model.
+ */
 class Evaluator {
 public:
   static Evaluator* create(const EvaluatorConfig& config);
@@ -41,7 +50,7 @@ public:
   virtual void init(const EvaluatorConfig& config) { config_ = config; }
 
   /**
-   * start to evaluate some data
+   * @brief start to evaluate some data
    */
   virtual void start() {
     numSamples_ = 0;
@@ -49,20 +58,21 @@ public:
   }
 
   /**
-   * Process a batch of data.
+   * @brief Process a batch of data.
    */
   virtual void eval(const NeuralNetwork& nn);
 
   /**
-   * Process a batch of data.
-   * return the score for the batch if it make sense to sum the score across
-   * batches. Otherwise evaluator should return 0 and override finish() and
+   * @brief Process a batch of data.
+   * @return the score for the batch if it make sense to sum the score across
+   * batches.
+   * @note Otherwise evaluator should return 0 and override finish() and
    * printStats() to do the right calculation.
    */
   virtual real evalImp(std::vector<Argument>& arguments) = 0;
 
   /**
-   * Update the number of processed samples
+   * @brief Update the number of processed samples
    */
   virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
     numSamples_ += arguments[0].getBatchSize();
@@ -81,11 +91,14 @@ public:
   }
 
   /**
-   * finish the evaluation.
+   * @brief finish the evaluation.
    */
   virtual void finish() {}
 
-  /// finish() should be called before printStats
+  /**
+   * @brief print the statistics of evaluate result
+   * @note finish() should be called before printStats
+   */
   virtual void printStats(std::ostream& os) {
     os << config_.name() << "="
        << (numSamples_ ? totalScore_ / numSamples_ : 0);
@@ -124,17 +137,23 @@ public:
   virtual void finish() {}
   virtual void printStats(std::ostream&) {}
 };
-
+/**
+ * @brief evaluate AUC using colIdx-th column as prediction.
+ * The AUC(Area Under the Curve) is a common evaluation metric
+ * for binary classification problems. It computes the area under
+ * the receiver operating characteristic(ROC) curve.
+ *
+ * @note colIdx-th column
+ *
+ * - colIdx = 0: the 0-th column.
+ * - colIdx > 0: the colIdx-th column.
+ * - colIdx < 0: the last colIdx-th column.
+ *
+ * The config file api is auc_evaluator.
+ *
+ */
 class AucEvaluator : public Evaluator {
 public:
-  /**
-   * @brief evaluate AUC using colIdx-th column as prediction.
-   *
-   * - colIdx = 0: the 0-th column.
-   * - colIdx > 0: the colIdx-th column.
-   * - colIdx < 0: the last colIdx-th column.
-   *
-   */
   AucEvaluator(int32_t colIdx)
       : colIdx_(colIdx),
         realColumnIdx_(0),
@@ -174,13 +193,11 @@ private:
 };
 
 /**
- * @brief RankAucEvaluator calculates the AUC of each list
- * (i.e., titles under the same query), and averages them.
- *
- * Each list should be organized as a sequence.
- * The inputs of this evaluator is [output, click, pv].
- * If pv is not provided, it will be set to 1.
- * The types of click and pv are dense value.
+ * @brief RankAucEvaluator calculates the AUC of each list (i.e., titles
+ * under the same query), and averages them. Each list should be organized
+ * as a sequence. The inputs of this evaluator are [output, click, pv]. If pv
+ * is not provided, it will be set to 1. The types of click and pv are
+ * dense value.
  */
 class RankAucEvaluator : public Evaluator {
 public:
@@ -204,7 +221,16 @@ private:
   double calcRankAuc(real* outputData, real* clickData, real* pvData,
                      size_t size);
 };
-
+/**
+ * @brief precision, recall and f1 score Evaluator
+ * \f[
+ * precision = \frac{tp}{tp+fp} \\
+ * recall=\frac{tp}{tp+fn} \\
+ * f1=2*\frac{precision*recall}{precision+recall}
+ * \f]
+ *
+ * The config file api is precision_recall_evaluator.
+ */
 class PrecisionRecallEvaluator : public Evaluator {
 public:
   // Evaluate precision, recall and F1 score
@@ -274,8 +300,10 @@ private:
   }
 };
 
-/**
- * Positive-negative pair rate Evaluator
+/**
+ * @brief positive-negative pair rate Evaluator
+ *
+ * The config file api is pnpair_evaluator.
  */
 class PnpairEvaluator : public Evaluator {
 public:
diff --git a/paddle/gserver/layers/CosSimLayer.h b/paddle/gserver/layers/CosSimLayer.h
index 65c6fa8280..9b0e53335b 100644
--- a/paddle/gserver/layers/CosSimLayer.h
+++ b/paddle/gserver/layers/CosSimLayer.h
@@ -20,7 +20,19 @@ limitations under the License. */
 #include "paddle/utils/ThreadLocal.h"
 
 namespace paddle {
-
+/**
+ * @brief A layer for calculating cosine similarity between two vector
+ * \f[
+ * f(x,y)=scale\frac{x_1y_1+x_2y_2+...+x_ny_n}{\sqrt{x_1^2+x_2^2+...
+ * +x_n^2}\sqrt{y_1^2+y_2^2+...+y_n^2}}
+ * \f]
+ *
+ * - Input1: A vector (batchSize * dataDim)
+ * - Input2: A vector (batchSize * dataDim) or (1 * dataDim)
+ * - Output: A vector (batchSize * 1)
+ *
+ * The config file api is cos_sim.
+ */
 class CosSimLayer : public Layer {
 public:
   explicit CosSimLayer(const LayerConfig& config)
diff --git a/paddle/gserver/layers/CosSimVecMatLayer.cpp b/paddle/gserver/layers/CosSimVecMatLayer.cpp
index 773d35c0f0..7d251ace6f 100644
--- a/paddle/gserver/layers/CosSimVecMatLayer.cpp
+++ b/paddle/gserver/layers/CosSimVecMatLayer.cpp
@@ -21,13 +21,16 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * A layer for computing cosine similarity between a vector an each row of a
- * matrix,
+ * @brief A layer for computing cosine similarity between a vector
+ * and each row of a matrix
  * out[i] = cos_scale * cos(in1, in2(i,:));
- * which is used in NEURAL TURING MACHINE
- * Input: a vector (batchSize x dataDim) and a matrix in vec form (batchSize x
- * (weightDim*dataDim))
- * Output: a vector (batchSize x weightDim)
+ * @note used in NEURAL TURING MACHINE
+ *
+ * Input1: a vector (batchSize * dataDim)
+ *
+ * Input2: a matrix in vector form (batchSize * (weightDim*dataDim))
+ *
+ * Output: a vector (batchSize * weightDim)
  */
 
 class CosSimVecMatLayer : public Layer {
diff --git a/paddle/gserver/layers/DataNormLayer.h b/paddle/gserver/layers/DataNormLayer.h
index 3149736768..232c73f034 100644
--- a/paddle/gserver/layers/DataNormLayer.h
+++ b/paddle/gserver/layers/DataNormLayer.h
@@ -22,18 +22,18 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * A layer for data normalization
- * Input: One and only one input layer is accepted. The input layer must
+ * @brief A layer for data normalization
+ * - Input: One and only one input layer is accepted. The input layer must
  *        be DataLayer with dense data type.
- * Output: The normalization of the input data
+ * - Output: The normalization of the input data
  *
  * Reference:
  *    LA Shalabi, Z Shaaban, B Kasasbeh. Data mining: A preprocessing engine
  *
  * Three data normalization methoeds are considered
- *    z-score: y = (x-mean)/std
- *    min-max: y = (x-min)/(max-min)
- *    decimal-scaling: y = x/10^j, where j is the smallest integer such that
+ * - z-score: y = (x-mean)/std
+ * - min-max: y = (x-min)/(max-min)
+ * - decimal-scaling: y = x/10^j, where j is the smallest integer such that
  *max(|y|)<1
  */
 
diff --git a/paddle/gserver/layers/NormLayer.h b/paddle/gserver/layers/NormLayer.h
index 89bd23dae1..2b05be6fcb 100644
--- a/paddle/gserver/layers/NormLayer.h
+++ b/paddle/gserver/layers/NormLayer.h
@@ -23,8 +23,9 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * @brief basic parent layer of normalization
- * Normalize the input in local region
+ * @brief Basic parent layer of normalization
+ *
+ * @note Normalize the input in local region
  */
 class NormLayer : public Layer {
 public:
@@ -35,7 +36,9 @@ public:
     return true;
   }
 
-  // create norm layer by norm_type
+  /**
+   * @brief Create norm layer by norm_type
+   */
   static Layer* create(const LayerConfig& config);
 };
 
diff --git a/paddle/gserver/layers/OuterProdLayer.cpp b/paddle/gserver/layers/OuterProdLayer.cpp
index 307b70dc1a..708c901ba9 100644
--- a/paddle/gserver/layers/OuterProdLayer.cpp
+++ b/paddle/gserver/layers/OuterProdLayer.cpp
@@ -21,10 +21,11 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * A layer for computing the outer product of two vectors,
- * which is used in NEURAL TURING MACHINE
- * Input: two vectors: batchSize x dim1, batchSize x dim2
- * Output: a matrix: (batchSize x (dim1*dim2))
+ * @brief A layer for computing the outer product of two vectors
+ * @note used in NEURAL TURING MACHINE
+ * Input1: vector (batchSize * dim1)
+ * Input2: vector (batchSize * dim2)
+ * Output: a matrix: (batchSize * (dim1*dim2))
  */
 
 class OuterProdLayer : public Layer {
diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/gserver/layers/PoolLayer.h
index 1c649bc66a..b7a1dfd763 100644
--- a/paddle/gserver/layers/PoolLayer.h
+++ b/paddle/gserver/layers/PoolLayer.h
@@ -22,7 +22,7 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * @brief basic parent layer of pooling
+ * @brief Basic parent layer of pooling
  * Pools the input within regions
  */
 class PoolLayer : public Layer {
@@ -41,7 +41,9 @@ protected:
 public:
   explicit PoolLayer(const LayerConfig& config) : Layer(config) {}
 
-  // create pooling layer by pool_type
+  /**
+   * @brief Create pooling layer by pool_type
+   */
   static Layer* create(const LayerConfig& config);
 
   virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
diff --git a/paddle/gserver/layers/PoolProjectionLayer.h b/paddle/gserver/layers/PoolProjectionLayer.h
index ce321946b1..42bbc83c62 100644
--- a/paddle/gserver/layers/PoolProjectionLayer.h
+++ b/paddle/gserver/layers/PoolProjectionLayer.h
@@ -20,7 +20,9 @@ limitations under the License. */
 #include <vector>
 
 namespace paddle {
-
+/**
+ * @brief Basic parent layer of different kinds of pooling
+ */
 class PoolProjectionLayer : public PoolLayer {
 protected:
   size_t imgSizeH_, imgSizeW_;
@@ -30,7 +32,9 @@ public:
   size_t getSize();
   explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {}
 };
-
+/**
+ * @brief A layer for max pooling
+ */
 class MaxPoolProjectionLayer : public PoolProjectionLayer {
 public:
   explicit MaxPoolProjectionLayer(const LayerConfig& config)
@@ -41,7 +45,9 @@ public:
   virtual void forward(PassType passType);
   virtual void backward(const UpdateCallback& callback = nullptr);
 };
-
+/**
+ * @brief A layer for average pooling
+ */
 class AvgPoolProjectionLayer : public PoolProjectionLayer {
 public:
   explicit AvgPoolProjectionLayer(const LayerConfig& config)
diff --git a/paddle/gserver/layers/ResizeLayer.cpp b/paddle/gserver/layers/ResizeLayer.cpp
index df3a7fb126..dc573e838f 100644
--- a/paddle/gserver/layers/ResizeLayer.cpp
+++ b/paddle/gserver/layers/ResizeLayer.cpp
@@ -18,7 +18,12 @@ limitations under the License. */
 #include "paddle/math/BaseMatrix.h"
 
 namespace paddle {
-/* resize a minibatch matrix h*w to h'*w' */
+/**
+ * @brief A layer for resizing a minibatch matrix h*w to h'*w'
+ * @note
+ * - original matrix: height * width
+ * - resized matrix: (height * width / size) * size
+ */
 class ResizeLayer : public Layer {
 public:
   explicit ResizeLayer(const LayerConfig& config) : Layer(config) {}
-- 
GitLab