Commit 66be6fed authored by: Q qijun

add some source code comments

ISSUE=4592951

git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1447 1ad973e4-5ce8-4261-8a94-b56d1f490c56
Parent ff496cd4
Activations
=============
.. doxygenfile:: paddle/gserver/activations/ActivationFunction.h
.. doxygenfile:: paddle/gserver/activations/ActivationFunction.cpp
.. doxygenclass:: paddle::ActivationFunction
:members:
......@@ -28,8 +28,17 @@ limitations under the License. */
namespace paddle {
static ClassRegistrar<ActivationFunction> gActivationRegistrar;
/**
* @def ACTIVATION_CLASS_NAME
* @brief Macro for getting derived activation class name
* @note ACTIVATION_CLASS_NAME(softmax) softmax_;
* means softmaxActivation softmax_;
*/
#define ACTIVATION_CLASS_NAME(ACTIVATION_NAME) ACTIVATION_NAME##Activation
/**
* @def BEGIN_DEFINE_ACTIVATION
* @brief Macro for defining a derived activation class
*/
#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME) \
class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \
private: \
......@@ -37,7 +46,10 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
\
public: \
const std::string& getName() const { return name; }
/**
* @def END_DEFINE_ACTIVATION
* @brief Macro for registering a derived activation class
*/
#define END_DEFINE_ACTIVATION(ACTIVATION_NAME) \
}; \
const std::string ACTIVATION_CLASS_NAME(ACTIVATION_NAME)::name = \
......@@ -66,9 +78,10 @@ static InitFunction __reg_activation__identity([] {
});
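// Illustrative sketch (not part of this commit): the two macros above compose
// to define and register a derived activation. A hypothetical no-op activation
// named "passthrough" would expand to `class passthroughActivation : public
// ActivationFunction { ... }` plus a registration entry in gActivationRegistrar:
//
//   BEGIN_DEFINE_ACTIVATION(passthrough)
//   void forward(Argument& act) {}   // leave act.value unchanged
//   void backward(Argument& act) {}  // leave act.grad unchanged
//   END_DEFINE_ACTIVATION(passthrough)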
/**
* @brief Sigmoid Activation
* \f[
* f(z) = \frac{1}{1+exp(-z)}
* \f]
*/
BEGIN_DEFINE_ACTIVATION(sigmoid)
void forward(Argument& act) { act.value->sigmoid(*act.value); }
......@@ -76,8 +89,10 @@ void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
END_DEFINE_ACTIVATION(sigmoid)
/**
* @brief Softmax Activation
* \f[
* P(y=j|x) = \frac{e^{x^Tw_j}}{\sum^K_{k=1}e^{x^Tw_k}}
* \f]
*/
BEGIN_DEFINE_ACTIVATION(softmax)
private:
......@@ -115,8 +130,12 @@ void backward(Argument& act) {
}
END_DEFINE_ACTIVATION(softmax)
/**
* @brief Sequence_softmax Activation
* @note Softmax on all frames of one sequence.
* Width of frame must be one.
*/
BEGIN_DEFINE_ACTIVATION(sequence_softmax)
private:
ACTIVATION_CLASS_NAME(softmax) softmax_;
......@@ -156,8 +175,7 @@ void backward(Argument& act) {
END_DEFINE_ACTIVATION(sequence_softmax)
/**
* @brief Relu Activation.
* forward. y = max(0, z)
*
* derivative of relu is:
......@@ -173,7 +191,7 @@ void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
END_DEFINE_ACTIVATION(relu)
/**
* @brief BRelu Activation.
*
* forward. y = min(24, max(0, z))
*
......@@ -192,9 +210,10 @@ void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
END_DEFINE_ACTIVATION(brelu)
/**
* @brief Tanh Activation.
* \f[
* f(z) = tanh(z)=\frac{e^z-e^{-z}}{e^z+e^{-z}}
* \f]
*/
BEGIN_DEFINE_ACTIVATION(tanh)
void forward(Argument& act) { act.value->tanh(*act.value); }
......@@ -203,9 +222,10 @@ void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
END_DEFINE_ACTIVATION(tanh)
/**
* @brief Scaled Tanh Activation
* \f[
* f(z) = 1.7159 * tanh(2/3*z)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(stanh)
private:
......@@ -221,9 +241,10 @@ void backward(Argument& act) {
END_DEFINE_ACTIVATION(stanh)
/**
* @brief Soft Relu Activation.
* \f[
* f(z) = ln(1+e^z)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(softrelu)
void forward(Argument& act) { act.value->softrelu(*act.value); }
......@@ -232,8 +253,7 @@ void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
END_DEFINE_ACTIVATION(softrelu)
/**
* @brief Abs Activation.
* Forward: f(z) = abs(z)
*
* Derivative:
......@@ -258,9 +278,10 @@ void backward(Argument& act) { act.grad->absDerivative(*act.in); }
END_DEFINE_ACTIVATION(abs)
/**
* @brief Square Activation.
* \f[
* f(z) = z^2.
* \f]
*/
BEGIN_DEFINE_ACTIVATION(square)
void forward(Argument& act) {
......@@ -274,7 +295,12 @@ void forward(Argument& act) {
void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
END_DEFINE_ACTIVATION(square)
/**
* @brief Exponential Activation.
* \f[
* f(z) = e^z
* \f]
*/
BEGIN_DEFINE_ACTIVATION(exponential)
void forward(Argument& act) { act.value->exp(*act.value); }
......
......@@ -17,7 +17,18 @@ limitations under the License. */
#include <string>
namespace paddle {
struct Argument;
/**
* @brief Activation function is a function that transforms a set of input
* signals into an output signal. The purpose of the activation function
* is to introduce non-linearity into the network.
*
* @note Common activation functions are provided, including linear,
* sigmoid, softmax, sequence_softmax, relu, brelu, tanh, stanh,
* softrelu, abs, square, exponential.
*
*/
class ActivationFunction {
public:
static ActivationFunction* create(const std::string& type);
......@@ -26,16 +37,25 @@ public:
virtual ~ActivationFunction() {}
/**
* @brief Forward propagation
*
* act.value <- f(act.value),
* where f is the activation function.
* Suppose that before calling forward(), act.value is x and
* after forward() is called, act.value is y, then y = f(x).
*
* Usually, act is Layer::output_
*/
virtual void forward(Argument& act) = 0;
/**
* @brief Backward propagation
*
* x and y are defined in the above comment for forward().
* - Before calling backward(), act.grad = dE / dy, where E is the error/cost
* - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
*/
virtual void backward(Argument& act) = 0;
virtual const std::string& getName() const = 0;
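// Hedged usage sketch: driving an activation through the interface above,
// assuming `act` is a layer output Argument (usually Layer::output_):
//
//   std::unique_ptr<ActivationFunction> fn(ActivationFunction::create("sigmoid"));
//   fn->forward(act);   // act.value <- f(act.value), i.e. y = f(x)
//   fn->backward(act);  // act.grad  <- dE/dx = (dE/dy) * (dy/dx)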
......
......@@ -41,7 +41,8 @@ limitations under the License. */
namespace paddle {
/**
* @def REGISTER_DATA_PROVIDER
* @brief Macro for registering a data provider
*/
#define REGISTER_DATA_PROVIDER(__type_name, __class_name) \
static InitFunction __reg_type_##__type_name([]() { \
......@@ -52,37 +53,68 @@ class DataBatch;
class BufferBatch;
typedef std::shared_ptr<DataBatch> DataBatchPtr;
typedef std::shared_ptr<BufferBatch> BufferBatchPtr;
/**
* @brief A batch of data for training a neural network
*/
class DataBatch {
public:
DataBatch() : size_(0) { data_.clear(); }
/**
* @brief Get batch size
* @return batch size
*/
int64_t getSize() const { return size_; }
/**
* @brief Get num of sequences of sequence data
* @return num of sequences
*/
int64_t getNumSequences() const {
if (data_.empty()) return size_;
return data_[0].sequenceStartPositions
? data_[0].sequenceStartPositions->getSize() - 1
: size_;
}
/**
* @brief Set batch size
* @param[in] size the batch size
*/
void setSize(int64_t size) { size_ = size; }
/**
* @brief Get size of argument vector
* @return size of argument vector
* @note For usual supervised learning, input data and labels are needed,
* so there will be two arguments.
*/
int64_t getNumStreams() const { return data_.size(); }
/**
* @brief Get the argument at index i
* @param[in] i index in the argument vector
* @return the argument at index i
*/
const Argument& getStream(int i) const { return data_[i]; }
/**
* @brief Get all arguments
* @return an argument vector
*/
std::vector<Argument>& getStreams() { return data_; }
/**
* @brief Get all arguments (const version)
* @return an argument vector
*/
std::vector<Argument> getStreams() const { return data_; }
/**
* @brief Clear DataBatch
*/
void clear() {
data_.clear();
size_ = 0;
}
/**
* @brief Append data to DataBatch
* @param[in] data matrix data
* @note The order in which each data stream is appended must match the order
* specified in stream_names of DataConfig. The stream_names can be obtained
* using DataProvider::getStreamNames().
*/
......@@ -93,7 +125,10 @@ public:
}
/**
* @brief Append sequence data to DataBatch
* @param[in] data matrix data
* @param[in] sequenceStartPositions sequence data
* @note The order in which each data stream is appended must match the order
* specified in stream_names of DataConfig. The stream_names can be obtained
* using DataProvider::getStreamNames().
*/
......@@ -104,24 +139,32 @@ public:
argu.sequenceStartPositions = sequenceStartPositions;
data_.push_back(argu);
}
/**
* @brief Append label data
* @param[in] label label data
* @param[in] value matrix data, default null
*/
void appendLabel(IVectorPtr label, MatrixPtr value = nullptr) {
Argument argu;
argu.ids = label;
argu.value = value;
data_.push_back(argu);
}
/**
* @brief Append user defined data
* @param[in] ptr user defined data
*/
void appendUserDefinedPtr(UserDefinedVectorPtr ptr) {
Argument argu;
argu.udp = ptr;
data_.push_back(argu);
}
/**
* @brief Append arguments
* @param[in] argus DataBatch.getStreams()
* @param[in] size DataBatch.getSize()
* @param[in] dataId sub dataprovider id (in MultiDataProvider)
*/
void appendArguments(const std::vector<Argument>& argus, int size,
int dataId) {
......@@ -133,7 +176,14 @@ public:
}
protected:
/**
* @brief batch size
*/
int64_t size_;
/**
* @brief A batch of data consists of an Argument vector.
* An argument corresponds to one type of input data.
*/
std::vector<Argument> data_;
};
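// Hedged usage sketch: assembling a two-stream (input + label) DataBatch.
// `input` (MatrixPtr) and `labels` (IVectorPtr) are hypothetical, already
// filled containers; `appendData` is assumed from the doc comments above.
//
//   DataBatch batch;
//   batch.setSize(input->getHeight());
//   batch.appendData(input);    // stream 0: input features
//   batch.appendLabel(labels);  // stream 1: integer class ids
//   CHECK_EQ(batch.getNumStreams(), 2);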
......@@ -228,8 +278,8 @@ protected:
};
/**
* @brief Base class for DataProvider, which supplies data for training
* @note It can supply multiple streams of data.
* For typical supervised training, there are two streams:
* one is for input, one is for label.
*/
......@@ -253,16 +303,23 @@ public:
const DataConfig& getConfig() const { return config_; }
void setSkipShuffle() { skipShuffle_ = true; }
/**
* @brief Get the next batch of training samples
* @param[in] size number of training samples to get
* @param[out] batch a batch of training samples
* @return actual size of obtained training samples
*/
int64_t getNextBatch(int64_t size, DataBatch* batch);
/**
* @brief Shuffle the data set
virtual void shuffle() = 0;
/**
* @brief Reset all the values of the index
* @note reset() must be called before any calls to getNextBatch().
* IMPORTANT: subclass reset() should always call the base class reset()
* at the end of the function
*/
......@@ -274,10 +331,17 @@ public:
}
/**
* @brief Get the number of training samples in the data set
* @return the number of training samples in the data set
* @note return -1 to indicate an unlimited number of samples
*/
virtual int64_t getSize() = 0;
/**
* @brief Get the next batch of training samples internally
* @param[in] size number of training samples to get
* @param[out] batch a batch of training samples
* @return actual size of obtained training samples
*/
virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0;
......@@ -288,7 +352,12 @@ protected:
bool useGpu_;
std::unique_ptr<DoubleBuffer> doubleBuffer_;
ThreadLocal<std::vector<MatrixPtr>> constantSlots_;
/**
* @brief Get the next batch of training samples from the buffer
* @param[in] size number of training samples to get
* @param[out] batch a batch of training samples
* @return actual size of obtained training samples
*/
int64_t getNextBatchFromBuffer(int64_t size, DataBatch* batch);
void initAsyncLoader();
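// Hedged usage sketch of the contract documented above: reset() first, then
// pull batches until the provider returns 0 (trainOneBatch is hypothetical):
//
//   provider->reset();
//   DataBatch batch;
//   while (provider->getNextBatch(batchSize, &batch) > 0) {
//     trainOneBatch(batch);
//   }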
......
......@@ -26,7 +26,9 @@ limitations under the License. */
namespace paddle {
/**
* @brief Provides data from a protobuf data file, with each sample
* specified by a proto message
*
* DataSample is defined in DataFormat.proto.
*
* The file format is
......@@ -68,19 +70,38 @@ public:
virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
protected:
/**
* @brief load protobuf data from a list of files
* @param[in] fileName name of a file which contains
* a list of file names
*/
void loadData(const std::string& fileName);
/**
* @brief load protobuf data from file
* @param[in] fileName data file name
*/
void loadDataFile(const std::string& fileName);
/**
* @brief check data header of each data sample
* @param[in] header data header read from protobuf data
*/
void checkDataHeader(const DataHeader& header);
/**
* @brief fill protobuf data into slot_,
* slot_ is a vector of ProtoSlot in memory.
* @param[in] sample data sample read from protobuf data
*/
void fillSlots(const DataSample& sample);
/**
* @brief return true if each sample is one sequence, i.e., independent
* of other samples.
*/
inline bool iidData() const { return sequenceStartPositions_.empty(); }
/**
* @brief check that sample is consistent with header_
*/
void checkSample(const DataSample& sample);
template <class Op>
......@@ -129,20 +150,21 @@ protected:
int64_t currentSequenceIndex_;
// The size should be the number of sequences.
std::vector<size_t> shuffledSequenceIds_;
ThreadLocalD<DataBatch> cpuBatch_;
ThreadLocalD<DataBatch> gpuBatch_;
RWLock lock_;
std::vector<StatPtr> nnzStats_;  // stats for the number of non-zero entries
};
/**
* @brief Special use for Proto data: instances should contain sparse-non-value slots
* and label.
*
* @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
*/
class ProtoSequenceDataProvider : public ProtoDataProvider {
public:
......
......@@ -33,7 +33,11 @@ void Evaluator::eval(const NeuralNetwork& nn) {
totalScore_ += score;
updateSamplesNum(arguments);
}
/**
* @brief classification error Evaluator
*
* The config file api is classification_error_evaluator.
*/
class ClassificationErrorEvaluator : public Evaluator {
public:
virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
......@@ -99,8 +103,11 @@ public:
}
};
/**
* @brief sequence classification error Evaluator
* @note sequence level classification error stats:
* if any frame in one sequence has an error, the whole sequence counts as an error
*/
class SequenceClassificationErrorEvaluator
: public ClassificationErrorEvaluator {
public:
......@@ -135,7 +142,12 @@ public:
};
REGISTER_EVALUATOR(seq_classification_error,
SequenceClassificationErrorEvaluator);
/**
* @brief sum Evaluator
* Calculate the sum of output or label
*
* The config file api is sum_evaluator.
*/
class SumEvaluator : public Evaluator {
public:
SumEvaluator() : cpuLabel_(nullptr), cpuWeight_(nullptr) {}
......@@ -218,13 +230,18 @@ private:
IVectorPtr cpuLabel_;
MatrixPtr cpuWeight_;
};
/**
* @brief column sum Evaluator
* @note column sum for the colIdx-th column
* - colIdx = 0: the 0-th column.
* - colIdx > 0: the colIdx-th column.
* - colIdx < 0: the last colIdx-th column.
*
* The config file api is column_sum_evaluator.
*/
class ColumnSumEvaluator : public Evaluator {
public:
explicit ColumnSumEvaluator(int32_t colIdx)
: colIdx_(colIdx), colNum_(0), sum_(nullptr) {}
......@@ -845,7 +862,11 @@ Evaluator* Evaluator::create(const EvaluatorConfig& config) {
evaluator->init(config);
return evaluator;
}
/**
* @brief print value of each layer.
*
* The config file api is value_printer_evaluator.
*/
class ValuePrinter : public Evaluator {
public:
ValuePrinter() {}
......@@ -882,7 +903,11 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
};
REGISTER_EVALUATOR(value_printer, ValuePrinter);
/**
* @brief print gradient of each layer.
*
* The config file api is gradient_printer_evaluator.
*/
class GradientPrinter : public Evaluator {
public:
GradientPrinter() {}
......@@ -908,7 +933,11 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
};
REGISTER_EVALUATOR(gradient_printer, GradientPrinter);
/**
* @brief print the row max id vector of each layer
*
* The config file api is maxid_printer_evaluator.
*/
class MaxIdPrinter : public Evaluator {
private:
IVectorPtr maxIds_;
......@@ -946,7 +975,11 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
};
REGISTER_EVALUATOR(max_id_printer, MaxIdPrinter);
/**
* @brief print sequence max frames of each layer
*
* The config file api is maxframe_printer_evaluator.
*/
class MaxFramePrinter : public Evaluator {
private:
IVectorPtr maxIds_;
......@@ -998,30 +1031,29 @@ public:
REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
/**
* @brief print text according to index matrix and a dictionary.
*
* There can be multiple inputs to this layer:
* - If there is only one input, the input must be a matrix containing
* the sequence of indices;
* - If there are more than one input, the first input should be ids,
* which are interpreted as sample ids.
*
* The output format will be:
*
* - sequence without sub-sequence, and there is probability.
*
* @code
* id \t prob space_seperated_tokens_from_dictionary_according_to_seq
* @endcode
*
* - sequence without sub-sequence, and there is no probability.
*
* @code
* id \t space_seperated_tokens_from_dictionary_according_to_seq
* @endcode
*
* - sequence with sub-sequence, and there is no probability.
*
* @code
* id \t space_seperated_tokens_from_dictionary_according_to_sub_seq
......@@ -1032,6 +1064,8 @@ REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
* Typically SequenceTextPrinter layer takes output of maxid or RecurrentGroup
* with maxid (when generating) as an input.
*
* The config file api is seqtext_printer_evaluator.
*
*/
class SequenceTextPrinter : public Evaluator {
private:
......@@ -1172,7 +1206,11 @@ public:
}
};
REGISTER_EVALUATOR(seq_text_printer, SequenceTextPrinter);
/**
* @brief print classification error.
*
* The config file api is classification_error_printer_evaluator.
*/
class ClassificationErrorPrinter : public ClassificationErrorEvaluator {
public:
virtual void updateSamplesNum(const std::vector<Argument>& arguments) {}
......
......@@ -24,12 +24,21 @@ limitations under the License. */
namespace paddle {
class NeuralNetwork;
/**
* @def REGISTER_EVALUATOR
* @brief Macro for registering evaluator class
*/
#define REGISTER_EVALUATOR(__type_name, __class_name) \
static InitFunction __reg_type_##__type_name([]() { \
Evaluator::registrar_.registerClass<__class_name>(#__type_name); \
})
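// Hedged usage sketch: registering a hypothetical evaluator so that
// Evaluator::create() can construct it by type name from EvaluatorConfig:
//
//   REGISTER_EVALUATOR(my_metric, MyMetricEvaluator);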
/**
* @brief Base class for Evaluator
* Evaluating the performance of a model is very important.
* It indicates how well the scores (predictions) produced by a trained
* model match the dataset.
*/
class Evaluator {
public:
static Evaluator* create(const EvaluatorConfig& config);
......@@ -41,7 +50,7 @@ public:
virtual void init(const EvaluatorConfig& config) { config_ = config; }
/**
* @brief start to evaluate some data
*/
virtual void start() {
numSamples_ = 0;
......@@ -49,20 +58,21 @@ public:
}
/**
* @brief Process a batch of data.
*/
virtual void eval(const NeuralNetwork& nn);
/**
* @brief Process a batch of data.
* @return the score for the batch if it makes sense to sum the score across
* batches.
* @note Otherwise evaluator should return 0 and override finish() and
* printStats() to do the right calculation.
*/
virtual real evalImp(std::vector<Argument>& arguments) = 0;
/**
* @brief Update the number of processed samples
*/
virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
numSamples_ += arguments[0].getBatchSize();
......@@ -81,11 +91,14 @@ public:
}
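// Hedged lifecycle sketch for the interface documented above:
//
//   evaluator->start();                    // clears numSamples_ and totalScore_
//   for (/* each batch */) evaluator->eval(nn);
//   evaluator->finish();                   // must precede printStats()
//   evaluator->printStats(std::cout);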
/**
* @brief finish the evaluation.
*/
virtual void finish() {}
/**
* @brief print the statistics of the evaluation result
* @note finish() should be called before printStats
*/
virtual void printStats(std::ostream& os) {
os << config_.name() << "="
<< (numSamples_ ? totalScore_ / numSamples_ : 0);
......@@ -124,17 +137,23 @@ public:
virtual void finish() {}
virtual void printStats(std::ostream&) {}
};
/**
* @brief evaluate AUC using colIdx-th column as prediction.
* The AUC (Area Under the Curve) is a common evaluation metric
* for binary classification problems. It computes the area under
* the receiver operating characteristic (ROC) curve.
*
* @note colIdx-th column
*
* - colIdx = 0: the 0-th column.
* - colIdx > 0: the colIdx-th column.
* - colIdx < 0: the last colIdx-th column.
*
* The config file api is auc_evaluator.
*/
class AucEvaluator : public Evaluator {
public:
AucEvaluator(int32_t colIdx)
: colIdx_(colIdx),
realColumnIdx_(0),
......@@ -174,13 +193,11 @@ private:
};
/**
* @brief RankAucEvaluator calculates the AUC of each list (i.e., titles
* under the same query), and averages them. Each list should be organized
* as a sequence. The inputs of this evaluator are [output, click, pv]. If pv
* is not provided, it will be set to 1. The types of click and pv are
* dense values.
*/
class RankAucEvaluator : public Evaluator {
public:
......@@ -204,7 +221,16 @@ private:
double calcRankAuc(real* outputData, real* clickData, real* pvData,
size_t size);
};
/**
* @brief precision, recall and f1 score Evaluator
* \f[
* precision = \frac{tp}{tp+fp} \\
* recall = \frac{tp}{tp+fn} \\
* f1 = 2*\frac{precision*recall}{precision+recall}
* \f]
*
* The config file api is precision_recall_evaluator.
*/
class PrecisionRecallEvaluator : public Evaluator {
public:
......@@ -274,8 +300,10 @@ private:
}
};
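// Worked example for the formulas above (hypothetical counts):
//   tp = 8, fp = 2, fn = 4
//   precision = 8 / (8 + 2) = 0.80
//   recall    = 8 / (8 + 4) = 0.667
//   f1        = 2 * 0.80 * 0.667 / (0.80 + 0.667) = 0.727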
/**
* @brief positive-negative pair rate Evaluator
*
* The config file api is pnpair_evaluator.
*/
class PnpairEvaluator : public Evaluator {
public:
......
......@@ -20,7 +20,19 @@ limitations under the License. */
#include "paddle/utils/ThreadLocal.h"
namespace paddle {
/**
* @brief A layer for calculating cosine similarity between two vectors
* \f[
* f(x,y)=scale\frac{x_1y_1+x_2y_2+...+x_ny_n}{\sqrt{x_1^2+x_2^2+...
* +x_n^2}\sqrt{y_1^2+y_2^2+...+y_n^2}}
* \f]
*
* - Input1: A vector (batchSize * dataDim)
* - Input2: A vector (batchSize * dataDim) or (1 * dataDim)
* - Output: A vector (batchSize * 1)
*
* The config file api is cos_sim.
*/
class CosSimLayer : public Layer {
public:
explicit CosSimLayer(const LayerConfig& config)
......
......@@ -21,13 +21,16 @@ limitations under the License. */
namespace paddle {
/**
* @brief A layer for computing cosine similarity between a vector
* and each row of a matrix
* out[i] = cos_scale * cos(in1, in2(i,:));
* @note used in NEURAL TURING MACHINE
*
* Input1: a vector (batchSize * dataDim)
*
* Input2: a matrix in vector form (batchSize * (weightDim*dataDim))
*
* Output: a vector (batchSize * weightDim)
*/
class CosSimVecMatLayer : public Layer {
......
......@@ -22,18 +22,18 @@ limitations under the License. */
namespace paddle {
/**
* @brief A layer for data normalization
* - Input: One and only one input layer is accepted. The input layer must
* be DataLayer with dense data type.
* - Output: The normalization of the input data
*
* Reference:
* LA Shalabi, Z Shaaban, B Kasasbeh. Data mining: A preprocessing engine
*
* Three data normalization methods are considered
* - z-score: y = (x-mean)/std
* - min-max: y = (x-min)/(max-min)
* - decimal-scaling: y = x/10^j, where j is the smallest integer such that
* max(|y|)<1
*/
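// Worked example for decimal-scaling (hypothetical column): if the largest
// |x| is 786, the smallest j with max(|y|) < 1 is j = 3, so y = x / 10^3
// and 786 maps to 0.786.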
......
......@@ -23,8 +23,9 @@ limitations under the License. */
namespace paddle {
/**
* @brief Basic parent layer of normalization
*
* @note Normalize the input in a local region
*/
class NormLayer : public Layer {
public:
......@@ -35,7 +36,9 @@ public:
return true;
}
/**
* @brief create norm layer by norm_type
*/
static Layer* create(const LayerConfig& config);
};
......
......@@ -21,10 +21,11 @@ limitations under the License. */
namespace paddle {
/**
* @brief A layer for computing the outer product of two vectors
* @note used in NEURAL TURING MACHINE
* Input1: vector (batchSize * dim1)
* Input2: vector (batchSize * dim2)
* Output: a matrix: (batchSize * (dim1*dim2))
*/
class OuterProdLayer : public Layer {
......
......@@ -22,7 +22,7 @@ limitations under the License. */
namespace paddle {
/**
* @brief Basic parent layer of pooling
* Pools the input within regions
*/
class PoolLayer : public Layer {
......@@ -41,7 +41,9 @@ protected:
public:
explicit PoolLayer(const LayerConfig& config) : Layer(config) {}
/**
* @brief create pooling layer by pool_type
*/
static Layer* create(const LayerConfig& config);
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
......
......@@ -20,7 +20,9 @@ limitations under the License. */
#include <vector>
namespace paddle {
/**
* @brief Basic parent layer of different kinds of pooling
*/
class PoolProjectionLayer : public PoolLayer {
protected:
size_t imgSizeH_, imgSizeW_;
......@@ -30,7 +32,9 @@ public:
size_t getSize();
explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {}
};
/**
* @brief A layer for max pooling
*/
class MaxPoolProjectionLayer : public PoolProjectionLayer {
public:
explicit MaxPoolProjectionLayer(const LayerConfig& config)
......@@ -41,7 +45,9 @@ public:
virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr);
};
/**
* @brief A layer for average pooling
*/
class AvgPoolProjectionLayer : public PoolProjectionLayer {
public:
explicit AvgPoolProjectionLayer(const LayerConfig& config)
......
......@@ -18,7 +18,12 @@ limitations under the License. */
#include "paddle/math/BaseMatrix.h"
namespace paddle {
/**
* @brief A layer for resizing a minibatch matrix h*w to h'*w'
* @note
* origin matrix: (height * width)
* resize matrix: (height * width / size) * size
*/
class ResizeLayer : public Layer {
public:
explicit ResizeLayer(const LayerConfig& config) : Layer(config) {}
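// Worked example (hypothetical shapes): a 2*6 input with size=3 is reshaped
// to a (2*6/3)*3 = 4*3 matrix; the element count is unchanged.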
......