Commit c109e3bf authored by dangqingqing

add more unit tests

@@ -27,3 +27,18 @@ std::string Evaluator::toString() {
m->rawPtr->printStats(sout);
return sout.str();
}
std::vector<std::string> Evaluator::getNames() const {
std::vector<std::string> retv;
m->rawPtr->getNames(&retv);
return retv;
}
double Evaluator::getValue(const std::string name) const {
paddle::Error err;
double v = m->rawPtr->getValue(name, &err);
if (err) {
throw std::runtime_error(err.msg());
}
return v;
}
@@ -900,6 +900,10 @@ public:
*/
std::string toString();
std::vector<std::string> getNames() const;
double getValue(const std::string name) const;
private:
EvaluatorPrivate* m;
......
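For context, a hedged sketch of how the new wrapper methods above might be driven from C++; it assumes the Paddle API headers and an Evaluator* that has already been filled by eval(), mirroring the Python snippet below. The helper name is illustrative and not part of the patch.

// Hedged usage sketch, not part of the commit.
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

void printEvaluatorFields(Evaluator* ev) {
  for (const std::string& name : ev->getNames()) {
    try {
      // getValue() rethrows a non-OK paddle::Error as std::runtime_error.
      std::cout << name << " = " << ev->getValue(name) << std::endl;
    } catch (const std::runtime_error& e) {
      std::cerr << "cannot read " << name << ": " << e.what() << std::endl;
    }
  }
}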
@@ -89,9 +89,14 @@ def main():
except Exception as e:
print e
ev = m.makeEvaluator()
ev.start()
m.forwardBackward(inArgs, outArgs, swig_paddle.PASS_TRAIN,
update_callback)
m.eval(ev)
ev.finish()
for name in ev.getNames():
print name, ev.getValue(name)
for optimizer in optimizers:
optimizer.finishBatch()
......
@@ -20,7 +20,7 @@ namespace paddle {
/**
* calculate sequence-to-sequence edit distance
*/
class CTCErrorEvaluator : public Evaluator {
class CTCErrorEvaluator : public NotGetableEvaluator {
private:
MatrixPtr outActivations_;
int numTimes_, numClasses_, numSequences_, blank_;
......
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/gserver/evaluators/Evaluator.h"
#include "paddle/utils/Stat.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/StringUtil.h"
DECLARE_int32(trainer_id);
@@ -122,6 +122,10 @@ public:
virtual void distributeEval(ParameterClient2* client) {
mergeResultsOfAllClients(client);
}
// Evaluator interface
protected:
std::string getTypeImpl() const { return "classification_error"; }
};
/**
@@ -160,6 +164,10 @@ public:
virtual void distributeEval(ParameterClient2* client) {
mergeResultsOfAllClients(client);
}
// Evaluator interface
protected:
std::string getTypeImpl() const { return "seq_classification_error"; }
};
REGISTER_EVALUATOR(seq_classification_error,
SequenceClassificationErrorEvaluator);
@@ -250,6 +258,10 @@ public:
private:
IVectorPtr cpuLabel_;
MatrixPtr cpuWeight_;
// Evaluator interface
protected:
std::string getTypeImpl() const { return "sum"; }
};
/**
* @brief column sum Evaluator
@@ -357,10 +369,18 @@ public:
}
private:
ColumnSumEvaluator() {}
int32_t colIdx_;
size_t colNum_;
MatrixPtr sum_; /* cpu matrix */
// Evaluator interface
protected:
std::string getTypeImpl() const {
if (colIdx_ == -1)
return "last-column-sum";
else
return "column-sum";
}
};
void AucEvaluator::start() {
@@ -469,6 +489,16 @@ double AucEvaluator::calcAuc() const {
}
}
real AucEvaluator::getValueImpl() const { return calcAuc(); }
std::string AucEvaluator::getTypeImpl() const {
if (colIdx_ == -1) {
return "last-column-auc";
} else {
return "auc";
}
}
// class RankAucEvaluator
REGISTER_EVALUATOR(rankauc, RankAucEvaluator);
@@ -548,12 +578,15 @@ double RankAucEvaluator::calcRankAuc(real* outputData,
: aucTmp / (clickSum * noClickSum);
}
std::string RankAucEvaluator::getTypeImpl() const { return "rankauc"; }
// class PrecisionRecallEvaluator
REGISTER_EVALUATOR(precision_recall, PrecisionRecallEvaluator);
void PrecisionRecallEvaluator::start() {
Evaluator::start();
statsInfo_.clear();
values_.clear();
}
real PrecisionRecallEvaluator::evalImp(std::vector<Argument>& arguments) {
@@ -614,52 +647,23 @@ real PrecisionRecallEvaluator::evalImp(std::vector<Argument>& arguments) {
}

void PrecisionRecallEvaluator::printStats(std::ostream& os) const {
int label = config_.positive_label();
if (label != -1) {
CHECK(label >= 0 && label < (int)statsInfo_.size())
<< "positive_label [" << label << "] should be in range [0, "
<< statsInfo_.size() << ")";
double precision =
calcPrecision(statsInfo_[label].TP, statsInfo_[label].FP);
double recall = calcRecall(statsInfo_[label].TP, statsInfo_[label].FN);
os << "positive_label=" << label << " precision=" << precision
<< " recall=" << recall
<< " F1-score=" << calcF1Score(precision, recall);
return;
}

// micro average method: precision = (TP1+TP2)/(TP1+FP1+TP2+FP2)
// macro average method: precision = (precision1+precision2)/2
double microTotalTP = 0;
double microTotalFP = 0;
double microTotalFN = 0;
double macroAvgPrecision = 0;
double macroAvgRecall = 0;
size_t numLabels = statsInfo_.size();
for (size_t i = 0; i < numLabels; ++i) {
microTotalTP += statsInfo_[i].TP;
microTotalFP += statsInfo_[i].FP;
microTotalFN += statsInfo_[i].FN;
macroAvgPrecision += calcPrecision(statsInfo_[i].TP, statsInfo_[i].FP);
macroAvgRecall += calcRecall(statsInfo_[i].TP, statsInfo_[i].FN);
}
macroAvgPrecision /= numLabels;
macroAvgRecall /= numLabels;
double macroAvgF1Score = calcF1Score(macroAvgPrecision, macroAvgRecall);
os << "macro-average-precision=" << macroAvgPrecision
<< " macro-average-recall=" << macroAvgRecall
<< " macro-average-F1-score=" << macroAvgF1Score;
double microAvgPrecision = calcPrecision(microTotalTP, microTotalFP);
double microAvgRecall = calcPrecision(microTotalTP, microTotalFN);
double microAvgF1Score = calcF1Score(microAvgPrecision, microAvgRecall);
if (!isMultiBinaryLabel_) {
// precision and recall are equal in this case
os << " micro-average-precision=" << microAvgPrecision;
} else {
os << " micro-average-precision=" << microAvgPrecision
<< " micro-average-recall=" << microAvgRecall
<< " micro-average-F1-score=" << microAvgF1Score;
}
}

void PrecisionRecallEvaluator::printStats(std::ostream& os) const {
PrintStatsInfo info;
bool containMacroMicroInfo = getStatsInfo(&info);
os << "positive_label=" << config_.positive_label()
<< " precision=" << info.precision << " recall=" << info.recall
<< " F1-score=" << info.f1;
if (containMacroMicroInfo) {
os << "macro-average-precision=" << info.macroAvgPrecision
<< " macro-average-recall=" << info.macroAvgRecall
<< " macro-average-F1-score=" << info.macroAvgF1Score;
if (!isMultiBinaryLabel_) {
// precision and recall are equal in this case
os << " micro-average-precision=" << info.microAvgPrecision;
} else {
os << " micro-average-precision=" << info.microAvgPrecision
<< " micro-average-recall=" << info.microAvgRecall
<< " micro-average-F1-score=" << info.microAvgF1Score;
}
}
}

@@ -741,6 +745,60 @@ void PrecisionRecallEvaluator::calcStatsInfoMulti(const MatrixPtr& output,
}
}
void PrecisionRecallEvaluator::storeLocalValues() const {
if (this->values_.size() == 0) {
PrintStatsInfo info;
bool containMacroMicroInfo = getStatsInfo(&info);
values_["precision"] = info.precision;
values_["recal"] = info.recall;
values_["F1-score"] = info.f1;
if (containMacroMicroInfo) {
values_["macro-average-precision"] = info.macroAvgPrecision;
values_["macro-average-recall"] = info.macroAvgRecall;
values_["macro-average-F1-score"] = info.macroAvgF1Score;
if (!isMultiBinaryLabel_) {
// precision and recall are equal in this case
values_["micro-average-precision"] = info.microAvgPrecision;
} else {
values_["micro-average-precision"] = info.microAvgPrecision;
values_["micro-average-recall"] = info.microAvgRecall;
values_["micro-average-F1-score"] = info.microAvgF1Score;
}
}
}
}
void PrecisionRecallEvaluator::getNames(std::vector<std::string>* names) {
this->storeLocalValues();
names->reserve(this->values_.size());
for (auto it = this->values_.begin(); it != this->values_.end(); ++it) {
names->push_back(this->config_.name() + "." + it->first);
}
}
real PrecisionRecallEvaluator::getValue(const std::string& name,
Error* err) const {
this->storeLocalValues();
std::vector<std::string> buffers;
paddle::str::split(name, '.', &buffers);
auto it = this->values_.find(buffers[buffers.size() - 1]);
if (it == this->values_.end()) { // not found
*err = Error("No such key %s", name.c_str());
return .0f;
}
return it->second;
}
std::string PrecisionRecallEvaluator::getType(const std::string& name,
Error* err) const {
this->getValue(name, err);
if (!err->isOK()) {
return "";
}
return "precision_recall";
}
void PrecisionRecallEvaluator::distributeEval(ParameterClient2* client) {
size_t size = 4 * statsInfo_.size();
double* buf = new double[size];
@@ -760,6 +818,47 @@ void PrecisionRecallEvaluator::distributeEval(ParameterClient2* client) {
delete[] buf;
}
bool PrecisionRecallEvaluator::getStatsInfo(
PrecisionRecallEvaluator::PrintStatsInfo* info) const {
int label = config_.positive_label();
if (label != -1) {
CHECK(label >= 0 && label < (int)statsInfo_.size())
<< "positive_label [" << label << "] should be in range [0, "
<< statsInfo_.size() << ")";
info->precision = calcPrecision(statsInfo_[label].TP, statsInfo_[label].FP);
info->recall = calcRecall(statsInfo_[label].TP, statsInfo_[label].FN);
info->f1 = calcF1Score(info->precision, info->recall);
return false;
}
// micro average method: precision = (TP1+TP2)/(TP1+FP1+TP2+FP2)
// macro average method: precision = (precision1+precision2)/2
double microTotalTP = 0;
double microTotalFP = 0;
double microTotalFN = 0;
info->macroAvgPrecision = 0;
info->macroAvgRecall = 0;
size_t numLabels = statsInfo_.size();
for (size_t i = 0; i < numLabels; ++i) {
microTotalTP += statsInfo_[i].TP;
microTotalFP += statsInfo_[i].FP;
microTotalFN += statsInfo_[i].FN;
info->macroAvgPrecision +=
calcPrecision(statsInfo_[i].TP, statsInfo_[i].FP);
info->macroAvgRecall += calcRecall(statsInfo_[i].TP, statsInfo_[i].FN);
}
info->macroAvgPrecision /= numLabels;
info->macroAvgRecall /= numLabels;
info->macroAvgF1Score =
calcF1Score(info->macroAvgPrecision, info->macroAvgRecall);
info->microAvgPrecision = calcPrecision(microTotalTP, microTotalFP);
info->microAvgRecall = calcPrecision(microTotalTP, microTotalFN);
info->microAvgF1Score =
calcF1Score(info->microAvgPrecision, info->microAvgRecall);
return true;
}
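// Editorial sketch (not from the commit): a tiny standalone program illustrating
// the macro vs. micro averaging that getStatsInfo() implements, with assumed
// per-label counts. Macro averaging computes per-label precision/recall and then
// averages them; micro averaging pools TP/FP/FN across labels before taking the
// ratios, matching the two comment lines above.
#include <cstdio>

int main() {
  // Assumed counts: label 0 has TP=8, FP=2, FN=4; label 1 has TP=1, FP=1, FN=3.
  const double tp[2] = {8, 1}, fp[2] = {2, 1}, fn[2] = {4, 3};
  double macroP = 0, macroR = 0, sumTP = 0, sumFP = 0, sumFN = 0;
  for (int i = 0; i < 2; ++i) {
    macroP += tp[i] / (tp[i] + fp[i]);  // per-label precision
    macroR += tp[i] / (tp[i] + fn[i]);  // per-label recall
    sumTP += tp[i];
    sumFP += fp[i];
    sumFN += fn[i];
  }
  macroP /= 2;                                    // (0.8 + 0.5) / 2 = 0.65
  macroR /= 2;                                    // (0.667 + 0.25) / 2 ~ 0.458
  const double microP = sumTP / (sumTP + sumFP);  // 9 / 12 = 0.75
  const double microR = sumTP / (sumTP + sumFN);  // 9 / 16 = 0.5625
  std::printf("macro P=%.3f R=%.3f | micro P=%.3f R=%.3f\n", macroP, macroR,
              microP, microR);
  return 0;
}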
REGISTER_EVALUATOR(pnpair, PnpairEvaluator);
void PnpairEvaluator::start() {
Evaluator::start();
@@ -884,6 +983,8 @@ void PnpairEvaluator::calc(std::vector<PredictionResult>& predictArray) {
<< " calc total special pair: " << special;
}
std::string PnpairEvaluator::getTypeImpl() const { return "pnpair"; }
ClassRegistrar<Evaluator> Evaluator::registrar_;
Evaluator* Evaluator::create(const EvaluatorConfig& config) {
Evaluator* evaluator = registrar_.createByType(config.type());
@@ -905,7 +1006,7 @@ static InitFunction __reg_type_auc_sum__([]() {
*
* The config file api is value_printer_evaluator.
*/
class ValuePrinter : public Evaluator {
class ValuePrinter : public NotGetableEvaluator {
public:
virtual void eval(const NeuralNetwork& nn) {
for (const std::string& name : config_.input_layers()) {
@@ -919,12 +1020,13 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
};
REGISTER_EVALUATOR(value_printer, ValuePrinter);
/**
* @brief print gradient of each layer.
*
* The config file api is gradient_printer_evaluator.
*/
class GradientPrinter : public Evaluator {
class GradientPrinter : public NotGetableEvaluator {
public:
virtual void eval(const NeuralNetwork& nn) {
for (const std::string& name : config_.input_layers()) {
@@ -947,7 +1049,7 @@ REGISTER_EVALUATOR(gradient_printer, GradientPrinter);
*
* The config file api is maxid_printer_evaluator.
*/
class MaxIdPrinter : public Evaluator {
class MaxIdPrinter : public NotGetableEvaluator {
private:
IVectorPtr maxIds_;
MatrixPtr maxValues_;
@@ -989,7 +1091,7 @@ REGISTER_EVALUATOR(max_id_printer, MaxIdPrinter);
*
* The config file api is maxframe_printer_evaluator.
*/
class MaxFramePrinter : public Evaluator {
class MaxFramePrinter : public NotGetableEvaluator {
private:
IVectorPtr maxIds_;
MatrixPtr maxValues_;
@@ -1076,7 +1178,7 @@ REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
* The config file api is seqtext_printer_evaluator.
*
*/
class SequenceTextPrinter : public Evaluator {
class SequenceTextPrinter : public NotGetableEvaluator {
private:
/// dict_file, which contains a list of tokens
std::vector<std::string> dict_;
@@ -1243,4 +1345,6 @@ public:
};
REGISTER_EVALUATOR(classification_error_printer, ClassificationErrorPrinter);
std::string DummyEvaluator::getTypeImpl() const { return "dummy"; }
} // namespace paddle
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/parameter/Argument.h"
#include "paddle/pserver/ParameterClient2.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Error.h"
namespace paddle {
@@ -117,12 +118,105 @@ public:
static ClassRegistrar<Evaluator> registrar_;
/**
* @brief getNames will return all field names of current evaluator.
*
* The format of name is `evaluator_name.evaluator_fields`. If the evaluator
* has multiple fields, the names look like `evaluator_name.field1`. For example,
* the PrecisionRecallEvaluator contains `precision` and `recall` fields, so
* getNames will return `precision_recall_evaluator.precision`,
* `precision_recall_evaluator.recal`, etc.
*
* Also, if the current Evaluator is a combined evaluator, getNames will return
* the names of all evaluators inside the combined evaluator.
*
* @param names [out]: the field names of current evaluator.
* @note Never clear the names parameter inside getNames.
*/
virtual void getNames(std::vector<std::string>* names) {
names->push_back(config_.name());
}
/**
* @brief getValue will return the current evaluate value of one field.
*
* @param name: The field name of current evaluator.
* @param err [out]: The error state.
*
* @return The evaluate value(metric).
*/
virtual real getValue(const std::string& name, Error* err) const {
if (name != config_.name()) {
*err = Error("no such name of evaluator %s", name.c_str());
return .0f;
}
return this->getValueImpl();
}
/**
* @brief getType will return the evaluator type by field name.
*
* Evaluate type is the current type of the evaluator as a string, such as 'auc'
* or 'precision_recall'. In a combined evaluator, different names may map to
* different evaluate types, because each name could be handled by a different evaluator inside.
*
* @param name: The field name of current Evaluator.
* @param err: The error state. nullptr means don't care.
* @return the evaluator type string.
*/
virtual std::string getType(const std::string& name, Error* err) const {
if (name != config_.name()) {
*err = Error("no such name of evaluator %s", name.c_str());
return std::string();
}
return this->getTypeImpl();
}
protected:
/**
* @brief getValueImpl The simplest way to define the getValue result. If this
* evaluator doesn't contain multiple fields and does not throw any error, just
* implement this method to get the evaluate result (metric).
* @return Evaluate result(metric).
*/
virtual real getValueImpl() const {
return numSamples_ != .0 ? totalScore_ / numSamples_ : .0;
}
/**
* @brief getTypeImpl The simplest way to define the getType result. If this
* evaluator doesn't combine many evaluators, getType should simply return its
* own type.
* @return Evaluator type.
*/
virtual std::string getTypeImpl() const { return "base"; }
protected:
EvaluatorConfig config_;
double numSamples_;
double totalScore_;
};
/**
* @brief The NotGetableEvaluator class is the base class for evaluators whose
* value cannot be queried at runtime. Most NotGetableEvaluators are printer
* evaluators, which are only used to debug the network configuration.
*/
class NotGetableEvaluator : public Evaluator {
// Evaluator interface
public:
void getNames(std::vector<std::string>* names) {}
real getValue(const std::string& name, Error* err) const {
*err = Error("Not implemented");
return .0f;
}
std::string getType(const std::string& name, Error* err) const {
*err = Error("Not implemented");
return "";
}
};
class DummyEvaluator : public Evaluator {
public:
DummyEvaluator() {}
@@ -135,6 +229,10 @@ public:
}
virtual void finish() {}
virtual void printStats(std::ostream&) const {}
// Evaluator interface
protected:
std::string getTypeImpl() const;
};
/**
* @brief evaluate AUC using colIdx-th column as prediction.
@@ -191,6 +289,11 @@ private:
}
double calcAuc() const;
// Evaluator interface
protected:
real getValueImpl() const;
std::string getTypeImpl() const;
};
/**
@@ -223,6 +326,10 @@ private:
real* clickData,
real* pvData,
size_t size);
// Evaluator interface
protected:
std::string getTypeImpl() const;
};
/**
* @brief precision, recall and f1 score Evaluator
@@ -272,6 +379,20 @@ private:
IVectorPtr cpuLabel_;
MatrixPtr cpuWeight_;
struct PrintStatsInfo {
double precision;
double recall;
double f1;
double macroAvgPrecision;
double macroAvgRecall;
double macroAvgF1Score;
double microAvgPrecision;
double microAvgRecall;
double microAvgF1Score;
};
bool getStatsInfo(PrintStatsInfo* info) const;
void calcStatsInfo(const MatrixPtr& output,
const IVectorPtr& label,
const MatrixPtr& weight);
@@ -303,6 +424,15 @@ private:
return 0;
}
}
mutable std::unordered_map<std::string, real> values_;
void storeLocalValues() const;
// Evaluator interface
public:
void getNames(std::vector<std::string>* names);
real getValue(const std::string& name, Error* err) const;
std::string getType(const std::string& name, Error* err) const;
};
/*
@@ -349,8 +479,7 @@ public:
virtual void finish() { calc(predictArray_); }
virtual void printStats(std::ostream& os) const {
os << " pos/neg"
<< "=" << pairArray_[0] / ((pairArray_[1] <= 0) ? 1.0 : pairArray_[1]);
os << " pos/neg=" << this->getValueImpl();
}
virtual void distributeEval(ParameterClient2* client) {
@@ -366,6 +495,13 @@ private:
IVectorPtr cpuLabel_;
IVectorPtr cpuInfo_;
MatrixPtr cpuWeight_;
// Evaluator interface
protected:
real getValueImpl() const {
return pairArray_[0] / ((pairArray_[1] <= 0) ? 1.0 : pairArray_[1]);
}
std::string getTypeImpl() const;
};
} // namespace paddle
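As an aside, a hedged sketch of how a single-field evaluator can plug into the interface declared above by overriding only the protected *Impl hooks; the class and type names here are illustrative and not part of this patch.

// Hypothetical single-field evaluator (illustrative only). Because it overrides
// just getTypeImpl(), the base-class getNames()/getValue()/getType() expose one
// field named config_.name(), whose value comes from the default getValueImpl(),
// i.e. totalScore_ / numSamples_.
class MeanScoreEvaluator : public Evaluator {
public:
  // How totalScore_ and numSamples_ get accumulated per batch is elided here.
  virtual real evalImp(std::vector<Argument>& arguments) { return 0; }

  virtual void printStats(std::ostream& os) const {
    os << config_.name() << "=" << getValueImpl();
  }

  // Evaluator interface
protected:
  std::string getTypeImpl() const { return "mean_score"; }
};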
@@ -306,7 +306,6 @@ void NeuralNetwork::onPassEnd() {
class CombinedEvaluator : public Evaluator {
public:
CombinedEvaluator() {}
void addEvaluator(std::unique_ptr<Evaluator>&& evaluator) {
evaluators_.emplace_back(std::move(evaluator));
}
@@ -346,6 +345,55 @@ public:
protected:
std::vector<std::unique_ptr<Evaluator>> evaluators_;
// Evaluator interface
public:
/**
* @brief getNames will return all inside evaluators' names.
* @param names [out]: return names.
*/
void getNames(std::vector<std::string>* names) {
for (auto& eval : evaluators_) {
eval->getNames(names);
}
}
/**
* @brief getValue could get all inside evaluators' value.
*/
real getValue(const std::string& name, Error* err) const {
return this->getMethodHelper<real>(
name, err, [&name, err](const std::unique_ptr<Evaluator>& eval) {
return eval->getValue(name, err);
});
}
/**
* @brief getType could get all inside evaluators' type.
*/
std::string getType(const std::string& name, Error* err) const {
return this->getMethodHelper<std::string>(
name, err, [&name, err](const std::unique_ptr<Evaluator>& eval) {
return eval->getType(name, err);
});
}
private:
template <typename T>
T getMethodHelper(const std::string& name,
Error* err,
const std::function<T(const std::unique_ptr<Evaluator>&)>&
callback) const {
for (auto& eval : evaluators_) {
std::vector<std::string> names;
eval->getNames(&names);
if (std::find(names.begin(), names.end(), name) != names.end()) {
return callback(eval);
}
}
*err = Error("No such key %s", name.c_str());
return T();
}
};
Evaluator* NeuralNetwork::makeEvaluator() const {
......
@@ -110,6 +110,18 @@ void testEvaluator(TestConfig testConf,
testEvaluator->finish();
LOG(INFO) << *testEvaluator;
std::vector<std::string> names;
testEvaluator->getNames(&names);
paddle::Error err;
for (auto& name : names) {
auto value = testEvaluator->getValue(name, &err);
ASSERT_TRUE(err.isOK());
LOG(INFO) << name << " " << value;
auto tp = testEvaluator->getType(name, &err);
ASSERT_TRUE(err.isOK());
ASSERT_EQ(testConf.evaluatorConfig.type(), tp);
}
double totalScore2 = 0.0;
if (testConf.testAccumulate) {
testEvaluator->start();
......
@@ -10,28 +10,30 @@ RUN apt-get update && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \
apt-get install -y automake && \
apt-get clean -y
RUN pip install --upgrade pip && \
pip install -U protobuf && \
pip install -U "protobuf==3.1.0" && \
pip install -U wheel pillow BeautifulSoup && \
pip install -U docopt PyYAML sphinx && \
pip install -U sphinx_rtd_theme recommonmark jupyter
RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \
cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
cd .. && rm -rf cmake-3.4.1
ARG BUILD_WOBOQ
ARG BUILD_AND_INSTALL
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
ENV WITH_GPU=OFF
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
RUN mkdir /paddle
......
@@ -10,28 +10,30 @@ RUN apt-get update && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \
apt-get install -y automake && \
apt-get clean -y
RUN pip install --upgrade pip && \
pip install -U protobuf && \
pip install -U "protobuf==3.1.0" && \
pip install -U wheel pillow BeautifulSoup && \
pip install -U docopt PyYAML sphinx && \
pip install -U sphinx_rtd_theme recommonmark jupyter
RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \
cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
cd .. && rm -rf cmake-3.4.1
ARG BUILD_WOBOQ
ARG BUILD_AND_INSTALL
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
ENV WITH_GPU=ON
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
RUN mkdir /paddle
......
@@ -11,7 +11,7 @@ set -e
# If Dockerfile.* sets BUILD_AND_INSTALL to 'ON', it would have copied
# source tree to /paddle, and this script should build it into
# /paddle/build.
if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so
fi
@@ -19,7 +19,7 @@ if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
mkdir -p /paddle/build # -p means no error if exists
cd /paddle/build
cmake .. \
-DWITH_DOC=ON \
-DWITH_DOC=${WITH_DOC:-OFF} \
-DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_SWIG_PY=ON \
@@ -29,28 +29,32 @@ if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
make -j `nproc`
make install

# Install woboq_codebrowser.
git clone https://github.com/woboq/woboq_codebrowser /woboq
cd /woboq
cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
-DCMAKE_BUILD_TYPE=Release \
.
make
export WOBOQ_OUT=/usr/share/nginx/html/paddle
export BUILD_DIR=/paddle/build
mkdir -p $WOBOQ_OUT
cp -rv /woboq/data $WOBOQ_OUT/../data
/woboq/generator/codebrowser_generator \
-b /paddle/build \
-a \
-o $WOBOQ_OUT \
-p paddle:/paddle
/woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
cd /woboq
make clean
pip install /usr/local/opt/paddle/share/wheels/*.whl

if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then
apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev
# Install woboq_codebrowser.
git clone https://github.com/woboq/woboq_codebrowser /woboq
cd /woboq
cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
-DCMAKE_BUILD_TYPE=Release \
.
make
export WOBOQ_OUT=/usr/share/nginx/html/paddle
export BUILD_DIR=/paddle/build
mkdir -p $WOBOQ_OUT
cp -rv /woboq/data $WOBOQ_OUT/../data
/woboq/generator/codebrowser_generator \
-b /paddle/build \
-a \
-o $WOBOQ_OUT \
-p paddle:/paddle
/woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
cd /woboq
make clean
fi
pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl
pip install /usr/local/opt/paddle/share/wheels/paddle*.whl

paddle version
fi
......
@@ -37,10 +37,10 @@ namespace paddle {
*
* Error __must_check bar() {
* // do something.
* Status s = foo(); // invoke other method return status.
* Error err = foo(); // invoke other method return status.
* if (!s) return s;
* if (err) return err;
* // do something else.
* return Status();
* return Error();
* }
* @endcode{cpp}
*
@@ -53,8 +53,8 @@ namespace paddle {
*
* int foo(Error* error) {
* // Do something.
* Error s = bar();
* Error err = bar();
* if (!s) {
* if (err) {
* *error = s;
* return 0;
* }
@@ -68,10 +68,10 @@ namespace paddle {
* }
*
* Error foobar() {
* Error s;
* Error err;
* // do something.
* foo(&s);
* foo(&err);
* if (!s) return s;
* if (err) return err;
* }
* @endcode{cpp}
*
@@ -112,16 +112,22 @@ public:
}
/**
* @brief operator bool, return True if there is no error.
* @brief operator bool, return True if there is an error.
*/
operator bool() const { return msg_ == nullptr; }
operator bool() const { return !this->isOK(); }
/**
* @brief isOK return True if there is no error.
* @return True if no error.
*/
bool isOK() const { return msg_ == nullptr; }
/**
* @brief check this status by glog.
* @note It is a temp method used during cleaning Paddle code. It will be
* removed later.
*/
void check() const { CHECK(*this) << msg(); }
void check() const { CHECK(this->isOK()) << msg(); }
private:
std::shared_ptr<std::string> msg_;
......
@@ -18,17 +18,17 @@ limitations under the License. */
TEST(Error, testAll) {
paddle::Error error;
ASSERT_TRUE(error);
error = paddle::Error("I'm the error");
ASSERT_FALSE(error);
error = paddle::Error("I'm the error");
ASSERT_TRUE(error);
ASSERT_STREQ("I'm the error", error.msg());
error = paddle::Error("error2");
ASSERT_FALSE(error);
ASSERT_TRUE(error);
ASSERT_STREQ("error2", error.msg());
int i = 3;
auto error3 = paddle::Error("error%d", i);
ASSERT_FALSE(error3);
ASSERT_TRUE(error3);
ASSERT_STREQ("error3", error3.msg());
}
@@ -19,11 +19,12 @@ import trainer
import event
import data_type
import data_feeder
import attr
import py_paddle.swig_paddle as api
__all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
'event', 'data_type', 'data_feeder'
'event', 'data_type', 'attr', 'data_feeder'
]
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers.attrs import *
__all__ = [
"Param",
"Extra",
]
Param = ParameterAttribute
Extra = ExtraLayerAttribute
@@ -23,9 +23,10 @@ class DataFeeder(DataProviderConverter):
"""
DataFeeder converts the data returned by paddle.reader into a data structure
of Arguments which is defined in the API. The paddle.reader usually returns
a list of mini-batch data. Each item in the list is a list or a tuple,
which is one sample with one or multiple features. DataFeeder converts this
mini-batch data into Arguments in order to feed it to C++ interface.
a list of mini-batch data entries. Each data entry in the list is one sample.
Each sample is a list or a tuple with one feature or multiple features.
DataFeeder converts these mini-batch data entries into Arguments in order
to feed them to the C++ interface.
The example usage:
@@ -37,6 +38,10 @@ class DataFeeder(DataProviderConverter):
( [1.0,2.0,3.0,4.0], 5, [6,7,8] ), # first sample
( [1.0,2.0,3.0,4.0], 5, [6,7,8] ) # second sample
]
# or minibatch_data = [
# [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ], # first sample
# [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample
# ]
arg = feeder(minibatch_data)
"""
@@ -63,13 +68,19 @@ class DataFeeder(DataProviderConverter):
def convert(self, dat, argument=None):
"""
:param dat: A list of mini-batch data. Each item is a list or tuple,
for example:
[
(feature_0, feature_1, feature_2, ...), # first sample
(feature_0, feature_1, feature_2, ...), # second sample
...
]
:param dat: A list of mini-batch data. Each sample is a list or tuple
with one feature or multiple features,
for example:
[
([0.2, 0.2], ), # first sample
([0.8, 0.3], ), # second sample
]
or,
[
[[0.2, 0.2], ], # first sample
[[0.8, 0.3], ], # second sample
]
:type dat: List
:param argument: An Arguments object contains this mini-batch data with
one or multiple features. The Arguments definition is
......
@@ -14,9 +14,9 @@
from paddle.trainer.PyDataProvider2 import \
InputType, dense_vector, sparse_binary_vector,\
sparse_vector, integer_value
sparse_vector, integer_value, integer_value_sequence
__all__ = [
'InputType', 'dense_vector', 'sparse_binary_vector', 'sparse_vector',
'integer_value'
'integer_value', 'integer_value_sequence'
]
@@ -74,6 +74,8 @@ from paddle.trainer_config_helpers.config_parser_utils import \
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
import data_type
import activation
import attr
__all__ = [
'parse_network', 'data', 'fc', 'max_id', 'classification_cost',
@@ -230,8 +232,11 @@ if __name__ == '__main__':
weight = data(name='weight', type=data_type.dense_vector(10))
score = data(name='score', type=data_type.dense_vector(1))
hidden = fc(input=pixel, size=100, act=conf_helps.SigmoidActivation())
inference = fc(input=hidden, size=10, act=conf_helps.SoftmaxActivation())
hidden = fc(input=pixel,
size=100,
act=activation.Sigmoid(),
param_attr=attr.Param(name='hidden'))
inference = fc(input=hidden, size=10, act=activation.Softmax())
maxid = max_id(input=inference)
cost1 = classification_cost(input=inference, label=label)
cost2 = classification_cost(input=inference, label=label, weight=weight)
......
add_test(NAME test_v2_layer
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
add_test(NAME test_v2_api
COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
@@ -32,7 +32,7 @@ class DataFeederTest(unittest.TestCase):
num = np.random.randint(size_limit)
return np.random.randint(high, size=num).tolist()
def test_dense_vector(self):
def test_dense(self):
def compare(input):
feeder = DataFeeder([('image', data_type.dense_vector(784))],
{'image': 0})
@@ -51,7 +51,7 @@ class DataFeederTest(unittest.TestCase):
data.append(each_sample)
compare(data)
# test list
# each feature is a list
data = []
for i in xrange(batch_size):
each_sample = []
@@ -59,6 +59,13 @@ class DataFeederTest(unittest.TestCase):
data.append(each_sample)
compare(data)
# test tuple
data = []
for i in xrange(batch_size):
each_sample = (self.dense_reader(dim).tolist(), )
data.append(each_sample)
compare(data)
def test_sparse_binary(self):
dim = 10000
batch_size = 32
@@ -86,7 +93,7 @@ class DataFeederTest(unittest.TestCase):
a = self.sparse_binary_reader(dim, 40, non_empty=True)
b = self.dense_reader(len(a)).tolist()
v.append(a)
w.append(b[0])
w.append(np.array(b, dtype="float32"))
each_sample.append(zip(a, b))
data.append(each_sample)
@@ -97,6 +104,10 @@ class DataFeederTest(unittest.TestCase):
assert isinstance(output, api.Matrix)
for i in xrange(batch_size):
self.assertEqual(output.getSparseRowCols(i), v[i])
cols_value = output.getSparseRowColsVal(i)
value = [val[1] for val in cols_value]
value = np.array(value, dtype="float32")
self.assertAlmostEqual(value.all(), w[i].all())
def test_integer(self):
dim = 100
@@ -113,16 +124,42 @@ class DataFeederTest(unittest.TestCase):
index = np.array(index, dtype='int')
self.assertEqual(output.all(), index.flatten().all())
def test_multiple_slots(self):
def test_integer_sequence(self):
dim = 10000
batch_size = 32
start = [0]
data = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(
self.sparse_binary_reader(
dim, 30, non_empty=True))
data.append(each_sample)
start.append(len(each_sample[0]) + start[-1])
feeder = DataFeeder([('input', data_type.integer_value_sequence(dim))],
{'input': 0})
arg = feeder(data)
output_data = arg.getSlotIds(0).copyToNumpyArray()
output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray()
index = []
for dat in data:
index.extend(x for x in dat[0]) # only one feature, so dat[0]
index = np.array(index, dtype='int')
start = np.array(start, dtype='int')
self.assertEqual(output_data.all(), index.all())
self.assertEqual(output_start.all(), start.all())
def test_multiple_features(self):
batch_size = 2
data = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(np.random.randint(10)) # size of feature 2: 10
each_sample.append(np.random.randint(10))
each_sample.append(
self.sparse_binary_reader(
20000, 40, non_empty=True)) # size of feature 1: 20000
20000, 40, non_empty=True))
each_sample.append(self.dense_reader(100)) # size of feature 0: 100
each_sample.append(self.dense_reader(100))
data.append(each_sample)
# test multiple features
@@ -150,10 +187,30 @@ class DataFeederTest(unittest.TestCase):
self.assertEqual(output_dense[i].all(), data[i][2].all())
self.assertEqual(output_index[i], data[i][0])
def test_multiple_features_tuple(self):
batch_size = 2
data = []
for i in xrange(batch_size):
a = np.random.randint(10)
b = self.sparse_binary_reader(20000, 40, non_empty=True)
c = self.dense_reader(100)
each_sample = (a, b, c)
data.append(each_sample)
# test multiple features
data_types = [('fea0', data_type.dense_vector(100)),
('fea1', data_type.sparse_binary_vector(20000)),
('fea2', data_type.integer_value(10))]
feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
arg = feeder(data)
out_dense = arg.getSlotValue(0).copyToNumpyMat()
out_sparse = arg.getSlotValue(1)
out_index = arg.getSlotIds(2).copyToNumpyArray()
for i in xrange(batch_size):
self.assertEqual(out_dense[i].all(), data[i][2].all())
self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1])
self.assertEqual(out_index[i], data[i][0])
if __name__ == '__main__':
api.initPaddle("--use_gpu=0")
unittest.main()
if __name__ == '__main__':
api.initPaddle("--use_gpu=0")
......
# Copyright PaddlePaddle contributors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import difflib
import unittest
import paddle.trainer_config_helpers as conf_helps
import paddle.v2.activation as activation
import paddle.v2.attr as attr
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as parse_network
pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
label = layer.data(name='label', type=data_type.integer_value(10))
weight = layer.data(name='weight', type=data_type.dense_vector(10))
score = layer.data(name='score', type=data_type.dense_vector(1))
hidden = layer.fc(input=pixel,
size=100,
act=activation.Sigmoid(),
param_attr=attr.Param(name='hidden'))
inference = layer.fc(input=hidden, size=10, act=activation.Softmax())
class CostLayerTest(unittest.TestCase):
def test_cost_layer(self):
cost1 = layer.classification_cost(input=inference, label=label)
cost2 = layer.classification_cost(
input=inference, label=label, weight=weight)
cost3 = layer.cross_entropy_cost(input=inference, label=label)
cost4 = layer.cross_entropy_with_selfnorm_cost(
input=inference, label=label)
cost5 = layer.regression_cost(input=inference, label=label)
cost6 = layer.regression_cost(
input=inference, label=label, weight=weight)
cost7 = layer.multi_binary_label_cross_entropy_cost(
input=inference, label=label)
cost8 = layer.rank_cost(left=score, right=score, label=score)
cost9 = layer.lambda_cost(input=inference, score=score)
cost10 = layer.sum_cost(input=inference)
cost11 = layer.huber_cost(input=score, label=label)
print dir(layer)
layer.parse_network(cost1, cost2)
print dir(layer)
#print layer.parse_network(cost3, cost4)
#print layer.parse_network(cost5, cost6)
#print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
if __name__ == '__main__':
unittest.main()