diff --git a/paddle/gserver/evaluators/ChunkEvaluator.cpp b/paddle/gserver/evaluators/ChunkEvaluator.cpp index 3d8af5bcd419e76fb2026eddc95dc409a33c9d92..13f02e51fe9e3831103982130bfdaa3255e1d174 100644 --- a/paddle/gserver/evaluators/ChunkEvaluator.cpp +++ b/paddle/gserver/evaluators/ChunkEvaluator.cpp @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include "paddle/math/Vector.h" @@ -72,6 +73,7 @@ class ChunkEvaluator : public Evaluator { std::vector labelSegments_; std::vector outputSegments_; + std::set excludedChunkTypes_; public: virtual void init(const EvaluatorConfig& config) { @@ -105,6 +107,10 @@ public: } CHECK(config.has_num_chunk_types()) << "Missing num_chunk_types in config"; otherChunkType_ = numChunkTypes_ = config.num_chunk_types(); + + // the chunks of types in excludedChunkTypes_ will not be counted + auto& tmp = config.excluded_chunk_types(); + excludedChunkTypes_.insert(tmp.begin(), tmp.end()); } virtual void start() { @@ -156,7 +162,8 @@ public: getSegments(label, length, labelSegments_); size_t i = 0, j = 0; while (i < outputSegments_.size() && j < labelSegments_.size()) { - if (outputSegments_[i] == labelSegments_[j]) { + if (outputSegments_[i] == labelSegments_[j] && + excludedChunkTypes_.count(outputSegments_[i].type) != 1) { ++numCorrect_; } if (outputSegments_[i].end < labelSegments_[j].end) { @@ -168,8 +175,12 @@ public: ++j; } } - numLabelSegments_ += labelSegments_.size(); - numOutputSegments_ += outputSegments_.size(); + for (auto& segment : labelSegments_) { + if (excludedChunkTypes_.count(segment.type) != 1) ++numLabelSegments_; + } + for (auto& segment : outputSegments_) { + if (excludedChunkTypes_.count(segment.type) != 1) ++numOutputSegments_; + } } void getSegments(int* label, int length, std::vector& segments) { diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h index 579eca71d4cdd2545a3a8be1c7f1dacfdd5ef66b..ad82869aec8318cef42f5a51a7e29c233bd23a95 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/gserver/gradientmachines/GradientMachine.h @@ -212,11 +212,7 @@ public: * @note This function will only been implemented and used in a * multithreaded environment. */ - virtual void start(const TrainerConfig& config, - DataProviderPtr dataProvider) { - (void)config; - (void)dataProvider; - } + virtual void start() {} /** * @brief check each work-thread whether is failed/error/finish, diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 88c098b3559d8d2918309aa48329af067f79bdd5..95a4c0e16a91f572a0d78e2fee113e03a82d2056 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -441,7 +441,7 @@ TrainerThread::TrainerThread(const ModelConfig& config, TrainerThread::~TrainerThread() { stop(); } void TrainerThread::start() { - gradientMachine_->start(*(TrainerConfig*)nullptr, (DataProviderPtr) nullptr); + gradientMachine_->start(); computeThread_.reset(new std::thread([this]() { computeThread(); })); diff --git a/paddle/gserver/gradientmachines/MultiNetwork.cpp b/paddle/gserver/gradientmachines/MultiNetwork.cpp index 6eb3d8db962161ed4123b4ef4a4bb42147bfdf19..f1308f3721f8d6cf2645d7cd54b14959311ae17f 100644 --- a/paddle/gserver/gradientmachines/MultiNetwork.cpp +++ b/paddle/gserver/gradientmachines/MultiNetwork.cpp @@ -109,10 +109,9 @@ void MultiNetwork::onPassEnd() { } } -void MultiNetwork::start(const TrainerConfig& config, - DataProviderPtr dataProvider) { +void MultiNetwork::start() { for (auto& subNetwork : subNetworks_) { - subNetwork->start(config, dataProvider); + subNetwork->start(); } } diff --git a/paddle/gserver/gradientmachines/MultiNetwork.h b/paddle/gserver/gradientmachines/MultiNetwork.h index 89fbf32b4f90bceab60b8335c27b369806faaee1..f04406b983746965d72071c6ea6cfd2b6afcf9dc 100644 --- a/paddle/gserver/gradientmachines/MultiNetwork.h +++ b/paddle/gserver/gradientmachines/MultiNetwork.h @@ -54,7 +54,7 @@ public: return subNetworks_; } - virtual void start(const TrainerConfig& config, DataProviderPtr dataProvider); + virtual void start(); virtual void finish(); diff --git a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp index 980a5851a2734ce42b3417d16a37987dc5ed6b24..c6e3a3b321efb06a1678ec760c514fe1181cd3d7 100644 --- a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp @@ -131,11 +131,7 @@ void ParallelNeuralNetwork::forwardBackward(const std::vector& inArgs, backward(callback); } -void ParallelNeuralNetwork::start(const TrainerConfig& config, - DataProviderPtr dataProvider) { - (void)config; - (void)dataProvider; - +void ParallelNeuralNetwork::start() { for (auto& thread : threads_) { thread->start(); } diff --git a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.h b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.h index 8f445b1ded3eb8960dc06512dd3f80b00d284acc..39f5682a58e653cabaf4f3d3382d1e0bac5dece9 100644 --- a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.h +++ b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.h @@ -56,7 +56,7 @@ public: PassType passType, const UpdateCallback &callback = NULL); - virtual void start(const TrainerConfig &config, DataProviderPtr dataProvider); + virtual void start(); void addComputeThread(int deviceId); diff --git a/paddle/gserver/tests/test_NetworkCompare.cpp b/paddle/gserver/tests/test_NetworkCompare.cpp index fc60228f816e0cea30ef764c59a8c7875ed4a0e8..0d261059555c971cd509e64802d6c70abc9d2fef 100644 --- a/paddle/gserver/tests/test_NetworkCompare.cpp +++ b/paddle/gserver/tests/test_NetworkCompare.cpp @@ -114,7 +114,7 @@ void calcGradient(DataIn& in, DataOut& out, const std::string& configPath) { parameters[i]->getBuf(PARAMETER_VALUE)->copyFrom(*in.paraValues[i]); } } - gradientMachine->start(trainer.getConfig(), nullptr); + gradientMachine->start(); gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN); for (size_t i = 0; i < in.outGrads.size(); i++) { // If the all the layers in the config have no parameters, also diff --git a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp index e19cf35cd5eb0148879fc3d0d40ea9b106947f9a..150850da4d49a2320acc70ed370cf8728d5c9def 100644 --- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp +++ b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp @@ -28,7 +28,7 @@ class TrainerForTest : public paddle::Trainer { public: void startTrain() { GradientMachine& gm = *this->trainerInternal_.getGradientMachine(); - gm.start(this->getConfig(), dataProvider_); + gm.start(); } void finishTrain() { diff --git a/paddle/trainer/Tester.cpp b/paddle/trainer/Tester.cpp index 24fac3e5a8141cbec912d276833ec491385b97ab..13aa28ae5d9699d267858d48e46797c756487ddd 100644 --- a/paddle/trainer/Tester.cpp +++ b/paddle/trainer/Tester.cpp @@ -257,7 +257,7 @@ void Tester::test() { CHECK(testDataProvider_) << "TestData is not specified"; testDataProvider_->setSkipShuffle(); testDataProvider_->reset(); - gradientMachine_->start(*config_, testDataProvider_); + gradientMachine_->start(); // For evaluation std::vector modelList; diff --git a/paddle/trainer/Trainer.cpp b/paddle/trainer/Trainer.cpp index 1eec2c432d235ef484b688db08aae8a39f878a85..6c57467cca30d6703a7ebe6babc624b70a427b8b 100644 --- a/paddle/trainer/Trainer.cpp +++ b/paddle/trainer/Trainer.cpp @@ -308,7 +308,7 @@ static double genPerturbation(real* d, real* grad, size_t dim) { } real Trainer::checkGradient() { - trainerInternal_.getGradientMachine()->start(*config_, dataProvider_); + trainerInternal_.getGradientMachine()->start(); std::vector& parameters = trainerInternal_.getGradientMachine()->getNonStaticParameters(); DataBatch dataBatch; @@ -390,7 +390,7 @@ void Trainer::startTrain() { dataProvider_->reset(); } - trainerInternal_.getGradientMachine()->start(*config_, dataProvider_); + trainerInternal_.getGradientMachine()->start(); } void Trainer::finishTrain() { trainerInternal_.getGradientMachine()->finish(); } diff --git a/paddle/trainer/tests/test_Compare.cpp b/paddle/trainer/tests/test_Compare.cpp index 72fc76bea35e433eeb08ba625b4bf6afdda491fb..e855a8fe2e09aa0f16a73f3e7bcc2f32921092f8 100644 --- a/paddle/trainer/tests/test_Compare.cpp +++ b/paddle/trainer/tests/test_Compare.cpp @@ -50,7 +50,7 @@ void calcGradient(bool useGpu, comData& Data) { trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch); CHECK(dataBatch.getSize()) << "No data from data provider"; vector& inArgs = dataBatch.getStreams(); - trainer.getGradientMachine()->start(trainer.getConfig(), nullptr); + trainer.getGradientMachine()->start(); for (int i = 0; i < 2; ++i) { trainer.getGradientMachine()->forwardBackward( inArgs, &Data.outArgs, PASS_TRAIN); diff --git a/paddle/trainer/tests/test_CompareTwoNets.cpp b/paddle/trainer/tests/test_CompareTwoNets.cpp index 80c61e259e71dd31d7637072248b22a2910c532e..94f65e545d116c802fb4877dc14f07aaaf83a4fb 100644 --- a/paddle/trainer/tests/test_CompareTwoNets.cpp +++ b/paddle/trainer/tests/test_CompareTwoNets.cpp @@ -72,7 +72,7 @@ void calcGradient(ComData& data, const string configFile) { CHECK(dataBatch.getSize()) << "No data from data provider"; vector& inArgs = dataBatch.getStreams(); - trainer.getGradientMachine()->start(trainer.getConfig(), nullptr); + trainer.getGradientMachine()->start(); trainer.getGradientMachine()->forwardBackward( inArgs, &data.outArgs, PASS_TRAIN); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 552af71e76e5adf27f35bb5ad6fd8a69c71df0f1..be4d0041f91cf7d0306d14338b43bb25e052fd58 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -433,8 +433,10 @@ message EvaluatorConfig { repeated string input_layers = 3; // Used by ChunkEvaluator - optional string chunk_scheme = 4; // one of "IOB", "IOE", "IOBES" - optional int32 num_chunk_types = 5; // number of chunk types other than "other" + // one of "IOB", "IOE", "IOBES" + optional string chunk_scheme = 4; + // number of chunk types other than "other" + optional int32 num_chunk_types = 5; // Used by PrecisionRecallEvaluator and ClassificationErrorEvaluator // For multi binary labels: true if output > classification_threshold @@ -453,6 +455,10 @@ message EvaluatorConfig { // whether to delimit the sequence in the seq_text_printer optional bool delimited = 11 [default = true]; + + // Used by ChunkEvaluator + // chunk of these types are not counted + repeated int32 excluded_chunk_types = 12; } message LinkConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index ea3e4308fe05be464c3e8c6b84d8b7be8a30c016..39892d0533aab468d808274146ae1a0f72170495 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1240,7 +1240,8 @@ def Evaluator( dict_file=None, result_file=None, num_results=None, - delimited=None, ): + delimited=None, + excluded_chunk_types=None, ): evaluator = g_config.model_config.evaluators.add() evaluator.type = type evaluator.name = MakeLayerNameInSubmodel(name) @@ -1269,6 +1270,9 @@ def Evaluator( if delimited is not None: evaluator.delimited = delimited + if excluded_chunk_types: + evaluator.excluded_chunk_types.extend(excluded_chunk_types) + class LayerBase(object): def __init__( diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index 3e0e88972c58e8c853e79e21f839943ae4b027d6..bd247ea9af9d8dfb2d476cdc62638bd65c11add5 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -57,19 +57,21 @@ def evaluator(*attrs): return impl -def evaluator_base(input, - type, - label=None, - weight=None, - name=None, - chunk_scheme=None, - num_chunk_types=None, - classification_threshold=None, - positive_label=None, - dict_file=None, - result_file=None, - num_results=None, - delimited=None): +def evaluator_base( + input, + type, + label=None, + weight=None, + name=None, + chunk_scheme=None, + num_chunk_types=None, + classification_threshold=None, + positive_label=None, + dict_file=None, + result_file=None, + num_results=None, + delimited=None, + excluded_chunk_types=None, ): """ Evaluator will evaluate the network status while training/testing. @@ -127,7 +129,8 @@ def evaluator_base(input, positive_label=positive_label, dict_file=dict_file, result_file=result_file, - delimited=delimited) + delimited=delimited, + excluded_chunk_types=excluded_chunk_types, ) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @@ -330,7 +333,8 @@ def chunk_evaluator( label, chunk_scheme, num_chunk_types, - name=None, ): + name=None, + excluded_chunk_types=None, ): """ Chunk evaluator is used to evaluate segment labelling accuracy for a sequence. It calculates the chunk detection F1 score. @@ -376,6 +380,8 @@ def chunk_evaluator( :param num_chunk_types: number of chunk types other than "other" :param name: The Evaluator name, it is optional. :type name: basename|None + :param excluded_chunk_types: chunks of these types are not considered + :type excluded_chunk_types: list of integer|None """ evaluator_base( name=name, @@ -383,7 +389,8 @@ def chunk_evaluator( input=input, label=label, chunk_scheme=chunk_scheme, - num_chunk_types=num_chunk_types) + num_chunk_types=num_chunk_types, + excluded_chunk_types=excluded_chunk_types, ) @evaluator(EvaluatorAttribute.FOR_UTILS)