diff --git a/.travis.yml b/.travis.yml
index 5b14f8e61e6143bb22a3aad5e0a9b11688b1b4be..047ca6ffe79bdaf013f6ef6dbf1a82bdb2f1f2b3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -56,7 +56,7 @@ before_install:
   - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
   - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
-  - pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit
+  - pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit requests==2.9.2 LinkChecker
 script:
   - paddle/scripts/travis/main.sh
 notifications:
diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index 579eca71d4cdd2545a3a8be1c7f1dacfdd5ef66b..ad82869aec8318cef42f5a51a7e29c233bd23a95 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -212,11 +212,7 @@ public:
    * @note This function will only be implemented and used in a
    * multithreaded environment.
    */
-  virtual void start(const TrainerConfig& config,
-                     DataProviderPtr dataProvider) {
-    (void)config;
-    (void)dataProvider;
-  }
+  virtual void start() {}
 
   /**
    * @brief check each work-thread whether is failed/error/finish,
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 88c098b3559d8d2918309aa48329af067f79bdd5..95a4c0e16a91f572a0d78e2fee113e03a82d2056 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -441,7 +441,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
 TrainerThread::~TrainerThread() { stop(); }
 
 void TrainerThread::start() {
-  gradientMachine_->start(*(TrainerConfig*)nullptr, (DataProviderPtr) nullptr);
+  gradientMachine_->start();
 
   computeThread_.reset(new std::thread([this]() { computeThread(); }));
diff --git a/paddle/gserver/gradientmachines/MultiNetwork.cpp b/paddle/gserver/gradientmachines/MultiNetwork.cpp
index 6eb3d8db962161ed4123b4ef4a4bb42147bfdf19..f1308f3721f8d6cf2645d7cd54b14959311ae17f 100644
--- a/paddle/gserver/gradientmachines/MultiNetwork.cpp
+++ b/paddle/gserver/gradientmachines/MultiNetwork.cpp
@@ -109,10 +109,9 @@ void MultiNetwork::onPassEnd() {
   }
 }
 
-void MultiNetwork::start(const TrainerConfig& config,
-                         DataProviderPtr dataProvider) {
+void MultiNetwork::start() {
   for (auto& subNetwork : subNetworks_) {
-    subNetwork->start(config, dataProvider);
+    subNetwork->start();
   }
 }
diff --git a/paddle/gserver/gradientmachines/MultiNetwork.h b/paddle/gserver/gradientmachines/MultiNetwork.h
index 89fbf32b4f90bceab60b8335c27b369806faaee1..f04406b983746965d72071c6ea6cfd2b6afcf9dc 100644
--- a/paddle/gserver/gradientmachines/MultiNetwork.h
+++ b/paddle/gserver/gradientmachines/MultiNetwork.h
@@ -54,7 +54,7 @@ public:
     return subNetworks_;
   }
 
-  virtual void start(const TrainerConfig& config, DataProviderPtr dataProvider);
+  virtual void start();
 
   virtual void finish();
diff --git a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp
index 980a5851a2734ce42b3417d16a37987dc5ed6b24..c6e3a3b321efb06a1678ec760c514fe1181cd3d7 100644
--- a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp
@@ -131,11 +131,7 @@ void ParallelNeuralNetwork::forwardBackward(const std::vector<Argument>& inArgs,
   backward(callback);
 }
 
-void ParallelNeuralNetwork::start(const TrainerConfig& config,
-                                  DataProviderPtr dataProvider) {
-  (void)config;
-  (void)dataProvider;
-
+void ParallelNeuralNetwork::start() {
   for (auto& thread : threads_) {
     thread->start();
   }
diff --git a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.h b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.h
index 8f445b1ded3eb8960dc06512dd3f80b00d284acc..39f5682a58e653cabaf4f3d3382d1e0bac5dece9 100644
--- a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.h
@@ -56,7 +56,7 @@ public:
                                PassType passType,
                                const UpdateCallback &callback = NULL);
 
-  virtual void start(const TrainerConfig &config, DataProviderPtr dataProvider);
+  virtual void start();
 
   void addComputeThread(int deviceId);
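The hunks above all serve one refactor: GradientMachine::start() no longer takes a TrainerConfig and DataProviderPtr that most implementations discarded (or, in MultiGradientMachine, faked with null casts). A minimal sketch of the new calling convention, modeled on the test callers changed later in this patch; the function and variable names here are illustrative, not code from the patch:

// Sketch only -- assumes a constructed paddle::GradientMachine and
// prepared input Arguments, as in test_Compare.cpp below.
#include <vector>
#include "paddle/gserver/gradientmachines/GradientMachine.h"

void runOnePass(paddle::GradientMachine* gm,
                const std::vector<paddle::Argument>& inArgs) {
  std::vector<paddle::Argument> outArgs;
  gm->start();  // previously: gm->start(*config, dataProvider)
  gm->forward(inArgs, &outArgs, paddle::PASS_TEST);
  gm->finish();  // finish() is unchanged by this patch
}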
diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp
index 1d5e7de1ba624d98c953efe1cdd2318548c4e914..57c176810fddf96828c210807673b7d1a3c739c0 100644
--- a/paddle/gserver/tests/LayerGradUtil.cpp
+++ b/paddle/gserver/tests/LayerGradUtil.cpp
@@ -303,13 +303,31 @@ void initDataLayer(TestConfig testConf,
   ICpuGpuVectorPtr sequenceStartPositions;
   ICpuGpuVectorPtr subSequenceStartPositions;
   IVectorPtr cpuSequenceDims;
-  for (size_t i = 0; i < testConf.inputDefs.size(); i++) {
+  for (size_t i = 0; i < testConf.inputDefs.size(); ++i) {
+    if (testConf.inputDefs[i].inputType != INPUT_SEQUENCE_LABEL) continue;
+
+    const std::vector<int>& labelSeqStartPositions =
+        testConf.inputDefs[i].labelSeqStartPositions;
+    if (labelSeqStartPositions.size() != 0) {
+      CHECK(!sequenceStartPositions);
+      CHECK_GE(labelSeqStartPositions.size(), 2);
+
+      sequenceStartPositions =
+          ICpuGpuVector::create(labelSeqStartPositions.size(), useGpu);
+      sequenceStartPositions->copyFrom(
+          labelSeqStartPositions.data(), labelSeqStartPositions.size(), useGpu);
+    }
+  }
+
+  for (size_t i = 0; i < testConf.inputDefs.size(); ++i) {
     LayerConfig config;
     config.set_name(testConf.inputDefs[i].name);
     config.set_type("data");
     config.set_size(testConf.inputDefs[i].dim);
     LayerPtr layer = LayerPtr(new DataLayer(config));
-    size_t numSequence = batchSize / 10 + 1;
+    size_t numSequence = sequenceStartPositions
+                             ? sequenceStartPositions->getSize() - 1
+                             : batchSize / 10 + 1;
 
     Argument data;
     auto fillData = [&](bool trans, int height, int width) {
@@ -336,9 +354,17 @@ void initDataLayer(TestConfig testConf,
       break;
     case INPUT_LABEL:
     case INPUT_SEQUENCE_LABEL:
-      data.ids = VectorT<int>::create(batchSize, useGpu);
-      // now rand number can be 0 to inputDefs[i].dim
-      data.ids->rand(testConf.inputDefs[i].dim);
+      if (testConf.inputDefs[i].labelInitValue.size() != 0) {
+        const std::vector<int>& labelInitValue =
+            testConf.inputDefs[i].labelInitValue;
+        CHECK_EQ(labelInitValue.size(), batchSize);
+        data.ids = VectorT<int>::create(batchSize, useGpu);
+        data.ids->copyFrom(labelInitValue.data(), batchSize);
+      } else {
+        data.ids = VectorT<int>::create(batchSize, useGpu);
+        // random ids range over [0, inputDefs[i].dim)
+        data.ids->rand(testConf.inputDefs[i].dim);
+      }
       break;
     case INPUT_SPARSE_NON_VALUE_DATA:
       data.value = makeRandomSparseMatrix(
diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h
index 62ac2d160fd916c5bb114341a442eac7df114c99..4e88ac0e81ef2596f14995be53f7c5c20ddba2d7 100644
--- a/paddle/gserver/tests/LayerGradUtil.h
+++ b/paddle/gserver/tests/LayerGradUtil.h
@@ -64,6 +64,9 @@ struct InputDef {
   size_t paraSize;
   ParaSparse sparse;
   bool isStatic;
+  std::vector<int> labelInitValue;
+  std::vector<int> labelSeqStartPositions;
+
   InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) {
     inputType = type;
     name = nameIn;
@@ -72,6 +75,23 @@ struct InputDef {
     sparse = {""};
     isStatic = false;
   }
+
+  InputDef(InputType type,
+           string nameIn,
+           size_t dimIn,
+           size_t sizeIn,
+           const std::vector<int>& labelInitValue,
+           const std::vector<int>& labelSeqStartPositions)
+      : labelInitValue(labelInitValue),
+        labelSeqStartPositions(labelSeqStartPositions) {
+    inputType = type;
+    name = nameIn;
+    dim = dimIn;
+    paraSize = sizeIn;
+    sparse = {""};
+    isStatic = false;
+  }
+
   InputDef(InputType type,
            string nameIn,
            size_t dimIn,
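The two new InputDef fields let a layer test supply deterministic labels and explicit sequence boundaries instead of the random values initDataLayer() generates by default. A hedged usage sketch; the input name, dimensions, and values are illustrative, not taken from this patch:

// Hypothetical test setup driving the new six-argument InputDef constructor.
#include <vector>
#include "paddle/gserver/tests/LayerGradUtil.h"

void buildLabelInput(TestConfig& config) {
  // one id per sample; size must equal the test's batchSize (here 5)
  std::vector<int> labelInitValue = {0, 1, 3, 2, 0};
  // two sequences, [0, 2) and [2, 5); at least 2 entries are required
  std::vector<int> seqStartPositions = {0, 2, 5};
  config.inputDefs.push_back({INPUT_SEQUENCE_LABEL,
                              "label",
                              /* dim */ 4,
                              /* paraSize */ 0,
                              labelInitValue,
                              seqStartPositions});
  // initDataLayer() now copies labelInitValue into data.ids verbatim
  // (instead of calling data.ids->rand(dim)) and derives numSequence
  // from seqStartPositions.size() - 1.
}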
diff --git a/paddle/gserver/tests/test_ConvTrans.cpp b/paddle/gserver/tests/test_ConvTrans.cpp
index 99202c2d5702a9569c3a9a92897a8a0e38b8e2a6..dd3378304b433c135881310eb89273b6bf492af2 100644
--- a/paddle/gserver/tests/test_ConvTrans.cpp
+++ b/paddle/gserver/tests/test_ConvTrans.cpp
@@ -206,8 +206,8 @@ TEST(Layer, convTransLayerFwd2) {
                 /* filter_size */ 5,
                 result);
 
-  float resultData[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4,
-                        4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1};
+  real resultData[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4,
+                       4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1};
   result->setData(resultData);
   doOneConvtTest(/* imgSize */ 5,
                 /* output_x */ 2,
@@ -216,8 +216,8 @@ TEST(Layer, convTransLayerFwd2) {
                 /* filter_size */ 4,
                 result);
 
-  float resultData2[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4,
-                         4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1};
+  real resultData2[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4,
+                        4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1};
   result->setData(resultData2);
   doOneConvtTest(/* imgSize */ 5,
                 /* output_x */ 2,
@@ -226,8 +226,8 @@ TEST(Layer, convTransLayerFwd2) {
                 /* filter_size */ 5,
                 result);
 
-  float resultData3[] = {1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 4,
-                         2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1};
+  real resultData3[] = {1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 4,
+                        2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1};
   result->setData(resultData3);
   doOneConvtTest(/* imgSize */ 5,
                 /* output_x */ 2,
diff --git a/paddle/gserver/tests/test_ConvUnify.cpp b/paddle/gserver/tests/test_ConvUnify.cpp
index 2ab18f886848d198b9063c7559790497ce131efe..ad99b50245cf56eb7db227fa582f6e3f41b47a7a 100644
--- a/paddle/gserver/tests/test_ConvUnify.cpp
+++ b/paddle/gserver/tests/test_ConvUnify.cpp
@@ -106,8 +106,8 @@ TEST(Layer, convParaUnified) {
 #ifndef PADDLE_ONLY_CPU
   MatrixPtr input, resultCpu, resultGpu;
   input = Matrix::create(1, 4 * 4, false, false);
-  float inputData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-  float param[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+  real inputData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  real param[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1};
 
   input->setData(inputData);
 
@@ -137,26 +137,9 @@ TEST(Layer, convParaUnified) {
   checkMatrixEqual(resultCpu, resultGpu);
 
   input = Matrix::create(1, 3 * 3 * 2, false, false);
-  float inputData2[] = {1,
-                        2,
-                        3,
-                        4,
-                        5,
-                        6,
-                        7,
-                        8,
-                        9,
-
-                        10,
-                        11,
-                        12,
-                        13,
-                        14,
-                        15,
-                        16,
-                        17,
-                        18};
-  float param2[] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1};
+  real inputData2[] = {
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  real param2[] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1};
 
   input->setData(inputData2);
 
@@ -185,7 +168,7 @@ TEST(Layer, convParaUnified) {
                              true);
   checkMatrixEqual(resultCpu, resultGpu);
 
-  float param3[] = {1, 2, 3, 4, 4, 3, 2, 1};
+  real param3[] = {1, 2, 3, 4, 4, 3, 2, 1};
 
   resultCpu = doOneConvTest(/* imgSize */ 3,
                            /* output_x */ 2,
diff --git a/paddle/gserver/tests/test_NetworkCompare.cpp b/paddle/gserver/tests/test_NetworkCompare.cpp
index fc60228f816e0cea30ef764c59a8c7875ed4a0e8..0d261059555c971cd509e64802d6c70abc9d2fef 100644
--- a/paddle/gserver/tests/test_NetworkCompare.cpp
+++ b/paddle/gserver/tests/test_NetworkCompare.cpp
@@ -114,7 +114,7 @@ void calcGradient(DataIn& in, DataOut& out, const std::string& configPath) {
       parameters[i]->getBuf(PARAMETER_VALUE)->copyFrom(*in.paraValues[i]);
     }
   }
-  gradientMachine->start(trainer.getConfig(), nullptr);
+  gradientMachine->start();
   gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN);
   for (size_t i = 0; i < in.outGrads.size(); i++) {
     // If the all the layers in the config have no parameters, also
diff --git a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
index e19cf35cd5eb0148879fc3d0d40ea9b106947f9a..150850da4d49a2320acc70ed370cf8728d5c9def 100644
--- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+++ b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
@@ -28,7 +28,7 @@ class TrainerForTest : public paddle::Trainer {
 public:
   void startTrain() {
     GradientMachine& gm = *this->trainerInternal_.getGradientMachine();
-    gm.start(this->getConfig(), dataProvider_);
+    gm.start();
   }
 
   void finishTrain() {
diff --git a/paddle/parameter/ParameterUpdaterBase.h b/paddle/parameter/ParameterUpdaterBase.h
index 88148d9b769e9b6eca90f9651a121e926543d7c2..b230e170c15f1b004c5357fb7d0ad2204d01f44b 100644
--- a/paddle/parameter/ParameterUpdaterBase.h
+++ b/paddle/parameter/ParameterUpdaterBase.h
@@ -38,7 +38,7 @@ public:
   virtual void startPass() {}
 
   // called by Trainer when finishing a pass, return true if pass accepted
-  virtual bool finishPass(real cost = 0) { return true; }
+  virtual bool finishPass() { return true; }
 
   // called by Trainer before backward() of a batch
   // Return the type of pass it needs. This pass type will be passed
@@ -112,9 +112,9 @@ public:
         [&](int tid, size_t numThreads) { updaters_[tid]->startPass(); });
   }
 
-  virtual bool finishPass(real cost = 0) {
+  virtual bool finishPass() {
     syncThreadPool_->execPlusOwner(
-        [&](int tid, size_t numThreads) { updaters_[tid]->finishPass(cost); });
+        [&](int tid, size_t numThreads) { updaters_[tid]->finishPass(); });
     return true;
   }
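The companion refactor drops the unused `cost` argument from finishPass() throughout the updater hierarchy. Any out-of-tree updater that overrode the old hook needs the matching change; a minimal sketch of the adjusted override (`MyUpdater` is hypothetical, not part of this patch):

// Hypothetical subclass showing the post-patch override signature.
#include "paddle/parameter/ParameterUpdaterBase.h"

class MyUpdater : public paddle::ParameterUpdater {
public:
  // was: virtual bool finishPass(real cost = 0)
  virtual bool finishPass() {
    // per-pass bookkeeping goes here; the accumulated pass cost is no
    // longer delivered through this hook
    return ParameterUpdater::finishPass();  // true => pass accepted
  }
};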
diff --git a/paddle/scripts/travis/docs.sh b/paddle/scripts/travis/docs.sh
index 0bbb76a8a3caa27da0911af0fe87df7fbff617b4..8690fe1d40c935e119fefbc02f3a228d76d8c0f9 100755
--- a/paddle/scripts/travis/docs.sh
+++ b/paddle/scripts/travis/docs.sh
@@ -7,6 +7,10 @@ source ./common.sh
 cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON
 make paddle_docs paddle_docs_cn
 
+# check websites for broken links
+linkchecker doc/en/html/index.html
+linkchecker doc/cn/html/index.html
+
 # Parse Github URL
 REPO=`git config remote.origin.url`
 SSH_REPO=${REPO/https:\/\/github.com\//git@github.com:}
@@ -35,8 +39,8 @@ git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
 
 # remove old docs. mv new docs.
 rm -rf doc doc_cn
-mv ../doc_cn/html doc_cn
-mv ../doc/html doc
+mv ../doc/cn/html doc_cn
+mv ../doc/en/html doc
 
 # Check whether anything changed.
 set +e
diff --git a/paddle/trainer/ParameterUpdater.h b/paddle/trainer/ParameterUpdater.h
index 4dae77567f8f4d097c583567275d4b90122feb6a..c3207e63ce72b73a57c2e40c72c5259f0ae61bc9 100644
--- a/paddle/trainer/ParameterUpdater.h
+++ b/paddle/trainer/ParameterUpdater.h
@@ -102,9 +102,9 @@ public:
    * @param cost sum cost during one pass.
    * @return true if accept (used for owlqn).
    */
-  virtual bool finishPass(real cost) {
+  virtual bool finishPass() {
     optimizer_->finishPass();
-    return ParameterUpdater::finishPass(cost);
+    return ParameterUpdater::finishPass();
   }
 
   /**
@@ -220,9 +220,9 @@ public:
     averager_->startPass();
     SgdLocalUpdater::startPass();
   }
-  virtual bool finishPass(real cost) {
+  virtual bool finishPass() {
     averager_->finishPass();
-    return SgdLocalUpdater::finishPass(cost);
+    return SgdLocalUpdater::finishPass();
   }
 
   /// apply the averaged parameter to PARAMETER_VALUE
diff --git a/paddle/trainer/RemoteParameterUpdater.cpp b/paddle/trainer/RemoteParameterUpdater.cpp
index 630f55d998d9f5b5b2880aa02b025e6e56e1f064..6939738203f41e0c1f7204d54834e34b2cd90682 100644
--- a/paddle/trainer/RemoteParameterUpdater.cpp
+++ b/paddle/trainer/RemoteParameterUpdater.cpp
@@ -309,7 +309,7 @@ void RemoteParameterUpdater::startPass() {
   }
 }
 
-bool RemoteParameterUpdater::finishPass(real cost) {
+bool RemoteParameterUpdater::finishPass() {
   if (localUpdater_) {
     localUpdater_->finishPass();
   }
@@ -712,7 +712,7 @@ void SparseRemoteParameterUpdater::startPass() {
   }
 }
 
-bool SparseRemoteParameterUpdater::finishPass(real cost) {
+bool SparseRemoteParameterUpdater::finishPass() {
   if (config_.algorithm() == TrainAlgorithm::SGD) {
     parameterClient_->waitPassFinish();
   } else {
diff --git a/paddle/trainer/RemoteParameterUpdater.h b/paddle/trainer/RemoteParameterUpdater.h
index ec6ed443d33db1d695194092b34d6090a4b5ab94..7794b209009a3429e810074b61e1d5bffa8b3a4e 100644
--- a/paddle/trainer/RemoteParameterUpdater.h
+++ b/paddle/trainer/RemoteParameterUpdater.h
@@ -90,7 +90,7 @@ public:
    */
   virtual void finishBatch(real cost);
   virtual void startPass();
-  virtual bool finishPass(real cost);
+  virtual bool finishPass();
 
 #ifndef PADDLE_DISABLE_TIMER
   virtual void setForwardbackwardTime(uint64_t delta) {
@@ -281,7 +281,7 @@ public:
   /// send all sparse related parameters to all pservers
   virtual void finishBatch(real cost);
   virtual void startPass();
-  virtual bool finishPass(real cost);
+  virtual bool finishPass();
 
   virtual void apply();
   virtual void restore();
diff --git a/paddle/trainer/Tester.cpp b/paddle/trainer/Tester.cpp
index 24fac3e5a8141cbec912d276833ec491385b97ab..13aa28ae5d9699d267858d48e46797c756487ddd 100644
--- a/paddle/trainer/Tester.cpp
+++ b/paddle/trainer/Tester.cpp
@@ -257,7 +257,7 @@ void Tester::test() {
   CHECK(testDataProvider_) << "TestData is not specified";
   testDataProvider_->setSkipShuffle();
   testDataProvider_->reset();
-  gradientMachine_->start(*config_, testDataProvider_);
+  gradientMachine_->start();
 
   // For evaluation
   std::vector<std::string> modelList;
diff --git a/paddle/trainer/ThreadParameterUpdater.cpp b/paddle/trainer/ThreadParameterUpdater.cpp
index 2a76d5723ccb68896f8ddbfad31a9d7d84adcf55..870d4a4b0246fe244bbd3796ec14449eb181aad2 100644
--- a/paddle/trainer/ThreadParameterUpdater.cpp
+++ b/paddle/trainer/ThreadParameterUpdater.cpp
@@ -70,7 +70,7 @@ void SgdThreadUpdater::startPass() {
   }
 }
 
-bool SgdThreadUpdater::finishPass(real cost) {
+bool SgdThreadUpdater::finishPass() {
   catchUpWith();
 
   for (auto& para : parameters_) {
diff --git a/paddle/trainer/ThreadParameterUpdater.h b/paddle/trainer/ThreadParameterUpdater.h
index 198435c0f30056a9467b8a076c8443ae243e7c3f..880f1f9ddc49a1193ce23901419d988cae84eb88 100644
--- a/paddle/trainer/ThreadParameterUpdater.h
+++ b/paddle/trainer/ThreadParameterUpdater.h
@@ -47,7 +47,7 @@ public:
   virtual void startPass();
 
   // Use the finishPass() function of the base optimizer.
-  virtual bool finishPass(real cost);
+  virtual bool finishPass();
 
   virtual void init(const std::vector<ParameterPtr>& parameters);
   virtual PassType startBatch(int64_t batchSize);
diff --git a/paddle/trainer/Trainer.cpp b/paddle/trainer/Trainer.cpp
index 1eec2c432d235ef484b688db08aae8a39f878a85..09e0a213ab2d71890cfafb905b5969383acfe95a 100644
--- a/paddle/trainer/Trainer.cpp
+++ b/paddle/trainer/Trainer.cpp
@@ -308,7 +308,7 @@ static double genPerturbation(real* d, real* grad, size_t dim) {
 }
 
 real Trainer::checkGradient() {
-  trainerInternal_.getGradientMachine()->start(*config_, dataProvider_);
+  trainerInternal_.getGradientMachine()->start();
   std::vector<ParameterPtr>& parameters =
       trainerInternal_.getGradientMachine()->getNonStaticParameters();
   DataBatch dataBatch;
@@ -390,7 +390,7 @@ void Trainer::startTrain() {
     dataProvider_->reset();
   }
 
-  trainerInternal_.getGradientMachine()->start(*config_, dataProvider_);
+  trainerInternal_.getGradientMachine()->start();
 }
 
 void Trainer::finishTrain() { trainerInternal_.getGradientMachine()->finish(); }
@@ -537,7 +537,7 @@ void Trainer::trainOnePassBatch(int passId) {
 
   trainerInternal_.getGradientMachine()->onPassEnd();
 
-  bool accepted = trainerInternal_.getParameterUpdater()->finishPass(cost);
+  bool accepted = trainerInternal_.getParameterUpdater()->finishPass();
 
   globalStat.setThreadInfo(true);
   globalStat.printAllStatus();
diff --git a/paddle/trainer/tests/test_Compare.cpp b/paddle/trainer/tests/test_Compare.cpp
index 72fc76bea35e433eeb08ba625b4bf6afdda491fb..e855a8fe2e09aa0f16a73f3e7bcc2f32921092f8 100644
--- a/paddle/trainer/tests/test_Compare.cpp
+++ b/paddle/trainer/tests/test_Compare.cpp
@@ -50,7 +50,7 @@ void calcGradient(bool useGpu, comData& Data) {
   trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch);
   CHECK(dataBatch.getSize()) << "No data from data provider";
   vector<Argument>& inArgs = dataBatch.getStreams();
-  trainer.getGradientMachine()->start(trainer.getConfig(), nullptr);
+  trainer.getGradientMachine()->start();
   for (int i = 0; i < 2; ++i) {
     trainer.getGradientMachine()->forwardBackward(
         inArgs, &Data.outArgs, PASS_TRAIN);
diff --git a/paddle/trainer/tests/test_CompareTwoNets.cpp b/paddle/trainer/tests/test_CompareTwoNets.cpp
index 80c61e259e71dd31d7637072248b22a2910c532e..94f65e545d116c802fb4877dc14f07aaaf83a4fb 100644
--- a/paddle/trainer/tests/test_CompareTwoNets.cpp
+++ b/paddle/trainer/tests/test_CompareTwoNets.cpp
@@ -72,7 +72,7 @@ void calcGradient(ComData& data, const string configFile) {
   CHECK(dataBatch.getSize()) << "No data from data provider";
   vector<Argument>& inArgs = dataBatch.getStreams();
 
-  trainer.getGradientMachine()->start(trainer.getConfig(), nullptr);
+  trainer.getGradientMachine()->start();
 
   trainer.getGradientMachine()->forwardBackward(
       inArgs, &data.outArgs, PASS_TRAIN);
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 1f537d403220c1816218b9e356734a49297267fc..62e8258c98a68d5fea5498bf0260511a9ba7782b 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -498,9 +498,16 @@ class Input(Cfg):
             is_static=None,
             is_shared=None,
             update_hooks=None,
-            input_layer_argument=None, ):
+            input_layer_argument=None,
+            make_layer_name_in_submodel=True, ):
+        """
+        @param make_layer_name_in_submodel True by default; you might need to
+        set it carefully when adding an Input in config_parser.py.
+        """
         self.add_keys(locals())
-        self.input_layer_name = MakeLayerNameInSubmodel(input_layer_name)
+        self.input_layer_name = MakeLayerNameInSubmodel(
+            input_layer_name
+        ) if make_layer_name_in_submodel else input_layer_name
 
 
 # Define a projection for mixed layer
@@ -1848,7 +1855,8 @@ class BatchNormLayer(LayerBase):
                     initial_std=0.0,
                     initial_mean=0.0,
                     is_static=True,
-                    is_shared=is_shared, ))
+                    is_shared=is_shared,
+                    make_layer_name_in_submodel=False, ))
 
         parallel_nn = bool(int(g_command_config_args.get("parallel_nn", 0)))
         cudnn_version = int(g_command_config_args.get("cudnn_version", 0))
@@ -1880,7 +1888,7 @@ class BatchNormLayer(LayerBase):
         # when either of them is non-zero.
         if input_layer.width != 0 or input_layer.height != 0:
             self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size,
-                               image_conf.channels, True)
+                               image_conf.channels, False)
         else:
             self.set_layer_size(input_layer.size)
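On the Python side, the new flag exists so that BatchNormLayer can register its moving-average parameters without the name being rewritten into the enclosing submodel's namespace. A sketch of the distinction, in the style of the Input() calls above; the layer name here is hypothetical:

# Inside a submodel, Input() normally rewrites input_layer_name via
# MakeLayerNameInSubmodel(). Passing make_layer_name_in_submodel=False
# keeps the name verbatim, which BatchNormLayer relies on for the
# moving mean/variance inputs shared across submodels.
Input("bn_moving_mean",          # hypothetical layer name
      initial_mean=0.0,
      initial_std=0.0,
      is_static=True,
      make_layer_name_in_submodel=False)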