Commit b063b0c4 authored by Peng Li

Merge branch 'develop' into fix-crf-weight-and-coeff-bug

@@ -56,7 +56,7 @@ before_install:
   - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
   - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
-  - pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit
+  - pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit requests==2.9.2 LinkChecker
 script:
   - paddle/scripts/travis/main.sh
 notifications:
...
@@ -212,11 +212,7 @@ public:
    * @note This function will only been implemented and used in a
    *       multithreaded environment.
    */
-  virtual void start(const TrainerConfig& config,
-                     DataProviderPtr dataProvider) {
-    (void)config;
-    (void)dataProvider;
-  }
+  virtual void start() {}
   /**
    * @brief check each work-thread whether is failed/error/finish,
...
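Note: with this change a GradientMachine no longer needs a TrainerConfig or a DataProvider to be started; the call sites in the hunks below simply drop the two arguments. A minimal sketch of the resulting call pattern, assuming an already constructed GradientMachine gm and prepared inArgs/outArgs (variable names are illustrative only, not from this commit):

    gm->start();                                        // was: gm->start(config, dataProvider)
    gm->forwardBackward(inArgs, &outArgs, PASS_TRAIN);  // unchanged
    gm->finish();                                       // unchanged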
@@ -441,7 +441,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
 TrainerThread::~TrainerThread() { stop(); }
 void TrainerThread::start() {
-  gradientMachine_->start(*(TrainerConfig*)nullptr, (DataProviderPtr) nullptr);
+  gradientMachine_->start();
   computeThread_.reset(new std::thread([this]() { computeThread(); }));
...
@@ -109,10 +109,9 @@ void MultiNetwork::onPassEnd() {
   }
 }
-void MultiNetwork::start(const TrainerConfig& config,
-                         DataProviderPtr dataProvider) {
+void MultiNetwork::start() {
   for (auto& subNetwork : subNetworks_) {
-    subNetwork->start(config, dataProvider);
+    subNetwork->start();
   }
 }
...
@@ -54,7 +54,7 @@ public:
     return subNetworks_;
   }
-  virtual void start(const TrainerConfig& config, DataProviderPtr dataProvider);
+  virtual void start();
   virtual void finish();
...
@@ -131,11 +131,7 @@ void ParallelNeuralNetwork::forwardBackward(const std::vector<Argument>& inArgs,
   backward(callback);
 }
-void ParallelNeuralNetwork::start(const TrainerConfig& config,
-                                  DataProviderPtr dataProvider) {
-  (void)config;
-  (void)dataProvider;
+void ParallelNeuralNetwork::start() {
   for (auto& thread : threads_) {
     thread->start();
   }
...
@@ -56,7 +56,7 @@ public:
                PassType passType,
                const UpdateCallback &callback = NULL);
-  virtual void start(const TrainerConfig &config, DataProviderPtr dataProvider);
+  virtual void start();
   void addComputeThread(int deviceId);
...
@@ -303,13 +303,31 @@ void initDataLayer(TestConfig testConf,
   ICpuGpuVectorPtr sequenceStartPositions;
   ICpuGpuVectorPtr subSequenceStartPositions;
   IVectorPtr cpuSequenceDims;
-  for (size_t i = 0; i < testConf.inputDefs.size(); i++) {
+  for (size_t i = 0; i < testConf.inputDefs.size(); ++i) {
+    if (testConf.inputDefs[i].inputType != INPUT_SEQUENCE_LABEL) continue;
+    const std::vector<int>& labelSeqStartPositions =
+        testConf.inputDefs[i].labelSeqStartPositions;
+    if (labelSeqStartPositions.size() != 0) {
+      CHECK(!sequenceStartPositions);
+      CHECK_GE(labelSeqStartPositions.size(), 2);
+      sequenceStartPositions =
+          ICpuGpuVector::create(labelSeqStartPositions.size(), useGpu);
+      sequenceStartPositions->copyFrom(
+          labelSeqStartPositions.data(), labelSeqStartPositions.size(), useGpu);
+    }
+  }
+  for (size_t i = 0; i < testConf.inputDefs.size(); ++i) {
     LayerConfig config;
     config.set_name(testConf.inputDefs[i].name);
     config.set_type("data");
     config.set_size(testConf.inputDefs[i].dim);
     LayerPtr layer = LayerPtr(new DataLayer(config));
-    size_t numSequence = batchSize / 10 + 1;
+    size_t numSequence = sequenceStartPositions
+                             ? sequenceStartPositions->getSize() - 1
+                             : batchSize / 10 + 1;
     Argument data;
     auto fillData = [&](bool trans, int height, int width) {
@@ -336,9 +354,17 @@ void initDataLayer(TestConfig testConf,
         break;
       case INPUT_LABEL:
       case INPUT_SEQUENCE_LABEL:
-        data.ids = VectorT<int>::create(batchSize, useGpu);
-        // now rand number can be 0 to inputDefs[i].dim
-        data.ids->rand(testConf.inputDefs[i].dim);
+        if (testConf.inputDefs[i].labelInitValue.size() != 0) {
+          const std::vector<int>& labelInitValue =
+              testConf.inputDefs[i].labelInitValue;
+          CHECK_EQ(labelInitValue.size(), batchSize);
+          data.ids = VectorT<int>::create(batchSize, useGpu);
+          data.ids->copyFrom(labelInitValue.data(), batchSize);
+        } else {
+          data.ids = VectorT<int>::create(batchSize, useGpu);
+          // now rand number can be 0 to inputDefs[i].dim
+          data.ids->rand(testConf.inputDefs[i].dim);
+        }
         break;
       case INPUT_SPARSE_NON_VALUE_DATA:
         data.value = makeRandomSparseMatrix(
...
@@ -64,6 +64,9 @@ struct InputDef {
   size_t paraSize;
   ParaSparse sparse;
   bool isStatic;
+  std::vector<int> labelInitValue;
+  std::vector<int> labelSeqStartPositions;
   InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) {
     inputType = type;
     name = nameIn;
@@ -72,6 +75,23 @@ struct InputDef {
     sparse = {""};
     isStatic = false;
   }
+  InputDef(InputType type,
+           string nameIn,
+           size_t dimIn,
+           size_t sizeIn,
+           const std::vector<int>& labelInitValue,
+           const std::vector<int>& labelSeqStartPositions)
+      : labelInitValue(labelInitValue),
+        labelSeqStartPositions(labelSeqStartPositions) {
+    inputType = type;
+    name = nameIn;
+    dim = dimIn;
+    paraSize = sizeIn;
+    sparse = {""};
+    isStatic = false;
+  }
   InputDef(InputType type,
            string nameIn,
            size_t dimIn,
...
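With the two new fields and the extra constructor, a layer test can pin both the label ids and the sequence boundaries of an INPUT_SEQUENCE_LABEL input instead of relying on random data. A minimal sketch, assuming the declarations above and a TestConfig named config; the layer name, dimension, and values are made up for illustration:

    // Two sequences, [0, 3) and [3, 5), with fixed label ids.
    // The test's batchSize must equal labelValues.size(), per the CHECK_EQ
    // added in initDataLayer above.
    std::vector<int> labelValues = {0, 1, 2, 3, 4};
    std::vector<int> seqStartPositions = {0, 3, 5};
    config.inputDefs.push_back({INPUT_SEQUENCE_LABEL,
                                "label",
                                /* dim */ 10,
                                /* paraSize */ 0,
                                labelValues,
                                seqStartPositions});

initDataLayer then copies labelValues into data.ids and derives numSequence from the sequence start positions rather than from batchSize / 10 + 1.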
@@ -206,8 +206,8 @@ TEST(Layer, convTransLayerFwd2) {
                  /* filter_size */ 5,
                  result);
-  float resultData[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4,
-                        4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1};
+  real resultData[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4,
+                       4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1};
   result->setData(resultData);
   doOneConvtTest(/* imgSize */ 5,
                  /* output_x */ 2,
@@ -216,8 +216,8 @@ TEST(Layer, convTransLayerFwd2) {
                  /* filter_size */ 4,
                  result);
-  float resultData2[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4,
-                         4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1};
+  real resultData2[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4,
+                        4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1};
   result->setData(resultData2);
   doOneConvtTest(/* imgSize */ 5,
                  /* output_x */ 2,
@@ -226,8 +226,8 @@ TEST(Layer, convTransLayerFwd2) {
                  /* filter_size */ 5,
                  result);
-  float resultData3[] = {1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 4,
-                         2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1};
+  real resultData3[] = {1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 4,
+                        2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1};
   result->setData(resultData3);
   doOneConvtTest(/* imgSize */ 5,
                  /* output_x */ 2,
...
@@ -106,8 +106,8 @@ TEST(Layer, convParaUnified) {
 #ifndef PADDLE_ONLY_CPU
   MatrixPtr input, resultCpu, resultGpu;
   input = Matrix::create(1, 4 * 4, false, false);
-  float inputData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-  float param[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+  real inputData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  real param[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1};
   input->setData(inputData);
@@ -137,26 +137,9 @@ TEST(Layer, convParaUnified) {
   checkMatrixEqual(resultCpu, resultGpu);
   input = Matrix::create(1, 3 * 3 * 2, false, false);
-  float inputData2[] = {1,
-                        2,
-                        3,
-                        4,
-                        5,
-                        6,
-                        7,
-                        8,
-                        9,
-                        10,
-                        11,
-                        12,
-                        13,
-                        14,
-                        15,
-                        16,
-                        17,
-                        18};
-  float param2[] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1};
+  real inputData2[] = {
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  real param2[] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1};
   input->setData(inputData2);
@@ -185,7 +168,7 @@ TEST(Layer, convParaUnified) {
                             true);
   checkMatrixEqual(resultCpu, resultGpu);
-  float param3[] = {1, 2, 3, 4, 4, 3, 2, 1};
+  real param3[] = {1, 2, 3, 4, 4, 3, 2, 1};
   resultCpu = doOneConvTest(/* imgSize */ 3,
                             /* output_x */ 2,
...
@@ -114,7 +114,7 @@ void calcGradient(DataIn& in, DataOut& out, const std::string& configPath) {
       parameters[i]->getBuf(PARAMETER_VALUE)->copyFrom(*in.paraValues[i]);
     }
   }
-  gradientMachine->start(trainer.getConfig(), nullptr);
+  gradientMachine->start();
   gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN);
   for (size_t i = 0; i < in.outGrads.size(); i++) {
     // If the all the layers in the config have no parameters, also
...
@@ -28,7 +28,7 @@ class TrainerForTest : public paddle::Trainer {
 public:
   void startTrain() {
     GradientMachine& gm = *this->trainerInternal_.getGradientMachine();
-    gm.start(this->getConfig(), dataProvider_);
+    gm.start();
   }
   void finishTrain() {
...
@@ -38,7 +38,7 @@ public:
   virtual void startPass() {}
   // called by Trainer then finishing a pass, ruturn true if pass accepted
-  virtual bool finishPass(real cost = 0) { return true; }
+  virtual bool finishPass() { return true; }
   // called by Trainer before backward() of a batch
   // Return the type of pass it needs. This pass type will be passed
@@ -112,9 +112,9 @@ public:
         [&](int tid, size_t numThreads) { updaters_[tid]->startPass(); });
   }
-  virtual bool finishPass(real cost = 0) {
+  virtual bool finishPass() {
     syncThreadPool_->execPlusOwner(
-        [&](int tid, size_t numThreads) { updaters_[tid]->finishPass(cost); });
+        [&](int tid, size_t numThreads) { updaters_[tid]->finishPass(); });
     return true;
   }
...
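Note: finishPass no longer receives the accumulated pass cost; the Trainer now calls finishPass() with no arguments (see the trainOnePassBatch hunk further down), and the updater overrides in the hunks that follow are adjusted to match. A minimal sketch of a custom updater after this change (the class name is hypothetical):

    class MyUpdater : public ParameterUpdater {
    public:
      virtual bool finishPass() {
        // per-pass bookkeeping goes here; the pass cost is no longer passed in
        return ParameterUpdater::finishPass();
      }
    };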
@@ -7,6 +7,10 @@ source ./common.sh
 cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON
 make paddle_docs paddle_docs_cn
+# check websites for broken links
+linkchecker doc/en/html/index.html
+linkchecker doc/cn/html/index.html
 # Parse Github URL
 REPO=`git config remote.origin.url`
 SSH_REPO=${REPO/https:\/\/github.com\//git@github.com:}
@@ -35,8 +39,8 @@ git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
 # remove old docs. mv new docs.
 rm -rf doc doc_cn
-mv ../doc_cn/html doc_cn
-mv ../doc/html doc
+mv ../doc/cn/html doc_cn
+mv ../doc/en/html doc
 # Check is there anything changed.
 set +e
...
@@ -102,9 +102,9 @@ public:
    * @param cost sum cost during one pass.
    * @return true if accept (used for owlqn).
    */
-  virtual bool finishPass(real cost) {
+  virtual bool finishPass() {
     optimizer_->finishPass();
-    return ParameterUpdater::finishPass(cost);
+    return ParameterUpdater::finishPass();
   }
   /**
@@ -220,9 +220,9 @@ public:
     averager_->startPass();
     SgdLocalUpdater::startPass();
   }
-  virtual bool finishPass(real cost) {
+  virtual bool finishPass() {
     averager_->finishPass();
-    return SgdLocalUpdater::finishPass(cost);
+    return SgdLocalUpdater::finishPass();
   }
   /// apply the averaged parameter to PARAMETER_VALUE
...
@@ -309,7 +309,7 @@ void RemoteParameterUpdater::startPass() {
   }
 }
-bool RemoteParameterUpdater::finishPass(real cost) {
+bool RemoteParameterUpdater::finishPass() {
   if (localUpdater_) {
     localUpdater_->finishPass();
   }
@@ -712,7 +712,7 @@ void SparseRemoteParameterUpdater::startPass() {
   }
 }
-bool SparseRemoteParameterUpdater::finishPass(real cost) {
+bool SparseRemoteParameterUpdater::finishPass() {
   if (config_.algorithm() == TrainAlgorithm::SGD) {
     parameterClient_->waitPassFinish();
   } else {
...
@@ -90,7 +90,7 @@ public:
    */
   virtual void finishBatch(real cost);
   virtual void startPass();
-  virtual bool finishPass(real cost);
+  virtual bool finishPass();
 #ifndef PADDLE_DISABLE_TIMER
   virtual void setForwardbackwardTime(uint64_t delta) {
@@ -281,7 +281,7 @@ public:
   /// send all sparse related parameters to all pservers
   virtual void finishBatch(real cost);
   virtual void startPass();
-  virtual bool finishPass(real cost);
+  virtual bool finishPass();
   virtual void apply();
   virtual void restore();
...
@@ -257,7 +257,7 @@ void Tester::test() {
   CHECK(testDataProvider_) << "TestData is not specified";
   testDataProvider_->setSkipShuffle();
   testDataProvider_->reset();
-  gradientMachine_->start(*config_, testDataProvider_);
+  gradientMachine_->start();
   // For evaluation
   std::vector<std::string> modelList;
...
@@ -70,7 +70,7 @@ void SgdThreadUpdater::startPass() {
   }
 }
-bool SgdThreadUpdater::finishPass(real cost) {
+bool SgdThreadUpdater::finishPass() {
   catchUpWith();
   for (auto& para : parameters_) {
...
@@ -47,7 +47,7 @@ public:
   virtual void startPass();
   // Use the finishPass() function of the base optimizer.
-  virtual bool finishPass(real cost);
+  virtual bool finishPass();
   virtual void init(const std::vector<ParameterPtr>& parameters);
   virtual PassType startBatch(int64_t batchSize);
...
@@ -308,7 +308,7 @@ static double genPerturbation(real* d, real* grad, size_t dim) {
 }
 real Trainer::checkGradient() {
-  trainerInternal_.getGradientMachine()->start(*config_, dataProvider_);
+  trainerInternal_.getGradientMachine()->start();
   std::vector<ParameterPtr>& parameters =
       trainerInternal_.getGradientMachine()->getNonStaticParameters();
   DataBatch dataBatch;
@@ -390,7 +390,7 @@ void Trainer::startTrain() {
     dataProvider_->reset();
   }
-  trainerInternal_.getGradientMachine()->start(*config_, dataProvider_);
+  trainerInternal_.getGradientMachine()->start();
 }
 void Trainer::finishTrain() { trainerInternal_.getGradientMachine()->finish(); }
@@ -537,7 +537,7 @@ void Trainer::trainOnePassBatch(int passId) {
   trainerInternal_.getGradientMachine()->onPassEnd();
-  bool accepted = trainerInternal_.getParameterUpdater()->finishPass(cost);
+  bool accepted = trainerInternal_.getParameterUpdater()->finishPass();
   globalStat.setThreadInfo(true);
   globalStat.printAllStatus();
...
@@ -50,7 +50,7 @@ void calcGradient(bool useGpu, comData& Data) {
   trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch);
   CHECK(dataBatch.getSize()) << "No data from data provider";
   vector<Argument>& inArgs = dataBatch.getStreams();
-  trainer.getGradientMachine()->start(trainer.getConfig(), nullptr);
+  trainer.getGradientMachine()->start();
   for (int i = 0; i < 2; ++i) {
     trainer.getGradientMachine()->forwardBackward(
         inArgs, &Data.outArgs, PASS_TRAIN);
...
@@ -72,7 +72,7 @@ void calcGradient(ComData& data, const string configFile) {
   CHECK(dataBatch.getSize()) << "No data from data provider";
   vector<Argument>& inArgs = dataBatch.getStreams();
-  trainer.getGradientMachine()->start(trainer.getConfig(), nullptr);
+  trainer.getGradientMachine()->start();
   trainer.getGradientMachine()->forwardBackward(
       inArgs, &data.outArgs, PASS_TRAIN);
...
@@ -498,9 +498,16 @@ class Input(Cfg):
             is_static=None,
             is_shared=None,
             update_hooks=None,
-            input_layer_argument=None, ):
+            input_layer_argument=None,
+            make_layer_name_in_submodel=True, ):
+        """
+        @param make_layer_name_in_submodel True by defalut, you might need to
+        set it carefully when adding Input in config_parser.py.
+        """
         self.add_keys(locals())
-        self.input_layer_name = MakeLayerNameInSubmodel(input_layer_name)
+        self.input_layer_name = MakeLayerNameInSubmodel(
+            input_layer_name
+        ) if make_layer_name_in_submodel else input_layer_name
 # Define a projection for iexed layer
@@ -1848,7 +1855,8 @@ class BatchNormLayer(LayerBase):
                     initial_std=0.0,
                     initial_mean=0.0,
                     is_static=True,
-                    is_shared=is_shared, ))
+                    is_shared=is_shared,
+                    make_layer_name_in_submodel=False, ))
         parallel_nn = bool(int(g_command_config_args.get("parallel_nn", 0)))
         cudnn_version = int(g_command_config_args.get("cudnn_version", 0))
@@ -1880,7 +1888,7 @@ class BatchNormLayer(LayerBase):
         # when either of it is non-zero.
         if input_layer.width != 0 or input_layer.height != 0:
             self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size,
-                               image_conf.channels, True)
+                               image_conf.channels, False)
         else:
             self.set_layer_size(input_layer.size)
...