Commit 28bc05b1 authored by emailweixu, committed by Yu Yang

Fix sparse training for trainer_count=1 (#204)

* Fix sparse training for trainer_count=1

For trainer_count=1, the gradient machine is NeuralNetwork, which does not create a parameter buffer for PARAMETER_GRADIENT for sparse update in Parameter::enableType(). But the gradient parameter buffer is still used in SgdThreadUpdater.

* Minor update to comment
Parent b22e50ed
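The bug, in essence: for a sparse-update parameter, Parameter::enableType() deliberately skips allocating the dense PARAMETER_GRADIENT buffer, because MultiGradientMachine normally supplies a sparse row matrix instead; with trainer_count=1 nothing else allocates it, yet SgdThreadUpdater still reads it. Below is a minimal, self-contained C++ sketch of this failure mode and of the enableBufType() fix. Parameter here is a simplified stand-in; only the method names and the PARAMETER_GRADIENT constant mirror the real Paddle code.

// Simplified stand-in for paddle::Parameter; illustrative only.
#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

enum ParameterType { PARAMETER_VALUE, PARAMETER_GRADIENT, NUM_PARAMETER_TYPES };
using VectorPtr = std::shared_ptr<std::vector<float>>;

struct Parameter {
  std::vector<VectorPtr> bufs_ = std::vector<VectorPtr>(NUM_PARAMETER_TYPES);
  bool sparseUpdate_ = false;
  size_t size_ = 0;

  // Mirrors the gap in Parameter::enableType(): for a sparse-update
  // parameter the dense gradient buffer is intentionally not allocated,
  // since MultiGradientMachine normally provides a sparse matrix instead.
  void enableType(ParameterType type) {
    if (type == PARAMETER_GRADIENT && sparseUpdate_) return;
    bufs_[type] = std::make_shared<std::vector<float>>(size_, 0.0f);
  }

  // The method this commit adds: create the buffer on demand, zeroed,
  // returning early (idempotently) if it already exists.
  void enableBufType(ParameterType type) {
    if (bufs_[type]) return;
    bufs_[type] = std::make_shared<std::vector<float>>(size_, 0.0f);
  }
};

int main() {
  Parameter para;
  para.sparseUpdate_ = true;
  para.size_ = 8;

  para.enableType(PARAMETER_GRADIENT);
  // trainer_count=1: the gradient machine is NeuralNetwork, so nothing else
  // creates the buffer, and the updater would dereference a null pointer.
  assert(!para.bufs_[PARAMETER_GRADIENT]);

  // The fix: SgdThreadUpdater::init() now creates it explicitly.
  para.enableBufType(PARAMETER_GRADIENT);
  assert(para.bufs_[PARAMETER_GRADIENT] != nullptr);
  return 0;
}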
@@ -75,7 +75,6 @@ class ChunkEvaluator : public Evaluator {
 public:
   virtual void init(const EvaluatorConfig& config) {
-    CHECK(!FLAGS_use_gpu) << "Not supported";
     Evaluator::init(config);
     if (config.chunk_scheme() == "IOB") {
       numTagTypes_ = 2;
@@ -137,6 +136,7 @@ public:
     CHECK_EQ(arguments.size(), (size_t)2);
     IVectorPtr& output = arguments[0].ids;
     IVectorPtr& label = arguments[1].ids;
+    CHECK(!output->useGpu() && !label->useGpu()) << "Not supported";
     auto sequenceStartPositions =
         arguments[1].sequenceStartPositions->getVector(false);
     CHECK_EQ(output->getSize(), label->getSize());
......
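These two hunks move the "GPU not supported" restriction from construction to evaluation time: instead of failing whenever the global FLAGS_use_gpu flag is set, ChunkEvaluator now only requires that the output and label id vectors it actually evaluates live in CPU memory.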
@@ -813,7 +813,6 @@ void TrainerThread::mergeGradSparse(
           para->getMat(PARAMETER_GRADIENT).get());
   std::vector<uint32_t>& ids = mainMat->getIds(threadId_);
-  ids.clear();
   for (auto slaveParams : slaveParameters) {
     SparseRowCpuMatrix* mat =
         dynamic_cast<SparseRowCpuMatrix*>((*slaveParams)[pid]
......
@@ -146,6 +146,12 @@ public:
     }
   }

+  void enableBufType(ParameterType type) {
+    if (bufs_[type]) return;
+    bufs_[type] = Vector::createParallelVector(config_.size(), useGpu_);
+    bufs_[type]->zeroMem();
+  }
+
   void enableIntType(ParameterType type, size_t intStoreSize = 0) {
     if (!intBufs_[type]) {
       SetDevice device(deviceId_);
......
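Note the early return when bufs_[type] is already set: enableBufType() is idempotent, so SgdThreadUpdater::init() (two hunks below) can call it unconditionally without clobbering a buffer that Parameter::enableType() did create.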
@@ -20,6 +20,8 @@ limitations under the License. */
 #include "paddle/math/SparseRowMatrix.h"
 #include "paddle/utils/Thread.h"

+P_DECLARE_int32(trainer_count);
+
 namespace paddle {

 SgdThreadUpdater::SgdThreadUpdater(const OptimizationConfig& optConfig)
@@ -48,6 +50,13 @@ void SgdThreadUpdater::init(std::vector<ParameterPtr>& parameters) {
                                           false /*inPserver*/));
     size_t numRows = para->isGradSparseUpdate() ? para->getConfig().dims(0) : 0;
     optimizers_[pid]->init(numRows, &para->getConfig());
+    if (para->isGradSparseUpdate() && FLAGS_trainer_count == 1) {
+      // For trainer_count=1, the gradient machine is NeuralNetwork, which
+      // does not create a parameter buffer for PARAMETER_GRADIENT for sparse
+      // update in Parameter::enableType(). But the gradient parameter buffer
+      // is still used in SgdThreadUpdater. We need to explicitly create it.
+      para->enableBufType(PARAMETER_GRADIENT);
+    }
   }
 }
@@ -211,7 +220,7 @@ void SgdThreadUpdater::threadUpdateSparse(
     // From MultiGradientMachine
     SparseRowIdsCpuMatrix* mainMat = dynamic_cast<SparseRowIdsCpuMatrix*>(
         para->getMat(PARAMETER_GRADIENT).get());
-    const std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
+    std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);

     for (auto id : sparseIds) {
       // setup sub bufs
@@ -221,6 +230,7 @@ void SgdThreadUpdater::threadUpdateSparse(
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    sparseIds.clear();
   } else if (dynamic_cast<SparseRowCpuMatrix*>(
                  para->getMat(PARAMETER_GRADIENT).get())) {
     // From NeuralNetwork
@@ -246,6 +256,10 @@ void SgdThreadUpdater::threadUpdateSparse(
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    // For numThreads > 1, MultiGradientMachine is used, which goes
+    // to the above branch.
+    CHECK_EQ(numThreads, 1);
+    mainMat->clearIndices();
   } else {
     auto& m = *para->getMat(PARAMETER_GRADIENT).get();
     LOG(FATAL) << "Internal error: " << para->getName() << " "
......
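Taken together with the mergeGradSparse() hunk above, these changes move the responsibility for clearing sparse-row bookkeeping out of the gradient merge and into the updater, after the touched rows have been applied. A self-contained sketch of the resulting contract; all types here are simplified stand-ins, not the real Paddle classes:

// Post-commit ownership of sparse-id cleanup, with stub types.
#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

struct SparseRowIdsCpuMatrixStub {  // per-thread id lists (multi-thread path)
  std::vector<std::vector<uint32_t>> idsPerThread;
  std::vector<uint32_t>& getIds(int tid) { return idsPerThread[tid]; }
};

struct SparseRowCpuMatrixStub {     // global row-index set (single-thread path)
  std::set<uint32_t> rowIndices;
  void clearIndices() { rowIndices.clear(); }
};

void updateSparseMulti(SparseRowIdsCpuMatrixStub& mat, int tid) {
  std::vector<uint32_t>& ids = mat.getIds(tid);
  for (uint32_t id : ids) { (void)id; /* optimizer->update(...) per row */ }
  ids.clear();  // previously done in TrainerThread::mergeGradSparse()
}

void updateSparseSingle(SparseRowCpuMatrixStub& mat, size_t numThreads) {
  assert(numThreads == 1);  // trainer_count > 1 takes the ids path above
  for (uint32_t id : mat.rowIndices) { (void)id; /* optimizer->update(...) */ }
  mat.clearIndices();       // reset bookkeeping for the next batch
}

int main() {
  SparseRowIdsCpuMatrixStub m1{{{1, 3, 5}}};
  updateSparseMulti(m1, 0);
  assert(m1.getIds(0).empty());

  SparseRowCpuMatrixStub m2{{2, 4}};
  updateSparseSingle(m2, 1);
  assert(m2.rowIndices.empty());
  return 0;
}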
@@ -191,7 +191,7 @@ void installFailureWriter(void(*callback)(const char*, int));
 }
 #endif  // PADDLE_USE_GLOG

-#ifdef NDEBUG
+#ifndef NDEBUG
 #define DEBUG_LEVEL 5
 #define DBG VLOG(DEBUG_LEVEL)
 #else
......
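The Logging.h hunk fixes an inverted preprocessor test: NDEBUG is defined in release builds, so the old #ifdef NDEBUG turned the DBG macro on in release mode and compiled it out of debug builds, the opposite of the intent. A standalone illustration of the corrected gating; the VLOG stub and the release-mode definition are placeholders, not Paddle's real macros:

#include <iostream>

#define VLOG(level) std::cerr  // stand-in for the real VLOG macro

#ifndef NDEBUG                 // debug build: NDEBUG is not defined
#define DEBUG_LEVEL 5
#define DBG VLOG(DEBUG_LEVEL)
#else                          // release build: DBG statements are dead code
#define DBG if (false) std::cerr
#endif

int main() {
  DBG << "printed in debug builds, a no-op in release builds\n";
  return 0;
}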