提交 b5ab4b69 编写于 作者: Y yangyaming

Address review comments; mainly use std::copy to simplify the element-copying logic.

上级 b233ed13
...@@ -48,8 +48,6 @@ void DetectionOutputLayer::forward(PassType passType) { ...@@ -48,8 +48,6 @@ void DetectionOutputLayer::forward(PassType passType) {
Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_); Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_);
Matrix::resizeOrCreate( Matrix::resizeOrCreate(
confTmpBuffer_, confSizeSum_ / numClasses_, numClasses_, false, useGpu_); confTmpBuffer_, confSizeSum_ / numClasses_, numClasses_, false, useGpu_);
locBuffer_ = locTmpBuffer_;
confBuffer_ = confTmpBuffer_;
size_t locOffset = 0; size_t locOffset = 0;
size_t confOffset = 0; size_t confOffset = 0;
...@@ -68,7 +66,7 @@ void DetectionOutputLayer::forward(PassType passType) { ...@@ -68,7 +66,7 @@ void DetectionOutputLayer::forward(PassType passType) {
locSizeSum_, locSizeSum_,
locOffset, locOffset,
batchSize, batchSize,
*locBuffer_, *locTmpBuffer_,
kNCHWToNHWC); kNCHWToNHWC);
confOffset += appendWithPermute(*inConf, confOffset += appendWithPermute(*inConf,
height, height,
...@@ -76,7 +74,7 @@ void DetectionOutputLayer::forward(PassType passType) { ...@@ -76,7 +74,7 @@ void DetectionOutputLayer::forward(PassType passType) {
confSizeSum_, confSizeSum_,
confOffset, confOffset,
batchSize, batchSize,
*confBuffer_, *confTmpBuffer_,
kNCHWToNHWC); kNCHWToNHWC);
} }
CHECK_EQ(locOffset, locSizeSum_ / batchSize); CHECK_EQ(locOffset, locSizeSum_ / batchSize);
...@@ -100,23 +98,25 @@ void DetectionOutputLayer::forward(PassType passType) { ...@@ -100,23 +98,25 @@ void DetectionOutputLayer::forward(PassType passType) {
priorValue = priorCpuValue_; priorValue = priorCpuValue_;
} else { } else {
priorValue = getInputValue(*getPriorBoxLayer()); priorValue = getInputValue(*getPriorBoxLayer());
locBuffer_ = locTmpBuffer_;
confBuffer_ = confTmpBuffer_;
} }
confBuffer_->softmax(*confBuffer_); confBuffer_->softmax(*confBuffer_);
size_t numPriors = priorValue->getElementCnt() / 8; size_t numPriors = priorValue->getElementCnt() / 8;
vector<vector<NormalizedBBox>> allDecodedBBoxes; std::vector<std::vector<NormalizedBBox>> allDecodedBBoxes;
for (size_t n = 0; n < batchSize; ++n) { for (size_t n = 0; n < batchSize; ++n) {
vector<NormalizedBBox> decodedBBoxes; std::vector<NormalizedBBox> decodedBBoxes;
for (size_t i = 0; i < numPriors; ++i) { for (size_t i = 0; i < numPriors; ++i) {
size_t priorOffset = i * 8; size_t priorOffset = i * 8;
size_t locPredOffset = n * numPriors * 4 + i * 4; size_t locPredOffset = n * numPriors * 4 + i * 4;
vector<NormalizedBBox> priorBBoxVec; std::vector<NormalizedBBox> priorBBoxVec;
getBBoxFromPriorData( getBBoxFromPriorData(
priorValue->getData() + priorOffset, 1, priorBBoxVec); priorValue->getData() + priorOffset, 1, priorBBoxVec);
vector<vector<real>> priorBBoxVar; std::vector<std::vector<real>> priorBBoxVar;
getBBoxVarFromPriorData( getBBoxVarFromPriorData(
priorValue->getData() + priorOffset, 1, priorBBoxVar); priorValue->getData() + priorOffset, 1, priorBBoxVar);
vector<real> locPredData; std::vector<real> locPredData;
for (size_t j = 0; j < 4; ++j) for (size_t j = 0; j < 4; ++j)
locPredData.push_back(*(locBuffer_->getData() + locPredOffset + j)); locPredData.push_back(*(locBuffer_->getData() + locPredOffset + j));
NormalizedBBox bbox = NormalizedBBox bbox =
...@@ -126,7 +126,7 @@ void DetectionOutputLayer::forward(PassType passType) { ...@@ -126,7 +126,7 @@ void DetectionOutputLayer::forward(PassType passType) {
allDecodedBBoxes.push_back(decodedBBoxes); allDecodedBBoxes.push_back(decodedBBoxes);
} }
vector<map<size_t, vector<size_t>>> allIndices; std::vector<std::map<size_t, std::vector<size_t>>> allIndices;
size_t numKept = getDetectionIndices(confBuffer_->getData(), size_t numKept = getDetectionIndices(confBuffer_->getData(),
numPriors, numPriors,
numClasses_, numClasses_,
......
...@@ -19,17 +19,13 @@ limitations under the License. */ ...@@ -19,17 +19,13 @@ limitations under the License. */
#include "DetectionUtil.h" #include "DetectionUtil.h"
#include "Layer.h" #include "Layer.h"
using std::vector;
using std::map;
using std::pair;
namespace paddle { namespace paddle {
/** /**
* The detection output layer for a SSD detection task. This layer apply the * The detection output layer for a SSD detection task. This layer apply the
* Non-maximum suppression to the all predicted bounding box and keep the * Non-maximum suppression to the all predicted bounding box and keep the
* Top-K bounding boxes. * Top-K bounding boxes.
* - Input: This layer need three input layers: This first input layer * - Input: This layer needs three input layers: This first input layer
* is the priorbox layer. The rest two input layers are convolution * is the priorbox layer. The rest two input layers are convolution
* layers for generating bbox location offset and the classification * layers for generating bbox location offset and the classification
* confidence. * confidence.
......
...@@ -17,10 +17,6 @@ limitations under the License. */ ...@@ -17,10 +17,6 @@ limitations under the License. */
#include <vector> #include <vector>
#include "DataLayer.h" #include "DataLayer.h"
using std::vector;
using std::map;
using std::pair;
namespace paddle { namespace paddle {
REGISTER_LAYER(multibox_loss, MultiBoxLossLayer); REGISTER_LAYER(multibox_loss, MultiBoxLossLayer);
...@@ -133,7 +129,7 @@ void MultiBoxLossLayer::forward(PassType passType) { ...@@ -133,7 +129,7 @@ void MultiBoxLossLayer::forward(PassType passType) {
} }
// Get max scores for each prior bbox. Used in negative mining // Get max scores for each prior bbox. Used in negative mining
vector<vector<real>> allMaxConfScore; std::vector<std::vector<real>> allMaxConfScore;
numPriors_ = priorValue->getElementCnt() / 8; numPriors_ = priorValue->getElementCnt() / 8;
getMaxConfidenceScores(confBuffer_->getData(), getMaxConfidenceScores(confBuffer_->getData(),
batchSize, batchSize,
...@@ -151,18 +147,18 @@ void MultiBoxLossLayer::forward(PassType passType) { ...@@ -151,18 +147,18 @@ void MultiBoxLossLayer::forward(PassType passType) {
allMatchIndices_.clear(); allMatchIndices_.clear();
allNegIndices_.clear(); allNegIndices_.clear();
pair<size_t, size_t> retPair = generateMatchIndices(*priorValue, std::pair<size_t, size_t> retPair = generateMatchIndices(*priorValue,
numPriors_, numPriors_,
*labelValue, *labelValue,
labelIndex, labelIndex,
seqNum, seqNum,
allMaxConfScore, allMaxConfScore,
batchSize, batchSize,
overlapThreshold_, overlapThreshold_,
negOverlap_, negOverlap_,
negPosRatio_, negPosRatio_,
&allMatchIndices_, &allMatchIndices_,
&allNegIndices_); &allNegIndices_);
numMatches_ = retPair.first; numMatches_ = retPair.first;
numNegs_ = retPair.second; numNegs_ = retPair.second;
...@@ -175,30 +171,31 @@ void MultiBoxLossLayer::forward(PassType passType) { ...@@ -175,30 +171,31 @@ void MultiBoxLossLayer::forward(PassType passType) {
Matrix::resizeOrCreate(locGTData_, numMatches_ * 4, 1, false, false); Matrix::resizeOrCreate(locGTData_, numMatches_ * 4, 1, false, false);
Matrix::resizeOrCreate(locDiff_, numMatches_ * 4, 1, false, false); Matrix::resizeOrCreate(locDiff_, numMatches_ * 4, 1, false, false);
locDiff_->zeroMem(); locDiff_->zeroMem();
vector<real> locGTData; std::vector<real> locGTData;
real* locDiffData = locDiff_->getData();
const real* locBufferData = locBuffer_->getData();
for (size_t n = 0; n < batchSize; ++n) { for (size_t n = 0; n < batchSize; ++n) {
for (size_t i = 0; i < numPriors_; ++i) { for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue; // match none if (allMatchIndices_[n][i] == -1) continue; // match none
size_t locOffset = size_t locOffset =
n * (locBuffer_->getElementCnt() / batchSize) + i * 4; n * (locBuffer_->getElementCnt() / batchSize) + i * 4;
locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[0]; std::copy(locBufferData + locOffset,
locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[1]; locBufferData + locOffset + 4,
locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[2]; locDiffData + count);
locDiff_->getData()[count++] = (locBuffer_->getData() + locOffset)[3]; count += 4;
const int gtIdx = allMatchIndices_[n][i]; const int gtIdx = allMatchIndices_[n][i];
size_t priorOffset = i * 8; size_t priorOffset = i * 8;
vector<NormalizedBBox> priorBBoxVec; std::vector<NormalizedBBox> priorBBoxVec;
getBBoxFromPriorData( getBBoxFromPriorData(
priorValue->getData() + priorOffset, 1, priorBBoxVec); priorValue->getData() + priorOffset, 1, priorBBoxVec);
vector<vector<real>> priorBBoxVar; std::vector<std::vector<real>> priorBBoxVar;
getBBoxVarFromPriorData( getBBoxVarFromPriorData(
priorValue->getData() + priorOffset, 1, priorBBoxVar); priorValue->getData() + priorOffset, 1, priorBBoxVar);
size_t labelOffset = (labelIndex[n] + gtIdx) * 6; size_t labelOffset = (labelIndex[n] + gtIdx) * 6;
vector<NormalizedBBox> gtBBoxVec; std::vector<NormalizedBBox> gtBBoxVec;
getBBoxFromLabelData(labelValue->getData() + labelOffset, 1, gtBBoxVec); getBBoxFromLabelData(labelValue->getData() + labelOffset, 1, gtBBoxVec);
vector<real> gtEncode; std::vector<real> gtEncode;
encodeBBoxWithVar( encodeBBoxWithVar(
priorBBoxVec[0], priorBBoxVar[0], gtBBoxVec[0], gtEncode); priorBBoxVec[0], priorBBoxVar[0], gtBBoxVec[0], gtEncode);
locGTData.insert(locGTData.end(), gtEncode.begin(), gtEncode.end()); locGTData.insert(locGTData.end(), gtEncode.begin(), gtEncode.end());
...@@ -218,7 +215,9 @@ void MultiBoxLossLayer::forward(PassType passType) { ...@@ -218,7 +215,9 @@ void MultiBoxLossLayer::forward(PassType passType) {
confProb_->zeroMem(); confProb_->zeroMem();
size_t count = 0; size_t count = 0;
vector<real> confPredData; std::vector<real> confPredData;
real* confProbData = confProb_->getData();
const real* confBufferData = confBuffer_->getData();
for (size_t n = 0; n < batchSize; ++n) { for (size_t n = 0; n < batchSize; ++n) {
for (size_t i = 0; i < numPriors_; ++i) { for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue; if (allMatchIndices_[n][i] == -1) continue;
...@@ -226,11 +225,13 @@ void MultiBoxLossLayer::forward(PassType passType) { ...@@ -226,11 +225,13 @@ void MultiBoxLossLayer::forward(PassType passType) {
const int gtLabel = (labelValue->getData() + labelOffset)[0]; const int gtLabel = (labelValue->getData() + labelOffset)[0];
confGTData_->getData()[count] = gtLabel; confGTData_->getData()[count] = gtLabel;
size_t confOffset = n * numPriors_ * numClasses_ + i * numClasses_; size_t confOffset = n * numPriors_ * numClasses_ + i * numClasses_;
for (size_t j = 0; j < numClasses_; ++j) { std::copy(confBufferData + confOffset,
confProb_->getData()[count * numClasses_ + j] = confBufferData + confOffset + numClasses_,
(confBuffer_->getData() + confOffset)[j]; confProbData + count * numClasses_);
confPredData.push_back((confBuffer_->getData() + confOffset)[j]); confPredData.reserve(confPredData.size() + numClasses_);
} confPredData.insert(confPredData.end(),
confBufferData + confOffset,
confBufferData + confOffset + numClasses_);
++count; ++count;
} }
// Negative mining samples // Negative mining samples
...@@ -238,14 +239,17 @@ void MultiBoxLossLayer::forward(PassType passType) { ...@@ -238,14 +239,17 @@ void MultiBoxLossLayer::forward(PassType passType) {
confGTData_->getData()[count] = backgroundId_; confGTData_->getData()[count] = backgroundId_;
size_t confOffset = size_t confOffset =
n * numPriors_ * numClasses_ + allNegIndices_[n][i] * numClasses_; n * numPriors_ * numClasses_ + allNegIndices_[n][i] * numClasses_;
for (size_t j = 0; j < numClasses_; ++j) { std::copy(confBufferData + confOffset,
confProb_->getData()[count * numClasses_ + j] = confBufferData + confOffset + numClasses_,
(confBuffer_->getData() + confOffset)[j]; confProbData + count * numClasses_);
confPredData.push_back((confBuffer_->getData() + confOffset)[j]); confPredData.reserve(confPredData.size() + numClasses_);
} confPredData.insert(confPredData.end(),
count++; confBufferData + confOffset,
confBufferData + confOffset + numClasses_);
++count;
} }
} }
CHECK_EQ(numConf_, count);
confProb_->softmax(*confProb_); confProb_->softmax(*confProb_);
MatrixPtr confLossOutput; MatrixPtr confLossOutput;
Matrix::resizeOrCreate(confLossOutput, numConf_, 1, false, false); Matrix::resizeOrCreate(confLossOutput, numConf_, 1, false, false);
...@@ -254,7 +258,7 @@ void MultiBoxLossLayer::forward(PassType passType) { ...@@ -254,7 +258,7 @@ void MultiBoxLossLayer::forward(PassType passType) {
} }
real loss = locLoss_ + confLoss_; real loss = locLoss_ + confLoss_;
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
vector<real> tmp(batchSize, loss); std::vector<real> tmp(batchSize, loss);
outV->copyFrom(&tmp[0], batchSize); outV->copyFrom(&tmp[0], batchSize);
} }
...@@ -274,16 +278,18 @@ void MultiBoxLossLayer::backward(const UpdateCallback& callback) { ...@@ -274,16 +278,18 @@ void MultiBoxLossLayer::backward(const UpdateCallback& callback) {
locDiff_->getData()[i] *= (1. / numMatches_); locDiff_->getData()[i] *= (1. / numMatches_);
// Copy gradient back // Copy gradient back
size_t count = 0; size_t count = 0;
for (size_t n = 0; n < batchSize; ++n) const real* locDiffData = locDiff_->getData();
for (size_t n = 0; n < batchSize; ++n) {
for (size_t i = 0; i < numPriors_; ++i) { for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue; if (allMatchIndices_[n][i] == -1) continue;
real* locDiffData = locBuffer_->getData() + n * numPriors_ * 4 + i * 4; real* locBufferData =
locDiffData[0] = (locDiff_->getData() + count * 4)[0]; locBuffer_->getData() + n * numPriors_ * 4 + i * 4;
locDiffData[1] = (locDiff_->getData() + count * 4)[1]; std::copy(locDiffData + count * 4,
locDiffData[2] = (locDiff_->getData() + count * 4)[2]; locDiffData + (count + 1) * 4,
locDiffData[3] = (locDiff_->getData() + count * 4)[3]; locBufferData);
++count; ++count;
} }
}
CHECK_EQ(count, numMatches_); CHECK_EQ(count, numMatches_);
} }
...@@ -293,21 +299,24 @@ void MultiBoxLossLayer::backward(const UpdateCallback& callback) { ...@@ -293,21 +299,24 @@ void MultiBoxLossLayer::backward(const UpdateCallback& callback) {
for (size_t i = 0; i < numConf_ * numClasses_; ++i) for (size_t i = 0; i < numConf_ * numClasses_; ++i)
confProb_->getData()[i] *= (1. / numMatches_); confProb_->getData()[i] *= (1. / numMatches_);
size_t count = 0; size_t count = 0;
const real* confProbData = confProb_->getData();
for (size_t n = 0; n < batchSize; ++n) { for (size_t n = 0; n < batchSize; ++n) {
for (size_t i = 0; i < numPriors_; ++i) { for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue; if (allMatchIndices_[n][i] == -1) continue;
real* confDiffData = confBuffer_->getData() + real* confDiffData = confBuffer_->getData() +
n * numPriors_ * numClasses_ + i * numClasses_; n * numPriors_ * numClasses_ + i * numClasses_;
for (size_t j = 0; j < numClasses_; ++j) std::copy(confProbData + count * numClasses_,
confDiffData[j] = (confProb_->getData() + count * numClasses_)[j]; confProbData + (count + 1) * numClasses_,
confDiffData);
++count; ++count;
} }
for (size_t i = 0; i < allNegIndices_[n].size(); ++i) { for (size_t i = 0; i < allNegIndices_[n].size(); ++i) {
int idx = allNegIndices_[n][i]; int idx = allNegIndices_[n][i];
real* confDiffData = confBuffer_->getData() + real* confDiffData = confBuffer_->getData() +
n * numPriors_ * numClasses_ + idx * numClasses_; n * numPriors_ * numClasses_ + idx * numClasses_;
for (size_t j = 0; j < numClasses_; ++j) std::copy(confProbData + count * numClasses_,
confDiffData[j] = (confProb_->getData() + count * numClasses_)[j]; confProbData + (count + 1) * numClasses_,
confDiffData);
++count; ++count;
} }
} }
......
...@@ -1679,7 +1679,7 @@ class PriorBoxLayer(LayerBase): ...@@ -1679,7 +1679,7 @@ class PriorBoxLayer(LayerBase):
@config_layer('multibox_loss') @config_layer('multibox_loss')
class MultiBoxLossLayer(LayerBase): class MultiBoxLossLayer(LayerBase):
def __init__(self, name, inputs, input_num, num_classes, overlap_threshold, def __init__(self, name, inputs, input_num, num_classes, overlap_threshold,
neg_pos_ratio, neg_overlap, background_id): neg_pos_ratio, neg_overlap, background_id, **xargs):
super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0, super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0,
inputs) inputs)
config_assert( config_assert(
...@@ -1701,7 +1701,7 @@ class MultiBoxLossLayer(LayerBase): ...@@ -1701,7 +1701,7 @@ class MultiBoxLossLayer(LayerBase):
class DetectionOutputLayer(LayerBase): class DetectionOutputLayer(LayerBase):
def __init__(self, name, inputs, size, input_num, num_classes, def __init__(self, name, inputs, size, input_num, num_classes,
nms_threshold, nms_top_k, keep_top_k, confidence_threshold, nms_threshold, nms_top_k, keep_top_k, confidence_threshold,
background_id): background_id, **xargs):
super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0, super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0,
inputs) inputs)
config_assert( config_assert(
......
...@@ -1092,22 +1092,19 @@ def multibox_loss_layer(input_loc, ...@@ -1092,22 +1092,19 @@ def multibox_loss_layer(input_loc,
:type background_id: int :type background_id: int
:return: LayerOutput :return: LayerOutput
""" """
input_loc_num = 0
input_conf_num = 0
if isinstance(input_loc, LayerOutput): if isinstance(input_loc, LayerOutput):
input_loc = [input_loc] input_loc = [input_loc]
assert isinstance(input_loc, collections.Sequence) # list or tuple assert isinstance(input_loc, collections.Sequence) # list or tuple
for each in input_loc: for each in input_loc:
assert isinstance(each, LayerOutput) assert isinstance(each, LayerOutput)
input_loc_num += 1 input_loc_num = len(input_loc)
if isinstance(input_conf, LayerOutput): if isinstance(input_conf, LayerOutput):
input_conf = [input_conf] input_conf = [input_conf]
assert isinstance(input_conf, collections.Sequence) # list or tuple assert isinstance(input_conf, collections.Sequence) # list or tuple
for each in input_conf: for each in input_conf:
assert isinstance(each, LayerOutput) assert isinstance(each, LayerOutput)
input_conf_num += 1 input_conf_num = len(input_conf)
# Check the input layer number. # Check the input layer number.
assert input_loc_num == input_conf_num assert input_loc_num == input_conf_num
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册