Merge pull request #5252 from hedaoyuan/inference

Remove some code to reduce the size of the mobile inference library.

Merge pull request #5252 from hedaoyuan/inference
Remove some code to reduce the size of the mobile inference library.
562599ec · hedaoyuan · GitHub · feaf1e2d · 4854a42b · 562599ec
17 changed files
--- a/paddle/capi/Matrix.cpp
+++ b/paddle/capi/Matrix.cpp
@@ -121,6 +121,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix mat,
 paddle_matrix paddle_matrix_create_sparse(
    uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu) {
+#ifndef PADDLE_MOBILE_INFERENCE
  auto ptr = new paddle::capi::CMatrix();
  ptr->mat = paddle::Matrix::createSparseMatrix(
      height,
@@ -131,6 +132,9 @@ paddle_matrix paddle_matrix_create_sparse(
      false,
      useGpu);
  return ptr;
+#else
+  return nullptr;
+#endif
 }
 paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
@@ -140,6 +144,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
                                            uint64_t colSize,
                                            float* valueArray,
                                            uint64_t valueSize) {
+#ifndef PADDLE_MOBILE_INFERENCE
  if (mat == nullptr) return kPD_NULLPTR;
  auto ptr = cast(mat);
  if (rowArray == nullptr || colArray == nullptr ||
@@ -160,4 +165,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
  } else {
    return kPD_NOT_SUPPORTED;
  }
+#else
+  return kPD_NOT_SUPPORTED;
+#endif
 }
--- a/paddle/capi/matrix.h
+++ b/paddle/capi/matrix.h
@@ -48,6 +48,7 @@ PD_API paddle_matrix paddle_matrix_create(uint64_t height,
 * @param isBinary is binary (either 1 or 0 in matrix) or not.
 * @param useGpu is using GPU or not.
 * @return paddle_matrix.
+ * @note Mobile inference does not support this interface.
 */
 PD_API paddle_matrix paddle_matrix_create_sparse(
    uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu);
@@ -129,6 +130,7 @@ PD_API paddle_error paddle_matrix_get_shape(paddle_matrix mat,
 * NULL if the matrix is binary.
 * @param [in] valueSize length of value array. Zero if the matrix is binary.
 * @return paddle_error
+ * @note Mobile inference does not support this interface.
 */
 PD_API paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
                                                   int* rowArray,

--- a/paddle/cuda/CMakeLists.txt
+++ b/paddle/cuda/CMakeLists.txt
@@ -27,7 +27,9 @@ if(WITH_GPU)
    set_source_files_properties(${CUDA_CXX_SOURCES}
                                PROPERTIES COMPILE_FLAGS "-D__NVCC__")
 else()
+    if (NOT MOBILE_INFERENCE)
    set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
+    endif()
 endif()
 set(CUDA_CU_SOURCES

--- a/paddle/gserver/CMakeLists.txt
+++ b/paddle/gserver/CMakeLists.txt
@@ -85,9 +85,49 @@ if(MOBILE_INFERENCE)
         gradientmachines/GradientMachineMode.cpp
         gradientmachines/MultiGradientMachine.cpp)
-    # Remove useless layers
+    # Remove layers that are used only in training
    list(REMOVE_ITEM GSERVER_SOURCES
-    	 layers/RecurrentLayerGroup.cpp)
+    	 layers/RecurrentLayerGroup.cpp
+         layers/CostLayer.cpp
+         layers/MultiBoxLossLayer.cpp
+         layers/WarpCTCLayer.cpp
+         layers/CTCLayer.cpp
+         layers/LinearChainCTC.cpp
+         layers/PrintLayer.cpp)
+    list(REMOVE_ITEM GSERVER_SOURCES
+         layers/OuterProdLayer.cpp
+         layers/SumToOneNormLayer.cpp
+         layers/ConvShiftLayer.cpp
+         layers/InterpolationLayer.cpp
+         layers/AgentLayer.cpp
+         layers/DotMulOperator.cpp
+         layers/GruStepLayer.cpp
+         layers/LstmStepLayer.cpp
+         layers/ConvexCombinationLayer.cpp
+         layers/Conv3DLayer.cpp
+         layers/DeConv3DLayer.cpp
+         layers/CropLayer.cpp
+         layers/CrossEntropyOverBeam.cpp
+         layers/DataNormLayer.cpp
+         layers/FeatureMapExpandLayer.cpp
+         layers/HierarchicalSigmoidLayer.cpp
+         layers/MultinomialSampler.cpp
+         layers/NCELayer.cpp
+         layers/KmaxSeqScoreLayer.cpp
+         layers/MDLstmLayer.cpp
+         layers/MultiplexLayer.cpp
+         layers/PadLayer.cpp
+         layers/Pool3DLayer.cpp
+         layers/ResizeLayer.cpp
+         layers/RotateLayer.cpp
+         layers/RowConvLayer.cpp
+         layers/RowL2NormLayer.cpp
+         layers/SamplingIdLayer.cpp
+         layers/ScaleShiftLayer.cpp
+         layers/SelectiveFullyConnectedLayer.cpp
+         layers/SpatialPyramidPoolLayer.cpp
+         layers/BilinearInterpLayer.cpp
+         layers/ClipLayer.cpp)
 endif()
 if(WITH_GPU)

--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -16,7 +16,6 @@ limitations under the License. */
 #include "NeuralNetwork.h"
 #include "hl_gpu.h"
-#include "paddle/gserver/layers/AgentLayer.h"
 #include "paddle/utils/CustomStackTrace.h"
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
@@ -28,6 +27,7 @@ limitations under the License. */
 #ifndef PADDLE_MOBILE_INFERENCE
 #include "MultiNetwork.h"
 #include "RecurrentGradientMachine.h"
+#include "paddle/gserver/layers/AgentLayer.h"
 #endif
 namespace paddle {
@@ -192,9 +192,11 @@ void NeuralNetwork::init(const ModelConfig& config,
 void NeuralNetwork::connect(LayerPtr agentLayer,
                            LayerPtr realLayer,
                            int height) {  // Attaches realLayer (with height) to agentLayer through AgentLayer::setRealLayer.
+#ifndef PADDLE_MOBILE_INFERENCE  // AgentLayer.h is only included when PADDLE_MOBILE_INFERENCE is not defined.
  AgentLayer* agent = dynamic_cast<AgentLayer*>(agentLayer.get());  // Null when agentLayer is not an AgentLayer.
  CHECK_NOTNULL(agent);  // Guards the dereference on the next line.
  agent->setRealLayer(realLayer, height);
+#endif  // NOTE(review): with PADDLE_MOBILE_INFERENCE defined the body is empty, so the call silently does nothing - confirm no mobile code path relies on it.
 }
 void NeuralNetwork::connect(std::string agentLayerName,

--- a/paddle/gserver/layers/Layer.cpp
+++ b/paddle/gserver/layers/Layer.cpp
@@ -98,6 +98,7 @@ ClassRegistrar<Layer, LayerConfig> Layer::registrar_;
 LayerPtr Layer::create(const LayerConfig& config) {
  std::string type = config.type();
+#ifndef PADDLE_MOBILE_INFERENCE
  // NOTE: As following types have illegal character '-',
  // they can not use REGISTER_LAYER to registrar.
  // Besides, to fit with old training models,
@@ -106,7 +107,6 @@ LayerPtr Layer::create(const LayerConfig& config) {
    return LayerPtr(new MultiClassCrossEntropy(config));
  else if (type == "rank-cost")
    return LayerPtr(new RankingCost(config));
-#ifndef PADDLE_MOBILE_INFERENCE
  else if (type == "auc-validation")
    return LayerPtr(new AucValidation(config));
  else if (type == "pnpair-validation")

--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
 # gserver package unittests
 add_simple_unittest(test_LinearChainCRF)
-add_simple_unittest(test_MultinomialSampler)
 add_simple_unittest(test_RecurrentLayer)
+if(NOT MOBILE_INFERENCE)
+  add_simple_unittest(test_MultinomialSampler)
+endif()
 function(gserver_test TARGET)
  add_unittest_without_exec(${TARGET}
      ${TARGET}.cpp
@@ -49,7 +52,7 @@ if(WITH_PYTHON)
 endif()
 ############### test_WarpCTCLayer #######################
-if(NOT WITH_DOUBLE)
+if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
    add_unittest_without_exec(test_WarpCTCLayer
        test_WarpCTCLayer.cpp)

--- a/paddle/math/BaseMatrix.cu
+++ b/paddle/math/BaseMatrix.cu
@@ -1902,5 +1902,52 @@ void BaseMatrixT<real>::sumOfProducts(BaseMatrixT& b,
 }
 template class BaseMatrixT<real>;
+#ifndef PADDLE_MOBILE_INFERENCE  // Regular build: explicitly instantiate the whole BaseMatrixT<int> class.
 template class BaseMatrixT<int>;
+#else  // Mobile inference build: provide only the BaseMatrixT<int> members specialized below.
+template <>
+void BaseMatrixT<int>::zero() {  // Applies unary::Zero<int> to this matrix.
+  applyUnary(unary::Zero<int>());
+}
+template <>
+void BaseMatrixT<int>::assign(int p) {  // Applies unary::Assign<int>(p) to this matrix.
+  applyUnary(unary::Assign<int>(p));
+}
+template <>
+void BaseMatrixT<int>::isEqualTo(BaseMatrixT& b, int value) {  // Binary op IsEqual<int>(value) over this matrix and b.
+  applyBinary(binary::IsEqual<int>(value), b);
+}
+template <>
+void BaseMatrixT<int>::neg() {  // Applies unary::Neg<int> to this matrix.
+  applyUnary(unary::Neg<int>());
+}
+template <>
+void BaseMatrixT<int>::abs2() {  // Note: uses unary::Abs<int>; the "2" is only part of the method name.
+  applyUnary(unary::Abs<int>());
+}
+template <>
+void BaseMatrixT<int>::add(int p) {  // Applies unary::Add<int>(p) to this matrix.
+  applyUnary(unary::Add<int>(p));
+}
+template <>
+void BaseMatrixT<int>::add(int p1, int p2) {  // Applies unary::Add2<int>(p1, p2) to this matrix.
+  applyUnary(unary::Add2<int>(p1, p2));
+}
+template <>
+void BaseMatrixT<int>::applyL1(int learningRate, int decayRate) {  // ApplyL1<int> receives the product of the two rates.
+  applyUnary(unary::ApplyL1<int>(learningRate * decayRate));
+}
+#endif  // PADDLE_MOBILE_INFERENCE
 }  // namespace paddle
 }  // namespace paddle
--- a/paddle/math/CMakeLists.txt
+++ b/paddle/math/CMakeLists.txt
@@ -25,6 +25,19 @@ else()
    message(STATUS "Compile with MKLDNNMatrix")
 endif()
+if(MOBILE_INFERENCE)  # Drop sources/headers from the math library for mobile inference builds.
+    list(REMOVE_ITEM MATH_SOURCES
+         ${CMAKE_CURRENT_SOURCE_DIR}/SIMDFunctions.cpp)  # NOTE(review): math/tests still registers test_SIMDFunctions unconditionally - confirm tests are not built with MOBILE_INFERENCE.
+    # Remove the sparse matrix headers and sources
+    list(REMOVE_ITEM MATH_HEADERS
+         ${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.h
+         ${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.h
+         ${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.h)
+    list(REMOVE_ITEM MATH_SOURCES
+         ${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.cpp
+         ${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.cpp
+         ${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.cpp)
+endif()  # MOBILE_INFERENCE
 set(MATH_SOURCES
    "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu"
    "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu"

--- a/paddle/math/CpuSparseMatrix.h
+++ b/paddle/math/CpuSparseMatrix.h
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#ifndef PADDLE_MOBILE_INFERENCE
 #include <cstddef>
 #include "Matrix.h"
@@ -309,3 +312,57 @@ private:
  using Matrix::subMatrix;
 };
 }  // namespace paddle
+#else
+#include "Matrix.h"
+namespace paddle {
+class CpuSparseMatrix : public Matrix {  // Stub compiled in the #else branch of PADDLE_MOBILE_INFERENCE: every member is a no-op or returns null/0.
+public:
+  CpuSparseMatrix(size_t height,
+                  size_t width,
+                  size_t nnz, /* used to allocate space */
+                  SparseValueType valueType = FLOAT_VALUE,
+                  SparseFormat format = SPARSE_CSR,
+                  bool trans = false)
+      : Matrix(NULL, height, width, trans, false) {}  // nnz, valueType and format are ignored; no storage is passed to Matrix.
+  CpuSparseMatrix(real* data,
+                  int* rows,
+                  int* cols,
+                  size_t height,
+                  size_t width,
+                  size_t nnz,
+                  SparseValueType valueType,
+                  SparseFormat format,
+                  bool trans)
+      : Matrix(NULL, height, width, trans, false) {}  // data, rows, cols, nnz, valueType and format are ignored.
+  real* getValue() const { return nullptr; }  // Always null in this stub.
+  size_t getColStartIdx(size_t i) const { return 0; }  // Always 0 in this stub.
+  size_t getRowStartIdx(size_t i) const { return 0; }  // Always 0 in this stub.
+  size_t getColNum(size_t i) const { return 0; }  // Always 0 in this stub.
+  int* getRowCols(size_t i) const { return nullptr; }  // Always null in this stub.
+  CpuSparseMatrixPtr getTmpSparseMatrix(size_t height, size_t width) {
+    return nullptr;  // No temporary matrix is created; callers receive a null pointer.
+  }
+  void resize(size_t newHeight,
+              size_t newWidth,
+              size_t newNnz, /* used to allocate space */
+              SparseValueType valueType,
+              SparseFormat format) {}  // No-op: all arguments are ignored.
+  void resize(size_t newHeight, size_t newWidth) {}  // No-op.
+  MatrixPtr getTranspose() { return nullptr; }  // Always null in this stub.
+  void setRow(size_t row,
+              size_t colNum,
+              const unsigned int* cols,
+              const real* values) {}  // No-op: nothing is stored.
+};
+}  // namespace paddle
+#endif
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -451,6 +451,7 @@ void GpuMatrix::addSharedBias(Matrix& b, real scale) {
 }
 void GpuMatrix::collectBias(Matrix& a, real scale) {
+#ifdef PADDLE_WITH_CUDA
  CHECK_EQ(getHeight(), (size_t)1);
  CHECK_EQ(width_, a.getWidth());
  GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a);
@@ -461,6 +462,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
    hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get();
    hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale);
  }
+#endif
 }
 void GpuMatrix::collectSharedBias(Matrix& a, real scale) {
@@ -552,6 +554,7 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
                    const GpuMatrix& b,
                    real scaleAB,
                    real scaleT) {
+#ifdef PADDLE_WITH_CUDA
  CHECK(isContiguous());
  CHECK(b.isContiguous());
  CHECK(b.useGpu_ == true) << "Matrix type are not equal";
@@ -578,12 +581,14 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
                          b.height_,
                          scaleAB,
                          scaleT);
+#endif
 }
 void GpuMatrix::mul(const GpuMatrix& a,
                    const GpuSparseMatrix& b,
                    real scaleAB,
                    real scaleT) {
+#ifdef PADDLE_WITH_CUDA
  CHECK(isContiguous());
  CHECK(a.isContiguous());
  CHECK(a.useGpu_ == true) << "Matrix type are not equal";
@@ -622,6 +627,7 @@ void GpuMatrix::mul(const GpuMatrix& a,
                            scaleAB,
                            scaleT);
  }
+#endif
 }
 /* this = a*b */
@@ -1557,6 +1563,7 @@ void GpuMatrix::bilinearBackward(const Matrix& out,
 }
 void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
+#ifdef PADDLE_WITH_CUDA
  GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
  auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
@@ -1572,9 +1579,11 @@ void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
  hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
  hl_matrix_multi_binary_cross_entropy(
      output_d, entropy_d, mat_d, height_, outputPtr->width_);
+#endif
 }
 void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
+#ifdef PADDLE_WITH_CUDA
  GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
  auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
@@ -1590,6 +1599,7 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
  hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
  hl_matrix_multi_binary_cross_entropy_bp(
      output_d, grad_d, mat_d, height_, width_);
+#endif
 }
 void GpuMatrix::vol2Col(real* dataSrc,
@@ -3255,6 +3265,7 @@ template void CpuMatrix::mul<CpuMatrix, CacheRowCpuMatrix>(CpuSparseMatrix* a,
                                                           real scaleAB,
                                                           real scaleT);
+#ifndef PADDLE_MOBILE_INFERENCE
 void SharedCpuMatrix::mul(CpuSparseMatrix* a,
                          CpuMatrix* b,
                          real scaleAB,
@@ -3383,6 +3394,7 @@ void SharedCpuMatrix::initBlock(int blockNum) {
  }
 }
+#endif
 /* Add a (column) vector b to matrix a, column by column */
 void CpuMatrix::addColumnVector(const Matrix& b) {
  BaseMatrix::addColVector(const_cast<Matrix&>(b));

--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -2070,6 +2070,7 @@ public:
 class SharedCpuMatrix : public CpuMatrix {
 public:
+#ifndef PADDLE_MOBILE_INFERENCE
  /* blockNum is number of partitions of the matrix  */
  SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false)
      : CpuMatrix(height, width, trans) {
@@ -2115,6 +2116,7 @@ private:
  ThreadLocal<CpuMatrixPtr> localBuf_;
  ThreadLocal<std::vector<int>> localBufRows_;
  ThreadLocal<std::vector<int>> blockSeq_;
+#endif
 };
 typedef struct { unsigned int col; } sparse_non_value_t;

--- a/paddle/math/SparseMatrix.h
+++ b/paddle/math/SparseMatrix.h
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#ifndef PADDLE_MOBILE_INFERENCE
 #include <cstddef>
 #include "CpuSparseMatrix.h"
 #include "Matrix.h"
@@ -237,3 +240,47 @@ private:
 };
 }  // namespace paddle
+#else
+#include "CpuSparseMatrix.h"
+namespace paddle {
+class GpuSparseMatrix : public Matrix {  // Stub compiled in the #else branch of PADDLE_MOBILE_INFERENCE: members are no-ops or return null.
+public:
+  GpuSparseMatrix(size_t height,
+                  size_t width,
+                  size_t nnz, /* used to allocate space */
+                  SparseValueType valueType = FLOAT_VALUE,
+                  SparseFormat format_ = SPARSE_CSR,
+                  bool trans = false)
+      : Matrix(NULL, height, width, trans, false) {}  // nnz, valueType and format_ are ignored. NOTE(review): last argument is false here but true in the constructor below - confirm this is intended.
+  GpuSparseMatrix(real* value,
+                  int* rows,
+                  int* cols,
+                  size_t height,
+                  size_t width,
+                  size_t nnz,
+                  SparseValueType valueType,
+                  SparseFormat format,
+                  bool trans)
+      : Matrix(NULL, height, width, trans, true) {}  // value, rows, cols, nnz, valueType and format are ignored.
+  void resize(size_t newHeight,
+              size_t newWidth,
+              size_t newNnz, /* used to allocate space */
+              SparseValueType valueType,
+              SparseFormat format) {}  // No-op: all arguments are ignored.
+  void resize(size_t newHeight, size_t newWidth) {}  // No-op.
+  MatrixPtr getTranspose() { return nullptr; }  // Always null in this stub.
+  void setRow(size_t row,
+              size_t colNum,
+              const unsigned int* cols,
+              const real* values) {}  // No-op: nothing is stored.
+};
+}  // namespace paddle
+#endif
--- a/paddle/math/SparseRowMatrix.h
+++ b/paddle/math/SparseRowMatrix.h
@@ -14,6 +14,8 @@ limitations under the License. */
 #pragma once
+#ifndef PADDLE_MOBILE_INFERENCE
 #include <gflags/gflags.h>
 #include <string.h>
 #include <algorithm>
@@ -313,3 +315,27 @@ private:
 };
 }  // namespace paddle
+#else
+namespace paddle {
+class SparseRowCpuMatrix : public CpuMatrix {  // Mobile-inference stub. NOTE(review): this #else branch has no #include, so CpuMatrix must already be declared by the includer - confirm.
+public:
+  void reserveStore() {}  // No-op in this stub.
+  void clearIndices() {}  // No-op in this stub.
+};
+class SparsePrefetchRowCpuMatrix : public SparseRowCpuMatrix {  // Mobile-inference stub: every member is an empty function.
+public:
+  void setupIndices() {}  // No-op in this stub.
+  void addRows(MatrixPtr input) {}  // No-op: input is ignored.
+  void addRows(IVectorPtr ids) {}  // No-op: ids is ignored.
+};
+class SparseAutoGrowRowCpuMatrix : public SparseRowCpuMatrix {};  // Empty stub; adds nothing to SparseRowCpuMatrix.
+class CacheRowCpuMatrix : public SparseAutoGrowRowCpuMatrix {};  // Empty stub; adds nothing to its base.
+class SparseRowIdsCpuMatrix : public CpuMatrix {};  // Empty stub derived directly from CpuMatrix.
+}  // namespace paddle
+#endif
--- a/paddle/math/tests/CMakeLists.txt
+++ b/paddle/math/tests/CMakeLists.txt
@@ -3,8 +3,10 @@
 add_simple_unittest(test_ExecViaCpu)
 add_simple_unittest(test_SIMDFunctions)
 add_simple_unittest(test_TrainingAlgorithm)
-add_simple_unittest(test_SparseMatrix)
 add_simple_unittest(test_RowBuffer)
+if(NOT MOBILE_INFERENCE)
+    add_simple_unittest(test_SparseMatrix)
+endif()
 # TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference.
 add_unittest(test_matrixCompare

--- a/paddle/parameter/Parameter.cpp
+++ b/paddle/parameter/Parameter.cpp
@@ -200,7 +200,10 @@ void Parameter::setMat(ParameterType pType, int matType) {
                                     false,
                                     useGpu_);
    }
-  } else if (matType == MAT_NORMAL_SHARED) {
+  }
+#ifndef PADDLE_MOBILE_INFERENCE
+  // NOLINTNEXTLINE
+  else if (matType == MAT_NORMAL_SHARED) {
    CHECK_EQ(height * width, bufs_[pType]->getSize());
    size_t blockNum = 0;
    CHECK(isGradShared(&blockNum));
@@ -259,7 +262,10 @@ void Parameter::setMat(ParameterType pType, int matType) {
  } else if (matType == MAT_SPARSE_ROW_AUTO_GROW) {
    CHECK(isGradSparseUpdate());
    mats_[pType] = std::make_shared<SparseAutoGrowRowCpuMatrix>(height, width);
-  } else {
+  }
+#endif
+  // NOLINTNEXTLINE
+  else {
    LOG(FATAL) << "Unsupported mat type" << matType;
  }
 }

--- a/paddle/testing/TestUtil.cpp
+++ b/paddle/testing/TestUtil.cpp
@@ -33,6 +33,7 @@ MatrixPtr makeRandomSparseMatrix(size_t height,
                                 bool withValue,
                                 bool useGpu,
                                 bool equalNnzPerSample) {
+#ifndef PADDLE_MOBILE_INFERENCE
  std::vector<int64_t> ids(height);
  std::vector<int64_t> indices(height + 1);
  indices[0] = 0;
@@ -84,6 +85,8 @@ MatrixPtr makeRandomSparseMatrix(size_t height,
    }
    return mat;
  }
+#endif
+  return nullptr;
 }
 void generateSequenceStartPositions(size_t batchSize,