Polish code

15550a27 · Yu Yang · 9e0b33d7 · 15550a27 · 15550a27 · 15550a27
3 changed files
--- a/cmake/external/python.cmake
+++ b/cmake/external/python.cmake
@@ -18,8 +18,8 @@ ENDIF()
 INCLUDE(python_module)
-FIND_PACKAGE(PythonInterp ${PY_VERSION})
+FIND_PACKAGE(PythonInterp ${PY_VERSION} REQUIRED)
-FIND_PACKAGE(PythonLibs ${PY_VERSION})
+FIND_PACKAGE(PythonLibs ${PY_VERSION} REQUIRED)
 if(WIN32)
    execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
@@ -79,6 +79,6 @@ IF(PYTHONINTERP_FOUND)
        "please use pip to upgrade protobuf. pip install -U protobuf")
    ENDIF()
 ENDIF(PYTHONINTERP_FOUND)
+message(STATUS ${PYTHON_INCLUDE_DIR})
 INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
 INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
--- a/paddle/fluid/operators/math/matrix_bit_code.cc
+++ b/paddle/fluid/operators/math/matrix_bit_code.cc
@@ -15,225 +15,379 @@ limitations under the License. */
 #include "paddle/fluid/operators/math/matrix_bit_code.h"
 #include <iostream>
 #include <map>
 namespace paddle {
 namespace operators {
 namespace math {
 template <typename T>
-void MatrixBitCodeFunctor<T>::Add(const framework::Tensor& vec,
+struct MatrixBitCodeFunctorAdd : public boost::static_visitor<void> {
-                                  framework::Tensor* tmat) {
+  const framework::Tensor &vec_;
-  size_t batch_size = tmat->dims()[0];
+  framework::Tensor *tmat_;
-  size_t width = tmat->dims()[1];
-  auto* tmat_data = tmat->data<T>();
+  MatrixBitCodeFunctorAdd(const framework::Tensor &vec, framework::Tensor *tmat)
-  auto* vec_data = vec.data<T>();
+      : vec_(vec), tmat_(tmat) {}
-  for (size_t i = 0; i < batch_size; ++i) {
-    auto code = code_table_->get_code(i);
+  template <typename CodeTable>
-    int code_length = code->get_length();
+  void operator()(const CodeTable &code_table) {
-    for (int j = 0; j < code_length; ++j) {
+    size_t batch_size = tmat_->dims()[0];
-      size_t index = code->calc_index(j);
+    size_t width = tmat_->dims()[1];
-      tmat_data[i * width + j] += vec_data[index];
+    auto *tmat_data = tmat_->data<T>();
+    auto *vec_data = vec_.data<T>();
+    for (size_t i = 0; i < batch_size; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        tmat_data[i * width + j] += vec_data[index];
+      }
    }
  }
+};
+template <typename T>
+void MatrixBitCodeFunctor<T>::Add(const framework::Tensor &vec,
+                                  framework::Tensor *tmat) {
+  MatrixBitCodeFunctorAdd<T> func(vec, tmat);
+  code_table_.apply_visitor(func);
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
+struct MatrixBitCodeFunctorAddGrad : public boost::static_visitor<void> {
-                                      framework::Tensor* vec) {
+  const framework::Tensor &tmat_;
-  size_t batch_size = tmat.dims()[0];
+  framework::Tensor *vec_;
-  size_t width = tmat.dims()[1];
+  MatrixBitCodeFunctorAddGrad(const framework::Tensor &tmat,
-  auto* vec_data = vec->data<T>();
+                              framework::Tensor *vec)
-  auto* tmat_data = tmat.data<T>();
+      : tmat_(tmat), vec_(vec) {}
-  for (size_t i = 0; i < batch_size; ++i) {
-    auto code = code_table_->get_code(i);
+  template <typename CodeTable>
-    int code_length = code->get_length();
+  void operator()(const CodeTable &table) {
-    for (int j = 0; j < code_length; ++j) {
+    size_t batch_size = tmat_.dims()[0];
-      size_t index = code->calc_index(j);
+    size_t width = tmat_.dims()[1];
-      vec_data[index] += tmat_data[i * width + j];
+    auto *vec_data = vec_->data<T>();
+    auto *tmat_data = tmat_.data<T>();
+    for (size_t i = 0; i < batch_size; ++i) {
+      auto code = table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        vec_data[index] += tmat_data[i * width + j];
+      }
    }
  }
+};
+template <typename T>
+void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor &tmat,
+                                      framework::Tensor *vec) {
+  MatrixBitCodeFunctorAddGrad<T> func(tmat, vec);
+  code_table_.apply_visitor(func);
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
+struct MatrixBitCodeFunctorSelectedRowsAddGrad
-                                      framework::SelectedRows* vec) {
+    : public boost::static_visitor<void> {
-  size_t batch_size = tmat.dims()[0];
+  const framework::Tensor &tmat_;
-  size_t width = tmat.dims()[1];
+  framework::SelectedRows *vec_;
-  auto* vec_data = vec->mutable_value()->data<T>();
-  auto* tmat_data = tmat.data<T>();
+  MatrixBitCodeFunctorSelectedRowsAddGrad(const framework::Tensor &tmat,
-  for (size_t i = 0; i < batch_size; ++i) {
+                                          framework::SelectedRows *vec)
-    auto code = code_table_->get_code(i);
+      : tmat_(tmat), vec_(vec) {}
-    int code_length = code->get_length();
-    for (int j = 0; j < code_length; ++j) {
+  template <typename CodeTable>
-      size_t index = code->calc_index(j);
+  void operator()(const CodeTable &code_table) {
-      int64_t row_index = vec->GetIndexFromId(static_cast<int64_t>(index));
+    size_t batch_size = tmat_.dims()[0];
-      vec_data[row_index] += tmat_data[i * width + j];
+    size_t width = tmat_.dims()[1];
+    auto *vec_data = vec_->mutable_value()->template data<T>();
+    auto *tmat_data = tmat_.data<T>();
+    for (size_t i = 0; i < batch_size; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        int64_t row_index = vec_->GetIndexFromId(static_cast<int64_t>(index));
+        vec_data[row_index] += tmat_data[i * width + j];
+      }
    }
  }
+};
+template <typename T>
+void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor &tmat,
+                                      framework::SelectedRows *vec) {
+  MatrixBitCodeFunctorSelectedRowsAddGrad<T> func(tmat, vec);
+  code_table_.apply_visitor(func);
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::Sum(const framework::Tensor& tmat,
+struct MatrixBitCodeFunctorSum : public boost::static_visitor<void> {
-                                  framework::Tensor* sum, T scale_sum) {
+  const framework::Tensor &tmat_;
-  size_t num_samples = tmat.dims()[0];
+  framework::Tensor *sum_;
-  size_t o_width = tmat.dims()[1];
+  T scale_sum_;
-  auto* tmat_data = tmat.data<T>();
-  auto* sum_data = sum->data<T>();
+  MatrixBitCodeFunctorSum(const framework::Tensor &tmat, framework::Tensor *sum,
-  for (size_t i = 0; i < num_samples; ++i) {
+                          T scale_sum)
-    T sm = static_cast<T>(0.0);
+      : tmat_(tmat), sum_(sum), scale_sum_(scale_sum) {}
-    auto code = code_table_->get_code(i);
-    int code_length = code->get_length();
+  template <typename CodeTable>
-    for (int j = 0; j < code_length; ++j) {
+  void operator()(const CodeTable &code_table) {
-      if (code->calc_bit(j)) {
+    size_t num_samples = tmat_.dims()[0];
-        // calc_bit starts from right most bit, while data in tmat[i] is in the
+    size_t o_width = tmat_.dims()[1];
-        // reverse order.
+    auto *tmat_data = tmat_.data<T>();
-        sm += tmat_data[i * o_width + j];
+    auto *sum_data = sum_->data<T>();
+    for (size_t i = 0; i < num_samples; ++i) {
+      T sm = static_cast<T>(0.0);
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        if (code.calc_bit(j)) {
+          // calc_bit starts from the rightmost bit, while data in tmat[i] is
+          // in the reverse order.
+          sm += tmat_data[i * o_width + j];
+        }
      }
+      sum_data[i] = scale_sum_ * sm;
    }
-    sum_data[i] = scale_sum * sm;
  }
+};
+template <typename T>
+void MatrixBitCodeFunctor<T>::Sum(const framework::Tensor &tmat,
+                                  framework::Tensor *sum, T scale_sum) {
+  MatrixBitCodeFunctorSum<T> func(tmat, sum, scale_sum);
+  code_table_.apply_visitor(func);
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::Mul(framework::Tensor* tmat,
+struct MatrixBitCodeFunctorMul : public boost::static_visitor<void> {
-                                  const framework::Tensor& weight,
+  framework::Tensor *tmat_;
-                                  const framework::Tensor& input) {
+  const framework::Tensor &weight_;
-  auto blas =
+  const framework::Tensor &input_;
-      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
-  size_t num_samples = tmat->dims()[0];
+  MatrixBitCodeFunctorMul(framework::Tensor *tmat,
-  size_t tmat_width = tmat->dims()[1];
+                          const framework::Tensor &weight,
-  size_t input_width = input.dims()[1];
+                          const framework::Tensor &input)
-  size_t weight_width = weight.dims()[1];
+      : tmat_(tmat), weight_(weight), input_(input) {}
-  auto tmat_value = tmat->data<T>();
-  auto weight_value = weight.data<T>();
+  template <typename CodeTable>
-  auto input_value = input.data<T>();
+  void operator()(const CodeTable &code_table) {
-  for (size_t i = 0; i < num_samples; ++i) {
+    auto blas =
-    auto code = code_table_->get_code(i);
+        GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
-    int code_length = code->get_length();
+    size_t num_samples = tmat_->dims()[0];
-    const T* input_row = input_value + input_width * i;
+    size_t tmat_width = tmat_->dims()[1];
-    for (int j = 0; j < code_length; ++j) {
+    size_t input_width = input_.dims()[1];
-      size_t index = code->calc_index(j);
+    size_t weight_width = weight_.dims()[1];
-      const T* weight_row = weight_value + weight_width * index;
+    auto tmat_value = tmat_->data<T>();
-      T sum = static_cast<T>(0.0);
+    auto weight_value = weight_.data<T>();
-      sum = blas.DOT(input_width, weight_row, input_row);
+    auto input_value = input_.data<T>();
-      tmat_value[i * tmat_width + j] += sum;
+    for (size_t i = 0; i < num_samples; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      const T *input_row = input_value + input_width * i;
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        const T *weight_row = weight_value + weight_width * index;
+        T sum = blas.DOT(input_width, weight_row, input_row);
+        tmat_value[i * tmat_width + j] += sum;
+      }
    }
  }
+};
+template <typename T>
+void MatrixBitCodeFunctor<T>::Mul(framework::Tensor *tmat,
+                                  const framework::Tensor &weight,
+                                  const framework::Tensor &input) {
+  MatrixBitCodeFunctorMul<T> func(tmat, weight, input);
+  code_table_.apply_visitor(func);
 }
+template <typename T, size_t N>
+class ReservedVector : public std::vector<T> {
+ public:
+  ReservedVector() { this->reserve(N); }
+};
 template <typename T>
-void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
+struct MatrixBitCodeFunctorMulGradWeight : public boost::static_visitor<void> {
-                                            framework::Tensor* weight,
+  const framework::Tensor &tmat_;
-                                            const framework::Tensor& input) {
+  framework::Tensor *weight_;
-  auto blas =
+  const framework::Tensor &input_;
-      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
+  MatrixBitCodeFunctorMulGradWeight(const framework::Tensor &tmat,
-  size_t num_samples = tmat.dims()[0];
+                                    framework::Tensor *weight,
-  size_t input_width = input.dims()[1];
+                                    const framework::Tensor &input)
-  size_t tmat_width = tmat.dims()[1];
+      : tmat_(tmat), weight_(weight), input_(input) {}
-  size_t weight_width = weight->dims()[1];
+  template <typename CodeTable>
-  auto tmat_value = tmat.data<T>();
+  void operator()(const CodeTable &code_table) {
-  auto weight_value = weight->data<T>();
+    auto blas =
-  auto input_value = input.data<T>();
+        GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
+    size_t num_samples = tmat_.dims()[0];
-  std::map<int, std::vector<std::pair<T, const T*>>> ops;
+    size_t input_width = input_.dims()[1];
-  for (size_t i = 0; i < num_samples; ++i) {
+    size_t tmat_width = tmat_.dims()[1];
-    auto code = code_table_->get_code(i);
+    size_t weight_width = weight_->dims()[1];
-    int code_length = code->get_length();
+    auto tmat_value = tmat_.data<T>();
-    const T* input_value_row = input_value + input_width * i;
+    auto weight_value = weight_->data<T>();
-    const T* tmat_row = tmat_value + i * tmat_width;
+    auto input_value = input_.data<T>();
-    for (int j = 0; j < code_length; ++j) {
-      ops[code->calc_index(j)].emplace_back(tmat_row[j], input_value_row);
+    std::map<int, ReservedVector<std::pair<T, const T *>, 8u>> ops;
+    for (size_t i = 0; i < num_samples; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      const T *input_value_row = input_value + input_width * i;
+      const T *tmat_row = tmat_value + i * tmat_width;
+      for (int j = 0; j < code_length; ++j) {
+        ops[code.calc_index(j)].emplace_back(tmat_row[j], input_value_row);
+      }
    }
-  }
+    for (auto &op : ops) {
-  for (auto& op : ops) {
+      auto &op_in_row = op.second;
-    auto& op_in_row = op.second;
+      for (auto &pair : op_in_row) {
-    for (auto& pair : op_in_row) {
+        auto &scale = pair.first;
-      auto& scale = pair.first;
+        auto *input_row = pair.second;
-      auto* input_row = pair.second;
+        T *weight_row = weight_value + op.first * weight_width;
-      T* weight_row = weight_value + op.first * weight_width;
+        blas.AXPY(input_width, scale, input_row, weight_row);
-      blas.AXPY(input_width, scale, input_row, weight_row);
+      }
    }
  }
+};
+template <typename T>
+void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor &tmat,
+                                            framework::Tensor *weight,
+                                            const framework::Tensor &input) {
+  MatrixBitCodeFunctorMulGradWeight<T> func(tmat, weight, input);
+  code_table_.apply_visitor(func);
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
+struct MatrixBitCodeFunctorMulGradWeightSR
-                                            framework::SelectedRows* weight,
+    : public boost::static_visitor<void> {
-                                            const framework::Tensor& input) {
+  const framework::Tensor &tmat_;
-  auto blas =
+  framework::SelectedRows *weight_;
-      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
+  const framework::Tensor &input_;
-  size_t num_samples = tmat.dims()[0];
-  size_t input_width = input.dims()[1];
+  MatrixBitCodeFunctorMulGradWeightSR(const framework::Tensor &tmat,
-  size_t tmat_width = tmat.dims()[1];
+                                      framework::SelectedRows *weight,
-  size_t weight_width = weight->value().dims()[1];
+                                      const framework::Tensor &input)
-  auto tmat_value = tmat.data<T>();
+      : tmat_(tmat), weight_(weight), input_(input) {}
-  auto weight_value = weight->mutable_value()->data<T>();
-  auto input_value = input.data<T>();
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
-  std::unordered_map<int, std::vector<std::pair<T, const T*>>> ops;
+    auto blas =
-  ops.reserve(weight->rows().size());
+        GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
+    size_t num_samples = tmat_.dims()[0];
-  for (size_t i = 0; i < num_samples; ++i) {
+    size_t input_width = input_.dims()[1];
-    auto code = code_table_->get_code(i);
+    size_t tmat_width = tmat_.dims()[1];
-    int code_length = code->get_length();
+    size_t weight_width = weight_->value().dims()[1];
-    const T* input_value_row = input_value + input_width * i;
+    auto tmat_value = tmat_.data<T>();
-    const T* tmat_row = tmat_value + i * tmat_width;
+    auto weight_value = weight_->mutable_value()->data<T>();
-    for (int j = 0; j < code_length; ++j) {
+    auto input_value = input_.data<T>();
-      ops[code->calc_index(j)].emplace_back(tmat_row[j], input_value_row);
+    std::unordered_map<int, std::vector<std::pair<T, const T *>>> ops;
+    ops.reserve(weight_->rows().size());
+    for (size_t i = 0; i < num_samples; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      const T *input_value_row = input_value + input_width * i;
+      const T *tmat_row = tmat_value + i * tmat_width;
+      for (int j = 0; j < code_length; ++j) {
+        ops[code.calc_index(j)].emplace_back(tmat_row[j], input_value_row);
+      }
    }
-  }
-  for (auto& row : weight->rows()) {
+    for (auto &row : weight_->rows()) {
-    auto& op_in_row = ops[row];
+      auto &op_in_row = ops[row];
-    for (auto& pair : op_in_row) {
+      for (auto &pair : op_in_row) {
-      auto& scale = pair.first;
+        auto &scale = pair.first;
-      auto* input_row = pair.second;
+        auto *input_row = pair.second;
-      blas.AXPY(input_width, scale, input_row, weight_value);
+        blas.AXPY(input_width, scale, input_row, weight_value);
+      }
+      weight_value += weight_width;
    }
-    weight_value += weight_width;
  }
+};
+template <typename T>
+void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor &tmat,
+                                            framework::SelectedRows *weight,
+                                            const framework::Tensor &input) {
+  MatrixBitCodeFunctorMulGradWeightSR<T> func(tmat, weight, input);
+  code_table_.apply_visitor(func);
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor& tmat,
+struct MatrixBitCodeFunctorMulGradError : public boost::static_visitor<void> {
-                                           const framework::Tensor& weight,
+  const framework::Tensor &tmat_;
-                                           framework::Tensor* input) {
+  const framework::Tensor &weight_;
-  size_t num_samples = tmat.dims()[0];
+  framework::Tensor *input_;
-  size_t tmat_width = tmat.dims()[1];
-  size_t input_width = input->dims()[1];
+  MatrixBitCodeFunctorMulGradError(const framework::Tensor &tmat,
-  size_t weight_width = weight.dims()[1];
+                                   const framework::Tensor &weight,
-  auto tmat_value = tmat.data<T>();
+                                   framework::Tensor *input)
-  auto weight_value = weight.data<T>();
+      : tmat_(tmat), weight_(weight), input_(input) {}
-  auto input_value = input->data<T>();
+  template <typename CodeTable>
+  void operator()(const CodeTable &code_table) {
-  for (size_t i = 0; i < num_samples; ++i) {
+    size_t num_samples = tmat_.dims()[0];
-    auto code = code_table_->get_code(i);
+    size_t tmat_width = tmat_.dims()[1];
-    int code_length = code->get_length();
+    size_t input_width = input_->dims()[1];
-    for (int j = 0; j < code_length; ++j) {
+    size_t weight_width = weight_.dims()[1];
-      size_t index = code->calc_index(j);
+    auto tmat_value = tmat_.data<T>();
+    auto weight_value = weight_.data<T>();
-      for (size_t k = 0; k < input_width; ++k) {
+    auto input_value = input_->data<T>();
-        input_value[input_width * i + k] +=
-            tmat_value[i * tmat_width + j] *
+    for (size_t i = 0; i < num_samples; ++i) {
-            weight_value[weight_width * index + k];
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        size_t index = code.calc_index(j);
+        for (size_t k = 0; k < input_width; ++k) {
+          input_value[input_width * i + k] +=
+              tmat_value[i * tmat_width + j] *
+              weight_value[weight_width * index + k];
+        }
      }
    }
  }
+};
+template <typename T>
+void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor &tmat,
+                                           const framework::Tensor &weight,
+                                           framework::Tensor *input) {
+  MatrixBitCodeFunctorMulGradError<T> func(tmat, weight, input);
+  code_table_.apply_visitor(func);
 }
 template <typename T>
-void MatrixBitCodeFunctor<T>::Sub(framework::Tensor* tmat) {
+struct MatrixBitCodeFunctorSub : public boost::static_visitor<void> {
-  size_t num_samples = tmat->dims()[0];
+  framework::Tensor *tmat_;
-  size_t o_width = tmat->dims()[1];
-  auto* tmat_data = tmat->data<T>();
+  explicit MatrixBitCodeFunctorSub(framework::Tensor *tmat) : tmat_(tmat) {}
-  for (size_t i = 0; i < num_samples; ++i) {
-    auto code = code_table_->get_code(i);
+  template <typename CodeTable>
-    int code_length = code->get_length();
+  void operator()(const CodeTable &code_table) {
-    for (int j = 0; j < code_length; ++j) {
+    size_t num_samples = tmat_->dims()[0];
-      if (code->calc_bit(j)) {
+    size_t o_width = tmat_->dims()[1];
-        tmat_data[i * o_width + j] -= 1;
+    auto *tmat_data = tmat_->data<T>();
+    for (size_t i = 0; i < num_samples; ++i) {
+      auto code = code_table.get_code(i);
+      int code_length = code.get_length();
+      for (int j = 0; j < code_length; ++j) {
+        if (code.calc_bit(j)) {
+          tmat_data[i * o_width + j] -= 1;
+        }
      }
    }
  }
+};
+template <typename T>
+void MatrixBitCodeFunctor<T>::Sub(framework::Tensor *tmat) {
+  MatrixBitCodeFunctorSub<T> func(tmat);
+  code_table_.apply_visitor(func);
 }
 template class MatrixBitCodeFunctor<float>;

--- a/paddle/fluid/operators/math/matrix_bit_code.h
+++ b/paddle/fluid/operators/math/matrix_bit_code.h
@@ -23,6 +23,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/variant.h"
 #if defined(_WIN32)
 #include <intrin.h>
@@ -99,24 +100,7 @@ inline int clz(const T& value) {
 inline size_t FindLastSet(size_t x) { return sizeof(size_t) * 8 - clz(x); }
 #endif  // !_WIN32
-// set a code interface to create multiple code
+class SimpleCode {
-class Code {
- public:
-  virtual ~Code() {}
-  virtual size_t calc_index(int bit) const = 0;
-  virtual bool calc_bit(int bit) const = 0;
-  virtual int get_length() const = 0;
-};
-// set a CodeTable interface to create multiple code table
-class CodeTable {
- public:
-  virtual Code* get_code(int64_t code) const = 0;
-  virtual size_t size() const = 0;
-  virtual int get_max_code_length() const = 0;
-  virtual ~CodeTable() {}
-};
-class SimpleCode : public Code {
 public:
  SimpleCode(size_t code, size_t num_classes, const int64_t* ids)
      : c_(static_cast<size_t>(ids[code]) + num_classes) {}
@@ -138,7 +122,7 @@ class SimpleCode : public Code {
 };
 template <typename T>
-class CustomCode : public Code {
+class CustomCode {
 public:
  CustomCode(const framework::Tensor& ptable, const framework::Tensor& pcode,
             const int64_t* ids, int index) {
@@ -155,11 +139,11 @@ class CustomCode : public Code {
   * Binary classification path is the suffixes of encoding, thus leave out the
   * left most bit in calc_bit.
   */
-  size_t calc_index(int bit) const override { return ptable_data_[bit]; }
+  size_t calc_index(int bit) const { return ptable_data_[bit]; }
-  bool calc_bit(int bit) const override { return pcode_data_[bit]; }
+  bool calc_bit(int bit) const { return pcode_data_[bit]; }
  // NOTE: this function is not thread-safe.
-  int get_length() const override {
+  int get_length() const {
    if (length_ < 0) {
      auto len = seq_len_;
      length_ =
@@ -177,46 +161,32 @@ class CustomCode : public Code {
  mutable int length_{-1};
 };
-class SimpleCodeTable : public CodeTable {
+class SimpleCodeTable {
 public:
  SimpleCodeTable(size_t num_classes, const int64_t* ids)
      : num_classes_(num_classes), ids_(ids) {}
-  Code* get_code(int64_t code) const {
+  SimpleCode get_code(int64_t code) const {
-    auto it = codes_.find(code);
+    return SimpleCode(code, num_classes_, ids_);
-    if (it != codes_.end()) {
-      return it->second.get();
-    }
-    auto* result = new SimpleCode(code, num_classes_, ids_);
-    codes_.emplace(code, std::unique_ptr<Code>(result));
-    return result;
  }
  size_t size() const { return num_classes_; }
  int get_max_code_length() const { return FindLastSet(num_classes_ - 1); }
 private:
-  mutable std::map<int64_t, std::unique_ptr<Code>> codes_;
  size_t num_classes_;
  const int64_t* ids_;
 };
 template <typename T>
-class CustomCodeTable : public CodeTable {
+class CustomCodeTable {
 public:
  CustomCodeTable(const framework::Tensor& ptable,
                  const framework::Tensor& pcode, const int64_t* ids)
      : ptable_(ptable), pcode_(pcode), ids_(ids) {}
-  Code* get_code(int64_t code) const {
+  CustomCode<T> get_code(int64_t code) const {
-    auto it = codes_.find(code);
+    return CustomCode<T>(ptable_, pcode_, ids_, code);
-    if (it != codes_.end()) {
-      return it->second.get();
-    }
-    auto* result = new CustomCode<T>(ptable_, pcode_, ids_, code);
-    codes_.emplace(code, std::unique_ptr<Code>(result));
-    return result;
  }
  size_t size() const { return static_cast<size_t>(ptable_.dims()[1]); }
@@ -225,25 +195,26 @@ class CustomCodeTable : public CodeTable {
  }
 private:
-  mutable std::unordered_map<int64_t, std::unique_ptr<Code>> codes_;
  const framework::Tensor& ptable_;
  const framework::Tensor& pcode_;
  const int64_t* ids_;
 };
+using CodeTable = boost::variant<SimpleCodeTable, CustomCodeTable<int64_t>>;
 template <typename T>
 class MatrixBitCodeFunctor {
 public:
  MatrixBitCodeFunctor(size_t num_classes, const int64_t* ids)
      : num_classes_(num_classes),
        ids_(ids),
-        code_table_(new SimpleCodeTable(num_classes, ids)) {}
+        code_table_(SimpleCodeTable(num_classes, ids)) {}
  MatrixBitCodeFunctor(const framework::Tensor& ptable,
                       const framework::Tensor& pcode, const int64_t* ids)
      : num_classes_(static_cast<size_t>(ptable.dims()[1])),
        ids_(ids),
-        code_table_(new CustomCodeTable<int64_t>(ptable, pcode, ids)) {}
+        code_table_(CustomCodeTable<int64_t>(ptable, pcode, ids)) {}
  /* For j < code_length
       tmat(i, j) += vec(0, index(i, j))
  */
@@ -293,7 +264,7 @@ class MatrixBitCodeFunctor {
  size_t num_classes_;
  const int64_t* ids_;
-  std::unique_ptr<CodeTable> code_table_;
+  CodeTable code_table_;
 };
 }  // namespace math
 }  // namespace operators