matrix_bit_code.h 8.1 KB
Newer Older
Y
Yancey1989 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
Y
Yu Yang 已提交
16
#include <map>
J
JiabinYang 已提交
17 18 19
#include <unordered_map>
#include <utility>
#include <vector>
W
weixing02 已提交
20
#include "paddle/fluid/framework/eigen.h"
J
JiabinYang 已提交
21 22
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows.h"
W
weixing02 已提交
23
#include "paddle/fluid/framework/tensor.h"
J
JiabinYang 已提交
24
#include "paddle/fluid/operators/math/blas.h"
W
weixing02 已提交
25
#include "paddle/fluid/platform/device_context.h"
Y
Yu Yang 已提交
26
#include "paddle/fluid/platform/variant.h"
Y
Yancey1989 已提交
27

D
dzhwinter 已提交
28 29 30 31 32
#if defined(_WIN32)
#include <intrin.h>
#include <windows.h>
#endif  // _WIN32

Y
Yancey1989 已提交
33 34 35
namespace paddle {
namespace operators {
namespace math {
W
weixing02 已提交
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
/**
 * SimpleCodeTable class should support 3 functions:
 *
 * size_t size()
 *   return the number of ids
 *
 * int get_max_code_length()
 *   return the maximal code length
 *
 * SimpleCode get_code(int64_t i)
 *   return the i-th code. The code class is described below.
 *
 * SimpleCode class should support 3 functions:
 *
 * int get_length()
 *   return the length of the code
 *
 * size_t calc_index(int bit)
 *   bit ranges from 0 to get_length() - 1
 *   return the index for the (1+bit) level parent
 *
 * bool calc_bit(int bit)
 *   return true if the bit level parent is the right child of (1+bit) level
 *   parent
 *
 */
Y
Yancey1989 已提交
62 63 64 65 66 67

/**
 * return the 1-based index of the highest bit set
 *
 * for x > 0:
 * \f[
W
weixing02 已提交
68
 *    FindLastSet(x) = 1 + \lfloor \log_{2} x \rfloor
Y
Yancey1989 已提交
69 70
 * \f]
 */
D
dzhwinter 已提交
71
#if !defined(_WIN32)
Y
Yancey1989 已提交
72 73 74 75 76 77
// Returns the 1-based position of the most significant set bit of x
// (1 + floor(log2(x)) for x > 0) and 0 when x == 0.
//
// x is widened to unsigned long long so that a single builtin serves every
// size_t width: the extra leading zeros introduced by widening are cancelled
// by measuring against the width of unsigned long long instead of size_t.
inline constexpr size_t FindLastSet(size_t x) {
  static_assert(sizeof(size_t) <= sizeof(unsigned long long),  // NOLINT
                "size_t must fit into unsigned long long");
  // __builtin_clzll(0) is undefined, hence the explicit zero guard.
  return x == 0
             ? static_cast<size_t>(0)
             : static_cast<size_t>(8 * sizeof(unsigned long long) -  // NOLINT
                                   __builtin_clzll(x));
}
D
dzhwinter 已提交
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
#else
// MSVC has no built-in clz/ctz functions; emulate them with _BitScan*.
template <typename T>
inline int ctz(const T& value) {
  DWORD trailing_zero = 0;
  if (_BitScanForward(&trailing_zero, value)) {
    return static_cast<int>(trailing_zero);
  } else {
    return static_cast<int>(0);
  }
}

template <typename T>
inline int clz(const T& value) {
  DWORD leadning_zero = 0;
  if (_BitScanReverse(&leadning_zero, value)) {
    return static_cast<int>(sizeof(T) * 8 - leadning_zero);
  } else {
    return static_cast<int>(0);
  }
}

inline size_t FindLastSet(size_t x) { return sizeof(size_t) * 8 - clz(x); }
#endif  // !_WIN32
Y
Yu Yang 已提交
103
/**
 * Code of a single class in the default (implicit binary tree) layout.
 *
 * The id of the root should be 1 rather than 0, thus the encoding of class c
 * is `c + num_classes`, and all siblings obtain the same weight index from
 * their common prefix.
 *
 * - The weight index is a prefix of the encoding, so calc_index leaves out
 *   the right-most bits.
 * - The binary classification path is a suffix of the encoding, so calc_bit
 *   leaves out the left-most bit.
 */
class SimpleCode {
 public:
  /**
   * @param code         position in `ids` of the sample to encode
   * @param num_classes  number of classes; added to the id to form the code
   * @param ids          array of class ids, indexed by `code`
   */
  SimpleCode(size_t code, size_t num_classes, const int64_t* ids)
      : c_(static_cast<size_t>(ids[code]) + num_classes) {}

  /**
   * Weight index for level `bit`: the encoding with its lowest `bit + 1`
   * bits dropped, minus one.
   * `bit` is expected to lie in [0, get_length()).
   */
  size_t calc_index(int bit) const { return (c_ >> (bit + 1)) - 1; }

  /**
   * Whether bit `bit` of the encoding is set.
   * The encoding is shifted instead of an `int` mask: `1 << bit` would
   * overflow for bit >= 31 although the encoding itself is a size_t.
   */
  bool calc_bit(int bit) const { return ((c_ >> bit) & 1) != 0; }

  /** Code length: the position of the highest set bit, minus one. */
  int get_length() const { return static_cast<int>(FindLastSet(c_)) - 1; }

 private:
  size_t c_;  // encoding: ids[code] + num_classes
};

J
JiabinYang 已提交
124
template <typename T>
Y
Yu Yang 已提交
125
class CustomCode {
126
 public:
127 128 129 130 131 132
  CustomCode(const framework::Tensor& path_table,
             const framework::Tensor& path_code, const int64_t* ids,
             int index) {
    seq_len_ = path_table.dims()[1];
    path_table_data_ = path_table.data<T>() + seq_len_ * index;
    path_code_data_ = path_code.data<T>() + seq_len_ * index;
J
JiabinYang 已提交
133
  }
134
  /**
Y
Yu Yang 已提交
135
   * Here the id of root should be 1 rather than 0, thus the encoding of class c
136 137 138 139 140 141 142
   * is `c + num_classes` and all siblings can get the same weight indice using
   * prefixes.
   * Weight index is the prefixes of encoding, thus leave out the right most
   * bit in calc_index.
   * Binary classification path is the suffixes of encoding, thus leave out the
   * left most bit in calc_bit.
   */
143 144
  size_t calc_index(int bit) const { return path_table_data_[bit]; }
  bool calc_bit(int bit) const { return path_code_data_[bit]; }
145

Y
Yu Yang 已提交
146
  // NOTE: this function is not thread-safe.
Y
Yu Yang 已提交
147
  int get_length() const {
Y
Yu Yang 已提交
148 149
    if (length_ < 0) {
      auto len = seq_len_;
150 151 152 153
      length_ = static_cast<int>(
          std::find_if(path_table_data_, path_table_data_ + len,
                       [](const T& val) { return val < 0; }) -
          path_table_data_);
154
    }
Y
Yu Yang 已提交
155
    return length_;
156 157 158
  }

 private:
Y
Yu Yang 已提交
159
  int64_t seq_len_;
160 161
  const T* path_table_data_;
  const T* path_code_data_;
Y
Yu Yang 已提交
162
  mutable int length_{-1};
163 164
};

Y
Yu Yang 已提交
165
class SimpleCodeTable {
166
 public:
J
JiabinYang 已提交
167
  SimpleCodeTable(size_t num_classes, const int64_t* ids)
168
      : num_classes_(num_classes), ids_(ids) {}
Y
Yu Yang 已提交
169

Y
Yu Yang 已提交
170 171
  SimpleCode get_code(int64_t code) const {
    return SimpleCode(code, num_classes_, ids_);
Y
Yancey1989 已提交
172
  }
Y
Yu Yang 已提交
173

Y
Yancey1989 已提交
174 175 176 177 178
  size_t size() const { return num_classes_; }
  int get_max_code_length() const { return FindLastSet(num_classes_ - 1); }

 private:
  size_t num_classes_;
179 180 181
  const int64_t* ids_;
};

J
JiabinYang 已提交
182
template <typename T>
Y
Yu Yang 已提交
183
class CustomCodeTable {
184
 public:
185 186 187
  CustomCodeTable(const framework::Tensor& path_table,
                  const framework::Tensor& path_code, const int64_t* ids)
      : ptable_(path_table), pcode_(path_code), ids_(ids) {}
188

Y
Yu Yang 已提交
189 190
  CustomCode<T> get_code(int64_t code) const {
    return CustomCode<T>(ptable_, pcode_, ids_, code);
191 192
  }

J
JiabinYang 已提交
193
  size_t size() const { return static_cast<size_t>(ptable_.dims()[1]); }
194
  int get_max_code_length() const {
J
JiabinYang 已提交
195
    return static_cast<size_t>(ptable_.dims()[1]);
196 197 198
  }

 private:
J
JiabinYang 已提交
199 200
  const framework::Tensor& ptable_;
  const framework::Tensor& pcode_;
201
  const int64_t* ids_;
Y
Yancey1989 已提交
202 203
};

Y
Yu Yang 已提交
204 205
// Holds whichever code table a MatrixBitCodeFunctor was constructed with:
// either the default SimpleCodeTable or a CustomCodeTable over int64_t paths.
using CodeTable = boost::variant<SimpleCodeTable, CustomCodeTable<int64_t>>;

Y
Yancey1989 已提交
206
template <typename T>
Y
Yancey1989 已提交
207 208
class MatrixBitCodeFunctor {
 public:
J
JiabinYang 已提交
209
  MatrixBitCodeFunctor(size_t num_classes, const int64_t* ids)
210 211
      : num_classes_(num_classes),
        ids_(ids),
Y
Yu Yang 已提交
212
        code_table_(SimpleCodeTable(num_classes, ids)) {}
213

214 215 216
  MatrixBitCodeFunctor(const framework::Tensor& path_table,
                       const framework::Tensor& path_code, const int64_t* ids)
      : num_classes_(static_cast<size_t>(path_table.dims()[1])),
217
        ids_(ids),
218
        code_table_(CustomCodeTable<int64_t>(path_table, path_code, ids)) {}
Y
Yancey1989 已提交
219 220 221
  /* For j < code_length
       tmat(i, j) += vec(0, index(i, j))
  */
J
JiabinYang 已提交
222
  void Add(const framework::Tensor& vec, framework::Tensor* tmat);
Y
Yancey1989 已提交
223

Y
Yancey1989 已提交
224 225 226
  /* For j < code_length
       vec(0, index(i, j)) += tmat(i, j)
  */
J
JiabinYang 已提交
227
  void AddGrad(const framework::Tensor& tmat, framework::Tensor* vec);
Y
Yancey1989 已提交
228 229

  /* For j < code_length
Y
Yancey1989 已提交
230
    sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
Y
Yancey1989 已提交
231
  */
J
JiabinYang 已提交
232
  void Sum(const framework::Tensor& tmat, framework::Tensor* sum, T scale_sum);
Y
Yancey1989 已提交
233

Y
Yancey1989 已提交
234 235 236
  /* For j < code_length
       tmat(i, j) -= bit(i, j)
  */
J
JiabinYang 已提交
237
  void Sub(framework::Tensor* tmat);
Y
Yancey1989 已提交
238 239 240
  /* For j < code_length
       input.row(i) += tmat(i, j) * weight.row(index(i, j))
  */
J
JiabinYang 已提交
241 242
  void Mul(framework::Tensor* tmat, const framework::Tensor& weight,
           const framework::Tensor& input);
Y
Yancey1989 已提交
243

Y
Yancey1989 已提交
244 245 246
  /* For index(i, j) >= 0:
      weight.row(index(i, j)) += tmat(i, j) * input.row(i)
  */
J
JiabinYang 已提交
247 248
  void MulGradWeight(const framework::Tensor& tmat, framework::Tensor* weight,
                     const framework::Tensor& input);
J
JiabinYang 已提交
249 250 251
  /* For SelectedRows Weight, For index(i, j) >= 0:
      weight.row(index(i, j)) += tmat(i, j) * input.row(i)
  */
J
JiabinYang 已提交
252
  void MulGradWeight(const framework::Tensor& tmat,
J
JiabinYang 已提交
253
                     framework::SelectedRows* weight,
J
JiabinYang 已提交
254
                     const framework::Tensor& input);
Y
Yancey1989 已提交
255 256 257
  /* For j < code_length
    input.row(i) += tmat(i, j) * weight.row(index(i, j))
  */
J
JiabinYang 已提交
258 259
  void MulGradError(const framework::Tensor& tmat,
                    const framework::Tensor& weight, framework::Tensor* input);
W
weixing02 已提交
260

Y
Yancey1989 已提交
261 262
  size_t num_classes_;
  const int64_t* ids_;
Y
Yu Yang 已提交
263
  CodeTable code_table_;
Y
Yancey1989 已提交
264
};
Y
Yancey1989 已提交
265 266 267
}  // namespace math
}  // namespace operators
}  // namespace paddle