matrix_bit_code.h 8.2 KB
Newer Older
Y
Yancey1989 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
Y
Yu Yang 已提交
16
#include <map>
J
JiabinYang 已提交
17 18 19
#include <unordered_map>
#include <utility>
#include <vector>
W
weixing02 已提交
20
#include "paddle/fluid/framework/eigen.h"
J
JiabinYang 已提交
21 22
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows.h"
W
weixing02 已提交
23
#include "paddle/fluid/framework/tensor.h"
J
JiabinYang 已提交
24
#include "paddle/fluid/operators/math/blas.h"
W
weixing02 已提交
25
#include "paddle/fluid/platform/device_context.h"
Y
Yu Yang 已提交
26
#include "paddle/fluid/platform/variant.h"
Y
Yancey1989 已提交
27

D
dzhwinter 已提交
28 29 30 31 32
#if defined(_WIN32)
#include <intrin.h>
#include <windows.h>
#endif  // _WIN32

Y
Yancey1989 已提交
33 34 35
namespace paddle {
namespace operators {
namespace math {
W
weixing02 已提交
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
/**
 * SimpleCodeTable class should support 3 functions:
 *
 * size_t size()
 *   return the number of ids
 *
 * int get_max_code_length()
 *   return the maximal code length
 *
 * SimpleCode get_code(int64_t code)
 *   return the code for the given entry. The SimpleCode class is described
 *   below.
 *
 * SimpleCode class should support 3 functions:
 *
 * int get_length()
 *   return the length of the code
 *
 * size_t calc_index(int bit)
 *   bit ranges from 0 to get_length() - 1
 *   return the index for the (1+bit) level parent
 *
 * bool calc_bit(int bit)
 *   return true if the bit level parent is the right child of (1+bit) level
 *   parent
 *
 */
Y
Yancey1989 已提交
62 63 64 65 66 67

/**
 * return the 1-based index of the highest bit set
 *
 * for x > 0:
 * \f[
 *    FindLastSet(x) = 1 + \left\lfloor \log_{2}x \right\rfloor
 * \f]
 */
D
dzhwinter 已提交
71
#if !defined(_WIN32)
Y
Yancey1989 已提交
72 73 74 75 76 77
inline constexpr size_t FindLastSet(size_t x) {
  // Written as one return expression so the function stays usable where
  // constexpr functions are limited to a single return statement.
  // Zero has no set bit and maps to 0. Otherwise the result is the bit width
  // of size_t minus the leading-zero count, obtained from whichever builtin
  // matches the underlying type of size_t.
  return x == 0
             ? 0
             : 8 * sizeof(x) -
                   (std::is_same<size_t, unsigned int>::value
                        ? __builtin_clz(x)
                        : (std::is_same<size_t, unsigned long>::value  // NOLINT
                               ? __builtin_clzl(x)
                               : __builtin_clzll(x)));
}
D
dzhwinter 已提交
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
#else
// windows don't have built-in clz, ctz function
template <typename T>
inline int ctz(const T& value) {
  DWORD trailing_zero = 0;
  if (_BitScanForward(&trailing_zero, value)) {
    return static_cast<int>(trailing_zero);
  } else {
    return static_cast<int>(0);
  }
}

template <typename T>
inline int clz(const T& value) {
  DWORD leadning_zero = 0;
  if (_BitScanReverse(&leadning_zero, value)) {
    return static_cast<int>(sizeof(T) * 8 - leadning_zero);
  } else {
    return static_cast<int>(0);
  }
}

/**
 * Windows implementation of FindLastSet: returns the 1-based index of the
 * highest set bit of x, or 0 when x is 0.
 *
 * The bit scan is issued directly so that the whole width of size_t is
 * examined: _BitScanReverse takes a 32-bit mask, so 64-bit builds use
 * _BitScanReverse64 instead.
 */
inline size_t FindLastSet(size_t x) {
  if (x == 0) {
    return 0;
  }
  DWORD highest_bit = 0;  // zero-based index of the most significant set bit
#if defined(_WIN64)
  _BitScanReverse64(&highest_bit, static_cast<unsigned __int64>(x));
#else
  _BitScanReverse(&highest_bit, static_cast<unsigned long>(x));  // NOLINT
#endif
  return static_cast<size_t>(highest_bit) + 1;
}
#endif  // !_WIN32
Y
Yu Yang 已提交
103
/**
 * Encoding of one class as the integer `ids[code] + num_classes`, with
 * accessors that read the weight index and the branch bit at a given level.
 */
class SimpleCode {
 public:
  /**
   * @param code         position inside `ids` of the class id to encode
   * @param num_classes  offset added to the class id to form the encoding
   * @param ids          array of class ids; ids[code] is read here
   */
  SimpleCode(size_t code, size_t num_classes, const int64_t* ids)
      : c_(static_cast<size_t>(ids[code]) + num_classes) {}

  /**
   * Here the id of root should be 1 rather than 0, thus the encoding of class
   * c is `c + num_classes` and all siblings can get the same weight index
   * using prefixes.
   * Weight index is the prefixes of encoding, thus leave out the right most
   * bit in calc_index.
   * Binary classification path is the suffixes of encoding, thus leave out the
   * left most bit in calc_bit.
   */
  size_t calc_index(int bit) const { return (c_ >> (bit + 1)) - 1; }

  // Shift the encoding instead of building the mask as `1 << bit`: that mask
  // is computed in int and overflows once bit reaches 31, while c_ is size_t.
  bool calc_bit(int bit) const {
    return ((c_ >> bit) & static_cast<size_t>(1)) != 0;
  }

  // Number of levels below the leading bit of the encoding.
  int get_length() const { return static_cast<int>(FindLastSet(c_) - 1); }

 private:
  size_t c_;  // ids[code] + num_classes
};

J
JiabinYang 已提交
124
template <typename T>
Y
Yu Yang 已提交
125
class CustomCode {
126
 public:
J
JiabinYang 已提交
127
  CustomCode(const framework::Tensor& ptable, const framework::Tensor& pcode,
Y
Yu Yang 已提交
128 129 130 131
             const int64_t* ids, int index) {
    seq_len_ = ptable.dims()[1];
    ptable_data_ = ptable.data<T>() + seq_len_ * index;
    pcode_data_ = pcode.data<T>() + seq_len_ * index;
J
JiabinYang 已提交
132
  }
133
  /**
Y
Yu Yang 已提交
134
   * Here the id of root should be 1 rather than 0, thus the encoding of class c
135 136 137 138 139 140 141
   * is `c + num_classes` and all siblings can get the same weight indice using
   * prefixes.
   * Weight index is the prefixes of encoding, thus leave out the right most
   * bit in calc_index.
   * Binary classification path is the suffixes of encoding, thus leave out the
   * left most bit in calc_bit.
   */
Y
Yu Yang 已提交
142 143
  size_t calc_index(int bit) const { return ptable_data_[bit]; }
  bool calc_bit(int bit) const { return pcode_data_[bit]; }
144

Y
Yu Yang 已提交
145
  // NOTE: this function is not thread-safe.
Y
Yu Yang 已提交
146
  int get_length() const {
Y
Yu Yang 已提交
147 148 149 150 151 152
    if (length_ < 0) {
      auto len = seq_len_;
      length_ =
          static_cast<int>(std::find_if(ptable_data_, ptable_data_ + len,
                                        [](const T& val) { return val < 0; }) -
                           ptable_data_);
153
    }
Y
Yu Yang 已提交
154
    return length_;
155 156 157
  }

 private:
Y
Yu Yang 已提交
158 159 160 161
  int64_t seq_len_;
  const T* ptable_data_;
  const T* pcode_data_;
  mutable int length_{-1};
162 163
};

Y
Yu Yang 已提交
164
class SimpleCodeTable {
165
 public:
J
JiabinYang 已提交
166
  SimpleCodeTable(size_t num_classes, const int64_t* ids)
167
      : num_classes_(num_classes), ids_(ids) {}
Y
Yu Yang 已提交
168

Y
Yu Yang 已提交
169 170
  SimpleCode get_code(int64_t code) const {
    return SimpleCode(code, num_classes_, ids_);
Y
Yancey1989 已提交
171
  }
Y
Yu Yang 已提交
172

Y
Yancey1989 已提交
173 174 175 176 177
  size_t size() const { return num_classes_; }
  int get_max_code_length() const { return FindLastSet(num_classes_ - 1); }

 private:
  size_t num_classes_;
178 179 180
  const int64_t* ids_;
};

J
JiabinYang 已提交
181
template <typename T>
Y
Yu Yang 已提交
182
class CustomCodeTable {
183
 public:
J
JiabinYang 已提交
184 185
  CustomCodeTable(const framework::Tensor& ptable,
                  const framework::Tensor& pcode, const int64_t* ids)
186 187
      : ptable_(ptable), pcode_(pcode), ids_(ids) {}

Y
Yu Yang 已提交
188 189
  CustomCode<T> get_code(int64_t code) const {
    return CustomCode<T>(ptable_, pcode_, ids_, code);
190 191
  }

J
JiabinYang 已提交
192
  size_t size() const { return static_cast<size_t>(ptable_.dims()[1]); }
193
  int get_max_code_length() const {
J
JiabinYang 已提交
194
    return static_cast<size_t>(ptable_.dims()[1]);
195 196 197
  }

 private:
J
JiabinYang 已提交
198 199
  const framework::Tensor& ptable_;
  const framework::Tensor& pcode_;
200
  const int64_t* ids_;
Y
Yancey1989 已提交
201 202
};

Y
Yu Yang 已提交
203 204
// Variant holding one of the two code-table implementations: SimpleCodeTable,
// or CustomCodeTable instantiated for int64_t table entries.
using CodeTable = boost::variant<SimpleCodeTable, CustomCodeTable<int64_t>>;

Y
Yancey1989 已提交
205
template <typename T>
Y
Yancey1989 已提交
206 207
class MatrixBitCodeFunctor {
 public:
J
JiabinYang 已提交
208
  MatrixBitCodeFunctor(size_t num_classes, const int64_t* ids)
209 210
      : num_classes_(num_classes),
        ids_(ids),
Y
Yu Yang 已提交
211
        code_table_(SimpleCodeTable(num_classes, ids)) {}
212

J
JiabinYang 已提交
213 214 215
  MatrixBitCodeFunctor(const framework::Tensor& ptable,
                       const framework::Tensor& pcode, const int64_t* ids)
      : num_classes_(static_cast<size_t>(ptable.dims()[1])),
216
        ids_(ids),
Y
Yu Yang 已提交
217
        code_table_(CustomCodeTable<int64_t>(ptable, pcode, ids)) {}
Y
Yancey1989 已提交
218 219 220
  /* For j < code_length
       tmat(i, j) += vec(0, index(i, j))
  */
J
JiabinYang 已提交
221
  void Add(const framework::Tensor& vec, framework::Tensor* tmat);
Y
Yancey1989 已提交
222

Y
Yancey1989 已提交
223 224 225
  /* For j < code_length
       vec(0, index(i, j)) += tmat(i, j)
  */
J
JiabinYang 已提交
226
  void AddGrad(const framework::Tensor& tmat, framework::Tensor* vec);
Y
Yancey1989 已提交
227

J
JiabinYang 已提交
228 229 230
  /* For selected rows For j < code_length
       vec(0, index(i, j)) += tmat(i, j)
  */
J
JiabinYang 已提交
231
  void AddGrad(const framework::Tensor& tmat, framework::SelectedRows* vec);
J
JiabinYang 已提交
232

Y
Yancey1989 已提交
233
  /* For j < code_length
Y
Yancey1989 已提交
234
    sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
Y
Yancey1989 已提交
235
  */
J
JiabinYang 已提交
236
  void Sum(const framework::Tensor& tmat, framework::Tensor* sum, T scale_sum);
Y
Yancey1989 已提交
237

Y
Yancey1989 已提交
238 239 240
  /* For j < code_length
       tmat(i, j) -= bit(i, j)
  */
J
JiabinYang 已提交
241
  void Sub(framework::Tensor* tmat);
Y
Yancey1989 已提交
242 243 244
  /* For j < code_length
       input.row(i) += tmat(i, j) * weight.row(index(i, j))
  */
J
JiabinYang 已提交
245 246
  void Mul(framework::Tensor* tmat, const framework::Tensor& weight,
           const framework::Tensor& input);
Y
Yancey1989 已提交
247

Y
Yancey1989 已提交
248 249 250
  /* For index(i, j) >= 0:
      weight.row(index(i, j)) += tmat(i, j) * input.row(i)
  */
J
JiabinYang 已提交
251 252
  void MulGradWeight(const framework::Tensor& tmat, framework::Tensor* weight,
                     const framework::Tensor& input);
J
JiabinYang 已提交
253 254 255
  /* For SelectedRows Weight, For index(i, j) >= 0:
      weight.row(index(i, j)) += tmat(i, j) * input.row(i)
  */
J
JiabinYang 已提交
256
  void MulGradWeight(const framework::Tensor& tmat,
J
JiabinYang 已提交
257
                     framework::SelectedRows* weight,
J
JiabinYang 已提交
258
                     const framework::Tensor& input);
Y
Yancey1989 已提交
259 260 261
  /* For j < code_length
    input.row(i) += tmat(i, j) * weight.row(index(i, j))
  */
J
JiabinYang 已提交
262 263
  void MulGradError(const framework::Tensor& tmat,
                    const framework::Tensor& weight, framework::Tensor* input);
W
weixing02 已提交
264

Y
Yancey1989 已提交
265 266
  size_t num_classes_;
  const int64_t* ids_;
Y
Yu Yang 已提交
267
  CodeTable code_table_;
Y
Yancey1989 已提交
268
};
Y
Yancey1989 已提交
269 270 271
}  // namespace math
}  // namespace operators
}  // namespace paddle