matrix_bit_code.h 8.1 KB
Newer Older
Y
Yancey1989 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
J
JiabinYang 已提交
16 17
#include <utility>
#include <vector>
18

19 20 21
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/utils/variant.h"
Y
Yancey1989 已提交
22

D
dzhwinter 已提交
23 24
#if defined(_WIN32)
#include <intrin.h>
25 26 27
#ifndef NOMINMAX
#define NOMINMAX  // msvc max/min macro conflict with std::min/max
#endif
D
dzhwinter 已提交
28 29 30
#include <windows.h>
#endif  // _WIN32

31 32
namespace phi {
namespace funcs {
W
weixing02 已提交
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
/**
 * A code table class (e.g. SimpleCodeTable) should support 3 functions:
 *
 * size_t size()
 *   return the number of ids
 *
 * int get_max_code_length()
 *   return the maximal code length
 *
 * SimpleCode get_code(int64_t i)
 *   return the i-th code. The code class is described below.
 *
 * A code class (e.g. SimpleCode) should support 3 functions:
 *
 * int get_length()
 *   return the length of the code
 *
 * size_t calc_index(int bit)
 *   bit ranges from 0 to get_length() - 1
 *   return the index for the (1+bit) level parent
 *
 * bool calc_bit(int bit)
 *   return true if the bit level parent is the right child of (1+bit) level
 *   parent
 *
 */
Y
Yancey1989 已提交
59 60 61 62 63 64

/**
 * return the 1-based index of the highest bit set
 *
 * for x > 0:
 * \f[
 *    FindLastSet(x) = 1 + \lfloor \log_{2} x \rfloor
 * \f]
 */
D
dzhwinter 已提交
68
#if !defined(_WIN32)
Y
Yancey1989 已提交
69 70 71 72 73 74
// Returns the 1-based position of the most significant set bit of `x`, or 0
// when `x` is 0. Usable in constant expressions.
//
// size_t may be unsigned int, unsigned long or unsigned long long depending on
// the ABI, so the count-leading-zeros builtin of the matching width is picked
// through std::is_same; the arms that do not match are never evaluated.
// Zero is filtered out first because the __builtin_clz* family has no defined
// result for a zero argument.
inline constexpr size_t FindLastSet(size_t x) {
  return x == 0
             ? 0
             : (std::is_same<size_t, unsigned int>::value
                    ? 8 * sizeof(x) - __builtin_clz(x)
                    : (std::is_same<size_t, unsigned long>::value  // NOLINT
                           ? 8 * sizeof(x) - __builtin_clzl(x)
                           : 8 * sizeof(x) - __builtin_clzll(x)));
}
D
dzhwinter 已提交
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
#else
// windows don't have built-in clz, ctz function
template <typename T>
inline int ctz(const T& value) {
  DWORD trailing_zero = 0;
  if (_BitScanForward(&trailing_zero, value)) {
    return static_cast<int>(trailing_zero);
  } else {
    return static_cast<int>(0);
  }
}

template <typename T>
inline int clz(const T& value) {
  DWORD leadning_zero = 0;
  if (_BitScanReverse(&leadning_zero, value)) {
    return static_cast<int>(sizeof(T) * 8 - leadning_zero);
  } else {
    return static_cast<int>(0);
  }
}

98
// Windows variant of FindLastSet: 1-based position of the highest set bit of
// `x`, or 0 when `x` is 0 (matching the non-Windows implementation).
inline size_t FindLastSet(size_t x) {
  // clz() returns 0 when no bit is set, which would turn the expression below
  // into 1 + bit-width instead of 0, so handle zero explicitly.
  if (x == 0) {
    return 0;
  }
  return 1 + sizeof(size_t) * 8 - clz(x);
}
D
dzhwinter 已提交
99
#endif  // !_WIN32
Y
Yu Yang 已提交
100
// Code of a single sample under the default (arithmetic) encoding: the class
// id is looked up in `ids` and offset by `num_classes`; path indices and path
// bits are then derived from the bits of that number.
class SimpleCode {
 public:
  // `code` is the position of the sample inside `ids`; `ids[code]` is its
  // class id. `ids` is only read during construction.
  SimpleCode(size_t code, size_t num_classes, const int64_t* ids)
      : c_(static_cast<size_t>(ids[code]) + num_classes) {}

  /**
   * Here the id of root should be 1 rather than 0, thus the encoding of class c
   * is `c + num_classes` and all siblings can get the same weight index using
   * prefixes.
   * Weight index is the prefixes of encoding, thus leave out the right most
   * bit in calc_index.
   * Binary classification path is the suffixes of encoding, thus leave out the
   * left most bit in calc_bit.
   */
  size_t calc_index(int bit) const { return (c_ >> (bit + 1)) - 1; }

  // Tests bit `bit` of the encoding. The encoding itself is shifted (rather
  // than an `int` literal 1) so the test stays correct for bit >= 31.
  bool calc_bit(int bit) const { return ((c_ >> bit) & 1) != 0; }

  // Number of path steps: position of the highest set bit minus the leading
  // one that marks the root.
  int get_length() const { return static_cast<int>(FindLastSet(c_) - 1); }

 private:
  size_t c_;  // class id + num_classes
};

J
JiabinYang 已提交
121
template <typename T>
Y
Yu Yang 已提交
122
class CustomCode {
123
 public:
124 125
  CustomCode(const phi::DenseTensor& path_table,
             const phi::DenseTensor& path_code,
126
             const int64_t* ids,
127 128 129 130
             int index) {
    seq_len_ = path_table.dims()[1];
    path_table_data_ = path_table.data<T>() + seq_len_ * index;
    path_code_data_ = path_code.data<T>() + seq_len_ * index;
J
JiabinYang 已提交
131
  }
132
  /**
Y
Yu Yang 已提交
133
   * Here the id of root should be 1 rather than 0, thus the encoding of class c
T
tianshuo78520a 已提交
134
   * is `c + num_classes` and all siblings can get the same weight index using
135 136 137 138 139 140
   * prefixes.
   * Weight index is the prefixes of encoding, thus leave out the right most
   * bit in calc_index.
   * Binary classification path is the suffixes of encoding, thus leave out the
   * left most bit in calc_bit.
   */
141 142
  size_t calc_index(int bit) const { return path_table_data_[bit]; }
  bool calc_bit(int bit) const { return path_code_data_[bit]; }
143

Y
Yu Yang 已提交
144
  // NOTE: this function is not thread-safe.
Y
Yu Yang 已提交
145
  int get_length() const {
Y
Yu Yang 已提交
146 147
    if (length_ < 0) {
      auto len = seq_len_;
148 149 150 151 152
      length_ =
          static_cast<int>(std::find_if(path_table_data_,
                                        path_table_data_ + len,
                                        [](const T& val) { return val < 0; }) -
                           path_table_data_);
153
    }
Y
Yu Yang 已提交
154
    return length_;
155 156 157
  }

 private:
Y
Yu Yang 已提交
158
  int64_t seq_len_;
159 160
  const T* path_table_data_;
  const T* path_code_data_;
Y
Yu Yang 已提交
161
  mutable int length_{-1};
162 163
};

Y
Yu Yang 已提交
164
class SimpleCodeTable {
165
 public:
J
JiabinYang 已提交
166
  SimpleCodeTable(size_t num_classes, const int64_t* ids)
167
      : num_classes_(num_classes), ids_(ids) {}
Y
Yu Yang 已提交
168

Y
Yu Yang 已提交
169 170
  SimpleCode get_code(int64_t code) const {
    return SimpleCode(code, num_classes_, ids_);
Y
Yancey1989 已提交
171
  }
Y
Yu Yang 已提交
172

Y
Yancey1989 已提交
173 174 175 176 177
  size_t size() const { return num_classes_; }
  int get_max_code_length() const { return FindLastSet(num_classes_ - 1); }

 private:
  size_t num_classes_;
178 179 180
  const int64_t* ids_;
};

J
JiabinYang 已提交
181
template <typename T>
Y
Yu Yang 已提交
182
class CustomCodeTable {
183
 public:
184 185
  CustomCodeTable(const phi::DenseTensor& path_table,
                  const phi::DenseTensor& path_code,
186
                  const int64_t* ids)
187
      : ptable_(path_table), pcode_(path_code), ids_(ids) {}
188

Y
Yu Yang 已提交
189 190
  CustomCode<T> get_code(int64_t code) const {
    return CustomCode<T>(ptable_, pcode_, ids_, code);
191 192
  }

J
JiabinYang 已提交
193
  size_t size() const { return static_cast<size_t>(ptable_.dims()[1]); }
194
  int get_max_code_length() const {
J
JiabinYang 已提交
195
    return static_cast<size_t>(ptable_.dims()[1]);
196 197 198
  }

 private:
199 200
  const phi::DenseTensor& ptable_;
  const phi::DenseTensor& pcode_;
201
  const int64_t* ids_;
Y
Yancey1989 已提交
202 203
};

R
Ruibiao Chen 已提交
204
// Holds either of the two code-table flavours defined above;
// MatrixBitCodeFunctor stores one of these as its `code_table_` member.
using CodeTable = paddle::variant<SimpleCodeTable, CustomCodeTable<int64_t>>;
Y
Yu Yang 已提交
205

Y
Yancey1989 已提交
206
template <typename T>
Y
Yancey1989 已提交
207 208
class MatrixBitCodeFunctor {
 public:
J
JiabinYang 已提交
209
  MatrixBitCodeFunctor(size_t num_classes, const int64_t* ids)
210 211
      : num_classes_(num_classes),
        ids_(ids),
Y
Yu Yang 已提交
212
        code_table_(SimpleCodeTable(num_classes, ids)) {}
213

214 215
  MatrixBitCodeFunctor(const phi::DenseTensor& path_table,
                       const phi::DenseTensor& path_code,
216
                       const int64_t* ids)
217
      : num_classes_(static_cast<size_t>(path_table.dims()[1])),
218
        ids_(ids),
219
        code_table_(CustomCodeTable<int64_t>(path_table, path_code, ids)) {}
Y
Yancey1989 已提交
220 221 222
  /* For j < code_length
       tmat(i, j) += vec(0, index(i, j))
  */
223
  void Add(const phi::DenseTensor& vec, phi::DenseTensor* tmat);
Y
Yancey1989 已提交
224

Y
Yancey1989 已提交
225 226 227
  /* For j < code_length
       vec(0, index(i, j)) += tmat(i, j)
  */
228
  void AddGrad(const phi::DenseTensor& tmat, phi::DenseTensor* vec);
Y
Yancey1989 已提交
229 230

  /* For j < code_length
Y
Yancey1989 已提交
231
    sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
Y
Yancey1989 已提交
232
  */
233
  void Sum(const phi::DenseTensor& tmat, phi::DenseTensor* sum, T scale_sum);
Y
Yancey1989 已提交
234

Y
Yancey1989 已提交
235 236 237
  /* For j < code_length
       tmat(i, j) -= bit(i, j)
  */
238
  void Sub(phi::DenseTensor* tmat);
Y
Yancey1989 已提交
239 240 241
  /* For j < code_length
       input.row(i) += tmat(i, j) * weight.row(index(i, j))
  */
242 243 244
  void Mul(phi::DenseTensor* tmat,
           const phi::DenseTensor& weight,
           const phi::DenseTensor& input);
Y
Yancey1989 已提交
245

Y
Yancey1989 已提交
246 247 248
  /* For index(i, j) >= 0:
      weight.row(index(i, j)) += tmat(i, j) * input.row(i)
  */
249 250 251
  void MulGradWeight(const phi::DenseTensor& tmat,
                     phi::DenseTensor* weight,
                     const phi::DenseTensor& input);
J
JiabinYang 已提交
252 253 254
  /* For SelectedRows Weight, For index(i, j) >= 0:
      weight.row(index(i, j)) += tmat(i, j) * input.row(i)
  */
255
  void MulGradWeight(const phi::DenseTensor& tmat,
256
                     phi::SelectedRows* weight,
257
                     const phi::DenseTensor& input);
Y
Yancey1989 已提交
258 259 260
  /* For j < code_length
    input.row(i) += tmat(i, j) * weight.row(index(i, j))
  */
261 262 263
  void MulGradError(const phi::DenseTensor& tmat,
                    const phi::DenseTensor& weight,
                    phi::DenseTensor* input);
W
weixing02 已提交
264

Y
Yancey1989 已提交
265 266
  size_t num_classes_;
  const int64_t* ids_;
Y
Yu Yang 已提交
267
  CodeTable code_table_;
Y
Yancey1989 已提交
268
};
269 270
}  // namespace funcs
}  // namespace phi