/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/matrix_bit_code.h"
#include <iostream>
#include <map>
#include <unordered_map>
#include <utility>
#include <vector>
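
// MatrixBitCodeFunctor<T> applies per-sample binary codes (code_table_) to
// dense tensors: get_code(i) returns the code of sample i, calc_index(j) the
// node/row index of its j-th step, and calc_bit(j) the corresponding bit.
// It is used, for example, by the hierarchical sigmoid operator.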
namespace paddle {
namespace operators {
namespace math {

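// Add: tmat(i, j) += vec(index), where index = code->calc_index(j), for every
// node j on sample i's code path.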
template <typename T>
void MatrixBitCodeFunctor<T>::Add(const framework::Tensor& vec,
                                  framework::Tensor* tmat) {
  size_t batch_size = tmat->dims()[0];
  size_t width = tmat->dims()[1];
  auto* tmat_data = tmat->data<T>();
  auto* vec_data = vec.data<T>();
  for (size_t i = 0; i < batch_size; ++i) {
    auto code = code_table_->get_code(i);
    int code_length = code->get_length();
    for (int j = 0; j < code_length; ++j) {
      size_t index = code->calc_index(j);
      tmat_data[i * width + j] += vec_data[index];
    }
  }
}

template <typename T>
void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
                                      framework::Tensor* vec) {
  size_t batch_size = tmat.dims()[0];
  size_t width = tmat.dims()[1];
  auto* vec_data = vec->data<T>();
  auto* tmat_data = tmat.data<T>();
  for (size_t i = 0; i < batch_size; ++i) {
    auto code = code_table_->get_code(i);
    int code_length = code->get_length();
    for (int j = 0; j < code_length; ++j) {
      size_t index = code->calc_index(j);
      vec_data[index] += tmat_data[i * width + j];
    }
  }
}

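// AddGrad (SelectedRows overload): same scatter as above, but the destination
// is a SelectedRows, so the node id is first mapped to a row of its value
// tensor via GetIndexFromId.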
template <typename T>
void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
                                      framework::SelectedRows* vec) {
  size_t batch_size = tmat.dims()[0];
  size_t width = tmat.dims()[1];
  auto* vec_data = vec->mutable_value()->data<T>();
  auto* tmat_data = tmat.data<T>();
  for (size_t i = 0; i < batch_size; ++i) {
    auto code = code_table_->get_code(i);
    int code_length = code->get_length();
    for (int j = 0; j < code_length; ++j) {
      size_t index = code->calc_index(j);
      int64_t row_index = vec->GetIndexFromId(static_cast<int64_t>(index));
      vec_data[row_index] += tmat_data[i * width + j];
    }
  }
}

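// Sum: sum(i) = scale_sum * sum_j tmat(i, j), taken over the positions j
// whose code bit is set (calc_bit(j) == true).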
template <typename T>
void MatrixBitCodeFunctor<T>::Sum(const framework::Tensor& tmat,
                                  framework::Tensor* sum, T scale_sum) {
  size_t num_samples = tmat.dims()[0];
  size_t o_width = tmat.dims()[1];
  auto* tmat_data = tmat.data<T>();
  auto* sum_data = sum->data<T>();
  for (size_t i = 0; i < num_samples; ++i) {
    T sm = static_cast<T>(0.0);
    auto code = code_table_->get_code(i);
    int code_length = code->get_length();
    for (int j = 0; j < code_length; ++j) {
      if (code->calc_bit(j)) {
        // calc_bit starts from the rightmost bit, while data in tmat[i] is in
        // the reverse order.
        sm += tmat_data[i * o_width + j];
      }
    }
    sum_data[i] = scale_sum * sm;
  }
}

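// Mul: tmat(i, j) += dot(weight.row(calc_index(j)), input.row(i)), i.e. the
// inner product of the input row with the weight row of each node on sample
// i's code path.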
template <typename T>
void MatrixBitCodeFunctor<T>::Mul(framework::Tensor* tmat,
                                  const framework::Tensor& weight,
                                  const framework::Tensor& input) {
  auto blas =
      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
  size_t num_samples = tmat->dims()[0];
  size_t tmat_width = tmat->dims()[1];
  size_t input_width = input.dims()[1];
  size_t weight_width = weight.dims()[1];
  auto tmat_value = tmat->data<T>();
  auto weight_value = weight.data<T>();
  auto input_value = input.data<T>();
  for (size_t i = 0; i < num_samples; ++i) {
    auto code = code_table_->get_code(i);
    int code_length = code->get_length();
    const T* input_row = input_value + input_width * i;
    for (int j = 0; j < code_length; ++j) {
      size_t index = code->calc_index(j);
      const T* weight_row = weight_value + weight_width * index;
      T sum = static_cast<T>(0.0);
      sum = blas.DOT(input_width, weight_row, input_row);
      tmat_value[i * tmat_width + j] += sum;
    }
  }
}

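// MulGradWeight (dense): weight.row(calc_index(j)) += tmat(i, j) * input.row(i).
// Updates are first grouped by node index so that each destination weight row
// is updated by a consecutive run of AXPY calls.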
template <typename T>
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
                                            framework::Tensor* weight,
                                            const framework::Tensor& input) {
  auto blas =
      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
  size_t num_samples = tmat.dims()[0];
  size_t input_width = input.dims()[1];
  size_t tmat_width = tmat.dims()[1];
  size_t weight_width = weight->dims()[1];
  auto tmat_value = tmat.data<T>();
  auto weight_value = weight->data<T>();
  auto input_value = input.data<T>();

  std::map<int, std::vector<std::pair<T, const T*>>> ops;
  for (size_t i = 0; i < num_samples; ++i) {
    auto code = code_table_->get_code(i);
    int code_length = code->get_length();
    const T* input_value_row = input_value + input_width * i;
    const T* tmat_row = tmat_value + i * tmat_width;
    for (int j = 0; j < code_length; ++j) {
      ops[code->calc_index(j)].emplace_back(tmat_row[j], input_value_row);
    }
  }
  for (auto& op : ops) {
    auto& op_in_row = op.second;
    for (auto& pair : op_in_row) {
      auto& scale = pair.first;
      auto* input_row = pair.second;
      T* weight_row = weight_value + op.first * weight_width;
      blas.AXPY(input_width, scale, input_row, weight_row);
    }
  }
}

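// MulGradWeight (SelectedRows overload): same weight update, but accumulated
// per node id in a hash map and then written out row by row in the order of
// weight->rows().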
template <typename T>
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
                                            framework::SelectedRows* weight,
                                            const framework::Tensor& input) {
  auto blas =
      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
  size_t num_samples = tmat.dims()[0];
  size_t input_width = input.dims()[1];
  size_t tmat_width = tmat.dims()[1];
  size_t weight_width = weight->value().dims()[1];
  auto tmat_value = tmat.data<T>();
  auto weight_value = weight->mutable_value()->data<T>();
  auto input_value = input.data<T>();

  std::unordered_map<int, std::vector<std::pair<T, const T*>>> ops;
  ops.reserve(weight->rows().size());

  for (size_t i = 0; i < num_samples; ++i) {
    auto code = code_table_->get_code(i);
    int code_length = code->get_length();
    const T* input_value_row = input_value + input_width * i;
    const T* tmat_row = tmat_value + i * tmat_width;
    for (int j = 0; j < code_length; ++j) {
      ops[code->calc_index(j)].emplace_back(tmat_row[j], input_value_row);
    }
  }

  for (auto& row : weight->rows()) {
    auto& op_in_row = ops[row];
    for (auto& pair : op_in_row) {
      auto& scale = pair.first;
      auto* input_row = pair.second;
      blas.AXPY(input_width, scale, input_row, weight_value);
    }
    weight_value += weight_width;
  }
}

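// MulGradError: gradient w.r.t. the input,
// input.row(i) += tmat(i, j) * weight.row(calc_index(j)) for every node j.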
template <typename T>
void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor& tmat,
                                           const framework::Tensor& weight,
                                           framework::Tensor* input) {
  size_t num_samples = tmat.dims()[0];
  size_t tmat_width = tmat.dims()[1];
  size_t input_width = input->dims()[1];
  size_t weight_width = weight.dims()[1];
  auto tmat_value = tmat.data<T>();
  auto weight_value = weight.data<T>();
  auto input_value = input->data<T>();

  for (size_t i = 0; i < num_samples; ++i) {
    auto code = code_table_->get_code(i);
    int code_length = code->get_length();
    for (int j = 0; j < code_length; ++j) {
      size_t index = code->calc_index(j);

      for (size_t k = 0; k < input_width; ++k) {
        input_value[input_width * i + k] +=
            tmat_value[i * tmat_width + j] *
            weight_value[weight_width * index + k];
      }
    }
  }
}

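// Sub: subtracts 1 from tmat(i, j) at every position j whose code bit is set
// (calc_bit(j) == true).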
template <typename T>
void MatrixBitCodeFunctor<T>::Sub(framework::Tensor* tmat) {
  size_t num_samples = tmat->dims()[0];
  size_t o_width = tmat->dims()[1];
  auto* tmat_data = tmat->data<T>();
  for (size_t i = 0; i < num_samples; ++i) {
    auto code = code_table_->get_code(i);
    int code_length = code->get_length();
    for (int j = 0; j < code_length; ++j) {
      if (code->calc_bit(j)) {
        tmat_data[i * o_width + j] -= 1;
      }
    }
  }
}

template class MatrixBitCodeFunctor<float>;
template class MatrixBitCodeFunctor<double>;

}  // namespace math
}  // namespace operators
}  // namespace paddle