/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/matrix_bit_code.h"
#include <iostream>
namespace paddle {
namespace operators {
namespace math {

template <typename T>
J
JiabinYang 已提交
22 23
void MatrixBitCodeFunctor<T>::Add(const framework::Tensor& vec,
                                  framework::Tensor* tmat) {
W
weixing02 已提交
24 25
  size_t batch_size = tmat->dims()[0];
  size_t width = tmat->dims()[1];
Y
Yu Yang 已提交
26 27
  auto* tmat_data = tmat->data<T>();
  auto* vec_data = vec.data<T>();
Y
Yancey1989 已提交
28
  for (size_t i = 0; i < batch_size; ++i) {
J
JiabinYang 已提交
29
    auto code = code_table_->get_code(i);
30
    int code_length = code->get_length();
Y
Yancey1989 已提交
31
    for (int j = 0; j < code_length; ++j) {
32
      size_t index = code->calc_index(j);
Y
Yu Yang 已提交
33
      tmat_data[i * width + j] += vec_data[index];
Y
Yancey1989 已提交
34 35 36 37
    }
  }
}

template <typename T>
J
JiabinYang 已提交
39 40
void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
                                      framework::Tensor* vec) {
Y
Yancey1989 已提交
41 42
  size_t batch_size = tmat.dims()[0];
  size_t width = tmat.dims()[1];
Y
Yu Yang 已提交
43 44
  auto* vec_data = vec->data<T>();
  auto* tmat_data = tmat.data<T>();
Y
Yancey1989 已提交
45
  for (size_t i = 0; i < batch_size; ++i) {
J
JiabinYang 已提交
46
    auto code = code_table_->get_code(i);
47
    int code_length = code->get_length();
Y
Yancey1989 已提交
48
    for (int j = 0; j < code_length; ++j) {
49
      size_t index = code->calc_index(j);
Y
Yu Yang 已提交
50
      vec_data[index] += tmat_data[i * width + j];
Y
Yancey1989 已提交
51 52
    }
  }
Y
Yancey1989 已提交
53 54
}

template <typename T>
J
JiabinYang 已提交
56
void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
J
JiabinYang 已提交
57 58 59
                                      framework::SelectedRows* vec) {
  size_t batch_size = tmat.dims()[0];
  size_t width = tmat.dims()[1];
Y
Yu Yang 已提交
60 61
  auto* vec_data = vec->mutable_value()->data<T>();
  auto* tmat_data = tmat.data<T>();
J
JiabinYang 已提交
62
  for (size_t i = 0; i < batch_size; ++i) {
J
JiabinYang 已提交
63
    auto code = code_table_->get_code(i);
J
JiabinYang 已提交
64 65 66
    int code_length = code->get_length();
    for (int j = 0; j < code_length; ++j) {
      size_t index = code->calc_index(j);
J
JiabinYang 已提交
67
      int64_t row_index = vec->GetIndexFromId(static_cast<int64_t>(index));
Y
Yu Yang 已提交
68
      vec_data[row_index] += tmat_data[i * width + j];
J
JiabinYang 已提交
69 70 71 72
    }
  }
}

template <typename T>
J
JiabinYang 已提交
74 75
void MatrixBitCodeFunctor<T>::Sum(const framework::Tensor& tmat,
                                  framework::Tensor* sum, T scale_sum) {
Y
Yancey1989 已提交
76 77
  size_t num_samples = tmat.dims()[0];
  size_t o_width = tmat.dims()[1];
Y
Yu Yang 已提交
78 79
  auto* tmat_data = tmat.data<T>();
  auto* sum_data = sum->data<T>();
Y
Yancey1989 已提交
80
  for (size_t i = 0; i < num_samples; ++i) {
Y
Yancey1989 已提交
81
    T sm = static_cast<T>(0.0);
J
JiabinYang 已提交
82
    auto code = code_table_->get_code(i);
83
    int code_length = code->get_length();
Y
Yancey1989 已提交
84
    for (int j = 0; j < code_length; ++j) {
85
      if (code->calc_bit(j)) {
86 87
        // calc_bit starts from right most bit, while data in tmat[i] is in the
        // reverse order.
Y
Yu Yang 已提交
88
        sm += tmat_data[i * o_width + j];
Y
Yancey1989 已提交
89 90
      }
    }
Y
Yu Yang 已提交
91
    sum_data[i] = scale_sum * sm;
Y
Yancey1989 已提交
92 93
  }
}
template <typename T>
J
JiabinYang 已提交
96 97 98
void MatrixBitCodeFunctor<T>::Mul(framework::Tensor* tmat,
                                  const framework::Tensor& weight,
                                  const framework::Tensor& input) {
J
JiabinYang 已提交
99 100
  auto blas =
      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
W
weixing02 已提交
101 102
  size_t num_samples = tmat->dims()[0];
  size_t tmat_width = tmat->dims()[1];
Y
Yancey1989 已提交
103
  size_t input_width = input.dims()[1];
W
weixing02 已提交
104 105
  size_t weight_width = weight.dims()[1];
  auto tmat_value = tmat->data<T>();
Y
Yancey1989 已提交
106 107
  auto weight_value = weight.data<T>();
  auto input_value = input.data<T>();
Y
Yancey1989 已提交
108
  for (size_t i = 0; i < num_samples; ++i) {
J
JiabinYang 已提交
109
    auto code = code_table_->get_code(i);
110
    int code_length = code->get_length();
J
JiabinYang 已提交
111
    const T* input_row = input_value + input_width * i;
Y
Yancey1989 已提交
112
    for (int j = 0; j < code_length; ++j) {
113
      size_t index = code->calc_index(j);
J
JiabinYang 已提交
114
      const T* weight_row = weight_value + weight_width * index;
Y
Yancey1989 已提交
115
      T sum = static_cast<T>(0.0);
J
JiabinYang 已提交
116
      sum = blas.DOT(input_width, weight_row, input_row);
Y
Yancey1989 已提交
117
      tmat_value[i * tmat_width + j] += sum;
Y
Yancey1989 已提交
118 119 120 121 122
    }
  }
}

template <typename T>
J
JiabinYang 已提交
123 124 125
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
                                            framework::Tensor* weight,
                                            const framework::Tensor& input) {
J
JiabinYang 已提交
126 127
  auto blas =
      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
Y
Yancey1989 已提交
128 129
  size_t num_samples = tmat.dims()[0];
  size_t input_width = input.dims()[1];
W
weixing02 已提交
130 131
  size_t tmat_width = tmat.dims()[1];
  size_t weight_width = weight->dims()[1];
Y
Yancey1989 已提交
132
  auto tmat_value = tmat.data<T>();
W
weixing02 已提交
133
  auto weight_value = weight->data<T>();
Y
Yancey1989 已提交
134
  auto input_value = input.data<T>();
J
JiabinYang 已提交
135 136 137

  std::unordered_map<int, std::vector<std::pair<T, const T*>>> ops;

Y
Yancey1989 已提交
138
  for (size_t i = 0; i < num_samples; ++i) {
J
JiabinYang 已提交
139
    auto code = code_table_->get_code(i);
140
    int code_length = code->get_length();
J
JiabinYang 已提交
141 142
    const T* input_value_row = input_value + input_width * i;
    const T* tmat_row = tmat_value + i * tmat_width;
Y
Yancey1989 已提交
143
    for (int j = 0; j < code_length; ++j) {
J
JiabinYang 已提交
144 145 146 147 148 149 150 151 152 153
      ops[code->calc_index(j)].emplace_back(tmat_row[j], input_value_row);
    }
  }
  for (auto& op : ops) {
    auto& op_in_row = op.second;
    for (auto& pair : op_in_row) {
      auto& scale = pair.first;
      auto* input_row = pair.second;
      T* weight_row = weight_value + op.first * weight_width;
      blas.AXPY(input_width, scale, input_row, weight_row);
Y
Yancey1989 已提交
154
    }
Y
Yancey1989 已提交
155
  }
Y
Yancey1989 已提交
156
}
template <typename T>
J
JiabinYang 已提交
159
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
J
JiabinYang 已提交
160
                                            framework::SelectedRows* weight,
J
JiabinYang 已提交
161
                                            const framework::Tensor& input) {
J
JiabinYang 已提交
162 163
  auto blas =
      GetBlas<platform::CPUDeviceContext, T>(platform::CPUDeviceContext());
J
JiabinYang 已提交
164 165 166 167 168 169 170
  size_t num_samples = tmat.dims()[0];
  size_t input_width = input.dims()[1];
  size_t tmat_width = tmat.dims()[1];
  size_t weight_width = weight->value().dims()[1];
  auto tmat_value = tmat.data<T>();
  auto weight_value = weight->mutable_value()->data<T>();
  auto input_value = input.data<T>();
J
JiabinYang 已提交
171 172 173 174

  std::unordered_map<int, std::vector<std::pair<T, const T*>>> ops;
  ops.reserve(weight->rows().size());

J
JiabinYang 已提交
175
  for (size_t i = 0; i < num_samples; ++i) {
J
JiabinYang 已提交
176
    auto code = code_table_->get_code(i);
J
JiabinYang 已提交
177
    int code_length = code->get_length();
J
JiabinYang 已提交
178 179
    const T* input_value_row = input_value + input_width * i;
    const T* tmat_row = tmat_value + i * tmat_width;
J
JiabinYang 已提交
180
    for (int j = 0; j < code_length; ++j) {
J
JiabinYang 已提交
181 182 183 184 185 186 187 188 189 190
      ops[code->calc_index(j)].emplace_back(tmat_row[j], input_value_row);
    }
  }

  for (auto& row : weight->rows()) {
    auto& op_in_row = ops[row];
    for (auto& pair : op_in_row) {
      auto& scale = pair.first;
      auto* input_row = pair.second;
      blas.AXPY(input_width, scale, input_row, weight_value);
J
JiabinYang 已提交
191
    }
J
JiabinYang 已提交
192
    weight_value += weight_width;
J
JiabinYang 已提交
193 194
  }
}
template <typename T>
J
JiabinYang 已提交
197 198 199
void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor& tmat,
                                           const framework::Tensor& weight,
                                           framework::Tensor* input) {
Y
Yancey1989 已提交
200
  size_t num_samples = tmat.dims()[0];
W
weixing02 已提交
201 202
  size_t tmat_width = tmat.dims()[1];
  size_t input_width = input->dims()[1];
Y
Yancey1989 已提交
203
  size_t weight_width = weight.dims()[1];
Y
Yancey1989 已提交
204 205
  auto tmat_value = tmat.data<T>();
  auto weight_value = weight.data<T>();
W
weixing02 已提交
206
  auto input_value = input->data<T>();
Y
Yancey1989 已提交
207

Y
Yancey1989 已提交
208
  for (size_t i = 0; i < num_samples; ++i) {
J
JiabinYang 已提交
209
    auto code = code_table_->get_code(i);
210
    int code_length = code->get_length();
Y
Yancey1989 已提交
211
    for (int j = 0; j < code_length; ++j) {
212
      size_t index = code->calc_index(j);
Y
Yancey1989 已提交
213 214

      for (size_t k = 0; k < input_width; ++k) {
W
weixing02 已提交
215 216 217
        input_value[input_width * i + k] +=
            tmat_value[i * tmat_width + j] *
            weight_value[weight_width * index + k];
Y
Yancey1989 已提交
218 219 220 221 222 223
      }
    }
  }
}

template <typename T>
J
JiabinYang 已提交
224
void MatrixBitCodeFunctor<T>::Sub(framework::Tensor* tmat) {
W
weixing02 已提交
225 226
  size_t num_samples = tmat->dims()[0];
  size_t o_width = tmat->dims()[1];
Y
Yu Yang 已提交
227
  auto* tmat_data = tmat->data<T>();
Y
Yancey1989 已提交
228
  for (size_t i = 0; i < num_samples; ++i) {
J
JiabinYang 已提交
229
    auto code = code_table_->get_code(i);
230
    int code_length = code->get_length();
Y
Yancey1989 已提交
231
    for (int j = 0; j < code_length; ++j) {
232
      if (code->calc_bit(j)) {
Y
Yu Yang 已提交
233
        tmat_data[i * o_width + j] -= 1;
Y
Yancey1989 已提交
234 235 236
      }
    }
  }
Y
Yancey1989 已提交
237 238
}

// Explicit template instantiations for the supported element types.
template class MatrixBitCodeFunctor<float>;
template class MatrixBitCodeFunctor<double>;

}  // namespace math
}  // namespace operators
}  // namespace paddle