/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"

#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_meta.h"
#include "paddle/phi/core/visit_type.h"
#include "paddle/phi/kernels/funcs/sparse/common_shape.h"

namespace phi {
namespace sparse {

// Returns true iff every one of the n elements starting at `data` is zero.
template <typename T>
inline bool IsZero(const T* data, const size_t n) {
  for (size_t idx = 0; idx < n; ++idx) {
    if (data[idx] != static_cast<T>(0)) {
      return false;
    }
  }
  return true;
}

// TODO(zhangkaihuo): implement a kernel to count the number of non-zero
// elements in tensor
template <typename T>
inline int64_t GetNonZeroNum(const DenseTensor& dense,
                             const int64_t sparse_dim) {
  const auto& dims = dense.dims();
  PADDLE_ENFORCE_GE(
      dims.size(),
      sparse_dim,
46
      phi::errors::InvalidArgument(
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
          "sparse_dim(%d) should be less than or equal to dense.dim(%d)",
          sparse_dim,
          dims.size()));

  auto dims_2d = flatten_to_2d(dims, sparse_dim);
  const int rows = dims_2d[0];
  const int cols = dims_2d[1];

  const T* data = dense.data<T>();
  int64_t non_zero_num = 0;
  for (int64_t i = 0; i < rows; i++) {
    if (!IsZero(data + i * cols, cols)) {
      non_zero_num = non_zero_num + 1;
    }
  }
  return non_zero_num;
}

template <typename T, typename Context>
66 67 68 69
void DenseToCooKernel(const Context& dev_ctx,
                      const DenseTensor& x,
                      const int64_t sparse_dim,
                      SparseCooTensor* out) {
70 71
  const T* x_data = x.data<T>();
  const auto& x_dims = x.dims();
72 73 74 75 76 77
  PADDLE_ENFORCE_LE(sparse_dim,
                    x_dims.size(),
                    phi::errors::InvalidArgument(
                        "sparse_dim must be less than the size of x.dims()"));
  PADDLE_ENFORCE_GT(
      sparse_dim, 0, phi::errors::InvalidArgument("sparse_dim must be >0"));
78 79 80

  int64_t non_zero_num = GetNonZeroNum<T>(x, sparse_dim);

81 82
  const auto values_dims =
      phi::funcs::sparse::InferDenseDims(x_dims, sparse_dim, non_zero_num);
83
  DenseTensorMeta values_meta(x.meta().dtype, values_dims, x.meta().layout);
84 85
  phi::DenseTensor indices =
      phi::Empty<int64_t>(dev_ctx, {sparse_dim, non_zero_num});
86
  phi::DenseTensor values = phi::Empty(dev_ctx, std::move(values_meta));
87 88
  int64_t* indices_data = indices.data<int64_t>();
  T* values_data = values.data<T>();
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105

  auto dims_2d = flatten_to_2d(x_dims, sparse_dim);
  const int rows = dims_2d[0];
  const int cols = dims_2d[1];

  int index = 0;
  for (int i = 0; i < rows; i++) {
    if (!IsZero(x_data + i * cols, cols)) {
      int64_t sparse_index = i;
      for (int64_t j = sparse_dim - 1; j >= 0; j--) {
        indices_data[j * non_zero_num + index] = sparse_index % x_dims[j];
        sparse_index /= x_dims[j];
      }
      memcpy(values_data + index * cols, x_data + i * cols, cols * sizeof(T));
      ++index;
    }
  }
Z
zhangkaihuo 已提交
106

107 108 109
  out->SetMember(indices, values, x_dims, true);
}

// Converts a SparseCsrTensor (2-D, or 3-D batched) into a SparseCooTensor.
// The COO indices tensor is laid out as {sparse_dim, nnz}: for a 3-D input
// row 0 holds batch ids, followed by row indices, then column indices; for a
// 2-D input there is no batch row.
template <typename T, typename IntT>
void CsrToCooCPUKernel(const CPUContext& dev_ctx,
                       const SparseCsrTensor& x,
                       SparseCooTensor* out) {
  const DDim& x_dims = x.dims();
  const int64_t non_zero_num = x.cols().numel();
  // 2 sparse dims (rows, cols), plus one batch dim for 3-D input.
  int64_t sparse_dim = 2;
  if (x_dims.size() == 3) {
    sparse_dim = 3;
  }
  phi::DenseTensor indices =
      phi::Empty<IntT>(dev_ctx, {sparse_dim, non_zero_num});
  phi::DenseTensor values = phi::Empty<T>(dev_ctx, {non_zero_num});
  // Empty input: publish the (empty) indices/values and stop.
  if (x.nnz() <= 0) {
    out->SetMember(indices, values, x_dims, true);
    return;
  }
  const auto& csr_crows = x.crows();
  const auto& csr_cols = x.cols();
  const auto& csr_values = x.values();
  const IntT* csr_crows_data = csr_crows.data<IntT>();
  const IntT* csr_cols_data = csr_cols.data<IntT>();
  const T* csr_values_data = csr_values.data<T>();

  // Carve the single indices buffer into its logical rows:
  // [batch (3-D only)] [rows] [cols], each of length non_zero_num.
  IntT* coo_indices = indices.data<IntT>();
  IntT* batch_ptr = x_dims.size() == 2 ? nullptr : coo_indices;
  IntT* coo_rows_data =
      x_dims.size() == 2 ? coo_indices : batch_ptr + non_zero_num;
  IntT* coo_cols_data = coo_rows_data + non_zero_num;
  T* coo_values_data = values.data<T>();

  int batch = x_dims.size() == 2 ? 1 : x_dims[0];
  int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];

  // Expand each batch's compressed row pointers (rows + 1 entries per batch)
  // into one explicit row index per non-zero element.
  int index = 0;
  for (int b = 0; b < batch; b++) {
    for (int i = 0; i < rows; i++) {
      for (IntT j = csr_crows_data[b * (rows + 1) + i];
           j < csr_crows_data[b * (rows + 1) + i + 1];
           j++) {
        coo_rows_data[index] = i;
        if (batch_ptr) {
          batch_ptr[index] = b;
        }
        ++index;
      }
    }
  }

  // Column indices and values are shared verbatim between CSR and COO.
  memcpy(coo_cols_data, csr_cols_data, sizeof(IntT) * non_zero_num);
  memcpy(coo_values_data, csr_values_data, sizeof(T) * non_zero_num);
  out->SetMember(indices, values, x_dims, true);
}

// Public entry point: dispatches CsrToCooCPUKernel on the (integral) dtype
// of x.crows(), binding it as `data_t` inside the visitor macro.
template <typename T, typename Context>
void CsrToCooKernel(const Context& dev_ctx,
                    const SparseCsrTensor& x,
                    SparseCooTensor* out) {
  PD_VISIT_BASE_INTEGRAL_TYPES(x.crows().dtype(), "CsrToCooCPUKernel", ([&] {
                                 CsrToCooCPUKernel<T, data_t>(dev_ctx, x, out);
                               }));
}

template <typename T, typename IntT>
174 175 176
void CooToCsrCPUKernel(const CPUContext& dev_ctx,
                       const SparseCooTensor& x,
                       SparseCsrTensor* out) {
177 178 179 180
  const auto& x_dims = x.dims();
  bool valid = x_dims.size() == 2 || x_dims.size() == 3;
  PADDLE_ENFORCE_EQ(valid,
                    true,
181
                    phi::errors::InvalidArgument(
182 183 184 185 186 187
                        "SparseCsrTensor only support 2-D or 3-D matrix"));
  const int64_t non_zero_num = x.nnz();

  int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
  int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];

Z
zhangkaihuo 已提交
188 189 190
  phi::DenseTensor crows = phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
  phi::DenseTensor cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
  phi::DenseTensor values = phi::EmptyLike<T, CPUContext>(dev_ctx, x.values());
Z
zhangkaihuo 已提交
191 192 193 194
  if (non_zero_num <= 0) {
    out->SetMember(crows, cols, values, x_dims);
    return;
  }
Z
zhangkaihuo 已提交
195 196 197
  IntT* csr_crows_data = crows.data<IntT>();
  IntT* csr_cols_data = cols.data<IntT>();
  T* csr_values_data = values.data<T>();
198

199 200
  const auto& coo_indices = x.indices();
  const auto& coo_values = x.values();
201 202
  const IntT* batchs_ptr = coo_indices.data<IntT>();
  const IntT* coo_rows_data =
Z
zhangkaihuo 已提交
203
      x_dims.size() == 2 ? batchs_ptr : batchs_ptr + non_zero_num;
204
  const IntT* coo_cols_data = coo_rows_data + non_zero_num;
205 206 207 208 209 210
  const T* coo_values_data = coo_values.data<T>();

  std::vector<int64_t> offsets(batchs, 0);
  if (batchs > 1) {
    for (int i = 0; i < non_zero_num; i++) {
      if (i == non_zero_num - 1 || batchs_ptr[i] != batchs_ptr[i + 1]) {
Z
zhangkaihuo 已提交
211 212 213 214 215
        const int start = batchs_ptr[i];
        const int end = i == non_zero_num - 1 ? batchs : batchs_ptr[i + 1];
        for (int j = start; j < end; j++) {
          offsets[j] = i + 1;
        }
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
      }
    }
  } else {
    offsets[0] = non_zero_num;
  }

  for (int b = 0; b < batchs; b++) {
    int batch_start = 0;
    int batch_non_zero_num = offsets[b];
    if (b > 0) {
      batch_start = offsets[b - 1];
      batch_non_zero_num -= batch_start;
    }
    auto* coo_rows_ptr = coo_rows_data + batch_start;
    for (int i = 0; i <= coo_rows_ptr[0]; i++) {
      csr_crows_data[b * (rows + 1) + i] = 0;
    }
    for (int64_t i = 1; i < batch_non_zero_num; i++) {
234
      for (IntT j = coo_rows_ptr[i - 1]; j < coo_rows_ptr[i]; j++) {
235 236 237
        csr_crows_data[b * (rows + 1) + j + 1] = i;
      }
    }
238
    for (IntT i = coo_rows_ptr[batch_non_zero_num - 1] + 1; i < rows + 1; i++) {
239 240
      csr_crows_data[b * (rows + 1) + i] = batch_non_zero_num;
    }
Z
zhangkaihuo 已提交
241 242 243
    if (batch_non_zero_num == 0) {
      memset(csr_crows_data + b * (rows + 1), 0, sizeof(IntT) * (rows + 1));
    }
244 245
  }

246
  memcpy(csr_cols_data, coo_cols_data, sizeof(IntT) * non_zero_num);
247
  memcpy(csr_values_data, coo_values_data, sizeof(T) * non_zero_num);
248
  out->SetMember(crows, cols, values, x_dims);
249 250
}

// Public entry point: dispatches CooToCsrCPUKernel on the (integral) dtype
// of x.indices(), binding it as `data_t` inside the visitor macro.
template <typename T, typename Context>
void CooToCsrKernel(const Context& dev_ctx,
                    const SparseCooTensor& x,
                    SparseCsrTensor* out) {
  PD_VISIT_BASE_INTEGRAL_TYPES(x.indices().dtype(), "CooToCsrCPUKernel", ([&] {
                                 CooToCsrCPUKernel<T, data_t>(dev_ctx, x, out);
                               }));
}

template <typename T, typename IntT>
261 262 263
void CooToDenseCPUKernel(const CPUContext& dev_ctx,
                         const SparseCooTensor& x,
                         DenseTensor* out) {
Z
zhangkaihuo 已提交
264 265
  const auto non_zero_num = x.nnz();
  const auto dense_dims = x.dims();
266 267
  const auto indices = x.indices();
  const auto values = x.values();
268
  const auto indices_dims = phi::vectorize<int>(indices.dims());
Z
zhangkaihuo 已提交
269 270 271 272
  int64_t sparse_dim = indices_dims[0];
  if (indices_dims.size() == 1) {
    sparse_dim = 1;
  }
Z
zhangkaihuo 已提交
273
  const int64_t dense_dim = x.dense_dim();
Z
zhangkaihuo 已提交
274 275

  const T* x_data = values.data<T>();
Z
zhangkaihuo 已提交
276
  dev_ctx.template Alloc<T>(out);
Z
zhangkaihuo 已提交
277
  T* out_data = out->data<T>();
Z
zhangkaihuo 已提交
278 279 280 281 282 283
  memset(out_data, 0, sizeof(T) * out->numel());

  if (x.nnz() <= 0) {
    return;
  }

Z
zhangkaihuo 已提交
284 285 286 287 288 289 290 291 292 293 294 295 296 297
  int64_t base_offset = 1;
  for (int64_t i = 0; i < dense_dim; i++) {
    base_offset *= dense_dims[sparse_dim + i];
  }
  std::vector<int64_t> sparse_offsets(sparse_dim);
  int64_t offset = 1;
  for (int i = sparse_dim - 1; i >= 0; i--) {
    sparse_offsets[i] = offset;
    offset *= dense_dims[i];
  }

  for (auto i = 0; i < non_zero_num; i++) {
    int64_t index = 0;
    for (int j = 0; j < sparse_dim; j++) {
298
      index += indices.data<IntT>()[j * non_zero_num + i] * sparse_offsets[j];
Z
zhangkaihuo 已提交
299 300 301 302 303 304 305 306
    }

    for (int j = 0; j < base_offset; j++) {
      out_data[index * base_offset + j] = x_data[i * base_offset + j];
    }
  }
}

// Public entry point: dispatches CooToDenseCPUKernel on the (integral) dtype
// of x.indices(), binding it as `data_t` inside the visitor macro.
template <typename T, typename Context>
void CooToDenseKernel(const Context& dev_ctx,
                      const SparseCooTensor& x,
                      DenseTensor* out) {
  PD_VISIT_BASE_INTEGRAL_TYPES(
      x.indices().dtype(), "CooToDenseCPUKernel", ([&] {
        CooToDenseCPUKernel<T, data_t>(dev_ctx, x, out);
      }));
}

}  // namespace sparse
}  // namespace phi

320
PD_REGISTER_KERNEL(dense_to_coo,
321 322
                   CPU,
                   ALL_LAYOUT,
323
                   phi::sparse::DenseToCooKernel,
324 325 326 327 328 329 330 331
                   float,
                   double,
                   paddle::float16,
                   uint8_t,
                   int8_t,
                   int16_t,
                   int,
                   int64_t) {}
332

333
PD_REGISTER_KERNEL(csr_to_coo,
334 335
                   CPU,
                   ALL_LAYOUT,
336
                   phi::sparse::CsrToCooKernel,
337 338 339 340 341 342 343
                   float,
                   double,
                   paddle::float16,
                   uint8_t,
                   int8_t,
                   int16_t,
                   int,
344 345
                   int64_t,
                   bool) {}

// Registers the COO->CSR conversion kernel for the CPU backend.
PD_REGISTER_KERNEL(coo_to_csr,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::CooToCsrKernel,
                   float,
                   double,
                   phi::dtype::float16,
                   uint8_t,
                   int8_t,
                   int16_t,
                   int,
                   int64_t,
                   bool) {}

// Registers the dense->CSR conversion kernel for the CPU backend.
PD_REGISTER_KERNEL(dense_to_csr,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::DenseToCsrKernel,
                   float,
                   double,
                   phi::dtype::float16,
                   uint8_t,
                   int8_t,
                   int16_t,
                   int,
                   int64_t) {}

// Registers the COO->dense conversion kernel for the CPU backend.
PD_REGISTER_KERNEL(coo_to_dense,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::CooToDenseKernel,
                   float,
                   double,
                   phi::dtype::float16,
                   uint8_t,
                   int8_t,
                   int16_t,
                   int,
                   int64_t,
                   bool) {}

// Registers the CSR->dense conversion kernel for the CPU backend.
PD_REGISTER_KERNEL(csr_to_dense,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::CsrToDenseKernel,
                   float,
                   double,
                   phi::dtype::float16,
                   uint8_t,
                   int8_t,
                   int16_t,
                   int,
                   int64_t,
                   bool) {}

// Registers the kernel that extracts the values tensor of a COO tensor.
// Input 0 carries the SPARSE_COO layout tag.
PD_REGISTER_KERNEL(values_coo,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::ValuesCooKernel,
                   float,
                   double,
                   phi::dtype::float16,
                   uint8_t,
                   int8_t,
                   int16_t,
                   int,
                   int64_t,
                   bool) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}

// Registers the kernel that extracts the indices tensor of a COO tensor.
PD_REGISTER_KERNEL(indices_coo,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::IndicesCooKernel,
                   float,
                   double,
                   phi::dtype::float16,
                   uint8_t,
                   int8_t,
                   int16_t,
                   int,
                   int64_t) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}

// Registers the kernel that extracts the values tensor of a CSR tensor.
// Input 0 carries the SPARSE_CSR layout tag.
PD_REGISTER_KERNEL(values_csr,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::ValuesCsrKernel,
                   float,
                   double,
                   phi::dtype::float16,
                   uint8_t,
                   int8_t,
                   int16_t,
                   int,
                   int64_t,
                   bool) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
}

// Registers the COO tensor construction kernel.
// NOTE(review): unlike the registrations above, int8_t (and bool) are not
// listed here — confirm this is intentional.
PD_REGISTER_KERNEL(sparse_coo_tensor,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::SparseCooTensorKernel,
                   float,
                   double,
                   phi::dtype::float16,
                   uint8_t,
                   int16_t,
                   int,
                   int64_t) {}