/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/lod_tensor.h"

#include <stdint.h>

#include <algorithm>
#include <sstream>

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/phi/core/serialization.h"

namespace paddle {
namespace framework {

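// Renders `lod` as human-readable text via the LoD stream operator.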
std::string LoDToString(const LoD &lod) {
  std::ostringstream stream;
  stream << lod;
  return stream.str();
}

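// Slices `in` starting at `level`: keeps the offsets covering elements
// [elem_begin, elem_end) of that level plus all lower levels they span,
// then rebases every kept level so that its first offset is 0. For
// example, SliceInLevel({{0, 2, 4}, {0, 1, 3, 5, 9}}, 0, 1, 2) yields
// {{0, 2}, {0, 2, 6}}.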
LoD SliceInLevel(const LoD &in,
                 size_t level,
                 size_t elem_begin,
                 size_t elem_end) {
  PADDLE_ENFORCE_LT(level,
                    in.size(),
                    platform::errors::InvalidArgument(
                        "The input LoDTensor's lod level should be less than "
                        "the LoD size, but received level is %d, LoD is %s.",
                        level,
                        in));
  PADDLE_ENFORCE_LT(
      elem_begin,
      elem_end,
      platform::errors::InvalidArgument(
          "The index to start slicing should be less than the index to end "
          "slicing, but received start index is %d, end index is %d.",
          elem_begin,
          elem_end));
  PADDLE_ENFORCE_LT(
      elem_end,
      in[level].size(),
      platform::errors::InvalidArgument(
          "The index to end slicing should be less than the input LoD size, "
          "but received end index is %d, LoD size is %d.",
          elem_end,
          in[level].size()));

  LoD res;
  res.resize(in.size() - level);
  // copy the first level
  res[0].assign(in[level].begin() + elem_begin,
                in[level].begin() + elem_end + 1);
  for (size_t lvl = 1; lvl < res.size(); lvl++) {
    const auto &in_level = in[level + lvl];
    const auto &above_level = res[lvl - 1];
    auto &out_level = res[lvl];
    out_level.assign(in_level.begin() + above_level.front(),
                     in_level.begin() + above_level.back() + 1);
  }
  for (size_t lvl = 0; lvl < res.size(); lvl++) {
    // to make the first offset equal 0, subtract the first element from
    // every element in the level
    size_t front = res[lvl].front();
    for (auto &ele : res[lvl]) {
      ele -= front;
    }
  }
  return res;
}

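// Converts a relative LoD, in which each upper level indexes into the
// level below it, into one where every level holds absolute offsets into
// the underlying tensor. For example, {{0, 2, 4}, {0, 1, 3, 5, 9}}
// becomes {{0, 3, 9}, {0, 1, 3, 5, 9}}.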
LoD ToAbsOffset(const LoD &in) {
  // the lowest level already stores absolute offsets into the tensor, so
  // an empty or single-level LoD needs no conversion
  if (in.empty() || in.size() == 1) return in;
  LoD result = in;
  for (auto level = static_cast<int>(in.size() - 2); level >= 0; level--) {
    for (size_t i = 0; i < in[level].size(); ++i) {
      size_t index = in[level][i];
      result[level][i] = result[level + 1][index];
    }
  }
  return result;
}

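// Two LoDs are equal iff they have the same number of levels and every
// level holds identical offsets.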
bool operator==(const LoD &a, const LoD &b) {
  if (a.size() != b.size()) {
    return false;
  }

  for (size_t i = 0; i < a.size(); i++) {
    const auto &a_level = a[i];
    const auto &b_level = b[i];
    if (a_level.size() != b_level.size()) {
      return false;
    }
    for (size_t j = 0; j < a_level.size(); j++) {
      if (a_level[j] != b_level[j]) {
        return false;
      }
    }
  }
  return true;
}

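// Validates a relative (offset-based) LoD: each level needs at least two
// offsets, must start at 0 and be non-descending, adjacent levels must be
// consistent with each other, and the lowest level must end at
// `tensor_height` when tensor_height > 0.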
bool CheckLoD(const LoD &in, int tensor_height) {
  if (in.empty()) return true;
  for (const auto &level : in) {
    // check: each level should contain at least two offsets.
    if (level.size() < 2) return false;
    // check: the first offset (the begin offset) of each level should be 0.
    if (level.front() != 0) return false;
    // check: all the offsets in a level should be non-descending
    if (!std::is_sorted(level.begin(), level.end())) {
      return false;
    }
  }
  // check: the lowest level's last offset should equal `tensor_height` if
  //        tensor_height > 0.
  if (tensor_height > 0 &&
      static_cast<size_t>(tensor_height) != in.back().back())
    return false;

  // check: the higher level's last offset should equal the lower level's
  // size - 1.
  // NOTE LoD store the levels from top to bottom, so the higher level goes
  // first.
  for (size_t level = 0; level < in.size() - 1; level++) {
    if (in[level].back() != in[level + 1].size() - 1) return false;
  }
  return true;
}

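// Validates an absolute LoD (see ToAbsOffset): each level must be sorted,
// start at 0, and end at the height of the underlying tensor.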
bool CheckAbsLoD(const LoD &in, int tensor_height) {
  if (in.empty()) return true;
  for (const auto &level : in) {
    // check: all the offsets in a level should be ascending (no same items
    // allowed).
    if (!std::is_sorted(level.begin(), level.end(),
                        [](size_t a, size_t b) { return a < b; })) {
      return false;
    }

    // check: each level should contain at least two offsets.
    if (level.size() < 2) return false;

    // check: the first offset of each level should be 0, and the last
    // should be the same (the height of the underlying tensor).
    if (level.front() != 0) return false;
    if (tensor_height < 0) {
      tensor_height = level.back();
    } else if (static_cast<size_t>(tensor_height) != level.back()) {
      return false;
    }
  }
  return true;
}

using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;
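// Extracts the sub-LoD covering elements [start_idx, end_idx) of
// `start_level`, stored as per-level sequence lengths rather than offsets,
// and returns it together with the absolute [begin, end) row range in the
// lowest level.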
LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod,
                                        size_t start_idx,
                                        size_t end_idx,
                                        size_t start_level) {
  LoD sub_lod;

  for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
    PADDLE_ENFORCE_LE(start_idx,
                      end_idx,
                      platform::errors::InvalidArgument(
                          "The start index should be less than the end index, "
                          "but received start index is %d, end index is %d.",
                          start_idx,
                          end_idx));
    PADDLE_ENFORCE_LT(
        end_idx,
        lod[level_idx].size(),
        platform::errors::InvalidArgument(
            "The end index should be less than the LoD level size, but "
            "received end index is %d, LoD level size is %d.",
            end_idx,
            lod[level_idx].size()));
    std::vector<size_t> level_lens;
    for (size_t i = start_idx; i < end_idx; ++i) {
      level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
    }
    sub_lod.emplace_back(level_lens);
    start_idx = lod[level_idx][start_idx];
    end_idx = lod[level_idx][end_idx];
  }

  return LoDAndOffset{sub_lod, {start_idx, end_idx}};
}

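// The (de)serialization routines below are thin wrappers that forward to
// their phi counterparts; the overloads that take no DeviceContext fetch a
// suitable one from the global DeviceContextPool.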
void SerializeToStream(std::ostream &os,
                       const LoDTensor &tensor,
                       const platform::DeviceContext &dev_ctx) {
  phi::SerializeToStream(os, tensor, dev_ctx);
}

void SerializeToStream(std::ostream &os, const LoDTensor &tensor) {
  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
  const platform::DeviceContext *dev_ctx = pool.Get(tensor.place());
  phi::SerializeToStream(os, tensor, *dev_ctx);
}

void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
  const platform::DeviceContext *dev_ctx = pool.Get(platform::CPUPlace());
  phi::DeserializeFromStream(is, tensor, *dev_ctx);
}

void DeserializeFromStream(std::istream &is,
                           LoDTensor *tensor,
                           const platform::DeviceContext &dev_ctx,
                           const size_t &seek,
                           const std::vector<int64_t> &shape) {
  phi::DeserializeFromStream(is, tensor, dev_ctx, seek, shape);
}

void DeserializeFromStream(std::istream &is,
                           LoDTensor *tensor,
                           const platform::DeviceContext &dev_ctx) {
  phi::DeserializeFromStream(is, tensor, dev_ctx);
}

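// Converts a length-based LoD to an offset-based one by prefix-summing
// each level. For example, the length LoD {{2, 1, 3}} becomes the offset
// LoD {{0, 2, 3, 6}}.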
LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
  LoD offset_lod;
  offset_lod.reserve(length_lod.size());
  for (size_t lvl = 0; lvl < length_lod.size(); ++lvl) {
    std::vector<size_t> level;
    level.reserve(length_lod[lvl].size() + 1);
    size_t tmp = 0;
    level.push_back(tmp);
    for (size_t idx = 0; idx < length_lod[lvl].size(); ++idx) {
      tmp += length_lod[lvl][idx];
      level.push_back(tmp);
    }
    offset_lod.push_back(level);
  }
  return offset_lod;
}

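// Splits `src` along its batch dimension (the top LoD level if present,
// otherwise dims()[0]) into roughly equal chunks, one per place, copying
// each chunk to its destination place and rebasing its LoD to start at 0.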
std::vector<LoDTensor> SplitLoDTensor(
    const LoDTensor &src, const std::vector<platform::Place> places) {
  PADDLE_ENFORCE_GT(places.size(),
                    0,
                    platform::errors::InvalidArgument(
                        "Place number cannot be empty when splitting."));
  src.check_memory_size();
  auto rank = src.dims().size();
  // if rank is 0, just return a single copy of src
  if (rank == 0) {
    LoDTensor dst;
    framework::TensorCopy(src, src.place(), &dst);
    std::vector<LoDTensor> ret;
    ret.emplace_back(std::move(dst));
    return ret;
  }

  size_t batch_size = src.lod().empty() ? static_cast<size_t>(src.dims()[0])
                                        : src.lod()[0].size() - 1;

  // if batch_size is 0, just return #places.size() copies of empty
  // tensors.
  if (batch_size == 0) {
    std::vector<LoDTensor> empty_results;
    empty_results.reserve(places.size());
    for (size_t i = 0; i < places.size(); ++i) {
      LoDTensor dst;
      dst.Resize(src.dims());
      dst.mutable_data(places[i], src.dtype());
      if (!src.lod().empty()) {
        dst.set_lod(src.lod());
      }
      empty_results.emplace_back(std::move(dst));
    }
    return empty_results;
  }

  auto step_width = (batch_size + places.size() - 1) / places.size();
  auto result_size = (batch_size + step_width - 1) / step_width;
  std::vector<LoDTensor> results;
  results.reserve(result_size);

  for (size_t i = 0; i < result_size; ++i) {
    auto begin = i * step_width;
    auto end = std::min<size_t>((i + 1) * step_width, batch_size);
    PADDLE_ENFORCE_LT(begin,
                      end,
                      platform::errors::InvalidArgument(
                          "The begin index must be less than the end index, "
                          "but received begin index is %d, end index is %d.",
                          begin,
                          end));

    LoDTensor dst;
    if (src.lod().empty()) {
      auto sliced_src = src.Slice(begin, end);
      auto &dst_place = places[i];
      framework::TensorCopy(sliced_src, dst_place, &dst);
    } else {
      auto lod_and_offset =
          GetSubLoDAndAbsoluteOffset(src.lod(), begin, end, 0);

      auto &offset = lod_and_offset.second;
      auto sliced_src = src.Slice(offset.first, offset.second);
      auto &dst_place = places[i];
      framework::TensorCopy(sliced_src, dst_place, &dst);

      LoD my_lod;
      for (auto &l : lod_and_offset.first) {
        std::vector<size_t> v{0};
        for (auto &ll : l) {
          v.push_back(ll + v.back());
        }
        my_lod.emplace_back(v);
      }
      dst.set_lod(my_lod);
    }
    results.emplace_back(std::move(dst));
  }

  return results;
}

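// Inverse of SplitLoDTensor: concatenates `lod_tensors` along dimension 0
// into `target` on `dst_place`, shifting each tensor's LoD offsets past
// the rows already merged.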
void MergeLoDTensor(LoDTensor *target,
                    const std::vector<const LoDTensor *> &lod_tensors,
                    platform::Place dst_place) {
  PADDLE_ENFORCE_EQ(lod_tensors.empty(),
                    false,
                    platform::errors::InvalidArgument(
                        "The LoDTensors to be merged are empty."));

  framework::DDim new_dim = lod_tensors[0]->dims();
  proto::VarType::Type new_type = proto::VarType::FP32;
  framework::DataLayout new_layout = lod_tensors[0]->layout();
  for (auto *t : lod_tensors) {
    if (t->numel() && t->IsInitialized()) {
      new_dim = t->dims();
      new_type = framework::TransToProtoVarType(t->dtype());
      new_layout = t->layout();
      break;
    }
  }

  LoD new_lod = lod_tensors[0]->lod();
  auto rank = lod_tensors[0]->dims().size();

  for (size_t i = 1; i < lod_tensors.size(); ++i) {
    auto *t = lod_tensors[i];
    if (t->numel() && t->IsInitialized()) {
      PADDLE_ENFORCE_EQ(
          new_type,
          framework::TransToProtoVarType(t->dtype()),
          platform::errors::InvalidArgument(
              "LoDTensor data type does not match, expected type is %s, actual "
              "type is %s.",
              DataTypeToString(new_type),
              DataTypeToString(framework::TransToProtoVarType(t->dtype()))));
      PADDLE_ENFORCE_EQ(
          new_layout,
          t->layout(),
          platform::errors::InvalidArgument(
              "LoDTensor layout does not match, expected layout is %s, "
              "actual layout is %s.",
              DataLayoutToString(new_layout),
              DataLayoutToString(t->layout())));
      auto tensor_dims = t->dims();
      PADDLE_ENFORCE_EQ(tensor_dims.size(),
                        new_dim.size(),
                        platform::errors::InvalidArgument(
                            "dimensions of LoDTensor does not match"));
      for (int j = 1; j < t->dims().size(); j++) {
        PADDLE_ENFORCE_EQ(
            tensor_dims[j],
            new_dim[j],
            platform::errors::InvalidArgument(
                "LoDTensor.ddim[%d] should eaqual to %d, but is %d",
                j,
                new_dim[j],
                tensor_dims[j]));
      }
      if (rank > 0) {
        new_dim[0] += t->dims()[0];
      }
    }

    auto &lod = t->lod();
    PADDLE_ENFORCE_EQ(new_lod.size(),
                      lod.size(),
                      platform::errors::InvalidArgument(
                          "The LoD information of LoDTensor does not match, "
                          "expected LoD is %s, actual LoD is %s.",
                          new_lod,
                          lod));
    for (size_t j = 0; j < lod.size(); ++j) {
      auto &sub_lod = new_lod[j];
      size_t offset = sub_lod.back();
      for (size_t k = 1; k < lod[j].size(); ++k) {
        sub_lod.push_back(lod[j][k] + offset);
      }
    }
  }
  target->Resize(new_dim);
  target->set_layout(new_layout);
  target->set_lod(new_lod);
  target->mutable_data(dst_place,
                       paddle::framework::TransToPhiDataType(new_type));

  int begin = 0;
  for (auto *src : lod_tensors) {
    int end = begin + src->dims()[0];
    if (end == begin) {
      continue;
    }
    auto dst = target->Slice(begin, end);
    framework::TensorCopy(*src, dst_place, &dst);
    begin = end;
  }
}

}  // namespace framework
}  // namespace paddle