/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
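
// loader.cpp: deserializes a protobuf ProgramDesc (from a __model__ file or
// an in-memory buffer), pre-creates the variables it declares in a Scope, and
// optionally runs fusion optimization before returning a Program.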

#include "framework/loader.h"

#include <memory>

#include "framework/lod_tensor.h"
#include "framework/program/program-optimize/program_optimize.h"
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_image.h"
#endif

namespace paddle_mobile {
namespace framework {

template <typename Device, typename T>
void Loader<Device, T>::InitMemoryFromProgram(
    const std::shared_ptr<ProgramDesc> &originProgramDesc,
    const std::shared_ptr<Scope> &scope) {
  // Walk every block in the program and create each variable in the scope,
  // sizing its tensor from the shape recorded in the VarDesc.
  for (const auto &block : originProgramDesc.get()->Blocks()) {
    for (const auto &var_desc : block->Vars()) {
      auto var = scope.get()->Var(var_desc->Name());
      if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
        if (var_desc->Persistable()) {
          auto dim = var_desc->Tensor_desc().Dims();
          auto tensor = var->GetMutable<LoDTensor>();
          tensor->Resize(make_ddim(dim));
        } else {
          auto dim = var_desc->Tensor_desc().Dims();
          if (dim.size() == 0) {
            auto tensor = var->GetMutable<LoDTensor>();
            framework::DDim dDim = {0};
            tensor->Resize(dDim);
          } else {
            // Negative dims are shape placeholders (e.g. -1 for the batch
            // dimension); flip the sign so Resize gets a concrete shape.
            for (auto &d : dim) {
              if (d < 0) {
                d *= -1;
              }
            }
            auto tensor = var->GetMutable<LoDTensor>();
            tensor->Resize(make_ddim(dim));
          }
        }
      } else {
        // TODO(codeWorm)
      }
    }
  }
}

#ifdef PADDLE_MOBILE_CL
// OpenCL specialization: variables are backed by CLImage instead of
// LoDTensor.
template <>
void Loader<GPU_CL, float>::InitMemoryFromProgram(
    const std::shared_ptr<ProgramDesc> &originProgramDesc,
    const std::shared_ptr<Scope> &scope) {
  for (const auto &block : originProgramDesc.get()->Blocks()) {
    for (const auto &var_desc : block->Vars()) {
      auto var = scope.get()->Var(var_desc->Name());
      if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
        if (var_desc->Persistable()) {
          auto dim = var_desc->Tensor_desc().Dims();
          auto cl_image = var->GetMutable<framework::CLImage>();
          cl_image->Resize(make_ddim(dim));
        } else {
          auto dim = var_desc->Tensor_desc().Dims();
          PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
          // Non-persistable tensors are sized for a batch of 1 on the GPU.
          dim[0] = 1;
          auto cl_image = var->GetMutable<framework::CLImage>();
          cl_image->Resize(make_ddim(dim));
        }
      } else {
        // TODO(codeWorm)
      }
    }
  }
}
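
// Specialized LoadCombinedMemory for the OpenCL backend; it mirrors the
// generic implementation further below.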
template <>
const Program<GPU_CL, float> Loader<GPU_CL, float>::LoadCombinedMemory(
    size_t read_size, const uint8_t *buf, size_t combined_params_len,
    uint8_t *combined_params_buf, bool optimize, bool quantification,
    int quantification_fold) {
  bool can_add_split = false;

  PaddleMobile__Framework__Proto__ProgramDesc *c_program;
  PADDLE_MOBILE_ENFORCE(buf != nullptr, "read from __model__ is null");

  c_program = paddle_mobile__framework__proto__program_desc__unpack(
      nullptr, read_size, buf);
  PADDLE_MOBILE_ENFORCE(c_program != nullptr, "program is null");
  DLOG << "n_ops: " << (*c_program->blocks)->n_ops;

  auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);

  Program<GPU_CL, float> program;
  program.combined = true;
  program.originProgram = originProgramDesc;
  program.quantification = quantification;
  program.combined_params_len = combined_params_len;
  // Stores the caller's params buffer pointer; no copy is made here.
  program.combined_params_buf = combined_params_buf;
  program.quantification_fold = quantification_fold;

  auto scope = std::make_shared<Scope>();
  program.scope = scope;
  InitMemoryFromProgram(originProgramDesc, scope);
  if (optimize) {
    ProgramOptimize program_optimize;
    program.optimizeProgram =
        program_optimize.FusionOptimize(originProgramDesc, can_add_split);
    if (!program.optimizeProgram) {
      program.optimizeProgram = originProgramDesc;
    }
  }
  if (optimize) {
    program.optimizeProgram->Description("optimize: ");
  } else {
    originProgramDesc->Description("program: ");
  }
  paddle_mobile__framework__proto__program_desc__free_unpacked(c_program,
                                                               nullptr);
  return program;
}

#endif

/**
 * performs fusion optimization and prints program info
 * @tparam Device
 * @tparam T
 * @param optimize
 * @param can_add_split
 * @param program
 * @param originProgramDesc
 */
template <typename Device, typename T>
void FusionAndPrintInfos(
    bool optimize, bool can_add_split, Program<Device, T> *program,
    const std::shared_ptr<ProgramDesc> &originProgramDesc) {
  if (optimize) {
    ProgramOptimize program_optimize;
    program->optimizeProgram =
        program_optimize.FusionOptimize(originProgramDesc, can_add_split);
    // Fall back to the original program if fusion produced nothing.
    if (!program->optimizeProgram) {
      program->optimizeProgram = originProgramDesc;
    }
  }
  if (optimize) {
    program->optimizeProgram->Description("optimize: ");
  } else {
    originProgramDesc->Description("program: ");
  }
}

static size_t ReadBuffer(const char *file_name, uint8_t **out) {
  FILE *fp;
  fp = fopen(file_name, "rb");
  PADDLE_MOBILE_ENFORCE(fp != NULL, "%s open failed!", file_name);

  fseek(fp, 0, SEEK_END);
  size_t size = ftell(fp);
  rewind(fp);

  DLOG << "model size: " << size;
  PADDLE_MOBILE_ENFORCE(size > 0, "model size should be > 0");
  *out = reinterpret_cast<uint8_t *>(malloc(size));

  // fread may return short counts; keep reading until EOF.
  size_t cur_len = 0;
  size_t nread;
  while ((nread = fread(*out + cur_len, 1, size - cur_len, fp)) != 0) {
    cur_len += nread;
  }
  fclose(fp);
  return cur_len;
}

template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::Load(const std::string &dirname,
                                                 bool optimize,
                                                 bool quantification,
                                                 bool can_add_split,
                                                 int quantification_fold) {
  auto program =
      this->LoadProgram(dirname + "/__model__", optimize, quantification,
                        can_add_split, quantification_fold);
  program.model_path = dirname;
  return program;
}

template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::Load(const std::string &model_path,
                                                 const std::string &para_path,
                                                 bool optimize,
                                                 bool quantification,
                                                 int quantification_fold) {
  auto program = this->LoadProgram(model_path, optimize, quantification, false,
                                   quantification_fold);

  program.para_path = para_path;
  program.combined = true;
  program.quantification = quantification;
  return program;
}

template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::LoadProgram(
    const std::string &model_path, bool optimize, bool quantification,
    bool can_add_split, int quantification_fold) {
  std::string model_filename = model_path;
  PaddleMobile__Framework__Proto__ProgramDesc *c_program;
  uint8_t *buf = NULL;
  size_t read_size = ReadBuffer(model_filename.c_str(), &buf);

  PADDLE_MOBILE_ENFORCE(buf != NULL, "read from __model__ is null");

  c_program = paddle_mobile__framework__proto__program_desc__unpack(
      NULL, read_size, buf);
  PADDLE_MOBILE_ENFORCE(c_program != NULL, "program is null");
  DLOG << "n_ops: " << (*c_program->blocks)->n_ops;

  auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);

  Program<Device, T> program;
  program.originProgram = originProgramDesc;
  program.quantification = quantification;
  program.combined_params_len = 0;
  program.combined_params_buf = nullptr;
  program.quantification_fold = quantification_fold;
  auto scope = std::make_shared<Scope>();
  program.scope = scope;

  // use originProgramDesc and scope to init tensors
  InitMemoryFromProgram(originProgramDesc, scope);
  // perform fusion and print infos
  FusionAndPrintInfos(optimize, can_add_split, &program, originProgramDesc);

  paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
  free(buf);
  return program;
}

template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::LoadCombinedMemory(
    size_t read_size, const uint8_t *buf, size_t combined_params_len,
    uint8_t *combined_params_buf, bool optimize, bool quantification,
    int quantification_fold) {
  bool can_add_split = false;

  PaddleMobile__Framework__Proto__ProgramDesc *c_program;
  PADDLE_MOBILE_ENFORCE(buf != nullptr, "read from __model__ is null");

  c_program = paddle_mobile__framework__proto__program_desc__unpack(
      nullptr, read_size, buf);
  PADDLE_MOBILE_ENFORCE(c_program != nullptr, "program is null");
  DLOG << "n_ops: " << (*c_program->blocks)->n_ops;

  auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);

  Program<Device, T> program;
  program.combined = true;
  program.originProgram = originProgramDesc;
  program.quantification = quantification;
  program.combined_params_len = combined_params_len;
  program.combined_params_buf = combined_params_buf;
  program.quantification_fold = quantification_fold;

  auto scope = std::make_shared<Scope>();
  program.scope = scope;
  InitMemoryFromProgram(originProgramDesc, scope);
  FusionAndPrintInfos(optimize, can_add_split, &program, originProgramDesc);
  paddle_mobile__framework__proto__program_desc__free_unpacked(c_program,
                                                               nullptr);
  return program;
}

template class Loader<CPU, float>;

template class Loader<FPGA, float>;

template class Loader<GPU_CL, float>;

}  // namespace framework
}  // namespace paddle_mobile
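
// Usage sketch (illustrative only; paths are placeholders and the flag
// values are assumptions, not defaults defined in this file):
//
//   paddle_mobile::framework::Loader<paddle_mobile::CPU, float> loader;
//
//   // Separate-files format: dirname contains __model__ plus param files.
//   auto program = loader.Load("/path/to/model_dir", /*optimize=*/true,
//                              /*quantification=*/false,
//                              /*can_add_split=*/false,
//                              /*quantification_fold=*/1);
//
//   // Combined format: one model file plus one params file.
//   auto combined = loader.Load("/path/to/model", "/path/to/params",
//                               /*optimize=*/true, /*quantification=*/false,
//                               /*quantification_fold=*/1);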