cxx_api_impl.cc 7.0 KB
Newer Older
Y
Yan Chunwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/api/cxx_api.h"
16 17
#include <memory>
#include <mutex>  //NOLINT
18
#include <string>
Y
Yan Chunwei 已提交
19
#include "lite/api/paddle_api.h"
20
#include "lite/core/device_info.h"
21
#include "lite/core/version.h"
22 23 24 25 26

#ifndef LITE_ON_TINY_PUBLISH
#include "lite/api/paddle_use_passes.h"
#endif

27
#if (defined LITE_WITH_X86) && (defined PADDLE_WITH_MKLML) && \
28
    !(defined LITE_ON_MODEL_OPTIMIZE_TOOL) && !defined(__APPLE__)
29 30 31
#include <omp.h>
#include "lite/backends/x86/mklml.h"
#endif
Y
Yan Chunwei 已提交
32 33 34 35
namespace paddle {
namespace lite {

// Initializes this predictor from `config`.
//
// A copy of `config` is stored in `config_` (it is reused by Clone()).
// If this instance is not a clone (`status_is_cloned_` is false), the
// backend environments selected at compile time are initialized, the pass
// list is assembled and the underlying predictor is built. If it is a
// clone, only the feed/fetch preparation is performed on the already
// existing predictor.
//
// Afterwards the power mode and thread count are cached for Run(), and the
// backend specific, model-level settings (subgraph cache dir, device id,
// x86 math library threads) are applied, depending on the build flags.
void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
  config_ = config;
  if (!status_is_cloned_) {
    auto places = config.valid_places();
    std::vector<std::string> passes = config.get_passes_internal();
#ifdef LITE_WITH_CUDA
    // if kCUDA is included in valid places, it should be initialized first,
    // otherwise skip this step.
    for (auto &p : places) {
      if (p.target == TARGET(kCUDA)) {
        Env<TARGET(kCUDA)>::Init();
        if (config_.multi_stream()) {
          // NOTE: this replaces the whole pass list, including any passes
          // obtained from the config above.
          passes = {"multi_stream_analysis_pass"};
          VLOG(3) << "add pass: " << passes[0];
        }
        break;
      }
    }
#endif
#ifdef LITE_WITH_MLU
    Env<TARGET(kMLU)>::Init();
    lite::TargetWrapperMlu::SetMLURunMode(config.mlu_core_version(),
                                          config.mlu_core_number(),
                                          config.mlu_input_layout(),
                                          config.mlu_firstconv_param());
#endif  // LITE_WITH_MLU

#ifdef LITE_WITH_BM
    Env<TARGET(kBM)>::Init();
    int device_id = 0;
    if (const char *c_id = getenv("BM_VISIBLE_DEVICES")) {
      // Only the first character of the variable is interpreted. Anything
      // that is not a decimal digit (including an empty value) keeps the
      // default device 0 instead of producing a bogus device id.
      if (*c_id >= '0' && *c_id <= '9') {
        device_id = *c_id - '0';
      }
    }
    TargetWrapper<TARGET(kBM)>::SetDevice(device_id);
#endif  // LITE_WITH_BM

    // The layout preprocess pass is requested through a marker substring in
    // the model directory path.
    auto use_layout_preprocess_pass =
        config.model_dir().find("OPENCL_PRE_PRECESS");
    VLOG(1) << "use_layout_preprocess_pass:" << use_layout_preprocess_pass;
    // Guard against an empty place list before looking at the first place.
    if (!places.empty() && places[0].target == TARGET(kOpenCL) &&
        use_layout_preprocess_pass != std::string::npos) {
      passes = {"type_layout_cast_preprocess_pass"};
      VLOG(1) << "add pass:" << passes[0];
    }
    raw_predictor_->Build(config, places, passes);
  } else {
    // Validate the pointer before it is dereferenced.
    CHECK(raw_predictor_) << "The Predictor can not be nullptr in Clone mode.";
    raw_predictor_->PrepareFeedFetch();
  }
  mode_ = config.power_mode();
  threads_ = config.threads();
#ifdef LITE_WITH_NPU
  // Store the model-level configuration into scope for kernels, and use
  // exe_scope to store the execution-level configuration
  Context<TargetType::kNPU>::SetSubgraphModelCacheDir(
      raw_predictor_->scope(), config.subgraph_model_cache_dir());
#endif

#ifdef LITE_WITH_APU
  // Store the model-level configuration into scope for kernels, and use
  // exe_scope to store the execution-level configuration
  Context<TargetType::kAPU>::SetSubgraphModelCacheDir(
      raw_predictor_->scope(), config.subgraph_model_cache_dir());
#endif

#ifdef LITE_WITH_HUAWEI_ASCEND_NPU
  Context<TargetType::kHuaweiAscendNPU>::SetHuaweiAscendDeviceID(
      config.get_device_id());
  Context<TargetType::kHuaweiAscendNPU>::SetSubgraphModelCacheDir(
      config.subgraph_model_cache_dir());
#endif
#if (defined LITE_WITH_X86) && (defined PADDLE_WITH_MKLML) && \
    !(defined LITE_ON_MODEL_OPTIMIZE_TOOL) && !defined(__APPLE__)
  int num_threads = config.x86_math_library_num_threads();
  // Never configure fewer than one thread.
  int real_num_threads = num_threads > 1 ? num_threads : 1;
  paddle::lite::x86::MKL_Set_Num_Threads(real_num_threads);
  omp_set_num_threads(real_num_threads);
  VLOG(3) << "set_x86_math_library_math_threads() is set successfully and the "
             "number of threads is:"
          << real_num_threads;
#endif
}

std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInput(int i) {
119
  auto *x = raw_predictor_->GetInput(i);
Y
Yan Chunwei 已提交
120 121 122 123 124
  return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x));
}

std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetOutput(
    int i) const {
125
  const auto *x = raw_predictor_->GetOutput(i);
Y
Yan Chunwei 已提交
126 127 128
  return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x));
}

S
sangoly 已提交
129
std::vector<std::string> CxxPaddleApiImpl::GetInputNames() {
130
  return raw_predictor_->GetInputNames();
131 132
}

133
std::vector<std::string> CxxPaddleApiImpl::GetParamNames() {
134
  return raw_predictor_->GetParamNames();
135 136
}

S
sangoly 已提交
137
std::vector<std::string> CxxPaddleApiImpl::GetOutputNames() {
138
  return raw_predictor_->GetOutputNames();
139 140
}

T
TianXiaogang 已提交
141 142 143 144
void CxxPaddleApiImpl::Run() {
#ifdef LITE_WITH_ARM
  lite::DeviceInfo::Global().SetRunMode(mode_, threads_);
#endif
145
  raw_predictor_->Run();
T
TianXiaogang 已提交
146
}
Y
Yan Chunwei 已提交
147

148 149
std::shared_ptr<lite_api::PaddlePredictor> CxxPaddleApiImpl::Clone() {
  std::lock_guard<std::mutex> lock(mutex_);
150 151 152 153 154 155 156 157 158 159 160
  auto predictor =
      std::make_shared<lite::CxxPaddleApiImpl>(raw_predictor_->Clone());
  predictor->Init(config_);
  return predictor;
}

std::shared_ptr<lite_api::PaddlePredictor> CxxPaddleApiImpl::Clone(
    const std::vector<std::string> &var_names) {
  std::lock_guard<std::mutex> lock(mutex_);
  auto predictor = std::make_shared<lite::CxxPaddleApiImpl>(
      raw_predictor_->Clone(var_names));
161 162 163 164
  predictor->Init(config_);
  return predictor;
}

165 166
// Returns the library version string reported by version().
std::string CxxPaddleApiImpl::GetVersion() const {
  return version();
}

Y
Yan Chunwei 已提交
167 168
std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetTensor(
    const std::string &name) const {
169
  auto *x = raw_predictor_->GetTensor(name);
Y
Yan Chunwei 已提交
170 171 172
  return std::unique_ptr<const lite_api::Tensor>(new lite_api::Tensor(x));
}

173 174 175
std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetMutableTensor(
    const std::string &name) {
  return std::unique_ptr<lite_api::Tensor>(
176
      new lite_api::Tensor(raw_predictor_->GetMutableTensor(name)));
177 178
}

179 180 181
std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInputByName(
    const std::string &name) {
  return std::unique_ptr<lite_api::Tensor>(
182
      new lite_api::Tensor(raw_predictor_->GetInputByName(name)));
183 184
}

Y
Yan Chunwei 已提交
185
void CxxPaddleApiImpl::SaveOptimizedModel(const std::string &model_dir,
186 187
                                          lite_api::LiteModelType model_type,
                                          bool record_info) {
188
  raw_predictor_->SaveModel(model_dir, model_type, record_info);
Y
Yan Chunwei 已提交
189 190 191 192 193 194 195 196 197
}

}  // namespace lite

namespace lite_api {

// Specialization for CxxConfig: constructs a CxxPaddleApiImpl and
// initializes it with `config`. Construction and initialization run while
// holding a function-local static mutex, so concurrent calls to this
// function are serialized.
template <>
std::shared_ptr<PaddlePredictor> CreatePaddlePredictor(
    const CxxConfig &config) {
  static std::mutex mutex_conf;
  std::lock_guard<std::mutex> guard(mutex_conf);
  std::shared_ptr<lite::CxxPaddleApiImpl> predictor =
      std::make_shared<lite::CxxPaddleApiImpl>();
  predictor->Init(config);
  return predictor;
}

}  // namespace lite_api
}  // namespace paddle