paddle_infer_contrib.cc
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/api/paddle_infer_contrib.h"

#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"

namespace paddle_infer {
namespace contrib {

using paddle::PaddleDType;

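// Allocates `size` bytes of page-locked (pinned) host memory via cudaMallocHost
// so host/device transfers can run asynchronously; returns nullptr when Paddle
// is built without CUDA.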
void* TensorUtils::CudaMallocPinnedMemory(size_t size) {
#if defined(PADDLE_WITH_CUDA)
  void* ptr = nullptr;
  PADDLE_ENFORCE_GPU_SUCCESS(cudaMallocHost(&ptr, size));
  return ptr;
#else
  return nullptr;
#endif
}

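// Frees pinned host memory previously returned by CudaMallocPinnedMemory via
// cudaFreeHost; a no-op when Paddle is built without CUDA.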
void TensorUtils::CudaFreePinnedMemory(void* ptr) {
#if defined(PADDLE_WITH_CUDA)
  PADDLE_ENFORCE_GPU_SUCCESS(cudaFreeHost(ptr));
#endif
}

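// Copies `src` into `*p_dst`, reshaping the destination to match first. Only
// kCPU and kGPU placements are supported. CPU destinations go through the
// per-dtype Tensor::CopyToCpuImpl path; GPU destinations are filled with
// paddle::memory::Copy on the destination device's stream. For GPU
// destinations, `exec_stream` (if non-null) receives the stream the copy was
// enqueued on; otherwise `cb`/`cb_params` (if set) are launched as a host
// callback on completion; otherwise the copy is synchronized before returning.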
void TensorUtils::CopyTensorImpl(Tensor* p_dst,
                                 const Tensor& src,
                                 void* exec_stream,
                                 CallbackFunc cb,
                                 void* cb_params) {
  Tensor& dst = *p_dst;
  dst.Reshape(src.shape());
  PADDLE_ENFORCE(
      src.place() == PlaceType::kCPU || src.place() == PlaceType::kGPU,
      paddle::platform::errors::InvalidArgument(
          "CopyTensor only support PlaceType kCPU/kGPU now."));
  PADDLE_ENFORCE(
      dst.place() == PlaceType::kCPU || dst.place() == PlaceType::kGPU,
      paddle::platform::errors::InvalidArgument(
          "CopyTensor only support PlaceType kCPU/kGPU now."));
  // copy to cpu, gpu => cpu or cpu => cpu
  if (dst.place() == PlaceType::kCPU) {
    switch (src.type()) {
      case PaddleDType::INT32:
        src.CopyToCpuImpl(dst.mutable_data<int32_t>(PlaceType::kCPU),
                          exec_stream,
                          cb,
                          cb_params);
        break;
      case PaddleDType::INT64:
        src.CopyToCpuImpl(dst.mutable_data<int64_t>(PlaceType::kCPU),
                          exec_stream,
                          cb,
                          cb_params);
        break;
      case PaddleDType::FLOAT64:
        src.CopyToCpuImpl(dst.mutable_data<double>(PlaceType::kCPU),
                          exec_stream,
                          cb,
                          cb_params);
        break;
      case PaddleDType::FLOAT32:
        src.CopyToCpuImpl(dst.mutable_data<float>(PlaceType::kCPU),
                          exec_stream,
                          cb,
                          cb_params);
        break;
      case PaddleDType::UINT8:
        src.CopyToCpuImpl(dst.mutable_data<uint8_t>(PlaceType::kCPU),
                          exec_stream,
                          cb,
                          cb_params);
        break;
      case PaddleDType::INT8:
        src.CopyToCpuImpl(dst.mutable_data<int8_t>(PlaceType::kCPU),
                          exec_stream,
                          cb,
                          cb_params);
        break;
      case PaddleDType::BOOL:
        src.CopyToCpuImpl(dst.mutable_data<bool>(PlaceType::kCPU),
                          exec_stream,
                          cb,
                          cb_params);
        break;
      case PaddleDType::FLOAT16:
        src.CopyToCpuImpl(
            dst.mutable_data<paddle::platform::float16>(PlaceType::kCPU),
            exec_stream,
            cb,
            cb_params);
        break;
      default:
        PADDLE_THROW(paddle::platform::errors::Unimplemented(
            "Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16, FLOAT32 and "
            "FLOAT64 are supported in Tensor; other data types are not "
            "implemented yet."));
    }
    // gpu => gpu or cpu => gpu
  } else {
#if defined(PADDLE_WITH_CUDA)
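    // Resolve the typed destination/source buffers and the number of bytes to
    // copy for the element type of `src`.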
    void* dst_data = nullptr;
    void* src_data = nullptr;
    size_t data_len = 0;
    int data_size = 0;
    PlaceType src_place;
    switch (src.type()) {
      case PaddleDType::INT32:
        dst_data =
            static_cast<void*>(dst.mutable_data<int32_t>(PlaceType::kGPU));
        src_data =
            static_cast<void*>(src.data<int32_t>(&src_place, &data_size));
        data_len = data_size * sizeof(int32_t);
        break;
      case PaddleDType::INT64:
        dst_data =
            static_cast<void*>(dst.mutable_data<int64_t>(PlaceType::kGPU));
        src_data =
            static_cast<void*>(src.data<int64_t>(&src_place, &data_size));
        data_len = data_size * sizeof(int64_t);
        break;
      case PaddleDType::FLOAT64:
        dst_data =
            static_cast<void*>(dst.mutable_data<double>(PlaceType::kGPU));
        src_data = static_cast<void*>(src.data<double>(&src_place, &data_size));
        data_len = data_size * sizeof(double);
        break;
      case PaddleDType::FLOAT32:
        dst_data = static_cast<void*>(dst.mutable_data<float>(PlaceType::kGPU));
        src_data = static_cast<void*>(src.data<float>(&src_place, &data_size));
        data_len = data_size * sizeof(float);
        break;
      case PaddleDType::UINT8:
        dst_data =
            static_cast<void*>(dst.mutable_data<uint8_t>(PlaceType::kGPU));
        src_data =
            static_cast<void*>(src.data<uint8_t>(&src_place, &data_size));
        data_len = data_size * sizeof(uint8_t);
        break;
      case PaddleDType::INT8:
        dst_data =
            static_cast<void*>(dst.mutable_data<int8_t>(PlaceType::kGPU));
        src_data = static_cast<void*>(src.data<int8_t>(&src_place, &data_size));
        data_len = data_size * sizeof(int8_t);
        break;
      case PaddleDType::BOOL:
        dst_data = static_cast<void*>(dst.mutable_data<bool>(PlaceType::kGPU));
        src_data = static_cast<void*>(src.data<bool>(&src_place, &data_size));
        data_len = data_size * sizeof(bool);
        break;
      case PaddleDType::FLOAT16:
        dst_data = static_cast<void*>(
            dst.mutable_data<paddle::platform::float16>(PlaceType::kGPU));
        src_data = static_cast<void*>(
            src.data<paddle::platform::float16>(&src_place, &data_size));
        data_len = data_size * sizeof(paddle::platform::float16);
        break;
      default:
        PADDLE_THROW(paddle::platform::errors::Unimplemented(
            "Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16, FLOAT32 and "
            "FLOAT64 are supported in Tensor; other data types are not "
            "implemented yet."));
    }

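    // Use the device context of the destination GPU; its stream carries the
    // copy below.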
    paddle::platform::DeviceContextPool& pool =
        paddle::platform::DeviceContextPool::Instance();
    paddle::platform::CUDAPlace gpu_place(dst.device_);
    auto* dev_ctx = static_cast<const phi::GPUContext*>(pool.Get(gpu_place));

    if (src.place() == PlaceType::kCPU) {
      paddle::memory::Copy(gpu_place,
                           static_cast<void*>(dst_data),
                           paddle::platform::CPUPlace(),
                           src_data,
                           data_len,
                           dev_ctx->stream());
    } else {
      paddle::memory::Copy(gpu_place,
                           static_cast<void*>(dst_data),
                           paddle::platform::CUDAPlace(),
                           src_data,
                           data_len,
                           dev_ctx->stream());
    }

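    // Completion handling: hand the stream back through `exec_stream` if
    // provided, otherwise enqueue the host callback, otherwise block until the
    // copy has finished.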
    if (nullptr != exec_stream) {
      *(static_cast<cudaStream_t*>(exec_stream)) = dev_ctx->stream();
    } else if (cb) {
      cudaLaunchHostFunc(dev_ctx->stream(), cb, cb_params);
    } else {
      cudaStreamSynchronize(dev_ctx->stream());
    }
#else
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "Can not copy tensor to GPU CUDA place because paddle is not compiled "
        "with CUDA."));
#endif
  }
  return;
}

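// Synchronous copy: blocks until `src` has been fully copied into `*p_dst`.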
void TensorUtils::CopyTensor(Tensor* p_dst, const Tensor& src) {
  CopyTensorImpl(p_dst, src, nullptr, nullptr, nullptr);
}

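// Asynchronous variant of CopyTensor. For GPU destinations the CUDA stream the
// copy is enqueued on is written back through `exec_stream` (which should
// point to a cudaStream_t); for CPU destinations the pointer is forwarded to
// Tensor::CopyToCpuImpl. A minimal usage sketch (hypothetical caller code,
// copying into a GPU tensor; `dst_gpu_tensor` and `src_tensor` are
// placeholders):
//
//   cudaStream_t copy_stream = nullptr;
//   TensorUtils::CopyTensorAsync(&dst_gpu_tensor, src_tensor,
//                                static_cast<void*>(&copy_stream));
//   // ... queue more work on copy_stream, then synchronize it before
//   // reading from dst_gpu_tensor.
//   cudaStreamSynchronize(copy_stream);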
void TensorUtils::CopyTensorAsync(Tensor* p_dst,
                                  const Tensor& src,
                                  void* exec_stream) {
  CopyTensorImpl(p_dst, src, exec_stream, nullptr, nullptr);
}

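// Asynchronous variant of CopyTensor that reports completion through a host
// callback: for GPU destinations `cb(cb_params)` is enqueued on the copy
// stream via cudaLaunchHostFunc; for CPU destinations the callback is
// forwarded to Tensor::CopyToCpuImpl.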
void TensorUtils::CopyTensorAsync(Tensor* p_dst,
                                  const Tensor& src,
                                  CallbackFunc cb,
                                  void* cb_params) {
  CopyTensorImpl(p_dst, src, nullptr, cb, cb_params);
}

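// Pimpl state for Status: an error code (0 means success) and a human-readable
// message.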
struct Status::Impl {
  int ec{0};
  std::string msg;
};

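// Status keeps its state behind a shared Impl; the default constructor starts
// in the OK state, and copy construction/assignment deep-copy the Impl so two
// Status objects never share error state.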
Status::Status() : impl_(std::make_shared<Impl>()) {}
Status::Status(const Status& status) : impl_(std::make_shared<Impl>()) {
  *impl_ = *status.impl_;
}

Status& Status::operator=(const Status& status) noexcept {
  *impl_ = *status.impl_;
  return *this;
}
Status::Status(std::exception_ptr e) : impl_(std::make_shared<Impl>()) {
  constexpr int kDefaultError{-1};
  impl_->ec = kDefaultError;
  try {
    std::rethrow_exception(e);
  } catch (paddle::platform::EnforceNotMet& e) {
    // Add one to the error code to make the number zero a non-error
    // status code.
    impl_->ec = e.code() + 1;
    impl_->msg = e.what();
  } catch (const std::exception& e) {
    impl_->msg = e.what();
  }
}
Status Status::OK() { return Status(); }
bool Status::ok() const noexcept { return impl_->ec == 0; }
Status::Code Status::code() const noexcept { return impl_->ec; }
const std::string& Status::error_message() const noexcept { return impl_->msg; }
bool Status::operator==(const Status& x) const noexcept {
  return code() == x.code() && error_message() == x.error_message();
}
bool Status::operator!=(const Status& x) const noexcept {
  return !(*this == x);
}

}  // namespace contrib
}  // namespace paddle_infer