// Copyright (c) 2019 Cambricon Authors. All Rights Reserved.

#include <Eigen/Core>
#include "lite/backends/mlu/target_wrapper.h"
#include "lite/core/kernel.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace mlu {

using TargetW = TargetWrapper<TARGET(kMLU)>;

// Blocking copy from host memory to MLU device memory.
void CopyFromHostSync(void* target, const void* source, size_t size) {
  TargetW::MemcpySync(target, source, size, IoDirection::HtoD);
}

// Blocking copy from MLU device memory to host memory.
void CopyToHostSync(void* target, const void* source, size_t size) {
  TargetW::MemcpySync(target, source, size, IoDirection::DtoH);
}
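
// A minimal usage sketch of the helpers above (hedged: `n`, `host_buf`, and
// the TargetW::Malloc call are illustrative assumptions, not code from this
// file):
//   std::vector<float> host_buf(n);
//   void* dst = TargetW::Malloc(host_buf.size() * sizeof(float));
//   CopyFromHostSync(dst, host_buf.data(), host_buf.size() * sizeof(float));
//   CopyToHostSync(host_buf.data(), dst, host_buf.size() * sizeof(float));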

/*
 * This kernel copies a tensor from host to MLU space.
 */
template <PrecisionType Precision>
class IoCopyHostToMluCompute
    : public KernelLite<TARGET(kMLU), Precision, DATALAYOUT(kNHWC)> {
 public:
  using handler_t = KernelBase::type_infer_handler_t;
  using param_t = operators::IoCopyParam;

  void Run() override {
    auto& param = this->template Param<param_t>();
    CHECK(param.x->target() == TARGET(kHost) ||
          param.x->target() == TARGET(kX86));
    auto mem_size = param.x->memory_size();
    // LOG(INFO) << "copy size " << mem_size;
    auto* data = param.y->mutable_data(TARGET(kMLU), mem_size);
    CopyFromHostSync(data, param.x->raw_data(), mem_size);
  }

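  // Type-inference hook: given the host-side input type, report an output
  // type with the same tensor id, precision, and layout but placed on kMLU,
  // so the framework knows where this kernel's result lives.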
  std::unique_ptr<handler_t> GetTypeInferHandler() override {
    std::unique_ptr<handler_t> res(new handler_t);
    *res = [](const std::map<std::string, const Type*>& inputs,
              const std::string& out) -> const Type* {
      CHECK(!inputs.empty());
      auto* type = inputs.at("Input");
      CHECK(type->target() == TARGET(kHost));

      auto out_place = type->place();
      out_place.target = TARGET(kMLU);
      auto* out_type = Type::Get(type->id(),
                                 out_place.target,
                                 out_place.precision,
                                 out_place.layout,
                                 out_place.device);
      return out_type;
    };
    return res;
  }

  std::string doc() const override { return "Copy IO from HOST to MLU"; }
};

/*
 * This kernel copies a tensor from MLU to host space.
 */
template <PrecisionType Precision>
class IoCopyMluToHostCompute
    : public KernelLite<TARGET(kMLU), Precision, DATALAYOUT(kNHWC)> {
 public:
  void Run() override {
    auto& param = this->template Param<operators::IoCopyParam>();
    CHECK(param.x->target() == TARGET(kMLU));
    auto mem_size = param.x->memory_size();
    auto* data = param.y->mutable_data(TARGET(kHost), mem_size);

    // Synchronize the MLU execution queue so all pending work producing the
    // input tensor has completed before the device-to-host copy.
    auto& mlu_context = this->ctx_->template As<MLUContext>();
    CNRT_CALL(cnrtSyncQueue(mlu_context.exec_queue()));

    CopyToHostSync(data, param.x->raw_data(), mem_size);
  }

  std::string doc() const override { return "Copy IO from MLU to HOST"; }
};

}  // namespace mlu
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(
    io_copy,
    kMLU,
    kFloat,
    kNHWC,
    paddle::lite::kernels::mlu::IoCopyHostToMluCompute<PRECISION(kFloat)>,
    host_to_device_kFloat)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kMLU))})
    .Finalize();

REGISTER_LITE_KERNEL(
    io_copy,
    kMLU,
    kFP16,
    kNHWC,
    paddle::lite::kernels::mlu::IoCopyHostToMluCompute<PRECISION(kFP16)>,
    host_to_device_kFP16)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kMLU))})
    .Finalize();

REGISTER_LITE_KERNEL(
    io_copy,
    kMLU,
    kFloat,
    kNHWC,
    paddle::lite::kernels::mlu::IoCopyMluToHostCompute<PRECISION(kFloat)>,
    device_to_host_kFloat)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kMLU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();

REGISTER_LITE_KERNEL(
    io_copy,
    kMLU,
    kFP16,
    kNHWC,
    paddle::lite::kernels::mlu::IoCopyMluToHostCompute<PRECISION(kFP16)>,
    device_to_host_kFP16)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kMLU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();
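
// Note: the registrations above expose four io_copy variants, selected by
// precision and direction (alias names host_to_device_kFloat/_kFP16 and
// device_to_host_kFloat/_kFP16). As a hedged sketch of how a variant is
// addressed, a graph pass would request a kernel for a place such as
//   Place{TARGET(kMLU), PRECISION(kFP16), DATALAYOUT(kNHWC)}
// and pick the matching alias; the actual selection logic lives in the
// framework's kernel-picking passes, not in this file.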