// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/cuda/target_wrapper.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace cuda {

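// Shorthand for the CUDA TargetWrapper, which supplies the synchronous and
// asynchronous memcpy primitives used by the helpers below.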
using TargetW = TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t>;

// Host to CUDA memory.
void CopyFromHostSync(void* target, const void* source, size_t size) {
  TargetW::MemcpySync(target, source, size, IoDirection::HtoD);
}

void CopyFromHostAsync(void* target, const void* source, size_t size,
                       TargetW::stream_t stream) {
  TargetW::MemcpyAsync(target, source, size, IoDirection::HtoD, stream);
}
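
// A minimal usage sketch of the async variant (illustration only, not part of
// the kernel path). It assumes the caller owns a live stream and keeps the
// host buffer valid until the copy completes; `cuda_ptr`, `host_ptr` and
// `size` are placeholder names:
//
//   cudaStream_t stream;
//   cudaStreamCreate(&stream);
//   CopyFromHostAsync(cuda_ptr, host_ptr, size, stream);
//   cudaStreamSynchronize(stream);  // or record and wait on a cudaEvent_t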

// CUDA to Host memory.
void CopyToHostSync(void* target, const void* source, size_t size) {
  TargetW::MemcpySync(target, source, size, IoDirection::DtoH);
}

/*
 * This kernel copies a tensor from host to CUDA space.
 */
class IoCopyHostToCudaCompute
    : public OpKernel<TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)> {
 public:
  void Run() override {
    auto& param = Param<operators::IoCopyParam>();
    CHECK(param.x->target() == TARGET(kHost) ||
          param.x->target() == TARGET(kX86));
    LOG(INFO) << "copy size " << param.x->memory_size();
    auto* data = param.y->mutable_data(TARGET(kCUDA), param.x->memory_size());
    CopyFromHostSync(data, param.x->data<void>(), param.x->memory_size());
  }

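  // Type inference for this kernel: the handler maps a host-resident input
  // type to the same type with its target switched to CUDA, so downstream
  // passes know the output tensor lives in device memory.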
  std::unique_ptr<type_infer_handler_t> GetTypeInferHandler() override {
    std::unique_ptr<type_infer_handler_t> res(new type_infer_handler_t);
    *res = [](const std::map<std::string, const Type*>& inputs,
              const std::string& out) -> const Type* {
      CHECK(!inputs.empty());
      auto* type = inputs.at("Input");
      CHECK(type->target() == TARGET(kHost));

      auto out_place = type->place();
      out_place.target = TARGET(kCUDA);
      auto* out_type = LookupType(type->id(), type->IsUnsupported(),
                                  type->IsUnsupported(), out_place);
      return out_type;
    };
    return res;
  }

  std::string doc() const override { return "Copy IO from HOST to CUDA"; }
};

/*
 * This kernel copies a tensor from CUDA to host space.
 */
class IoCopyCudaToHostCompute
    : public OpKernel<TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)> {
 public:
  void Run() override {
    auto& param = Param<operators::IoCopyParam>();
    CHECK(param.x->target() == TARGET(kCUDA));
    auto* data = param.y->mutable_data(TARGET(kHost), param.x->memory_size());
    LOG(INFO) << "copy size " << param.x->memory_size();
    CopyToHostSync(data, param.x->data<void>(), param.x->memory_size());
  }

  std::string doc() const override { return "Copy IO from CUDA to HOST"; }
};

}  // namespace cuda
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(io_copy, kCUDA, kAny, kAny,
                     paddle::lite::kernels::cuda::IoCopyHostToCudaCompute,
                     host_to_device)
    .BindInput("Input", {paddle::lite::Type::Get<paddle::lite::TensorAnyTy>(
                            TARGET(kHost))})
    .BindOutput("Out", {paddle::lite::Type::Get<paddle::lite::TensorAnyTy>(
                           TARGET(kCUDA))})
    .Finalize();

REGISTER_LITE_KERNEL(io_copy, kCUDA, kAny, kAny,
                     paddle::lite::kernels::cuda::IoCopyCudaToHostCompute,
                     device_to_host)
    .BindInput("Input", {paddle::lite::Type::Get<paddle::lite::TensorAnyTy>(
                            TARGET(kCUDA))})
    .BindOutput("Out", {paddle::lite::Type::Get<paddle::lite::TensorAnyTy>(
                           TARGET(kHost))})
    .Finalize();
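
// The two registrations above expose this kernel under the `host_to_device`
// and `device_to_host` aliases. In Paddle-Lite, io_copy kernels such as these
// are typically inserted by the graph optimization passes whenever a tensor
// has to cross the host/CUDA boundary, rather than being written into a model
// by hand; the exact pass involved depends on the framework version.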