// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "lite/kernels/cuda/fetch_compute.h" #include "lite/core/op_registry.h" namespace paddle { namespace lite { namespace kernels { namespace cuda { template void FetchCompute::Run() { auto& param = this->template Param(); auto& ctx = this->ctx_->template As(); auto stream = ctx.exec_stream(); auto* fetch_list = param.fetch_list; if (fetch_list->size() <= static_cast(param.col)) { fetch_list->resize(param.col + 1); } int num = static_cast(param.input->numel()); auto& dst = fetch_list->at(param.col); dst.Resize(param.input->dims()); auto output = dst.template mutable_data(); TargetW::MemcpyAsync(output, param.input->template data(), num * sizeof(T), IoDirection::DtoH, stream); } } // namespace cuda } // namespace kernels } // namespace lite } // namespace paddle typedef paddle::lite::kernels::cuda::FetchCompute FetchFp32; // When the model ends with a cpu kernel, adding cuda's fetch kernel will add // useless io_copy // REGISTER_LITE_KERNEL(fetch, kCUDA, kFloat, kNCHW, FetchFp32, nchw) // .BindInput("X", // {LiteType::GetTensorTy(TARGET(kCUDA), // PRECISION(kFloat), // DATALAYOUT(kNCHW))}) // .BindOutput("Out", // {LiteType::GetTensorTy(TARGET(kHost), // PRECISION(kFloat), // DATALAYOUT(kNCHW))}) // .Finalize(); // // REGISTER_LITE_KERNEL(fetch, kCUDA, kFloat, kNHWC, FetchFp32, nhwc) // .BindInput("X", // {LiteType::GetTensorTy(TARGET(kCUDA), // PRECISION(kFloat), // DATALAYOUT(kNHWC))}) // .BindOutput("Out", // {LiteType::GetTensorTy(TARGET(kHost), // PRECISION(kFloat), // DATALAYOUT(kNHWC))}) // .Finalize();