/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ge_runtime/task/cce_task.h"
#include "ge_runtime/task/task_factory.h"

namespace ge {
namespace model_runner {
// Builds a CCE task from its task description and selects the stream it will be launched on.
//
// Stream selection:
//   - exactly one stream in the model context: that stream is used, whatever the stream id is;
//   - otherwise the task's stream_id() indexes the stream list;
//   - an out-of-range stream id only logs a warning and leaves stream_ null, which makes a
//     later Distribute() fail.
//
// A null task_info is reported with a warning and the constructor returns early with stream_
// still null, so the task description is never dereferenced.
CceTask::CceTask(const ModelContext &model_context, const std::shared_ptr<CceTaskInfo> &task_info)
    : TaskRepeater<CceTaskInfo>(model_context, task_info),
      task_info_(task_info),
      stream_(nullptr),
      stub_func_(nullptr),
      args_(nullptr),
      sm_desc_(nullptr),
      flowtable_(nullptr),
      is_flowtable_(false) {
  if (task_info_ == nullptr) {
    GELOGW("task_info_ is null!");
    // Without a task description there is no stream id to look up; bail out before touching it.
    return;
  }

  auto stream_list = model_context.stream_list();
  if (stream_list.size() == 1) {
    stream_ = stream_list[0];
  } else if (stream_list.size() > task_info_->stream_id()) {
    stream_ = stream_list[task_info_->stream_id()];
  } else {
    GELOGW("index: %u >= stream_list.size(): %zu.", task_info_->stream_id(), stream_list.size());
  }
}

// Releases every buffer Distribute() may have allocated. Failures are logged, never propagated.
CceTask::~CceTask() {
  FreeRtMem(&args_);
  FreeRtMem(&flowtable_);

  // sm_desc_ is obtained through rtMemAllocManaged, so it is released with the matching call
  // rather than through FreeRtMem.
  rtError_t ret = (sm_desc_ != nullptr) ? rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE;
  if (ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret);
  }
  sm_desc_ = nullptr;
}

// Frees the buffer *ptr with rtFree and resets *ptr to nullptr.
// A null ptr or a null *ptr is a no-op. A failing rtFree is logged and the pointer is still reset.
void CceTask::FreeRtMem(void **ptr) noexcept {
  if (ptr == nullptr || *ptr == nullptr) {
    return;
  }
  rtError_t ret = rtFree(*ptr);
  if (ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret);
  }

  *ptr = nullptr;
}

// Uploads the kernel arguments and launches the kernel on stream_.
//
// Steps, in order:
//   1. resolve the kernel stub from task_info_->stub_func();
//   2. if is_flowtable_ is set, copy the flow table to the device and write its device address
//      into the host-side args at the offset read from args_offset();
//   3. copy the args to the device;
//   4. if sm_desc() is non-empty, copy it into managed memory;
//   5. call rtKernelLaunch.
//
// Returns true when the launch call succeeds, false on the first failing step (each failure is
// logged). Buffers allocated before a failure stay owned by this object and are released by the
// destructor.
bool CceTask::Distribute() {
  GELOGI("Distribute CCETask start.");
  if (task_info_ == nullptr) {
    GELOGE(PARAM_INVALID, "task_info_ is null!");
    return false;
  }
  if (stream_ == nullptr) {
    GELOGE(PARAM_INVALID, "stream_ is null!");
    return false;
  }

  // Get stub_func
  if (task_info_->stub_func().empty()) {
    GELOGE(PARAM_INVALID, "kernel_info->stub_func is empty!");
    return false;
  }

  rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(task_info_->stub_func().c_str()), &stub_func_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtGetFunctionByName failed, ret: 0x%X", rt_ret);
    stub_func_ = nullptr;
    return false;
  }

  // Flowtable
  // NOTE(review): is_flowtable_ is initialised to false in the constructor and is not assigned
  // anywhere else in this file, so this branch looks unreachable from here - confirm.
  if (is_flowtable_) {
    rt_ret = rtMalloc(&flowtable_, task_info_->flow_table().size(), RT_MEMORY_HBM);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
      return false;
    }

    rt_ret = rtMemcpy(flowtable_, task_info_->flow_table().size(), task_info_->flow_table().data(),
                      task_info_->flow_table().size(), RT_MEMCPY_HOST_TO_DEVICE);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
      return false;
    }

    // Modify flowtable addr in args
    auto args = const_cast<uint8_t *>(task_info_->args().data());
    // The patch offset is read as the first uint16_t of args_offset(), so at least two bytes
    // must be present.
    if (task_info_->args_offset().size() < sizeof(uint16_t)) {
      GELOGE(FAILED, "size of args_offset is smaller than sizeof(uint16_t).");
      return false;
    }
    auto task_offset = reinterpret_cast<uint16_t *>(const_cast<uint8_t *>(task_info_->args_offset().data()));
    // The address written below must fit entirely inside the host args buffer.
    if (task_info_->args().size() < (task_offset[0] + sizeof(uint64_t))) {
      GELOGE(FAILED, "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu",
             static_cast<uint32_t>(task_offset[0]), sizeof(uint64_t), task_info_->args().size());
      return false;
    }
    *(reinterpret_cast<uintptr_t *>(args + task_offset[0])) = reinterpret_cast<uintptr_t>(flowtable_);
  }

  // Args
  rt_ret = rtMalloc(&args_, task_info_->args_size(), RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }

  rt_ret = rtMemcpy(args_, task_info_->args_size(), task_info_->args().data(), task_info_->args_size(),
                    RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }

  // L2 sm_desc
  if (!task_info_->sm_desc().empty()) {
    rt_ret = rtMemAllocManaged(&sm_desc_, task_info_->sm_desc().size(), RT_MEMORY_SPM);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
      return false;
    }

    rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(), task_info_->sm_desc().data(),
                      task_info_->sm_desc().size(), RT_MEMCPY_HOST_TO_DEVICE);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
      return false;
    }
  }

  // Kernel launch
  rt_ret = rtKernelLaunch(stub_func_, task_info_->block_dim(), args_, task_info_->args_size(),
                          static_cast<rtSmDesc_t *>(sm_desc_), stream_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }
  return true;
}

// Registers CceTask with the task factory for task descriptions of type kCce.
REGISTER_TASK(TaskInfoType::kCce, CceTask, CceTaskInfo);
}  // namespace model_runner
}  // namespace ge