diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/README.md b/mindspore/lite/README.md
similarity index 100%
rename from mindspore/lite/src/runtime/kernel/arm/nnacl/README.md
rename to mindspore/lite/README.md
diff --git a/mindspore/lite/src/runtime/parallel_executor.cc b/mindspore/lite/src/runtime/parallel_executor.cc
index fc7e6eaa27052294db20f1e75728b07a4fedeebd..5d4c983cea175e47ebd759b3c00359dd46cf4e90 100644
--- a/mindspore/lite/src/runtime/parallel_executor.cc
+++ b/mindspore/lite/src/runtime/parallel_executor.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <utility>
 #include "src/runtime/parallel_executor.h"
 using mindspore::predict::ThreadPool;
 using mindspore::predict::TvmEnv;
@@ -25,25 +26,15 @@ ParallelExecutor::~ParallelExecutor() { }
 
 int ParallelExecutor::Prepare(std::vector<mindspore::kernel::LiteKernel *> &kernels) {
   pool = new ThreadPool();
-  pool->ConfigThreadPool(NO_BIND, MAX_THREAD_NUM);
-  for (mindspore::kernel::LiteKernel *kernel : kernels) {
-    refCount[kernel] = kernel->out_kernels().size();
+  if (pool == nullptr) {
+    MS_LOG(ERROR) << "Memory error: fail to new ThreadPool";
+    return RET_ERROR;
   }
+  pool->ConfigMaxThreadNum(MAX_THREAD_NUM);
+  pool->ConfigThreadPool(NO_BIND, MAX_THREAD_NUM);
   return RET_OK;
 }
 
-void ParallelExecutor::PrepareReadyKernels(const std::vector<mindspore::kernel::LiteKernel *> &kernels) {
-  for (auto iter = refCount.begin(); iter != refCount.end();) {
-    if (iter->second == 0) {
-      readyKernels.emplace_back(iter->first);
-      iter = refCount.erase(iter);
-    } else {
-      iter++;
-    }
-  }
-  results.resize(readyKernels.size());
-}
-
 static int RunKernel(int index, TvmEnv *env, void *data) {
   ParallelExecutor *executor = reinterpret_cast<ParallelExecutor *>(data);
   auto kernel = executor->GetReadyKernel(index);
@@ -83,27 +74,49 @@ int ParallelExecutor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *>
   }
   kernel::LiteKernelUtil::InitTensorRefCount(kernels);
 
-  PrepareReadyKernels(kernels);
+  for (auto kernel : kernels) {
+    if (kernel->in_kernels().size() == 0) {
+      readyKernels.emplace_back(kernel);
+      continue;
+    }
+    refCount[kernel] = kernel->in_kernels().size();
+  }
+  std::vector<kernel::LiteKernel *> newReadyKernels;
   while (readyKernels.size() > 0) {
+    results.resize(readyKernels.size(), RET_OK);
     pool->LaunchWork(RunKernel, this, readyKernels.size());
 
     if (std::find_if(results.begin(), results.end(), [](const int &ret) { return (ret != 0); }) != results.end()) {
      return RET_ERROR;
     }
-    for (auto completedKernel : readyKernels) {
-      for (auto out : completedKernel->out_kernels()) {
+    newReadyKernels.clear();
+    for (auto completed : readyKernels) {
+      for (auto out : completed->out_kernels()) {
         auto iter = refCount.find(out);
         if (iter == refCount.end()) {
           continue;
         }
         (iter->second)--;
         if (iter->second <= 0) {
+          newReadyKernels.emplace_back(iter->first);
           refCount.erase(iter);
         }
       }
+
+      for (auto input_kernel : completed->in_kernels()) {
+        MS_ASSERT(input_kernel != nullptr);
+        if (input_kernel->is_model_output()) {
+          continue;
+        }
+        auto ret = input_kernel->DecOutTensorRefCount();
+        if (0 != ret) {
+          MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << completed->name() << " failed";
+          return -1;
+        }
+      }
     }
     readyKernels.clear();
-    PrepareReadyKernels(kernels);
+    readyKernels = std::move(newReadyKernels);
   }
 
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/parallel_executor.h b/mindspore/lite/src/runtime/parallel_executor.h
index fd47ca38d6be6f53d2be8dc3ed2d76cd9e1e6c4b..492d59911034ad122856d58fd40178da7f9dcac7 100644
--- a/mindspore/lite/src/runtime/parallel_executor.h
+++ b/mindspore/lite/src/runtime/parallel_executor.h
@@ -39,9 +39,6 @@ class ParallelExecutor : public Executor {
   inline kernel::LiteKernel *GetReadyKernel(const int index) { return readyKernels.at(index); }
   inline void SetResult(const int index, const int result) { results.at(index) = result; }
 
- private:
-  void PrepareReadyKernels(const std::vector<kernel::LiteKernel *> &kernels);
-
  private:
   predict::ThreadPool *pool;
   std::unordered_map<kernel::LiteKernel *, size_t> refCount;
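
The patch above replaces the repeated `PrepareReadyKernels()` full-map rescans with an in-place wavefront schedule: kernels with no producers seed `readyKernels`, each completed wave decrements the pending-producer count of its consumers in `refCount`, and consumers that reach zero form the next wave. Below is a minimal standalone sketch of that scheduling idea; `Node`, `Schedule`, and the `main` driver are hypothetical stand-ins for illustration only (the real code dispatches each wave in parallel through `ThreadPool::LaunchWork` and also releases input tensors via `DecOutTensorRefCount`).

// Hypothetical sketch of the wavefront (topological) scheduling used above.
// Node stands in for kernel::LiteKernel; nothing here is a MindSpore API.
#include <cstdio>
#include <unordered_map>
#include <utility>
#include <vector>

struct Node {
  const char *name;
  std::vector<Node *> in_nodes;   // producers, cf. kernel->in_kernels()
  std::vector<Node *> out_nodes;  // consumers, cf. kernel->out_kernels()
};

void Schedule(const std::vector<Node *> &nodes) {
  std::unordered_map<Node *, size_t> refCount;  // pending producers per node
  std::vector<Node *> ready;
  // Seed the ready set with source nodes, as the patched Run() does.
  for (Node *n : nodes) {
    if (n->in_nodes.empty()) {
      ready.emplace_back(n);
    } else {
      refCount[n] = n->in_nodes.size();
    }
  }
  std::vector<Node *> next;
  while (!ready.empty()) {
    // The real executor runs the whole wave in parallel via LaunchWork.
    for (Node *n : ready) {
      std::printf("run %s\n", n->name);
    }
    next.clear();
    // A consumer becomes ready once its last pending producer completes.
    for (Node *done : ready) {
      for (Node *out : done->out_nodes) {
        auto it = refCount.find(out);
        if (it == refCount.end()) {
          continue;
        }
        if (--(it->second) == 0) {
          next.emplace_back(it->first);
          refCount.erase(it);
        }
      }
    }
    ready = std::move(next);  // mirrors readyKernels = std::move(newReadyKernels)
  }
}

int main() {
  Node a{"a"}, b{"b"}, c{"c"};
  a.out_nodes = {&b, &c};
  b.in_nodes = {&a};
  c.in_nodes = {&a};
  std::vector<Node *> graph{&a, &b, &c};
  Schedule(graph);  // wave 1: a; wave 2: b and c
  return 0;
}

Building the next wave while draining the current one is what lets the patch delete `PrepareReadyKernels()`: the old helper rescanned the entire `refCount` map after every wave, whereas the new loop only touches the consumers of kernels that just finished.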