Commit 10be5005, authored by mindspore-ci-bot, committed by Gitee

!4716 fix fp16 mean transpose

Merge pull request !4716 from zhaozhenlong/lite/issue/fix_fp16_transpose
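Summary of the change: each fp16 kernel touched below (concat, reduce/mean, transpose) moves its temporary fp16 buffer handling into paired Malloc/Free helpers. `ReSize()` frees any previous buffers, allocates fresh ones, and backs out cleanly on failure; `Run()` and every error path release through the single free helper; the fp16 kernel registrations (`REG_KERNEL`) are also re-enabled. The sketch below is a simplified, self-contained model of that ownership pattern: `ToyFp16Kernel`, `Allocator`, and the plain `uint16_t` stand-in for `float16_t` are illustrative only, not the actual MindSpore Lite classes.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdlib>

// Stand-in for the lite allocator used by the kernels in this commit.
struct Allocator {
  void *Malloc(size_t size) { return std::malloc(size); }
  void Free(void *ptr) { std::free(ptr); }
};

// Toy kernel showing the Malloc/Free pairing applied throughout this commit:
// ReSize() frees buffers from a previous resize, allocates fresh ones, and
// bails out cleanly on failure; Run() releases them through one helper
// instead of per-branch frees.
class ToyFp16Kernel {
 public:
  int ReSize(int element_num) {
    FreeTmpBuffer();          // drop buffers left over from a previous ReSize
    element_num_ = element_num;
    int ret = MallocTmpBuffer();
    if (ret != 0) {
      FreeTmpBuffer();        // do not leak partially allocated buffers
      return ret;
    }
    return 0;
  }

  int Run() {
    // ... convert fp32 -> fp16 into fp16_input_, compute, convert back ...
    FreeTmpBuffer();          // single release point on the success path
    return 0;
  }

 private:
  int MallocTmpBuffer() {
    fp16_input_ = static_cast<uint16_t *>(allocator_.Malloc(element_num_ * sizeof(uint16_t)));
    return fp16_input_ == nullptr ? -1 : 0;
  }

  void FreeTmpBuffer() {
    if (fp16_input_ != nullptr) {
      allocator_.Free(fp16_input_);
      fp16_input_ = nullptr;
    }
  }

  Allocator allocator_;
  uint16_t *fp16_input_ = nullptr;
  int element_num_ = 0;
};
```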
@@ -43,7 +43,15 @@ int ConcatFp16CPUKernel::Init() {
 int ConcatFp16CPUKernel::ReSize() {
   FreeTmpBuffer();
+  auto ret = MallocTmpBuffer();
+  if (ret != RET_OK) {
+    FreeTmpBuffer();
+    return ret;
+  }
+  return ConcatBaseCPUKernel::ReSize();
+}
+
+int ConcatFp16CPUKernel::MallocTmpBuffer() {
   for (const auto &in_tensor : in_tensors_) {
     float16_t *ptr = nullptr;
     if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
@@ -58,10 +66,6 @@ int ConcatFp16CPUKernel::ReSize() {
   auto &out_tensor = out_tensors_.at(0);
   if (out_tensor->data_type() == kNumberTypeFloat32 || out_tensor->data_type() == kNumberTypeFloat) {
-    if (fp16_output_ != nullptr) {
-      context_->allocator->Free(fp16_output_);
-      fp16_output_ = nullptr;
-    }
     fp16_output_ =
       reinterpret_cast<float16_t *>(context_->allocator->Malloc(sizeof(float16_t) * out_tensors_[0]->ElementsNum()));
     if (fp16_output_ == nullptr) {
@@ -70,17 +74,29 @@ int ConcatFp16CPUKernel::ReSize() {
     }
   }
-  return ConcatBaseCPUKernel::ReSize();
+  return RET_OK;
 }

 void ConcatFp16CPUKernel::FreeTmpBuffer() {
-  for (auto ptr : fp16_inputs_) {
-    if (ptr != nullptr) {
-      context_->allocator->Free(ptr);
-      ptr = nullptr;
+  for (auto i = 0; i < fp16_inputs_.size(); i++) {
+    auto &in_tensor = in_tensors_.at(i);
+    auto in_ptr = fp16_inputs_.at(i);
+    if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
+      if (in_ptr != nullptr) {
+        context_->allocator->Free(in_ptr);
+        in_ptr = nullptr;
+      }
     }
   }
   fp16_inputs_.clear();
+
+  auto &out_tensor = out_tensors_.at(0);
+  if (out_tensor->data_type() == kNumberTypeFloat32 || out_tensor->data_type() == kNumberTypeFloat) {
+    if (fp16_output_ != nullptr) {
+      context_->allocator->Free(fp16_output_);
+      fp16_output_ = nullptr;
+    }
+  }
 }

 int ConcatFp16CPUKernel::Run() {
@@ -119,24 +135,10 @@ int ConcatFp16CPUKernel::Run() {
   ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, axis_, inputs_output_shape.data(),
              output_shape.size(), reinterpret_cast<void *>(fp16_output_));
-  // free fp16 in out buffer
   if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) {
     Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum());
-    context_->allocator->Free(fp16_output_);
-    fp16_output_ = nullptr;
   }
-  for (auto i = 0; i < fp16_inputs_.size(); i++) {
-    const auto in_tensor = in_tensors_[i];
-    if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
-      auto ptr = fp16_inputs_[i];
-      if (ptr != nullptr) {
-        context_->allocator->Free(ptr);
-        ptr = nullptr;
-      }
-    }
-  }
-  fp16_inputs_.clear();
+  FreeTmpBuffer();
   return RET_OK;
 }
@@ -164,5 +166,5 @@ kernel::LiteKernel *CpuConcatFp16KernelCreator(const std::vector<lite::tensor::T
   }
   return kernel;
 }
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Concat, CpuConcatFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Concat, CpuConcatFp16KernelCreator)
 }  // namespace mindspore::kernel
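In the reworked `FreeTmpBuffer()` above, an input entry is released only when its source tensor is fp32, i.e. only when the kernel itself allocated a conversion buffer for it; fp16 entries are left untouched, and the output staging buffer is guarded the same way. A minimal stand-alone sketch of that guard follows (the `Tensor` struct, `FreeFp16Inputs`, and the use of `std::free` in place of the lite allocator are simplifications, not the real API):

```cpp
#include <cstdint>
#include <cstdlib>
#include <vector>

enum DataType { kFloat32, kFloat16 };

struct Tensor {
  DataType type;
  void *data;
};

// Free only the staging buffers the kernel allocated itself (entries for fp32
// inputs that were converted into kernel-owned fp16 buffers); entries for fp16
// inputs are not owned by this helper and are skipped.
void FreeFp16Inputs(const std::vector<Tensor *> &in_tensors, std::vector<uint16_t *> *fp16_inputs) {
  for (size_t i = 0; i < fp16_inputs->size(); ++i) {
    if (in_tensors[i]->type == kFloat32 && (*fp16_inputs)[i] != nullptr) {
      std::free((*fp16_inputs)[i]);
      (*fp16_inputs)[i] = nullptr;
    }
  }
  fp16_inputs->clear();
}
```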
@@ -41,6 +41,7 @@ class ConcatFp16CPUKernel : public ConcatBaseCPUKernel {
   int Run() override;

  private:
+  int MallocTmpBuffer();
   void FreeTmpBuffer();

  private:
...
@@ -58,20 +58,13 @@ int ReduceFp16CPUKernel::Init() {
 }

 int ReduceFp16CPUKernel::ReSize() {
-  if (fp16_input_ != nullptr) {
-    context_->allocator->Free(fp16_input_);
-    fp16_input_ = nullptr;
-  }
-  auto in_tensor = in_tensors_.front();
-  if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
-    fp16_input_ =
-      reinterpret_cast<float16_t *>(context_->allocator->Malloc(in_tensor->ElementsNum() * sizeof(float16_t)));
-    if (fp16_input_ == nullptr) {
-      return RET_ERROR;
-    }
-    Float32ToFloat16(reinterpret_cast<float *>(in_tensor->Data()), fp16_input_, in_tensor->ElementsNum());
+  FreeTmpBuffer();
+  auto ret = MallocTmpBuffer();
+  if (ret != RET_OK) {
+    FreeTmpBuffer();
+    return ret;
   }
-  return MallocTmpBuffer();
+  return RET_OK;
 }

 int ReduceFp16CPUKernel::CallReduceUnit(int task_id) {
@@ -99,9 +92,13 @@ int ReduceFp16CPUKernel::Run() {
   tmp_shape_ = in_tensors_.at(0)->shape();
   auto in_tensor = in_tensors_.at(0);
-  if (in_tensor->data_type() == kNumberTypeFloat16) {
+  if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
+    auto input_data = reinterpret_cast<float *>(in_tensor->Data());
+    Float32ToFloat16(input_data, fp16_input_, in_tensor->ElementsNum());
+  } else {
     fp16_input_ = reinterpret_cast<float16_t *>(in_tensor->Data());
   }
   fp16_src_data_ = fp16_input_;
   for (int i = 0; i < data_buffers_.size(); ++i) {
     fp16_dst_data_ = data_buffers_[i];
@@ -117,6 +114,7 @@ int ReduceFp16CPUKernel::Run() {
     axis_size_ = tmp_shape_[axis];
     auto error_code = LiteBackendParallelLaunch(ReduceImpl, this, context_->thread_num_);
     if (error_code != RET_OK) {
+      FreeTmpBuffer();
      MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
       return RET_ERROR;
     }
@@ -132,16 +130,11 @@ int ReduceFp16CPUKernel::Run() {
     memcpy(out_tensor->Data(), fp16_dst_data_, out_tensor->ElementsNum() * sizeof(float16_t));
   }
-  if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
-    context_->allocator->Free(fp16_input_);
-  }
-  fp16_input_ = nullptr;
   FreeTmpBuffer();
   return RET_OK;
 }

-int ReduceFp16CPUKernel::FreeTmpBuffer() {
+void ReduceFp16CPUKernel::FreeTmpBuffer() {
   for (auto buffer : data_buffers_) {
     if (buffer != nullptr) {
       context_->allocator->Free(buffer);
@@ -149,12 +142,17 @@ int ReduceFp16CPUKernel::FreeTmpBuffer() {
     }
   }
   data_buffers_.clear();
-  return RET_OK;
+
+  auto in_tensor = in_tensors_.at(0);
+  if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
+    if (fp16_input_ != nullptr) {
+      context_->allocator->Free(fp16_input_);
+      fp16_input_ = nullptr;
+    }
+  }
 }

 int ReduceFp16CPUKernel::MallocTmpBuffer() {
-  auto ret = FreeTmpBuffer();
   auto input_shape = in_tensors_.at(0)->shape();
   for (auto i = 0; i < num_axes_; i++) {
     int axis = axes_[i];
@@ -166,13 +164,23 @@ int ReduceFp16CPUKernel::MallocTmpBuffer() {
     }
     float16_t *buffer = reinterpret_cast<float16_t *>(context_->allocator->Malloc(size * sizeof(float16_t)));
     if (buffer == nullptr) {
-      MS_LOG(ERROR) << "Malloc data failed.";
+      MS_LOG(ERROR) << "Malloc data failed";
       return RET_ERROR;
     }
     data_buffers_.emplace_back(buffer);
     input_shape[axis] = 1;
   }
-  return ret;
+
+  auto in_tensor = in_tensors_.front();
+  if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
+    fp16_input_ =
+      reinterpret_cast<float16_t *>(context_->allocator->Malloc(in_tensor->ElementsNum() * sizeof(float16_t)));
+    if (fp16_input_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc data failed";
+      return RET_ERROR;
+    }
+  }
+  return RET_OK;
 }

 kernel::LiteKernel *CpuReduceFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
@@ -235,6 +243,6 @@ kernel::LiteKernel *CpuMeanFp16KernelCreator(const std::vector<lite::tensor::Ten
   return kernel;
 }
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Reduce, CpuReduceFp16KernelCreator)
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Mean, CpuMeanFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Reduce, CpuReduceFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Mean, CpuMeanFp16KernelCreator)
 }  // namespace mindspore::kernel
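The reduce hunks move the fp32-to-fp16 input conversion out of `ReSize()` and into `Run()`: `MallocTmpBuffer()` now owns the `fp16_input_` staging buffer, `Run()` fills it only for fp32 inputs, and fp16 inputs are consumed in place. A rough stand-alone sketch of that dispatch follows (the `Tensor` struct, `SelectFp16Input`, and `Float32ToFloat16Stub` are stand-ins; the real `Float32ToFloat16` performs a proper half-precision conversion):

```cpp
#include <cstdint>

// Placeholder for float16; the real kernels use float16_t from the ARM fp16 extension.
using fp16_placeholder = uint16_t;

// Modeled conversion only: a lossy integer cast standing in for the real
// fp32 -> fp16 routine, so the sketch stays self-contained.
void Float32ToFloat16Stub(const float *src, fp16_placeholder *dst, int num) {
  for (int i = 0; i < num; ++i) {
    dst[i] = static_cast<fp16_placeholder>(src[i]);
  }
}

enum DataType { kFloat32, kFloat16 };

struct Tensor {
  DataType type;
  void *data;
  int elements;
};

// Mirrors the new Run() logic: fp32 inputs are converted into the kernel-owned
// staging buffer; fp16 inputs are used directly from tensor memory.
fp16_placeholder *SelectFp16Input(Tensor *in_tensor, fp16_placeholder *fp16_input) {
  if (in_tensor->type == kFloat32) {
    Float32ToFloat16Stub(static_cast<float *>(in_tensor->data), fp16_input, in_tensor->elements);
    return fp16_input;
  }
  return static_cast<fp16_placeholder *>(in_tensor->data);
}
```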
@@ -52,7 +52,7 @@ class ReduceFp16CPUKernel : public ReduceBaseCPUKernel {
  private:
   int MallocTmpBuffer();
-  int FreeTmpBuffer();
+  void FreeTmpBuffer();
 };
 }  // namespace mindspore::kernel
...
@@ -72,5 +72,5 @@ int ReshapeCPUKernel::Run() {
     context_->allocator->Free(input_ptr);
   }
   return RET_OK;
-}  // namespace mindspore::kernel
+}
 }  // namespace mindspore::kernel
@@ -140,5 +140,4 @@ kernel::LiteKernel *CpuSplitFp16KernelCreator(const std::vector<lite::tensor::Te
   return kernel;
 }
 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Split, CpuSplitFp16KernelCreator)
 }  // namespace mindspore::kernel
@@ -59,10 +59,19 @@ int TransposeFp16CPUKernel::ReSize() {
     param->out_strides_[i] = out_shape[i + 1] * param->out_strides_[i + 1];
   }
-  if (fp16_in_data_ != nullptr) {
-    context_->allocator->Free(fp16_in_data_);
-    fp16_in_data_ = nullptr;
+  FreeFp16Buffer();
+  auto ret = MallocFp16Buffer();
+  if (ret != RET_OK) {
+    FreeFp16Buffer();
+    return ret;
   }
+  return RET_OK;
+}
+
+int TransposeFp16CPUKernel::MallocFp16Buffer() {
+  auto &in_tensor = in_tensors_.front();
+  auto &out_tensor = out_tensors_.front();
   if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
     fp16_in_data_ =
       reinterpret_cast<float16_t *>(context_->allocator->Malloc(sizeof(float16_t) * in_tensor->ElementsNum()));
@@ -71,11 +80,6 @@ int TransposeFp16CPUKernel::ReSize() {
       return RET_ERROR;
     }
   }
-  if (fp16_out_data_ != nullptr) {
-    context_->allocator->Free(fp16_out_data_);
-    fp16_out_data_ = nullptr;
-  }
   if (out_tensor->data_type() == kNumberTypeFloat || out_tensor->data_type() == kNumberTypeFloat32) {
     fp16_out_data_ =
       reinterpret_cast<float16_t *>(context_->allocator->Malloc(sizeof(float16_t) * out_tensor->ElementsNum()));
@@ -87,6 +91,24 @@ int TransposeFp16CPUKernel::ReSize() {
   return RET_OK;
 }

+void TransposeFp16CPUKernel::FreeFp16Buffer() {
+  auto &in_tensor = in_tensors_.front();
+  auto &out_tensor = out_tensors_.front();
+  if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
+    if (fp16_in_data_ != nullptr) {
+      context_->allocator->Free(fp16_in_data_);
+      fp16_in_data_ = nullptr;
+    }
+  }
+  if (out_tensor->data_type() == kNumberTypeFloat || out_tensor->data_type() == kNumberTypeFloat32) {
+    if (fp16_out_data_ != nullptr) {
+      context_->allocator->Free(fp16_out_data_);
+      fp16_out_data_ = nullptr;
+    }
+  }
+}
+
 int TransposeFp16CPUKernel::TransposeParallel(int task_id) {
   int num_unit_thread = MSMIN(thread_h_stride_, num_unit_ - task_id * thread_h_stride_);
   if (num_unit_thread <= 0) {
@@ -95,13 +117,6 @@ int TransposeFp16CPUKernel::TransposeParallel(int task_id) {
   int thread_offset = task_id * thread_h_stride_;
   TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
-  if (in_tensors_.at(0)->data_type() == kNumberTypeFloat16) {
-    fp16_in_data_ = reinterpret_cast<float16_t *>(in_tensors_.at(0)->Data());
-  }
-  if (out_tensors_.at(0)->data_type() == kNumberTypeFloat16) {
-    fp16_out_data_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->Data());
-  }
   auto ret = DoTranspose(fp16_in_data_, fp16_out_data_, in_shape_, out_shape_, param, thread_offset,
                          thread_offset + num_unit_thread);
   if (ret != RET_OK) {
@@ -109,12 +124,6 @@ int TransposeFp16CPUKernel::TransposeParallel(int task_id) {
     return RET_ERROR;
   }
-  if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32 || in_tensors_.at(0)->data_type() == kNumberTypeFloat) {
-    context_->allocator->Free(fp16_in_data_);
-  }
-  if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) {
-    context_->allocator->Free(fp16_out_data_);
-  }
   return RET_OK;
 }
@@ -139,7 +148,8 @@ int TransposeFp16CPUKernel::Run() {
   auto &in_tensor = in_tensors_.front();
   auto &out_tensor = out_tensors_.front();
   if (in_tensor == nullptr || out_tensor == nullptr) {
-    MS_LOG(ERROR) << "null pointer dreferencing.";
+    MS_LOG(ERROR) << "null pointer referencing.";
+    FreeFp16Buffer();
     return RET_ERROR;
   }
@@ -159,23 +169,15 @@ int TransposeFp16CPUKernel::Run() {
   ret = LiteBackendParallelLaunch(TransposeRun, this, thread_h_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]";
+    FreeFp16Buffer();
     return ret;
   }
-  if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
-    context_->allocator->Free(fp16_in_data_);
-    fp16_in_data_ = nullptr;
-  }
   if (out_tensor->data_type() == kNumberTypeFloat || out_tensor->data_type() == kNumberTypeFloat32) {
     out_data_ = reinterpret_cast<float *>(out_tensor->Data());
-    if (out_data_ == nullptr) {
-      return RET_ERROR;
-    }
     Float16ToFloat32(fp16_out_data_, out_data_, out_tensor->ElementsNum());
-    context_->allocator->Free(fp16_out_data_);
-    fp16_out_data_ = nullptr;
   }
+  FreeFp16Buffer();
   return ret;
 }
@@ -206,5 +208,5 @@ kernel::LiteKernel *CpuTransposeFp16KernelCreator(const std::vector<lite::tensor
   return kernel;
 }
-// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Transpose, CpuTransposeFp16KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Transpose, CpuTransposeFp16KernelCreator)
 }  // namespace mindspore::kernel
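For the transpose kernel, the per-task pointer rebinding and frees inside `TransposeParallel()` are removed (each parallel worker would otherwise free shared buffers while other workers may still be using them), and `Run()` now calls `FreeFp16Buffer()` on every exit path, including the early error returns. A scope guard is one idiomatic way to express the same "free on every return" intent; the commit itself uses explicit calls, so the sketch below (`ScopeGuard`, `RunSketch`, `free_fp16_buffer`, `launch_ok`) is purely illustrative:

```cpp
#include <functional>
#include <utility>

// Minimal scope guard: runs the stored callable when it goes out of scope.
class ScopeGuard {
 public:
  explicit ScopeGuard(std::function<void()> fn) : fn_(std::move(fn)) {}
  ~ScopeGuard() {
    if (fn_) fn_();
  }
  ScopeGuard(const ScopeGuard &) = delete;
  ScopeGuard &operator=(const ScopeGuard &) = delete;

 private:
  std::function<void()> fn_;
};

// Hypothetical Run() skeleton: the guard releases the fp16 staging buffers no
// matter which return statement is taken, matching the intent of the explicit
// FreeFp16Buffer() calls added in this commit.
int RunSketch(bool launch_ok, void (*free_fp16_buffer)()) {
  ScopeGuard cleanup(free_fp16_buffer);
  if (!launch_ok) {
    return -1;  // buffers are still freed by the guard
  }
  // ... convert the output back to fp32 if needed ...
  return 0;
}
```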
@@ -36,6 +36,8 @@ class TransposeFp16CPUKernel : public LiteKernel {
   int ReSize() override;
   int Run() override;
   int TransposeParallel(int task_id);
+  void FreeFp16Buffer();
+  int MallocFp16Buffer();

  private:
   int thread_num_;
...