Commit 4975b4d5 authored by mindspore-ci-bot, committed by Gitee

!4593 fp16 ops check input data type

Merge pull request !4593 from zhaozhenlong/lite/issue/transpose_mean_concat_fp16_fp32
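
These fp16 kernels previously assumed fp32 inputs and outputs and always converted through temporary fp16 buffers. With this change, Concat, Reduce and Transpose check each tensor's data_type() first: a conversion buffer is allocated only when a tensor is fp32 (now via context_->allocator instead of raw malloc/free), tensors that are already fp16 are used in place, and the temporary buffers are released at the end of Run() rather than in the now-defaulted destructors.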
......
@@ -42,35 +42,47 @@ int ConcatFp16CPUKernel::Init() {
 }
 
 int ConcatFp16CPUKernel::ReSize() {
-  for (auto ptr : fp16_inputs_) {
-    if (ptr != nullptr) {
-      free(ptr);
-      ptr = nullptr;
-    }
-  }
-  fp16_inputs_.clear();
-  for (size_t i = 0; i < in_tensors_.size(); ++i) {
+  FreeTmpBuffer();
+  for (const auto &in_tensor : in_tensors_) {
     float16_t *ptr = nullptr;
-    ptr = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * in_tensors_[i]->ElementsNum()));
-    if (ptr == nullptr) {
-      MS_LOG(ERROR) << "malloc failed";
-      return RET_ERROR;
+    if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
+      ptr = reinterpret_cast<float16_t *>(context_->allocator->Malloc(sizeof(float16_t) * in_tensor->ElementsNum()));
+      if (ptr == nullptr) {
+        MS_LOG(ERROR) << "malloc failed";
+        return RET_ERROR;
+      }
     }
     fp16_inputs_.push_back(ptr);
   }
-  if (fp16_output_ != nullptr) {
-    free(fp16_output_);
-    fp16_output_ = nullptr;
-  }
-  fp16_output_ = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * out_tensors_[0]->ElementsNum()));
-  if (fp16_output_ == nullptr) {
-    MS_LOG(ERROR) << "malloc failed";
-    return RET_ERROR;
+  auto &out_tensor = out_tensors_.at(0);
+  if (out_tensor->data_type() == kNumberTypeFloat32 || out_tensor->data_type() == kNumberTypeFloat) {
+    if (fp16_output_ != nullptr) {
+      context_->allocator->Free(fp16_output_);
+      fp16_output_ = nullptr;
+    }
+    fp16_output_ =
+      reinterpret_cast<float16_t *>(context_->allocator->Malloc(sizeof(float16_t) * out_tensors_[0]->ElementsNum()));
+    if (fp16_output_ == nullptr) {
+      MS_LOG(ERROR) << "malloc failed";
+      return RET_ERROR;
+    }
   }
   return ConcatBaseCPUKernel::ReSize();
 }
+
+void ConcatFp16CPUKernel::FreeTmpBuffer() {
+  for (auto ptr : fp16_inputs_) {
+    if (ptr != nullptr) {
+      context_->allocator->Free(ptr);
+      ptr = nullptr;
+    }
+  }
+  fp16_inputs_.clear();
+}
 
 int ConcatFp16CPUKernel::Run() {
   auto prepare_ret = Prepare();
   if (prepare_ret != RET_OK) {
......
@@ -78,28 +90,53 @@ int ConcatFp16CPUKernel::Run() {
     return prepare_ret;
   }
   auto input_num = in_tensors_.size();
-  std::vector<float *> inputs_addr(input_num, nullptr);
   std::vector<int *> inputs_output_shape(input_num + 1, nullptr);
   std::vector<std::vector<int>> shapes;
   for (size_t i = 0; i < input_num; ++i) {
-    inputs_addr[i] = reinterpret_cast<float *>(in_tensors_[i]->Data());
-    if (inputs_addr[i] == nullptr) {
-      MS_LOG(ERROR) << "got nullptr when cast in_tensor to float ptr";
-      return RET_ERROR;
+    const auto in_tensor = in_tensors_[i];
+    if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
+      auto in_tensor_data = reinterpret_cast<float *>(in_tensor->Data());
+      if (in_tensor_data == nullptr) {
+        MS_LOG(ERROR) << "got nullptr when cast in_tensor to float ptr";
+        return RET_ERROR;
+      }
+      Float32ToFloat16(in_tensor_data, fp16_inputs_[i], in_tensor->ElementsNum());
+    } else {
+      fp16_inputs_[i] = reinterpret_cast<float16_t *>(in_tensor->Data());
     }
-    Float32ToFloat16(inputs_addr[i], fp16_inputs_[i], in_tensors_[i]->ElementsNum());
     shapes.push_back(in_tensors_[i]->shape());
     inputs_output_shape[i] = shapes[i].data();
   }
   auto output_shape = out_tensors_.at(0)->shape();
   inputs_output_shape[input_num] = output_shape.data();
   auto output_addr = out_tensors_.at(0)->Data();
+  if (out_tensors_.at(0)->data_type() == kNumberTypeFloat16) {
+    fp16_output_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->Data());
+  }
   ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, axis_, inputs_output_shape.data(),
              output_shape.size(), reinterpret_cast<void *>(fp16_output_));
-  Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum());
+  // free fp16 in out buffer
+  if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) {
+    Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum());
+    context_->allocator->Free(fp16_output_);
+    fp16_output_ = nullptr;
+  }
+  for (auto i = 0; i < fp16_inputs_.size(); i++) {
+    const auto in_tensor = in_tensors_[i];
+    if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
+      auto ptr = fp16_inputs_[i];
+      if (ptr != nullptr) {
+        context_->allocator->Free(ptr);
+        ptr = nullptr;
+      }
+    }
+  }
+  fp16_inputs_.clear();
   return RET_OK;
 }
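
Every kernel touched by this commit repeats the same dispatch: allocate and fill a temporary fp16 buffer only when a tensor is fp32, otherwise use the tensor's own storage in place. A minimal standalone sketch of that pattern — hypothetical simplified types, with float16_t faked as an integer stand-in and plain malloc in place of context_->allocator:

#include <cstdlib>

enum DataType { kNumberTypeFloat, kNumberTypeFloat32, kNumberTypeFloat16 };
using float16_t = unsigned short;  // stand-in for the ARM half-precision type

struct Tensor {
  DataType data_type;
  int elements;
  void *data;
};

// Returns the tensor's values as an fp16 pointer. Only fp32 tensors get a
// freshly allocated conversion buffer (*owned = true, so the caller frees it
// after the kernel runs); fp16 tensors are used in place with no copy.
float16_t *AsFp16(const Tensor &t, bool *owned) {
  if (t.data_type == kNumberTypeFloat || t.data_type == kNumberTypeFloat32) {
    auto *buf = static_cast<float16_t *>(std::malloc(sizeof(float16_t) * t.elements));
    if (buf == nullptr) {
      return nullptr;
    }
    // Float32ToFloat16(static_cast<const float *>(t.data), buf, t.elements);
    *owned = true;
    return buf;
  }
  *owned = false;
  return static_cast<float16_t *>(t.data);
}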
......
@@ -32,13 +32,7 @@ class ConcatFp16CPUKernel : public ConcatBaseCPUKernel {
                      const lite::Primitive *primitive)
       : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ConcatFp16CPUKernel() {
-    for (auto ptr : fp16_inputs_) {
-      if (ptr != nullptr) {
-        free(ptr);
-      }
-    }
-  }
+  ~ConcatFp16CPUKernel() = default;
 
   int Init() override;
......
@@ -46,6 +40,9 @@ class ConcatFp16CPUKernel : public ConcatBaseCPUKernel {
   int Run() override;
 
+ private:
+  void FreeTmpBuffer();
+
  private:
   std::vector<float16_t *> fp16_inputs_;
   float16_t *fp16_output_ = nullptr;
......
......
@@ -59,14 +59,17 @@ int ReduceFp16CPUKernel::Init() {
 
 int ReduceFp16CPUKernel::ReSize() {
   if (fp16_input_ != nullptr) {
-    free(fp16_input_);
+    context_->allocator->Free(fp16_input_);
     fp16_input_ = nullptr;
   }
-  auto ele_num = in_tensors_.at(0)->ElementsNum();
-  fp16_input_ = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * ele_num));
-  if (fp16_input_ == nullptr) {
-    MS_LOG(ERROR) << "malloc fp16_src_data_ falied";
-    return RET_ERROR;
+  auto in_tensor = in_tensors_.front();
+  if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
+    fp16_input_ =
+      reinterpret_cast<float16_t *>(context_->allocator->Malloc(in_tensor->ElementsNum() * sizeof(float16_t)));
+    if (fp16_input_ == nullptr) {
+      return RET_ERROR;
+    }
+    Float32ToFloat16(reinterpret_cast<float *>(in_tensor->Data()), fp16_input_, in_tensor->ElementsNum());
   }
   return MallocTmpBuffer();
 }
......
@@ -93,10 +96,12 @@ int ReduceFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
     return prepare_ret;
   }
   tmp_shape_ = in_tensors_.at(0)->shape();
-  src_data_ = static_cast<float *>(in_tensors_.at(0)->Data());
-  auto ele_num = in_tensors_.at(0)->ElementsNum();
-  Float32ToFloat16(src_data_, fp16_input_, ele_num);
+  auto in_tensor = in_tensors_.at(0);
+  if (in_tensor->data_type() == kNumberTypeFloat16) {
+    fp16_input_ = reinterpret_cast<float16_t *>(in_tensor->Data());
+  }
   fp16_src_data_ = fp16_input_;
   for (int i = 0; i < data_buffers_.size(); ++i) {
     fp16_dst_data_ = data_buffers_[i];
......
@@ -119,19 +124,36 @@ int ReduceFp16CPUKernel::Run() {
     fp16_src_data_ = fp16_dst_data_;
   }
-  dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->Data());
-  Float16ToFloat32(fp16_dst_data_, dst_data_, out_tensors_.at(0)->ElementsNum());
+  auto out_tensor = out_tensors_.at(0);
+  if (out_tensor->data_type() == kNumberTypeFloat32 || out_tensor->data_type() == kNumberTypeFloat) {
+    dst_data_ = reinterpret_cast<float *>(out_tensor->Data());
+    Float16ToFloat32(fp16_dst_data_, dst_data_, out_tensor->ElementsNum());
+  } else {
+    memcpy(out_tensor->Data(), fp16_dst_data_, out_tensor->ElementsNum() * sizeof(float16_t));
+  }
+  if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
+    context_->allocator->Free(fp16_input_);
+  }
+  fp16_input_ = nullptr;
+  FreeTmpBuffer();
   return RET_OK;
 }
 
-int ReduceFp16CPUKernel::MallocTmpBuffer() {
+int ReduceFp16CPUKernel::FreeTmpBuffer() {
   for (auto buffer : data_buffers_) {
     if (buffer != nullptr) {
-      free(buffer);
+      context_->allocator->Free(buffer);
       buffer = nullptr;
     }
   }
   data_buffers_.clear();
+  return RET_OK;
+}
+
+int ReduceFp16CPUKernel::MallocTmpBuffer() {
+  auto ret = FreeTmpBuffer();
   auto input_shape = in_tensors_.at(0)->shape();
   for (auto i = 0; i < num_axes_; i++) {
......
@@ -142,7 +164,7 @@ int ReduceFp16CPUKernel::MallocTmpBuffer() {
       size *= input_shape[j];
     }
   }
-  float16_t *buffer = reinterpret_cast<float16_t *>(malloc(size * sizeof(float16_t)));
+  float16_t *buffer = reinterpret_cast<float16_t *>(context_->allocator->Malloc(size * sizeof(float16_t)));
   if (buffer == nullptr) {
     MS_LOG(ERROR) << "Malloc data failed.";
     return RET_ERROR;
......
@@ -150,7 +172,7 @@ int ReduceFp16CPUKernel::MallocTmpBuffer() {
     data_buffers_.emplace_back(buffer);
     input_shape[axis] = 1;
   }
-  return RET_OK;
+  return ret;
 }
 
 kernel::LiteKernel *CpuReduceFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
......
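
Reduce follows the same scheme: fp16_input_ is a conversion buffer only when the input is fp32 (the fp32-to-fp16 copy now happens in ReSize), the staged reduction walks through the fp16 data_buffers_, and the result is either converted back to fp32 or memcpy'd out as fp16. For reference, a rough sketch of what the two conversion helpers do — an assumption based on how they are called above, targeting an ARM toolchain where float16_t is a native arithmetic type (the real nnacl versions may be vectorized):

void Float32ToFloat16(const float *input, float16_t *output, int number) {
  // element-wise narrowing cast, fp32 -> fp16
  for (int i = 0; i < number; ++i) {
    output[i] = static_cast<float16_t>(input[i]);
  }
}

void Float16ToFloat32(const float16_t *input, float *output, int number) {
  // element-wise widening cast, fp16 -> fp32
  for (int i = 0; i < number; ++i) {
    output[i] = static_cast<float>(input[i]);
  }
}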
......
@@ -34,21 +34,7 @@ class ReduceFp16CPUKernel : public ReduceBaseCPUKernel {
                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                      const lite::Primitive *primitive)
       : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {}
-  ~ReduceFp16CPUKernel() {
-    for (auto i = 0; i < data_buffers_.size(); i++) {
-      float16_t *buffer = data_buffers_[i];
-      if (buffer != nullptr) {
-        free(buffer);
-        buffer = nullptr;
-      }
-    }
-    if (fp16_input_ != nullptr) {
-      free(fp16_input_);
-      fp16_input_ = nullptr;
-    }
-    src_data_ = nullptr;
-    dst_data_ = nullptr;
-  }
+  ~ReduceFp16CPUKernel() = default;
 
   int Init() override;
   int ReSize() override;
......
@@ -66,6 +52,7 @@ class ReduceFp16CPUKernel : public ReduceBaseCPUKernel {
  private:
   int MallocTmpBuffer();
+  int FreeTmpBuffer();
 };
 }  // namespace mindspore::kernel
......
......
@@ -46,29 +46,44 @@ int TransposeFp16CPUKernel::Init() {
 }
 
 int TransposeFp16CPUKernel::ReSize() {
-  auto &inTensor = in_tensors_.front();
-  auto &outTensor = out_tensors_.front();
+  auto &in_tensor = in_tensors_.front();
+  auto &out_tensor = out_tensors_.front();
   auto param = reinterpret_cast<TransposeParameter *>(op_parameter_);
-  auto in_shape = inTensor->shape();
-  auto out_shape = outTensor->shape();
+  auto in_shape = in_tensor->shape();
+  auto out_shape = out_tensor->shape();
   param->strides_[param->num_axes_ - 1] = 1;
   param->out_strides_[param->num_axes_ - 1] = 1;
-  param->data_size_ = inTensor->Size();
+  param->data_size_ = in_tensor->Size();
   for (int i = param->num_axes_ - 2; i >= 0; i--) {
     param->strides_[i] = in_shape[i + 1] * param->strides_[i + 1];
     param->out_strides_[i] = out_shape[i + 1] * param->out_strides_[i + 1];
   }
   if (fp16_in_data_ != nullptr) {
-    free(fp16_in_data_);
+    context_->allocator->Free(fp16_in_data_);
     fp16_in_data_ = nullptr;
   }
-  fp16_in_data_ = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * inTensor->ElementsNum()));
+  if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
+    fp16_in_data_ =
+      reinterpret_cast<float16_t *>(context_->allocator->Malloc(sizeof(float16_t) * in_tensor->ElementsNum()));
+    if (fp16_in_data_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc data failed";
+      return RET_ERROR;
+    }
+  }
   if (fp16_out_data_ != nullptr) {
-    free(fp16_out_data_);
+    context_->allocator->Free(fp16_out_data_);
     fp16_out_data_ = nullptr;
   }
-  fp16_out_data_ = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * outTensor->ElementsNum()));
+  if (out_tensor->data_type() == kNumberTypeFloat || out_tensor->data_type() == kNumberTypeFloat32) {
+    fp16_out_data_ =
+      reinterpret_cast<float16_t *>(context_->allocator->Malloc(sizeof(float16_t) * out_tensor->ElementsNum()));
+    if (fp16_out_data_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc data failed";
+      return RET_ERROR;
+    }
+  }
   return RET_OK;
 }
......
@@ -79,12 +94,27 @@ int TransposeFp16CPUKernel::TransposeParallel(int task_id) {
   }
   int thread_offset = task_id * thread_h_stride_;
   TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
+  if (in_tensors_.at(0)->data_type() == kNumberTypeFloat16) {
+    fp16_in_data_ = reinterpret_cast<float16_t *>(in_tensors_.at(0)->Data());
+  }
+  if (out_tensors_.at(0)->data_type() == kNumberTypeFloat16) {
+    fp16_out_data_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->Data());
+  }
   auto ret = DoTranspose(fp16_in_data_, fp16_out_data_, in_shape_, out_shape_, param, thread_offset,
                          thread_offset + num_unit_thread);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Transpose error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
   }
+  if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32 || in_tensors_.at(0)->data_type() == kNumberTypeFloat) {
+    context_->allocator->Free(fp16_in_data_);
+  }
+  if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) {
+    context_->allocator->Free(fp16_out_data_);
+  }
   return RET_OK;
 }
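
As in Concat and Reduce, the parallel task now rebinds fp16_in_data_ / fp16_out_data_ to the tensors' own storage when the data is already fp16, so DoTranspose always operates on fp16 buffers regardless of the external data type; conversion buffers are allocated and freed only for fp32 tensors.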
......
@@ -112,9 +142,17 @@ int TransposeFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "null pointer dreferencing.";
     return RET_ERROR;
   }
-  in_data_ = reinterpret_cast<float *>(in_tensor->Data());
-  out_data_ = reinterpret_cast<float *>(out_tensor->Data());
-  Float32ToFloat16(in_data_, fp16_in_data_, in_tensor->ElementsNum());
+  if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
+    in_data_ = reinterpret_cast<float *>(in_tensor->Data());
+    Float32ToFloat16(in_data_, fp16_in_data_, in_tensor->ElementsNum());
+  } else {
+    fp16_in_data_ = reinterpret_cast<float16_t *>(in_tensor->Data());
+  }
+  if (out_tensor->data_type() == kNumberTypeFloat16) {
+    fp16_out_data_ = reinterpret_cast<float16_t *>(out_tensor->Data());
+  }
   in_shape_ = const_cast<int *>(in_tensor->shape().data());
   out_shape_ = const_cast<int *>(out_tensor->shape().data());
......
@@ -123,9 +161,24 @@ int TransposeFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]";
     return ret;
   }
-  Float16ToFloat32(fp16_out_data_, out_data_, out_tensor->ElementsNum());
+  if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
+    context_->allocator->Free(fp16_in_data_);
+    fp16_in_data_ = nullptr;
+  }
+  if (out_tensor->data_type() == kNumberTypeFloat || out_tensor->data_type() == kNumberTypeFloat32) {
+    out_data_ = reinterpret_cast<float *>(out_tensor->Data());
+    if (out_data_ == nullptr) {
+      return RET_ERROR;
+    }
+    Float16ToFloat32(fp16_out_data_, out_data_, out_tensor->ElementsNum());
+    context_->allocator->Free(fp16_out_data_);
+    fp16_out_data_ = nullptr;
+  }
   return ret;
-}  // namespace mindspore::kernel
+}
 
 kernel::LiteKernel *CpuTransposeFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                   const std::vector<lite::tensor::Tensor *> &outputs,
......
@@ -30,16 +30,7 @@ class TransposeFp16CPUKernel : public LiteKernel {
                         const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                         const lite::Primitive *primitive)
       : LiteKernel(param, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {}
-  ~TransposeFp16CPUKernel() {
-    if (fp16_in_data_ != nullptr) {
-      free(fp16_in_data_);
-      fp16_in_data_ = nullptr;
-    }
-    if (fp16_out_data_ != nullptr) {
-      free(fp16_out_data_);
-      fp16_out_data_ = nullptr;
-    }
-  }
+  ~TransposeFp16CPUKernel() = default;
 
   int Init() override;
   int ReSize() override;
......