Commit 2b895074 authored by zhaozhenlong

pad int8: add multi-thread support

Parent 1b63c76c
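What the change does: the int8 Pad kernel previously copied every input row into the padded output on a single thread; this commit strides the copy loop over the height dimension across worker threads, so thread `tid` handles rows `tid, tid + thread_num, tid + 2 * thread_num, ...`. Below is a minimal standalone sketch of that row-striding scheme, using plain `std::thread` and simplified 2D shapes in place of the Lite thread pool and 4D tensors (all names in the sketch are illustrative, not the Lite API):

```cpp
#include <cstdint>
#include <cstring>
#include <thread>
#include <vector>

// Each worker copies every thread_num-th input row into the padded output,
// mirroring the new `for (h = tid; h < in_dims[1]; h += thread_num)` loop.
void PadRowsWorker(const int8_t *in, int8_t *out, int rows, int row_len,
                   int out_row_len, int left_pad, int tid, int thread_num) {
  for (int h = tid; h < rows; h += thread_num) {
    std::memcpy(out + h * out_row_len + left_pad, in + h * row_len, row_len);
  }
}

int main() {
  const int rows = 4, row_len = 3, left_pad = 1, right_pad = 1;
  const int out_row_len = left_pad + row_len + right_pad;
  std::vector<int8_t> in(rows * row_len, 7);
  // Pre-fill with the pad constant, as the kernel's memset does.
  std::vector<int8_t> out(rows * out_row_len, 0);

  const int thread_num = 2;
  std::vector<std::thread> workers;
  for (int tid = 0; tid < thread_num; ++tid) {
    workers.emplace_back(PadRowsWorker, in.data(), out.data(), rows, row_len,
                         out_row_len, left_pad, tid, thread_num);
  }
  for (auto &t : workers) t.join();
  return 0;
}
```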
@@ -111,18 +111,35 @@ int PadInt8CPUKernel::Init() {
   return RET_OK;
 }
 
+int PadInt8CPUKernel::RunImpl(int task_id) {
+  return PadConstant4D(in_data_, out_data_, in_dims_, out_dims_, pad_param_->paddings_, task_id, context_->thread_num_);
+}
+
+int PadInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto pad = reinterpret_cast<PadInt8CPUKernel *>(cdata);
+  auto error_code = pad->RunImpl(task_id);
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "Pad Run error task_id[" << task_id << "] error_code[" << error_code << "]";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
 int PadInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Prepare failed.";
     return RET_ERROR;
   }
-  int8_t *in_data = reinterpret_cast<int8_t *>(in_tensors_[0]->Data());
-  int8_t *out_data = reinterpret_cast<int8_t *>(out_tensors_[0]->Data());
+  in_data_ = reinterpret_cast<int8_t *>(in_tensors_[0]->Data());
+  out_data_ = reinterpret_cast<int8_t *>(out_tensors_[0]->Data());
-  memset(out_data, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t));
-  PadConstant4D(in_data, out_data, in_dims_, out_dims_, pad_param_->paddings_);
+  memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t));
+  int error_code = LiteBackendParallelLaunch(PadInt8Impl, this, context_->thread_num_);
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 }  // namespace mindspore::kernel
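Why the free function: `LiteBackendParallelLaunch` is handed a plain callback plus an opaque `cdata` pointer and invokes the callback once per task id, so `PadInt8Impl` exists only to cast `cdata` back to the kernel object and forward to the member `RunImpl`. A sketch of that trampoline pattern, with a serial stand-in for the launcher (the names and the exact launcher signature here are assumptions, not the Lite API):

```cpp
#include <iostream>

// Illustrative stand-in for the launcher: calls the callback once per task
// id, in order. The real LiteBackendParallelLaunch dispatches the calls
// onto a thread pool instead of this serial loop.
typedef int (*TaskFn)(int task_id, void *cdata);

int FakeParallelLaunch(TaskFn fn, void *cdata, int thread_num) {
  for (int task_id = 0; task_id < thread_num; ++task_id) {
    int ret = fn(task_id, cdata);
    if (ret != 0) {
      return ret;
    }
  }
  return 0;
}

struct DemoKernel {
  int RunImpl(int task_id) {
    std::cout << "task " << task_id << " ran" << std::endl;
    return 0;
  }
};

// Free-function trampoline: recover the typed kernel from the opaque
// pointer, mirroring the reinterpret_cast in PadInt8Impl above.
int DemoImpl(int task_id, void *cdata) {
  return static_cast<DemoKernel *>(cdata)->RunImpl(task_id);
}

int main() {
  DemoKernel kernel;
  return FakeParallelLaunch(DemoImpl, &kernel, 2);
}
```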
@@ -38,6 +38,7 @@ class PadInt8CPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int RunImpl(int task_id);
 
  private:
   int SetQuantParam();
@@ -46,6 +47,8 @@ class PadInt8CPUKernel : public LiteKernel {
  private:
   PadParameter *pad_param_;
+  int8_t *in_data_;
+  int8_t *out_data_;
   int in_dims_[DEFAULT_PAD_NDIMS];
   int out_dims_[DEFAULT_PAD_NDIMS];
 };
......
@@ -16,12 +16,13 @@
 
 #include "nnacl/int8/pad.h"
 #include "nnacl/common_func.h"
+#include "nnacl/errorcode.h"
 
-void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
-                   const int32_t *paddings) {
+int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
+                  const int32_t *paddings, const int tid, const int thread_num) {
   int32_t copy_size = in_dims[3];
   for (int n = 0; n < in_dims[0]; n++) {
-    for (int h = 0; h < in_dims[1]; h++) {
+    for (int h = tid; h < in_dims[1]; h += thread_num) {
       for (int w = 0; w < in_dims[2]; w++) {
         const int8_t *in = in_data + offset(in_dims, n, h, w, 0);
         int8_t *out = out_data + offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]);
@@ -29,5 +30,5 @@ void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_di
       }
     }
   }
-  return;
+  return NNACL_OK;
 }
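Note the partition is interleaved rather than blocked: with `in_dims[1] = 5` and `thread_num = 2`, task 0 copies rows 0, 2, 4 and task 1 copies rows 1, 3. Every row writes a disjoint slice of the output, so no locking is needed. A tiny sketch that prints the resulting assignment:

```cpp
#include <iostream>

// Prints which rows each task id handles under the strided loop
// `for (h = tid; h < height; h += thread_num)`.
int main() {
  const int height = 5, thread_num = 2;
  for (int tid = 0; tid < thread_num; ++tid) {
    std::cout << "tid " << tid << ":";
    for (int h = tid; h < height; h += thread_num) {
      std::cout << " " << h;
    }
    std::cout << "\n";  // tid 0: 0 2 4   /   tid 1: 1 3
  }
  return 0;
}
```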
@@ -24,8 +24,8 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
-                   const int32_t *paddings);
+int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
+                  const int32_t *paddings, const int tid, const int thread_num);
 #ifdef __cplusplus
 }
 #endif
......
@@ -183,6 +183,7 @@ TEST_F(TestPadInt8, PadInt8TestInit4) {
   std::vector<lite::tensor::Tensor *> outputs_;
   auto pad_param = new PadParameter();
   lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 2;
   int8_t *correct;
   int total_size = PadInt8TestInit2(&inputs_, &outputs_, pad_param, &correct);
   kernel::PadInt8CPUKernel *pad =
......
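Usage note: setting `ctx->thread_num_ = 2` in the test drives the new parallel path — the launcher should issue two task ids, each `PadConstant4D` call copying alternating rows — while the output is still checked against the same `correct` buffer as the single-threaded version.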