Commit 2aaba750 authored by chenjianping

kernels support resize

Parent 10be5005
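This change converges the touched kernels on a single resize contract: `Init()` keeps only shape-independent setup and returns early while tensor shapes are still unknown, and `ReSize()` owns all shape-dependent work (freeing stale buffers, recomputing parameters, reallocating) so it can be re-run whenever the graph inputs change. A minimal sketch of that contract, using a hypothetical kernel name:

```cpp
// Sketch of the Init()/ReSize() pattern applied across kernels in this
// commit. SomeCPUKernel is hypothetical; RET_OK, InferShapeDone() and the
// tensor members mirror the names used in the diff below.
int SomeCPUKernel::Init() {
  // Shape-independent setup (quant params, const tensors) happens here.
  if (!InferShapeDone()) {
    return RET_OK;  // shapes arrive at runtime; defer to ReSize()
  }
  return ReSize();
}

int SomeCPUKernel::ReSize() {
  FreeTmpBuffer();  // drop buffers sized for the previous shapes
  // ... recompute parameters from in_tensors_/out_tensors_ and reallocate ...
  return RET_OK;
}
```

This replaces the per-kernel `infer_shape_interrupt_`/`set_need_reinit()` checks that each `Init()` previously duplicated.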
@@ -219,7 +219,6 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() const {
int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) {
MS_EXCEPTION_IF_NULL(this->context_);
SetMaxWokerNum(context_->thread_num_);
context_->running_ = true;
if (before == nullptr && after == nullptr) {
return executor->Run(this->inputs_, this->outputs_, this->kernels_, this->context_->allocator.get());
} else {
@@ -333,19 +332,21 @@ int LiteSession::ResizeInputs(const std::vector<mindspore::tensor::MSTensor *> &
}
int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs) {
inputs_old_.clear();
inputs_old_ = inputs_;
std::vector<tensor::Tensor *> inputs_old(inputs_);
auto ret = ResizeInputs(inputs);
if (ret != RET_OK) {
inputs_ = inputs_old_;
inputs_ = inputs_old;
return ret;
}
Scheduler scheduler(context_);
ret = scheduler.ReSizeKernels(kernels_);
if (ret != RET_OK) {
inputs_ = inputs_old_;
scheduler.ReSizeKernels(kernels_);
inputs_ = inputs_old;
auto resize_ret = scheduler.ReSizeKernels(kernels_);
if (resize_ret != RET_OK) {
MS_LOG(ERROR) << "restore kernel size fail!ret: " << resize_ret;
}
return ret;
}
return RET_OK;
......
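On the session side, `Resize()` now snapshots the inputs in a local vector instead of the removed `inputs_old_` member, and rolls back on failure: if `ReSizeKernels` fails, the previous inputs are restored and the kernels are re-resized to their old shapes. A hedged usage sketch (the reshape step is application-specific and elided):

```cpp
// Sketch: resizing a live session, assuming `session` points to a created
// mindspore::session::LiteSession with a compiled model.
auto inputs = session->GetInputs();
// ... set the new dimensions on the input tensors (app-specific) ...
int ret = session->Resize(inputs);
if (ret != mindspore::lite::RET_OK) {
  // Resize failed; per this commit the session restored the previous input
  // tensors and kernel sizes, so the old shapes remain runnable.
}
```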
@@ -79,7 +79,6 @@ class LiteSession : public session::LiteSession {
std::vector<tensor::Tensor *> tensors_;
// graph input tensors
std::vector<tensor::Tensor *> inputs_;
std::vector<tensor::Tensor *> inputs_old_;
// graph output tensors
std::vector<tensor::Tensor *> outputs_;
// graph input MSTensors
......
@@ -98,14 +98,14 @@ int ReduceBaseCPUKernel::Init() {
if (ret != RET_OK) {
return ret;
}
ret = CheckParameters();
if (ret != RET_OK) {
return ret;
}
return RET_OK;
}
int ReduceBaseCPUKernel::ReSize() {
return CheckParameters();
}
kernel::LiteKernel *CpuReduceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
......
@@ -32,7 +32,7 @@ class ReduceBaseCPUKernel : public LiteKernel {
virtual ~ReduceBaseCPUKernel() = default;
int Init() override;
int ReSize() override { return 0; };
int ReSize() override;
private:
int CheckInputsOutputs();
......
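With `ReduceBaseCPUKernel::ReSize()` now returning `CheckParameters()`, the reduction axes are re-validated against the current input rank on every resize instead of being checked only once in `Init()`; the fp16 and fp32 reduce kernels below chain to this base `ReSize()` before reallocating their temp buffers.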
@@ -59,7 +59,11 @@ int ReduceFp16CPUKernel::Init() {
int ReduceFp16CPUKernel::ReSize() {
FreeTmpBuffer();
auto ret = MallocTmpBuffer();
auto ret = ReduceBaseCPUKernel::ReSize();
if (ret != RET_OK) {
return ret;
}
ret = MallocTmpBuffer();
if (ret != RET_OK) {
FreeTmpBuffer();
return ret;
......
@@ -60,11 +60,21 @@ int BatchnormCPUKernel::InitConstTensor() {
}
int BatchnormCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
int BatchnormCPUKernel::ReSize() {
if (mean_addr_ != nullptr) {
free(mean_addr_);
mean_addr_ = nullptr;
}
if (var_addr_ != nullptr) {
free(var_addr_);
var_addr_ = nullptr;
}
auto input_shapes = in_tensors_[0]->shape();
auto n_dim = input_shapes.size();
batchnorm_param_->channel_ = input_shapes[n_dim - 1];
@@ -83,15 +93,6 @@ int BatchnormCPUKernel::Init() {
return RET_OK;
}
int BatchnormCPUKernel::ReSize() {
auto input_shapes = in_tensors_[0]->shape();
batchnorm_param_->unit_ = 1;
for (int i = 0; i < input_shapes.size() - 1; i++) {
batchnorm_param_->unit_ *= input_shapes[i];
}
return RET_OK;
}
int BatchnormCPUKernel::DoExecute(int task_id) {
BatchNorm(out_addr_, in_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_);
return RET_OK;
......
@@ -16,7 +16,6 @@
#include "src/runtime/kernel/arm/fp32/fullconnection.h"
#include "src/runtime/runtime_api.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_MEMORY_FAILED;
using mindspore::lite::RET_OK;
@@ -48,15 +47,6 @@ void FullconnectionCPUKernel::FreeBuf() {
int FullconnectionCPUKernel::ReSize() {
FreeBuf();
Init();
return RET_OK;
}
int FullconnectionCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
return RET_OK;
}
fc_param_->row_ = (in_tensors_[0]->shape())[0];
fc_param_->col_ = (in_tensors_[1]->shape())[0];
fc_param_->deep_ = (in_tensors_[1]->shape())[1];
@@ -81,12 +71,14 @@ int FullconnectionCPUKernel::Init() {
b_r8_ptr_ = reinterpret_cast<float *>(malloc(fc_param_->col_8_ * fc_param_->deep_ * sizeof(float)));
if (b_r8_ptr_ == nullptr) {
FreeBuf();
return RET_MEMORY_FAILED;
}
memset(b_r8_ptr_, 0, fc_param_->col_8_ * fc_param_->deep_ * sizeof(float));
c_r8x8_ptr_ = reinterpret_cast<float *>(malloc(fc_param_->row_8_ * fc_param_->col_8_ * sizeof(float)));
if (c_r8x8_ptr_ == nullptr) {
FreeBuf();
return RET_MEMORY_FAILED;
}
memset(c_r8x8_ptr_, 0, fc_param_->row_8_ * fc_param_->col_8_ * sizeof(float));
@@ -98,6 +90,13 @@ int FullconnectionCPUKernel::Init() {
return RET_OK;
}
int FullconnectionCPUKernel::Init() {
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
void FullconnectionCPUKernel::InitMatrixA(float *src_ptr, float *dst_ptr) {
if (fc_param_->a_const_ == true) {
return;
......
@@ -47,10 +47,10 @@ class FullconnectionCPUKernel : public FullconnectionBaseCPUKernel {
void InitMatrixB(float *src_ptr, float *dst_ptr);
private:
float *a_c8_ptr_;
float *b_r8_ptr_;
float *c_r8x8_ptr_;
float *bias_ptr_;
float *a_c8_ptr_ = nullptr;
float *b_r8_ptr_ = nullptr;
float *c_r8x8_ptr_ = nullptr;
float *bias_ptr_ = nullptr;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_FULLCONNECTION_H_
@@ -29,7 +29,9 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_FusedBatchNorm;
namespace mindspore::kernel {
FusedBatchnormCPUKernel::~FusedBatchnormCPUKernel() {
FusedBatchnormCPUKernel::~FusedBatchnormCPUKernel() { FreeTmpBuffer(); }
void FusedBatchnormCPUKernel::FreeTmpBuffer() {
if (scale_addr_ != nullptr) {
free(scale_addr_);
scale_addr_ = nullptr;
@@ -84,10 +86,14 @@ int FusedBatchnormCPUKernel::InitConstTensor() {
}
int FusedBatchnormCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
int FusedBatchnormCPUKernel::ReSize() {
FreeTmpBuffer();
auto input_shapes = in_tensors_[0]->shape();
auto n_dim = input_shapes.size();
batchnorm_param_->channel_ = input_shapes[n_dim - 1];
@@ -106,15 +112,6 @@ int FusedBatchnormCPUKernel::Init() {
return RET_OK;
}
int FusedBatchnormCPUKernel::ReSize() {
auto input_shapes = in_tensors_[0]->shape();
batchnorm_param_->unit_ = 1;
for (int i = 0; i < input_shapes.size() - 1; i++) {
batchnorm_param_->unit_ *= input_shapes[i];
}
return RET_OK;
}
int FusedBatchnormCPUKernel::Execute(int task_id) {
FusedBatchNorm(out_addr_, in_addr_, scale_addr_, offset_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_);
return RET_OK;
@@ -149,13 +146,16 @@ int FusedBatchnormCPUKernel::Run() {
kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc,
const mindspore::lite::PrimitiveC *primitive) {
MS_ASSERT(opParameter != nullptr);
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input parameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_FusedBatchNorm);
FusedBatchnormCPUKernel *kernel =
new (std::nothrow) FusedBatchnormCPUKernel(opParameter, inputs, outputs, ctx, primitive);
new (std::nothrow) FusedBatchnormCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new FusedBatchnormCPUKernel fail!";
return nullptr;
@@ -163,8 +163,8 @@ kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector<lite::tenso
auto ret = kernel->Init();
if (ret != RET_OK) {
delete kernel;
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
return nullptr;
}
return kernel;
......
@@ -40,6 +40,7 @@ class FusedBatchnormCPUKernel : public LiteKernel {
int Execute(int task_id);
private:
void FreeTmpBuffer();
float *in_addr_ = nullptr;
float *mean_addr_ = nullptr;
float *var_addr_ = nullptr;
......
@@ -25,20 +25,29 @@ using mindspore::lite::RET_MEMORY_FAILED;
using mindspore::lite::RET_OK;
namespace mindspore::kernel {
MatmulCPUKernel::~MatmulCPUKernel() {
ctx_->allocator->Free(a_c8_ptr_);
ctx_->allocator->Free(b_r8_ptr_);
ctx_->allocator->Free(c_r8x8_ptr_);
ctx_->allocator->Free(bias_ptr_);
}
int MatmulCPUKernel::ReSize() { return RET_OK; }
MatmulCPUKernel::~MatmulCPUKernel() { FreeTmpBuffer(); }
int MatmulCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
return RET_OK;
void MatmulCPUKernel::FreeTmpBuffer() {
if (a_c8_ptr_ != nullptr) {
ctx_->allocator->Free(a_c8_ptr_);
a_c8_ptr_ = nullptr;
}
if (b_r8_ptr_ != nullptr) {
ctx_->allocator->Free(b_r8_ptr_);
b_r8_ptr_ = nullptr;
}
if (c_r8x8_ptr_ != nullptr) {
ctx_->allocator->Free(c_r8x8_ptr_);
c_r8x8_ptr_ = nullptr;
}
if (bias_ptr_ != nullptr) {
ctx_->allocator->Free(bias_ptr_);
bias_ptr_ = nullptr;
}
}
int MatmulCPUKernel::ReSize() {
FreeTmpBuffer();
int batch = 1;
auto a_shape = in_tensors_[0]->shape();
auto c_shape = out_tensors_[0]->shape();
@@ -63,17 +72,20 @@ int MatmulCPUKernel::Init() {
thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_);
a_c8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->deep_ * sizeof(float)));
if (!a_c8_ptr_) {
if (a_c8_ptr_ == nullptr) {
FreeTmpBuffer();
return RET_MEMORY_FAILED;
}
memset(a_c8_ptr_, 0, params_->row_8_ * params_->deep_ * sizeof(float));
b_r8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(float)));
if (!b_r8_ptr_) {
if (b_r8_ptr_ == nullptr) {
FreeTmpBuffer();
return RET_MEMORY_FAILED;
}
memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(float));
c_r8x8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->col_8_ * sizeof(float)));
if (!c_r8x8_ptr_) {
if (c_r8x8_ptr_ == nullptr) {
FreeTmpBuffer();
return RET_MEMORY_FAILED;
}
memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(float));
@@ -85,6 +97,10 @@ int MatmulCPUKernel::Init() {
if (in_tensors_.size() == 3) {
bias_ptr_ = reinterpret_cast<float *>(malloc(params_->col_8_ * sizeof(float)));
if (bias_ptr_ == nullptr) {
FreeTmpBuffer();
return RET_MEMORY_FAILED;
}
memset(bias_ptr_, 0, params_->col_8_ * sizeof(float));
memcpy(bias_ptr_, in_tensors_[2]->Data(), params_->col_ * sizeof(float));
} else {
@@ -128,6 +144,13 @@ void MatmulCPUKernel::InitMatrixB(float *src_ptr, float *dst_ptr) {
return;
}
int MatmulCPUKernel::Init() {
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
int MatmulCPUKernel::RunImpl(int task_id) {
int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_);
if (cur_oc <= 0) {
......
@@ -38,12 +38,13 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel {
private:
void InitMatrixA(float *src_ptr, float *dst_ptr);
void InitMatrixB(float *src_ptr, float *dst_ptr);
void FreeTmpBuffer();
private:
float *a_c8_ptr_;
float *b_r8_ptr_;
float *c_r8x8_ptr_;
float *bias_ptr_;
float *a_c8_ptr_ = nullptr;
float *b_r8_ptr_ = nullptr;
float *c_r8x8_ptr_ = nullptr;
float *bias_ptr_ = nullptr;
};
} // namespace mindspore::kernel
......
@@ -81,7 +81,13 @@ int ReduceCPUKernel::Init() {
return ReSize();
}
int ReduceCPUKernel::ReSize() { return MallocTmpBuffer(); }
int ReduceCPUKernel::ReSize() {
auto ret = ReduceBaseCPUKernel::ReSize();
if (ret != RET_OK) {
return ret;
}
return MallocTmpBuffer();
}
int ReduceCPUKernel::CallReduceUnit(int task_id) {
auto ret = reducer_(outer_size_, inner_size_, axis_size_, src_data_, tmp_shape_.data(), dst_data_, task_id,
......
@@ -24,10 +24,34 @@ using mindspore::schema::PrimitiveType_ReverseSequence;
namespace mindspore::kernel {
int ReverseSequenceCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
void ReverseSequenceCPUKernel::ConvertAxisToPositive(const std::vector<int> shape, int *axis) {
if (axis != nullptr && *axis < 0) {
*axis += shape.size();
}
}
int ReverseSequenceCPUKernel::CalcCountPreAxis(const std::vector<int> shape, int axis) {
int count = 1;
for (int i = 0; i < axis; ++i) {
count *= shape[i];
}
return count;
}
int ReverseSequenceCPUKernel::CalcCountAfterAxis(const std::vector<int> shape, int axis) {
int count = 1;
for (int i = axis + 1; i < shape.size(); ++i) {
count *= shape[i];
}
return count;
}
int ReverseSequenceCPUKernel::ReSize() {
auto input0 = in_tensors_.at(0);
auto input1 = in_tensors_.at(1);
auto output = out_tensors_.at(0);
@@ -64,34 +88,11 @@ int ReverseSequenceCPUKernel::Init() {
return RET_OK;
}
void ReverseSequenceCPUKernel::ConvertAxisToPositive(const std::vector<int> shape, int *axis) {
if (axis != nullptr && *axis < 0) {
*axis += shape.size();
}
}
int ReverseSequenceCPUKernel::CalcCountPreAxis(const std::vector<int> shape, int axis) {
int count = 1;
for (int i = 0; i < axis; ++i) {
count *= shape[i];
}
return count;
}
int ReverseSequenceCPUKernel::CalcCountAfterAxis(const std::vector<int> shape, int axis) {
int count = 1;
for (int i = axis + 1; i < shape.size(); ++i) {
count *= shape[i];
}
return count;
}
int ReverseSequenceCPUKernel::ReSize() { return RET_OK; }
int ReverseSequenceCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}
float *input0 = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
int *input1 = reinterpret_cast<int *>(in_tensors_.at(1)->Data());
......
@@ -25,18 +25,10 @@ using mindspore::schema::PrimitiveType_Tile;
namespace mindspore::kernel {
int TileCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
if (!InferShapeDone()) {
return RET_OK;
}
auto tile_parameter_ = reinterpret_cast<TileParameter *>(op_parameter_);
for (int i = 0; i < tile_parameter_->in_dim_; ++i) {
tile_parameter_->in_shape_[i] = in_tensors_[0]->shape()[i];
tile_parameter_->out_shape_[i] = out_tensors_[0]->shape()[i];
}
ComputeStrides(tile_parameter_->in_shape_, tile_parameter_->in_strides_, tile_parameter_->in_dim_);
ComputeStrides(tile_parameter_->out_shape_, tile_parameter_->out_strides_, tile_parameter_->in_dim_);
return RET_OK;
return ReSize();
}
void TileCPUKernel::ComputeStrides(int *shape, int *strides, int ndim) {
@@ -47,13 +39,22 @@ void TileCPUKernel::ComputeStrides(int *shape, int *strides, int ndim) {
}
}
int TileCPUKernel::ReSize() { return RET_OK; }
int TileCPUKernel::ReSize() {
auto tile_parameter_ = reinterpret_cast<TileParameter *>(op_parameter_);
for (int i = 0; i < tile_parameter_->in_dim_; ++i) {
tile_parameter_->in_shape_[i] = in_tensors_[0]->shape()[i];
tile_parameter_->out_shape_[i] = out_tensors_[0]->shape()[i];
}
ComputeStrides(tile_parameter_->in_shape_, tile_parameter_->in_strides_, tile_parameter_->in_dim_);
ComputeStrides(tile_parameter_->out_shape_, tile_parameter_->out_strides_, tile_parameter_->in_dim_);
return RET_OK;
}
int TileCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}
auto input_addr = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
auto output_addr = reinterpret_cast<float *>(out_tensors_.at(0)->Data());
......
@@ -88,27 +88,24 @@ int PadInt8CPUKernel::InitPadParam() {
}
int PadInt8CPUKernel::ReSize() {
InitPadParam();
return RET_OK;
}
int PadInt8CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
return RET_OK;
}
int error_code = InitPadParam();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "InitPadParam failed. errorcode: " << error_code;
return error_code;
}
return RET_OK;
}
error_code = SetQuantParam();
int PadInt8CPUKernel::Init() {
auto error_code = SetQuantParam();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "SetQuantParam failed. errorcode: " << error_code;
return error_code;
}
return RET_OK;
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
int PadInt8CPUKernel::RunImpl(int task_id) {
@@ -128,8 +125,8 @@ int PadInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
int PadInt8CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}
in_data_ = reinterpret_cast<int8_t *>(in_tensors_[0]->Data());
out_data_ = reinterpret_cast<int8_t *>(out_tensors_[0]->Data());
......
@@ -46,9 +46,9 @@ class PadInt8CPUKernel : public LiteKernel {
void FreeQuantParam();
private:
PadParameter *pad_param_;
int8_t *in_data_;
int8_t *out_data_;
PadParameter *pad_param_ = nullptr;
int8_t *in_data_ = nullptr;
int8_t *out_data_ = nullptr;
int in_dims_[DEFAULT_PAD_NDIMS];
int out_dims_[DEFAULT_PAD_NDIMS];
};
......
@@ -26,10 +26,6 @@ using mindspore::lite::RET_OK;
namespace mindspore::kernel {
int PoolingInt8CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
return RET_OK;
}
auto ret = PoolingBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "PoolingBase Init failed.";
......
@@ -29,34 +29,43 @@ using mindspore::schema::PrimitiveType_Prelu;
namespace mindspore::kernel {
int PreluInt8CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
return RET_OK;
}
PreluBaseCPUKernel::Init();
auto *input_tensor = in_tensors_.at(kInputIndex);
auto in_quant_args = input_tensor->GetQuantParams();
quant_prelu_parm_->quant_arg.in_args_.scale_ = in_quant_args.front().scale;
quant_prelu_parm_->quant_arg.in_args_.zp_ = in_quant_args.front().zeroPoint;
auto input_dim = input_tensor->shape().size();
MS_ASSERT(input_dim <= CROP_OFFSET_MAX_SIZE);
quant_prelu_parm_->input_dim_ = input_dim;
quant_prelu_parm_->element_num = in_tensors_[0]->Size();
auto *out_tensor = out_tensors_.at(kOutputIndex);
auto out_quant_args = out_tensor->GetQuantParams();
quant_prelu_parm_->quant_arg.out_args_.scale_ = out_quant_args.front().scale;
quant_prelu_parm_->quant_arg.out_args_.zp_ = out_quant_args.front().zeroPoint;
quant_prelu_parm_->in_shape_ = input_tensor->shape().data();
quant_prelu_parm_->out_shape_ = out_tensor->shape().data();
quant_prelu_parm_->quant_arg.output_activation_max_ = std::numeric_limits<int8_t>::max();
quant_prelu_parm_->quant_arg.output_activation_min_ = std::numeric_limits<int8_t>::min();
return RET_OK;
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
int PreluInt8CPUKernel::ReSize() { return 0; }
int PreluInt8CPUKernel::ReSize() {
auto *input_tensor = in_tensors_.at(kInputIndex);
auto *out_tensor = out_tensors_.at(kOutputIndex);
auto input_dim = input_tensor->shape().size();
MS_ASSERT(input_dim <= CROP_OFFSET_MAX_SIZE);
quant_prelu_parm_->input_dim_ = input_dim;
quant_prelu_parm_->element_num = in_tensors_[0]->Size();
quant_prelu_parm_->in_shape_ = input_tensor->shape().data();
quant_prelu_parm_->out_shape_ = out_tensor->shape().data();
return RET_OK;
}
int PreluInt8CPUKernel::Run() {
auto ret = LiteBackendParallelLaunch(PreluInt8Run, this, quant_prelu_parm_->op_parameter_.thread_num_);
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}
ret = LiteBackendParallelLaunch(PreluInt8Run, this, quant_prelu_parm_->op_parameter_.thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "RunPreluParam failed. errorcode: ";
}
......
@@ -25,11 +25,18 @@ using mindspore::schema::PrimitiveType_TopK;
namespace mindspore::kernel {
int TopKInt8CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
int TopKInt8CPUKernel::ReSize() {
TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_);
if (parameter->topk_node_list_ != nullptr) {
free(parameter->topk_node_list_);
parameter->topk_node_list_ = nullptr;
}
lite::tensor::Tensor *input = in_tensors_.at(0);
parameter->last_dim_size_ = input->shape()[input->shape().size() - 1];
parameter->loop_num_ = 1;
@@ -45,8 +52,6 @@ int TopKInt8CPUKernel::Init() {
return RET_OK;
}
int TopKInt8CPUKernel::ReSize() { return RET_OK; }
int TopKInt8CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
@@ -65,7 +70,11 @@ kernel::LiteKernel *CpuTopKInt8KernelCreator(const std::vector<lite::tensor::Ten
const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter,
const lite::Context *ctx, const KernelKey &desc,
const mindspore::lite::PrimitiveC *primitive) {
MS_ASSERT(parameter != nullptr);
if (parameter == nullptr) {
MS_LOG(ERROR) << "input parameter is nullptr!";
return nullptr;
}
TopKInt8CPUKernel *kernel = new (std::nothrow) TopKInt8CPUKernel(parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new TopKInt8CPUKernel fail!";
......
@@ -26,17 +26,21 @@ class TopKInt8CPUKernel : public LiteKernel {
explicit TopKInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
TopkParameter *param = reinterpret_cast<TopkParameter *>(op_parameter_);
param->topk_node_list_ = nullptr;
}
~TopKInt8CPUKernel() override {
TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_);
free(parameter->topk_node_list_);
if (parameter->topk_node_list_ != nullptr) {
free(parameter->topk_node_list_);
parameter->topk_node_list_ = nullptr;
}
}
int Init() override;
int ReSize() override;
int Run() override;
private:
};
} // namespace mindspore::kernel
......
@@ -29,10 +29,6 @@ using mindspore::schema::PrimitiveType_Unsqueeze;
namespace mindspore::kernel {
int Unsqueezeint8CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
set_need_reinit();
return RET_OK;
}
auto *input_tensor = in_tensors_.at(0);
auto quant_args = input_tensor->GetQuantParams();
MS_ASSERT(quant_args.size() == 1);
@@ -43,9 +39,10 @@ int Unsqueezeint8CPUKernel::Init() {
Unsq_para_->quant_arg.out_quant_args_.scale_ = out_quant_args.front().scale;
Unsq_para_->quant_arg.out_quant_args_.zp_ = out_quant_args.front().zeroPoint;
Unsq_para_->thread_count_ = thread_count_;
int ret = ReSize();
return ret;
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
int Unsqueezeint8CPUKernel::ReSize() {
@@ -86,7 +83,7 @@ int UnsqueezeIn8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
int Unsqueezeint8CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}
in_ptr_ = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
......
@@ -82,6 +82,7 @@ int Scheduler::InferShape(const lite::Model *model, std::vector<tensor::Tensor *
MS_EXCEPTION_IF_NULL(tensors);
auto meta_graph = model->GetMetaGraph();
MS_EXCEPTION_IF_NULL(meta_graph);
bool infer_shape_interrupt = false;
uint32_t kernelCount = meta_graph->nodes()->size();
for (uint32_t i = 0; i < kernelCount; i++) {
auto cNode = meta_graph->nodes()->GetAs<schema::CNode>(i);
@@ -101,27 +102,18 @@ int Scheduler::InferShape(const lite::Model *model, std::vector<tensor::Tensor *
<< schema::EnumNamePrimitiveType(cNode->primitive()->value_type());
return RET_ERROR;
}
if (!context_->infer_shape_interrupt_) {
auto ret = primitive->InferShape(inputs, outputs);
if (ret == RET_INFER_INVALID) {
MS_LOG(INFO) << "InferShape shouldn't be done before runtime, name: " << cNode->name()->str()
<< ", type: " << schema::EnumNamePrimitiveType(cNode->primitive()->value_type())
<< "flag set to false.";
primitive->SetInferFlag(false);
context_->InferShapeInterrupt();
} else if (ret != RET_OK) {
MS_LOG(ERROR) << "InferShape failed, name: " << cNode->name()->str()
<< ", type: " << schema::EnumNamePrimitiveType(cNode->primitive()->value_type());
return RET_INFER_ERR;
}
} else {
primitive->SetInferFlag(!infer_shape_interrupt);
auto ret = primitive->InferShape(inputs, outputs);
if (ret == RET_INFER_INVALID) {
MS_LOG(INFO) << "InferShape shouldn't be done before runtime, name: " << cNode->name()->str()
<< ", type: " << schema::EnumNamePrimitiveType(cNode->primitive()->value_type())
<< "flag set to false.";
primitive->SetInferFlag(false);
auto ret = primitive->InferShape(inputs, outputs);
if (ret != RET_OK) {
MS_LOG(ERROR) << "InferShape fail! name: " << cNode->name()->str()
<< ", type: " << schema::EnumNamePrimitiveType(cNode->primitive()->value_type());
return RET_INFER_ERR;
}
infer_shape_interrupt = true;
} else if (ret != RET_OK) {
MS_LOG(ERROR) << "InferShape failed, name: " << cNode->name()->str()
<< ", type: " << schema::EnumNamePrimitiveType(cNode->primitive()->value_type());
return RET_INFER_ERR;
}
}
return RET_OK;
......
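The scheduler now tracks shape-inference interruption with a local flag instead of the context-wide `infer_shape_interrupt_`: once any node's `InferShape` returns `RET_INFER_INVALID`, that node and every later node get `SetInferFlag(false)`, and inference is deferred to kernel `ReSize()` at runtime. Reconstructed control flow of the new loop body (a simplified sketch, not the verbatim source):

```cpp
// Per-node shape inference after this commit (simplified).
primitive->SetInferFlag(!infer_shape_interrupt);
auto ret = primitive->InferShape(inputs, outputs);
if (ret == RET_INFER_INVALID) {
  // Output shape depends on runtime data: mark this node uninferred and
  // skip inference for all subsequent nodes.
  primitive->SetInferFlag(false);
  infer_shape_interrupt = true;
} else if (ret != RET_OK) {
  return RET_INFER_ERR;
}
```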
@@ -76,6 +76,40 @@ TEST_F(TestArgMinMaxTestFp32, ArgMaxTest1_keep_dim) {
CompareOutputData(out, except_out.data(), except_out.size(), 0.000001);
}
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest_axis2_keep_dim) {
std::vector<float> in = {10, 20, 30,
11, 15, 10,
5, 10, 12,
10, 20, 30,
11, 15, 10,
5, 10, 12,
10, 20, 30,
11, 15, 10,
5, 10, 12
};
std::vector<float> except_out = {1, 0, 0, 1, 0, 0, 1, 0, 0};
std::vector<int> shape = {1, 3, 3, 3};
float out[9];
ArgMinMaxParameter param;
param.topk_ = 1;
param.out_value_ = false;
param.axis_ = 2;
param.data_type_ = 43;
param.dims_size_ = 4;
param.get_max_ = true;
param.keep_dims_ = true;
param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(shape[param.axis_] * sizeof(ArgElement)));
std::vector<int> out_shape = {1, 3, 1, 3};
ComputeStrides(shape.data(), param.in_strides_, shape.size());
ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
ArgMinMax(in.data(), out, shape.data(), &param);
for (size_t i = 0; i < except_out.size(); ++i) {
std::cout << out[i] << " ";
}
std::cout << "\n";
CompareOutputData(out, except_out.data(), except_out.size(), 0.000001);
}
TEST_F(TestArgMinMaxTestFp32, ArgMaxTest2) {
std::vector<float> in = {10, 20, 30, 40, 90,
20, 11, 15, 1, 50,
......
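In the new axis-2 test, the 1x3x3x3 input is three copies of the 3x3 block {10, 20, 30 / 11, 15, 10 / 5, 10, 12}; along axis 2 the kernel compares the columns (10, 11, 5), (20, 15, 10) and (30, 10, 12), whose maxima sit at indices 1, 0 and 0, so the expected output is {1, 0, 0} repeated three times.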