Commit 9a78a4a7
Author: chenjianping
Committed on: Aug 11, 2020
Parent: 3783dd04

optimize infershape when running graph

Showing 66 changed files with 451 additions and 404 deletions (+451, -404)
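The change repeated across most kernels below moves shape-dependent setup out of Init() and into ReSize(), replacing the old infer_shape_interrupt_/SetNeedReInit() handshake with an InferShapeDone() check, so a kernel can be created before input shapes are known and finish its setup once shapes are inferred. A minimal sketch of the pattern (SomeCPUKernel is a hypothetical kernel name; InferShapeDone(), ReSize(), and RET_OK follow the LiteKernel conventions visible in the hunks below):

// Shape-independent validation stays in Init(); everything that reads
// tensor shapes moves to ReSize(), which can re-run after each reshape.
int SomeCPUKernel::Init() {
  if (!InferShapeDone()) {
    return RET_OK;  // shapes unknown yet; ReSize() runs later
  }
  return ReSize();  // shapes already known: finish setup now
}

int SomeCPUKernel::ReSize() {
  // recompute buffers, strides, thread partitioning, etc. from the shapes
  return RET_OK;
}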
mindspore/lite/src/ops/concat.cc  +5 -0
mindspore/lite/src/ops/stack.cc  +6 -0
mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h  +1 -3
mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc  +3 -5
mindspore/lite/src/runtime/kernel/arm/base/concat_base.h  +1 -2
mindspore/lite/src/runtime/kernel/arm/base/crop_base.h  +1 -3
mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h  +1 -3
mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc  +17 -15
mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc  +1 -4
mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h  +1 -2
mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc  +3 -0
mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h  +1 -2
mindspore/lite/src/runtime/kernel/arm/base/split_base.cc  +3 -1
mindspore/lite/src/runtime/kernel/arm/base/split_base.h  +1 -1
mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h  +1 -3
mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc  +10 -4
mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h  +1 -4
mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc  +1 -1
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc  +13 -8
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h  +1 -0
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc  +4 -5
mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc  +15 -13
mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc  +9 -6
mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h  +1 -1
mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc  +5 -2
mindspore/lite/src/runtime/kernel/arm/fp32/cast.h  +2 -4
mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc  +4 -16
mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space.cc  +1 -3
mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc  +1 -0
mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc  +1 -4
mindspore/lite/src/runtime/kernel/arm/fp32/reshape.cc  +2 -2
mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc  +22 -9
mindspore/lite/src/runtime/kernel/arm/fp32/scale.h  +6 -5
mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc  +9 -8
mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.h  +1 -2
mindspore/lite/src/runtime/kernel/arm/fp32/shape.cc  +1 -1
mindspore/lite/src/runtime/kernel/arm/fp32/shape.h  +3 -5
mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc  +11 -13
mindspore/lite/src/runtime/kernel/arm/fp32/slice.h  +1 -1
mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc  +17 -7
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc  +27 -17
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h  +1 -1
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc  +15 -11
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.h  +2 -3
mindspore/lite/src/runtime/kernel/arm/fp32/split.cc  +16 -11
mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.cc  +2 -2
mindspore/lite/src/runtime/kernel/arm/fp32/stack.cc  +10 -45
mindspore/lite/src/runtime/kernel/arm/fp32/stack.h  +3 -16
mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc  +7 -6
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc  +22 -6
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h  +2 -2
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc  +2 -2
mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc  +2 -2
mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc  +37 -28
mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc  +19 -11
mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc  +2 -2
mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc  +3 -8
mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h  +1 -2
mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc  +3 -7
mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h  +0 -1
mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc  +3 -3
mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.h  +1 -2
mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc  +29 -9
mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h  +4 -1
mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc  +15 -11
mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc  +35 -27
mindspore/lite/src/ops/concat.cc
@@ -46,6 +46,7 @@ int Concat::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor
   auto input0_shape_without_axis = input0_shape;
   input0_shape_without_axis.erase(input0_shape_without_axis.begin() + axis);
   auto input0_data_type = inputs_.at(0)->data_type();
+  schema::Format input0_format = inputs_[0]->GetFormat();
   int output_axis_dim = input0_shape.at(axis);
   for (size_t i = 1; i < inputs_.size(); ++i) {
     if (inputs_.at(i)->data_type() != input0_data_type) {
@@ -53,6 +54,10 @@ int Concat::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor
       return RET_PARAM_INVALID;
     }
+    if (inputs_.at(i)->GetFormat() != input0_format) {
+      MS_LOG(ERROR) << "All input format should be the same!";
+      return RET_PARAM_INVALID;
+    }
     auto shape_tmp = inputs_.at(i)->shape();
     if (shape_tmp.size() != input0_shape.size()) {
       MS_LOG(ERROR) << "All inputs should have the same dim num!";
mindspore/lite/src/ops/stack.cc
@@ -44,7 +44,13 @@ int Stack::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::
     MS_LOG(ERROR) << "Invalid axis " << stack_prim->axis();
     return RET_PARAM_INVALID;
   }
+  schema::Format input0_format = input->GetFormat();
   for (size_t i = 1; i < inputs.size(); ++i) {
+    if (inputs[i]->GetFormat() != input0_format) {
+      MS_LOG(ERROR) << "All inputs should have the same format!";
+      return RET_PARAM_INVALID;
+    }
     auto input_shape_tmp = inputs[i]->shape();
     if (input_shape_tmp.size() != input_shape.size()) {
       MS_LOG(ERROR) << "All input shape size should be the same!";
mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h
@@ -27,9 +27,7 @@ class BatchToSpaceBaseCPUKernel : public LiteKernel {
   BatchToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                             const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                             const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
-    opParameter->thread_num_ = ctx->thread_num_;
-  }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
 
   virtual ~BatchToSpaceBaseCPUKernel() = default;
mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc
@@ -29,11 +29,9 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Concat;
 
 namespace mindspore::kernel {
-int ConcatBaseCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
+int ConcatBaseCPUKernel::Init() { return RET_OK; }
+
+int ConcatBaseCPUKernel::ReSize() {
   axis_ = concat_param_->axis_ >= 0 ? concat_param_->axis_ : inputs_.front()->shape().size() + concat_param_->axis_;
   return RET_OK;
 }
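The axis normalization that ReSize() now performs follows the usual negative-axis convention: a negative axis counts back from the last dimension. A worked example under that convention (NormalizeAxis is a hypothetical helper, not part of the patch):

// For a rank-4 input, axis -1 maps to 4 + (-1) = 3 (the last dimension);
// a non-negative axis such as 2 is used unchanged.
int NormalizeAxis(int axis, int rank) { return axis >= 0 ? axis : rank + axis; }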
mindspore/lite/src/runtime/kernel/arm/base/concat_base.h
@@ -31,7 +31,6 @@ class ConcatBaseCPUKernel : public LiteKernel {
                       const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                       const lite::Primitive *primitive)
       : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
-    opParameter->thread_num_ = ctx->thread_num_;
     concat_param_ = reinterpret_cast<ConcatParameter *>(opParameter);
   }
@@ -39,7 +38,7 @@ class ConcatBaseCPUKernel : public LiteKernel {
   int Init() override;
 
-  int ReSize() override { return 0; }
+  int ReSize() override;
 
   int Run() override { return 0; }
mindspore/lite/src/runtime/kernel/arm/base/crop_base.h
@@ -29,9 +29,7 @@ class CropBaseCPUKernel : public LiteKernel {
   CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                     const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                     const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
-    opParameter->thread_num_ = ctx->thread_num_;
-  }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {}
   ~CropBaseCPUKernel() = default;
 
   int Init() override;
mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h
@@ -27,9 +27,7 @@ class DepthToSpaceBaseCPUKernel : public LiteKernel {
   DepthToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                             const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                             const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
-    opParameter->thread_num_ = ctx->thread_num_;
-  }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
 
   virtual ~DepthToSpaceBaseCPUKernel() = default;
mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
@@ -25,26 +25,18 @@ using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
+using mindspore::lite::RET_PARAM_INVALID;
 using mindspore::schema::PrimitiveType_QuantDTypeCast;
 
 namespace mindspore::kernel {
 namespace {
 constexpr int kQuantDTypeCastInputNum = 1;
 constexpr int kQuantDTypeCastOutputNum = 1;
 }  // namespace
 
 int QuantDTypeCastCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
   if (inputs_.size() != 1) {
     MS_LOG(ERROR) << "inputs number should be 1, but " << inputs_.size() << " is given.";
-    return RET_ERROR;
+    return RET_PARAM_INVALID;
   }
   if (outputs_.size() != 1) {
     MS_LOG(ERROR) << "outputs number should be 1, but " << inputs_.size() << " is given.";
-    return RET_ERROR;
+    return RET_PARAM_INVALID;
   }
   auto in_tensor = inputs_.front();
   auto out_tensor = outputs_.front();
@@ -63,18 +55,23 @@ int QuantDTypeCastCPUKernel::Init() {
     inverse_ = true;
   } else {
     MS_LOG(ERROR) << "param data type not supported:"
                   << " src: " << param->srcT << " dst: " << param->dstT;
-    return RET_ERROR;
+    return RET_PARAM_INVALID;
   }
 
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int QuantDTypeCastCPUKernel::ReSize() {
+  auto in_tensor = inputs_.front();
   num_unit_ = static_cast<int>(in_tensor->DataSize());
   thread_n_num_ = MSMIN(thread_num_, num_unit_);
   thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_);
   return RET_OK;
 }
 
-int QuantDTypeCastCPUKernel::ReSize() { return RET_OK; }
-
 int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) {
   int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
   if (num_unit_thread <= 0) {
@@ -108,6 +105,11 @@ int QuantDTypeCastRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 }
 
 int QuantDTypeCastCPUKernel::Run() {
+  auto prepare_ret = Prepare();
+  if (prepare_ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
+    return prepare_ret;
+  }
   if (inverse_) {
     int8_ptr_ = reinterpret_cast<int8_t *>(inputs_[0]->Data());
     float32_ptr_ = reinterpret_cast<float *>(outputs_[0]->Data());
mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
@@ -28,10 +28,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Reshape;
 
 namespace mindspore::kernel {
-int ReshapeBaseCPUKernel::Init() {
-  reshape_param_->thread_count_ = thread_count_;
-  return RET_OK;
-}
+int ReshapeBaseCPUKernel::Init() { return RET_OK; }
 
 kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                 const std::vector<lite::tensor::Tensor *> &outputs,
mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
@@ -29,7 +29,7 @@ class ReshapeBaseCPUKernel : public LiteKernel {
   ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                        const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                        const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx) {
     reshape_param_ = reinterpret_cast<ReshapeParameter *>(opParameter);
   }
   ~ReshapeBaseCPUKernel() = default;
@@ -39,7 +39,6 @@ class ReshapeBaseCPUKernel : public LiteKernel {
   int Run() override { return 0; }
 
  protected:
-  int thread_count_;
   const Context *ctx_;
   ReshapeParameter *reshape_param_;
 };
mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc
@@ -36,7 +36,10 @@ int SoftmaxBaseCPUKernel::Init() {
     MS_LOG(ERROR) << "SoftmaxParameter nullptr";
     return RET_NULL_PTR;
   }
+  return RET_OK;
+}
 
+int SoftmaxBaseCPUKernel::ReSize() {
   auto input_tensor = inputs_.front();
   auto in_shape = input_tensor->shape();
   auto in_dims = in_shape.size();
mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h
@@ -28,13 +28,12 @@ class SoftmaxBaseCPUKernel : public LiteKernel {
                        const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                        const lite::Primitive *primitive)
       : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
-    opParameter->thread_num_ = ctx->thread_num_;
     softmax_param_ = reinterpret_cast<SoftmaxParameter *>(opParameter);
   }
   ~SoftmaxBaseCPUKernel() = default;
 
   int Init() override;
-  int ReSize() override { return 0; }
+  int ReSize() override;
   int Run() override { return 0; }
 
  protected:
mindspore/lite/src/runtime/kernel/arm/base/split_base.cc
@@ -28,7 +28,9 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Split;
 
 namespace mindspore::kernel {
-int SplitBaseCPUKernel::Init() {
+int SplitBaseCPUKernel::Init() { return RET_OK; }
+
+int SplitBaseCPUKernel::ReSize() {
   auto in_tensor = inputs_.front();
   auto input_shape = in_tensor->shape();
mindspore/lite/src/runtime/kernel/arm/base/split_base.h
@@ -35,7 +35,7 @@ class SplitBaseCPUKernel : public LiteKernel {
   ~SplitBaseCPUKernel() = default;
 
   int Init() override;
-  int ReSize() override { return 0; }
+  int ReSize() override;
   int Run() override { return 0; }
 
  protected:
mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h
@@ -30,9 +30,7 @@ class SqueezeBaseCPUKernel : public LiteKernel {
   SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                        const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                        const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
-    opParameter->thread_num_ = ctx->thread_num_;
-  }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
 
   virtual ~SqueezeBaseCPUKernel() = default;
mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
@@ -31,6 +31,14 @@ using mindspore::schema::PrimitiveType_StridedSlice;
 namespace mindspore::kernel {
 
-int StridedSliceCPUKernel::Init() {
+int StridedSliceCPUKernel::Init() {
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int StridedSliceCPUKernel::ReSize() {
   auto input = inputs_.at(0);
   auto parameter = reinterpret_cast<StridedSliceParameter *>(opParameter);
   MS_ASSERT(input);
@@ -39,13 +47,11 @@ int StridedSliceCPUKernel::Init() {
   return RET_OK;
 }
 
-int StridedSliceCPUKernel::ReSize() { return 0; }
-
 int StridedSliceCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   auto input = inputs_.at(0);
mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h
@@ -27,15 +27,12 @@ class StridedSliceCPUKernel : public LiteKernel {
   StridedSliceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                         const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                         const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~StridedSliceCPUKernel() override = default;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
-
- private:
-  int thread_num_;
 };
 }  // namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc
@@ -57,7 +57,7 @@ int AddNCPUKernel::AddNParallelRun(int thread_id) {
 int AddNCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return ret;
   }
   elements_num_ = inputs_[0]->ElementsNum();
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc
@@ -29,8 +29,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Eltwise;
 
 namespace mindspore::kernel {
-ArithmeticCPUKernel::~ArithmeticCPUKernel() {
+void ArithmeticCPUKernel::FreeTileData() {
   if (tile_data0_ != nullptr) {
     delete[](tile_data0_);
     tile_data0_ = nullptr;
@@ -40,21 +39,27 @@ ArithmeticCPUKernel::~ArithmeticCPUKernel() {
     tile_data1_ = nullptr;
   }
 }
 
+ArithmeticCPUKernel::~ArithmeticCPUKernel() { FreeTileData(); }
+
 int ArithmeticCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
+  return ReSize();
+}
+
+int ArithmeticCPUKernel::ReSize() {
+  FreeTileData();
   auto element_num = outputs_[0]->ElementsNum();
   tile_data0_ = new float[element_num];
   tile_data1_ = new float[element_num];
   return RET_OK;
 }
 
-int ArithmeticCPUKernel::ReSize() { return RET_OK; }
-
 int ArithmeticCPUKernel::DoArithmetic(int task_id) {
   auto input0_data = reinterpret_cast<float *>(inputs_[0]->Data());
   auto input1_data1 = reinterpret_cast<float *>(inputs_[1]->Data());
@@ -98,7 +103,7 @@ int ArithmeticsRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 int ArithmeticCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return ret;
   }
   if (arithmeticParameter_->broadcasting_) {
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h
@@ -172,6 +172,7 @@ class ArithmeticCPUKernel : public LiteKernel {
   int DoArithmetic(int task_id);
 
  private:
+  void FreeTileData();
   int thread_count_;
   float *tile_data0_ = nullptr;
   float *tile_data1_ = nullptr;
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc
@@ -27,12 +27,11 @@ using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
 int ArithmeticSelfCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
-  int ret = ReSize();
-  return ret;
+  return ReSize();
 }
 
 int ArithmeticSelfCPUKernel::ReSize() {
@@ -74,7 +73,7 @@ int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) {
 int ArithmeticSelfCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return ret;
   }
   auto input_tensor = inputs_.at(0);
mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc
@@ -28,7 +28,18 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_BiasAdd;
 
 namespace mindspore::kernel {
-int BiasCPUKernel::ReSize() { return RET_OK; }
+int BiasCPUKernel::ReSize() {
+  auto dims = inputs_[0]->shape();
+  MS_ASSERT(dims.size() <= 5);
+  bias_param_->ndim_ = dims.size();
+  for (int i = 0; i < bias_param_->ndim_; i++) {
+    bias_param_->in_shape0_[i] = dims[i];
+    bias_param_->in_shape1_[i] = 1;
+    bias_param_->out_shape_[i] = dims[i];
+  }
+  bias_param_->in_shape1_[bias_param_->ndim_ - 1] = dims[bias_param_->ndim_ - 1];
+  return RET_OK;
+}
 
 int BiasCPUKernel::Run() {
   auto prepare_ret = Prepare();
@@ -49,20 +60,11 @@ int BiasCPUKernel::Run() {
 }
 
 int BiasCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
-  auto dims = inputs_[0]->shape();
-  MS_ASSERT(dims.size() <= 5);
-  bias_param_->ndim_ = dims.size();
-  for (int i = 0; i < bias_param_->ndim_; i++) {
-    bias_param_->in_shape0_[i] = dims[i];
-    bias_param_->in_shape1_[i] = 1;
-    bias_param_->out_shape_[i] = dims[i];
-  }
-  bias_param_->in_shape1_[bias_param_->ndim_ - 1] = dims[bias_param_->ndim_ - 1];
-  return RET_OK;
+  return ReSize();
 }
 
 kernel::LiteKernel *CpuBiasFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc
@@ -25,12 +25,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_BroadcastTo;
 
 namespace mindspore::kernel {
-int BroadcastToCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
+int BroadcastToCPUKernel::ReSize() {
   auto input_shape = inputs_[0]->shape();
   for (size_t i = 0; i < input_shape.size(); ++i) {
     shape_info_.input_shape_[i] = input_shape[i];
@@ -45,6 +40,14 @@ int BroadcastToCPUKernel::Init() {
   return RET_OK;
 }
 
+int BroadcastToCPUKernel::Init() {
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
 int BroadcastToCPUKernel::Run() {
   auto prepare_ret = Prepare();
   if (prepare_ret != RET_OK) {
mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h
@@ -31,7 +31,7 @@ class BroadcastToCPUKernel : public LiteKernel {
   ~BroadcastToCPUKernel() = default;
 
   int Init() override;
-  int ReSize() override { return 0; }
+  int ReSize() override;
   int Run() override;
 
  private:
mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc
@@ -41,10 +41,13 @@ int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
 }  // namespace
 
 int CastCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
+  return ReSize();
+}
+
+int CastCPUKernel::ReSize() {
   data_num_ = inputs_[0]->ElementsNum();
   if (data_num_ == 0) {
     return RET_OK;
mindspore/lite/src/runtime/kernel/arm/fp32/cast.h
@@ -25,14 +25,12 @@ class CastCPUKernel : public LiteKernel {
   CastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                 const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                 const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
-    opParameter->thread_num_ = ctx->thread_num_;
-  }
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~CastCPUKernel() = default;
 
   int Init() override;
-  int ReSize() override { return 0; };
+  int ReSize() override;
   int Run() override;
   int DoCast(int thread_id);
mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc
@@ -29,30 +29,18 @@ using mindspore::schema::PrimitiveType_Concat;
 namespace mindspore::kernel {
 int ConcatCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
   auto ret = ConcatBaseCPUKernel::Init();
   if (ret != RET_OK) {
     return ret;
   }
-  schema::Format input0_format = inputs_[0]->GetFormat();
-  bool need_convert_format = false;
-  for (size_t i = 1; i < inputs_.size(); ++i) {
-    if (inputs_[i]->GetFormat() != input0_format) {
-      need_convert_format = true;
-    }
-  }
-  if (!need_convert_format) {
-    outputs_[0]->SetFormat(input0_format);
+  if (!InferShapeDone()) {
     return RET_OK;
   }
-  MS_LOG(ERROR) << "All input format should be the same!";
-  return RET_ERROR;
+  return ReSize();
 }
 
-int ConcatCPUKernel::ReSize() { return RET_OK; }
+int ConcatCPUKernel::ReSize() { return ConcatBaseCPUKernel::ReSize(); }
 
 int ConcatCPUKernel::Run() {
   auto prepare_ret = Prepare();
mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space.cc
@@ -44,9 +44,7 @@ int DepthToSpaceCPUKernel::Init() {
   return ReSize();
 }
 
-int DepthToSpaceCPUKernel::ReSize() {
-  return DepthToSpaceBaseCPUKernel::ReSize();
-}
+int DepthToSpaceCPUKernel::ReSize() { return DepthToSpaceBaseCPUKernel::ReSize(); }
 
 int DepthToSpaceCPUKernel::Run() {
   auto prepare_ret = Prepare();
mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc
@@ -32,6 +32,7 @@ int EluCPUKernel::Init() {
   if (!InferShapeDone()) {
     return RET_OK;
   }
+
   return ReSize();
 }
mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc
@@ -28,10 +28,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Prelu;
 
 namespace mindspore::kernel {
-int PReluCPUKernel::Init() {
-  prelu_param_->op_parameter_.thread_num_ = thread_count_;
-  return RET_OK;
-}
+int PReluCPUKernel::Init() { return RET_OK; }
 
 int PReluCPUKernel::DoExcute(int task_id) {
   PRelu(input_data, output_data, prelu_param_, task_id);
mindspore/lite/src/runtime/kernel/arm/fp32/reshape.cc
@@ -38,8 +38,8 @@ int ReshapeCPUKernel::ReSize() { return RET_OK; }
 int ReshapeCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   auto input_ptr = inputs_.at(kInputIndex)->Data();
   auto output_ptr = outputs_.at(kOutputIndex)->Data();
mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
@@ -29,7 +29,19 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Scale;
 
 namespace mindspore::kernel {
+void ScaleCPUKernel::FreeTmpBuffer() {
+  if (scale_ != nullptr) {
+    free(scale_);
+    scale_ = nullptr;
+  }
+  if (offset_ != nullptr) {
+    free(offset_);
+    offset_ = nullptr;
+  }
+}
+
 int ScaleCPUKernel::InitScaleOffset() {
+  FreeTmpBuffer();
   auto param = reinterpret_cast<ScaleParameter *>(opParameter);
   auto scale_tensor = inputs_.at(1);
   float *scale_ptr = reinterpret_cast<float *>(inputs_.at(1)->Data());
@@ -91,15 +103,18 @@ int ScaleCPUKernel::InitParameter() {
 }
 
 int ScaleCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
   if (inputs_.size() < 2 || inputs_.size() > 3) {
     MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << inputs_.size() << " is given.";
     return RET_ERROR;
   }
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
 
+int ScaleCPUKernel::ReSize() {
   auto ret = InitParameter();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale fp32 InitParameter failed.";
@@ -114,8 +129,6 @@ int ScaleCPUKernel::Init() {
   return RET_OK;
 }
 
-int ScaleCPUKernel::ReSize() { return RET_OK; }
-
 int ScaleCPUKernel::Scale(int task_id) {
   auto ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, reinterpret_cast<ScaleParameter *>(opParameter));
@@ -140,8 +153,8 @@ int ScaleRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 int ScaleCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   auto in_tensor = inputs_.front();
   input_ptr_ = reinterpret_cast<float *>(in_tensor->Data());
@@ -157,7 +170,7 @@ int ScaleCPUKernel::Run() {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;
   }
-
+  FreeTmpBuffer();
   return RET_OK;
 }
mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
@@ -24,13 +24,13 @@ namespace mindspore::kernel {
 class ScaleCPUKernel : public LiteKernel {
  public:
-  explicit ScaleCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+  ScaleCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                  const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                  const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
-    opParameter->thread_num_ = ctx->thread_num_;
-  }
-  ~ScaleCPUKernel() override = default;
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
+  ~ScaleCPUKernel() { FreeTmpBuffer(); }
 
   int Init() override;
   int ReSize() override;
@@ -40,6 +40,7 @@ class ScaleCPUKernel : public LiteKernel {
   int Scale(int task_id);
 
  private:
+  void FreeTmpBuffer();
   float *input_ptr_;
   float *scale_;
   float *offset_;
mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc
@@ -37,11 +37,14 @@ constexpr int kScatterIndicesIndex = 1;
 constexpr int kScatterUpdateIndex = 2;
 }  // namespace
 
 int ScatterNDCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
-  auto shape = inputs_.at(kScatterShapeIndex);
+  return ReSize();
+}
+
+int ScatterNDCPUKernel::ReSize() {
+  auto shape = inputs_.at(kScatterShapeIndex);
   auto indices = inputs_.at(kScatterIndicesIndex);
   auto update = inputs_.at(kScatterUpdateIndex);
@@ -116,13 +119,11 @@ int ScatterNDCPUKernel::Init() {
     output_unit_offsets_.push_back(tmp_stride);
   }
-  thread_n_num_ = MSMIN(thread_num_, num_unit_);
+  thread_n_num_ = MSMIN(opParameter->thread_num_, num_unit_);
   thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_);
   return RET_OK;
 }
 
-int ScatterNDCPUKernel::ReSize() { return 0; }
-
 int ScatterNDCPUKernel::ScatterND(int task_id) {
   int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
   if (num_unit_thread <= 0) {
@@ -152,8 +153,8 @@ int ScatterNDRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 int ScatterNDCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   ret = LiteBackendParallelLaunch(ScatterNDRun, this, thread_n_num_);
   if (ret != RET_OK) {
mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.h
@@ -28,7 +28,7 @@ class ScatterNDCPUKernel : public LiteKernel {
   explicit ScatterNDCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                               const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                               const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ScatterNDCPUKernel() override = default;
 
   int Init() override;
@@ -37,7 +37,6 @@ class ScatterNDCPUKernel : public LiteKernel {
   int ScatterND(int task_id);
 
  private:
-  int thread_num_;
   int thread_n_num_;
   int thread_n_stride_;
   int num_unit_;
mindspore/lite/src/runtime/kernel/arm/fp32/shape.cc
@@ -37,7 +37,7 @@ int ShapeCPUKernel::ReSize() { return RET_OK; }
 int ShapeCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return RET_ERROR;
   }
   auto out_tensor = outputs_.front();
mindspore/lite/src/runtime/kernel/arm/fp32/shape.h
@@ -25,17 +25,15 @@ namespace mindspore::kernel {
 class ShapeCPUKernel : public LiteKernel {
  public:
-  explicit ShapeCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                          const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
-                          const lite::Primitive *primitive)
+  ShapeCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                 const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                 const lite::Primitive *primitive)
       : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ShapeCPUKernel() override = default;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
-
- private:
 };
 }  // namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc
@@ -39,11 +39,7 @@ int SliceLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
 }
 }  // namespace
 
-int SliceCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
+int SliceCPUKernel::ReSize() {
   auto *param = reinterpret_cast<SliceParameter *>(opParameter);
   auto input_shape = inputs_[0]->shape();
   if (input_shape.size() != param->param_length_) {
@@ -59,10 +55,16 @@ int SliceCPUKernel::Init() {
   for (size_t i = 0; i < input_shape.size(); ++i) {
     param->shape_[i] = input_shape[i];
   }
   outputs_[0]->SetFormat(inputs_[0]->GetFormat());
   return RET_OK;
 }
 
+int SliceCPUKernel::Init() {
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
 int SliceCPUKernel::SliceParallelRun(int thread_id) {
   const float *input_data = reinterpret_cast<const float *>(inputs_[0]->Data());
   float *output_data = reinterpret_cast<float *>(outputs_[0]->Data());
@@ -74,8 +76,8 @@ int SliceCPUKernel::SliceParallelRun(int thread_id) {
 int SliceCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
  }
   SliceParameter *param = reinterpret_cast<SliceParameter *>(opParameter);
   for (int i = 0; i < param->param_length_; ++i) {
@@ -111,12 +113,8 @@ kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vector<lite::tensor::Te
     MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  if (ctx == nullptr) {
-    MS_LOG(ERROR) << "Input context is nullptr!";
-    return nullptr;
-  }
   MS_ASSERT(desc.type == schema::PrimitiveType_Slice);
   op_parameter->thread_num_ = ctx->thread_num_;
   auto *kernel = new (std::nothrow) SliceCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new SliceCPUKernel fail!";
mindspore/lite/src/runtime/kernel/arm/fp32/slice.h
@@ -29,7 +29,7 @@ class SliceCPUKernel : public LiteKernel {
   ~SliceCPUKernel() = default;
 
   int Init() override;
-  int ReSize() override { return 0; }
+  int ReSize() override;
   int Run() override;
   int SliceParallelRun(int thread_id);
 };
mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
@@ -30,13 +30,22 @@ using mindspore::schema::PrimitiveType_SoftMax;
 namespace mindspore::kernel {
 int SoftmaxCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
+  auto ret = SoftmaxBaseCPUKernel::Init();
+  if (ret != RET_OK) {
+    return ret;
+  }
+  if (!InferShapeDone()) {
     return RET_OK;
   }
-  SoftmaxBaseCPUKernel::Init();
+  return ReSize();
+}
 
+// malloc tmp buffer
+int SoftmaxCPUKernel::ReSize() {
+  auto ret = SoftmaxBaseCPUKernel::ReSize();
+  if (ret != RET_OK) {
+    return ret;
+  }
   auto n_dim = softmax_param_->n_dim_;
   auto axis = softmax_param_->axis_;
   if (axis == -1) {
@@ -52,17 +61,18 @@ int SoftmaxCPUKernel::Init() {
   for (int i = axis + 1; i < n_dim; i++) {
     in_plane_size *= in_shape[i];
   }
+  if (sum_data_ != nullptr) {
+    free(sum_data_);
+  }
   sum_data_ = reinterpret_cast<float *>(malloc(out_plane_size * in_plane_size * sizeof(float)));
   memset(sum_data_, 0, out_plane_size * in_plane_size * sizeof(float));
   return RET_OK;
 }
 
-int SoftmaxCPUKernel::ReSize() { return RET_OK; }
-
 int SoftmaxCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return RET_ERROR;
   }
   auto input_ptr = reinterpret_cast<float *>(inputs_.at(kInputIndex)->Data());
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc
@@ -25,20 +25,13 @@
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_FORMAT_ERR;
 using mindspore::lite::RET_OK;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OP_EXECUTE_FAILURE;
 using mindspore::schema::PrimitiveType_SpaceToBatch;
 
 namespace mindspore::kernel {
 
 int SpaceToBatchCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
-  if (inputs_[0]->GetFormat() != schema::Format_NHWC) {
-    MS_LOG(ERROR) << "space_to_batch only support NHWC now!";
-    return RET_FORMAT_ERR;
-  }
   SpaceToBatchParameter *param = reinterpret_cast<SpaceToBatchParameter *>(this->opParameter);
   for (int i = 0; i < SPACE_TO_BATCH_PADDINGS_SIZE; ++i) {
     if (param->paddings_[i] != 0) {
@@ -48,6 +41,18 @@ int SpaceToBatchCPUKernel::Init() {
   }
   param->n_dims_ = DIMENSION_4D;
   param->n_space_dims_ = SPACE_TO_BATCH_BLOCK_SIZES_SIZE;
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int SpaceToBatchCPUKernel::ReSize() {
+  if (inputs_[0]->GetFormat() != schema::Format_NHWC) {
+    MS_LOG(ERROR) << "space_to_batch only support NHWC now!";
+    return RET_FORMAT_ERR;
+  }
+  SpaceToBatchParameter *param = reinterpret_cast<SpaceToBatchParameter *>(this->opParameter);
   param->num_elements_ = EnumElement(param->in_shape_, param->n_dims_);
   param->num_elements_padded_ = EnumElement(param->padded_in_shape_, param->n_dims_);
   return RET_OK;
@@ -56,8 +61,8 @@
 int SpaceToBatchCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   auto input = inputs_[0];
   auto output = outputs_[0];
@@ -67,14 +72,19 @@ int SpaceToBatchCPUKernel::Run() {
   float *tmp_space[3] = {nullptr, nullptr, nullptr};
   if (param->need_paddings_) {
-    tmp_space[0] = reinterpret_cast<float *>(malloc(param->num_elements_padded_ * sizeof(float)));
-    (void)memset(tmp_space[0], 0, param->num_elements_padded_);
-    tmp_space[1] = reinterpret_cast<float *>(malloc(param->num_elements_padded_ * sizeof(float)));
-    (void)memset(tmp_space[1], 0, param->num_elements_padded_);
-    tmp_space[2] = reinterpret_cast<float *>(malloc(param->num_elements_padded_ * sizeof(float)));
-    (void)memset(tmp_space[2], 0, param->num_elements_padded_);
+    for (int i = 0; i < 3; ++i) {
+      tmp_space[i] = reinterpret_cast<float *>(context_->allocator->Malloc(param->num_elements_padded_ * sizeof(float)));
+      (void)memset(tmp_space[i], 0, param->num_elements_padded_ * sizeof(float));
+      if (tmp_space[i] == nullptr) {
+        MS_LOG(ERROR) << "malloc tmp buffer fail!";
+        return RET_ERROR;
+      }
+    }
     ret = SpaceToBatch(input_ptr_, output_ptr_, *param, tmp_space);
+    for (int i = 0; i < 3; ++i) {
+      context_->allocator->Free(tmp_space[i]);
+    }
   } else {
     ret = SpaceToBatch(input_ptr_, output_ptr_, *param, tmp_space);
   }
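The Run() hunk above also switches the three temporary padding buffers from bare malloc/memset to the context allocator, so the runtime can pool and reuse that memory across invocations. A minimal sketch of the allocate-use-free discipline it adopts (the allocator interface is assumed from the calls in the hunk, not quoted from it; note the hunk itself memsets before its null check, which a caller would normally do the other way around):

// Assumed shape of the allocator usage introduced above.
void *raw = context_->allocator->Malloc(n * sizeof(float));
if (raw == nullptr) {
  return RET_ERROR;  // check before touching the buffer
}
float *buf = reinterpret_cast<float *>(raw);
memset(buf, 0, n * sizeof(float));
// ... use buf ...
context_->allocator->Free(buf);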
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h
@@ -30,7 +30,7 @@ class SpaceToBatchCPUKernel : public LiteKernel {
   ~SpaceToBatchCPUKernel() = default;
 
   int Init() override;
-  int ReSize() override { return 0; }
+  int ReSize() override;
   int Run() override;
 
  private:
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc
@@ -32,22 +32,26 @@ using mindspore::schema::PrimitiveType_SpaceToDepth;
 namespace mindspore::kernel {
 int SpaceToDepthCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
-  if (inputs_[0]->GetFormat() != schema::Format_NHWC) {
-    MS_LOG(ERROR) << "space_to_depth only support NHWC now!";
-    return RET_FORMAT_ERR;
-  }
   SpaceToDepthParameter *param = reinterpret_cast<SpaceToDepthParameter *>(opParameter);
   if (param->block_size_ <= 0) {
     MS_LOG(ERROR) << "Input block_size should > 0!";
     return RET_PARAM_INVALID;
   }
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int SpaceToDepthCPUKernel::ReSize() {
+  if (inputs_[0]->GetFormat() != schema::Format_NHWC) {
+    MS_LOG(ERROR) << "space_to_depth only support NHWC now!";
+    return RET_FORMAT_ERR;
+  }
   num_unit_ = static_cast<int>(inputs_[0]->shape().at(kNHWC_H));
-  thread_h_num_ = MSMIN(thread_num_, num_unit_);
+  thread_h_num_ = MSMIN(opParameter->thread_num_, num_unit_);
   thread_h_stride_ = UP_DIV(num_unit_, thread_h_num_);
   return RET_OK;
 }
@@ -83,8 +87,8 @@ int SpaceToDepthRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 int SpaceToDepthCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   input_ptr_ = reinterpret_cast<float *>(inputs_[0]->Data());
   output_ptr_ = reinterpret_cast<float *>(outputs_[0]->Data());
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.h
@@ -26,16 +26,15 @@ class SpaceToDepthCPUKernel : public LiteKernel {
   SpaceToDepthCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                         const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                         const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~SpaceToDepthCPUKernel() = default;
 
   int SpaceToDepth(int task_id);
   int Init() override;
-  int ReSize() override { return 0; };
+  int ReSize() override;
   int Run() override;
 
  private:
-  int thread_num_;
   int thread_h_stride_;
   int thread_h_num_;
   int num_unit_;
mindspore/lite/src/runtime/kernel/arm/fp32/split.cc
@@ -31,16 +31,21 @@ using mindspore::schema::PrimitiveType_Split;
 namespace mindspore::kernel {
 int SplitCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
+  auto ret = SplitBaseCPUKernel::Init();
+  if (ret != RET_OK) {
+    return ret;
   }
-  SplitBaseCPUKernel::Init();
-  output_ptr_.resize(param->num_split_);
-  return RET_OK;
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
 }
 
-int SplitCPUKernel::ReSize() { return RET_OK; }
+int SplitCPUKernel::ReSize() { return SplitBaseCPUKernel::ReSize(); }
 
 int SplitCPUKernel::Split(int task_id) {
   int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
@@ -69,15 +74,15 @@ int SplitRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 int SplitCPUKernel::Run() {
   auto ret = Prepare();
-  auto in_tensor = inputs_.front();
-  input_ptr_ = reinterpret_cast<float *>(in_tensor->Data());
-  for (int i = 0; i < output_ptr_.size(); i++) {
-    output_ptr_[i] = reinterpret_cast<float *>(outputs_.at(i)->Data());
-  }
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Prepare failed.";
     return RET_ERROR;
   }
+  auto in_tensor = inputs_.front();
+  input_ptr_ = reinterpret_cast<float *>(in_tensor->Data());
+  for (int i = 0; i < param->num_split_; i++) {
+    output_ptr_.push_back(reinterpret_cast<float *>(outputs_.at(i)->Data()));
+  }
   ret = LiteBackendParallelLaunch(SplitRun, this, thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.cc
@@ -39,8 +39,8 @@ int SqueezeCPUKernel::ReSize() { return RET_OK; }
 int SqueezeCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   auto input_ptr = reinterpret_cast<float *>(inputs_.front()->Data());
   auto output_ptr = reinterpret_cast<float *>(outputs_.front()->Data());
mindspore/lite/src/runtime/kernel/arm/fp32/stack.cc
@@ -26,55 +26,26 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Stack;
 
 namespace mindspore::kernel {
-int StackCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
+int StackCPUKernel::ReSize() {
   StackParameter *param = reinterpret_cast<StackParameter *>(opParameter);
   auto input0_shape = inputs_[0]->shape();
   axis_ = param->axis_ < 0 ? param->axis_ + input0_shape.size() : param->axis_;
+  schema::Format input0_format = inputs_[0]->GetFormat();
+  bool need_convert_format = false;
+  for (size_t i = 1; i < inputs_.size(); ++i) {
+    if (inputs_[i]->GetFormat() != input0_format) {
+      need_convert_format = true;
+    }
+  }
+  if (!need_convert_format) {
+    outputs_[0]->SetFormat(input0_format);
+  }
+  return RET_OK;
+}
 
+int StackCPUKernel::Init() {
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
-  for (size_t i = 0; i < inputs_.size(); ++i) {
-    if (inputs_[i]->GetFormat() != schema::Format_NHWC) {
-      convert_functions_[i] = LayoutTransform(inputs_[i]->data_type(), inputs_[i]->GetFormat(), schema::Format_NHWC);
-      if (convert_functions_[i] == nullptr) {
-        MS_LOG(ERROR) << "Can not convert format " << inputs_[i]->GetFormat() << " to " << schema::Format_NHWC;
-        return RET_ERROR;
-      }
-      size_t packed_input_size =
-        inputs_[i]->Channel() * inputs_[i]->Batch() * inputs_[i]->Height() * inputs_[i]->Width();
-      packed_inputs_[i] = reinterpret_cast<float *>(malloc(packed_input_size * sizeof(float)));
-      if (packed_inputs_[i] == nullptr) {
-        MS_LOG(ERROR) << "malloc memory fail!";
-        return RET_ERROR;
-      }
-      memset(packed_inputs_[i], 0, packed_input_size * sizeof(float));
-    } else {
-      convert_functions_[i] = nullptr;
-      packed_inputs_[i] = nullptr;
-    }
-  }
-  outputs_[0]->SetFormat(schema::Format_NHWC);
-  return RET_OK;
+  return ReSize();
 }
 
 int StackCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   size_t inputs_num = inputs_.size();
   auto input0_shape = inputs_[0]->shape();
@@ -82,14 +53,8 @@ int StackCPUKernel::Run() {
   float *inputs[inputs_num];
   for (size_t i = 0; i < inputs_num; ++i) {
     inputs[i] = reinterpret_cast<float *>(inputs_[i]->Data());
-    if (convert_functions_[i] != nullptr) {
-      convert_functions_[i](inputs[i], packed_inputs_[i], inputs_[i]->Batch(),
-                            inputs_[i]->Height() * inputs_[i]->Width(), inputs_[i]->Channel());
-    } else {
-      packed_inputs_[i] = inputs[i];
-    }
   }
-  DoStack(packed_inputs_.data(), inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
+  DoStack(inputs, inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
   return RET_OK;
 }
mindspore/lite/src/runtime/kernel/arm/fp32/stack.h
@@ -19,35 +19,22 @@
 #include <vector>
 #include "src/lite_kernel.h"
-#include "src/runtime/kernel/arm/base/layout_transform.h"
 
 namespace mindspore::kernel {
 class StackCPUKernel : public LiteKernel {
  public:
   StackCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                  const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                  const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive),
-        convert_functions_(inputs_.size(), nullptr),
-        packed_inputs_(inputs_.size(), nullptr) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
 
-  ~StackCPUKernel() {
-    for (size_t i = 0; i < packed_inputs_.size(); ++i) {
-      if (packed_inputs_[i] != nullptr) {
-        free(packed_inputs_[i]);
-        packed_inputs_[i] = nullptr;
-      }
-    }
-  }
+  ~StackCPUKernel() = default;
 
   int Init() override;
-  int ReSize() override { return 0; }
+  int ReSize() override;
   int Run() override;
 
  private:
   int axis_;
-  std::vector<LayoutConvertor> convert_functions_;
-  std::vector<float *> packed_inputs_;
 };
 }  // namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc
@@ -32,10 +32,13 @@ constexpr int kTransposeInputNum = 1;
 constexpr int kTransposeOutputNum = 1;
 }  // namespace
 
 int TransposeCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
+  return ReSize();
+}
+
+int TransposeCPUKernel::ReSize() {
   auto &inTensor = inputs_.front();
   auto &outTensor = outputs_.front();
   auto param = reinterpret_cast<TransposeParameter *>(opParameter);
@@ -51,13 +54,11 @@ int TransposeCPUKernel::Init() {
   return RET_OK;
 }
 
-int TransposeCPUKernel::ReSize() { return RET_OK; }
-
 int TransposeCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
  }
   MS_ASSERT(inputs_.size() == TransposeInputNum);
   MS_ASSERT(outputs_.size() == TransposeOutputNum);
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc
浏览文件 @
9a78a4a7
...
...
@@ -48,7 +48,7 @@ int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata
}
}
// namespace
ArithmeticInt8CPUKernel
::~
ArithmeticInt8CPUKernel
()
{
void
ArithmeticInt8CPUKernel
::
FreeTileData
()
{
auto
param
=
reinterpret_cast
<
ArithmeticParameter
*>
(
opParameter
);
if
(
!
param
->
broadcasting_
)
{
return
;
...
...
@@ -72,6 +72,10 @@ ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() {
tile_data1_
=
nullptr
;
}
ArithmeticInt8CPUKernel
::~
ArithmeticInt8CPUKernel
()
{
FreeTileData
();
}
int
ArithmeticInt8CPUKernel
::
Init
()
{
switch
(
opParameter
->
type_
)
{
case
PrimitiveType_Equal
:
...
...
@@ -97,6 +101,15 @@ int ArithmeticInt8CPUKernel::Init() {
arithmetic_run_
=
nullptr
;
return
RET_PARAM_INVALID
;
}
if
(
!
InferShapeDone
())
{
return
RET_OK
;
}
return
ReSize
();
}
int
ArithmeticInt8CPUKernel
::
ReSize
()
{
FreeTileData
();
auto
data_size
=
outputs_
[
0
]
->
Size
();
auto
param
=
reinterpret_cast
<
ArithmeticParameter
*>
(
opParameter
);
if
(
param
->
broadcasting_
)
{
...
...
@@ -114,8 +127,6 @@ int ArithmeticInt8CPUKernel::Init() {
   return RET_OK;
 }
 
-int ArithmeticInt8CPUKernel::ReSize() { return RET_OK; }
-
 int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
   auto input0_data = reinterpret_cast<int8_t *>(inputs_[0]->Data());
   auto input1_data1 = reinterpret_cast<int8_t *>(inputs_[1]->Data());
...
...
@@ -123,8 +134,8 @@ int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
   auto element_num = outputs_[0]->ElementsNum();
   auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
   if (param->broadcasting_ && arithmetic_run_ != nullptr) {
-    MS_ASSERT(thread_count_ != 0);
-    int stride = UP_DIV(element_num, thread_count_);
+    MS_ASSERT(opParameter->thread_num_ != 0);
+    int stride = UP_DIV(element_num, opParameter->thread_num_);
     int count = MSMIN(stride, element_num - stride * thread_id);
     if (count <= 0) {
       return RET_OK;
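Note: the stride/count arithmetic splits element_num across thread_num tasks: each task takes UP_DIV(element_num, thread_num) elements, and the last task takes whatever remains, which can be zero, hence the count <= 0 early return above. A self-contained check; the macros are redefined locally and assumed to match the project's definitions.

```cpp
#include <cstdio>

// Round-up divide and min helpers, redefined here so the sketch compiles
// on its own (assumed to match the kernels' macros).
#define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
#define MSMIN(x, y) ((x) < (y) ? (x) : (y))

int main() {
  const int element_num = 10;
  const int thread_num = 4;
  const int stride = UP_DIV(element_num, thread_num);  // 3 elements per task
  for (int task_id = 0; task_id < thread_num; ++task_id) {
    // Tasks 0..2 each process 3 elements; task 3 gets the single leftover.
    int count = MSMIN(stride, element_num - stride * task_id);
    if (count <= 0) continue;  // a trailing task can be empty
    std::printf("task %d: offset %d, count %d\n", task_id, stride * task_id, count);
  }
  return 0;
}
```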
...
...
@@ -150,13 +161,18 @@ int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
 }
 
 int ArithmeticInt8CPUKernel::Run() {
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
+  }
   auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
   if (param->broadcasting_) {
     auto input_data0 = reinterpret_cast<int8_t *>(inputs_[0]->Data());
     auto input_data1 = reinterpret_cast<int8_t *>(inputs_[1]->Data());
     TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param);
   }
-  int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, thread_count_);
+  int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, opParameter->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << error_code;
     return RET_ERROR;
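Note: extracting FreeTileData() lets both the destructor and the new ReSize() release the tile buffers, so a resize drops buffers sized for the old shapes before reallocating. For that to work, the helper has to be idempotent: null the pointers after freeing them. A hypothetical sketch of the shape, with invented buffer names.

```cpp
#include <cstdlib>

// Hypothetical kernel fragment showing the shared-release pattern.
class TileKernelSketch {
 public:
  ~TileKernelSketch() { FreeTileData(); }

  int Resize(size_t new_size) {
    FreeTileData();  // drop buffers sized for the previous shapes
    tile_data0_ = static_cast<char *>(malloc(new_size));
    tile_data1_ = static_cast<char *>(malloc(new_size));
    return (tile_data0_ != nullptr && tile_data1_ != nullptr) ? 0 : -1;
  }

 private:
  void FreeTileData() {
    // free(nullptr) is a no-op, and nulling keeps repeated calls harmless.
    free(tile_data0_);
    tile_data0_ = nullptr;
    free(tile_data1_);
    tile_data1_ = nullptr;
  }
  char *tile_data0_ = nullptr;
  char *tile_data1_ = nullptr;
};

int main() {
  TileKernelSketch k;
  k.Resize(16);
  k.Resize(32);  // safe: old buffers are released first
  return 0;      // destructor releases the final pair
}
```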
...
...
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h
View file @ 9a78a4a7
...
...
@@ -29,7 +29,7 @@ class ArithmeticInt8CPUKernel : public LiteKernel {
   ArithmeticInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                           const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                           const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_), context_(ctx) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ArithmeticInt8CPUKernel();
 
   int Init() override;
...
...
@@ -38,10 +38,10 @@ class ArithmeticInt8CPUKernel : public LiteKernel {
   int DoArithmetic(int thread_id);
 
  private:
+  void FreeTileData();
-  int thread_count_;
   int8_t *tile_data0_;
   int8_t *tile_data1_;
-  const lite::Context *context_;
   ArithmeticRunInt8 arithmetic_run_;
 };
 }  // namespace mindspore::kernel
...
...
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc
View file @ 9a78a4a7
...
...
@@ -99,8 +99,8 @@ int ArithmeticSelfInt8CPUKernel::DoArithmeticSelf(int task_id) {
 int ArithmeticSelfInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   auto input_tensor = inputs_.at(0);
   auto out_tensor = outputs_.at(0);
...
...
mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc
View file @ 9a78a4a7
...
...
@@ -51,8 +51,8 @@ int BatchToSpaceInt8CPUKernel::ReSize() {
 int BatchToSpaceInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   auto input = inputs_[0];
   auto output = outputs_[0];
...
...
mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
View file @ 9a78a4a7
...
...
@@ -27,32 +27,8 @@ using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
 int ConcatInt8CPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
   ConcatBaseCPUKernel::Init();
   auto input_num = inputs_.size();
   concat_param_->input_num_ = input_num;
-  concat_param_->input_shapes_ = reinterpret_cast<const int **>(ctx_->allocator->Malloc(sizeof(int *) * input_num));
-  for (size_t i = 0; i < input_num; i++) {
-    concat_param_->input_shapes_[i] = reinterpret_cast<const int *>(inputs_.at(i)->shape().data());
-  }
-  before_axis_size = 1;
-  for (int i = 0; i < axis_; i++) {
-    before_axis_size *= outputs_.at(kOutputIndex)->DimensionSize(i);
-  }
-  int64_t after_axis_size = 1;
-  auto output_tensor = outputs_.at(kOutputIndex);
-  int output_dim = output_tensor->shape().size();
-  concat_param_->output_shapes_ = output_tensor->shape().data();
-  for (size_t i = axis_ + 1; i < output_dim; i++) {
-    after_axis_size *= concat_param_->output_shapes_[i];
-  }
-  concat_param_->after_axis_size = after_axis_size;
   concat_param_->quant_arg_.in_args_ = reinterpret_cast<QuantArg *>(ctx_->allocator->Malloc(sizeof(QuantArg) * input_num));
   if (concat_param_->quant_arg_.in_args_ == nullptr) {
...
...
@@ -66,23 +42,56 @@ int ConcatInt8CPUKernel::Init() {
     concat_param_->quant_arg_.in_args_[i].zp_ = quant_args.front().zeroPoint;
   }
 
+  auto output_tensor = outputs_.at(kOutputIndex);
   auto quant_args = output_tensor->GetQuantParams();
   concat_param_->quant_arg_.out_args_.scale_ = quant_args.front().scale;
   concat_param_->quant_arg_.out_args_.zp_ = quant_args.front().zeroPoint;
   concat_param_->quant_arg_.output_activation_min_ = std::numeric_limits<int8_t>::min();
   concat_param_->quant_arg_.output_activation_max_ = std::numeric_limits<int8_t>::max();
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int ConcatInt8CPUKernel::ReSize() {
+  auto ret = ConcatBaseCPUKernel::ReSize();
+  if (ret != RET_OK) {
+    return ret;
+  }
+  if (concat_param_->input_shapes_ != nullptr) {
+    ctx_->allocator->Free(concat_param_->input_shapes_);
+  }
+  auto input_num = inputs_.size();
+  concat_param_->input_num_ = input_num;
+  concat_param_->input_shapes_ = reinterpret_cast<const int **>(ctx_->allocator->Malloc(sizeof(int *) * input_num));
+  for (size_t i = 0; i < input_num; i++) {
+    concat_param_->input_shapes_[i] = reinterpret_cast<const int *>(inputs_.at(i)->shape().data());
+  }
+  before_axis_size = 1;
+  for (int i = 0; i < axis_; i++) {
+    before_axis_size *= outputs_.at(kOutputIndex)->DimensionSize(i);
+  }
+  int64_t after_axis_size = 1;
+  auto output_tensor = outputs_.at(kOutputIndex);
+  int output_dim = output_tensor->shape().size();
+  concat_param_->output_shapes_ = output_tensor->shape().data();
+  for (size_t i = axis_ + 1; i < output_dim; i++) {
+    after_axis_size *= concat_param_->output_shapes_[i];
+  }
+  concat_param_->after_axis_size = after_axis_size;
+  return RET_OK;
+}
 
-int ConcatInt8CPUKernel::ReSize() { return 0; }
-
 int ConcatInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   auto input_num = concat_param_->input_num_;
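Note: ReSize() now owns the input-shape table's lifecycle: it frees the table left over from the previous shape inference before allocating one sized for the current input count, so repeated resizes do not leak allocator memory. Roughly the following, with plain malloc/free standing in for ctx_->allocator in this sketch.

```cpp
#include <cstdlib>

// Sketch of re-resizable table ownership (names invented for illustration).
struct ConcatParamSketch {
  const int **input_shapes_ = nullptr;
  size_t input_num_ = 0;
};

int ResizeTable(ConcatParamSketch *param, const int **shapes, size_t input_num) {
  if (param->input_shapes_ != nullptr) {
    free(param->input_shapes_);  // release the table built for the old shapes
  }
  param->input_num_ = input_num;
  param->input_shapes_ = static_cast<const int **>(malloc(sizeof(int *) * input_num));
  if (param->input_shapes_ == nullptr) {
    return -1;
  }
  for (size_t i = 0; i < input_num; ++i) {
    param->input_shapes_[i] = shapes[i];  // borrow each tensor's shape data
  }
  return 0;
}

int main() {
  static const int s0[] = {1, 2}, s1[] = {1, 3};
  const int *shapes[] = {s0, s1};
  ConcatParamSketch param;
  ResizeTable(&param, shapes, 2);
  ResizeTable(&param, shapes, 2);  // second resize frees the first table
  free(param.input_shapes_);
  return 0;
}
```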
...
...
mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
View file @ 9a78a4a7
...
...
@@ -27,37 +27,45 @@ using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
 int CropInt8CPUKernel::Init() {
-  CropBaseCPUKernel::Init();
+  auto ret = CropBaseCPUKernel::Init();
+  if (ret != RET_OK) {
+    return ret;
+  }
   auto *input_tensor = inputs_.at(kInputIndex);
   auto in_quant_args = input_tensor->GetQuantParams();
   crop_para_->quant_arg.in_args_.scale_ = in_quant_args.front().scale;
   crop_para_->quant_arg.in_args_.zp_ = in_quant_args.front().zeroPoint;
-  auto input_dim = input_tensor->shape().size();
-  MS_ASSERT(input_dim <= CROP_OFFSET_MAX_SIZE);
-  crop_para_->input_dim_ = input_dim;
 
   auto *out_tensor = outputs_.at(kOutputIndex);
   auto out_quant_args = out_tensor->GetQuantParams();
   crop_para_->quant_arg.out_args_.scale_ = out_quant_args.front().scale;
   crop_para_->quant_arg.out_args_.zp_ = out_quant_args.front().zeroPoint;
-  crop_para_->in_shape_ = input_tensor->shape().data();
-  crop_para_->out_shape_ = out_tensor->shape().data();
 
   crop_para_->quant_arg.output_activation_max_ = std::numeric_limits<int8_t>::max();
   crop_para_->quant_arg.output_activation_min_ = std::numeric_limits<int8_t>::min();
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int CropInt8CPUKernel::ReSize() {
+  auto *input_tensor = inputs_.at(kInputIndex);
+  crop_para_->in_shape_ = input_tensor->shape().data();
+  auto *out_tensor = outputs_.at(kOutputIndex);
+  crop_para_->out_shape_ = out_tensor->shape().data();
+  auto input_dim = input_tensor->shape().size();
+  MS_ASSERT(input_dim <= CROP_OFFSET_MAX_SIZE);
+  crop_para_->input_dim_ = input_dim;
+  PadOffset(input_dim, crop_para_);
+  return RET_OK;
 }
 
-int CropInt8CPUKernel::ReSize() { return 0; }
-
 int CropInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   ret = LiteBackendParallelLaunch(CropInt8Run, this, thread_count_);
   return ret;
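Note: crop_para_->in_shape_ and out_shape_ cache raw pointers into the tensors' shape vectors, and those vectors can reallocate whenever shapes are re-inferred; that is why the pointer and dimension refresh now lives in ReSize(). A small sketch of the hazard being managed, with invented names.

```cpp
#include <vector>

// Sketch: a parameter struct caches raw pointers into shape vectors, so
// every re-inference must refresh them (the vectors may have reallocated).
struct CropParamSketch {
  const int *in_shape_ = nullptr;   // valid only while the vector is untouched
  const int *out_shape_ = nullptr;
  size_t input_dim_ = 0;
};

void Refresh(CropParamSketch *p, const std::vector<int> &in, const std::vector<int> &out) {
  p->in_shape_ = in.data();
  p->out_shape_ = out.data();
  p->input_dim_ = in.size();
}

int main() {
  std::vector<int> in = {1, 4, 4, 3}, out = {1, 2, 2, 3};
  CropParamSketch p;
  Refresh(&p, in, out);  // in the kernel this runs inside ReSize()
  return 0;
}
```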
...
...
mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc
View file @ 9a78a4a7
...
...
@@ -55,8 +55,8 @@ int DepthToSpaceInt8CPUKernel::ReSize() {
 int DepthToSpaceInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
   }
   auto input = inputs_[0];
   auto output = outputs_[0];
...
...
mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc
View file @ 9a78a4a7
...
...
@@ -28,11 +28,6 @@ using mindspore::schema::ActivationType_RELU;
 namespace mindspore::kernel {
 int ReluXInt8CPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
   lite::tensor::Tensor *input = inputs_.at(0);
   lite::tensor::Tensor *output = outputs_.at(0);
   MS_ASSERT(input);
...
...
@@ -56,7 +51,7 @@ int ReluXInt8CPUKernel::DoActivation(int task_id) {
   auto output_addr = reinterpret_cast<int8_t *>(outputs_.at(0)->Data());
   auto length = inputs_.at(0)->ElementsNum();
 
-  int stride = UP_DIV(length, thread_count_);
+  int stride = UP_DIV(length, opParameter->thread_num_);
   int count = MSMIN(stride, length - stride * task_id);
 
   ReluXInt8(input_addr + stride * task_id, count, output_addr + stride * task_id, &quant_arg_);
...
...
@@ -76,10 +71,10 @@ int ReluXInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 int ReluXInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return ret;
   }
 
-  int error_code = LiteBackendParallelLaunch(ReluXInt8Run, this, thread_count_);
+  int error_code = LiteBackendParallelLaunch(ReluXInt8Run, this, opParameter->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "ReluXInt8Run function error error_code[" << error_code << "]";
     return RET_ERROR;
...
...
mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h
View file @ 9a78a4a7
...
...
@@ -28,7 +28,7 @@ class ReluXInt8CPUKernel : public LiteKernel {
   ReluXInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                      const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
     type_ = (reinterpret_cast<ActivationParameter *>(parameter))->type_;
   }
   ~ReluXInt8CPUKernel() override = default;
...
...
@@ -41,7 +41,6 @@ class ReluXInt8CPUKernel : public LiteKernel {
   ReluXQuantArg quant_arg_;
 
  private:
-  int thread_count_;
   int type_;
 };
...
...
mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc
View file @ 9a78a4a7
...
...
@@ -27,10 +27,6 @@ using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
 int ReshapeInt8CPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
   ReshapeBaseCPUKernel::Init();
   auto *input_tensor = inputs_.at(kInputIndex);
   auto in_quant_args = input_tensor->GetQuantParams();
...
...
@@ -53,7 +49,7 @@ int ReshapeInt8CPUKernel::ReSize() { return 0; }
 int ReshapeInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return ret;
   }
   MS_ASSERT(inputs_.size() == 1);
...
...
@@ -62,9 +58,9 @@ int ReshapeInt8CPUKernel::Run() {
   output_data_ = static_cast<int8_t *>(outputs_.at(kOutputIndex)->Data());
 
   elements_num_ = inputs_.at(kInputIndex)->ElementsNum();
-  count_unit_ = thread_count_ > 1 ? UP_DIV(elements_num_, thread_count_) : elements_num_;
+  count_unit_ = opParameter->thread_num_ > 1 ? UP_DIV(elements_num_, opParameter->thread_num_) : elements_num_;
 
-  ret = LiteBackendParallelLaunch(ReshapeInt8Run, this, thread_count_);
+  ret = LiteBackendParallelLaunch(ReshapeInt8Run, this, opParameter->thread_num_);
   return ret;
 }
...
...
mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h
View file @ 9a78a4a7
...
...
@@ -40,7 +40,6 @@ class ReshapeInt8CPUKernel : public ReshapeBaseCPUKernel {
   int DoExecute(int task_id);
 
  private:
-  int thread_count_;
   int64_t elements_num_;
   int64_t count_unit_;
   int8_t *input_data_ = nullptr;
...
...
mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc
View file @ 9a78a4a7
...
...
@@ -72,7 +72,7 @@ int SigmoidInt8CPUKernel::DoActivation(int task_id) {
   auto output_addr = reinterpret_cast<int8_t *>(outputs_.at(0)->Data());
   auto length = inputs_.at(0)->ElementsNum();
 
-  int stride = UP_DIV(length, thread_count_);
+  int stride = UP_DIV(length, opParameter->thread_num_);
   int count = MSMIN(stride, length - stride * task_id);
 
   SigmoidInt8(input_addr + stride * task_id, count, output_addr + stride * task_id, &quant_arg_);
...
...
@@ -92,10 +92,10 @@ int SigmoidInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 int SigmoidInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return ret;
   }
 
-  int error_code = LiteBackendParallelLaunch(SigmoidInt8Run, this, thread_count_);
+  int error_code = LiteBackendParallelLaunch(SigmoidInt8Run, this, opParameter->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "SigmoidInt8Run function error error_code[" << error_code << "]";
     return RET_ERROR;
...
...
mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.h
View file @ 9a78a4a7
...
...
@@ -27,7 +27,7 @@ class SigmoidInt8CPUKernel : public LiteKernel {
   SigmoidInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                        const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                        const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~SigmoidInt8CPUKernel() override = default;
 
   int Init() override;
...
...
@@ -36,7 +36,6 @@ class SigmoidInt8CPUKernel : public LiteKernel {
   int DoActivation(int task_id);
 
  private:
-  int thread_count_;
   SigmoidQuantArg quant_arg_;
   void MultiplierInt32ToInt16(int32_t input, int16_t *output);
 };
...
...
mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc
View file @ 9a78a4a7
...
...
@@ -26,11 +26,10 @@ using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
 int SoftmaxInt8CPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
-  SoftmaxBaseCPUKernel::Init();
+  auto ret = SoftmaxBaseCPUKernel::Init();
+  if (ret != RET_OK) {
+    return ret;
+  }
 
   auto *input_tensor = inputs_.at(kInputIndex);
   MS_ASSERT(input_tensor);
...
...
@@ -46,18 +45,39 @@ int SoftmaxInt8CPUKernel::Init() {
   quant_params_.out_quant_arg_.scale_ = out_quant_args.front().scale;
   quant_params_.out_quant_arg_.zp_ = out_quant_args.front().zeroPoint;
 
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+void SoftmaxInt8CPUKernel::FreeTmpBuffer() {
+  if (exp_data_ != nullptr) {
+    free(exp_data_);
+    exp_data_ = nullptr;
+  }
+  if (sum_data_ != nullptr) {
+    free(sum_data_);
+    sum_data_ = nullptr;
+  }
+}
+
+int SoftmaxInt8CPUKernel::ReSize() {
+  auto ret = SoftmaxBaseCPUKernel::ReSize();
+  if (ret != RET_OK) {
+    return ret;
+  }
+  FreeTmpBuffer();
-  exp_data_ = reinterpret_cast<float *>(malloc(softmax_param_->element_size_ * sizeof(float)));
   int inner_size = 1;
   for (int i = softmax_param_->axis_ + 1; i < softmax_param_->n_dim_; i++) {
     inner_size *= softmax_param_->input_shape_[i];
   }
+  exp_data_ = reinterpret_cast<float *>(malloc(softmax_param_->element_size_ * sizeof(float)));
   sum_data_ = reinterpret_cast<float *>(malloc(inner_size * sizeof(float)));
   return RET_OK;
 }
 
-int SoftmaxInt8CPUKernel::ReSize() { return RET_OK; }
-
 int SoftmaxInt8CPUKernel::DoSoftmax(int task_id) {
   MS_ASSERT(inputs_.size() == 1);
   MS_ASSERT(outputs_.size() == 1);
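Note: the buffer sizing in the new ReSize() deserves a worked example: exp_data_ covers every element of the input, while sum_data_ needs one slot per position after the softmax axis (inner_size). For input shape [2, 3, 4, 5] with axis_ = 1, that is 120 and 20 floats respectively.

```cpp
#include <cstdio>

int main() {
  const int shape[] = {2, 3, 4, 5};
  const int n_dim = 4;
  const int axis = 1;
  // element_size_: total element count = 2 * 3 * 4 * 5 = 120.
  int element_size = 1;
  for (int i = 0; i < n_dim; ++i) element_size *= shape[i];
  // inner_size: product of the dims after the axis = 4 * 5 = 20.
  int inner_size = 1;
  for (int i = axis + 1; i < n_dim; ++i) inner_size *= shape[i];
  std::printf("element_size=%d inner_size=%d\n", element_size, inner_size);  // 120, 20
  return 0;
}
```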
...
...
@@ -101,7 +121,7 @@ int SoftmaxRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 int SoftmaxInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return RET_ERROR;
   }
   auto input_ptr = reinterpret_cast<int8_t *>(inputs_.at(0)->Data());
...
...
mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h
View file @ 9a78a4a7
...
...
@@ -28,7 +28,9 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel {
                        const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                        const lite::Primitive *primitive)
       : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~SoftmaxInt8CPUKernel() = default;
+  ~SoftmaxInt8CPUKernel() {
+    FreeTmpBuffer();
+  }
 
   int Init() override;
   int ReSize() override;
...
...
@@ -36,6 +38,7 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel {
   int DoSoftmax(int task_id);
 
  private:
+  void FreeTmpBuffer();
   float *sum_data_;
   float *exp_data_;
   SoftmaxQuantArg quant_params_;
...
...
mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc
View file @ 9a78a4a7
...
...
@@ -28,21 +28,15 @@ using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
 int SplitInt8CPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
-  SplitBaseCPUKernel::Init();
+  auto ret = SplitBaseCPUKernel::Init();
+  if (ret != RET_OK) {
+    return ret;
+  }
   auto in_tensor = inputs_.at(kInputIndex);
-  input_ptr_ = reinterpret_cast<int8_t *>(in_tensor->Data());
-  for (int i = 0; i < param->num_split_; i++) {
-    output_ptr_.push_back(reinterpret_cast<int8_t *>(outputs_.at(i)->Data()));
-  }
   auto in_quant_args = in_tensor->GetQuantParams();
   param->quant_arg_.in_args_.scale_ = in_quant_args.front().scale;
   param->quant_arg_.in_args_.zp_ = in_quant_args.front().zeroPoint;
 
   MS_ASSERT(param->num_split_ == outputs_.size());
   for (int i = 0; i < param->num_split_; i++) {
     auto *out_tensor = outputs_.at(i);
...
...
@@ -53,11 +47,14 @@ int SplitInt8CPUKernel::Init() {
   param->quant_arg_.output_activation_max_ = std::numeric_limits<int8_t>::max();
   param->quant_arg_.output_activation_min_ = std::numeric_limits<int8_t>::min();
 
-  return RET_OK;
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
 }
 
-int SplitInt8CPUKernel::ReSize() { return RET_OK; }
+int SplitInt8CPUKernel::ReSize() { return SplitBaseCPUKernel::ReSize(); }
 
 int SplitInt8CPUKernel::Split(int task_id) {
   int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
...
...
@@ -90,6 +87,13 @@ int SplitInt8CPUKernel::Run() {
     MS_LOG(ERROR) << "Prepare failed.";
     return ret;
   }
+  auto in_tensor = inputs_.at(kInputIndex);
+  input_ptr_ = reinterpret_cast<int8_t *>(in_tensor->Data());
+  MS_ASSERT(param->num_split_ == outputs_.size());
+  for (int i = 0; i < param->num_split_; i++) {
+    output_ptr_.push_back(reinterpret_cast<int8_t *>(outputs_.at(i)->Data()));
+  }
+
   ret = LiteBackendParallelLaunch(SplitInt8Run, this, thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
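Note: the Data() captures move from Init() into Run() because tensor buffers are generally bound by the runtime just before execution, not when the kernel is initialized. A sketch of the idea follows; the output_ptr_.clear() guard against growth across repeated runs is this sketch's own addition, not something the hunk itself does.

```cpp
#include <cstdint>
#include <vector>

// Sketch: capture data pointers at Run() time, since buffers are bound
// by the executor right before execution (names invented for illustration).
struct TensorSketch {
  void *data = nullptr;  // set by the runtime shortly before Run()
};

struct SplitSketch {
  int8_t *input_ptr_ = nullptr;
  std::vector<int8_t *> output_ptr_;

  int Run(TensorSketch *in, std::vector<TensorSketch> &outs) {
    input_ptr_ = static_cast<int8_t *>(in->data);
    output_ptr_.clear();  // sketch-only guard: avoid growing across runs
    for (auto &t : outs) {
      output_ptr_.push_back(static_cast<int8_t *>(t.data));
    }
    return 0;
  }
};

int main() {
  int8_t buf_in[4] = {0}, buf_out[4] = {0};
  TensorSketch in{buf_in};
  std::vector<TensorSketch> outs{TensorSketch{buf_out}};
  SplitSketch s;
  return s.Run(&in, outs);
}
```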
...
...
mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc
View file @ 9a78a4a7
...
...
@@ -29,11 +29,10 @@ using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
 int SqueezeInt8CPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    SetNeedReInit();
-    return RET_OK;
-  }
-  SqueezeBaseCPUKernel::Init();
+  auto init_ret = SqueezeBaseCPUKernel::Init();
+  if (init_ret != RET_OK) {
+    return init_ret;
+  }
   quant_Squeeze_parm_ = new (std::nothrow) SqueezeQuantArg;
   auto input_num = inputs_.size();
   quant_Squeeze_parm_->input_num_ = input_num;
...
...
@@ -52,6 +51,37 @@ int SqueezeInt8CPUKernel::Init() {
     return RET_ERROR;
   }
 
+  quant_Squeeze_parm_->axis_ = 0;
+  quant_Squeeze_parm_->in_quant_args_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg) * input_num));
+  if (quant_Squeeze_parm_->in_quant_args_ == nullptr) {
+    MS_LOG(ERROR) << "Null pointer reference: quant_Squeeze_parm_->in_quant_args_.";
+    return RET_ERROR;
+  }
+  for (size_t i = 0; i < input_num; i++) {
+    auto *input_tensor = inputs_.at(i);
+    auto quant_args = input_tensor->GetQuantParams();
+    MS_ASSERT(quant_args.size() == 1);
+    quant_Squeeze_parm_->in_quant_args_[i].scale_ = quant_args.front().scale;
+    quant_Squeeze_parm_->in_quant_args_[i].zp_ = quant_args.front().zeroPoint;
+  }
+
+  MS_ASSERT(outputs_.size() == 1);
+  auto output_tensor = outputs_.at(0);
+  MS_ASSERT(output_tensor != nullptr);
+  auto quant_args = output_tensor->GetQuantParams();
+  MS_ASSERT(quant_args.size() == 1);
+  quant_Squeeze_parm_->out_quant_args_.scale_ = quant_args.front().scale;
+  quant_Squeeze_parm_->out_quant_args_.zp_ = quant_args.front().zeroPoint;
+
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int SqueezeInt8CPUKernel::ReSize() {
   auto input_num = inputs_.size();
   for (size_t i = 0; i < input_num; i++) {
     auto *input_tensor = inputs_.at(i);
     MS_ASSERT(input_tensor != nullptr);
...
...
@@ -70,20 +100,6 @@ int SqueezeInt8CPUKernel::Init() {
       quant_Squeeze_parm_->input_sizes_[i] *= input_shape;
     }
   }
 
-  quant_Squeeze_parm_->axis_ = 0;
-  quant_Squeeze_parm_->in_quant_args_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg) * input_num));
-  if (quant_Squeeze_parm_->in_quant_args_ == nullptr) {
-    MS_LOG(ERROR) << "Null pointer reference: quant_Squeeze_parm_->in_quant_args_.";
-    return RET_ERROR;
-  }
-  for (size_t i = 0; i < input_num; i++) {
-    auto *input_tensor = inputs_.at(i);
-    auto quant_args = input_tensor->GetQuantParams();
-    MS_ASSERT(quant_args.size() == 1);
-    quant_Squeeze_parm_->in_quant_args_[i].scale_ = quant_args.front().scale;
-    quant_Squeeze_parm_->in_quant_args_[i].zp_ = quant_args.front().zeroPoint;
-  }
 
   MS_ASSERT(outputs_.size() == 1);
   auto output_tensor = outputs_.at(0);
...
...
@@ -100,21 +116,13 @@ int SqueezeInt8CPUKernel::Init() {
   quant_Squeeze_parm_->output_shape_ = new int[output_size];
   ::memcpy(quant_Squeeze_parm_->output_shape_, output_shape.data(), sizeof(int) * output_size);
 
-  auto quant_args = output_tensor->GetQuantParams();
-  MS_ASSERT(quant_args.size() == 1);
-  quant_Squeeze_parm_->out_quant_args_.scale_ = quant_args.front().scale;
-  quant_Squeeze_parm_->out_quant_args_.zp_ = quant_args.front().zeroPoint;
   return RET_OK;
 }
 
-int SqueezeInt8CPUKernel::ReSize() { return 0; }
-
 int SqueezeInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return ret;
   }
   auto input_dim = quant_Squeeze_parm_->input_num_;
...
...