[NPU][XPU][BM] Remove the dependencies from X86 and ARM kernels (#2963)

294375f9 · hong19860320 · GitHub · b74c7ebd · 294375f9 · 294375f9
49 changed file
--- a/lite/api/test_resnet50_lite_bm.cc
+++ b/lite/api/test_resnet50_lite_bm.cc
@@ -32,8 +32,6 @@ namespace lite {
 void TestModel(const std::vector<Place>& valid_places) {
  lite::Predictor predictor;
-  std::vector<std::string> passes;
-  passes.push_back("bm_subgraph_pass");
  predictor.Build(FLAGS_model_dir, "", "", valid_places, passes);
  auto* input_tensor = predictor.GetInput(0);

--- a/lite/core/mir/subgraph/subgraph_detector.cc
+++ b/lite/core/mir/subgraph/subgraph_detector.cc
@@ -341,9 +341,6 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
  for (auto &op_node : subgraph_nodes) {
    auto sub_block_op_desc = sub_block_desc->AddOp<cpp::OpDesc>();
    *sub_block_op_desc = *op_node->AsStmt().op_info();
-    sub_block_op_desc->SetAttr(
-        kKernelTypeAttr,
-        op_node->AsStmt().picked_kernel().SerializedKernelType());
  }
  subgraph_op_desc.SetAttr<int32_t>("sub_block", sub_block_idx);
@@ -413,12 +410,6 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
    IR_OP_VAR_LINK(subgraph_op_node, var_node);
  }
-  // Create and assign the context to the picked kernel of the new subgraph
-  // node
-  auto &inst = subgraph_op_node->AsStmt();
-  inst.picked_kernel().SetContext(
-      ContextScheduler::Global().NewContext(inst.picked_kernel().target()));
  // Remove subgraph nodes and unused var nodes
  auto nodes2rm = GetNodes2RM(subgraph_nodes,
                              {input_var_nodes,

--- a/lite/core/optimizer.h
+++ b/lite/core/optimizer.h
@@ -75,6 +75,9 @@ class Optimizer {
    (defined LITE_WITH_ARM)
           "lite_elementwise_add_activation_fuse_pass",  //
 #endif
+           "npu_subgraph_pass",
+           "xpu_subgraph_pass",
+           "bm_subgraph_pass",
           "static_kernel_pick_pass",        // pick original kernel from graph
           "variable_place_inference_pass",  // inference arg/var's
           // info(target/precision/layout/device)
@@ -108,9 +111,7 @@ class Optimizer {
           "runtime_context_assign_pass",
           "argument_type_display_pass",
-           "memory_optimize_pass",
+           "memory_optimize_pass"}};
-           "npu_subgraph_pass",
-           "xpu_subgraph_pass"}};
      RunPasses(passes_local);
    } else {
      RunPasses(passes);

--- a/lite/kernels/bm/bridges/concat_op.cc
+++ b/lite/kernels/bm/bridges/concat_op.cc
@@ -30,8 +30,6 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto op_type = op_info->Type();
  // input
  auto x_names = op_info->Input("X");
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  // output
  auto output_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();

--- a/lite/kernels/npu/bridges/act_op.cc
+++ b/lite/kernels/npu/bridges/act_op.cc
@@ -32,15 +32,9 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  // X node
  std::shared_ptr<Node> x_node = nullptr;

--- a/lite/kernels/npu/bridges/argmax_op.cc
+++ b/lite/kernels/npu/bridges/argmax_op.cc
@@ -32,15 +32,9 @@ int ArgmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  int axis = op_info->GetAttr<int64_t>("axis");
  // X node

--- a/lite/kernels/npu/bridges/batch_norm_op.cc
+++ b/lite/kernels/npu/bridges/batch_norm_op.cc
@@ -32,35 +32,17 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto scale_name = op_info->Input("Scale").front();
-  auto scale_type = kernel->GetInputDeclType("Scale");
-  CHECK(scale_type->precision() == PRECISION(kFloat));
-  CHECK(scale_type->layout() == DATALAYOUT(kNCHW));
  auto scale = scope->FindMutableTensor(scale_name);
  auto bias_name = op_info->Input("Bias").front();
-  auto bias_type = kernel->GetInputDeclType("Bias");
-  CHECK(bias_type->precision() == PRECISION(kFloat));
-  CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
  auto bias = scope->FindMutableTensor(bias_name);
  auto mean_name = op_info->Input("Mean").front();
-  auto mean_type = kernel->GetInputDeclType("Mean");
-  CHECK(mean_type->precision() == PRECISION(kFloat));
-  CHECK(mean_type->layout() == DATALAYOUT(kNCHW));
  auto mean = scope->FindMutableTensor(mean_name);
  auto variance_name = op_info->Input("Variance").front();
-  auto variance_type = kernel->GetInputDeclType("Variance");
-  CHECK(variance_type->precision() == PRECISION(kFloat));
-  CHECK(variance_type->layout() == DATALAYOUT(kNCHW));
  auto variance = scope->FindMutableTensor(variance_name);
  auto y_name = op_info->Output("Y").front();
-  auto y_type = kernel->GetOutputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  float momentum = op_info->GetAttr<float>("momentum");
  float epsilon = op_info->GetAttr<float>("epsilon");
  int mode = 1;  // bnScale, bnBias tensor dims are 1xCx1x1

--- a/lite/kernels/npu/bridges/concat_op.cc
+++ b/lite/kernels/npu/bridges/concat_op.cc
@@ -32,13 +32,7 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_names = op_info->Input("X");
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto axis = op_info->GetAttr<int>("axis");
  auto num = x_names.size();

--- a/lite/kernels/npu/bridges/conv_op.cc
+++ b/lite/kernels/npu/bridges/conv_op.cc
@@ -33,23 +33,14 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto input_name = op_info->Input("Input").front();
-  auto input_type = kernel->GetInputDeclType("Input");
-  CHECK(input_type->precision() == PRECISION(kFloat));
-  CHECK(input_type->layout() == DATALAYOUT(kNCHW));
  auto input = scope->FindMutableTensor(input_name);
  auto input_dims = input->dims();
  auto filter_name = op_info->Input("Filter").front();
-  auto filter_type = kernel->GetInputDeclType("Filter");
-  CHECK(filter_type->precision() == PRECISION(kFloat));
-  CHECK(filter_type->layout() == DATALAYOUT(kNCHW));
  auto filter = scope->FindMutableTensor(filter_name);
  auto filter_dims = filter->dims();
  auto output_name = op_info->Output("Output").front();
-  auto output_type = kernel->GetOutputDeclType("Output");
-  CHECK(output_type->precision() == PRECISION(kFloat));
-  CHECK(output_type->layout() == DATALAYOUT(kNCHW));
  auto output = scope->FindMutableTensor(output_name);
  auto output_dims = output->dims();
@@ -132,9 +123,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
    if (graph->Has(bias_name)) {
      bias_node = graph->Get(bias_name);
    } else {
-      auto bias_type = kernel->GetInputDeclType("Bias");
-      CHECK(bias_type->precision() == PRECISION(kFloat));
-      CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
      auto bias = scope->FindMutableTensor(bias_name);
      auto bias_dims = bias->dims();
      auto bias_data_size = bias_dims.production();

--- a/lite/kernels/npu/bridges/conv_transpose_op.cc
+++ b/lite/kernels/npu/bridges/conv_transpose_op.cc
@@ -33,25 +33,16 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input, output and op attributes
  auto input_name = op_info->Input("Input").front();
-  auto input_type = kernel->GetInputDeclType("Input");
-  CHECK(input_type->precision() == PRECISION(kFloat));
-  CHECK(input_type->layout() == DATALAYOUT(kNCHW));
  auto input = scope->FindMutableTensor(input_name);
  auto input_dims = input->dims();
  CHECK_EQ(input_dims.size(), 4);
  auto filter_name = op_info->Input("Filter").front();
-  auto filter_type = kernel->GetInputDeclType("Filter");
-  CHECK(filter_type->precision() == PRECISION(kFloat));
-  CHECK(filter_type->layout() == DATALAYOUT(kNCHW));
  auto filter = scope->FindMutableTensor(filter_name);
  auto filter_dims = filter->dims();
  CHECK_EQ(filter_dims.size(), 4);
  auto output_name = op_info->Output("Output").front();
-  auto output_type = kernel->GetOutputDeclType("Output");
-  CHECK(output_type->precision() == PRECISION(kFloat));
-  CHECK(output_type->layout() == DATALAYOUT(kNCHW));
  auto strides = op_info->GetAttr<std::vector<int>>("strides");
  CHECK_EQ(strides.size(), 2L);
@@ -157,9 +148,6 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
    if (graph->Has(bias_name)) {
      bias_node = graph->Get(bias_name);
    } else {
-      auto bias_type = kernel->GetInputDeclType("Bias");
-      CHECK(bias_type->precision() == PRECISION(kFloat));
-      CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
      auto bias = scope->FindMutableTensor(bias_name);
      auto channel_size = bias->dims().production();
      CHECK_EQ(channel_size, filter_dims[1] * groups);

--- a/lite/kernels/npu/bridges/dropout_op.cc
+++ b/lite/kernels/npu/bridges/dropout_op.cc
@@ -32,16 +32,12 @@ int DropoutConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input, output and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto x_rank = x_dims.size();
  CHECK_GE(x_rank, 2);
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
  auto dropout_implementation =
      op_info->GetAttr<std::string>("dropout_implementation");

--- a/lite/kernels/npu/bridges/elementwise_ops.cc
+++ b/lite/kernels/npu/bridges/elementwise_ops.cc
@@ -70,23 +70,14 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto y_name = op_info->Input("Y").front();
-  auto y_type = kernel->GetInputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindMutableTensor(y_name);
  auto y_dims = y->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out = scope->FindMutableTensor(out_name);
  auto out_dims = out->dims();

--- a/lite/kernels/npu/bridges/engine.cc
+++ b/lite/kernels/npu/bridges/engine.cc
@@ -63,11 +63,16 @@ int Engine::BuildOriginProgram() {
      auto kernels =
          op->CreateKernels({Place{TARGET(kX86)}, Place{TARGET(kHost)}});
 #endif
-      CHECK_GT(kernels.size(), 0) << "No kernels found for " << op_type;
+      if (kernels.size() > 0) {
-      picked_kernel = std::move(kernels.front());
+        picked_kernel = std::move(kernels.front());
+      } else {
+        LOG(WARNING) << "No kernels found for " << op_type;
+      }
+    }
+    if (picked_kernel != nullptr) {
+      picked_kernel->SetContext(
+          ContextScheduler::Global().NewContext(picked_kernel->target()));
    }
-    picked_kernel->SetContext(
-        ContextScheduler::Global().NewContext(picked_kernel->target()));
    origin_program_.emplace_back(std::move(op), std::move(picked_kernel));
  }
  return 0;

--- a/lite/kernels/npu/bridges/fc_op.cc
+++ b/lite/kernels/npu/bridges/fc_op.cc
@@ -31,24 +31,15 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  VLOG(3) << "[NPU] Converting " + op_type + "...";
  auto input_name = op_info->Input("Input").front();
-  auto input_type = kernel->GetInputDeclType("Input");
-  CHECK(input_type->precision() == PRECISION(kFloat));
-  CHECK(input_type->layout() == DATALAYOUT(kNCHW));
  auto input = scope->FindTensor(input_name);
  auto input_dims = input->dims();
  auto w_name = op_info->Input("W").front();
-  auto w_type = kernel->GetInputDeclType("W");
-  CHECK(w_type->precision() == PRECISION(kFloat));
-  CHECK(w_type->layout() == DATALAYOUT(kNCHW));
  auto w = scope->FindTensor(w_name);
  auto w_dims = w->dims();
  CHECK_EQ(w_dims.size(), 2UL);
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out = scope->FindTensor(out_name);
  auto out_dims = out->dims();
@@ -99,9 +90,6 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
    if (graph->Has(bias_name)) {
      bias_node = graph->Get(bias_name);
    } else {
-      auto bias_type = kernel->GetInputDeclType("Bias");
-      CHECK(bias_type->precision() == PRECISION(kFloat));
-      CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
      auto bias = scope->FindTensor(bias_name);
      auto bias_dims = bias->dims();
      CHECK_EQ(bias_dims.production(), n);

--- a/lite/kernels/npu/bridges/instance_norm_op.cc
+++ b/lite/kernels/npu/bridges/instance_norm_op.cc
@@ -32,9 +32,6 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  CHECK_EQ(x_dims.size(), 4L);
@@ -43,9 +40,6 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto spatial_size = x_dims[2] * x_dims[3];
  DDim scale_bias_dims({1, channel_size, 1, 1});
  auto y_name = op_info->Output("Y").front();
-  auto y_type = kernel->GetOutputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  float epsilon = op_info->GetAttr<float>("epsilon");
  // X node
@@ -60,9 +54,6 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::shared_ptr<Node> bias_node = nullptr;
  if (HasInputArg(op_info, scope, "Bias")) {
    auto bias_name = op_info->Input("Bias").front();
-    auto bias_type = kernel->GetInputDeclType("Bias");
-    CHECK(bias_type->precision() == PRECISION(kFloat));
-    CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
    auto bias = scope->FindMutableTensor(bias_name);
    auto bias_dims = bias->dims();
    CHECK_EQ(channel_size, bias_dims.production());
@@ -100,9 +91,6 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::shared_ptr<Node> scale_node = nullptr;
  if (HasInputArg(op_info, scope, "Scale")) {
    auto scale_name = op_info->Input("Scale").front();
-    auto scale_type = kernel->GetInputDeclType("Scale");
-    CHECK(scale_type->precision() == PRECISION(kFloat));
-    CHECK(scale_type->layout() == DATALAYOUT(kNCHW));
    auto scale = scope->FindMutableTensor(scale_name);
    auto scale_dims = scale->dims();
    CHECK_EQ(channel_size, scale_dims.production());

--- a/lite/kernels/npu/bridges/interpolate_op.cc
+++ b/lite/kernels/npu/bridges/interpolate_op.cc
@@ -32,18 +32,12 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto x_h = x_dims[2];
  auto x_w = x_dims[3];
  CHECK_EQ(x_dims.size(), 4);
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto scale = op_info->GetAttr<float>("scale");
  auto out_w = op_info->GetAttr<int>("out_w");
  auto out_h = op_info->GetAttr<int>("out_h");
@@ -78,9 +72,6 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::shared_ptr<Node> out_size_node = nullptr;
  if (HasInputArg(op_info, scope, "OutSize")) {
    auto out_size_name = op_info->Input("OutSize").front();
-    auto out_size_type = kernel->GetInputDeclType("OutSize");
-    CHECK(out_size_type->precision() == PRECISION(kInt32));
-    CHECK(out_size_type->layout() == DATALAYOUT(kNCHW));
    if (graph->Has(out_size_name)) {
      out_size_node = graph->Get(out_size_name);
    } else {

--- a/lite/kernels/npu/bridges/layer_norm_op.cc
+++ b/lite/kernels/npu/bridges/layer_norm_op.cc
@@ -32,9 +32,6 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto padded_x_shape = CvtShape(x_dims);
@@ -42,9 +39,6 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(x_rank >= 2 && x_rank <= 4);
  auto y_name = op_info->Output("Y").front();
-  auto y_type = kernel->GetOutputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindMutableTensor(y_name);
  auto y_dims = y->dims();
  auto padded_y_shape = CvtShape(y_dims);
@@ -102,9 +96,6 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::shared_ptr<Node> bias_node = nullptr;
  if (HasInputArg(op_info, scope, "Bias")) {
    auto bias_name = op_info->Input("Bias").front();
-    auto bias_type = kernel->GetInputDeclType("Bias");
-    CHECK(bias_type->precision() == PRECISION(kFloat));
-    CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
    auto bias = scope->FindMutableTensor(bias_name);
    auto bias_dims = bias->dims();
    CHECK_EQ(bias_dims.size(), 1);
@@ -122,9 +113,6 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::shared_ptr<Node> scale_node = nullptr;
  if (HasInputArg(op_info, scope, "Scale")) {
    auto scale_name = op_info->Input("Scale").front();
-    auto scale_type = kernel->GetInputDeclType("Scale");
-    CHECK(scale_type->precision() == PRECISION(kFloat));
-    CHECK(scale_type->layout() == DATALAYOUT(kNCHW));
    auto scale = scope->FindMutableTensor(scale_name);
    auto scale_dims = scale->dims();
    CHECK_EQ(scale_dims.size(), 1);

--- a/lite/kernels/npu/bridges/matmul_op.cc
+++ b/lite/kernels/npu/bridges/matmul_op.cc
@@ -32,16 +32,10 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindTensor(x_name);
  auto x_dims = x->dims();
  auto y_name = op_info->Input("Y").front();
-  auto y_type = kernel->GetInputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindTensor(y_name);
  auto y_dims = y->dims();
@@ -62,9 +56,6 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  }
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out = scope->FindTensor(out_name);
  auto out_dims = out->dims();

--- a/lite/kernels/npu/bridges/mul_op.cc
+++ b/lite/kernels/npu/bridges/mul_op.cc
@@ -33,23 +33,14 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindTensor(x_name);
  auto x_dims = x->dims();
  auto y_name = op_info->Input("Y").front();
-  auto y_type = kernel->GetInputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindTensor(y_name);
  auto y_dims = y->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out = scope->FindTensor(out_name);
  auto out_dims = out->dims();
  if (out_dims.size() > 4) {

--- a/lite/kernels/npu/bridges/pad2d_op.cc
+++ b/lite/kernels/npu/bridges/pad2d_op.cc
@@ -32,15 +32,9 @@ int Pad2dConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto padding = op_info->GetAttr<std::vector<int>>("paddings");
  CHECK_EQ(padding.size(), 4);

--- a/lite/kernels/npu/bridges/pool_op.cc
+++ b/lite/kernels/npu/bridges/pool_op.cc
@@ -33,15 +33,9 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
  auto global_pooling = op_info->GetAttr<bool>("global_pooling");
  auto ksize = op_info->GetAttr<std::vector<int>>("ksize");

--- a/lite/kernels/npu/bridges/reduce_mean_op.cc
+++ b/lite/kernels/npu/bridges/reduce_mean_op.cc
@@ -32,15 +32,9 @@ int ReduceMeanConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Input("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto keep_dim = op_info->GetAttr<bool>("keep_dim");
  auto dim = op_info->GetAttr<std::vector<int>>("dim");
  CHECK(!dim.empty()) << "[NPU] \"dim\" of reduce_mean should not be empty.";

--- a/lite/kernels/npu/bridges/reshape_op.cc
+++ b/lite/kernels/npu/bridges/reshape_op.cc
@@ -33,12 +33,10 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
  // X node
  std::shared_ptr<Node> x_node = nullptr;

--- a/lite/kernels/npu/bridges/scale_op.cc
+++ b/lite/kernels/npu/bridges/scale_op.cc
@@ -32,17 +32,11 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input, output and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto x_rank = x_dims.size();
  CHECK_GE(x_rank, 2);
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  // HiAI only support [n, c, 1, 1] for the shape of scale and bias
  std::vector<int64_t> scale_bias_shape = {
      1, x_rank < 3 ? 1 : x_dims[x_rank - 3], 1, 1};

--- a/lite/kernels/npu/bridges/shuffle_channel_op.cc
+++ b/lite/kernels/npu/bridges/shuffle_channel_op.cc
@@ -32,15 +32,9 @@ int ShuffleChannelConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto group = op_info->GetAttr<int>("group");
  // X node

--- a/lite/kernels/npu/bridges/softmax_op.cc
+++ b/lite/kernels/npu/bridges/softmax_op.cc
@@ -32,16 +32,10 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto x_rank = x_dims.size();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  int axis = op_info->HasAttr("axis") ? op_info->GetAttr<int>("axis") : -1;
  if (axis < 0) {
    axis += x_rank;

--- a/lite/kernels/npu/bridges/split_op.cc
+++ b/lite/kernels/npu/bridges/split_op.cc
@@ -32,15 +32,9 @@ int SplitConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_names = op_info->Output("Out");
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto axis = op_info->GetAttr<int>("axis");
  auto num = op_info->GetAttr<int>("num");
  auto sections = op_info->GetAttr<std::vector<int>>("sections");

--- a/lite/kernels/npu/bridges/sqrt_op.cc
+++ b/lite/kernels/npu/bridges/sqrt_op.cc
@@ -32,15 +32,9 @@ int SqrtConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  // X node
  std::shared_ptr<Node> x_node = nullptr;

--- a/lite/kernels/npu/bridges/square_op.cc
+++ b/lite/kernels/npu/bridges/square_op.cc
@@ -32,15 +32,9 @@ int SquareConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  // X node
  std::shared_ptr<Node> x_node = nullptr;

--- a/lite/kernels/npu/bridges/transpose_op.cc
+++ b/lite/kernels/npu/bridges/transpose_op.cc
@@ -32,16 +32,10 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto axis = op_info->GetAttr<std::vector<int>>("axis");

--- a/lite/kernels/npu/bridges/unsqueeze_op.cc
+++ b/lite/kernels/npu/bridges/unsqueeze_op.cc
@@ -31,14 +31,10 @@ int UnsqueezeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  VLOG(3) << "[NPU] Converting " << op_type << "... ";
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out_shape = scope->FindTensor(out_name)->dims().Vectorize();
  CHECK(op_info->HasAttr("axes"))
      << "[NPU] unsqueeze not support axes from tensor now";

--- a/lite/kernels/npu/subgraph_compute.cc
+++ b/lite/kernels/npu/subgraph_compute.cc
@@ -177,6 +177,7 @@ int SubgraphEngine::BuildDeviceProgram() {
                   << PrecisionToStr(precision);
        break;
    }
+    /*
    if (!subgraph::npu::CheckShape(origin_odims_[i], device_odims[i])) {
      LOG(WARNING) << "origin and device output's dims are mismatched.";
      for (int j = 0; j < origin_odims_[i].size(); j++) {
@@ -190,6 +191,7 @@ int SubgraphEngine::BuildDeviceProgram() {
                   << device_odims[i].GetWidth() << "}";
      return subgraph::FAILED;
    }
+    */
    device_otensors_[i].reset(new hiai::AiTensor);
    device_otensors_[i]->Init(&(device_odims[i]));
  }

--- a/lite/kernels/xpu/bridges/act_op.cc
+++ b/lite/kernels/xpu/bridges/act_op.cc
@@ -32,15 +32,9 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  // X node
  std::shared_ptr<Node> x_node = nullptr;

--- a/lite/kernels/xpu/bridges/batch_norm_op.cc
+++ b/lite/kernels/xpu/bridges/batch_norm_op.cc
@@ -32,40 +32,22 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto scale_name = op_info->Input("Scale").front();
-  auto scale_type = kernel->GetInputDeclType("Scale");
-  CHECK(scale_type->precision() == PRECISION(kFloat));
-  CHECK(scale_type->layout() == DATALAYOUT(kNCHW));
  auto scale = scope->FindMutableTensor(scale_name);
  auto bias_name = op_info->Input("Bias").front();
-  auto bias_type = kernel->GetInputDeclType("Bias");
-  CHECK(bias_type->precision() == PRECISION(kFloat));
-  CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
  auto bias = scope->FindMutableTensor(bias_name);
  auto mean_name = op_info->Input("Mean").front();
-  auto mean_type = kernel->GetInputDeclType("Mean");
-  CHECK(mean_type->precision() == PRECISION(kFloat));
-  CHECK(mean_type->layout() == DATALAYOUT(kNCHW));
  auto mean = scope->FindMutableTensor(mean_name);
  auto variance_name = op_info->Input("Variance").front();
-  auto variance_type = kernel->GetInputDeclType("Variance");
-  CHECK(variance_type->precision() == PRECISION(kFloat));
-  CHECK(variance_type->layout() == DATALAYOUT(kNCHW));
  auto variance = scope->FindMutableTensor(variance_name);
  auto y_name = op_info->Output("Y").front();
-  auto y_type = kernel->GetOutputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto epsilon = op_info->GetAttr<float>("epsilon");

--- a/lite/kernels/xpu/bridges/conv_op.cc
+++ b/lite/kernels/xpu/bridges/conv_op.cc
@@ -33,21 +33,12 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto input_name = op_info->Input("Input").front();
-  auto input_type = kernel->GetInputDeclType("Input");
-  CHECK(input_type->precision() == PRECISION(kFloat));
-  CHECK(input_type->layout() == DATALAYOUT(kNCHW));
  auto input = scope->FindMutableTensor(input_name);
  auto input_dims = input->dims();
  auto filter_name = op_info->Input("Filter").front();
-  auto filter_type = kernel->GetInputDeclType("Filter");
-  CHECK(filter_type->precision() == PRECISION(kFloat));
-  CHECK(filter_type->layout() == DATALAYOUT(kNCHW));
  auto filter = scope->FindMutableTensor(filter_name);
  auto filter_dims = filter->dims();
  auto output_name = op_info->Output("Output").front();
-  auto output_type = kernel->GetOutputDeclType("Output");
-  CHECK(output_type->precision() == PRECISION(kFloat));
-  CHECK(output_type->layout() == DATALAYOUT(kNCHW));
  auto bs = input_dims[0];
  auto oc = filter_dims[0];
  CHECK_EQ(input_dims.size(), 4);
@@ -125,9 +116,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // 2: {n, oc, oh, ow}
  if (HasInputArg(op_info, scope, "Bias")) {
    auto bias_name = op_info->Input("Bias").front();
-    auto bias_type = kernel->GetInputDeclType("Bias");
-    CHECK(bias_type->precision() == PRECISION(kFloat));
-    CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
    auto bias = scope->FindMutableTensor(bias_name);
    auto bias_dims = bias->dims();
    auto bias_data_size = bias_dims.production();

--- a/lite/kernels/xpu/bridges/dropout_op.cc
+++ b/lite/kernels/xpu/bridges/dropout_op.cc
@@ -32,15 +32,9 @@ int DropoutConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto dropout_prob = op_info->GetAttr<float>("dropout_prob");
  auto dropout_implementation =
      op_info->GetAttr<std::string>("dropout_implementation");

--- a/lite/kernels/xpu/bridges/elementwise_ops.cc
+++ b/lite/kernels/xpu/bridges/elementwise_ops.cc
@@ -32,21 +32,12 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto y_name = op_info->Input("Y").front();
-  auto y_type = kernel->GetInputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindMutableTensor(y_name);
  auto y_dims = y->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto axis = op_info->GetAttr<int>("axis");
  // X node

--- a/lite/kernels/xpu/bridges/gather_op.cc
+++ b/lite/kernels/xpu/bridges/gather_op.cc
@@ -32,24 +32,14 @@ int GatherConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto index_name = op_info->Input("Index").front();
-  auto index_type = kernel->GetInputDeclType("Index");
-  CHECK(index_type->precision() == PRECISION(kInt32) ||
-        index_type->precision() == PRECISION(kInt64));
-  CHECK(index_type->layout() == DATALAYOUT(kNCHW));
  auto index = scope->FindMutableTensor(index_name);
  auto index_dims = index->dims();
  CHECK(index_dims.size() == 1 ||
        (index_dims.size() == 2 && index_dims[1] == 1));
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out = scope->FindMutableTensor(out_name);
  auto out_dims = out->dims();

--- a/lite/kernels/xpu/bridges/layer_norm_op.cc
+++ b/lite/kernels/xpu/bridges/layer_norm_op.cc
@@ -32,15 +32,9 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto y_name = op_info->Output("Y").front();
-  auto y_type = kernel->GetOutputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindMutableTensor(y_name);
  auto y_dims = y->dims();
  auto epsilon = op_info->GetAttr<float>("epsilon");
@@ -70,9 +64,6 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::shared_ptr<Node> scale_node = nullptr;
  if (HasInputArg(op_info, scope, "Scale")) {
    auto scale_name = op_info->Input("Scale").front();
-    auto scale_type = kernel->GetInputDeclType("Scale");
-    CHECK(scale_type->precision() == PRECISION(kFloat));
-    CHECK(scale_type->layout() == DATALAYOUT(kNCHW));
    auto scale = scope->FindMutableTensor(scale_name);
    auto scale_dims = scale->dims();
    CHECK_EQ(scale_dims.size(), 1);
@@ -86,9 +77,6 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::shared_ptr<Node> bias_node = nullptr;
  if (HasInputArg(op_info, scope, "Bias")) {
    auto bias_name = op_info->Input("Bias").front();
-    auto bias_type = kernel->GetInputDeclType("Bias");
-    CHECK(bias_type->precision() == PRECISION(kFloat));
-    CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
    auto bias = scope->FindMutableTensor(bias_name);
    auto bias_dims = bias->dims();
    CHECK_EQ(bias_dims.size(), 1);

--- a/lite/kernels/xpu/bridges/lookup_table_op.cc
+++ b/lite/kernels/xpu/bridges/lookup_table_op.cc
@@ -32,22 +32,13 @@ int LookupTableConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto ids_name = op_info->Input("Ids").front();
-  auto ids_type = kernel->GetInputDeclType("Ids");
-  CHECK(ids_type->precision() == PRECISION(kInt64));
-  CHECK(ids_type->layout() == DATALAYOUT(kNCHW));
  auto ids = scope->FindMutableTensor(ids_name);
  auto ids_dims = ids->dims();
  auto w_name = op_info->Input("W").front();
-  auto w_type = kernel->GetInputDeclType("W");
-  CHECK(w_type->precision() == PRECISION(kFloat));
-  CHECK(w_type->layout() == DATALAYOUT(kNCHW));
  auto w = scope->FindMutableTensor(w_name);
  auto w_dims = w->dims();
  CHECK_EQ(w_dims.size(), 2);
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out = scope->FindMutableTensor(out_name);
  auto out_dims = out->dims();
  auto padding_idx = op_info->GetAttr<int64_t>("padding_idx");

--- a/lite/kernels/xpu/bridges/matmul_op.cc
+++ b/lite/kernels/xpu/bridges/matmul_op.cc
@@ -32,23 +32,14 @@ int MatmulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto y_name = op_info->Input("Y").front();
-  auto y_type = kernel->GetInputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindMutableTensor(y_name);
  auto y_dims = y->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out = scope->FindMutableTensor(out_name);
  auto out_dims = out->dims();

--- a/lite/kernels/xpu/bridges/mul_op.cc
+++ b/lite/kernels/xpu/bridges/mul_op.cc
@@ -32,21 +32,12 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto y_name = op_info->Input("Y").front();
-  auto y_type = kernel->GetInputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindMutableTensor(y_name);
  auto y_dims = y->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out = scope->FindMutableTensor(out_name);
  auto out_dims = out->dims();
  auto x_num_col_dims = op_info->GetAttr<int>("x_num_col_dims");

--- a/lite/kernels/xpu/bridges/pool_op.cc
+++ b/lite/kernels/xpu/bridges/pool_op.cc
@@ -32,15 +32,9 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input, and attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
  auto ceil_mode = op_info->GetAttr<bool>("ceil_mode");
  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");

--- a/lite/kernels/xpu/bridges/reshape_op.cc
+++ b/lite/kernels/xpu/bridges/reshape_op.cc
@@ -48,9 +48,6 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  std::vector<int> shape;
  if (HasInputArg(op_info, scope, "ShapeTensor")) {
    auto shape_tensor_names = op_info->Input("ShapeTensor");
-    // auto shape_tensor_type = kernel->GetInputDeclType("ShapeTensor");
-    // CHECK(shape_tensor_type->precision() == PRECISION(kInt32));
-    // CHECK(shape_tensor_type->layout() == DATALAYOUT(kNCHW));
    for (auto shape_tensor_name : shape_tensor_names) {
      auto shape_tensor = scope->FindMutableTensor(shape_tensor_name);
      CHECK(shape_tensor->persistable());
@@ -64,9 +61,6 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
        << shape.size();
  } else if (HasInputArg(op_info, scope, "Shape")) {
    auto actual_shape_name = op_info->Input("Shape").front();
-    // auto actual_shape_type = kernel->GetInputDeclType("Shape");
-    // CHECK(actual_shape_type->precision() == PRECISION(kInt32));
-    // CHECK(actual_shape_type->layout() == DATALAYOUT(kNCHW));
    auto actual_shape = scope->FindMutableTensor(actual_shape_name);
    CHECK(actual_shape->persistable());
    auto actual_shape_dims = actual_shape->dims();

--- a/lite/kernels/xpu/bridges/scale_op.cc
+++ b/lite/kernels/xpu/bridges/scale_op.cc
@@ -32,15 +32,9 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  float scale = op_info->GetAttr<float>("scale");
  bool bias_after_scale = op_info->GetAttr<bool>("bias_after_scale");
  float bias = op_info->GetAttr<float>("bias");

--- a/lite/kernels/xpu/bridges/slice_op.cc
+++ b/lite/kernels/xpu/bridges/slice_op.cc
@@ -32,15 +32,9 @@ int SliceConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input, output and op attributes
  auto input_name = op_info->Input("Input").front();
-  auto input_type = kernel->GetInputDeclType("Input");
-  CHECK(input_type->precision() == PRECISION(kFloat));
-  CHECK(input_type->layout() == DATALAYOUT(kNCHW));
  auto input = scope->FindMutableTensor(input_name);
  auto input_dims = input->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto axes = op_info->GetAttr<std::vector<int>>("axes");
  auto starts = op_info->GetAttr<std::vector<int>>("starts");
  auto ends = op_info->GetAttr<std::vector<int>>("ends");

--- a/lite/kernels/xpu/bridges/softmax_op.cc
+++ b/lite/kernels/xpu/bridges/softmax_op.cc
@@ -32,15 +32,9 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  int axis = op_info->HasAttr("axis") ? op_info->GetAttr<int>("axis") : -1;
  // X node

--- a/lite/kernels/xpu/bridges/stack_op.cc
+++ b/lite/kernels/xpu/bridges/stack_op.cc
@@ -32,13 +32,7 @@ int StackConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_names = op_info->Input("X");
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto y_name = op_info->Output("Y").front();
-  auto y_type = kernel->GetOutputDeclType("Y");
-  CHECK(y_type->precision() == PRECISION(kFloat));
-  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  int axis = op_info->GetAttr<int>("axis");
  // X nodes

--- a/lite/kernels/xpu/bridges/transpose_op.cc
+++ b/lite/kernels/xpu/bridges/transpose_op.cc
@@ -32,15 +32,9 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
-  auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
-  auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto axis = op_info->GetAttr<std::vector<int>>("axis");
  // X node