diff --git a/lite/core/mir/quantized_op_attributes_inference_pass.cc b/lite/core/mir/quantized_op_attributes_inference_pass.cc
index 2cc897b3986653c81ea8f3a6e23c384b61bb0f6f..259447aa21b76261a266a243dcc9c2a7530c9dc5 100644
--- a/lite/core/mir/quantized_op_attributes_inference_pass.cc
+++ b/lite/core/mir/quantized_op_attributes_inference_pass.cc
@@ -38,12 +38,12 @@ void QuantizedOpAttributesInferencePass::Apply(
     auto op_info = inst.op_info();
     auto op_type = op_info->Type();
 
-    // Check only if all of the inputs of the op have scale value
-    bool has_input_scale = true;
+    // Check if any of the inputs of the op have scale value
+    bool has_input_scale = false;
     for (auto in_var_node : op_node->inlinks) {
       CHECK(in_var_node->IsArg());
       auto in_var_node_name = in_var_node->arg()->name;
-      has_input_scale &= op_info->HasInputScale(in_var_node_name);
+      has_input_scale |= op_info->HasInputScale(in_var_node_name);
     }
     if (!has_input_scale) continue;
 
@@ -52,32 +52,31 @@ void QuantizedOpAttributesInferencePass::Apply(
     bool is_quantized = true;
     for (auto out_var_node : op_node->outlinks) {
       CHECK(out_var_node->IsArg());
-      bool found = false;
-      float output_scale;
+      std::vector<float> output_scale;
+      bool has_output_scale = false;
       auto out_var_node_name = out_var_node->arg()->name;
       for (auto out_op_node : out_var_node->outlinks) {
         CHECK(out_op_node->IsStmt());
         auto& out_inst = out_op_node->AsStmt();
         auto out_op_info = out_inst.op_info();
         if (!out_op_info->HasInputScale(out_var_node_name)) continue;
-        auto input_scale = out_op_info->GetInputScale(out_var_node_name)[0];
-        if (!found) {
-          found = true;
+        auto input_scale = out_op_info->GetInputScale(out_var_node_name);
+        if (!has_output_scale) {
           output_scale = input_scale;
+          has_output_scale = true;
         } else {
-          CHECK_EQ(output_scale, input_scale);
+          CHECK_EQ(output_scale.size(), input_scale.size());
         }
       }
-      if (found) {
-        inst.mutable_op_info()->SetOutputScale(out_var_node_name,
-                                               {output_scale});
+      if (has_output_scale) {
+        inst.mutable_op_info()->SetOutputScale(out_var_node_name, output_scale);
       } else if (op_info->HasAttr("out_threshold")) {
         // Only consider one output, there are only one out_threshold
         int bit_length = op_info->GetAttr<int>("bit_length");
         int range = (1 << (bit_length - 1)) - 1;
-        output_scale = op_info->GetAttr<float>("out_threshold");
-        inst.mutable_op_info()->SetOutputScale(out_var_node_name,
-                                               {output_scale / range});
+        output_scale = std::vector<float>{
+            op_info->GetAttr<float>("out_threshold") / range};
+        inst.mutable_op_info()->SetOutputScale(out_var_node_name, output_scale);
       } else {
         is_quantized = false;
       }
diff --git a/lite/core/mir/subgraph/subgraph_detector.cc b/lite/core/mir/subgraph/subgraph_detector.cc
index c5220bb33c7cada3972ef2a2f8329d0d485fbb83..928eb049d7694658b6bccc332f3ac64900e7da56 100644
--- a/lite/core/mir/subgraph/subgraph_detector.cc
+++ b/lite/core/mir/subgraph/subgraph_detector.cc
@@ -452,39 +452,6 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
   subgraph_op_desc.SetAttr<std::vector<std::string>>("output_data_names",
                                                      output_var_names);
 
-  // Set input/output scale values of input/output var nodes for
-  // type_precision_cast_pass.
-  std::vector<float> input_data_scales;
-  std::vector<float> output_data_scales;
-  for (auto &var_node : input_var_nodes) {
-    auto var_node_name = var_node->arg()->name;
-    auto any_op_node = var_node->outlinks.front();
-    CHECK(any_op_node->IsStmt());
-    auto &any_inst = any_op_node->AsStmt();
-    if (any_inst.op_info()->HasInputScale(var_node_name)) {
-      input_data_scales.push_back(
-          any_inst.op_info()->GetInputScale(var_node_name)[0]);
-    }
-  }
-  for (auto &var_node : output_var_nodes) {
-    auto var_node_name = var_node->arg()->name;
-    auto any_op_node = var_node->inlinks.front();
-    CHECK(any_op_node->IsStmt());
-    auto &any_inst = any_op_node->AsStmt();
-    if (any_inst.op_info()->HasOutputScale(var_node_name)) {
-      output_data_scales.push_back(
-          any_inst.op_info()->GetOutputScale(var_node_name)[0]);
-    }
-  }
-  if (input_data_scales.size() > 0) {
-    subgraph_op_desc.SetAttr<std::vector<float>>("input_data_scales",
-                                                 input_data_scales);
-  }
-  if (output_data_scales.size() > 0) {
-    subgraph_op_desc.SetAttr<std::vector<float>>("output_data_scales",
-                                                 output_data_scales);
-  }
-
   // Set all of the inputs and outputs to the target subgraph op
   // To prevent vars are removed in RuntimeProgram::UpdateVarsOfProgram()
   for (auto &var_node : weight_var_nodes) {
@@ -504,6 +471,29 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
   auto any_op = (*subgraph_nodes.begin())->AsStmt().op();
   subgraph_op->Attach(subgraph_op_desc, any_op->scope());
 
+  // Export the scale values of the input/output var nodes of the inner op nodes
+  // only for type_precision_cast_pass.
+  for (auto &var_node : input_var_nodes) {
+    auto var_node_name = var_node->arg()->name;
+    auto any_op_node = var_node->outlinks.front();
+    CHECK(any_op_node->IsStmt());
+    auto &any_inst = any_op_node->AsStmt();
+    if (any_inst.op_info()->HasInputScale(var_node_name)) {
+      subgraph_op->mutable_op_info()->SetInputScale(
+          var_node_name, any_inst.op_info()->GetInputScale(var_node_name));
+    }
+  }
+  for (auto &var_node : output_var_nodes) {
+    auto var_node_name = var_node->arg()->name;
+    auto any_op_node = var_node->inlinks.front();
+    CHECK(any_op_node->IsStmt());
+    auto &any_inst = any_op_node->AsStmt();
+    if (any_inst.op_info()->HasOutputScale(var_node_name)) {
+      subgraph_op->mutable_op_info()->SetOutputScale(
+          var_node_name, any_inst.op_info()->GetOutputScale(var_node_name));
+    }
+  }
+
   // Create and add a new subgraph node into the graph
   auto subgraph_op_node =
       graph->GraphCreateInstructNode(subgraph_op, any_op->valid_places());
diff --git a/lite/core/mir/type_precision_cast_pass.cc b/lite/core/mir/type_precision_cast_pass.cc
index e46e997f0ccc2c515a72cd23425f27330050287b..39a94cbca6bd6222da5da1d314ea07475592bf0e 100644
--- a/lite/core/mir/type_precision_cast_pass.cc
+++ b/lite/core/mir/type_precision_cast_pass.cc
@@ -66,65 +66,30 @@ void UpdateInputs(OpLite* op, const std::string& from, const std::string& to) {
   }
 }
 
-// Infer the scale value for the new calib op from the subgraph op
-static bool InferScaleFromSubgraph(std::string var_name,
-                                   const OpInfo* op_info,
-                                   float* scale,
-                                   bool reverse = false) {
-  std::string attr_name = reverse ? "output_data_names" : "input_data_names";
-  if (!op_info->HasAttr(attr_name)) return false;
-  auto input_or_output_names =
-      op_info->GetAttr<std::vector<std::string>>(attr_name);
-  attr_name = reverse ? "output_data_scales" : "input_data_scales";
-  if (!op_info->HasAttr(attr_name)) return false;
-  auto input_or_output_scales = op_info->GetAttr<std::vector<float>>(attr_name);
-  auto size = input_or_output_names.size();
-  CHECK(size == input_or_output_scales.size());
-  for (size_t i = 0; i < size; i++) {
-    if (input_or_output_names[i] == var_name) {
-      *scale = input_or_output_scales[i];
-      return true;
-    }
-  }
-  return false;
-}
-
 // Infer the scale value for the new calib op from the input_scale of the
 // current op and output_scale of the previous op.
 // case 1: prev_op->var_node->op_node(int8->any op, with input_scale).
-// case 2: prev_op->var_node->op_node(subgraph op, int8->any, with
-// input_data_scales).
-// case 3: prev_op(any->int8, with output_scale)->var_node->op_node(fp32->any,
+// case 2: prev_op(any->int8, with output_scale)->var_node->op_node(fp32->any,
 // without input_scale).
-// case 4: prev_op(any->int8, subgraph_op, with
-// output_data_scales)->var_node->op_node(fp32->any, without input_scale).
 static bool InferScale(Node* var_node, Node* op_node, float* scale) {
   bool found = false;
   auto& inst = op_node->AsStmt();
   auto op_info = inst.op_info();
   auto op_type = op_info->Type();
   auto var_name = var_node->AsArg().name;
-  if (op_type == "subgraph") {
-    found = InferScaleFromSubgraph(var_name, op_info, scale, false);
+  if (op_info->HasInputScale(var_name)) {
+    *scale = op_info->GetInputScale(var_name)[0];
+    found = true;
   } else {
-    if (op_info->HasInputScale(var_name)) {
-      *scale = op_info->GetInputScale(var_name)[0];
+    // Obtain the output_scale from one of its previous Ops
+    auto prev_op_node = var_node->inlinks.front();
+    CHECK(prev_op_node->IsStmt());
+    auto& prev_inst = prev_op_node->AsStmt();
+    auto prev_op_info = prev_inst.op_info();
+    auto prev_op_type = prev_op_info->Type();
+    if (prev_op_info->HasOutputScale(var_name)) {
+      *scale = prev_op_info->GetOutputScale(var_name)[0];
       found = true;
-    } else {
-      // Obtain the output_scale from one of its previous Ops
-      auto prev_op_node = var_node->inlinks.front();
-      CHECK(prev_op_node->IsStmt());
-      auto& prev_inst = prev_op_node->AsStmt();
-      auto prev_op_info = prev_inst.op_info();
-      auto prev_op_type = prev_op_info->Type();
-      if (prev_op_type == "subgraph") {
-        found = InferScaleFromSubgraph(var_name, prev_op_info, scale, true);
-      } else {
-        if (prev_op_info->HasOutputScale(var_name)) {
-          *scale = prev_op_info->GetOutputScale(var_name)[0];
-          found = true;
-        }
-      }
     }
   }
   return found;
diff --git a/lite/kernels/apu/bridges/conv_op.cc b/lite/kernels/apu/bridges/conv_op.cc
index 990bf6a27673daef3cf98539946da70172742140..bf5e313180d9d8089b29f993384bd243b2a5ed05 100644
--- a/lite/kernels/apu/bridges/conv_op.cc
+++ b/lite/kernels/apu/bridges/conv_op.cc
@@ -35,6 +35,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   int neuron_errCode;
   VLOG(3) << "[APU] Converting [" << op_type << "]";
 
+  CHECK(op_info->HasAttr("enable_int8") &&
+        op_info->GetAttr<bool>("enable_int8"));
+
   // Get input and output vars and op attributes
   auto input_name = op_info->Input("Input").front();
   auto input = scope->FindMutableTensor(input_name);
@@ -94,34 +97,18 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                             input_dims,
                             filter_dims);
 
-  float input_scale;
-  float output_scale;
-  std::vector<float> weight_scale;
-  if (op_info->HasAttr("enable_int8")) {
-    if (op_info->GetAttr<bool>("enable_int8")) {
-      auto input_name = op_info->Input("Input").front();
-      auto filter_name = op_info->Input("Filter").front();
-      auto output_name = op_info->Output("Output").front();
-      if (op_info->HasInputScale(input_name))
-        input_scale = op_info->GetInputScale(input_name)[0];
-      if (op_info->HasInputScale(filter_name))
-        weight_scale = op_info->GetInputScale(filter_name);
-      if (op_info->HasOutputScale(output_name)) {
-        output_scale = op_info->GetOutputScale(output_name)[0];
-      }
-      VLOG(3) << "has output scale:" << output_scale;
-    } else {
-      return FAILED;
-    }
-  } else {
-    return FAILED;
-  }
+  CHECK(op_info->HasInputScale(input_name));
+  auto input_scale = op_info->GetInputScale(input_name)[0];
+  CHECK(op_info->HasInputScale(filter_name));
+  auto filter_scale = op_info->GetInputScale(filter_name);
+  CHECK(op_info->HasOutputScale(output_name));
+  auto output_scale = op_info->GetOutputScale(output_name)[0];
 
   VLOG(3) << "strides.size(): " << strides.size() << " ,groups: " << groups
           << " ,dilations: " << dilations[0] << ":" << dilations[1];
   VLOG(3) << "with_act: " << with_act << " ,act_type:" << act_type;
   VLOG(3) << "input_dims: " << input_dims << " ,output_dims: " << output_dims
-          << " ,weight_scale size: " << weight_scale.size();
+          << " ,filter_scale size: " << filter_scale.size();
   VLOG(3) << "filter_dims: " << filter_dims
           << " ,memory_size: " << filter->memory_size()
           << " ,data_size: " << filter->data_size();
@@ -220,10 +207,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   NeuronOperandType filterType;
   NeuronOperandType channelFilterType;
   NeuronSymmPerChannelQuantParams symmPerChannelQuantParams;
-  if (1 == weight_scale.size()) {
+  if (1 == filter_scale.size()) {
     // Per layer type
     filterType.type = NEURON_TENSOR_QUANT8_ASYMM;
-    filterType.scale = weight_scale[0];
+    filterType.scale = filter_scale[0];
     filterType.zeroPoint = 128;
     filterType.dimensionCount = filter_dims.size();
     filterType.dimensions = &dims_filter[0];
@@ -241,17 +228,17 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       symmPerChannelQuantParams.channelDim = 3;
     else
      symmPerChannelQuantParams.channelDim = 0;
-    symmPerChannelQuantParams.scaleCount = weight_scale.size();
-    symmPerChannelQuantParams.scales = weight_scale.data();
+    symmPerChannelQuantParams.scaleCount = filter_scale.size();
+    symmPerChannelQuantParams.scales = filter_scale.data();
     biasType.scale = 0;
   }
 
   std::shared_ptr<Node> filter_node = nullptr;
-  if (1 == weight_scale.size()) {
+  if (1 == filter_scale.size()) {
     NeuronModel_addOperand(model, &filterType);  // 1: filter
     filter_node = graph->Add(filter_name, dims_filter);
-    VLOG(3) << "filter node idx: " << filter_node->index() << "w_scale[0]"
-            << weight_scale[0] << ": filterType: " << filterType.dimensions[0]
+    VLOG(3) << "filter node idx: " << filter_node->index() << "filter_scale[0]"
+            << filter_scale[0] << ": filterType: " << filterType.dimensions[0]
            << ":" << filterType.dimensions[1] << ":"
            << filterType.dimensions[2] << ":" << filterType.dimensions[3];
    memcpy(filter->mutable_data<int8_t>(),
@@ -267,8 +254,8 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     NeuronModel_addOperand(model, &channelFilterType);  // 1: filter
     filter_node = graph->Add(filter_name, dims_filter);
     VLOG(3) << "chennel filter node idx: " << filter_node->index()
-            << " ,scale_count:" << weight_scale.size()
-            << " weight_scale[0]:" << weight_scale.data()[0]
+            << " ,scale_count:" << filter_scale.size()
+            << " filter_scale[0]:" << filter_scale.data()[0]
            << " ,channelFilterType: " << channelFilterType.dimensions[0]
            << ":" << channelFilterType.dimensions[1] << ":"
            << channelFilterType.dimensions[2] << ":"
@@ -302,7 +289,6 @@
   std::shared_ptr<Node> bias_node = nullptr;
   if (HasInputArg(op_info, scope, "Bias")) {
     auto bias_name = op_info->Input("Bias").front();
-    auto bias_type = kernel->GetInputDeclType("Bias");
     auto bias = scope->FindMutableTensor(bias_name);
     auto bias_dims = bias->dims();
 
@@ -368,10 +354,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // Add output tensor type
   NeuronOperandType outType;
   outType.type = NEURON_TENSOR_QUANT8_ASYMM;
-  if (graph->IsOutput(output_name))
-    outType.scale = output_scale / 127;
-  else
-    outType.scale = output_scale;
+  outType.scale = output_scale;
   outType.zeroPoint = 128;
   outType.dimensionCount = output_dims.size();
   std::vector<uint32_t> dims_out = {(uint32_t)output_dims[0],
@@ -405,7 +388,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   int32_t* int32_bias_data =
       reinterpret_cast<int32_t*>(bias->mutable_data<float>());
   float2int32(
-      bias->data<float>(), input_scale, weight_scale, int32_bias_data);
+      bias->data<float>(), input_scale, filter_scale, int32_bias_data);
 
   VLOG(3) << "int32_bias_data: " << int32_bias_data[0] << " : "
           << int32_bias_data[1] << " : " << int32_bias_data[2] << " : "
diff --git a/lite/kernels/apu/bridges/fc_op.cc b/lite/kernels/apu/bridges/fc_op.cc
index 989ecb32d0550905c2f3fba41369e9a0b7c71e7b..106ce2c16f3fd287a27c92179fa3a429c7be57c8 100644
--- a/lite/kernels/apu/bridges/fc_op.cc
+++ b/lite/kernels/apu/bridges/fc_op.cc
@@ -31,6 +31,10 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto scope = op->scope();
   VLOG(3) << "[APU] Converting [" + op_type + "]";
 
+  CHECK(op_info->HasAttr("enable_int8") &&
+        op_info->GetAttr<bool>("enable_int8"));
+
+  // Get input and output vars and op attributes
   auto input_name = op_info->Input("Input").front();
   auto input = scope->FindMutableTensor(input_name);
   auto input_dims = input->dims();
@@ -52,26 +56,12 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
           << " out_dims: " << out_dims << " m: " << m << " k: " << k
           << " n: " << n;
 
-  float input_scale = 1.0f;
-  float out_scale = 1.0f;
-  std::vector<float> w_scale;
-  if (op_info->HasAttr("enable_int8")) {
-    if (op_info->GetAttr<bool>("enable_int8")) {
-      auto input_name = op_info->Input("Input").front();
-      auto weight_name = op_info->Input("W").front();
-      auto out_name = op_info->Output("Out").front();
-      if (op_info->HasInputScale(input_name))
-        input_scale = op_info->GetInputScale(input_name)[0];
-      if (op_info->HasInputScale(weight_name))
-        w_scale = op_info->GetInputScale(weight_name);
-      if (op_info->HasOutputScale(out_name))
-        out_scale = op_info->GetOutputScale(out_name)[0];
-    } else {
-      return FAILED;
-    }
-  } else {
-    return FAILED;
-  }
+  CHECK(op_info->HasInputScale(input_name));
+  auto input_scale = op_info->GetInputScale(input_name)[0];
+  CHECK(op_info->HasInputScale(w_name));
+  auto w_scale = op_info->GetInputScale(w_name);
+  CHECK(op_info->HasOutputScale(out_name));
+  auto out_scale = op_info->GetOutputScale(out_name)[0];
 
   // Add input tensor type
   NeuronOperandType inType;
diff --git a/lite/kernels/apu/bridges/pool_op.cc b/lite/kernels/apu/bridges/pool_op.cc
index b7c8bcc1999ea772cbc41304099edbe4713d4ace..b82f23beaf715e8c720ffc22792b804ff6c2c225 100644
--- a/lite/kernels/apu/bridges/pool_op.cc
+++ b/lite/kernels/apu/bridges/pool_op.cc
@@ -32,6 +32,9 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto scope = op->scope();
   VLOG(3) << "[APU] Converting [" + op_type + "] ";
 
+  CHECK(op_info->HasAttr("enable_int8") &&
+        op_info->GetAttr<bool>("enable_int8"));
+
   // Get input and output vars and op attributes
   auto x_name = op_info->Input("X").front();
   auto x = scope->FindMutableTensor(x_name);
@@ -87,24 +90,10 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                       ksize);
 
   // Add x tensor type
-  float x_scale = 1.0f;
-  float out_scale = 1.0f;
-  if (op_info->HasAttr("enable_int8")) {
-    if (op_info->GetAttr<bool>("enable_int8")) {
-      auto x_name = op_info->Input("X").front();
-      auto out_name = op_info->Output("Out").front();
-      if (op_info->HasInputScale(x_name))
-        x_scale = op_info->GetInputScale(x_name)[0];
-      if (op_info->HasOutputScale(out_name))
-        out_scale = op_info->GetOutputScale(out_name)[0];
-    } else {
-      LOG(WARNING) << "Do not enable_int8";
-      return FAILED;
-    }
-  } else {
-    LOG(WARNING) << "Do not enable_int8";
-    return FAILED;
-  }
+  CHECK(op_info->HasInputScale(x_name));
+  auto x_scale = op_info->GetInputScale(x_name)[0];
+  CHECK(op_info->HasOutputScale(out_name));
+  auto out_scale = op_info->GetOutputScale(out_name)[0];
 
   NeuronOperandType xType;
   xType.type = NEURON_TENSOR_QUANT8_ASYMM;
diff --git a/lite/kernels/apu/bridges/softmax_op.cc b/lite/kernels/apu/bridges/softmax_op.cc
index c43b3cfc3654b0ce9e14799adc86e450f1f25540..dec6d12307b50798d04f743064360aa6870acfa3 100644
--- a/lite/kernels/apu/bridges/softmax_op.cc
+++ b/lite/kernels/apu/bridges/softmax_op.cc
@@ -31,6 +31,9 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto scope = op->scope();
   VLOG(3) << "[APU] Converting [" + op_type + "]";
 
+  CHECK(op_info->HasAttr("enable_int8") &&
+        op_info->GetAttr<bool>("enable_int8"));
+
   // Get input and output vars and op attributes
   auto x_name = op_info->Input("X").front();
   auto x = scope->FindMutableTensor(x_name);
@@ -45,24 +48,10 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     axis += x_rank;
   }
 
-  float input_scale = 1.0f;
-  float out_scale = 1.0f;
-  if (op_info->HasAttr("enable_int8")) {
-    if (op_info->GetAttr<bool>("enable_int8")) {
-      auto x_name = op_info->Input("X").front();
-      auto out_name = op_info->Output("Out").front();
-      if (op_info->HasInputScale(x_name))
-        input_scale = op_info->GetInputScale(x_name)[0];
-      if (op_info->HasOutputScale(out_name))
-        out_scale = op_info->GetOutputScale(out_name)[0];
-    } else {
-      LOG(WARNING) << "Do not enable_int8";
-      return FAILED;
-    }
-  } else {
-    LOG(WARNING) << "Do not enable_int8";
-    return FAILED;
-  }
+  CHECK(op_info->HasInputScale(x_name));
+  auto input_scale = op_info->GetInputScale(x_name)[0];
+  CHECK(op_info->HasOutputScale(out_name));
+  auto out_scale = op_info->GetOutputScale(out_name)[0];
 
   // Check output scale
   NeuronOperandType xType;
@@ -106,14 +95,14 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // Add out operand
   NeuronOperandType outType;
   outType.type = NEURON_TENSOR_QUANT8_ASYMM;
-  outType.scale = out_scale / 127;
+  outType.scale = out_scale;
   outType.zeroPoint = 128;
   outType.dimensionCount = x_dims.size();
   outType.dimensions = &dims_x[0];
   NeuronModel_addOperand(model, &outType);  // 3: output
   std::shared_ptr<Node> out_node = nullptr;
   out_node = graph->Add(out_name, dims_x);
-  VLOG(3) << "output_scale: " << out_scale;
+  VLOG(3) << "out_scale: " << out_scale;
 
   float beta_val[] = {1.0f};
   NeuronModel_setOperandValue(
diff --git a/lite/kernels/apu/subgraph_compute.cc b/lite/kernels/apu/subgraph_compute.cc
index 6009e71e05c33f6dedfd995020612e112c888d36..d5599e959d97d505b4d368d4000274b529dc9536 100644
--- a/lite/kernels/apu/subgraph_compute.cc
+++ b/lite/kernels/apu/subgraph_compute.cc
@@ -153,18 +153,15 @@ int SubgraphEngine::LaunchDeviceProgram() {
   }
 
   // Set input buffer
-  Tensor input_temp;
   for (size_t i = 0; i < origin_itensors_.size(); i++) {
-    input_temp.Resize({origin_idims_[i]});
-    uint8_t* input_data = input_temp.mutable_data<uint8_t>();
-    memcpy(input_data,
-           origin_itensors_[i]->raw_data(),
-           origin_itensors_[i]->memory_size());
+    auto origin_data = origin_itensors_[i]->mutable_data<int8_t>();
+    auto converted_data = reinterpret_cast<uint8_t*>(origin_data);
     for (int j = 0; j < origin_itensors_[i]->data_size(); j++) {
-      input_data[j] += (uint8_t)128;
+      converted_data[j] =
+          static_cast<uint8_t>(static_cast<int16_t>(origin_data[j]) + 128);
     }
     NeuronExecution_setInput(
-        run, i, NULL, input_data, origin_itensors_[i]->memory_size());
+        run, i, NULL, converted_data, origin_itensors_[i]->memory_size());
   }
 
   // Set output buffer
@@ -184,10 +181,11 @@
   }
 
   for (size_t i = 0; i < origin_otensors_.size(); i++) {
-    int8_t* output_data = origin_otensors_[i]->mutable_data<int8_t>();
-    VLOG(3) << "output size:" << origin_otensors_[i]->memory_size();
+    auto converted_data = origin_otensors_[i]->mutable_data<int8_t>();
+    auto origin_data = reinterpret_cast<uint8_t*>(converted_data);
     for (int j = 0; j < origin_otensors_[i]->data_size(); j++) {
-      output_data[j] -= (int8_t)128;
+      converted_data[j] =
+          static_cast<int8_t>(static_cast<int16_t>(origin_data[j]) - 128);
    }
  }
  NeuronExecution_free(run);
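
Review note: the quantized_op_attributes_inference_pass.cc hunks keep the whole scale vector (so per-channel scales survive) instead of collapsing to a single float, and they fall back to out_threshold / ((1 << (bit_length - 1)) - 1) only when no consumer reports an input scale. Below is a minimal standalone sketch of that fallback arithmetic; InferOutputScale and its parameters are illustrative names, not part of Lite.

    #include <cstdio>
    #include <vector>

    // Illustrative helper (not part of Lite): pick the output scale for one
    // output variable of a quantized op.
    //  - consumer_scales: input-scale vectors reported by the downstream ops.
    //  - out_threshold / bit_length: the attributes the pass falls back to.
    std::vector<float> InferOutputScale(
        const std::vector<std::vector<float>>& consumer_scales,
        bool has_out_threshold,
        float out_threshold,
        int bit_length) {
      // Prefer a scale propagated from any consumer's input scale.
      for (const auto& s : consumer_scales) {
        if (!s.empty()) return s;
      }
      // Otherwise derive it from out_threshold: threshold / ((1 << (n - 1)) - 1).
      if (has_out_threshold) {
        int range = (1 << (bit_length - 1)) - 1;  // 127 for 8-bit
        return {out_threshold / range};
      }
      return {};  // No scale available: the op is treated as not quantized.
    }

    int main() {
      // No consumer provides a scale; 8-bit threshold 6.35 -> 6.35 / 127 = 0.05.
      auto scale = InferOutputScale({}, true, 6.35f, 8);
      std::printf("fallback scale = %f\n", scale.empty() ? 0.0f : scale[0]);
      return 0;
    }

For an 8-bit model the divisor is 127, which is why the removed "/ 127" adjustments in the APU bridges become unnecessary once the pass already stores the divided scale.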
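Review note: the subgraph_compute.cc hunks drop the staging input_temp tensor and convert each buffer in place. The APU operands are declared as NEURON_TENSOR_QUANT8_ASYMM with zeroPoint = 128, so the int8 value v and the uint8 value v + 128 denote the same real number as long as the scale is unchanged. A small self-contained sketch of that round trip, independent of the Neuron API (the widening cast to int16_t, used to avoid 8-bit overflow, is an illustrative choice):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      // Framework-side tensor: symmetric int8 quantized data (zero point 0).
      std::vector<int8_t> tensor = {-128, -1, 0, 1, 127};

      // Device-side view: the same buffer reused as uint8 with zero point 128,
      // mirroring the in-place conversion done before NeuronExecution_setInput.
      uint8_t* as_uint8 = reinterpret_cast<uint8_t*>(tensor.data());
      for (size_t i = 0; i < tensor.size(); ++i) {
        // Widen first so -128 + 128 does not overflow the 8-bit type.
        as_uint8[i] = static_cast<uint8_t>(static_cast<int16_t>(tensor[i]) + 128);
      }
      // -128 -> 0, 0 -> 128, 127 -> 255; the represented real value is unchanged
      // because the scale stays the same and only the zero point shifts by 128.
      for (size_t i = 0; i < tensor.size(); ++i) {
        std::printf("%d ", static_cast<int>(as_uint8[i]));
      }
      std::printf("\n");

      // Output path: subtract 128 to turn the device's uint8 result back to int8.
      for (size_t i = 0; i < tensor.size(); ++i) {
        tensor[i] = static_cast<int8_t>(static_cast<int16_t>(as_uint8[i]) - 128);
      }
      for (size_t i = 0; i < tensor.size(); ++i) {
        std::printf("%d ", static_cast<int>(tensor[i]));
      }
      std::printf("\n");
      return 0;
    }

Reusing the tensor's own storage avoids the extra allocation and memcpy per input that the removed input_temp code paid on every inference.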