Unverified commit 698c7e76, authored by hong19860320, committed by GitHub

[APU] Adapt to the changes in the quantization parameters (#3863)

Parent cb138726
@@ -38,12 +38,12 @@ void QuantizedOpAttributesInferencePass::Apply(
     auto op_info = inst.op_info();
     auto op_type = op_info->Type();
-    // Check only if all of the inputs of the op have scale value
-    bool has_input_scale = true;
+    // Check if any of the inputs of the op have scale value
+    bool has_input_scale = false;
     for (auto in_var_node : op_node->inlinks) {
       CHECK(in_var_node->IsArg());
       auto in_var_node_name = in_var_node->arg()->name;
-      has_input_scale &= op_info->HasInputScale(in_var_node_name);
+      has_input_scale |= op_info->HasInputScale(in_var_node_name);
     }
     if (!has_input_scale) continue;
@@ -52,32 +52,31 @@ void QuantizedOpAttributesInferencePass::Apply(
     bool is_quantized = true;
     for (auto out_var_node : op_node->outlinks) {
       CHECK(out_var_node->IsArg());
-      bool found = false;
-      float output_scale;
+      std::vector<float> output_scale;
+      bool has_output_scale = false;
       auto out_var_node_name = out_var_node->arg()->name;
       for (auto out_op_node : out_var_node->outlinks) {
         CHECK(out_op_node->IsStmt());
         auto& out_inst = out_op_node->AsStmt();
         auto out_op_info = out_inst.op_info();
         if (!out_op_info->HasInputScale(out_var_node_name)) continue;
-        auto input_scale = out_op_info->GetInputScale(out_var_node_name)[0];
-        if (!found) {
-          found = true;
+        auto input_scale = out_op_info->GetInputScale(out_var_node_name);
+        if (!has_output_scale) {
           output_scale = input_scale;
+          has_output_scale = true;
         } else {
-          CHECK_EQ(output_scale, input_scale);
+          CHECK_EQ(output_scale.size(), input_scale.size());
         }
       }
-      if (found) {
-        inst.mutable_op_info()->SetOutputScale(out_var_node_name,
-                                               {output_scale});
+      if (has_output_scale) {
+        inst.mutable_op_info()->SetOutputScale(out_var_node_name, output_scale);
       } else if (op_info->HasAttr("out_threshold")) {
         // Only consider one output, there are only one out_threshold
         int bit_length = op_info->GetAttr<int>("bit_length");
         int range = (1 << (bit_length - 1)) - 1;
-        output_scale = op_info->GetAttr<float>("out_threshold");
-        inst.mutable_op_info()->SetOutputScale(out_var_node_name,
-                                               {output_scale / range});
+        output_scale = std::vector<float>{
+            op_info->GetAttr<float>("out_threshold") / range};
+        inst.mutable_op_info()->SetOutputScale(out_var_node_name, output_scale);
      } else {
        is_quantized = false;
      }
......
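For reference, a minimal standalone C++ sketch (not part of this patch; the function and values are illustrative) of the threshold-to-scale conversion used above when an op carries only an `out_threshold` attribute:

```cpp
#include <iostream>

// Hedged sketch: derive a per-tensor scale from an "out_threshold" attribute,
// mirroring the formula in the pass above: scale = threshold / range,
// where range = 2^(bit_length - 1) - 1 (127 for int8).
float ThresholdToScale(float out_threshold, int bit_length) {
  int range = (1 << (bit_length - 1)) - 1;
  return out_threshold / range;
}

int main() {
  // Example: a tensor whose values were observed in [-6.0, 6.0] during
  // calibration, quantized to int8.
  float scale = ThresholdToScale(6.0f, 8);
  std::cout << "scale = " << scale << std::endl;  // ~0.0472 (= 6 / 127)
  return 0;
}
```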
@@ -452,39 +452,6 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
   subgraph_op_desc.SetAttr<std::vector<std::string>>("output_data_names",
                                                      output_var_names);
-  // Set input/output scale values of input/output var nodes for
-  // type_precision_cast_pass.
-  std::vector<float> input_data_scales;
-  std::vector<float> output_data_scales;
-  for (auto &var_node : input_var_nodes) {
-    auto var_node_name = var_node->arg()->name;
-    auto any_op_node = var_node->outlinks.front();
-    CHECK(any_op_node->IsStmt());
-    auto &any_inst = any_op_node->AsStmt();
-    if (any_inst.op_info()->HasInputScale(var_node_name)) {
-      input_data_scales.push_back(
-          any_inst.op_info()->GetInputScale(var_node_name)[0]);
-    }
-  }
-  for (auto &var_node : output_var_nodes) {
-    auto var_node_name = var_node->arg()->name;
-    auto any_op_node = var_node->inlinks.front();
-    CHECK(any_op_node->IsStmt());
-    auto &any_inst = any_op_node->AsStmt();
-    if (any_inst.op_info()->HasOutputScale(var_node_name)) {
-      output_data_scales.push_back(
-          any_inst.op_info()->GetOutputScale(var_node_name)[0]);
-    }
-  }
-  if (input_data_scales.size() > 0) {
-    subgraph_op_desc.SetAttr<std::vector<float>>("input_data_scales",
-                                                 input_data_scales);
-  }
-  if (output_data_scales.size() > 0) {
-    subgraph_op_desc.SetAttr<std::vector<float>>("output_data_scales",
-                                                 output_data_scales);
-  }
   // Set all of the inputs and outputs to the target subgraph op
   // To prevent vars are removed in RuntimeProgram::UpdateVarsOfProgram()
   for (auto &var_node : weight_var_nodes) {
@@ -504,6 +471,29 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
   auto any_op = (*subgraph_nodes.begin())->AsStmt().op();
   subgraph_op->Attach(subgraph_op_desc, any_op->scope());
+  // Export the scale values of the input/output var nodes of the inner op nodes
+  // only for type_precision_cast_pass.
+  for (auto &var_node : input_var_nodes) {
+    auto var_node_name = var_node->arg()->name;
+    auto any_op_node = var_node->outlinks.front();
+    CHECK(any_op_node->IsStmt());
+    auto &any_inst = any_op_node->AsStmt();
+    if (any_inst.op_info()->HasInputScale(var_node_name)) {
+      subgraph_op->mutable_op_info()->SetInputScale(
+          var_node_name, any_inst.op_info()->GetInputScale(var_node_name));
+    }
+  }
+  for (auto &var_node : output_var_nodes) {
+    auto var_node_name = var_node->arg()->name;
+    auto any_op_node = var_node->inlinks.front();
+    CHECK(any_op_node->IsStmt());
+    auto &any_inst = any_op_node->AsStmt();
+    if (any_inst.op_info()->HasOutputScale(var_node_name)) {
+      subgraph_op->mutable_op_info()->SetOutputScale(
+          var_node_name, any_inst.op_info()->GetOutputScale(var_node_name));
+    }
+  }
   // Create and add a new subgraph node into the graph
   auto subgraph_op_node =
       graph->GraphCreateInstructNode(subgraph_op, any_op->valid_places());
......
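The effect of the change above is that scales are now registered on the subgraph op's OpInfo per variable name (via SetInputScale / SetOutputScale) instead of as the index-matched "input_data_names" / "input_data_scales" attribute pairs. A tiny illustrative sketch (not the real PaddleLite OpInfo class) of that keyed storage:

```cpp
#include <map>
#include <string>
#include <vector>

// Illustrative only: per-variable scale storage keyed by tensor name, which is
// what SetInputScale / SetOutputScale above provide. A consumer pass can look a
// scale up directly by name instead of matching two parallel attribute vectors.
struct ScaleTable {
  std::map<std::string, std::vector<float>> input_scales;

  bool HasInputScale(const std::string& name) const {
    return input_scales.count(name) > 0;
  }
  const std::vector<float>& GetInputScale(const std::string& name) const {
    return input_scales.at(name);
  }
};
```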
@@ -66,65 +66,30 @@ void UpdateInputs(OpLite* op, const std::string& from, const std::string& to) {
   }
 }
-// Infer the scale value for the new calib op from the subgraph op
-static bool InferScaleFromSubgraph(std::string var_name,
-                                   const OpInfo* op_info,
-                                   float* scale,
-                                   bool reverse = false) {
-  std::string attr_name = reverse ? "output_data_names" : "input_data_names";
-  if (!op_info->HasAttr(attr_name)) return false;
-  auto input_or_output_names =
-      op_info->GetAttr<std::vector<std::string>>(attr_name);
-  attr_name = reverse ? "output_data_scales" : "input_data_scales";
-  if (!op_info->HasAttr(attr_name)) return false;
-  auto input_or_output_scales = op_info->GetAttr<std::vector<float>>(attr_name);
-  auto size = input_or_output_names.size();
-  CHECK(size == input_or_output_scales.size());
-  for (size_t i = 0; i < size; i++) {
-    if (input_or_output_names[i] == var_name) {
-      *scale = input_or_output_scales[i];
-      return true;
-    }
-  }
-  return false;
-}
 // Infer the scale value for the new calib op from the input_scale of the
 // current op and output_scale of the previous op.
 // case 1: prev_op->var_node->op_node(int8->any op, with input_scale).
-// case 2: prev_op->var_node->op_node(subgraph op, int8->any, with
-// input_data_scales).
-// case 3: prev_op(any->int8, with output_scale)->var_node->op_node(fp32->any,
+// case 2: prev_op(any->int8, with output_scale)->var_node->op_node(fp32->any,
 // without input_scale).
-// case 4: prev_op(any->int8, subgraph_op, with
-// output_data_scales)->var_node->op_node(fp32->any, without input_scale).
 static bool InferScale(Node* var_node, Node* op_node, float* scale) {
   bool found = false;
   auto& inst = op_node->AsStmt();
   auto op_info = inst.op_info();
   auto op_type = op_info->Type();
   auto var_name = var_node->AsArg().name;
-  if (op_type == "subgraph") {
-    found = InferScaleFromSubgraph(var_name, op_info, scale, false);
+  if (op_info->HasInputScale(var_name)) {
+    *scale = op_info->GetInputScale(var_name)[0];
+    found = true;
   } else {
-    if (op_info->HasInputScale(var_name)) {
-      *scale = op_info->GetInputScale(var_name)[0];
-      found = true;
-    } else {
-      // Obtain the output_scale from one of its previous Ops
-      auto prev_op_node = var_node->inlinks.front();
-      CHECK(prev_op_node->IsStmt());
-      auto& prev_inst = prev_op_node->AsStmt();
-      auto prev_op_info = prev_inst.op_info();
-      auto prev_op_type = prev_op_info->Type();
-      if (prev_op_type == "subgraph") {
-        found = InferScaleFromSubgraph(var_name, prev_op_info, scale, true);
-      } else {
-        if (prev_op_info->HasOutputScale(var_name)) {
-          *scale = prev_op_info->GetOutputScale(var_name)[0];
-          found = true;
-        }
-      }
-    }
+    // Obtain the output_scale from one of its previous Ops
+    auto prev_op_node = var_node->inlinks.front();
+    CHECK(prev_op_node->IsStmt());
+    auto& prev_inst = prev_op_node->AsStmt();
+    auto prev_op_info = prev_inst.op_info();
+    auto prev_op_type = prev_op_info->Type();
+    if (prev_op_info->HasOutputScale(var_name)) {
+      *scale = prev_op_info->GetOutputScale(var_name)[0];
+      found = true;
    }
   }
   return found;
......
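A compact sketch of the lookup order the simplified InferScale follows (the map-based types here are illustrative, not the real graph classes): case 1 takes the consumer op's input scale for the variable, case 2 falls back to the producer op's output scale.

```cpp
#include <map>
#include <string>

// Hedged illustration of the two-case fallback above.
bool InferScaleSketch(const std::string& var_name,
                      const std::map<std::string, float>& consumer_input_scales,
                      const std::map<std::string, float>& producer_output_scales,
                      float* scale) {
  auto it = consumer_input_scales.find(var_name);
  if (it != consumer_input_scales.end()) {
    *scale = it->second;  // case 1: op_node carries input_scale
    return true;
  }
  it = producer_output_scales.find(var_name);
  if (it != producer_output_scales.end()) {
    *scale = it->second;  // case 2: previous op carries output_scale
    return true;
  }
  return false;
}
```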
@@ -35,6 +35,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   int neuron_errCode;
   VLOG(3) << "[APU] Converting [" << op_type << "]";
+  CHECK(op_info->HasAttr("enable_int8") &&
+        op_info->GetAttr<bool>("enable_int8"));
   // Get input and output vars and op attributes
   auto input_name = op_info->Input("Input").front();
   auto input = scope->FindMutableTensor(input_name);
@@ -94,34 +97,18 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                       input_dims,
                                       filter_dims);
-  float input_scale;
-  float output_scale;
-  std::vector<float> weight_scale;
-  if (op_info->HasAttr("enable_int8")) {
-    if (op_info->GetAttr<bool>("enable_int8")) {
-      auto input_name = op_info->Input("Input").front();
-      auto filter_name = op_info->Input("Filter").front();
-      auto output_name = op_info->Output("Output").front();
-      if (op_info->HasInputScale(input_name))
-        input_scale = op_info->GetInputScale(input_name)[0];
-      if (op_info->HasInputScale(filter_name))
-        weight_scale = op_info->GetInputScale(filter_name);
-      if (op_info->HasOutputScale(output_name)) {
-        output_scale = op_info->GetOutputScale(output_name)[0];
-      }
-      VLOG(3) << "has output scale:" << output_scale;
-    } else {
-      return FAILED;
-    }
-  } else {
-    return FAILED;
-  }
+  CHECK(op_info->HasInputScale(input_name));
+  auto input_scale = op_info->GetInputScale(input_name)[0];
+  CHECK(op_info->HasInputScale(filter_name));
+  auto filter_scale = op_info->GetInputScale(filter_name);
+  CHECK(op_info->HasOutputScale(output_name));
+  auto output_scale = op_info->GetOutputScale(output_name)[0];
   VLOG(3) << "strides.size(): " << strides.size() << " ,groups: " << groups
           << " ,dilations: " << dilations[0] << ":" << dilations[1];
   VLOG(3) << "with_act: " << with_act << " ,act_type:" << act_type;
   VLOG(3) << "input_dims: " << input_dims << " ,output_dims: " << output_dims
-          << " ,weight_scale size: " << weight_scale.size();
+          << " ,filter_scale size: " << filter_scale.size();
   VLOG(3) << "filter_dims: " << filter_dims
           << " ,memory_size: " << filter->memory_size()
           << " ,data_size: " << filter->data_size();
@@ -220,10 +207,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   NeuronOperandType filterType;
   NeuronOperandType channelFilterType;
   NeuronSymmPerChannelQuantParams symmPerChannelQuantParams;
-  if (1 == weight_scale.size()) {
+  if (1 == filter_scale.size()) {
     // Per layer type
     filterType.type = NEURON_TENSOR_QUANT8_ASYMM;
-    filterType.scale = weight_scale[0];
+    filterType.scale = filter_scale[0];
     filterType.zeroPoint = 128;
     filterType.dimensionCount = filter_dims.size();
     filterType.dimensions = &dims_filter[0];
@@ -241,17 +228,17 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       symmPerChannelQuantParams.channelDim = 3;
     else
       symmPerChannelQuantParams.channelDim = 0;
-    symmPerChannelQuantParams.scaleCount = weight_scale.size();
-    symmPerChannelQuantParams.scales = weight_scale.data();
+    symmPerChannelQuantParams.scaleCount = filter_scale.size();
+    symmPerChannelQuantParams.scales = filter_scale.data();
     biasType.scale = 0;
   }
   std::shared_ptr<Node> filter_node = nullptr;
-  if (1 == weight_scale.size()) {
+  if (1 == filter_scale.size()) {
     NeuronModel_addOperand(model, &filterType);  // 1: filter
     filter_node = graph->Add(filter_name, dims_filter);
-    VLOG(3) << "filter node idx: " << filter_node->index() << "w_scale[0]"
-            << weight_scale[0] << ": filterType: " << filterType.dimensions[0]
+    VLOG(3) << "filter node idx: " << filter_node->index() << "filter_scale[0]"
+            << filter_scale[0] << ": filterType: " << filterType.dimensions[0]
            << ":" << filterType.dimensions[1] << ":"
            << filterType.dimensions[2] << ":" << filterType.dimensions[3];
     memcpy(filter->mutable_data<int8_t>(),
@@ -267,8 +254,8 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     NeuronModel_addOperand(model, &channelFilterType);  // 1: filter
     filter_node = graph->Add(filter_name, dims_filter);
     VLOG(3) << "chennel filter node idx: " << filter_node->index()
-            << " ,scale_count:" << weight_scale.size()
-            << " weight_scale[0]:" << weight_scale.data()[0]
+            << " ,scale_count:" << filter_scale.size()
+            << " filter_scale[0]:" << filter_scale.data()[0]
            << " ,channelFilterType: " << channelFilterType.dimensions[0] << ":"
            << channelFilterType.dimensions[1] << ":"
            << channelFilterType.dimensions[2] << ":"
@@ -302,7 +289,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   std::shared_ptr<Node> bias_node = nullptr;
   if (HasInputArg(op_info, scope, "Bias")) {
     auto bias_name = op_info->Input("Bias").front();
-    auto bias_type = kernel->GetInputDeclType("Bias");
     auto bias = scope->FindMutableTensor(bias_name);
     auto bias_dims = bias->dims();
@@ -368,10 +354,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // Add output tensor type
   NeuronOperandType outType;
   outType.type = NEURON_TENSOR_QUANT8_ASYMM;
-  if (graph->IsOutput(output_name))
-    outType.scale = output_scale / 127;
-  else
-    outType.scale = output_scale;
+  outType.scale = output_scale;
   outType.zeroPoint = 128;
   outType.dimensionCount = output_dims.size();
   std::vector<uint32_t> dims_out = {(uint32_t)output_dims[0],
@@ -405,7 +388,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     int32_t* int32_bias_data =
         reinterpret_cast<int32_t*>(bias->mutable_data<float>());
     float2int32(
-        bias->data<float>(), input_scale, weight_scale, int32_bias_data);
+        bias->data<float>(), input_scale, filter_scale, int32_bias_data);
     VLOG(3) << "int32_bias_data: " << int32_bias_data[0] << " : "
             << int32_bias_data[1] << " : " << int32_bias_data[2] << " : "
......
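For context, a hedged sketch of the bias requantization that a helper like `float2int32` is assumed to perform here, following the common Neuron/NNAPI convention where the int32 bias scale is `input_scale * filter_scale` (per output channel when the filter is per-channel quantized); the actual helper implementation is not shown in this diff, so the code below is only an approximation of that convention:

```cpp
#include <cmath>
#include <cstdint>
#include <vector>

// Assumed convention: each float bias value is divided by
// (input_scale * filter_scale), using the matching channel's filter scale
// when filter_scale holds per-channel values.
std::vector<int32_t> QuantizeBias(const std::vector<float>& bias,
                                  float input_scale,
                                  const std::vector<float>& filter_scale) {
  std::vector<int32_t> out(bias.size());
  for (size_t c = 0; c < bias.size(); ++c) {
    float fs = (filter_scale.size() == 1)
                   ? filter_scale[0]
                   : filter_scale[c % filter_scale.size()];
    out[c] = static_cast<int32_t>(std::round(bias[c] / (input_scale * fs)));
  }
  return out;
}
```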
@@ -31,6 +31,10 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto scope = op->scope();
   VLOG(3) << "[APU] Converting [" + op_type + "]";
+  CHECK(op_info->HasAttr("enable_int8") &&
+        op_info->GetAttr<bool>("enable_int8"));
+  // Get input and output vars and op attributes
   auto input_name = op_info->Input("Input").front();
   auto input = scope->FindMutableTensor(input_name);
   auto input_dims = input->dims();
@@ -52,26 +56,12 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
           << " out_dims: " << out_dims << " m: " << m << " k: " << k
           << " n: " << n;
-  float input_scale = 1.0f;
-  float out_scale = 1.0f;
-  std::vector<float> w_scale;
-  if (op_info->HasAttr("enable_int8")) {
-    if (op_info->GetAttr<bool>("enable_int8")) {
-      auto input_name = op_info->Input("Input").front();
-      auto weight_name = op_info->Input("W").front();
-      auto out_name = op_info->Output("Out").front();
-      if (op_info->HasInputScale(input_name))
-        input_scale = op_info->GetInputScale(input_name)[0];
-      if (op_info->HasInputScale(weight_name))
-        w_scale = op_info->GetInputScale(weight_name);
-      if (op_info->HasOutputScale(out_name))
-        out_scale = op_info->GetOutputScale(out_name)[0];
-    } else {
-      return FAILED;
-    }
-  } else {
-    return FAILED;
-  }
+  CHECK(op_info->HasInputScale(input_name));
+  auto input_scale = op_info->GetInputScale(input_name)[0];
+  CHECK(op_info->HasInputScale(w_name));
+  auto w_scale = op_info->GetInputScale(w_name);
+  CHECK(op_info->HasOutputScale(out_name));
+  auto out_scale = op_info->GetOutputScale(out_name)[0];
   // Add input tensor type
   NeuronOperandType inType;
......
@@ -32,6 +32,9 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto scope = op->scope();
   VLOG(3) << "[APU] Converting [" + op_type + "] ";
+  CHECK(op_info->HasAttr("enable_int8") &&
+        op_info->GetAttr<bool>("enable_int8"));
   // Get input and output vars and op attributes
   auto x_name = op_info->Input("X").front();
   auto x = scope->FindMutableTensor(x_name);
@@ -87,24 +90,10 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                 ksize);
   // Add x tensor type
-  float x_scale = 1.0f;
-  float out_scale = 1.0f;
-  if (op_info->HasAttr("enable_int8")) {
-    if (op_info->GetAttr<bool>("enable_int8")) {
-      auto x_name = op_info->Input("X").front();
-      auto out_name = op_info->Output("Out").front();
-      if (op_info->HasInputScale(x_name))
-        x_scale = op_info->GetInputScale(x_name)[0];
-      if (op_info->HasOutputScale(out_name))
-        out_scale = op_info->GetOutputScale(out_name)[0];
-    } else {
-      LOG(WARNING) << "Do not enable_int8";
-      return FAILED;
-    }
-  } else {
-    LOG(WARNING) << "Do not enable_int8";
-    return FAILED;
-  }
+  CHECK(op_info->HasInputScale(x_name));
+  auto x_scale = op_info->GetInputScale(x_name)[0];
+  CHECK(op_info->HasOutputScale(out_name));
+  auto out_scale = op_info->GetOutputScale(out_name)[0];
   NeuronOperandType xType;
   xType.type = NEURON_TENSOR_QUANT8_ASYMM;
......
@@ -31,6 +31,9 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto scope = op->scope();
   VLOG(3) << "[APU] Converting [" + op_type + "]";
+  CHECK(op_info->HasAttr("enable_int8") &&
+        op_info->GetAttr<bool>("enable_int8"));
   // Get input and output vars and op attributes
   auto x_name = op_info->Input("X").front();
   auto x = scope->FindMutableTensor(x_name);
@@ -45,24 +48,10 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     axis += x_rank;
   }
-  float input_scale = 1.0f;
-  float out_scale = 1.0f;
-  if (op_info->HasAttr("enable_int8")) {
-    if (op_info->GetAttr<bool>("enable_int8")) {
-      auto x_name = op_info->Input("X").front();
-      auto out_name = op_info->Output("Out").front();
-      if (op_info->HasInputScale(x_name))
-        input_scale = op_info->GetInputScale(x_name)[0];
-      if (op_info->HasOutputScale(out_name))
-        out_scale = op_info->GetOutputScale(out_name)[0];
-    } else {
-      LOG(WARNING) << "Do not enable_int8";
-      return FAILED;
-    }
-  } else {
-    LOG(WARNING) << "Do not enable_int8";
-    return FAILED;
-  }
+  CHECK(op_info->HasInputScale(x_name));
+  auto input_scale = op_info->GetInputScale(x_name)[0];
+  CHECK(op_info->HasOutputScale(out_name));
+  auto out_scale = op_info->GetOutputScale(out_name)[0];
   // Check output scale
   NeuronOperandType xType;
@@ -106,14 +95,14 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // Add out operand
   NeuronOperandType outType;
   outType.type = NEURON_TENSOR_QUANT8_ASYMM;
-  outType.scale = out_scale / 127;
+  outType.scale = out_scale;
   outType.zeroPoint = 128;
   outType.dimensionCount = x_dims.size();
   outType.dimensions = &dims_x[0];
   NeuronModel_addOperand(model, &outType);  // 3: output
   std::shared_ptr<Node> out_node = nullptr;
   out_node = graph->Add(out_name, dims_x);
-  VLOG(3) << "output_scale: " << out_scale;
+  VLOG(3) << "out_scale: " << out_scale;
   float beta_val[] = {1.0f};
   NeuronModel_setOperandValue(
......
@@ -153,18 +153,15 @@ int SubgraphEngine::LaunchDeviceProgram() {
   }
   // Set input buffer
-  Tensor input_temp;
   for (size_t i = 0; i < origin_itensors_.size(); i++) {
-    input_temp.Resize({origin_idims_[i]});
-    uint8_t* input_data = input_temp.mutable_data<uint8_t>();
-    memcpy(input_data,
-           origin_itensors_[i]->raw_data(),
-           origin_itensors_[i]->memory_size());
+    auto origin_data = origin_itensors_[i]->mutable_data<int8_t>();
+    auto converted_data = reinterpret_cast<uint8_t*>(origin_data);
     for (int j = 0; j < origin_itensors_[i]->data_size(); j++) {
-      input_data[j] += (uint8_t)128;
+      converted_data[j] =
+          static_cast<uint8_t>(static_cast<int16_t>(origin_data[j]) + 128);
     }
     NeuronExecution_setInput(
-        run, i, NULL, input_data, origin_itensors_[i]->memory_size());
+        run, i, NULL, converted_data, origin_itensors_[i]->memory_size());
   }
   // Set output buffer
@@ -184,10 +181,11 @@ int SubgraphEngine::LaunchDeviceProgram() {
   }
   for (size_t i = 0; i < origin_otensors_.size(); i++) {
-    int8_t* output_data = origin_otensors_[i]->mutable_data<int8_t>();
-    VLOG(3) << "output size:" << origin_otensors_[i]->memory_size();
+    auto converted_data = origin_otensors_[i]->mutable_data<int8_t>();
+    auto origin_data = reinterpret_cast<uint8_t*>(converted_data);
     for (int j = 0; j < origin_otensors_[i]->data_size(); j++) {
-      output_data[j] -= (int8_t)128;
+      converted_data[j] =
+          static_cast<int8_t>(static_cast<int16_t>(origin_data[j]) - 128);
     }
   }
   NeuronExecution_free(run);
......
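A self-contained sketch of the in-place zero-point shift performed in the two loops above: the APU expects asymmetric uint8 tensors with zeroPoint = 128 while the framework buffers hold symmetric int8 data, so input elements are shifted by +128 and output elements by -128 through an int16 intermediate (the function names are illustrative, not part of the patch):

```cpp
#include <cstddef>
#include <cstdint>

// Reinterpret an int8 buffer as uint8 and shift every element by +128,
// matching the input conversion in LaunchDeviceProgram above.
void Int8ToUint8InPlace(int8_t* data, size_t size) {
  uint8_t* out = reinterpret_cast<uint8_t*>(data);
  for (size_t i = 0; i < size; ++i) {
    out[i] = static_cast<uint8_t>(static_cast<int16_t>(data[i]) + 128);
  }
}

// Reinterpret a uint8 buffer as int8 and shift every element by -128,
// matching the output conversion above.
void Uint8ToInt8InPlace(uint8_t* data, size_t size) {
  int8_t* out = reinterpret_cast<int8_t*>(data);
  for (size_t i = 0; i < size; ++i) {
    out[i] = static_cast<int8_t>(static_cast<int16_t>(data[i]) - 128);
  }
}
```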