Commit 620c9c1a authored by Jiansong Wang

1. Change the bridge code to follow the new way (2.6 -> dev) of obtaining input/output scales (see the sketch below the header).
2. Add kImaginationNNA to the BindTargets of 'quantized_op_attributes_inference_pass'.
3. Image classification test with one input passed.
Parent 0633d001
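The core of the change is how the NNA bridges read quantization scales: the attribute-based lookup guarded by `enable_int8` is replaced by the per-tensor scale API on `OpInfo`, keyed by variable names. Below is a minimal before/after sketch of that pattern, assuming the Paddle-Lite headers and namespace qualification; `ReadConvScales` is a hypothetical helper, not part of the codebase.

```cpp
// Sketch only: contrasts the old (2.6) attribute-based scale lookup with
// the new (dev) per-tensor scale API used throughout this commit.
// ReadConvScales is a hypothetical helper, not a Paddle-Lite function.
#include <string>
#include <vector>

void ReadConvScales(const paddle::lite::OpInfo* op_info,
                    const std::string& input_name,
                    const std::string& filter_name,
                    const std::string& output_name) {
  // Old way: scales stored as loose op attributes, read only when the
  // "enable_int8" attribute was set:
  //   float input_scale  = op_info->GetAttr<float>("input_scale");
  //   float output_scale = op_info->GetAttr<float>("output_scale");
  //   auto  weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");

  // New way: scales are attached to the tensors, so the bridge asserts
  // their presence and queries them by variable name.
  CHECK(op_info->HasInputScale(input_name));
  float input_scale = op_info->GetInputScale(input_name)[0];

  CHECK(op_info->HasInputScale(filter_name));
  std::vector<float> weight_scale = op_info->GetInputScale(filter_name);

  CHECK(op_info->HasOutputScale(output_name));
  float output_scale = op_info->GetOutputScale(output_name)[0];

  (void)input_scale;
  (void)weight_scale;
  (void)output_scale;  // silence unused-variable warnings in this sketch
}
```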
......@@ -97,4 +97,4 @@ void QuantizedOpAttributesInferencePass::Apply(
REGISTER_MIR_PASS(quantized_op_attributes_inference_pass,
paddle::lite::mir::QuantizedOpAttributesInferencePass)
.BindTargets({TARGET(kAPU), TARGET(kRKNPU)});
.BindTargets({TARGET(kAPU), TARGET(kRKNPU), TARGET(kImaginationNNA)});
......@@ -31,6 +31,9 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " << op_type << "... ";
CHECK(op_info->HasAttr("enable_int8") &&
op_info->GetAttr<bool>("enable_int8"));
// Get input and output vars and op attributes
auto input_name = op_info->Input("Input").front();
auto input = scope->FindMutableTensor(input_name);
......@@ -67,19 +70,14 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
CHECK_EQ(strides.size(), 2L);
CHECK_EQ(dilations.size(), 2L);
// for quantization
bool enable_int8 = false;
float input_scale = 1.0;
float output_scale = 1.0;
std::vector<float> weight_scale;
TensorInfo qnt;
CHECK(op_info->HasInputScale(input_name));
float input_scale = op_info->GetInputScale(input_name)[0];
CHECK(op_info->HasInputScale(filter_name));
std::vector<float> weight_scale = op_info->GetInputScale(filter_name);
CHECK(op_info->HasOutputScale(output_name));
float output_scale = op_info->GetOutputScale(output_name)[0];
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
output_scale = op_info->GetAttr<float>("output_scale");
weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
}
TensorInfo qnt;
// Input node
std::shared_ptr<Node> input_node = nullptr;
......@@ -89,10 +87,7 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
in_tensor = input_node->data();
} else {
TensorInfoReset(&qnt);
if (enable_int8)
qnt.type = IMGDNN_TYPE_Q_U8;
else
qnt.type = IMGDNN_TYPE_F32;
qnt.type = IMGDNN_TYPE_Q_U8;
qnt.scales.push_back(input_scale);
qnt.zero_points.push_back(128);
input_node = graph->Add(input_name, *input, qnt, Node::Role::kInput);
......@@ -123,39 +118,32 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
bool is_depthwise_mode = (ic == groups && oc == groups && groups != 1);
// Filter node
std::shared_ptr<Node> filter_node = nullptr;
imgdnn_tensor filter_tensor;
bool per_channel = isScalesPerChannel(weight_scale);
TensorInfoReset(&qnt);
uint8_t *weights_u8 =
graph->GetBuilder()->GetBufromPool(filter_dims.production());
if (enable_int8) {
char *weight_src = static_cast<char *>(filter->raw_data());
char *weight_src = static_cast<char *>(filter->raw_data());
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
qnt.scales.assign(weight_scale.begin(), weight_scale.end());
qnt.zero_points.assign(weight_scale.size(), 128);
qnt.count = oc;
qnt.axis = 1;
} else {
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
for (int i = 0; i < filter_dims.production(); i++) {
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
}
filter_node = graph->Add(filter_name,
weights_u8,
filter_dims.Vectorize(),
qnt,
Node::Role::kConst);
filter_tensor = filter_node->data();
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
qnt.scales.assign(weight_scale.begin(), weight_scale.end());
qnt.zero_points.assign(weight_scale.size(), 128);
qnt.count = oc;
qnt.axis = 1;
} else {
qnt.type = IMGDNN_TYPE_F32;
filter_node = graph->Add(filter_name, *filter, qnt, Node::Role::kConst);
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
for (int i = 0; i < filter_dims.production(); i++) {
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
}
std::shared_ptr<Node> filter_node = graph->Add(filter_name,
weights_u8,
filter_dims.Vectorize(),
qnt,
Node::Role::kConst);
imgdnn_tensor filter_tensor = filter_node->data();
// Add bias node if exists bias
// Supports the bias nodes with the following dimensions
......@@ -192,46 +180,38 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
}
TensorInfoReset(&qnt);
std::vector<int64_t> shapes{1, oc};
auto bias_data = bias->data<float, float>();
if (enable_int8) {
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(bias_data_size);
for (int i = 0; i < bias_data_size; i++)
qnt.scales[i] = input_scale * weight_scale[i];
qnt.zero_points.assign(bias_data_size, 0);
qnt.count = 2;
qnt.axis = 1;
} else {
qnt.scales.push_back(input_scale * weight_scale[0]);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1 << (quant_bits - 1)) - 1);
auto dtype_min = static_cast<int>(0 - dtype_max);
int32_t *bias_qnt_data =
reinterpret_cast<int32_t *>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < bias_data_size; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] =
std::min(std::max(static_cast<int>(bias_data[i] / current_scale),
dtype_min),
dtype_max);
}
bias_node = graph->Add(
bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(bias_data_size);
for (int i = 0; i < bias_data_size; i++)
qnt.scales[i] = input_scale * weight_scale[i];
qnt.zero_points.assign(bias_data_size, 0);
qnt.count = 2;
qnt.axis = 1;
} else {
qnt.type = IMGDNN_TYPE_F32;
std::vector<float> bias_float_data(bias_data,
bias_data + bias_data_size);
bias_node = graph->Add(
bias_name, bias_float_data.data(), shapes, qnt, Node::Role::kConst);
qnt.scales.push_back(input_scale * weight_scale[0]);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1 << (quant_bits - 1)) - 1);
auto dtype_min = static_cast<int>(0 - dtype_max);
auto bias_data = bias->data<float, float>();
int32_t *bias_qnt_data =
reinterpret_cast<int32_t *>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < bias_data_size; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] = std::min(
std::max(static_cast<int>(bias_data[i] / current_scale), dtype_min),
dtype_max);
}
std::vector<int64_t> shapes{1, oc};
bias_node =
graph->Add(bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
bias_tensor = bias_node->data();
}
}
......
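Both the conv bridge above and the fc bridge below quantize the float bias into int32 with scale `input_scale * weight_scale`, clamped to the 32-bit range. A standalone sketch of that arithmetic with made-up values follows; note the bridges derive the bounds from `quant_bits` rather than `<limits>`.

```cpp
// Standalone sketch of the int32 bias quantization the conv and fc bridges
// perform: bias_q = clamp(bias_f / (input_scale * weight_scale)).
// The scale and bias values below are made up for illustration.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>

int main() {
  const float input_scale = 0.5f;    // per-tensor input scale
  const float weight_scale = 0.25f;  // per-layer weight scale (weight_scale[0])
  const float bias_f = 3.0f;         // float bias value to be quantized

  // The bridges compute these bounds from quant_bits = 32.
  const int dtype_max = std::numeric_limits<int32_t>::max();
  const int dtype_min = -dtype_max;

  const float bias_scale = input_scale * weight_scale;  // 0.125
  const int32_t bias_q = std::min(
      std::max(static_cast<int>(bias_f / bias_scale), dtype_min), dtype_max);

  std::cout << "quantized bias: " << bias_q << "\n";  // prints 24
  return 0;
}
```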
......@@ -30,6 +30,9 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
CHECK(op_info->HasAttr("enable_int8") &&
op_info->GetAttr<bool>("enable_int8"));
auto input_name = op_info->Input("Input").front();
auto input = scope->FindTensor(input_name);
auto input_dims = input->dims();
......@@ -58,19 +61,12 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "[NNA] input dims: " << input_dims << " w dims: " << w_dims
<< " m: " << m << " k: " << k << " n: " << n;
// for quantization
bool enable_int8 = false;
float input_scale = 1.0;
float output_scale = 1.0;
std::vector<float> weight_scale;
TensorInfo qnt;
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
output_scale = op_info->GetAttr<float>("output_scale");
weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
}
CHECK(op_info->HasInputScale(input_name));
float input_scale = op_info->GetInputScale(input_name)[0];
CHECK(op_info->HasInputScale(weight_name));
std::vector<float> weight_scale = op_info->GetInputScale(weight_name);
CHECK(op_info->HasOutputScale(out_name));
float output_scale = op_info->GetOutputScale(out_name)[0];
// Create input node and reshape it to (m, k, 1, 1)
std::shared_ptr<Node> input_node = nullptr;
......@@ -84,21 +80,19 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::shared_ptr<Node> weight_node = nullptr;
bool per_channel = isScalesPerChannel(weight_scale);
uint8_t* weights_u8 = graph->GetBuilder()->GetBufromPool(w_dims.production());
if (enable_int8) {
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
LOG(FATAL)
<< "[NNA] FC per-channel quantization is not supported for Mirage";
} else {
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
const char* weight_src = static_cast<const char*>(weights->raw_data());
for (int i = 0; i < w_dims.production(); i++)
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
TensorInfo qnt;
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
LOG(FATAL)
<< "[NNA] FC per-channel quantization is not supported for Mirage";
} else {
LOG(FATAL) << "[NNA] PaddleLite Only 8-bits quantization.";
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
const char* weight_src = static_cast<const char*>(weights->raw_data());
for (int i = 0; i < w_dims.production(); i++)
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
weight_node = graph->Add(
weight_name, weights_u8, w_dims.Vectorize(), qnt, Node::Role::kConst);
......@@ -111,49 +105,43 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
bias_node = graph->Get(bias_name);
} else {
auto bias = scope->FindTensor(bias_name);
// CHECK_EQ(bias->precision(), PRECISION(kFloat));
auto bias_dims = bias->dims();
CHECK_EQ(bias_dims.production(), n);
if (enable_int8 && bias->precision() == PRECISION(kFloat)) {
TensorInfoReset(&qnt);
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(weight_scale.size());
qnt.count = bias_dims.size();
qnt.axis = 0;
for (int i = 0; i < weight_scale.size(); i++) {
qnt.scales[i] = input_scale * weight_scale[i];
}
LOG(FATAL)
<< "[NNA] per-channel quantization is not supported for FC";
} else {
qnt.scales.push_back(weight_scale.at(0) * input_scale);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1 << (quant_bits - 1)) - 1);
auto dtype_min = static_cast<int>(0 - dtype_max);
auto* bias_data = bias->data<float, float>();
int32_t* bias_qnt_data =
reinterpret_cast<int32_t*>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < n; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] =
std::min(std::max(static_cast<int>(bias_data[i] / current_scale),
dtype_min),
dtype_max);
TensorInfoReset(&qnt);
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(weight_scale.size());
qnt.count = bias_dims.size();
qnt.axis = 0;
for (int i = 0; i < weight_scale.size(); i++) {
qnt.scales[i] = input_scale * weight_scale[i];
}
std::vector<int64_t> shapes{1};
bias_node = graph->Add(
bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
LOG(FATAL) << "[NNA] per-channel quantization is not supported for FC";
} else {
qnt.type = IMGDNN_TYPE_F32;
bias_node = graph->Add(bias_name, *bias, qnt, Node::Role::kConst);
qnt.scales.push_back(weight_scale.at(0) * input_scale);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1 << (quant_bits - 1)) - 1);
auto dtype_min = static_cast<int>(0 - dtype_max);
auto* bias_data = bias->data<float, float>();
int32_t* bias_qnt_data =
reinterpret_cast<int32_t*>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < n; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] = std::min(
std::max(static_cast<int>(bias_data[i] / current_scale), dtype_min),
dtype_max);
}
std::vector<int64_t> shapes{1};
bias_node =
graph->Add(bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
}
bias_tensor = bias_node->data();
}
......
......@@ -32,6 +32,9 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
CHECK(op_info->HasAttr("enable_int8") &&
op_info->GetAttr<bool>("enable_int8"));
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
......@@ -43,11 +46,8 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
// for quantization
float output_scale = 1.0;
if (op_info->HasAttr("enable_int8")) {
output_scale = op_info->GetAttr<float>("output_scale");
}
CHECK(op_info->HasOutputScale(out_name));
float output_scale = op_info->GetOutputScale(out_name)[0];
// X node
std::shared_ptr<Node> x_node = nullptr;
......
......@@ -30,6 +30,9 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
CHECK(op_info->HasAttr("enable_int8") &&
op_info->GetAttr<bool>("enable_int8"));
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
......@@ -41,11 +44,9 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
axis += x_rank;
}
// for quantization
float output_scale = 1.0;
if (op_info->HasAttr("enable_int8")) {
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
float output_scale = op_info->GetOutputScale(out_name)[0];
// X node
std::shared_ptr<Node> x_node = nullptr;
......
......@@ -28,6 +28,7 @@ namespace kernels {
namespace imagination_nna {
bool SubgraphEngine::BuildDeviceProgram() {
device_program_ready = false;
int status = 0;
// Convert all of ops and their input vars and weights and added into the NNA
// IMG IR graph
......@@ -44,7 +45,6 @@ bool SubgraphEngine::BuildDeviceProgram() {
op->InferShape();
std::string op_type = op->op_info()->Type();
if (!bridges.Exists(op_type, TARGET(kImaginationNNA))) {
// return subgraph::FAILED;
return false;
}
auto kernel = inst.kernel();
......@@ -53,7 +53,6 @@ bool SubgraphEngine::BuildDeviceProgram() {
const_cast<OpLite*>(op),
const_cast<KernelBase*>(kernel));
if (subgraph::CHECK_FAILED(status)) {
// return subgraph::FAILED;
return false;
}
}
......@@ -156,11 +155,15 @@ bool SubgraphEngine::BuildDeviceProgram() {
break;
}
}
device_program_ready = true;
return true;
}
bool SubgraphEngine::LaunchDeviceProgram() {
if (!device_program_ready) // build device program fail
LaunchOriginProgram();
// Set input buffer
for (size_t i = 0; i < origin_itensors_.size(); i++) {
// check input shapes
......
......@@ -55,6 +55,7 @@ class SubgraphEngine : public subgraph::Engine {
std::vector<imgdnn_input> device_itensors_;
std::vector<imgdnn_output> device_otensors_;
lite::imagination_nna::ImgdnnManager imgdnn_mgr_;
bool device_program_ready{false};
};
class SubgraphCompute : public KernelLite<TARGET(kImaginationNNA),
......
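The last two hunks add a `device_program_ready` guard to `SubgraphEngine`: the flag is cleared on entry to `BuildDeviceProgram`, set only after the whole NNA graph is built, and checked before launching so that a failed build falls back to the original program. A minimal standalone sketch of the pattern is below; the method bodies are placeholders, and returning the fallback's result directly is an assumption on my part, since the diff only shows the call.

```cpp
// Minimal sketch of the build-guard pattern introduced in SubgraphEngine.
// Names mirror the diff; bodies are placeholders, and returning
// LaunchOriginProgram()'s result is an assumption, not taken from the diff.
#include <iostream>

class SubgraphEngineSketch {
 public:
  bool BuildDeviceProgram() {
    device_program_ready_ = false;
    // ... convert every op into the NNA IMG IR graph, returning false on
    // any unsupported op or failed bridge ...
    device_program_ready_ = true;
    return true;
  }

  bool LaunchDeviceProgram() {
    if (!device_program_ready_)  // building the device program failed
      return LaunchOriginProgram();
    // ... set input buffers and run the compiled NNA program ...
    return true;
  }

 private:
  bool LaunchOriginProgram() {
    std::cout << "falling back to the origin program\n";
    return true;
  }
  bool device_program_ready_{false};
};
```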