Unverified · Commit b2e7d97c authored by hong19860320 and committed by GitHub


[RKNPU] Adapt to the changes in the quantization parameters, and support the fully quantized MobileNetV1 model (#4046)

* [RKNPU] Adapt to the changes in the quantization parameters, and support the fully quantized MobileNetV1 model
test=develop

* [Doc] Update the docs for APU and RKNPU
test=develop
Parent 898792ca
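The substantive change in the converter hunks below is the move from per-op scale attributes ("input_scale", "output_scale", "weight_scale") to the per-tensor scale API on OpInfo. A minimal before/after sketch of that pattern, with the tensor names (x_name, out_name, filter_name) assumed from the surrounding converter code:
```c++
// Old pattern (removed by this patch): scales stored as op attributes.
// float input_scale  = op_info->GetAttr<float>("input_scale");
// float output_scale = op_info->GetAttr<float>("output_scale");
// auto  weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");

// New pattern: scales are queried per tensor name; activations carry a
// single scale, while weights may carry a per-channel vector of scales.
CHECK(op_info->HasInputScale(x_name));
float input_scale = op_info->GetInputScale(x_name)[0];
CHECK(op_info->HasOutputScale(out_name));
float output_scale = op_info->GetOutputScale(out_name)[0];
CHECK(op_info->HasInputScale(filter_name));
std::vector<float> weight_scale = op_info->GetInputScale(filter_name);
```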
......@@ -159,12 +159,18 @@ $ git checkout <release-version-tag>
$ wget https://paddlelite-demo.bj.bcebos.com/devices/mediatek/apu_ddk.tar.gz
$ tar -xvf apu_ddk.tar.gz
```
- Build tiny_publish for MT8168-P2V1 Tablet
- Build tiny_publish for MT8168-P2V1 Tablet and Smart TVs (S900)
```shell
$ ./lite/tools/build.sh --arm_os=android --arm_abi=armv8 --arm_lang=gcc --android_stl=c++_shared --build_extra=ON --with_log=ON --build_apu=ON --apu_ddk_root=./apu_ddk tiny_publish
For MT8168-P2V1 Tablet
$ ./lite/tools/build_android.sh --android_stl=c++_shared --with_extra=ON --with_log=ON --with_mediatek_apu=ON --mediatek_apu_sdk_root=./apu_ddk
For Smart TVs (S900)
$ ./lite/tools/build_android.sh --arch=armv7 --android_stl=c++_shared --with_extra=ON --with_log=ON --with_mediatek_apu=ON --mediatek_apu_sdk_root=./apu_ddk
```
- Replace the PaddleLite-android-demo/libs/PaddleLite/arm64-v8a/include directory with the generated build.lite.android.armv8.gcc/inference_lite_lib.android.armv8.apu/cxx/include;
- Replace the PaddleLite-android-demo/libs/PaddleLite/arm64-v8a/lib/libpaddle_light_api_shared.so file with the generated build.lite.android.armv8.gcc/inference_lite_lib.android.armv8.apu/cxx/lib/libpaddle_light_api_shared.so.
- Replace the PaddleLite-android-demo/libs/PaddleLite/arm64-v8a/lib/libpaddle_light_api_shared.so file with the generated build.lite.android.armv8.gcc/inference_lite_lib.android.armv8.apu/cxx/lib/libpaddle_light_api_shared.so;
- Replace the PaddleLite-android-demo/libs/PaddleLite/armeabi-v7a/include directory with the generated build.lite.android.armv7.gcc/inference_lite_lib.android.armv7.apu/cxx/include;
- Replace the PaddleLite-android-demo/libs/PaddleLite/armeabi-v7a/lib/libpaddle_light_api_shared.so file with the generated build.lite.android.armv7.gcc/inference_lite_lib.android.armv7.apu/cxx/lib/libpaddle_light_api_shared.so.
## Other notes
......
......@@ -137,20 +137,26 @@ $ cd Paddle-Lite
$ git checkout <release-version-tag>
$ git clone https://github.com/airockchip/rknpu_ddk.git
```
- Build full_publish and tiny_publish for RK1808 and RK1806 EVB
- Build tiny_publish and full_publish for RK1808 and RK1806 EVB
```shell
For RK1808 EVB
$ ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv8 --arm_lang=gcc --build_extra=ON --with_log=ON --build_rknpu=ON --rknpu_ddk_root=./rknpu_ddk full_publish
$ ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv8 --arm_lang=gcc --build_extra=ON --with_log=ON --build_rknpu=ON --rknpu_ddk_root=./rknpu_ddk tiny_publish
tiny_publish
$ ./lite/tools/build_linux.sh --with_extra=ON --with_log=ON --with_rockchip_npu=ON --rockchip_npu_sdk_root=./rknpu_ddk
full_publish
$ ./lite/tools/build_linux.sh --with_extra=ON --with_log=ON --with_rockchip_npu=ON --rockchip_npu_sdk_root=./rknpu_ddk full_publish
For RK1806 EVB
$ ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv7 --arm_lang=gcc --build_extra=ON --with_log=ON --build_rknpu=ON --rknpu_ddk_root=./rknpu_ddk full_publish
$ ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv7 --arm_lang=gcc --build_extra=ON --with_log=ON --build_rknpu=ON --rknpu_ddk_root=./rknpu_ddk tiny_publish
tiny_publish
$ ./lite/tools/build_linux.sh --arch=armv7 --with_extra=ON --with_log=ON --with_rockchip_npu=ON --rockchip_npu_sdk_root=./rknpu_ddk
full_publish
$ ./lite/tools/build_linux.sh --arch=armv7 --with_extra=ON --with_log=ON --with_rockchip_npu=ON --rockchip_npu_sdk_root=./rknpu_ddk full_publish
```
- Replace the PaddleLite-linux-demo/libs/PaddleLite/arm64/include directory with the generated build.lite.armlinux.armv8.gcc/inference_lite_lib.armlinux.armv8.rknpu/cxx/include;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/arm64/lib/libpaddle_light_api_shared.so file with the generated build.lite.armlinux.armv8.gcc/inference_lite_lib.armlinux.armv8.rknpu/cxx/lib/libpaddle_light_api_shared.so;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/arm64/lib/libpaddle_light_api_shared.so file with the build.lite.armlinux.armv8.gcc/inference_lite_lib.armlinux.armv8.rknpu/cxx/lib/libpaddle_light_api_shared.so generated in tiny_publish mode;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/arm64/lib/libpaddle_full_api_shared.so file with the build.lite.armlinux.armv8.gcc/inference_lite_lib.armlinux.armv8.rknpu/cxx/lib/libpaddle_full_api_shared.so generated in full_publish mode;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/armhf/include directory with the generated build.lite.armlinux.armv7.gcc/inference_lite_lib.armlinux.armv7.rknpu/cxx/include;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/armhf/lib/libpaddle_light_api_shared.so file with the generated build.lite.armlinux.armv7.gcc/inference_lite_lib.armlinux.armv7.rknpu/cxx/lib/libpaddle_light_api_shared.so.
- Replace the PaddleLite-linux-demo/libs/PaddleLite/armhf/lib/libpaddle_light_api_shared.so file with the build.lite.armlinux.armv7.gcc/inference_lite_lib.armlinux.armv7.rknpu/cxx/lib/libpaddle_light_api_shared.so generated in tiny_publish mode;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/armhf/lib/libpaddle_full_api_shared.so file with the build.lite.armlinux.armv7.gcc/inference_lite_lib.armlinux.armv7.rknpu/cxx/lib/libpaddle_full_api_shared.so generated in full_publish mode.
## Other notes
......
......@@ -32,30 +32,18 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x_type = kernel->GetInputDeclType("X");
CHECK(x_type->layout() == DATALAYOUT(kNCHW));
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto scale_name = op_info->Input("Scale").front();
auto scale_type = kernel->GetInputDeclType("Scale");
CHECK(scale_type->layout() == DATALAYOUT(kNCHW));
auto scale = scope->FindMutableTensor(scale_name);
auto bias_name = op_info->Input("Bias").front();
auto bias_type = kernel->GetInputDeclType("Bias");
CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
auto bias = scope->FindMutableTensor(bias_name);
auto mean_name = op_info->Input("Mean").front();
auto mean_type = kernel->GetInputDeclType("Mean");
CHECK(mean_type->layout() == DATALAYOUT(kNCHW));
auto mean = scope->FindMutableTensor(mean_name);
auto variance_name = op_info->Input("Variance").front();
auto variance_type = kernel->GetInputDeclType("Variance");
CHECK(variance_type->layout() == DATALAYOUT(kNCHW));
auto variance = scope->FindMutableTensor(variance_name);
auto y_name = op_info->Output("Y").front();
auto y_type = kernel->GetOutputDeclType("Y");
auto y = scope->FindMutableTensor(y_name);
CHECK(y_type->layout() == DATALAYOUT(kNCHW));
float momentum = op_info->GetAttr<float>("momentum");
float epsilon = op_info->GetAttr<float>("epsilon");
int mode = 1; // bnScale, bnBias tensor dims are 1xCx1x1
......@@ -71,9 +59,11 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(y_name));
output_scale = op_info->GetOutputScale(y_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......
......@@ -32,9 +32,7 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Get input and output vars and op attributes
auto x_names = op_info->Input("X");
auto x_type = kernel->GetInputDeclType("X");
auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out");
auto output = scope->FindMutableTensor(out_name);
auto axis = op_info->GetAttr<int>("axis");
......@@ -50,9 +48,9 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......@@ -77,12 +75,13 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
qnt.enable_int8 = enable_int8;
if (enable_int8) {
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
qnt.quant_bits = bit_length;
qnt.scale.push_back(input_scale);
x->mutable_data<int8_t>();
}
x_node =
graph->Add(x_name, *x, x_type->precision(), x_type->layout(), qnt);
x_node = graph->Add(x_name, *x, precision, layout, qnt);
}
inputs.push_back(x_node->data());
......
......@@ -59,7 +59,8 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK_EQ(dilations.size(), 2L);
// Check depthwise mode
bool is_depthwise_mode = (ic == groups && oc == groups && groups != 1);
auto weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
CHECK(op_info->HasInputScale(filter_name));
auto weight_scale = op_info->GetInputScale(filter_name);
// for quantization
bool enable_int8 = false;
......@@ -71,9 +72,11 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(input_name));
input_scale = op_info->GetInputScale(input_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(output_name));
output_scale = op_info->GetOutputScale(output_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......
......@@ -56,11 +56,9 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x_type = kernel->GetInputDeclType("X");
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto y_name = op_info->Input("Y").front();
auto y_type = kernel->GetInputDeclType("Y");
auto y = scope->FindMutableTensor(y_name);
auto y_dims = y->dims();
auto out_name = op_info->Output("Out").front();
......@@ -78,9 +76,11 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......@@ -100,7 +100,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
qnt.scale.push_back(input_scale);
qnt.quant_bits = op_info->GetAttr<int>("bit_length");
}
x_node = graph->Add(x_name, *x, x_type->precision(), x_type->layout(), qnt);
x_node = graph->Add(x_name, *x, precision, layout, qnt);
}
// Y node
......@@ -118,7 +118,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
qnt.scale.clear();
qnt.scale.push_back(input_scale);
}
y_node = graph->Add(y_name, *y, y_type->precision(), y_type->layout(), qnt);
y_node = graph->Add(y_name, *y, precision, layout, qnt);
}
std::shared_ptr<Node> output_node = nullptr;
......@@ -133,8 +133,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
output->mutable_data<int8_t>();
}
output_node = graph->Add(
out_name, *output, x_type->precision(), x_type->layout(), output_qnt);
output_node = graph->Add(out_name, *output, precision, layout, output_qnt);
std::vector<std::shared_ptr<rk::nn::Tensor>> inputs;
std::vector<std::shared_ptr<rk::nn::Tensor>> outputs;
......
......@@ -31,17 +31,14 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "[RKNPU] Converting " + op_type + "...";
auto input_name = op_info->Input("Input").front();
auto input_type = kernel->GetInputDeclType("Input");
auto input = scope->FindMutableTensor(input_name);
auto input_dims = input->dims();
CHECK_GE(input_dims.size(), 2UL);
auto w_name = op_info->Input("W").front();
auto w_type = kernel->GetInputDeclType("W");
auto w = scope->FindMutableTensor(w_name);
auto w_dims = w->dims();
CHECK_EQ(w_dims.size(), 2UL);
auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out");
auto output = scope->FindMutableTensor(out_name);
int in_num_col_dims = op_info->GetAttr<int>("in_num_col_dims");
int m = input_dims.Slice(0, in_num_col_dims).production();
......@@ -61,9 +58,11 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(input_name));
input_scale = op_info->GetInputScale(input_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
}
......@@ -86,7 +85,8 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (enable_int8) {
QuantizationInfo filter_qnt;
auto weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
CHECK(op_info->HasInputScale(w_name));
auto weight_scale = op_info->GetInputScale(w_name);
filter_qnt.enable_int8 = enable_int8;
filter_qnt.scale = weight_scale;
filter_qnt.quant_bits = bit_length;
......@@ -99,8 +99,8 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
transpose_w_data[j * k + i] = w_data[i * n + j];
}
}
trans_w_node = graph->Add(
w_name, *transpose_w, precision, w_type->layout(), filter_qnt);
trans_w_node =
graph->Add(w_name, *transpose_w, precision, layout, filter_qnt);
} else {
auto transpose_w_data = transpose_w->mutable_data<float>();
auto w_data = w->mutable_data<float>();
......@@ -110,8 +110,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
transpose_w_data[j * k + i] = w_data[i * n + j];
}
}
trans_w_node =
graph->Add(w_name, *transpose_w, precision, w_type->layout());
trans_w_node = graph->Add(w_name, *transpose_w, precision, layout);
}
// Add bias node if bias tensor exists
......@@ -132,8 +131,8 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (enable_int8) {
auto bias_name_qnt = bias_name + "/qnt";
auto* bias_qnt = scope->NewTensor(bias_name_qnt);
auto weight_scale =
op_info->GetAttr<std::vector<float>>("weight_scale");
CHECK(op_info->HasInputScale(w_name));
auto weight_scale = op_info->GetInputScale(w_name);
bias_qnt->Resize(bias_shape);
bias_qnt->set_persistable(true);
......@@ -176,7 +175,8 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
bias->set_persistable(true);
if (enable_int8) {
auto weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
CHECK(op_info->HasInputScale(w_name));
auto weight_scale = op_info->GetInputScale(w_name);
bias->set_precision(PrecisionType::kInt32);
auto* bias_data = bias->mutable_data<int32_t>();
......
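In the fully quantized path, the per-channel weight scales retrieved above are combined with the input scale to quantize the float bias to int32. The helper below is an illustrative sketch of that standard computation, not code from this patch; the function name and the per-layer fallback are assumptions:
```c++
#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical helper: symmetric int32 quantization of a float bias, using
// scale = input_scale * weight_scale (per output channel when weight_scale
// holds one value per channel, otherwise falling back to a per-layer scale).
std::vector<int32_t> QuantizeBias(const std::vector<float>& bias,
                                  float input_scale,
                                  const std::vector<float>& weight_scale) {
  std::vector<int32_t> quantized(bias.size());
  for (size_t i = 0; i < bias.size(); ++i) {
    const float ws =
        i < weight_scale.size() ? weight_scale[i] : weight_scale.front();
    quantized[i] =
        static_cast<int32_t>(std::round(bias[i] / (input_scale * ws)));
  }
  return quantized;
}
```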
......@@ -55,9 +55,11 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (x->precision() == PRECISION(kInt8)) {
// enable_int8 = op_info->GetAttr<bool>("enable_int8");
enable_int8 = true;
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......@@ -132,18 +134,16 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
ceil_mode = op_info->GetAttr<bool>("ceil_mode") ? 1 : 0;
}
std::shared_ptr<Node> output_node = nullptr;
QuantizationInfo output_qnt;
output_qnt.enable_int8 = enable_int8;
if (enable_int8) {
output_qnt.quant_bits = bit_length;
output_qnt.scale.push_back(output_scale);
output->mutable_data<int8_t>();
}
output_node = graph->Add(out_name, *output, precision, layout, output_qnt);
auto output_node =
graph->Add(out_name, *output, precision, layout, output_qnt);
std::vector<std::shared_ptr<rk::nn::Tensor>> inputs;
std::vector<std::shared_ptr<rk::nn::Tensor>> outputs;
......
......@@ -32,14 +32,10 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x_type = kernel->GetInputDeclType("X");
CHECK(x_type->layout() == DATALAYOUT(kNCHW));
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto x_rank = x_dims.size();
auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out");
CHECK(out_type->layout() == DATALAYOUT(kNCHW));
auto output = scope->FindMutableTensor(out_name);
auto axis = op_info->GetAttr<int>("axis");
if (axis < 0) {
......@@ -56,9 +52,11 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......
......@@ -25,6 +25,9 @@ WITH_STRIP=OFF
# options of compiling NPU lib.
WITH_HUAWEI_KIRIN_NPU=OFF
HUAWEI_KIRIN_NPU_SDK_ROOT="$(pwd)/ai_ddk_lib/" # Download HiAI DDK from https://developer.huawei.com/consumer/cn/hiai/
# options of compiling APU lib.
WITH_MEDIATEK_APU=OFF
MEDIATEK_APU_SDK_ROOT="$(pwd)/apu_ddk" # Download APU SDK from https://paddlelite-demo.bj.bcebos.com/devices/mediatek/apu_ddk.tar.gz
# options of compiling OPENCL lib.
WITH_OPENCL=OFF
# options of adding training ops
......@@ -154,6 +157,8 @@ function make_tiny_publish_so {
-DLITE_WITH_CV=$WITH_CV \
-DLITE_WITH_NPU=$WITH_HUAWEI_KIRIN_NPU \
-DNPU_DDK_ROOT=$HUAWEI_KIRIN_NPU_SDK_ROOT \
-DLITE_WITH_APU=$WITH_MEDIATEK_APU \
-DAPU_DDK_ROOT=$MEDIATEK_APU_SDK_ROOT \
-DLITE_WITH_OPENCL=$WITH_OPENCL \
-DARM_TARGET_ARCH_ABI=$ARCH \
-DARM_TARGET_LANG=$TOOLCHAIN \
......@@ -204,6 +209,8 @@ function make_full_publish_so {
-DLITE_WITH_CV=$WITH_CV \
-DLITE_WITH_NPU=$WITH_HUAWEI_KIRIN_NPU \
-DNPU_DDK_ROOT=$HUAWEI_KIRIN_NPU_SDK_ROOT \
-DLITE_WITH_APU=$WITH_MEDIATEK_APU \
-DAPU_DDK_ROOT=$MEDIATEK_APU_SDK_ROOT \
-DLITE_WITH_OPENCL=$WITH_OPENCL \
-DARM_TARGET_ARCH_ABI=$ARCH \
-DARM_TARGET_LANG=$TOOLCHAIN \
......@@ -257,6 +264,13 @@ function print_usage {
echo -e "| you can download huawei HiAi DDK from: https://developer.huawei.com/consumer/cn/hiai/ |"
echo -e "| detailed information about Paddle-Lite NPU: https://paddle-lite.readthedocs.io/zh/latest/demo_guides/npu.html |"
echo -e "| |"
echo -e "| arguments of apu library compiling:(armv8, gcc, c++_static) |"
echo -e "| ./lite/tools/build_android.sh --with_mediatek_apu=ON --mediatek_apu_sdk_root=YourApuSdkPath |"
echo -e "| --with_mediatek_apu: (OFF|ON); controls whether to compile lib for mediatek_apu, default is OFF |"
echo -e "| --mediatek_apu_sdk_root: (path to mediatek APU SDK file) required when compiling apu library |"
echo -e "| you can download mediatek APU SDK from: https://paddlelite-demo.bj.bcebos.com/devices/mediatek/apu_ddk.tar.gz |"
echo -e "| detailed information about Paddle-Lite APU: https://paddle-lite.readthedocs.io/zh/latest/demo_guides/mediatek_apu.html |"
echo -e "| |"
echo -e "| arguments of opencl library compiling:(armv8, gcc, c++_static) |"
echo -e "| ./lite/tools/build_android.sh --with_opencl=ON |"
echo -e "| --with_opencl: (OFF|ON); controls whether to compile lib for opencl, default is OFF |"
......@@ -351,6 +365,15 @@ function main {
HUAWEI_KIRIN_NPU_SDK_ROOT="${i#*=}"
shift
;;
# compiling lib which can operate on mediatek apu.
--with_mediatek_apu=*)
WITH_MEDIATEK_APU="${i#*=}"
shift
;;
--mediatek_apu_sdk_root=*)
MEDIATEK_APU_SDK_ROOT="${i#*=}"
shift
;;
# compiling result contains both light_api and cxx_api lib.
full_publish)
make_full_publish_so
......
......@@ -26,7 +26,7 @@ OPTMODEL_DIR=""
WITH_OPENCL=OFF
# options of compiling rockchip NPU lib.
WITH_ROCKCHIP_NPU=OFF
ROCKCHIP_NPU_SDK_ROOT=""
ROCKCHIP_NPU_SDK_ROOT="$(pwd)/rknpu_ddk" # Download RKNPU SDK from https://github.com/airockchip/rknpu_ddk.git
# options of compiling baidu XPU lib.
WITH_BAIDU_XPU=OFF
BAIDU_XPU_SDK_ROOT=""
......@@ -229,6 +229,8 @@ function print_usage {
echo -e "| ./lite/tools/build_linux.sh --with_rockchip_npu=ON --rockchip_npu_sdk_root=YourRockchipNpuSdkPath |"
echo -e "| --with_rockchip_npu: (OFF|ON); controls whether to compile lib for rockchip_npu, default is OFF |"
echo -e "| --rockchip_npu_sdk_root: (path to rockchip_npu DDK file) required when compiling rockchip_npu library |"
echo -e "| you can download rockchip NPU SDK from: https://github.com/airockchip/rknpu_ddk.git |"
echo -e "| detailed information about Paddle-Lite RKNPU: https://paddle-lite.readthedocs.io/zh/latest/demo_guides/rockchip_npu.html |"
echo -e "| |"
echo -e "| arguments of baidu xpu library compiling: |"
echo -e "| ./lite/tools/build_linux.sh --with_baidu_xpu=ON --baidu_xpu_sdk_root=YourBaiduXpuSdkPath |"
......