Commit 4b0a4230, authored by 开心的小妮

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle-Lite into fix-opencl-concat

......@@ -159,12 +159,18 @@ $ git checkout <release-version-tag>
$ wget https://paddlelite-demo.bj.bcebos.com/devices/mediatek/apu_ddk.tar.gz
$ tar -xvf apu_ddk.tar.gz
```
- Build tiny_publish for MT8168-P2V1 Tablet
- Build tiny_publish for MT8168-P2V1 Tablet and Smart TVs (S900)
```shell
$ ./lite/tools/build.sh --arm_os=android --arm_abi=armv8 --arm_lang=gcc --android_stl=c++_shared --build_extra=ON --with_log=ON --build_apu=ON --apu_ddk_root=./apu_ddk tiny_publish
For MT8168-P2V1 Tablet
$ ./lite/tools/build_android.sh --android_stl=c++_shared --with_extra=ON --with_log=ON --with_mediatek_apu=ON --mediatek_apu_sdk_root=./apu_ddk
For Smart TVs(S900)
$ ./lite/tools/build_android.sh --arch=armv7 --android_stl=c++_shared --with_extra=ON --with_log=ON --with_mediatek_apu=ON --mediatek_apu_sdk_root=./apu_ddk
```
- Replace the PaddleLite-android-demo/libs/PaddleLite/arm64-v8a/include directory with the generated build.lite.android.armv8.gcc/inference_lite_lib.android.armv8.apu/cxx/include;
- Replace the PaddleLite-android-demo/libs/PaddleLite/arm64-v8a/lib/libpaddle_light_api_shared.so file with the generated build.lite.android.armv8.gcc/inference_lite_lib.android.armv8.apu/cxx/lib/libpaddle_light_api_shared.so.
- Replace the PaddleLite-android-demo/libs/PaddleLite/arm64-v8a/lib/libpaddle_light_api_shared.so file with the generated build.lite.android.armv8.gcc/inference_lite_lib.android.armv8.apu/cxx/lib/libpaddle_light_api_shared.so;
- Replace the PaddleLite-android-demo/libs/PaddleLite/armeabi-v7a/include directory with the generated build.lite.android.armv7.gcc/inference_lite_lib.android.armv7.apu/cxx/include;
- Replace the PaddleLite-android-demo/libs/PaddleLite/armeabi-v7a/lib/libpaddle_light_api_shared.so file with the generated build.lite.android.armv7.gcc/inference_lite_lib.android.armv7.apu/cxx/lib/libpaddle_light_api_shared.so.
## Other Notes
......
......@@ -137,20 +137,26 @@ $ cd Paddle-Lite
$ git checkout <release-version-tag>
$ git clone https://github.com/airockchip/rknpu_ddk.git
```
- Build full_publish and tiny_publish for RK1808 and RK1806 EVB
- Build tiny_publish and full_publish for RK1808 and RK1806 EVB
```shell
For RK1808 EVB
$ ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv8 --arm_lang=gcc --build_extra=ON --with_log=ON --build_rknpu=ON --rknpu_ddk_root=./rknpu_ddk full_publish
$ ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv8 --arm_lang=gcc --build_extra=ON --with_log=ON --build_rknpu=ON --rknpu_ddk_root=./rknpu_ddk tiny_publish
tiny_publish
$ ./lite/tools/build_linux.sh --with_extra=ON --with_log=ON --with_rockchip_npu=ON --rockchip_npu_sdk_root=./rknpu_ddk
full_publish
$ ./lite/tools/build_linux.sh --with_extra=ON --with_log=ON --with_rockchip_npu=ON --rockchip_npu_sdk_root=./rknpu_ddk full_publish
For RK1806 EVB
$ ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv7 --arm_lang=gcc --build_extra=ON --with_log=ON --build_rknpu=ON --rknpu_ddk_root=./rknpu_ddk full_publish
$ ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv7 --arm_lang=gcc --build_extra=ON --with_log=ON --build_rknpu=ON --rknpu_ddk_root=./rknpu_ddk tiny_publish
tiny_publish
$ ./lite/tools/build_linux.sh --arch=armv7 --with_extra=ON --with_log=ON --with_rockchip_npu=ON --rockchip_npu_sdk_root=./rknpu_ddk
full_publish
$ ./lite/tools/build_linux.sh --arch=armv7 --with_extra=ON --with_log=ON --with_rockchip_npu=ON --rockchip_npu_sdk_root=./rknpu_ddk full_publish
```
- Replace the PaddleLite-linux-demo/libs/PaddleLite/arm64/include directory with the generated build.lite.armlinux.armv8.gcc/inference_lite_lib.armlinux.armv8.rknpu/cxx/include;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/arm64/lib/libpaddle_light_api_shared.so file with the generated build.lite.armlinux.armv8.gcc/inference_lite_lib.armlinux.armv8.rknpu/cxx/lib/libpaddle_light_api_shared.so;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/arm64/lib/libpaddle_light_api_shared.so file with the build.lite.armlinux.armv8.gcc/inference_lite_lib.armlinux.armv8.rknpu/cxx/lib/libpaddle_light_api_shared.so generated in tiny_publish mode;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/arm64/lib/libpaddle_full_api_shared.so file with the build.lite.armlinux.armv8.gcc/inference_lite_lib.armlinux.armv8.rknpu/cxx/lib/libpaddle_full_api_shared.so generated in full_publish mode;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/armhf/include directory with the generated build.lite.armlinux.armv7.gcc/inference_lite_lib.armlinux.armv7.rknpu/cxx/include;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/armhf/lib/libpaddle_light_api_shared.so file with the generated build.lite.armlinux.armv7.gcc/inference_lite_lib.armlinux.armv7.rknpu/cxx/lib/libpaddle_light_api_shared.so.
- Replace the PaddleLite-linux-demo/libs/PaddleLite/armhf/lib/libpaddle_light_api_shared.so file with the build.lite.armlinux.armv7.gcc/inference_lite_lib.armlinux.armv7.rknpu/cxx/lib/libpaddle_light_api_shared.so generated in tiny_publish mode;
- Replace the PaddleLite-linux-demo/libs/PaddleLite/armhf/lib/libpaddle_full_api_shared.so file with the build.lite.armlinux.armv7.gcc/inference_lite_lib.armlinux.armv7.rknpu/cxx/lib/libpaddle_full_api_shared.so generated in full_publish mode (a minimal light-API usage sketch follows below).
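
For reference, the libpaddle_light_api_shared.so replaced above backs Paddle-Lite's light (MobileConfig) API, while libpaddle_full_api_shared.so backs the full (CxxConfig) API. The snippet below is only a minimal sketch of how the light API is typically driven from such a demo, assuming a model that has already been converted to the .nb format with the opt tool; the model path and input shape are placeholders, not part of this patch.

```c++
#include <iostream>

#include "paddle_api.h"  // shipped in inference_lite_lib.*/cxx/include

int main() {
  // Load a model converted offline with the opt tool (path is a placeholder).
  paddle::lite_api::MobileConfig config;
  config.set_model_from_file("model.nb");

  auto predictor =
      paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::MobileConfig>(
          config);

  // Fill the first input tensor (shape chosen only for illustration).
  auto input = predictor->GetInput(0);
  input->Resize({1, 3, 224, 224});
  auto* in_data = input->mutable_data<float>();
  for (int i = 0; i < 1 * 3 * 224 * 224; ++i) in_data[i] = 1.f;

  predictor->Run();

  auto output = predictor->GetOutput(0);
  std::cout << "first output value: " << output->data<float>()[0] << std::endl;
  return 0;
}
```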
## Other Notes
......
......@@ -25,6 +25,8 @@
#include "lite/core/profile/basic_profiler.h"
#endif // LITE_WITH_PROFILE
#include <gflags/gflags.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
using paddle::lite::profile::Timer;
......
......@@ -137,10 +137,11 @@ void ElementwiseSubCompute::Run() {
auto x_dims = param.X->dims();
auto y_dims = param.Y->dims();
int pre, n, post;
if (x_dims.size() < y_dims.size()) {
LOG(FATAL) << "elewise div don't support x_dims size < y_dims size";
}
if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) {
if (x_dims.size() < y_dims.size() &&
is_broadcast(y_dims, x_dims, axis, &pre, &n, &post)) {
lite::arm::math::elementwise_sub_broadcast(
y_data, x_data, out_data, pre, n, post);
} else if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) {
lite::arm::math::elementwise_sub_broadcast(
x_data, y_data, out_data, pre, n, post);
} else {
......@@ -158,24 +159,21 @@ void ElementwiseSubActivationCompute::Run() {
std::string act_type = param.act_type;
auto x_dims = param.X->dims();
auto y_dims = param.Y->dims();
if (x_dims.size() < y_dims.size()) {
LOG(FATAL) << "elewise div don't support x_dims size < y_dims size";
}
int pre, n, post;
if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) {
if (act_type == "relu") {
lite::arm::math::elementwise_sub_relu_broadcast(
x_data, y_data, out_data, pre, n, post);
} else {
LOG(FATAL) << "unsupported Activation type: " << act_type;
}
if (act_type != "relu") {
LOG(FATAL) << "unsupported Activation type: " << act_type;
}
if (x_dims.size() < y_dims.size() &&
is_broadcast(y_dims, x_dims, axis, &pre, &n, &post)) {
lite::arm::math::elementwise_sub_relu_broadcast(
y_data, x_data, out_data, pre, n, post);
} else if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) {
lite::arm::math::elementwise_sub_relu_broadcast(
x_data, y_data, out_data, pre, n, post);
} else {
if (act_type == "relu") {
lite::arm::math::elementwise_sub_relu(
x_data, y_data, out_data, x_dims.production());
} else {
LOG(FATAL) << "unsupported Activation type: " << act_type;
}
lite::arm::math::elementwise_sub_relu(
x_data, y_data, out_data, x_dims.production());
}
}
......
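
The elementwise_sub changes above drop the old LOG(FATAL) guard and instead retry the broadcast check with the operands swapped, so the kernels now also accept the case x_dims.size() < y_dims.size(). The is_broadcast helper itself is not part of this diff; the sketch below (function name and signature are illustrative, not Paddle-Lite's actual API) shows the kind of pre/n/post decomposition such a helper is expected to compute, assuming the smaller shape must match a contiguous run of the larger one starting at axis.

```c++
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative stand-in for an is_broadcast-style pre/n/post decomposition:
// `big` is the shape of the larger-rank tensor, `small` the shape of the
// smaller one, and `axis` is where `small` aligns inside `big` (axis < 0
// means align with the trailing dimensions). Returns false if the shapes
// cannot be broadcast this way.
bool DecomposeBroadcast(const std::vector<int64_t>& big,
                        const std::vector<int64_t>& small,
                        int axis,
                        int64_t* pre, int64_t* n, int64_t* post) {
  if (axis < 0) {
    axis = static_cast<int>(big.size()) - static_cast<int>(small.size());
  }
  if (axis < 0 || axis + small.size() > big.size()) return false;
  *pre = 1;
  *n = 1;
  *post = 1;
  for (int i = 0; i < axis; ++i) *pre *= big[i];
  for (size_t i = 0; i < small.size(); ++i) {
    if (big[axis + i] != small[i]) return false;  // dims must match exactly
    *n *= small[i];
  }
  for (size_t i = axis + small.size(); i < big.size(); ++i) *post *= big[i];
  return true;
}

int main() {
  // Example: X has shape [4] and Y has shape [2, 3, 4]. The patched kernel
  // notices x_dims.size() < y_dims.size() and swaps the operands, so Y plays
  // the role of `big` here and the decomposition is pre = 6, n = 4, post = 1.
  int64_t pre = 0, n = 0, post = 0;
  if (DecomposeBroadcast({2, 3, 4}, {4}, /*axis=*/-1, &pre, &n, &post)) {
    std::cout << pre << " " << n << " " << post << std::endl;  // prints: 6 4 1
  }
  return 0;
}
```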
......@@ -7,6 +7,7 @@ add_kernel(squeeze_compute_host Host basic SRCS squeeze_compute.cc DEPS ${lite_k
add_kernel(unsqueeze_compute_host Host basic SRCS unsqueeze_compute.cc DEPS ${lite_kernel_deps})
add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps})
add_kernel(expand_compute_host Host basic SRCS expand_compute.cc DEPS ${lite_kernel_deps})
add_kernel(expand_as_compute_host Host basic SRCS expand_as_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps})
add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps})
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/host/expand_as_compute.h"
#include <cstring>  // provides memcpy used below
#include <vector>
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
template <typename T, PrecisionType PType>
void ExpandAsCompute<T, PType>::Run() {
auto& param = this->template Param<operators::ExpandAsParam>();
const auto* x = param.X;
auto* out = param.Out;
const auto* target = param.Target;
std::vector<int> expand_times;
const T* src = x->template data<T>();
T* dst = out->template mutable_data<T>();
  // Derive the per-dimension repeat counts from the target tensor:
  //   expand_times[i] = Target.dims()[i] / X.dims()[i]
for (int i = 0; i < target->dims().size(); ++i) {
int times = target->dims()[i] / x->dims()[i];
expand_times.push_back(times);
}
int dims = target->dims().size();
DDim in_shape = x->dims();
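  // First pass: tile the innermost dimension, copying each inner block from
  // src into dst expand_times[pos] times.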
int inner_num = 1;
int pos = dims - 1;
int outer_num = in_shape.count(0, pos);
inner_num *= in_shape[pos];
for (int j = 0; j < outer_num; ++j) {
for (int k = 0; k < expand_times[pos]; ++k) {
memcpy(dst + (j * expand_times[pos] + k) * inner_num,
src + j * inner_num,
sizeof(T) * inner_num);
}
}
inner_num *= expand_times[pos];
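  // Remaining passes: tile the outer dimensions in place, from inner to outer,
  // walking backwards so blocks that were already expanded are not overwritten
  // before they are copied.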
for (int i = dims - 2; i >= 0; --i) {
int outer_num = in_shape.count(0, i);
inner_num *= in_shape[i];
for (int j = outer_num - 1; j >= 0; --j) {
for (int k = expand_times[i] - 1; k >= 0; --k) {
memcpy(dst + (j * expand_times[i] + k) * inner_num,
dst + j * inner_num,
sizeof(T) * inner_num);
}
}
inner_num *= expand_times[i];
}
}
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
using expand_as_float =
paddle::lite::kernels::host::ExpandAsCompute<float, PRECISION(kFloat)>;
REGISTER_LITE_KERNEL(expand_as, kHost, kFloat, kAny, expand_as_float, def)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.BindInput("Target",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.Finalize();
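
The Run() method above realizes the tiling with two in-place memcpy passes. Stated plainly, the semantics it implements (and that the test baseline later checks) are: expand_times[i] = Target.dims()[i] / X.dims()[i], and each output coordinate maps back into X modulo X's shape. A minimal self-contained 2-D illustration, with shapes and values chosen purely for the example:

```c++
#include <cstdio>
#include <vector>

int main() {
  // X has shape [2, 3]; Target has shape [4, 6], so expand_times = {2, 2}.
  const int xh = 2, xw = 3, th = 4, tw = 6;
  std::vector<float> x = {0, 1, 2, 3, 4, 5};
  std::vector<float> out(th * tw);
  for (int i = 0; i < th; ++i) {
    for (int j = 0; j < tw; ++j) {
      // Every output coordinate maps back to X modulo X's shape (tiling).
      out[i * tw + j] = x[(i % xh) * xw + (j % xw)];
    }
  }
  // Prints two copies of X side by side, repeated twice vertically.
  for (int i = 0; i < th; ++i) {
    for (int j = 0; j < tw; ++j) printf("%.0f ", out[i * tw + j]);
    printf("\n");
  }
  return 0;
}
```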
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
template <typename T, PrecisionType PType>
class ExpandAsCompute
: public KernelLite<TARGET(kHost), PType, DATALAYOUT(kAny)> {
public:
void Run() override;
virtual ~ExpandAsCompute() = default;
};
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -32,30 +32,18 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x_type = kernel->GetInputDeclType("X");
CHECK(x_type->layout() == DATALAYOUT(kNCHW));
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto scale_name = op_info->Input("Scale").front();
auto scale_type = kernel->GetInputDeclType("Scale");
CHECK(scale_type->layout() == DATALAYOUT(kNCHW));
auto scale = scope->FindMutableTensor(scale_name);
auto bias_name = op_info->Input("Bias").front();
auto bias_type = kernel->GetInputDeclType("Bias");
CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
auto bias = scope->FindMutableTensor(bias_name);
auto mean_name = op_info->Input("Mean").front();
auto mean_type = kernel->GetInputDeclType("Mean");
CHECK(mean_type->layout() == DATALAYOUT(kNCHW));
auto mean = scope->FindMutableTensor(mean_name);
auto variance_name = op_info->Input("Variance").front();
auto variance_type = kernel->GetInputDeclType("Variance");
CHECK(variance_type->layout() == DATALAYOUT(kNCHW));
auto variance = scope->FindMutableTensor(variance_name);
auto y_name = op_info->Output("Y").front();
auto y_type = kernel->GetOutputDeclType("Y");
auto y = scope->FindMutableTensor(y_name);
CHECK(y_type->layout() == DATALAYOUT(kNCHW));
float momentum = op_info->GetAttr<float>("momentum");
float epsilon = op_info->GetAttr<float>("epsilon");
int mode = 1; // bnScale, bnBias tensor dims are 1xCx1x1
......@@ -71,9 +59,11 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(y_name));
output_scale = op_info->GetOutputScale(y_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......
......@@ -32,9 +32,7 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Get input and output vars and op attributes
auto x_names = op_info->Input("X");
auto x_type = kernel->GetInputDeclType("X");
auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out");
auto output = scope->FindMutableTensor(out_name);
auto axis = op_info->GetAttr<int>("axis");
......@@ -50,9 +48,9 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......@@ -77,12 +75,13 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
qnt.enable_int8 = enable_int8;
if (enable_int8) {
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
qnt.quant_bits = bit_length;
qnt.scale.push_back(input_scale);
x->mutable_data<int8_t>();
}
x_node =
graph->Add(x_name, *x, x_type->precision(), x_type->layout(), qnt);
x_node = graph->Add(x_name, *x, precision, layout, qnt);
}
inputs.push_back(x_node->data());
......
......@@ -59,7 +59,8 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK_EQ(dilations.size(), 2L);
// Check depthwise mode
bool is_depthwise_mode = (ic == groups && oc == groups && groups != 1);
auto weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
CHECK(op_info->HasInputScale(filter_name));
auto weight_scale = op_info->GetInputScale(filter_name);
// for quantization
bool enable_int8 = false;
......@@ -71,9 +72,11 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(input_name));
input_scale = op_info->GetInputScale(input_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(output_name));
output_scale = op_info->GetOutputScale(output_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......
......@@ -56,11 +56,9 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x_type = kernel->GetInputDeclType("X");
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto y_name = op_info->Input("Y").front();
auto y_type = kernel->GetInputDeclType("Y");
auto y = scope->FindMutableTensor(y_name);
auto y_dims = y->dims();
auto out_name = op_info->Output("Out").front();
......@@ -78,9 +76,11 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......@@ -100,7 +100,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
qnt.scale.push_back(input_scale);
qnt.quant_bits = op_info->GetAttr<int>("bit_length");
}
x_node = graph->Add(x_name, *x, x_type->precision(), x_type->layout(), qnt);
x_node = graph->Add(x_name, *x, precision, layout, qnt);
}
// Y node
......@@ -118,7 +118,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
qnt.scale.clear();
qnt.scale.push_back(input_scale);
}
y_node = graph->Add(y_name, *y, y_type->precision(), y_type->layout(), qnt);
y_node = graph->Add(y_name, *y, precision, layout, qnt);
}
std::shared_ptr<Node> output_node = nullptr;
......@@ -133,8 +133,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
output->mutable_data<int8_t>();
}
output_node = graph->Add(
out_name, *output, x_type->precision(), x_type->layout(), output_qnt);
output_node = graph->Add(out_name, *output, precision, layout, output_qnt);
std::vector<std::shared_ptr<rk::nn::Tensor>> inputs;
std::vector<std::shared_ptr<rk::nn::Tensor>> outputs;
......
......@@ -31,17 +31,14 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "[RKNPU] Converting " + op_type + "...";
auto input_name = op_info->Input("Input").front();
auto input_type = kernel->GetInputDeclType("Input");
auto input = scope->FindMutableTensor(input_name);
auto input_dims = input->dims();
CHECK_GE(input_dims.size(), 2UL);
auto w_name = op_info->Input("W").front();
auto w_type = kernel->GetInputDeclType("W");
auto w = scope->FindMutableTensor(w_name);
auto w_dims = w->dims();
CHECK_EQ(w_dims.size(), 2UL);
auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out");
auto output = scope->FindMutableTensor(out_name);
int in_num_col_dims = op_info->GetAttr<int>("in_num_col_dims");
int m = input_dims.Slice(0, in_num_col_dims).production();
......@@ -61,9 +58,11 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(input_name));
input_scale = op_info->GetInputScale(input_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
}
......@@ -86,7 +85,8 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (enable_int8) {
QuantizationInfo filter_qnt;
auto weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
CHECK(op_info->HasInputScale(w_name));
auto weight_scale = op_info->GetInputScale(w_name);
filter_qnt.enable_int8 = enable_int8;
filter_qnt.scale = weight_scale;
filter_qnt.quant_bits = bit_length;
......@@ -99,8 +99,8 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
transpose_w_data[j * k + i] = w_data[i * n + j];
}
}
trans_w_node = graph->Add(
w_name, *transpose_w, precision, w_type->layout(), filter_qnt);
trans_w_node =
graph->Add(w_name, *transpose_w, precision, layout, filter_qnt);
} else {
auto transpose_w_data = transpose_w->mutable_data<float>();
auto w_data = w->mutable_data<float>();
......@@ -110,8 +110,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
transpose_w_data[j * k + i] = w_data[i * n + j];
}
}
trans_w_node =
graph->Add(w_name, *transpose_w, precision, w_type->layout());
trans_w_node = graph->Add(w_name, *transpose_w, precision, layout);
}
// Add bias node if bias tensor exists
......@@ -132,8 +131,8 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (enable_int8) {
auto bias_name_qnt = bias_name + "/qnt";
auto* bias_qnt = scope->NewTensor(bias_name_qnt);
auto weight_scale =
op_info->GetAttr<std::vector<float>>("weight_scale");
CHECK(op_info->HasInputScale(w_name));
auto weight_scale = op_info->GetInputScale(w_name);
bias_qnt->Resize(bias_shape);
bias_qnt->set_persistable(true);
......@@ -176,7 +175,8 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
bias->set_persistable(true);
if (enable_int8) {
auto weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
CHECK(op_info->HasInputScale(w_name));
auto weight_scale = op_info->GetInputScale(w_name);
bias->set_precision(PrecisionType::kInt32);
auto* bias_data = bias->mutable_data<int32_t>();
......
......@@ -55,9 +55,11 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (x->precision() == PRECISION(kInt8)) {
// enable_int8 = op_info->GetAttr<bool>("enable_int8");
enable_int8 = true;
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......@@ -132,18 +134,16 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
ceil_mode = op_info->GetAttr<bool>("ceil_mode") ? 1 : 0;
}
std::shared_ptr<Node> output_node = nullptr;
QuantizationInfo output_qnt;
output_qnt.enable_int8 = enable_int8;
if (enable_int8) {
output_qnt.quant_bits = bit_length;
output_qnt.scale.push_back(output_scale);
output->mutable_data<int8_t>();
}
output_node = graph->Add(out_name, *output, precision, layout, output_qnt);
auto output_node =
graph->Add(out_name, *output, precision, layout, output_qnt);
std::vector<std::shared_ptr<rk::nn::Tensor>> inputs;
std::vector<std::shared_ptr<rk::nn::Tensor>> outputs;
......
......@@ -32,14 +32,10 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x_type = kernel->GetInputDeclType("X");
CHECK(x_type->layout() == DATALAYOUT(kNCHW));
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto x_rank = x_dims.size();
auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out");
CHECK(out_type->layout() == DATALAYOUT(kNCHW));
auto output = scope->FindMutableTensor(out_name);
auto axis = op_info->GetAttr<int>("axis");
if (axis < 0) {
......@@ -56,9 +52,11 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
CHECK(op_info->HasInputScale(x_name));
input_scale = op_info->GetInputScale(x_name)[0];
bit_length = op_info->GetAttr<int>("bit_length");
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
output_scale = op_info->GetOutputScale(out_name)[0];
if (enable_int8) {
precision = PRECISION(kInt8);
......
......@@ -34,6 +34,7 @@ add_operator(fake_quant extra SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op
add_operator(fake_dequant extra SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS})
add_operator(conv_transpose_op basic SRCS conv_transpose_op.cc DEPS ${op_DEPS})
add_operator(expand_op_lite basic SRCS expand_op.cc DEPS ${op_DEPS})
add_operator(expand_as_op_lite basic SRCS expand_as_op.cc DEPS ${op_DEPS})
add_operator(squeeze_op_lite basic SRCS squeeze_op.cc DEPS ${op_DEPS})
add_operator(unsqueeze_op_lite basic SRCS unsqueeze_op.cc DEPS ${op_DEPS})
add_operator(stack_op basic SRCS stack_op.cc DEPS ${op_DEPS})
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/expand_as_op.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
bool ExpandAsOpLite::CheckShape() const {
CHECK_OR_FALSE(param_.X);
CHECK_OR_FALSE(param_.Target);
CHECK_OR_FALSE(param_.Out);
int target_size = param_.Target->dims().size();
int x_dims_size = param_.X->dims().size();
CHECK_EQ(target_size, x_dims_size)
<< "The number of expand_times size must be qual to the rank of "
"Input(X).";
CHECK_LE(param_.X->dims().size(), 6u)
<< "The rank of Input(X) must not be greater than 6.";
return true;
}
bool ExpandAsOpLite::InferShapeImpl() const {
DDim out_dims(param_.X->dims());
for (size_t i = 0; i < param_.Target->dims().size(); ++i) {
// out_dims[i] *= param_.expand_times[i];
out_dims[i] = param_.Target->dims()[i];
}
param_.Out->Resize(out_dims);
return true;
}
bool ExpandAsOpLite::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
auto X_name = opdesc.Input("X").front();
auto Out_name = opdesc.Output("Out").front();
param_.X = GetVar<lite::Tensor>(scope, X_name);
param_.Out = GetMutableVar<lite::Tensor>(scope, Out_name);
auto Target_name = opdesc.Input("Target").front();
param_.Target = GetVar<lite::Tensor>(scope, Target_name);
return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(expand_as, paddle::lite::operators::ExpandAsOpLite);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "lite/core/op_lite.h"
namespace paddle {
namespace lite {
namespace operators {
class ExpandAsOpLite : public OpLite {
public:
ExpandAsOpLite() {}
explicit ExpandAsOpLite(const std::string &op_type) : OpLite(op_type) {}
bool CheckShape() const override;
bool InferShapeImpl() const override;
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "expand_as"; }
private:
mutable ExpandAsParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -28,8 +28,13 @@ bool FusionElementwiseActivationOp::CheckShape() const {
}
bool FusionElementwiseActivationOp::InferShapeImpl() const {
CHECK_OR_FALSE(param_.X->dims().size() >= param_.Y->dims().size());
param_.Out->Resize(param_.X->dims());
size_t x_size = param_.X->dims().size();
size_t y_size = param_.Y->dims().size();
if (x_size >= y_size) {
param_.Out->Resize(param_.X->dims());
} else {
param_.Out->Resize(param_.Y->dims());
}
return true;
}
......
......@@ -1287,6 +1287,13 @@ struct ExpandParam : ParamBase {
std::vector<int> expand_times{};
};
/// ----------------------- expand as operators ----------------------
struct ExpandAsParam : ParamBase {
const lite::Tensor* X{};
const lite::Tensor* Target{};
lite::Tensor* Out{};
};
/// ----------------------- matmul operators ----------------------
struct MatMulParam : ParamBase {
const lite::Tensor* X{};
......
......@@ -86,6 +86,7 @@ endif()
lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_as_compute SRCS expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_crf_decoding_compute SRCS crf_decoding_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
namespace paddle {
namespace lite {
class ExpandAsComputeTester : public arena::TestCase {
protected:
// common attributes for this op.
std::string x_ = "X";
std::string out_ = "Out";
std::string target_ = "Target";
DDim dims_;
DDim target_dims_;
public:
ExpandAsComputeTester(const Place& place,
const std::string& alias,
DDim dims,
DDim target_dims)
: TestCase(place, alias), dims_(dims), target_dims_(target_dims) {}
void RunBaseline(Scope* scope) override {
const auto* input = scope->FindTensor(x_);
CHECK(input);
auto* out = scope->NewTensor(out_);
CHECK(out);
const auto* target = scope->FindTensor(target_);
DDim out_shape(input->dims());
DDim in_shape = input->dims();
std::vector<int> expand_times_;
for (size_t i = 0; i < target->dims().size(); ++i) {
int times = target->dims()[i] / input->dims()[i];
expand_times_.push_back(times);
}
for (size_t i = 0; i < expand_times_.size(); ++i) {
out_shape[i] *= expand_times_[i];
}
out->Resize(out_shape);
float* out_data = out->mutable_data<float>();
const float* input_data = input->data<float>();
std::vector<int> in_stride(in_shape.size(), 1),
out_stride(out_shape.size(), 1);
for (int i = in_shape.size() - 2; i >= 0; --i) {
in_stride[i] = in_shape[i + 1] * in_stride[i + 1];
}
for (int i = out_shape.size() - 2; i >= 0; --i) {
out_stride[i] = out_shape[i + 1] * out_stride[i + 1];
}
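    // Map each output offset back to an input offset: every output coordinate
    // is taken modulo the corresponding input dimension (tiling semantics).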
for (size_t out_id = 0; out_id < out_shape.production(); ++out_id) {
int in_id = 0;
for (int i = expand_times_.size() - 1; i >= 0; --i) {
int in_j = (out_id / out_stride[i]) % in_shape[i];
in_id += in_j * in_stride[i];
}
out_data[out_id] = input_data[in_id];
}
}
void PrepareOpDesc(cpp::OpDesc* op_desc) {
op_desc->SetType("expand_as");
op_desc->SetInput("X", {x_});
op_desc->SetInput("Target", {target_});
op_desc->SetOutput("Out", {out_});
}
void PrepareData() override {
std::vector<float> in_data(dims_.production());
std::vector<float> target_data(target_dims_.production());
for (int i = 0; i < dims_.production(); ++i) {
in_data[i] = i;
}
for (int i = 0; i < target_dims_.production(); ++i) {
target_data[i] = i;
}
SetCommonTensor(x_, dims_, in_data.data());
SetCommonTensor(target_, target_dims_, target_data.data());
}
};
void test_expand_as_3dim(Place place, float abs_error) {
for (int C : {3}) {
for (int H : {2}) {
for (int W : {4}) {
std::unique_ptr<arena::TestCase> tester(new ExpandAsComputeTester(
place, "def", DDim({C, H, W}), DDim({C * 2, H * 3, W * 1})));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
}
void test_expand_as_4dim(Place place, float abs_error) {
for (int N : {2}) {
for (int C : {3}) {
for (int H : {2}) {
for (int W : {4}) {
std::unique_ptr<arena::TestCase> tester(
new ExpandAsComputeTester(place,
"def",
DDim({N, C, H, W}),
DDim({N * 2, C * 3, H * 1, W * 4})));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
}
}
TEST(ExpandAs, precision) {
float abs_error = 1e-5;
Place place;
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // Using fp16 in NPU
#elif defined(LITE_WITH_ARM)
place = TARGET(kHost);
#elif defined(LITE_WITH_X86)
place = TARGET(kHost);
#else
return;
#endif
test_expand_as_3dim(place, abs_error);
test_expand_as_4dim(place, abs_error);
}
} // namespace lite
} // namespace paddle
......@@ -25,6 +25,9 @@ WITH_STRIP=OFF
# options of compiling NPU lib.
WITH_HUAWEI_KIRIN_NPU=OFF
HUAWEI_KIRIN_NPU_SDK_ROOT="$(pwd)/ai_ddk_lib/" # Download HiAI DDK from https://developer.huawei.com/consumer/cn/hiai/
# options of compiling APU lib.
WITH_MEDIATEK_APU=OFF
MEDIATEK_APU_SDK_ROOT="$(pwd)/apu_ddk" # Download APU SDK from https://paddlelite-demo.bj.bcebos.com/devices/mediatek/apu_ddk.tar.gz
# options of compiling OPENCL lib.
WITH_OPENCL=OFF
# options of adding training ops
......@@ -154,6 +157,8 @@ function make_tiny_publish_so {
-DLITE_WITH_CV=$WITH_CV \
-DLITE_WITH_NPU=$WITH_HUAWEI_KIRIN_NPU \
-DNPU_DDK_ROOT=$HUAWEI_KIRIN_NPU_SDK_ROOT \
-DLITE_WITH_APU=$WITH_MEDIATEK_APU \
-DAPU_DDK_ROOT=$MEDIATEK_APU_SDK_ROOT \
-DLITE_WITH_OPENCL=$WITH_OPENCL \
-DARM_TARGET_ARCH_ABI=$ARCH \
-DARM_TARGET_LANG=$TOOLCHAIN \
......@@ -204,6 +209,8 @@ function make_full_publish_so {
-DLITE_WITH_CV=$WITH_CV \
-DLITE_WITH_NPU=$WITH_HUAWEI_KIRIN_NPU \
-DNPU_DDK_ROOT=$HUAWEI_KIRIN_NPU_SDK_ROOT \
-DLITE_WITH_APU=$WITH_MEDIATEK_APU \
-DAPU_DDK_ROOT=$MEDIATEK_APU_SDK_ROOT \
-DLITE_WITH_OPENCL=$WITH_OPENCL \
-DARM_TARGET_ARCH_ABI=$ARCH \
-DARM_TARGET_LANG=$TOOLCHAIN \
......@@ -257,6 +264,13 @@ function print_usage {
echo -e "| you can download huawei HiAi DDK from: https://developer.huawei.com/consumer/cn/hiai/ |"
echo -e "| detailed information about Paddle-Lite NPU: https://paddle-lite.readthedocs.io/zh/latest/demo_guides/npu.html |"
echo -e "| |"
echo -e "| arguments of apu library compiling:(armv8, gcc, c++_static) |"
echo -e "| ./lite/tools/build_android.sh --with_mediatek_apu=ON --mediatek_apu_sdk_root=YourApuSdkPath |"
echo -e "| --with_mediatek_apu: (OFF|ON); controls whether to compile lib for mediatek_apu, default is OFF |"
echo -e "| --mediatek_apu_sdk_root: (path to mediatek APU SDK file) required when compiling apu library |"
echo -e "| you can download mediatek APU SDK from: https://paddlelite-demo.bj.bcebos.com/devices/mediatek/apu_ddk.tar.gz |"
echo -e "| detailed information about Paddle-Lite APU: https://paddle-lite.readthedocs.io/zh/latest/demo_guides/mediatek_apu.html |"
echo -e "| |"
echo -e "| arguments of opencl library compiling:(armv8, gcc, c++_static) |"
echo -e "| ./lite/tools/build_android.sh --with_opencl=ON |"
echo -e "| --with_opencl: (OFF|ON); controls whether to compile lib for opencl, default is OFF |"
......@@ -351,6 +365,15 @@ function main {
HUAWEI_KIRIN_NPU_SDK_ROOT="${i#*=}"
shift
;;
# compiling lib which can operate on mediatek apu.
--with_mediatek_apu=*)
WITH_MEDIATEK_APU="${i#*=}"
shift
;;
--mediatek_apu_sdk_root=*)
MEDIATEK_APU_SDK_ROOT="${i#*=}"
shift
;;
# compiling result contains both light_api and cxx_api lib.
full_publish)
make_full_publish_so
......
......@@ -26,7 +26,7 @@ OPTMODEL_DIR=""
WITH_OPENCL=OFF
# options of compiling rockchip NPU lib.
WITH_ROCKCHIP_NPU=OFF
ROCKCHIP_NPU_SDK_ROOT=""
ROCKCHIP_NPU_SDK_ROOT="$(pwd)/rknpu_ddk" # Download RKNPU SDK from https://github.com/airockchip/rknpu_ddk.git
# options of compiling baidu XPU lib.
WITH_BAIDU_XPU=OFF
BAIDU_XPU_SDK_ROOT=""
......@@ -229,6 +229,8 @@ function print_usage {
echo -e "| ./lite/tools/build_linux.sh --with_rockchip_npu=ON --rockchip_npu_sdk_root=YourRockchipNpuSdkPath |"
echo -e "| --with_rockchip_npu: (OFF|ON); controls whether to compile lib for rockchip_npu, default is OFF |"
echo -e "| --rockchip_npu_sdk_root: (path to rockchip_npu DDK file) required when compiling rockchip_npu library |"
echo -e "| you can download rockchip NPU SDK from: https://github.com/airockchip/rknpu_ddk.git |"
echo -e "| detailed information about Paddle-Lite RKNPU: https://paddle-lite.readthedocs.io/zh/latest/demo_guides/rockchip_npu.html |"
echo -e "| |"
echo -e "| arguments of baidu xpu library compiling: |"
echo -e "| ./lite/tools/build_linux.sh --with_baidu_xpu=ON --baidu_xpu_sdk_root=YourBaiduXpuSdkPath |"
......