Commit 13cdbc5e, authored by Yuan Shuai, committed by GitHub

[LITE][OPENCL] Fix reshape, enhance elemul for OpenCL; enhance precision profiler, etc. (#3268)

* [LITE][OPENCL] Enhance elemul and fix reshape for OpenCL. test=develop

* Comment out the loop test for the pad2d OpenCL kernel. test=develop

* [LITE][PROFILE] Add sequence statistics to the precision profiler. test=develop
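
The new "ave_grow_rate*" column is a sequence statistic: the mean relative step
(in[i] - in[i-1]) / in[i-1] over the flattened output, with a small eps guarding
division by zero. Alongside mean and standard deviation, it helps distinguish
tensors whose summary moments agree but whose element order differs. A minimal
standalone sketch of the statistic, mirroring the compute_average_grow_rate()
helper added below (the function name here is ours; it keeps the patch's
convention of dividing by the full length rather than by the number of steps):

    #include <cstddef>
    #include <cstdio>

    // Mean relative step of a sequence; mirrors the profiler helper below.
    template <typename T>
    double AverageGrowRate(const T* in, size_t length) {
      const double eps = 1e-5;  // guards division by zero
      double rate = 0.0;
      for (size_t i = 1; i < length; ++i) {
        rate += (in[i] - in[i - 1]) / (in[i - 1] + eps);
      }
      return rate / length;  // patch divides by length, not length - 1
    }

    int main() {
      const float v[] = {1.f, 2.f, 4.f, 8.f};     // every step doubles
      std::printf("%f\n", AverageGrowRate(v, 4));  // ~0.75: 3 steps of ~1.0 / 4
      return 0;
    }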
Parent d8438e83
@@ -33,13 +33,14 @@ namespace lite {
 namespace profile {
 template <typename dtype>
-static void write_tensorfile(const Tensor* tensor, const std::string& locate) {
+static bool write_tensorfile(const Tensor* tensor, const std::string& locate) {
   if (locate.find('/') != std::string::npos) {
-    return;
+    return false;
   }
   FILE* fp = fopen(locate.c_str(), "w");
   if (fp == nullptr) {
     LOG(ERROR) << "file open failed " << locate;
+    return false;
   } else {
     const dtype* data = tensor->data<dtype>();
     for (int i = 0; i < tensor->numel(); ++i) {
@@ -47,6 +48,7 @@ static void write_tensorfile(const Tensor* tensor, const std::string& locate) {
     }
   }
   fclose(fp);
+  return true;
 }
 
 class PrecisionProfiler {
@@ -69,9 +71,10 @@ class PrecisionProfiler {
        << "=========================================" << std::endl;
     ss << setw(45) << left << "operator:(kernel_info)"
        << " " << setw(70) << left << "output_tensor_name:(tensor_info)"
-       << " " << setw(15) << left << "tensor_dims"
-       << " " << setw(15) << left << "tensor_mean"
-       << " " << setw(15) << left << "tensor_standard_deviation" << std::endl;
+       << " " << setw(15) << left << "dims"
+       << " " << setw(15) << left << "mean"
+       << " " << setw(15) << left << "std_deviation"
+       << " " << setw(15) << left << "ave_grow_rate*" << std::endl;
     return ss.str();
   }
@@ -102,6 +105,17 @@ class PrecisionProfiler {
     return sqrt(variance);
   }
+
+  template <typename T>
+  double compute_average_grow_rate(const T* in, const size_t length) {
+    const double eps = 1e-5;
+    double ave_grow_rate = 0.0f;
+    for (size_t i = 1; i < length; ++i) {
+      ave_grow_rate += (in[i] - in[i - 1]) / (in[i - 1] + eps);
+    }
+    ave_grow_rate /= length;
+    return ave_grow_rate;
+  }
 
   // check if output tensor unused
   bool is_unused(const Tensor* in) {
     if (!in->data<int8_t>()) {
@@ -116,7 +130,9 @@ class PrecisionProfiler {
                                      DataLayoutType layout_type,
                                      double* mean,
                                      double* std_dev,
-                                     std::string name = "inst") {
+                                     double* ave_grow_rate,
+                                     std::string name = "inst",
+                                     bool write_result_to_file = false) {
     std::string unsupported_error_log =
         "Unsupported precision profile for kernel registered on " +
         TargetToStr(target_type) + "/" + PrecisionToStr(precision_type) + "/" +
@@ -127,39 +143,44 @@ class PrecisionProfiler {
       switch (precision_type) {
         case PRECISION(kFloat): {
           auto ptr = in->data<float>();
-          // write_tensorfile<float>(in, name);
           *mean = compute_mean<float>(ptr, in->numel());
           *std_dev =
               compute_standard_deviation<float>(ptr, in->numel(), true, *mean);
+          *ave_grow_rate = compute_average_grow_rate<float>(ptr, in->numel());
+          write_result_to_file&& write_tensorfile<float>(in, name);
           return;
         }
         case PRECISION(kAny): {
           auto ptr = in->data<float>();
-          // write_tensorfile<float>(in, name);
           *mean = compute_mean<float>(ptr, in->numel());
           *std_dev =
               compute_standard_deviation<float>(ptr, in->numel(), true, *mean);
+          *ave_grow_rate = compute_average_grow_rate<float>(ptr, in->numel());
+          write_result_to_file&& write_tensorfile<float>(in, name);
           return;
         }
         case PRECISION(kInt8): {
           auto ptr = in->data<int8_t>();
-          // write_tensorfile<int8_t>(in, name);
           *mean = compute_mean<int8_t>(ptr, in->numel());
           *std_dev =
               compute_standard_deviation<int8_t>(ptr, in->numel(), true, *mean);
+          *ave_grow_rate = compute_average_grow_rate<int8_t>(ptr, in->numel());
+          write_result_to_file&& write_tensorfile<int8_t>(in, name);
           return;
         }
         case PRECISION(kInt32): {
           auto ptr = in->data<int32_t>();
-          // write_tensorfile<int32_t>(in, name);
           *mean = compute_mean<int32_t>(ptr, in->numel());
           *std_dev = compute_standard_deviation<int32_t>(
               ptr, in->numel(), true, *mean);
+          *ave_grow_rate = compute_average_grow_rate<int32_t>(ptr, in->numel());
+          write_result_to_file&& write_tensorfile<int32_t>(in, name);
           return;
         }
         default:
           *mean = -333333333333;
           *std_dev = -33333333333;
+          *ave_grow_rate = -33333333333;
           LOG(ERROR) << unsupported_error_log;
           return;
       }
@@ -186,11 +207,13 @@ class PrecisionProfiler {
                                    IoDirection::DtoH);
           default_convertor.ImageToNCHW(
               in_data_v.data(), real_out_v.data(), image_shape, in->dims());
-          // write_tensorfile<float>(in, name);
           CHECK(real_out_v.size() == in->numel());
           *mean = compute_mean<float>(real_out_v.data(), real_out_v.size());
           *std_dev = compute_standard_deviation<float>(
               real_out_v.data(), in->numel(), true, *mean);
+          *ave_grow_rate = compute_average_grow_rate<float>(real_out_v.data(),
+                                                            real_out_v.size());
+          write_result_to_file&& write_tensorfile<float>(in, name);
           return;
         }
         case DATALAYOUT(kNCHW): {
@@ -203,11 +226,15 @@ class PrecisionProfiler {
           *mean = compute_mean<float>(in_data_v.data(), in->numel());
           *std_dev = compute_standard_deviation<float>(
               in_data_v.data(), in->numel(), true, *mean);
+          *ave_grow_rate =
+              compute_average_grow_rate<float>(in_data_v.data(), in->numel());
+          write_result_to_file&& write_tensorfile<float>(in, name);
           return;
         }
         default:
           *mean = -222222222222;
           *std_dev = -22222222222;
+          *ave_grow_rate = -22222222222;
           LOG(ERROR) << unsupported_error_log;
           return;
       }
@@ -215,6 +242,7 @@ class PrecisionProfiler {
     } else {
       *mean = -111111111111;
       *std_dev = -11111111111;
+      *ave_grow_rate = -11111111111;
       LOG(ERROR) << unsupported_error_log;
       return;
     }
@@ -225,6 +253,7 @@ class PrecisionProfiler {
     using std::left;
     using std::fixed;
     STL::stringstream ss;
+    bool write_result_to_file = false;
     VLOG(1) << ">> Running kernel: " << inst->op()->op_info()->Repr()
             << " registered on " << TargetToStr(inst->kernel()->target()) << "/"
@@ -252,8 +281,10 @@ class PrecisionProfiler {
             op_scope->FindVar(out_name)->GetMutable<Tensor>();
         double mean = -999999;
         double std_dev = -100000;
+        double ave_grow_rate = 99999;
         std::string mean_str{"unused"};
         std::string std_dev_str{"unused"};
+        std::string ave_grow_rate_str{"unused"};
         if (!is_unused(tout)) {
           compute_tensor_precision_info(tout,
@@ -262,9 +293,12 @@ class PrecisionProfiler {
                                         type->layout(),
                                         &mean,
                                         &std_dev,
-                                        out_name);
-          mean_str = paddle::lite::to_string(mean);
-          std_dev_str = paddle::lite::to_string(std_dev);
+                                        &ave_grow_rate,
+                                        out_name,
+                                        write_result_to_file);
+          mean_str = std::to_string(mean);
+          std_dev_str = std::to_string(std_dev);
+          ave_grow_rate_str = std::to_string(ave_grow_rate);
         }
         std::string kernel_info = op_name + ":" + kernel_place;
         std::string output_arg_info = out_name + ":" +
@@ -275,7 +309,8 @@ class PrecisionProfiler {
         ss << setw(45) << left << kernel_info << " " << setw(70) << left
            << output_arg_info << " " << setw(15) << left << tout->dims()
            << " " << setw(15) << left << mean_str << " " << setw(15) << left
-           << std_dev_str << std::endl;
+           << std_dev_str << " " << setw(15) << left << ave_grow_rate_str
+           << std::endl;
       } else if (type->IsTensorList()) {
         auto touts =
             op_scope->FindVar(out_name)->GetMutable<std::vector<Tensor>>();
@@ -283,8 +318,10 @@ class PrecisionProfiler {
           const Tensor* tout = &t;
           double mean = -999999;
           double std_dev = -100000;
+          double ave_grow_rate = 99999;
           std::string mean_str{"unused"};
           std::string std_dev_str{"unused"};
+          std::string ave_grow_rate_str{"unused"};
           if (!is_unused(tout)) {
             compute_tensor_precision_info(tout,
@@ -293,9 +330,12 @@ class PrecisionProfiler {
                                           type->layout(),
                                           &mean,
                                           &std_dev,
-                                          out_name);
-            mean_str = paddle::lite::to_string(mean);
-            std_dev_str = paddle::lite::to_string(std_dev);
+                                          &ave_grow_rate,
+                                          out_name,
+                                          write_result_to_file);
+            mean_str = std::to_string(mean);
+            std_dev_str = std::to_string(std_dev);
+            ave_grow_rate_str = std::to_string(ave_grow_rate);
           }
           std::string kernel_info = op_name + ":" + kernel_place;
           std::string output_arg_info = out_name + ":" +
@@ -306,7 +346,8 @@ class PrecisionProfiler {
           ss << setw(45) << left << kernel_info << " " << setw(70) << left
              << output_arg_info << " " << setw(15) << left << tout->dims()
              << " " << setw(15) << left << mean_str << " " << setw(15) << left
-             << std_dev_str << std::endl;
+             << std_dev_str << " " << setw(15) << left << ave_grow_rate_str
+             << std::endl;
         }
       }
     }
...
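
Note on the profiler hunks above: write_tensorfile() now reports success
through a bool, and each call site guards the dump with the short-circuit
expression "write_result_to_file&& write_tensorfile<T>(in, name);", so the
file is written only when the flag is set and the returned bool is otherwise
discarded. (The odd spacing around "&&" is most likely clang-format treating
the token as an rvalue reference, not a typo.) A more explicit equivalent of
the guard:

    if (write_result_to_file) {
      (void)write_tensorfile<float>(in, name);  // result intentionally unused
    }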
@@ -56,7 +56,7 @@ class ElementwiseMulImageCompute
       } else {
         kernel_func_name_ = "channel_mul_d2_hw";
       }
-    } else if (y_dims.size() == 4) {
+    } else if (y_dims.size() == 4 || x_dims.size() == 4) {
       kernel_func_name_ = "channel_mul_d4";
     } else {
       LOG(FATAL) << "ElementwiseMul not supported y_dims.size():"
@@ -172,6 +172,18 @@ class ElementwiseMulImageCompute
         status = kernel.setArg(++arg_idx, static_cast<const int>(y_tensor_h));
         CL_CHECK_FATAL(status);
       }
+    } else if (x_dims.size() == 4) {
+      auto tensor_w = y_dims[y_dims.size() - 1];
+      VLOG(4) << "tensor_w:" << tensor_w;
+      // kernel: channel_mul_d4
+      cl_int status = kernel.setArg(arg_idx, *y_img);
+      CL_CHECK_FATAL(status);
+      status = kernel.setArg(++arg_idx, *x_img);
+      CL_CHECK_FATAL(status);
+      status = kernel.setArg(++arg_idx, *out_img);
+      CL_CHECK_FATAL(status);
+      status = kernel.setArg(++arg_idx, static_cast<const int>(tensor_w));
+      CL_CHECK_FATAL(status);
     } else {
       LOG(FATAL) << "ElementwiseMul not supported y_dims.size():"
                  << y_dims.size();
...
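
Note on the elementwise-mul hunks above: channel_mul_d4 was previously chosen
only when y_dims.size() == 4; it is now also chosen when x is the 4-D operand,
and the new argument block binds y_img into the first slot and x_img into the
second, with tensor_w taken from y's innermost dimension. Since elementwise
multiplication is commutative, swapping the operand order this way appears to
let the existing channel_mul_d4 kernel be reused unchanged.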
@@ -89,7 +89,7 @@ void pad2d_ref(const float *x_data,
     }
   }
 }
-#define LOOP_TEST
+// #define LOOP_TEST
 // #define PRINT_RESULT
 TEST(pad2d_image2d, compute) {
   LOG(INFO) << "main steps of test: host -> layout(buf2img) -> "
...
@@ -203,8 +203,8 @@ REGISTER_LITE_KERNEL(reshape2,
                                        PRECISION(kFP16),
                                        DATALAYOUT(kImageDefault))})
     .BindInput("ShapeTensor", {LiteType::GetTensorTy(TARGET(kOpenCL))})
-    .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
-    .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
+    .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
@@ -221,7 +221,7 @@ REGISTER_LITE_KERNEL(flatten,
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
                                        DATALAYOUT(kImageDefault))})
-    .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
+    .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
@@ -239,7 +239,7 @@ REGISTER_LITE_KERNEL(flatten2,
                                        PRECISION(kFP16),
                                        DATALAYOUT(kImageDefault))})
     .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
-    .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
+    .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
...
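
Note on the reshape/flatten registrations above: Shape and XShape carry shape
metadata that is produced and consumed on the host, so rebinding them from
TARGET(kOpenCL) to TARGET(kARM) presumably keeps them on the CPU and spares
the framework from inserting layout/IO-copy ops around tensors that never
need to reach the GPU. flatten2's Shape input is left bound to kOpenCL in
this patch.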