未验证 提交 90ec3960 编写于 作者: Y Yuan Shuai 提交者: GitHub

[LITE][OPENCL] Fix reshape and enhance elemul for OpenCL; enhance precision profiler, etc. (#3268)

* [LITE][OPENCL] enhance elemul, fix reshape for opencl. test=develop

* comment loop test for pad2 opencl kernel. test=develop

* [LITE][PROFILE] add sequence statistics for precision profiler. test=develop
上级 0ff43268
......@@ -33,13 +33,14 @@ namespace lite {
namespace profile {
template <typename dtype>
static void write_tensorfile(const Tensor* tensor, const std::string& locate) {
static bool write_tensorfile(const Tensor* tensor, const std::string& locate) {
if (locate.find('/') != std::string::npos) {
return;
return false;
}
FILE* fp = fopen(locate.c_str(), "w");
if (fp == nullptr) {
LOG(ERROR) << "file open field " << locate;
return false;
} else {
const dtype* data = tensor->data<dtype>();
for (int i = 0; i < tensor->numel(); ++i) {
......@@ -47,6 +48,7 @@ static void write_tensorfile(const Tensor* tensor, const std::string& locate) {
}
}
fclose(fp);
return true;
}
class PrecisionProfiler {
......@@ -69,9 +71,10 @@ class PrecisionProfiler {
<< "=========================================" << std::endl;
ss << setw(45) << left << "operator:(kernel_info)"
<< " " << setw(70) << left << "output_tensor_name:(tensor_info)"
<< " " << setw(15) << left << "tensor_dims"
<< " " << setw(15) << left << "tensor_mean"
<< " " << setw(15) << left << "tensor_standard_deviation" << std::endl;
<< " " << setw(15) << left << "dims"
<< " " << setw(15) << left << "mean"
<< " " << setw(15) << left << "std_deviation"
<< " " << setw(15) << left << "ave_grow_rate*" << std::endl;
return ss.str();
}
......@@ -102,6 +105,17 @@ class PrecisionProfiler {
return sqrt(variance);
}
// Computes the "average grow rate" of a sequence: the sum of the relative
// step (in[i] - in[i-1]) / (in[i-1] + eps) over every consecutive pair,
// divided by `length`.
//
// @param in      pointer to `length` contiguous elements of type T.
// @param length  number of elements readable through `in`.
// @return the statistic as a double; 0.0 when there are fewer than two
//         elements (no consecutive pair exists).
//
// NOTE(review): the sum has length-1 terms but is divided by `length`; this
// divisor is kept as-is so reported values stay comparable with existing
// profiler output -- confirm whether length-1 was intended.
template <typename T>
double compute_average_grow_rate(const T* in, const size_t length) {
  // With fewer than two elements the loop below never runs; returning early
  // also avoids the 0.0 / 0 -> NaN result for an empty tensor.
  if (length < 2) {
    return 0.0;
  }
  // Small offset added to the denominator so a zero predecessor does not
  // divide by exactly zero.
  const double eps = 1e-5;
  double ave_grow_rate = 0.0;
  for (size_t i = 1; i < length; ++i) {
    // Widen to double BEFORE subtracting: for integer T (e.g. int32_t) the
    // difference of two elements can overflow the element type.
    const double prev = static_cast<double>(in[i - 1]);
    const double curr = static_cast<double>(in[i]);
    ave_grow_rate += (curr - prev) / (prev + eps);
  }
  ave_grow_rate /= static_cast<double>(length);
  return ave_grow_rate;
}
// check if output tensor unused
bool is_unused(const Tensor* in) {
if (!in->data<int8_t>()) {
......@@ -116,7 +130,9 @@ class PrecisionProfiler {
DataLayoutType layout_type,
double* mean,
double* std_dev,
std::string name = "inst") {
double* ave_grow_rate,
std::string name = "inst",
bool write_result_to_file = false) {
std::string unsupported_error_log =
"Unsupported precision profile for kernel registered on" +
TargetToStr(target_type) + "/" + PrecisionToStr(precision_type) + "/" +
......@@ -127,39 +143,44 @@ class PrecisionProfiler {
switch (precision_type) {
case PRECISION(kFloat): {
auto ptr = in->data<float>();
// write_tensorfile<float>(in, name);
*mean = compute_mean<float>(ptr, in->numel());
*std_dev =
compute_standard_deviation<float>(ptr, in->numel(), true, *mean);
*ave_grow_rate = compute_average_grow_rate<float>(ptr, in->numel());
write_result_to_file&& write_tensorfile<float>(in, name);
return;
}
case PRECISION(kAny): {
auto ptr = in->data<float>();
// write_tensorfile<float>(in, name);
*mean = compute_mean<float>(ptr, in->numel());
*std_dev =
compute_standard_deviation<float>(ptr, in->numel(), true, *mean);
*ave_grow_rate = compute_average_grow_rate<float>(ptr, in->numel());
write_result_to_file&& write_tensorfile<float>(in, name);
return;
}
case PRECISION(kInt8): {
auto ptr = in->data<int8_t>();
// write_tensorfile<int8_t>(in, name);
*mean = compute_mean<int8_t>(ptr, in->numel());
*std_dev =
compute_standard_deviation<int8_t>(ptr, in->numel(), true, *mean);
*ave_grow_rate = compute_average_grow_rate<int8_t>(ptr, in->numel());
write_result_to_file&& write_tensorfile<int8_t>(in, name);
return;
}
case PRECISION(kInt32): {
auto ptr = in->data<int32_t>();
// write_tensorfile<int32_t>(in, name);
*mean = compute_mean<int32_t>(ptr, in->numel());
*std_dev = compute_standard_deviation<int32_t>(
ptr, in->numel(), true, *mean);
*ave_grow_rate = compute_average_grow_rate<int32_t>(ptr, in->numel());
write_result_to_file&& write_tensorfile<int32_t>(in, name);
return;
}
default:
*mean = -333333333333;
*std_dev = -33333333333;
*ave_grow_rate = -33333333333;
LOG(ERROR) << unsupported_error_log;
return;
}
......@@ -186,11 +207,13 @@ class PrecisionProfiler {
IoDirection::DtoH);
default_convertor.ImageToNCHW(
in_data_v.data(), real_out_v.data(), image_shape, in->dims());
// write_tensorfile<float>(in, name);
CHECK(real_out_v.size() == in->numel());
*mean = compute_mean<float>(real_out_v.data(), real_out_v.size());
*std_dev = compute_standard_deviation<float>(
real_out_v.data(), in->numel(), true, *mean);
*ave_grow_rate = compute_average_grow_rate<float>(real_out_v.data(),
real_out_v.size());
write_result_to_file&& write_tensorfile<float>(in, name);
return;
}
case DATALAYOUT(kNCHW): {
......@@ -203,11 +226,15 @@ class PrecisionProfiler {
*mean = compute_mean<float>(in_data_v.data(), in->numel());
*std_dev = compute_standard_deviation<float>(
in_data_v.data(), in->numel(), true, *mean);
*ave_grow_rate =
compute_average_grow_rate<float>(in_data_v.data(), in->numel());
write_result_to_file&& write_tensorfile<float>(in, name);
return;
}
default:
*mean = -222222222222;
*std_dev = -22222222222;
*ave_grow_rate = -22222222222;
LOG(ERROR) << unsupported_error_log;
return;
}
......@@ -215,6 +242,7 @@ class PrecisionProfiler {
} else {
*mean = -111111111111;
*std_dev = -11111111111;
*ave_grow_rate = -11111111111;
LOG(ERROR) << unsupported_error_log;
return;
}
......@@ -225,6 +253,7 @@ class PrecisionProfiler {
using std::left;
using std::fixed;
STL::stringstream ss;
bool write_result_to_file = false;
VLOG(1) << ">> Running kernel: " << inst->op()->op_info()->Repr()
<< " registered on " << TargetToStr(inst->kernel()->target()) << "/"
......@@ -252,8 +281,10 @@ class PrecisionProfiler {
op_scope->FindVar(out_name)->GetMutable<Tensor>();
double mean = -999999;
double std_dev = -100000;
double ave_grow_rate = 99999;
std::string mean_str{"unused"};
std::string std_dev_str{"unused"};
std::string ave_grow_rate_str{"unused"};
if (!is_unused(tout)) {
compute_tensor_precision_info(tout,
......@@ -262,9 +293,12 @@ class PrecisionProfiler {
type->layout(),
&mean,
&std_dev,
out_name);
mean_str = paddle::lite::to_string(mean);
std_dev_str = paddle::lite::to_string(std_dev);
&ave_grow_rate,
out_name,
write_result_to_file);
mean_str = std::to_string(mean);
std_dev_str = std::to_string(std_dev);
ave_grow_rate_str = std::to_string(ave_grow_rate);
}
std::string kernel_info = op_name + ":" + kernel_place;
std::string output_arg_info = out_name + ":" +
......@@ -275,7 +309,8 @@ class PrecisionProfiler {
ss << setw(45) << left << kernel_info << " " << setw(70) << left
<< output_arg_info << " " << setw(15) << left << tout->dims()
<< " " << setw(15) << left << mean_str << " " << setw(15) << left
<< std_dev_str << std::endl;
<< std_dev_str << " " << setw(15) << left << ave_grow_rate_str
<< std::endl;
} else if (type->IsTensorList()) {
auto touts =
op_scope->FindVar(out_name)->GetMutable<std::vector<Tensor>>();
......@@ -283,8 +318,10 @@ class PrecisionProfiler {
const Tensor* tout = &t;
double mean = -999999;
double std_dev = -100000;
double ave_grow_rate = 99999;
std::string mean_str{"unused"};
std::string std_dev_str{"unused"};
std::string ave_grow_rate_str{"unused"};
if (!is_unused(tout)) {
compute_tensor_precision_info(tout,
......@@ -293,9 +330,12 @@ class PrecisionProfiler {
type->layout(),
&mean,
&std_dev,
out_name);
mean_str = paddle::lite::to_string(mean);
std_dev_str = paddle::lite::to_string(std_dev);
&ave_grow_rate,
out_name,
write_result_to_file);
mean_str = std::to_string(mean);
std_dev_str = std::to_string(std_dev);
ave_grow_rate_str = std::to_string(ave_grow_rate);
}
std::string kernel_info = op_name + ":" + kernel_place;
std::string output_arg_info = out_name + ":" +
......@@ -306,7 +346,8 @@ class PrecisionProfiler {
ss << setw(45) << left << kernel_info << " " << setw(70) << left
<< output_arg_info << " " << setw(15) << left << tout->dims()
<< " " << setw(15) << left << mean_str << " " << setw(15) << left
<< std_dev_str << std::endl;
<< std_dev_str << " " << setw(15) << left << ave_grow_rate_str
<< std::endl;
}
}
}
......
......@@ -56,7 +56,7 @@ class ElementwiseMulImageCompute
} else {
kernel_func_name_ = "channel_mul_d2_hw";
}
} else if (y_dims.size() == 4) {
} else if (y_dims.size() == 4 || x_dims.size() == 4) {
kernel_func_name_ = "channel_mul_d4";
} else {
LOG(FATAL) << "ElementwiseMul not supported y_dims.size():"
......@@ -172,6 +172,18 @@ class ElementwiseMulImageCompute
status = kernel.setArg(++arg_idx, static_cast<const int>(y_tensor_h));
CL_CHECK_FATAL(status);
}
} else if (x_dims.size() == 4) {
auto tensor_w = y_dims[y_dims.size() - 1];
VLOG(4) << "tensor_w:" << tensor_w;
// kernel: channel_mul_d4
cl_int status = kernel.setArg(arg_idx, *y_img);
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *x_img);
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *out_img);
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, static_cast<const int>(tensor_w));
CL_CHECK_FATAL(status);
} else {
LOG(FATAL) << "ElementwiseMul not supported y_dims.size():"
<< y_dims.size();
......
......@@ -89,7 +89,7 @@ void pad2d_ref(const float *x_data,
}
}
#define LOOP_TEST
// #define LOOP_TEST
// #define PRINT_RESULT
TEST(pad2d_image2d, compute) {
LOG(INFO) << "main steps of test: host -> layout(buf2img) -> "
......
......@@ -203,8 +203,8 @@ REGISTER_LITE_KERNEL(reshape2,
PRECISION(kFP16),
DATALAYOUT(kImageDefault))})
.BindInput("ShapeTensor", {LiteType::GetTensorTy(TARGET(kOpenCL))})
.BindInput("Shape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
.BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
.BindInput("Shape", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kOpenCL),
PRECISION(kFP16),
......@@ -221,7 +221,7 @@ REGISTER_LITE_KERNEL(flatten,
{LiteType::GetTensorTy(TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault))})
.BindInput("Shape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
.BindInput("Shape", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kOpenCL),
PRECISION(kFP16),
......@@ -239,7 +239,7 @@ REGISTER_LITE_KERNEL(flatten2,
PRECISION(kFP16),
DATALAYOUT(kImageDefault))})
.BindInput("Shape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
.BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
.BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kOpenCL),
PRECISION(kFP16),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册