Unverified  Commit 9bea65cb  authored by ysh329, committed by GitHub

[cherry-pick][BugFix][OPENCL] BugFix for OpenCL: image memory malloc; dropout kernel register; precision profiler enhance; layout pass bugfix for opencl (#4426)

Parent 2f87a652
@@ -13,6 +13,7 @@
 // limitations under the License.
 #pragma once
+#include <algorithm>
 #include <string>
 #include "lite/api/paddle_place.h"
 #include "lite/core/target_wrapper.h"
@@ -140,20 +141,21 @@ class Buffer {
 #ifdef LITE_WITH_OPENCL
   template <typename T>
   void ResetLazyImage2D(TargetType target,
-                        const size_t img_w,
-                        const size_t img_h,
+                        const size_t img_w_req,
+                        const size_t img_h_req,
                         void* host_ptr = nullptr) {
-    if (target != target_ || cl_image2d_width_ < img_w ||
-        cl_image2d_height_ < img_h || host_ptr != nullptr) {
+    if (target != target_ || cl_image2d_width_ < img_w_req ||
+        cl_image2d_height_ < img_h_req || host_ptr != nullptr) {
       CHECK_EQ(own_data_, true) << "Can not reset unowned buffer.";
+      cl_image2d_width_ = std::max(cl_image2d_width_, img_w_req);
+      cl_image2d_height_ = std::max(cl_image2d_height_, img_h_req);
       Free();
-      data_ = TargetWrapperCL::MallocImage<T>(img_w, img_h, host_ptr);
+      data_ = TargetWrapperCL::MallocImage<T>(
+          cl_image2d_width_, cl_image2d_height_, host_ptr);
      target_ = target;
-      space_ = sizeof(T) * img_w * img_h *
-               4;  // un-used for opencl Image2D, 4 for RGBA,
+      space_ = sizeof(T) * cl_image2d_width_ * cl_image2d_height_ *
+               4;  // un-used for opencl Image2D, 4 for RGBA,
       cl_use_image2d_ = true;
-      cl_image2d_width_ = img_w;
-      cl_image2d_height_ = img_h;
    }
  }
 #endif
...
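The behavioral fix in this hunk: the old code allocated an image of exactly the requested extent, so a request that grew in one dimension but shrank in the other would lose the larger cached extent and thrash between reallocations. The new code keeps the running maximum in each dimension, so any request that fits the cached image is a no-op. A minimal standalone sketch of this grow-only policy, with plain `malloc`/`free` standing in for `TargetWrapperCL::MallocImage<T>`/`Free`:

```cpp
#include <algorithm>
#include <cstdio>
#include <cstdlib>

// Minimal sketch of the grow-only Image2D cache policy from this hunk.
// malloc/free stand in for TargetWrapperCL::MallocImage<T> / Free.
class LazyImage2D {
 public:
  void Reset(size_t w_req, size_t h_req) {
    if (w_ < w_req || h_ < h_req) {  // reallocate only when the request grows
      w_ = std::max(w_, w_req);      // keep the running maximum, so a later
      h_ = std::max(h_, h_req);      // smaller request reuses this image
      std::free(data_);
      data_ = std::malloc(w_ * h_ * 4 * sizeof(float));  // 4 for RGBA
      ++reallocs_;
    }
  }
  int reallocs() const { return reallocs_; }
  ~LazyImage2D() { std::free(data_); }

 private:
  void* data_{nullptr};
  size_t w_{0}, h_{0};
  int reallocs_{0};
};

int main() {
  LazyImage2D img;
  img.Reset(256, 128);  // alloc #1
  img.Reset(128, 64);   // fits the cached 256x128 image: no realloc
  img.Reset(512, 64);   // width grew: realloc to 512x128 (running max)
  std::printf("reallocs: %d\n", img.reallocs());  // prints 2
  return 0;
}
```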
@@ -28,6 +28,12 @@ TEST(memory, test) {
   ASSERT_TRUE(buf_cuda);
   TargetFree(TARGET(kCUDA), buf_cuda);
 #endif
+#ifdef LITE_WITH_OPENCL
+  auto* buf_cl = TargetMalloc(TARGET(kOpenCL), 10);
+  ASSERT_TRUE(buf_cl);
+  TargetFree(TARGET(kOpenCL), buf_cl);
+#endif
 }

 }  // namespace lite
...
@@ -82,8 +82,11 @@ void TypeLayoutTransformPass::ComplementInputs(SSAGraph* graph,
   // not a good judge, but don't find the source of this issue from
   // static_pick_kernel_pass
   // to this pass.
+  auto is_host = [](TargetType x) -> bool {
+    return x == TARGET(kHost) || x == TARGET(kX86) || x == TARGET(kARM);
+  };
   auto* in_arg_type = const_cast<Type*>(in->AsArg().type);
-  if (in_arg_type->target() == TARGET(kARM) &&
+  if (is_host(in_arg_type->target()) &&
       in_arg_type->layout() == DATALAYOUT(kImageDefault)) {
     return;
   }
...
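This is the layout-pass part of the bugfix: previously only inputs typed as TARGET(kARM) with OpenCL's kImageDefault layout were exempted from getting a layout-transform node inserted; the same image-layout tensors typed as kHost or kX86 fell through and picked up a spurious transform. The new is_host predicate treats all three CPU-side targets uniformly.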
@@ -80,8 +80,8 @@ class Optimizer {
     InitControlFlowOpUnusedInputsAndOutputsEliminatePass();
     if (passes.empty() || passes.size() == 1) {
-      std::vector<std::string> passes_local{
-          {"lite_quant_dequant_fuse_pass",         //
+      std::vector<std::string> passes_local{{
+          "lite_quant_dequant_fuse_pass",          //
           "weight_quantization_preprocess_pass",   //
           "lite_conv_elementwise_fuse_pass",       // conv-elemwise-bn
           "lite_conv_bn_fuse_pass",                //
@@ -169,8 +169,9 @@ class Optimizer {
           "runtime_context_assign_pass",
           "argument_type_display_pass",
           "lite_reshape_fuse_pass",
-          "memory_optimize_pass"}};
+          "memory_optimize_pass"  // you can comment this line when enable
+                                  // PRECISION_PROFILE
+      }};
       if (passes.size() == 1) {
         // multi_stream_analysis_pass must be in the front of
...
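Context for the new comment on memory_optimize_pass: that pass lets several ops share one output buffer (and output name), which clashes with per-op tensor inspection in the precision profiler. The profiler changes below work around the resulting name collisions with rename_out_for_mem_reuse_pass; commenting the pass out instead gives every op a private output buffer to dump.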
@@ -18,10 +18,18 @@
  * of each kernel.
  */
 #pragma once
+#include <sys/time.h>
+#include <time.h>
 #include <cmath>
+#include <cstdlib>
+#include <map>
+#include <memory>
 #include <string>
 #include <vector>
 #include "lite/core/program.h"
+#include "lite/utils/io.h"
 #ifdef LITE_WITH_X86
 #include "lite/fluid/float16.h"
 #endif
@@ -40,14 +48,50 @@ namespace paddle {
 namespace lite {
 namespace profile {

+static const std::string get_date_str() {
+  struct tm tm_time;
+  time_t timestamp = time(NULL);
+  localtime_r(&timestamp, &tm_time);
+  struct timeval tv;
+  gettimeofday(&tv, NULL);
+
+  // print date / time
+  std::string date_str =
+      std::to_string(1900 + tm_time.tm_year) +
+      std::to_string(1 + tm_time.tm_mon) + std::to_string(tm_time.tm_mday) +
+      '_' + std::to_string(tm_time.tm_hour) + std::to_string(tm_time.tm_min) +
+      std::to_string(tm_time.tm_sec) + '_' + std::to_string(tv.tv_usec / 1000);
+  return date_str;
+}
+
+inline std::string generate_valid_tensor_name(const std::string& name) {
+  std::string new_name("");
+  for (size_t i = 0; i < name.length(); ++i) {
+    if (name[i] != '/') {
+      new_name += name[i];
+    } else {
+      new_name += "_";
+    }
+  }
+  return new_name;
+}
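Two small helpers back the new per-run output directory: get_date_str builds a timestamp from localtime fields (note the fields are concatenated without zero-padding), and generate_valid_tensor_name rewrites '/' so graph tensor names become legal file names. A quick standalone check of the sanitizer:

```cpp
#include <iostream>
#include <string>

// Same logic as generate_valid_tensor_name above: replace '/' with '_'.
std::string generate_valid_tensor_name(const std::string& name) {
  std::string new_name;
  for (char c : name) new_name += (c == '/') ? '_' : c;
  return new_name;
}

int main() {
  // Op-graph tensor names often contain '/', which is illegal in a filename.
  std::cout << generate_valid_tensor_name("conv2d/output/0") << "\n";
  // prints: conv2d_output_0
  return 0;
}
```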
 template <typename dtype>
-static bool write_tensorfile(const Tensor* tensor, const std::string& locate) {
-  if (locate.find('/') != std::string::npos) {
-    return false;
+static bool write_tensorfile(
+    const Tensor* tensor,
+    const std::string& tensor_name,
+    const std::string prefix_path = "/storage/emulated/0/") {
+  std::string new_tensor_name = generate_valid_tensor_name(tensor_name);
+  if (tensor_name.find('/') != std::string::npos) {
+    LOG(ERROR) << "--> tensor name is abnormal with '\\':" << tensor_name
+               << " !!!, replace with '_'," << new_tensor_name
+               << new_tensor_name;
   }
-  FILE* fp = fopen(locate.c_str(), "w");
+  std::string tensor_save_path = prefix_path + new_tensor_name + ".txt";
+  FILE* fp = fopen(tensor_save_path.c_str(), "w");
   if (fp == nullptr) {
-    LOG(ERROR) << "file open field " << locate;
+    LOG(ERROR) << "failed open file " << tensor_save_path;
     return false;
   } else {
     const dtype* data = tensor->data<dtype>();
@@ -56,19 +100,23 @@ static bool write_tensorfile(const Tensor* tensor, const std::string& locate) {
     }
   }
   fclose(fp);
+  LOG(INFO) << "write tensor " << tensor_name
+            << " to file:" << tensor_save_path;
   return true;
 }
-static bool write_precision_summary_tofile(const std::string& string,
-                                           const std::string& log_dir = "") {
-  if (log_dir == "") {
-    LOG(INFO) << "The `log_dir` of precision summary file is not set. log_dir:"
-              << log_dir;
+static bool write_precision_summary_tofile(
+    const std::string& string, const std::string& summary_log_dir = "") {
+  if (summary_log_dir == "") {
+    LOG(INFO) << "The `summary_log_dir` of precision summary file is not set. "
+                 "summary_log_dir:"
+              << summary_log_dir;
     return false;
   }
-  FILE* fp = fopen(log_dir.c_str(), "a");
+  FILE* fp = fopen(summary_log_dir.c_str(), "a");
   if (fp == nullptr) {
-    LOG(INFO) << "Open precision summary file:" << log_dir << "failed.";
+    LOG(INFO) << "Open precision summary file:" << summary_log_dir << "failed.";
     return false;
   } else {
     fprintf(fp, "%s\n", string.c_str());
@@ -85,7 +133,14 @@ class PrecisionProfiler {
     std::string inst_precison_str = GetInstPrecision(inst);
   }

-  PrecisionProfiler() {}
+  PrecisionProfiler() {
+    MkDirRecur(log_dir_);
+    const char* write_to_file_raw =
+        std::getenv("PADDLELITE_PRECISION_WRITE_TO_FILE");
+    write_result_to_file_ = (write_to_file_raw && atoi(write_to_file_raw) > 0)
+                                ? atoi(write_to_file_raw) > 0
+                                : false;
+  }

   std::string GetSummaryHeader() {
     using std::setw;
@@ -102,9 +157,9 @@ class PrecisionProfiler {
        << " " << setw(15) << left << "std_deviation"
        << " " << setw(15) << left << "ave_grow_rate*" << std::endl;
-    // write to file with path: `log_dir`
-    if (log_dir_ != "") {
-      FILE* fp = fopen(log_dir_.c_str(), "a");
+    // write to file with path: `summary_log_dir`
+    if (summary_log_dir_ != "") {
+      FILE* fp = fopen(summary_log_dir_.c_str(), "a");
       std::string header_str{ss.str()};
       fprintf(fp, "%s\n", header_str.c_str());
       fclose(fp);
@@ -112,6 +167,18 @@ class PrecisionProfiler {
     return ss.str();
   }
+  std::string GetSummaryTail() {
+    STL::stringstream ss;
+    ss << "[note]" << std::endl;
+    ss << "1. `ave_grow_rate`: show the sequence value of tensor when std_dev "
+          "& mean are same."
+       << std::endl;
+    ss << "2. Enable write each output tensor to file: `export "
+          "PADDLELITE_PRECISION_WRITE_TO_FILE=1` on ADB command line."
+       << std::endl;
+    return ss.str();
+  }
   template <typename T>
   double compute_mean(const T* in, const size_t length) {
     double sum = 0.;
@@ -157,6 +224,17 @@ class PrecisionProfiler {
     return false;
   }
+  std::string rename_out_for_mem_reuse_pass(const std::string& old_name) {
+    if (out_tensor_names_map.find(old_name) == out_tensor_names_map.end()) {
+      out_tensor_names_map[old_name] = 1;
+    } else {
+      ++out_tensor_names_map[old_name];
+    }
+    std::string new_name =
+        old_name + "_" + std::to_string(out_tensor_names_map[old_name]);
+    return new_name;
+  }
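Because memory_optimize_pass reuses output buffers, the same tensor name can appear as the output of several instructions; this counter appends an occurrence index so each profiler row (and dump file) stays distinct. A standalone sketch of the resulting naming:

```cpp
#include <iostream>
#include <map>
#include <string>

// Standalone sketch of rename_out_for_mem_reuse_pass: each repeated
// occurrence of a (reused) output tensor name gets an increasing suffix.
std::map<std::string, size_t> counts;

std::string rename(const std::string& old_name) {
  ++counts[old_name];  // std::map value-initializes the count to 0 first
  return old_name + "_" + std::to_string(counts[old_name]);
}

int main() {
  std::cout << rename("fc_0.tmp") << "\n";  // fc_0.tmp_1
  std::cout << rename("fc_0.tmp") << "\n";  // fc_0.tmp_2 (buffer was reused)
  std::cout << rename("relu_out") << "\n";  // relu_out_1
  return 0;
}
```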
   void compute_tensor_precision_info(const Tensor* in,
                                      TargetType target_type,
                                      PrecisionType precision_type,
@@ -180,7 +258,7 @@ class PrecisionProfiler {
         *std_dev =
             compute_standard_deviation<float>(ptr, in->numel(), true, *mean);
         *ave_grow_rate = compute_average_grow_rate<float>(ptr, in->numel());
-        write_result_to_file&& write_tensorfile<float>(in, name);
+        write_result_to_file&& write_tensorfile<float>(in, name, log_dir_);
         return;
       }
       case PRECISION(kAny): {
@@ -189,7 +267,7 @@ class PrecisionProfiler {
         *std_dev =
             compute_standard_deviation<float>(ptr, in->numel(), true, *mean);
         *ave_grow_rate = compute_average_grow_rate<float>(ptr, in->numel());
-        write_result_to_file&& write_tensorfile<float>(in, name);
+        write_result_to_file&& write_tensorfile<float>(in, name, log_dir_);
         return;
       }
       case PRECISION(kInt8): {
@@ -198,7 +276,7 @@ class PrecisionProfiler {
         *std_dev =
             compute_standard_deviation<int8_t>(ptr, in->numel(), true, *mean);
         *ave_grow_rate = compute_average_grow_rate<int8_t>(ptr, in->numel());
-        write_result_to_file&& write_tensorfile<int8_t>(in, name);
+        write_result_to_file&& write_tensorfile<int8_t>(in, name, log_dir_);
         return;
       }
       case PRECISION(kInt32): {
@@ -207,7 +285,7 @@ class PrecisionProfiler {
         *std_dev = compute_standard_deviation<int32_t>(
             ptr, in->numel(), true, *mean);
         *ave_grow_rate = compute_average_grow_rate<int32_t>(ptr, in->numel());
-        write_result_to_file&& write_tensorfile<int32_t>(in, name);
+        write_result_to_file&& write_tensorfile<int32_t>(in, name, log_dir_);
         return;
       }
       case PRECISION(kInt64): {
@@ -254,7 +332,14 @@ class PrecisionProfiler {
             real_out_v.data(), in->numel(), true, *mean);
         *ave_grow_rate = compute_average_grow_rate<float>(real_out_v.data(),
                                                           real_out_v.size());
-        write_result_to_file&& write_tensorfile<float>(in, name);
+        std::shared_ptr<lite::Tensor> real_out_t(new lite::Tensor);
+        real_out_t->Resize(in->dims());
+        float* real_out_data = real_out_t->mutable_data<float>();
+        memcpy(real_out_data,
+               real_out_v.data(),
+               real_out_v.size() * sizeof(float));
+        write_result_to_file&& write_tensorfile<float>(
+            real_out_t.get(), name, log_dir_);
         return;
       }
       case DATALAYOUT(kNCHW): {
@@ -269,7 +354,14 @@ class PrecisionProfiler {
             in_data_v.data(), in->numel(), true, *mean);
         *ave_grow_rate =
             compute_average_grow_rate<float>(in_data_v.data(), in->numel());
-        write_result_to_file&& write_tensorfile<float>(in, name);
+        std::shared_ptr<lite::Tensor> real_out_t(new lite::Tensor);
+        real_out_t->Resize(in->dims());
+        float* real_out_data = real_out_t->mutable_data<float>();
+        memcpy(real_out_data,
+               in_data_v.data(),
+               in_data_v.size() * sizeof(float));
+        write_result_to_file&& write_tensorfile<float>(
+            real_out_t.get(), name, log_dir_);
         return;
       }
       default:
@@ -296,7 +388,7 @@ class PrecisionProfiler {
             in_data_v.data(), in->numel(), true, *mean);
         *ave_grow_rate =
             compute_average_grow_rate<float>(in_data_v.data(), in->numel());
-        write_result_to_file&& write_tensorfile<float>(in, name);
+        write_result_to_file&& write_tensorfile<float>(in, name, log_dir_);
         return;
       }
       case PRECISION(kInt32): {
@@ -311,7 +403,7 @@ class PrecisionProfiler {
             in_data_v.data(), in->numel(), true, *mean);
         *ave_grow_rate =
             compute_average_grow_rate<int>(in_data_v.data(), in->numel());
-        write_result_to_file&& write_tensorfile<float>(in, name);
+        write_result_to_file&& write_tensorfile<float>(in, name, log_dir_);
         return;
       }
       case PRECISION(kInt64): {
@@ -326,7 +418,7 @@ class PrecisionProfiler {
             in_data_v.data(), in->numel(), true, *mean);
         *ave_grow_rate =
             compute_average_grow_rate<int64_t>(in_data_v.data(), in->numel());
-        write_result_to_file&& write_tensorfile<float>(in, name);
+        write_result_to_file&& write_tensorfile<float>(in, name, log_dir_);
         return;
       }
       case PRECISION(kFP16): {
@@ -347,7 +439,7 @@ class PrecisionProfiler {
             in_data_v.data(), in->numel(), true, *mean);
         *ave_grow_rate =
             compute_average_grow_rate<float>(in_data_v.data(), in->numel());
-        write_result_to_file&& write_tensorfile<float>(in, name);
+        write_result_to_file&& write_tensorfile<float>(in, name, log_dir_);
         return;
       }
       default:
@@ -372,12 +464,12 @@ class PrecisionProfiler {
     using std::left;
     using std::fixed;
     STL::stringstream ss;
-    bool write_result_to_file = false;
     VLOG(1) << ">> Running kernel: " << inst->op()->op_info()->Repr()
             << " registered on " << TargetToStr(inst->kernel()->target()) << "/"
             << PrecisionToStr(inst->kernel()->precision()) << "/"
-            << DataLayoutToStr(inst->kernel()->layout());
+            << DataLayoutToStr(inst->kernel()->layout())
+            << ", write_result_to_file_:" << write_result_to_file_;

     std::string kernel_repr = inst->op()->op_info()->Repr();
     std::string kernel_place = TargetToStr(inst->kernel()->target()) + "/" +
@@ -404,6 +496,7 @@ class PrecisionProfiler {
       std::string mean_str{"unused"};
       std::string std_dev_str{"unused"};
       std::string ave_grow_rate_str{"unused"};
+      std::string new_out_name = rename_out_for_mem_reuse_pass(out_name);

       if (!is_unused(tout)) {
         compute_tensor_precision_info(tout,
@@ -413,14 +506,14 @@ class PrecisionProfiler {
                                       &mean,
                                       &std_dev,
                                       &ave_grow_rate,
-                                      out_name,
-                                      write_result_to_file);
+                                      new_out_name,
+                                      write_result_to_file_);
         mean_str = std::to_string(mean);
         std_dev_str = std::to_string(std_dev);
         ave_grow_rate_str = std::to_string(ave_grow_rate);
       }
       std::string kernel_info = op_name + ":" + kernel_place;
-      std::string output_arg_info = out_name + ":" +
+      std::string output_arg_info = new_out_name + ":" +
                                     TargetToStr(type->target()) + "/" +
                                     PrecisionToStr(type->precision()) +
                                     "/" + DataLayoutToStr(type->layout());
@@ -441,6 +534,7 @@ class PrecisionProfiler {
       std::string mean_str{"unused"};
       std::string std_dev_str{"unused"};
       std::string ave_grow_rate_str{"unused"};
+      std::string new_out_name = rename_out_for_mem_reuse_pass(out_name);

       if (!is_unused(tout)) {
         compute_tensor_precision_info(tout,
@@ -450,14 +544,14 @@ class PrecisionProfiler {
                                       &mean,
                                       &std_dev,
                                       &ave_grow_rate,
-                                      out_name,
-                                      write_result_to_file);
+                                      new_out_name,
+                                      write_result_to_file_);
         mean_str = std::to_string(mean);
         std_dev_str = std::to_string(std_dev);
         ave_grow_rate_str = std::to_string(ave_grow_rate);
       }
       std::string kernel_info = op_name + ":" + kernel_place;
-      std::string output_arg_info = out_name + ":" +
+      std::string output_arg_info = new_out_name + ":" +
                                     TargetToStr(type->target()) + "/" +
                                     PrecisionToStr(type->precision()) +
                                     "/" + DataLayoutToStr(type->layout());
@@ -471,12 +565,16 @@ class PrecisionProfiler {
         }
       }
     }
-    write_precision_summary_tofile(ss.str(), log_dir_);
+    write_precision_summary_tofile(ss.str(), summary_log_dir_);
     return ss.str();
   }
  private:
-  std::string log_dir_{"/storage/emulated/0/precision.log"};
+  std::string log_dir_{"/storage/emulated/0/PaddleLite_" + get_date_str() +
+                       "/"};
+  std::string summary_log_dir_{log_dir_ + "precision_summary.log"};
+  std::map<std::string, size_t> out_tensor_names_map;
+  bool write_result_to_file_{false};
 };

 }  // namespace profile
...
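Net effect of the new members: each profiler run creates a timestamped directory (log_dir_, e.g. /storage/emulated/0/PaddleLite_<date>/). When PADDLELITE_PRECISION_WRITE_TO_FILE=1 is set, write_tensorfile drops one <sanitized_tensor_name>.txt per dumped output into that directory, and the per-kernel summary table is appended to precision_summary.log alongside them.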
@@ -302,7 +302,9 @@ void RuntimeProgram::Run() {
   LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kDispatch, false, 1);
 #endif
 #ifdef LITE_WITH_PRECISION_PROFILE
-  LOG(INFO) << "\n" << precision_profiler_summary;
+  LOG(INFO) << "\n"
+            << precision_profiler_summary
+            << inst_precision_profiler.GetSummaryTail();
 #endif
 }
...
@@ -29,6 +29,21 @@ int64_t ShapeProduction(const shape_t& shape) {
   return res;
 }

+std::string ShapePrint(const std::vector<shape_t>& shapes) {
+  std::string shapes_str{""};
+  for (size_t shape_idx = 0; shape_idx < shapes.size(); ++shape_idx) {
+    auto shape = shapes[shape_idx];
+    std::string shape_str;
+    for (auto i : shape) {
+      shape_str += std::to_string(i) + ",";
+    }
+    shapes_str += shape_str;
+    shapes_str +=
+        (shape_idx != 0 && shape_idx == shapes.size() - 1) ? "" : " : ";
+  }
+  return shapes_str;
+}
+
 std::string ShapePrint(const shape_t& shape) {
   std::string shape_str{""};
   for (auto i : shape) {
@@ -37,6 +52,37 @@ std::string ShapePrint(const shape_t& shape) {
   return shape_str;
 }
+std::vector<std::string> split_string(const std::string& str_in) {
+  std::vector<std::string> str_out;
+  std::string tmp_str = str_in;
+  while (!tmp_str.empty()) {
+    size_t next_offset = tmp_str.find(":");
+    str_out.push_back(tmp_str.substr(0, next_offset));
+    if (next_offset == std::string::npos) {
+      break;
+    } else {
+      tmp_str = tmp_str.substr(next_offset + 1);
+    }
+  }
+  return str_out;
+}
+
+std::vector<int64_t> get_shape(const std::string& str_shape) {
+  std::vector<int64_t> shape;
+  std::string tmp_str = str_shape;
+  while (!tmp_str.empty()) {
+    int dim = atoi(tmp_str.data());
+    shape.push_back(dim);
+    size_t next_offset = tmp_str.find(",");
+    if (next_offset == std::string::npos) {
+      break;
+    } else {
+      tmp_str = tmp_str.substr(next_offset + 1);
+    }
+  }
+  return shape;
+}
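To make the new shape syntax concrete, here is a self-contained sketch of the same two-level parse that split_string and get_shape perform (':' separates inputs, ',' separates the dims of one input):

```cpp
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

// Same parsing scheme as split_string/get_shape above:
// ':' separates inputs, ',' separates the dims of one input.
int main() {
  const std::string raw = "1,3,224,224:1,5";
  std::vector<std::vector<int64_t>> shapes;
  std::string rest = raw;
  while (!rest.empty()) {
    size_t colon = rest.find(':');
    std::string one = rest.substr(0, colon);  // one input's shape string
    std::vector<int64_t> shape;
    while (!one.empty()) {
      shape.push_back(std::atoi(one.data()));  // atoi stops at the next ','
      size_t comma = one.find(',');
      if (comma == std::string::npos) break;
      one = one.substr(comma + 1);
    }
    shapes.push_back(shape);
    if (colon == std::string::npos) break;
    rest = rest.substr(colon + 1);
  }
  // prints: shape 0: 1,3,224,224 and shape 1: 1,5
  for (size_t j = 0; j < shapes.size(); ++j) {
    std::cout << "shape " << j << ": ";
    for (size_t i = 0; i < shapes[j].size(); ++i)
      std::cout << shapes[j][i] << (i + 1 < shapes[j].size() ? "," : "\n");
  }
  return 0;
}
```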
 template <typename T>
 double compute_mean(const T* in, const size_t length) {
   double sum = 0.;
@@ -70,7 +116,7 @@ inline double GetCurrentUS() {
 }
 void RunModel(std::string model_dir,
-              const shape_t& input_shape,
+              const std::vector<shape_t>& input_shapes,
               size_t repeats,
               size_t warmup,
               size_t print_output_elem,
@@ -111,12 +157,19 @@ void RunModel(std::string model_dir,
       CreatePaddlePredictor<MobileConfig>(config);

   // 3. Prepare input data
-  std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0)));
-  input_tensor->Resize(
-      {input_shape[0], input_shape[1], input_shape[2], input_shape[3]});
-  auto* data = input_tensor->mutable_data<float>();
-  for (int i = 0; i < ShapeProduction(input_tensor->shape()); ++i) {
-    data[i] = 1;
+  std::cout << "input_shapes.size():" << input_shapes.size() << std::endl;
+  for (int j = 0; j < input_shapes.size(); ++j) {
+    auto input_tensor = predictor->GetInput(j);
+    input_tensor->Resize(input_shapes[j]);
+    auto input_data = input_tensor->mutable_data<float>();
+    int input_num = 1;
+    for (int i = 0; i < input_shapes[j].size(); ++i) {
+      input_num *= input_shapes[j][i];
+    }
+    for (int i = 0; i < input_num; ++i) {
+      input_data[i] = 1.f;
+    }
   }
   // 4. Run predictor
@@ -142,7 +195,7 @@ void RunModel(std::string model_dir,
   }
   avg_duration = sum_duration / static_cast<float>(repeats);
   std::cout << "\n======= benchmark summary =======\n"
-            << "input_shape(NCHW):" << ShapePrint(input_shape) << "\n"
+            << "input_shape(s) (NCHW):" << ShapePrint(input_shapes) << "\n"
             << "model_dir:" << model_dir << "\n"
             << "warmup:" << warmup << "\n"
             << "repeats:" << repeats << "\n"
@@ -184,18 +237,19 @@ void RunModel(std::string model_dir,
 }

 int main(int argc, char** argv) {
-  shape_t input_shape{1, 3, 224, 224};  // shape_t ==> std::vector<int64_t>
+  std::vector<std::string> str_input_shapes;
+  std::vector<shape_t> input_shapes{
+      {1, 3, 224, 224}};  // shape_t ==> std::vector<int64_t>
   int repeats = 10;
   int warmup = 10;
   int print_output_elem = 0;

-  if (argc > 2 && argc < 9) {
+  if (argc > 2 && argc < 6) {
     std::cerr << "usage: ./" << argv[0] << "\n"
               << " <naive_buffer_model_dir>\n"
-              << " <input_n>\n"
-              << " <input_c>\n"
-              << " <input_h>\n"
-              << " <input_w>\n"
+              << " <raw_input_shapes>, eg: 1,3,224,224 for 1 input; "
+                 "1,3,224,224:1,5 for 2 inputs\n"
               << " <repeats>\n"
               << " <warmup>\n"
               << " <print_output>" << std::endl;
@@ -203,14 +257,19 @@ int main(int argc, char** argv) {
   }

   std::string model_dir = argv[1];
-  if (argc >= 9) {
-    input_shape[0] = atoi(argv[2]);
-    input_shape[1] = atoi(argv[3]);
-    input_shape[2] = atoi(argv[4]);
-    input_shape[3] = atoi(argv[5]);
-    repeats = atoi(argv[6]);
-    warmup = atoi(argv[7]);
-    print_output_elem = atoi(argv[8]);
+  if (argc >= 6) {
+    input_shapes.clear();
+    std::string raw_input_shapes = argv[2];
+    std::cout << "raw_input_shapes: " << raw_input_shapes << std::endl;
+    str_input_shapes = split_string(raw_input_shapes);
+    for (size_t i = 0; i < str_input_shapes.size(); ++i) {
+      std::cout << "input shape: " << str_input_shapes[i] << std::endl;
+      input_shapes.push_back(get_shape(str_input_shapes[i]));
+    }
+    repeats = atoi(argv[3]);
+    warmup = atoi(argv[4]);
+    print_output_elem = atoi(argv[5]);
   }
   // set arm power mode:
   // 0 for big cluster, high performance
@@ -220,7 +279,7 @@ int main(int argc, char** argv) {
   size_t power_mode = 0;

   RunModel(
-      model_dir, input_shape, repeats, warmup, print_output_elem, power_mode);
+      model_dir, input_shapes, repeats, warmup, print_output_elem, power_mode);

   return 0;
 }
...
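With this change the demo accepts any number of model inputs through a single argument. Assuming the built binary is the mobilenet light-API demo (the binary name here is illustrative), a two-input run would look like: ./mobilenetv1_light_api <model_dir> 1,3,224,224:1,5 100 10 0 — that is, model dir, colon-separated shape list, repeats, warmup, print_output.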
@@ -35,7 +35,6 @@ void gen_log(STL::ostream& log_stream_,
              const int kMaxLen) {
   const int len = strlen(file);

-  std::string time_str;
   struct tm tm_time;  // Time of creation of LogMessage
   time_t timestamp = time(NULL);
 #if defined(_WIN32)
...