未验证 提交 86762e1e 编写于 作者: X xiebaiyuan 提交者: GitHub

[mobile][opencl] optimise log print, use kNO_LOG to close develop time… (#2693)

* [mobile][opencl] optimise log print, use kNO_LOG to close develop time logs, test=mobile

* [mobile][opencl] optimise log print, use kNO_LOG to close develop time logs, test=mobile
上级 9171b70e
......@@ -80,7 +80,6 @@ static const char *ANDROID_LOG_TAG =
#endif
enum LogLevel {
kNO_LOG,
kLOG_ERROR,
kLOG_WARNING,
kLOG_INFO,
......@@ -89,15 +88,16 @@ enum LogLevel {
kLOG_DEBUG1,
kLOG_DEBUG2,
kLOG_DEBUG3,
kLOG_DEBUG4
kLOG_DEBUG4,
kNO_LOG,
};
// log level
static LogLevel log_level = kLOG_DEBUG4;
static std::vector<std::string> logs{"NO ", "ERROR ", "WARNING", "INFO ",
"VERBOSE", "DEBUG ", "DEBUG1 ", "DEBUG2 ",
"DEBUG3 ", "DEBUG4 "};
static std::vector<std::string> logs{"ERROR ", "WARNING", "INFO ", "VERBOSE",
"DEBUG ", "DEBUG1 ", "DEBUG2 ", "DEBUG3 ",
"DEBUG4 ", "NO "};
struct ToLog;
struct Print;
......@@ -217,7 +217,6 @@ struct ToLog {
#define ANDROIDLOGV(...)
enum LogLevel {
kNO_LOG,
kLOG_ERROR,
kLOG_WARNING,
kLOG_INFO,
......@@ -226,7 +225,8 @@ enum LogLevel {
kLOG_DEBUG1,
kLOG_DEBUG2,
kLOG_DEBUG3,
kLOG_DEBUG4
kLOG_DEBUG4,
kNO_LOG
};
struct ToLog;
......
......@@ -124,9 +124,9 @@ class CLEngine {
if (status != CL_SUCCESS || ret_size / sizeof(size_t) < 3) {
return CLLocalWorkSizeInfo(0, 0, 0, 0);
}
DLOG << max_work_item_sizes[0];
DLOG << max_work_item_sizes[1];
DLOG << max_work_item_sizes[2];
DLOG << " max_work_item_sizes {" << max_work_item_sizes[0] << ", "
<< max_work_item_sizes[1] << ", " << max_work_item_sizes[2] << "}";
localWorkSizeInfo_ =
CLLocalWorkSizeInfo(max_work_group_size, max_work_item_sizes[0],
max_work_item_sizes[1], max_work_item_sizes[2]);
......@@ -182,8 +182,8 @@ class CLEngine {
cl_program p =
clCreateProgramWithSource(context, 1, &source, sourceSize, &status_);
DLOG << " cl kernel from source";
DLOG << " source size: " << sourceSize[0];
LOG(kLOG_DEBUG4) << " cl kernel from source";
LOG(kLOG_DEBUG4) << " source size: " << sourceSize[0];
CL_CHECK_ERRORS(status_);
std::unique_ptr<_cl_program, CLProgramDeleter> program_ptr(p);
......
......@@ -36,9 +36,9 @@ class CLHelper {
// Fetches the kernel identified by (kernel_name, file_name, options) from
// scope_->GetKernel and appends it to `kernels`; `options` defaults to "".
// NOTE(review): this span is a diff hunk, so each DLOG line is presumably the
// removed form of a LOG(kLOG_DEBUG1) line next to it - confirm against the
// commit before treating both as live code.
void AddKernel(const std::string &kernel_name, const std::string &file_name,
const std::string &options = "") {
DLOG << " begin add kernel ";
LOG(kLOG_DEBUG1) << " begin add kernel ";
auto kernel = scope_->GetKernel(kernel_name, file_name, options);
DLOG << " add kernel ing ";
// NOTE(review): this message repeats " begin add kernel " although it runs
// after GetKernel has returned; the DLOG above it reads " add kernel ing ".
// Looks like a copy-paste slip - confirm the intended text.
LOG(kLOG_DEBUG1) << " begin add kernel ";
// Ownership of the kernel handle is moved into the container.
kernels.emplace_back(std::move(kernel));
}
......
......@@ -87,14 +87,14 @@ class CLImage {
PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
" need call SetTensorData first");
DLOG << " begin init cl image ";
LOG(kNO_LOG) << " begin init cl image ";
image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);
half_t *image_data = new half_t[product(image_dims_) * 4];
DLOG << " convert to image";
LOG(kNO_LOG) << " convert to image";
converter->NCHWToImage(tensor_data_, image_data, tensor_dims_);
DLOG << " end convert to image";
LOG(kNO_LOG) << " end convert to image";
InitCLImage(context, image_dims_[0], image_dims_[1], image_data);
......@@ -105,7 +105,7 @@ class CLImage {
tensor_data_ = nullptr;
image_converter_ = converter;
initialized_ = true;
DLOG << " end init cl image";
LOG(kNO_LOG) << " end init cl image";
}
void InitNImage(cl_context context, cl_command_queue command_queue) {
......@@ -137,9 +137,9 @@ class CLImage {
// CLImageConverterFolder();
CLImageConverterNormal *normal_converter = new CLImageConverterNormal();
PADDLE_MOBILE_ENFORCE(!shared_mem_, "do not init mem after shared .")
DLOG << " to get image dims ";
// LOG(kNO_LOG) << " to get image dims ";
image_dims_ = normal_converter->InitImageDimInfoWith(dim);
DLOG << " end get image dims " << image_dims_;
// LOG(kNO_LOG) << " end get image dims " << image_dims_;
InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
......@@ -148,7 +148,7 @@ class CLImage {
image_converter_ = normal_converter;
cl_event_ = CLEngine::Instance()->CreateEvent(context);
initialized_ = true;
DLOG << " end init cl image";
// LOG(kNO_LOG) << " end init cl image";
}
/**
* create fake size cl_mem for mem share
......@@ -169,9 +169,9 @@ class CLImage {
InitCLImage(context, real_image_dims_[0], real_image_dims_[1], nullptr);
// cheat cl_image they got what they wanted
image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);
DLOG << "InitFakeSizeImage ... ";
DLOG << "real_image_dims: " << real_image_dims_;
DLOG << "image_dims_: " << image_dims_;
LOG(kNO_LOG) << "InitFakeSizeImage ... ";
LOG(kNO_LOG) << "real_image_dims: " << real_image_dims_;
LOG(kNO_LOG) << "image_dims_: " << image_dims_;
PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] &&
real_image_dims_[1] >= image_dims_[1],
"real image is not enough");
......@@ -182,7 +182,7 @@ class CLImage {
initialized_ = true;
shared_mem_ = true;
DLOG << " end init FakeSizeImage";
LOG(kNO_LOG) << " end init FakeSizeImage";
}
/**
* init cl mem with a exist cl mem
......@@ -197,15 +197,15 @@ class CLImage {
real_image_dims_ = src.real_image_dims_;
image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);
DLOG << "InitWithExistMem ... ";
DLOG << "real_image_dims: " << real_image_dims_;
DLOG << "image_dims_: " << image_dims_;
LOG(kNO_LOG) << "InitWithExistMem ... ";
LOG(kNO_LOG) << "real_image_dims: " << real_image_dims_;
LOG(kNO_LOG) << "image_dims_: " << image_dims_;
if (real_image_dims_[0] < image_dims_[0] ||
real_image_dims_[1] < image_dims_[1]) {
DLOG << "real image is not enough!";
DLOG << "real_image_dims: " << real_image_dims_;
DLOG << "image_dims_: " << image_dims_;
LOG(kNO_LOG) << "real image is not enough!";
LOG(kNO_LOG) << "real_image_dims: " << real_image_dims_;
LOG(kNO_LOG) << "image_dims_: " << image_dims_;
}
PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] &&
real_image_dims_[1] >= image_dims_[1],
......@@ -221,7 +221,7 @@ class CLImage {
initialized_ = true;
shared_mem_ = true;
DLOG << " end init WithExistMem";
LOG(kNO_LOG) << " end init WithExistMem";
}
void InitConv2dTransposeFilterCLImage(cl_context context,
......
......@@ -47,14 +47,14 @@ class CLScope {
// Obtains a program via Program(file_name, kernel_name, options), creates the
// kernel named kernel_name from it with clCreateKernel, and returns the kernel
// wrapped in a unique_ptr that uses CLKernelDeleter.
// NOTE(review): this span is a diff hunk, so each DLOG line is presumably the
// removed form of a LOG(kLOG_DEBUG2) line next to it - confirm against the
// commit before treating both as live code.
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name,
const std::string &options) {
DLOG << " to get program " << file_name;
LOG(kLOG_DEBUG2) << " to get program " << file_name;
auto program = Program(file_name, kernel_name, options);
DLOG << " end get program ~ ";
DLOG << " to create kernel: " << kernel_name;
LOG(kLOG_DEBUG2) << " end get program ~ ";
LOG(kLOG_DEBUG2) << " to create kernel: " << kernel_name;
// clCreateKernel stores its error code in status_, which is then passed to
// CL_CHECK_ERRORS (a macro defined elsewhere; its failure behavior is not
// visible here).
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
clCreateKernel(program, kernel_name.c_str(), &status_));
CL_CHECK_ERRORS(status_);
DLOG << " end create kernel ~ ";
LOG(kLOG_DEBUG2) << " end create kernel ~ ";
// NOTE(review): std::move on a returned local is redundant and prevents copy
// elision; a plain `return kernel;` would already move.
return std::move(kernel);
}
......@@ -81,9 +81,11 @@ class CLScope {
auto program = CLEngine::Instance()->CreateProgramWithSource(
context_, source.c_str());
DLOG << " --- begin build program -> " << program_key << " --- ";
LOG(kLOG_DEBUG3) << " --- begin build program -> " << program_key
<< " --- ";
CLEngine::Instance()->BuildProgram(program.get(), options);
DLOG << " --- end build program -> " << program_key << " --- ";
LOG(kLOG_DEBUG3) << " --- end build program -> " << program_key
<< " --- ";
programs_[program_key] = std::move(program);
return programs_[program_key].get();
......@@ -100,9 +102,11 @@ class CLScope {
context_,
CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name);
DLOG << " --- begin build program -> " << program_key << " --- ";
LOG(kLOG_DEBUG3) << " --- begin build program ele-> " << program_key
<< " --- ";
CLEngine::Instance()->BuildProgram(program.get(), options);
DLOG << " --- end build program -> " << program_key << " --- ";
LOG(kLOG_DEBUG3) << " --- end build program ele-> " << program_key
<< " --- ";
programs_[program_key] = std::move(program);
return programs_[program_key].get();
......
......@@ -80,7 +80,7 @@ Executor<Device, T>::Executor(const Program<Device> &program,
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op_desc = ops[j];
DLOG << "create op: " << op_desc->Type();
LOG(kLOG_INFO) << "create op[" << j << "]: " << op_desc->Type();
auto op_handler = OpRegistry<Device>::CreateOp(
op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
......@@ -111,7 +111,8 @@ Executor<Device, T>::Executor(const Program<Device> &program,
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
LOG(kLOG_INFO) << "Initialize op[" << count++
<< "]: " << op_handler->Type();
if (op_handler->Type() == "feed" || op_handler->Type() == "fetch") {
op_handler->setPrePostType(config_.pre_post_type);
}
......@@ -1015,7 +1016,7 @@ void Executor<GPU_CL, float>::InitMemory() {
const TensorDesc &desc = var_desc->Tensor_desc();
// DDim ddim = make_ddim(desc.Dims());
DDim ddim = cl_image->dims();
DLOG << var_desc->Name();
LOG(kLOG_DEBUG1) << "init image of " << var_desc->Name();
cl_image->InitEmptyImage(context, command_queue, ddim);
}
}
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/loader.h"
#include <memory>
#include "framework/lod_tensor.h"
#include "framework/program/program-optimize/program_optimize.h"
......@@ -173,7 +174,7 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
rewind(fp);
DLOG << "model size: " << size;
PADDLE_MOBILE_ENFORCE(size > 0, "model size should > 0")
*out = reinterpret_cast<uint8_t *>(malloc(size));
size_t cur_len = 0;
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#ifdef PADDLE_MOBILE_CL
#include "pass/memory_optimize_cl.h"
#include <algorithm>
#include <utility>
#include "framework/cl/cl_image.h"
#include "framework/lod_tensor.h"
namespace paddle_mobile {
......@@ -79,7 +80,7 @@ void MemoryOptPassCl::operator()(
std::vector<ClVarNode *> fetch_var_nodes;
for (const auto &op : block->Ops()) {
DLOG << "op_desc->Type(): " << op->Type();
LOG(kNO_LOG) << "op_desc->Type(): " << op->Type();
for (const auto &outputs : op->GetOutputs()) {
for (const auto &output : outputs.second) {
// not a persistable and not a exclude one ,then add it to
......@@ -87,7 +88,7 @@ void MemoryOptPassCl::operator()(
if (!IsPersistable(output) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
output) == exclude_var_names.end()) {
DLOG << "output: " << output;
LOG(kNO_LOG) << "output: " << output;
ClVarNode *node = CreateNode(output);
analysis_nodes_.push(node);
}
......@@ -100,7 +101,7 @@ void MemoryOptPassCl::operator()(
if (!IsPersistable(input) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
input) == exclude_var_names.end()) {
DLOG << "input: " << input;
LOG(kNO_LOG) << "input: " << input;
ClVarNode *node = CreateNode(input);
analysis_nodes_.push(node);
if (op->Type() == "fetch") {
......@@ -114,7 +115,7 @@ void MemoryOptPassCl::operator()(
if (!IsPersistable(output) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
output) == exclude_var_names.end()) {
DLOG << "output: " << output;
LOG(kNO_LOG) << "output: " << output;
ClVarNode *node = CreateNode(output);
analysis_nodes_.push(node);
}
......@@ -164,8 +165,8 @@ void MemoryOptPassCl::ShareData(
cl_command_queue command_queue = scope->GetCLScpoe()->CommandQueue();
for (const auto &list : reused_nodes_) {
DLOG << "\n";
DLOG << "gpu . share memory within these variables";
LOG(kNO_LOG) << "\n";
LOG(kNO_LOG) << "gpu . share memory within these variables";
int64_t x_based_max_numl = -1;
int64_t y_based_max_numl = -1;
int64_t x_based_max_x = -1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册