diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt
index de1a76c9c391102b8d7a1d113164f45beb913e6e..84f8a09860edea1bd0f29a0e7a726b1011a02ffd 100644
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -277,7 +277,7 @@ if (LITE_ON_MODEL_OPTIMIZE_TOOL)
     message(STATUS "Compiling model_optimize_tool")
     lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc
         DEPS gflags kernel op optimizer mir_passes utils)
-    add_dependencies(model_optimize_tool op_list_h kernel_list_h all_kernel_faked_cc)
+    add_dependencies(model_optimize_tool op_list_h kernel_list_h all_kernel_faked_cc supported_kernel_op_info_h)
 endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
 
 lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light
diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc
index 990d08f18f541088d797510e9dbd4881d42b164f..c1e9fc422450adf96d62c68d622907bd7e15b405 100644
--- a/lite/api/cxx_api.cc
+++ b/lite/api/cxx_api.cc
@@ -201,7 +201,11 @@ void Predictor::Build(const lite_api::CxxConfig &config,
   const std::string &model_file = config.model_file();
   const std::string &param_file = config.param_file();
   const bool model_from_memory = config.model_from_memory();
-  LOG(INFO) << "load from memory " << model_from_memory;
+  if (model_from_memory) {
+    LOG(INFO) << "Load model from memory.";
+  } else {
+    LOG(INFO) << "Load model from file.";
+  }
 
   Build(model_path,
         model_file,
diff --git a/lite/api/model_optimize_tool.cc b/lite/api/model_optimize_tool.cc
index b678c7ecd24c5ffbf3e9e3531264ac195c6a7325..fc23e0b54be41bff5b7b65b4e58908546b186bb4 100644
--- a/lite/api/model_optimize_tool.cc
+++ b/lite/api/model_optimize_tool.cc
@@ -16,8 +16,9 @@
 #ifdef PADDLE_WITH_TESTING
 #include <gtest/gtest.h>
 #endif
-// "all_kernel_faked.cc" and "kernel_src_map.h" are created automatically during
-// model_optimize_tool's compiling period
+// "supported_kernel_op_info.h", "all_kernel_faked.cc" and "kernel_src_map.h"
+// are created automatically while model_optimize_tool is compiled
+#include <iomanip>
 #include "all_kernel_faked.cc"  // NOLINT
 #include "kernel_src_map.h"     // NOLINT
 #include "lite/api/cxx_api.h"
@@ -25,8 +26,11 @@
 #include "lite/api/paddle_use_ops.h"
 #include "lite/api/paddle_use_passes.h"
 #include "lite/core/op_registry.h"
+#include "lite/model_parser/compatible_pb.h"
+#include "lite/model_parser/pb/program_desc.h"
 #include "lite/utils/cp_logging.h"
 #include "lite/utils/string.h"
+#include "supported_kernel_op_info.h"  // NOLINT
 
 DEFINE_string(model_dir,
               "",
@@ -62,10 +66,16 @@ DEFINE_string(valid_targets,
               "The targets this model optimized for, should be one of (arm, "
               "opencl, x86), splitted by space");
 DEFINE_bool(prefer_int8_kernel, false, "Prefer to run model with int8 kernels");
+DEFINE_bool(print_supported_ops,
+            false,
+            "Print the operators supported on the input target");
+DEFINE_bool(print_all_ops,
+            false,
+            "Print all the valid operators of Paddle-Lite");
+DEFINE_bool(print_model_ops, false, "Print operators in the input model");
 
 namespace paddle {
 namespace lite_api {
-
 //! Display the kernel information.
 void DisplayKernels() {
   LOG(INFO) << ::paddle::lite::KernelRegistry::Global().DebugString();
@@ -130,9 +140,7 @@ void RunOptimize(const std::string& model_dir,
   config.set_model_dir(model_dir);
   config.set_model_file(model_file);
   config.set_param_file(param_file);
-
   config.set_valid_places(valid_places);
-
   auto predictor = lite_api::CreatePaddlePredictor(config);
 
   LiteModelType model_type;
@@ -168,6 +176,202 @@ void CollectModelMetaInfo(const std::string& output_dir,
   lite::WriteLines(std::vector<std::string>(total.begin(), total.end()),
                   output_path);
 }
+void PrintOpsInfo(std::set<std::string> valid_ops = {}) {
+  std::vector<std::string> targets = {"kHost",
+                                      "kX86",
+                                      "kCUDA",
+                                      "kARM",
+                                      "kOpenCL",
+                                      "kFPGA",
+                                      "kNPU",
+                                      "kXPU",
+                                      "kAny",
+                                      "kUnk"};
+  int maximum_optype_length = 0;
+  for (auto it = supported_ops.begin(); it != supported_ops.end(); it++) {
+    maximum_optype_length = it->first.size() > maximum_optype_length
+                                ? it->first.size()
+                                : maximum_optype_length;
+  }
+  std::cout << std::setiosflags(std::ios::internal);
+  std::cout << std::setw(maximum_optype_length) << "OP_name";
+  for (int i = 0; i < targets.size(); i++) {
+    std::cout << std::setw(10) << targets[i].substr(1);
+  }
+  std::cout << std::endl;
+  if (valid_ops.empty()) {
+    for (auto it = supported_ops.begin(); it != supported_ops.end(); it++) {
+      std::cout << std::setw(maximum_optype_length) << it->first;
+      auto ops_valid_places = it->second;
+      for (int i = 0; i < targets.size(); i++) {
+        if (std::find(ops_valid_places.begin(),
+                      ops_valid_places.end(),
+                      targets[i]) != ops_valid_places.end()) {
+          std::cout << std::setw(10) << "Y";
+        } else {
+          std::cout << std::setw(10) << " ";
+        }
+      }
+      std::cout << std::endl;
+    }
+  } else {
+    for (auto op = valid_ops.begin(); op != valid_ops.end(); op++) {
+      std::cout << std::setw(maximum_optype_length) << *op;
+      // Skip ops that no kernel matches.
+      if (supported_ops.find(*op) == supported_ops.end()) {
+        continue;
+      }
+      // Print op info.
+      auto ops_valid_places = supported_ops.at(*op);
+      for (int i = 0; i < targets.size(); i++) {
+        if (std::find(ops_valid_places.begin(),
+                      ops_valid_places.end(),
+                      targets[i]) != ops_valid_places.end()) {
+          std::cout << std::setw(10) << "Y";
+        } else {
+          std::cout << std::setw(10) << " ";
+        }
+      }
+      std::cout << std::endl;
+    }
+  }
+}
+/// Print help information
+void PrintHelpInfo() {
+  // at least one argument should be given
+  const char help_info[] =
+      "At least one argument should be given. Valid arguments are listed "
+      "below:\n"
+      "  Arguments of model optimization:\n"
+      "        `--model_dir=<model_param_dir>`\n"
+      "        `--model_file=<model_path>`\n"
+      "        `--param_file=<param_path>`\n"
+      "        `--optimize_out_type=(protobuf|naive_buffer)`\n"
+      "        `--optimize_out=<output_optimize_model_dir>`\n"
+      "        `--valid_targets=(arm|opencl|x86|npu|xpu)`\n"
+      "        `--prefer_int8_kernel=(true|false)`\n"
+      "        `--record_tailoring_info=(true|false)`\n"
+      "  Arguments of model checking and ops information:\n"
+      "        `--print_all_ops=true`   Display all the valid operators of "
+      "Paddle-Lite\n"
+      "        `--print_supported_ops=true  --valid_targets=(arm|opencl|x86|npu|xpu)`"
+      "  Display valid operators of input targets\n"
+      "        `--print_model_ops=true  --model_dir=<model_param_dir> "
+      "--valid_targets=(arm|opencl|x86|npu|xpu)`"
+      "  Display operators in the input model\n";
+  std::cout << help_info << std::endl;
+  exit(1);
+}
+
+// Parse the input command
+void ParseInputCommand() {
+  if (FLAGS_print_all_ops) {
+    std::cout << "All OPs supported by Paddle-Lite: " << supported_ops.size()
+              << " ops in total." << std::endl;
+    PrintOpsInfo();
+    exit(1);
+  } else if (FLAGS_print_supported_ops) {
+    auto valid_places = paddle::lite_api::ParserValidPlaces();
+    // get valid_targets string
+    std::vector<TargetType> target_types = {};
+    for (int i = 0; i < valid_places.size(); i++) {
+      target_types.push_back(valid_places[i].target);
+    }
+    std::string targets_str = TargetToStr(target_types[0]);
+    for (int i = 1; i < target_types.size(); i++) {
+      targets_str = targets_str + TargetToStr(target_types[i]);
+    }
+
+    std::cout << "Supported OPs on '" << targets_str << "': " << std::endl;
+    target_types.push_back(TARGET(kHost));
+    target_types.push_back(TARGET(kUnk));
+
+    std::set<std::string> valid_ops;
+    for (int i = 0; i < target_types.size(); i++) {
+      auto ops = supported_ops_target[static_cast<int>(target_types[i])];
+      valid_ops.insert(ops.begin(), ops.end());
+    }
+    PrintOpsInfo(valid_ops);
+    exit(1);
+  }
+}
+// test whether this model is supported
+void CheckIfModelSupported() {
+  // 1. parse the valid places and valid targets
+  auto valid_places = paddle::lite_api::ParserValidPlaces();
+  // set valid_ops
+  auto valid_ops = supported_ops_target[static_cast<int>(TARGET(kHost))];
+  auto valid_unktype_ops = supported_ops_target[static_cast<int>(TARGET(kUnk))];
+  valid_ops.insert(
+      valid_ops.end(), valid_unktype_ops.begin(), valid_unktype_ops.end());
+  for (int i = 0; i < valid_places.size(); i++) {
+    auto target = valid_places[i].target;
+    auto ops = supported_ops_target[static_cast<int>(target)];
+    valid_ops.insert(valid_ops.end(), ops.begin(), ops.end());
+  }
+  // get valid ops
+  std::set<std::string> valid_ops_set(valid_ops.begin(), valid_ops.end());
+
+  // 2. load the model into a program to collect its ops
+  std::string prog_path = FLAGS_model_dir + "/__model__";
+  if (!FLAGS_model_file.empty() && !FLAGS_param_file.empty()) {
+    prog_path = FLAGS_model_file;
+  }
+  lite::cpp::ProgramDesc cpp_prog;
+  framework::proto::ProgramDesc pb_proto_prog =
+      *lite::LoadProgram(prog_path, false);
+  lite::pb::ProgramDesc pb_prog(&pb_proto_prog);
+  // Transform to cpp::ProgramDesc
+  lite::TransformProgramDescAnyToCpp(pb_prog, &cpp_prog);
+
+  std::set<std::string> unsupported_ops;
+  std::set<std::string> input_model_ops;
+  for (int index = 0; index < cpp_prog.BlocksSize(); index++) {
+    auto current_block = cpp_prog.GetBlock<lite::cpp::BlockDesc>(index);
+    for (size_t i = 0; i < current_block->OpsSize(); ++i) {
+      auto& op_desc = *current_block->GetOp<lite::cpp::OpDesc>(i);
+      auto op_type = op_desc.Type();
+      input_model_ops.insert(op_type);
+      if (valid_ops_set.count(op_type) == 0) {
+        unsupported_ops.insert(op_type);
+      }
+    }
+  }
+  // 3. print the ops_info of the input model and check whether it is supported
+  if (FLAGS_print_model_ops) {
+    std::cout << "OPs in the input model include:\n";
+    PrintOpsInfo(input_model_ops);
+  }
+  if (!unsupported_ops.empty()) {
+    std::string unsupported_ops_str = *unsupported_ops.begin();
+    for (auto op_str = ++unsupported_ops.begin();
+         op_str != unsupported_ops.end();
+         op_str++) {
+      unsupported_ops_str = unsupported_ops_str + ", " + *op_str;
+    }
+    std::vector<TargetType> targets = {};
+    for (int i = 0; i < valid_places.size(); i++) {
+      targets.push_back(valid_places[i].target);
+    }
+    std::sort(targets.begin(), targets.end());
+    targets.erase(unique(targets.begin(), targets.end()), targets.end());
+    std::string targets_str = TargetToStr(targets[0]);
+    for (int i = 1; i < targets.size(); i++) {
+      targets_str = targets_str + "," + TargetToStr(targets[i]);
+    }
+
+    LOG(ERROR) << "Error: This model is not supported, because "
+               << unsupported_ops.size() << " ops are not supported on '"
+               << targets_str << "'. These unsupported ops are: '"
+               << unsupported_ops_str << "'.";
+    exit(1);
+  }
+  if (FLAGS_print_model_ops) {
+    std::cout << "Paddle-Lite supports this model!" << std::endl;
+    exit(1);
+  }
+}
 
 void Main() {
   if (FLAGS_display_kernels) {
@@ -241,7 +445,13 @@ void Main() {
 }  // namespace paddle
 
 int main(int argc, char** argv) {
+  // If no input argument is given, print help info.
+  if (argc < 2) {
+    paddle::lite_api::PrintHelpInfo();
+  }
   google::ParseCommandLineFlags(&argc, &argv, false);
+  paddle::lite_api::ParseInputCommand();
+  paddle::lite_api::CheckIfModelSupported();
   paddle::lite_api::Main();
   return 0;
 }
diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt
index 34d9deff6a5262c16c2f74301771b73479f3ae30..8fda0a12fd3a66e27acba91af58fa67b3c9cb348 100644
--- a/lite/core/CMakeLists.txt
+++ b/lite/core/CMakeLists.txt
@@ -95,7 +95,15 @@ add_custom_command(
 add_custom_target(op_list_h DEPENDS ops.h)
 add_custom_target(kernel_list_h DEPENDS kernels.h)
 add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc)
-
+# create a header file recording which ops are supported on each platform
+add_custom_command(
+  COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py
+                 ${kernels_src_list}
+                 ${ops_src_list}
+                 ${CMAKE_BINARY_DIR}/supported_kernel_op_info.h
+  OUTPUT supported_kernel_op_info.h # not the real output path, so the command runs every time
+  )
+add_custom_target(supported_kernel_op_info_h DEPENDS supported_kernel_op_info.h)
 #----------------------------------------------- NOT CHANGE -----------------------------------------------
 lite_cc_library(kernel SRCS kernel.cc
     DEPS context type_system target_wrapper any op_params tensor
diff --git a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt
index ce8b8365a8c55796772e7fbbe672ead682343a60..74b86c519e44f3aec5f0fbc7f3e2b3aa8d39c554 100644
--- a/lite/kernels/arm/CMakeLists.txt
+++ b/lite/kernels/arm/CMakeLists.txt
@@ -1,6 +1,6 @@
 # NOTE we leave the add_kernel not protected by LITE_WITH_LIGHT_WEIGHT_FRAMEWORK so that all the kernels will be registered
 # to the model_optimize_tool.
-if(NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))
+if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)))
     return()
 endif()
 
diff --git a/lite/kernels/cuda/CMakeLists.txt b/lite/kernels/cuda/CMakeLists.txt
index bf59d0272611d314dcee41c620bb3f9b3ca08c7e..2df00f00a4eefd8fc6f9bee5e0c9b76656232041 100644
--- a/lite/kernels/cuda/CMakeLists.txt
+++ b/lite/kernels/cuda/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(NOT LITE_WITH_CUDA)
+if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_CUDA))
     return()
 endif()
 
diff --git a/lite/kernels/fpga/CMakeLists.txt b/lite/kernels/fpga/CMakeLists.txt
index 7c47e72872ecae6216288c20fa1a6ae30fac65bd..f6c3a399490a86e2ac2fcd9cbeb76fca8c8ac479 100755
--- a/lite/kernels/fpga/CMakeLists.txt
+++ b/lite/kernels/fpga/CMakeLists.txt
@@ -1,4 +1,4 @@
-if (NOT LITE_WITH_FPGA)
+if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_FPGA))
     return()
 endif()
 
diff --git a/lite/kernels/opencl/CMakeLists.txt b/lite/kernels/opencl/CMakeLists.txt
index 3423b1e920e5e7c4aaa34125303b09d943e47b62..f4d3254a7b54cfea96fc2419bd425f8328990ebe 100644
--- a/lite/kernels/opencl/CMakeLists.txt
+++ b/lite/kernels/opencl/CMakeLists.txt
@@ -14,7 +14,7 @@ add_kernel(pool_opencl OPENCL basic SRCS pool_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(io_copy_compute_opencl OPENCL basic SRCS io_copy_compute.cc DEPS ${tensor_lite} ${cl_kernel_deps})
 add_kernel(relu_opencl OPENCL basic SRCS relu_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(depthwise_conv2d_opencl OPENCL basic SRCS depthwise_conv2d_compute.cc DEPS ${cl_kernel_deps})
-add_kernel(conv2d_1x1_opencl OPENCL basic SRCS conv2d_1x1_compute.cc DEPS ${cl_kernel_deps})
+#add_kernel(conv2d_1x1_opencl OPENCL basic SRCS conv2d_1x1_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(reshape_opencl OPENCL basic SRCS reshape_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(conv_opencl OPENCL basic SRCS conv_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(layout_opencl OPENCL basic SRCS layout_compute.cc DEPS ${cl_kernel_deps})
@@ -49,12 +49,14 @@ lite_cc_test(test_depthwise_conv2d_opencl SRCS depthwise_conv2d_compute_test.cc
     DEPS depthwise_conv2d_opencl op_registry program context cl_image_converter
     ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
-lite_cc_test(test_conv2d_1x1_opencl SRCS conv2d_1x1_compute_test.cc
-    DEPS conv2d_1x1_opencl cl_image_converter op_registry program context
-    ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
+#lite_cc_test(test_conv2d_1x1_opencl SRCS conv2d_1x1_compute_test.cc
+#    DEPS conv2d_1x1_opencl cl_image_converter op_registry program context
+#    ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
+
 lite_cc_test(test_reshape_opencl SRCS reshape_compute_test.cc
     DEPS reshape_opencl cl_image_converter op_registry program context
     ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
+
 lite_cc_test(test_conv_opencl SRCS conv_compute_test.cc
     DEPS conv_opencl op_registry program context
     ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
diff --git a/lite/operators/compare_op.cc b/lite/operators/compare_op.cc
index 3210520cd5d71f239da258955df0e917e5e1153e..aa500ba35c37cf8af17091d8d37d8fd8d1a08e0e 100644
--- a/lite/operators/compare_op.cc
+++ b/lite/operators/compare_op.cc
@@ -54,7 +54,7 @@ bool CompareOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
 }  // namespace paddle
 
 REGISTER_LITE_OP(equal, paddle::lite::operators::CompareOp);
-REGISTER_LITE_OP(notequal, paddle::lite::operators::CompareOp);
+REGISTER_LITE_OP(not_equal, paddle::lite::operators::CompareOp);
 REGISTER_LITE_OP(less_than, paddle::lite::operators::CompareOp);
 REGISTER_LITE_OP(less_equal, paddle::lite::operators::CompareOp);
 REGISTER_LITE_OP(greater_than, paddle::lite::operators::CompareOp);
diff --git a/lite/tools/cmake_tools/create_fake_kernel_registry.py b/lite/tools/cmake_tools/create_fake_kernel_registry.py
index 140d77320704f62dfb2492eec3ad7238fe3868ff..35012d5b163aac2b6998790b4cfcf31e16cb1454 100644
--- a/lite/tools/cmake_tools/create_fake_kernel_registry.py
+++ b/lite/tools/cmake_tools/create_fake_kernel_registry.py
@@ -18,6 +18,9 @@ import logging
 from ast import RegisterLiteKernelParser
 from utils import *
 
+if len(sys.argv) != 4:
+    print("Error: create_fake_kernel_registry.py requires three inputs!")
+    exit(1)
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
 kernelmap_path = sys.argv[3]
diff --git a/lite/tools/cmake_tools/parse_kernel_registry.py b/lite/tools/cmake_tools/parse_kernel_registry.py
index f4f0b95483687d3785168c132d30ac8a4fa87c8e..6c020ec438682b670e4e36a926095fed5452ec37 100644
--- a/lite/tools/cmake_tools/parse_kernel_registry.py
+++ b/lite/tools/cmake_tools/parse_kernel_registry.py
@@ -12,10 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import print_function
 import sys
 import logging
 from ast import RegisterLiteKernelParser
 
+if len(sys.argv) != 5:
+    print("Error: parse_kernel_registry.py requires four inputs!")
+    exit(1)
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
 minkernels_list_path = sys.argv[3]
diff --git a/lite/tools/cmake_tools/parse_op_registry.py b/lite/tools/cmake_tools/parse_op_registry.py
index db58c455a9d5863ec0c66d7783871831c73c120f..7eb3337ed87b708102b2032de9a279fcae2d321c 100644
--- a/lite/tools/cmake_tools/parse_op_registry.py
+++ b/lite/tools/cmake_tools/parse_op_registry.py
@@ -13,10 +13,14 @@
 # limitations under the License.
 ''' Collect op registry information. '''
 
+from __future__ import print_function
 import sys
 import logging
 from ast import RegisterLiteOpParser
 
+if len(sys.argv) != 5:
+    print("Error: parse_op_registry.py requires four inputs!")
+    exit(1)
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
 minops_list_path = sys.argv[3]
diff --git a/lite/tools/cmake_tools/record_supported_kernel_op.py b/lite/tools/cmake_tools/record_supported_kernel_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6a3af6bd3e5a2decfb6b3b65b0357bff8b4a378
--- /dev/null
+++ b/lite/tools/cmake_tools/record_supported_kernel_op.py
@@ -0,0 +1,129 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import sys
+import logging
+from ast import RegisterLiteKernelParser
+from ast import RegisterLiteOpParser
+
+if len(sys.argv) != 4:
+    print("Error: record_supported_kernel_op.py requires three inputs!")
+    exit(1)
+kernels_list_path = sys.argv[1]
+ops_list_path = sys.argv[2]
+kernel_op_map_dest_path = sys.argv[3]
+
+
+out_lines = [
+'''
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <map>
+#include <string>
+#include <vector>
+
+const std::vector<std::vector<std::string>> supported_ops_target = {
+'''
+]
+
+ops_lines = []
+
+# valid targets and valid_ops
+valid_targets = ["kUnk", "kHost", "kX86", "kCUDA", "kARM", "kOpenCL", "kAny", "kFPGA", "kNPU", "kXPU"]
+valid_ops = [[], [], [], [], [], [], [], [], [], []]
+class TargetType:
+    kUnk = 0
+    kHost = 1
+    kX86 = 2
+    kCUDA = 3
+    kARM = 4
+    kOpenCL = 5
+    kFPGA = 7
+    kNPU = 8
+    kXPU = 9
+    kAny = 6  # any target
+
+# record the op_type of valid kernels into `valid_ops`, grouped by target type
+with open(kernels_list_path) as f:
+    paths = set([path for path in f])
+    for path in paths:
+        with open(path.strip()) as g:
+            c = g.read()
+            kernel_parser = RegisterLiteKernelParser(c)
+            kernel_parser.parse()
+            for k in kernel_parser.kernels:
+                if hasattr(TargetType, k.target):
+                    index = getattr(TargetType, k.target)
+                    valid_ops[index].append(k.op_type)
+
+# remove duplicated ops
+for target in valid_targets:
+    index = getattr(TargetType, target)
+    valid_ops[index] = list(set(valid_ops[index]))
+
+paths = set()
+with open(ops_list_path) as f:
+    paths = set([path for path in f])
+    for path in paths:
+        str_info = open(path.strip()).read()
+        op_parser = RegisterLiteOpParser(str_info)
+        ops = op_parser.parse()
+        for op in ops:
+            if "_grad" in op:
+                continue
+            out = '    {"%s", { "' % op
+            op_targets = []
+            for target in valid_targets:
+                if op in valid_ops[getattr(TargetType, target)]:
+                    op_targets.append(target)
+            if len(op_targets) > 0:
+                out = out + '", "'.join(op_targets) + '" }}'
+            else:
+                # op with unknown target type: kUnk = 0
+                valid_ops[0].append(op)
+                out = out + 'kUnk" }}'
+            ops_lines.append(out)
+
+with open(kernel_op_map_dest_path, 'w') as f:
+    logging.info("write kernel list to %s" % kernel_op_map_dest_path)
+    f.write('\n'.join(out_lines))
+    # write the per-target op lists into the header file
+    for target in valid_targets:
+        if len(valid_ops[getattr(TargetType, target)]) == 0:
+            f.write("\n    // %s_OPS: " % target)
+            f.write('\n    {},')
+        else:
+            f.write("\n    // %s_OPS: " % target)
+            f.write('\n    {"')
+            f.write('","'.join(valid_ops[getattr(TargetType, target)]))
+            f.write('"},\n')
+    f.write('};')
+    # write the op->targets map into the header file
+    f.write('\nconst std::map<std::string, std::vector<std::string>> supported_ops = {\n')
+    f.write(',\n'.join(ops_lines))
+    f.write('\n};')
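For orientation: the script above generates supported_kernel_op_info.h with two tables, supported_ops_target (a per-target op list indexed by TargetType value) and supported_ops (an op-to-targets map). A minimal sketch of the emitted header's shape, with hypothetical op names rather than real generator output:

```cpp
// Illustrative shape of the generated supported_kernel_op_info.h; the op
// names below are hypothetical placeholders, not real generator output.
#pragma once
#include <map>
#include <string>
#include <vector>

const std::vector<std::vector<std::string>> supported_ops_target = {
    // kUnk_OPS:
    {},
    // kHost_OPS:
    {"feed", "fetch"},
    // kX86_OPS:
    {"softmax"},
    // ... kCUDA_OPS through kXPU_OPS follow, one list per TargetType value.
};

const std::map<std::string, std::vector<std::string>> supported_ops = {
    {"conv2d", {"kARM", "kOpenCL"}},
    {"softmax", {"kHost", "kX86", "kARM"}},
};
```

ParseInputCommand() and CheckIfModelSupported() in model_optimize_tool.cc index supported_ops_target with static_cast<int>(target) and render the per-op support matrix from supported_ops.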
diff --git a/mobile/src/common/log.h b/mobile/src/common/log.h
index 69654c505d234709d6c3119be346cefaf82c04a9..3b42188b62278c0acde41d52d68cc4b48ee6cda9 100644
--- a/mobile/src/common/log.h
+++ b/mobile/src/common/log.h
@@ -80,7 +80,6 @@ static const char *ANDROID_LOG_TAG =
 #endif
 
 enum LogLevel {
-  kNO_LOG,
   kLOG_ERROR,
   kLOG_WARNING,
   kLOG_INFO,
@@ -89,15 +88,16 @@ enum LogLevel {
   kLOG_DEBUG1,
   kLOG_DEBUG2,
   kLOG_DEBUG3,
-  kLOG_DEBUG4
+  kLOG_DEBUG4,
+  kNO_LOG,
 };
 
 // log level
 static LogLevel log_level = kLOG_DEBUG4;
 
-static std::vector<std::string> logs{"NO     ", "ERROR  ", "WARNING", "INFO   ",
-                                     "VERBOSE", "DEBUG  ", "DEBUG1 ", "DEBUG2 ",
-                                     "DEBUG3 ", "DEBUG4 "};
+static std::vector<std::string> logs{"ERROR  ", "WARNING", "INFO   ", "VERBOSE",
+                                     "DEBUG  ", "DEBUG1 ", "DEBUG2 ", "DEBUG3 ",
+                                     "DEBUG4 ", "NO     "};
 struct ToLog;
 struct Print;
 
@@ -217,7 +217,6 @@ struct ToLog {
 #define ANDROIDLOGV(...)
 
 enum LogLevel {
-  kNO_LOG,
   kLOG_ERROR,
   kLOG_WARNING,
   kLOG_INFO,
@@ -226,7 +225,8 @@ enum LogLevel {
   kLOG_DEBUG1,
   kLOG_DEBUG2,
   kLOG_DEBUG3,
-  kLOG_DEBUG4
+  kLOG_DEBUG4,
+  kNO_LOG
 };
 
 struct ToLog;
diff --git a/mobile/src/framework/cl/cl_engine.h b/mobile/src/framework/cl/cl_engine.h
index 2e21dd9e395354d2bd5e35a648687a6116347caf..cf758f8328338f936e26270c24f2bf73688312c7 100644
--- a/mobile/src/framework/cl/cl_engine.h
+++ b/mobile/src/framework/cl/cl_engine.h
@@ -124,9 +124,9 @@ class CLEngine {
     if (status != CL_SUCCESS || ret_size / sizeof(size_t) < 3) {
       return CLLocalWorkSizeInfo(0, 0, 0, 0);
     }
-    DLOG << max_work_item_sizes[0];
-    DLOG << max_work_item_sizes[1];
-    DLOG << max_work_item_sizes[2];
+    DLOG << " max_work_item_sizes {" << max_work_item_sizes[0] << ", "
+         << max_work_item_sizes[1] << ", " << max_work_item_sizes[2] << "}";
+
     localWorkSizeInfo_ = CLLocalWorkSizeInfo(max_work_group_size,
                                              max_work_item_sizes[0],
                                              max_work_item_sizes[1],
                                              max_work_item_sizes[2]);
@@ -182,8 +182,8 @@ class CLEngine {
     cl_program p =
         clCreateProgramWithSource(context, 1, &source, sourceSize, &status_);
-    DLOG << " cl kernel from source";
-    DLOG << " source size: " << sourceSize[0];
+    LOG(kLOG_DEBUG4) << " cl kernel from source";
+    LOG(kLOG_DEBUG4) << " source size: " << sourceSize[0];
     CL_CHECK_ERRORS(status_);
 
     std::unique_ptr<_cl_program, CLProgramDeleter> program_ptr(p);
diff --git a/mobile/src/framework/cl/cl_helper.h b/mobile/src/framework/cl/cl_helper.h
index 893456211d0429701b49d0f0be654beaad16e0e2..db9aa37ae2b7219131b5950e54ec008828f1fc70 100644
--- a/mobile/src/framework/cl/cl_helper.h
+++ b/mobile/src/framework/cl/cl_helper.h
@@ -36,9 +36,9 @@ class CLHelper {
 
   void AddKernel(const std::string &kernel_name, const std::string &file_name,
                  const std::string &options = "") {
-    DLOG << " begin add kernel ";
+    LOG(kLOG_DEBUG1) << " begin add kernel ";
     auto kernel = scope_->GetKernel(kernel_name, file_name, options);
-    DLOG << " add kernel ing ";
+    LOG(kLOG_DEBUG1) << " end add kernel ";
     kernels.emplace_back(std::move(kernel));
   }
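On the kNO_LOG move in log.h above: LOG(level) only emits when the message level does not exceed the global log_level, so placing kNO_LOG after kLOG_DEBUG4 turns every LOG(kNO_LOG) statement into a no-op under any normal threshold. A minimal sketch of that comparison, with the real macro machinery simplified to a plain function:

```cpp
#include <iostream>

// Same ordering as the patched enum in mobile/src/common/log.h.
enum LogLevel {
  kLOG_ERROR,
  kLOG_WARNING,
  kLOG_INFO,
  kLOG_VERBOSE,
  kLOG_DEBUG,
  kLOG_DEBUG1,
  kLOG_DEBUG2,
  kLOG_DEBUG3,
  kLOG_DEBUG4,
  kNO_LOG,  // now the largest value, so it never passes the filter below
};

static LogLevel log_level = kLOG_DEBUG4;  // default threshold in log.h

// Simplified stand-in for the filter condition inside the LOG(level) macro.
bool ShouldPrint(LogLevel level) { return level <= log_level; }

int main() {
  std::cout << std::boolalpha
            << ShouldPrint(kLOG_INFO) << "\n"   // true: below the threshold
            << ShouldPrint(kNO_LOG) << "\n";    // false: message is silenced
  return 0;
}
```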
diff --git a/mobile/src/framework/cl/cl_image.h b/mobile/src/framework/cl/cl_image.h
index d3d48cda8b86b07e76658ef903863268042ab36f..f891e41a6a715f4e97776f90afcf42945e2449cf 100644
--- a/mobile/src/framework/cl/cl_image.h
+++ b/mobile/src/framework/cl/cl_image.h
@@ -87,14 +87,14 @@ class CLImage {
     PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
                           " need call SetTensorData first");
 
-    DLOG << " begin init cl image ";
+    LOG(kNO_LOG) << " begin init cl image ";
     image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);
 
     half_t *image_data = new half_t[product(image_dims_) * 4];
 
-    DLOG << " convert to image";
+    LOG(kNO_LOG) << " convert to image";
     converter->NCHWToImage(tensor_data_, image_data, tensor_dims_);
-    DLOG << " end convert to image";
+    LOG(kNO_LOG) << " end convert to image";
 
     InitCLImage(context, image_dims_[0], image_dims_[1], image_data);
 
@@ -105,7 +105,7 @@ class CLImage {
     tensor_data_ = nullptr;
     image_converter_ = converter;
     initialized_ = true;
-    DLOG << " end init cl image";
+    LOG(kNO_LOG) << " end init cl image";
   }
 
   void InitNImage(cl_context context, cl_command_queue command_queue) {
@@ -137,9 +137,9 @@ class CLImage {
     //  CLImageConverterFolder();
     CLImageConverterNormal *normal_converter = new CLImageConverterNormal();
     PADDLE_MOBILE_ENFORCE(!shared_mem_, "do not init mem after shared .")
-    DLOG << " to get image dims ";
+    // LOG(kNO_LOG) << " to get image dims ";
     image_dims_ = normal_converter->InitImageDimInfoWith(dim);
-    DLOG << " end get image dims " << image_dims_;
+    // LOG(kNO_LOG) << " end get image dims " << image_dims_;
 
     InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
 
@@ -148,7 +148,7 @@ class CLImage {
     image_converter_ = normal_converter;
     cl_event_ = CLEngine::Instance()->CreateEvent(context);
     initialized_ = true;
-    DLOG << " end init cl image";
+    // LOG(kNO_LOG) << " end init cl image";
   }
   /**
    * create fake size cl_mem for mem share
   */
@@ -169,9 +169,9 @@ class CLImage {
     InitCLImage(context, real_image_dims_[0], real_image_dims_[1], nullptr);
     // cheat cl_image they got what they wanted
     image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);
-    DLOG << "InitFakeSizeImage ... ";
-    DLOG << "real_image_dims: " << real_image_dims_;
-    DLOG << "image_dims_: " << image_dims_;
+    LOG(kNO_LOG) << "InitFakeSizeImage ... ";
+    LOG(kNO_LOG) << "real_image_dims: " << real_image_dims_;
+    LOG(kNO_LOG) << "image_dims_: " << image_dims_;
     PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] &&
                               real_image_dims_[1] >= image_dims_[1],
                           "real image is not enough");
@@ -182,7 +182,7 @@ class CLImage {
 
     initialized_ = true;
     shared_mem_ = true;
-    DLOG << " end init FakeSizeImage";
+    LOG(kNO_LOG) << " end init FakeSizeImage";
   }
   /**
    * init cl mem with an existing cl mem
   */
@@ -197,15 +197,15 @@ class CLImage {
     real_image_dims_ = src.real_image_dims_;
     image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);
-    DLOG << "InitWithExistMem ... ";
-    DLOG << "real_image_dims: " << real_image_dims_;
-    DLOG << "image_dims_: " << image_dims_;
+    LOG(kNO_LOG) << "InitWithExistMem ... ";
"; + LOG(kNO_LOG) << "real_image_dims: " << real_image_dims_; + LOG(kNO_LOG) << "image_dims_: " << image_dims_; if (real_image_dims_[0] < image_dims_[0] || real_image_dims_[1] < image_dims_[1]) { - DLOG << "real image is not enough!"; - DLOG << "real_image_dims: " << real_image_dims_; - DLOG << "image_dims_: " << image_dims_; + LOG(kNO_LOG) << "real image is not enough!"; + LOG(kNO_LOG) << "real_image_dims: " << real_image_dims_; + LOG(kNO_LOG) << "image_dims_: " << image_dims_; } PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] && real_image_dims_[1] >= image_dims_[1], @@ -221,7 +221,7 @@ class CLImage { initialized_ = true; shared_mem_ = true; - DLOG << " end init WithExistMem"; + LOG(kNO_LOG) << " end init WithExistMem"; } void InitConv2dTransposeFilterCLImage(cl_context context, diff --git a/mobile/src/framework/cl/cl_scope.h b/mobile/src/framework/cl/cl_scope.h index 643ce32b57616305da0c581d6d50dfcbbc4f1b1d..25552165640cca5ef31b53b7fe442214384eeab8 100644 --- a/mobile/src/framework/cl/cl_scope.h +++ b/mobile/src/framework/cl/cl_scope.h @@ -47,14 +47,14 @@ class CLScope { std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel( const std::string &kernel_name, const std::string &file_name, const std::string &options) { - DLOG << " to get program " << file_name; + LOG(kLOG_DEBUG2) << " to get program " << file_name; auto program = Program(file_name, kernel_name, options); - DLOG << " end get program ~ "; - DLOG << " to create kernel: " << kernel_name; + LOG(kLOG_DEBUG2) << " end get program ~ "; + LOG(kLOG_DEBUG2) << " to create kernel: " << kernel_name; std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel( clCreateKernel(program, kernel_name.c_str(), &status_)); CL_CHECK_ERRORS(status_); - DLOG << " end create kernel ~ "; + LOG(kLOG_DEBUG2) << " end create kernel ~ "; return std::move(kernel); } @@ -81,9 +81,11 @@ class CLScope { auto program = CLEngine::Instance()->CreateProgramWithSource( context_, source.c_str()); - DLOG << " --- begin build program -> " << program_key << " --- "; + LOG(kLOG_DEBUG3) << " --- begin build program -> " << program_key + << " --- "; CLEngine::Instance()->BuildProgram(program.get(), options); - DLOG << " --- end build program -> " << program_key << " --- "; + LOG(kLOG_DEBUG3) << " --- end build program -> " << program_key + << " --- "; programs_[program_key] = std::move(program); return programs_[program_key].get(); @@ -100,9 +102,11 @@ class CLScope { context_, CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name); - DLOG << " --- begin build program -> " << program_key << " --- "; + LOG(kLOG_DEBUG3) << " --- begin build program ele-> " << program_key + << " --- "; CLEngine::Instance()->BuildProgram(program.get(), options); - DLOG << " --- end build program -> " << program_key << " --- "; + LOG(kLOG_DEBUG3) << " --- end build program ele-> " << program_key + << " --- "; programs_[program_key] = std::move(program); return programs_[program_key].get(); diff --git a/mobile/src/framework/context.h b/mobile/src/framework/context.h index 944d54cc499f2a3c4fcad5c2fb0dfc4fe9bcac1d..18e40311bc2a5d555bb02cf0eb7af6356cbbf0b0 100644 --- a/mobile/src/framework/context.h +++ b/mobile/src/framework/context.h @@ -44,15 +44,13 @@ namespace framework { struct CPUContext { private: CPUContext(); - virtual ~CPUContext() {} public: + ~CPUContext() {} + static CPUContext* Context() { - static CPUContext* ctx = nullptr; - if (ctx == nullptr) { - ctx = new CPUContext(); - } - return ctx; + static CPUContext ctx; + return &ctx; } void set_thread_num(int 
diff --git a/mobile/src/framework/executor.cpp b/mobile/src/framework/executor.cpp
index d03cefe59a221093d4e5fb4e86273b3007097d9f..cda5c5522c961c70fc15bf76fcd650a17bb76835 100644
--- a/mobile/src/framework/executor.cpp
+++ b/mobile/src/framework/executor.cpp
@@ -80,7 +80,7 @@ Executor<Device, T>::Executor(const Program<Device> &program,
     std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
     for (int j = 0; j < ops.size(); ++j) {
       std::shared_ptr<OpDesc> op_desc = ops[j];
-      DLOG << "create op: " << op_desc->Type();
+      LOG(kLOG_INFO) << "create op[" << j << "]: " << op_desc->Type();
 
       auto op_handler = OpRegistry<Device>::CreateOp(
           op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
@@ -111,7 +111,8 @@ Executor<Device, T>::Executor(const Program<Device> &program,
     clock_gettime(CLOCK_MONOTONIC, &ts);
     profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
 #endif
-    DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
+    LOG(kLOG_INFO) << "Initialize op[" << count++
+                   << "]: " << op_handler->Type();
     if (op_handler->Type() == "feed" || op_handler->Type() == "fetch") {
       op_handler->setPrePostType(config_.pre_post_type);
     }
@@ -1015,7 +1016,7 @@ void Executor<Device, T>::InitMemory() {
         const TensorDesc &desc = var_desc->Tensor_desc();
         //          DDim ddim = make_ddim(desc.Dims());
         DDim ddim = cl_image->dims();
-        DLOG << var_desc->Name();
+        LOG(kLOG_DEBUG1) << "init image of " << var_desc->Name();
         cl_image->InitEmptyImage(context, command_queue, ddim);
       }
     }
diff --git a/mobile/src/framework/loader.cpp b/mobile/src/framework/loader.cpp
index 34cf6253cb4571c3b52fe61161cba3e140eb0110..31274743f8b1d4b3d8195526e1ae77129c2729bb 100644
--- a/mobile/src/framework/loader.cpp
+++ b/mobile/src/framework/loader.cpp
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
 #include "framework/loader.h"
+#include
 #include "framework/lod_tensor.h"
 #include "framework/program/program-optimize/program_optimize.h"
 
@@ -173,7 +174,7 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
   rewind(fp);
 
   DLOG << "model size: " << size;
-
+  PADDLE_MOBILE_ENFORCE(size > 0, "model size should > 0")
   *out = reinterpret_cast<uint8_t *>(malloc(size));
 
   size_t cur_len = 0;
diff --git a/mobile/src/framework/operator.cpp b/mobile/src/framework/operator.cpp
index 402512c7237be0ca26470361cc16369bd97f7758..a091a49b35203445cda48b2387413193079ecd5e 100644
--- a/mobile/src/framework/operator.cpp
+++ b/mobile/src/framework/operator.cpp
@@ -62,31 +62,39 @@ void OperatorBase<Dtype>::Run() {
   DLOG << "-------------" << type_ << "----------------------------";
   vector<string> input_keys = GetInputKeys();
   for (const auto key : input_keys) {
-    auto var_vec_in = inputs_.at(key);
-    for (int i = 0; i < var_vec_in.size(); ++i) {
-      auto var = this->scope_->FindVar(var_vec_in[i]);
-      if (var->IsInitialized() &&
-          var->template IsType<framework::LoDTensor>()) {
-        const Tensor *tensor = var->template Get<framework::LoDTensor>();
-        if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
+    if (inputs_.count(key) > 0) {
+      auto var_vec_in = inputs_.at(key);
+      for (int i = 0; i < var_vec_in.size(); ++i) {
+        auto var = this->scope_->FindVar(var_vec_in[i]);
+        if (var->IsInitialized() &&
+            var->template IsType<framework::LoDTensor>()) {
+          const Tensor *tensor = var->template Get<framework::LoDTensor>();
+          if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
 #ifdef PADDLE_MOBILE_FPGA
-        DLOG << var_vec_in[i];
+          DLOG << var_vec_in[i];
 #endif
+        }
       }
+    } else {
+      DLOG << "did not find key (" << key << ") in inputs_";
     }
   }
   for (const auto key : GetOutKeys()) {
-    auto var_vec_out = outputs_.at(key);
-    for (int i = 0; i < var_vec_out.size(); ++i) {
-      auto var = scope_->FindVar(var_vec_out[i]);
-      if (var->IsInitialized() &&
-          var->template IsType<framework::LoDTensor>()) {
-        const Tensor *tensor = var->template Get<framework::LoDTensor>();
-        if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
+    if (outputs_.count(key) > 0) {
+      auto var_vec_out = outputs_.at(key);
+      for (int i = 0; i < var_vec_out.size(); ++i) {
+        auto var = scope_->FindVar(var_vec_out[i]);
+        if (var->IsInitialized() &&
+            var->template IsType<framework::LoDTensor>()) {
+          const Tensor *tensor = var->template Get<framework::LoDTensor>();
+          if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
 #ifdef PADDLE_MOBILE_FPGA
-        DLOG << var_vec_out[i];
+          DLOG << var_vec_out[i];
 #endif
+        }
       }
+    } else {
+      DLOG << "did not find key (" << key << ") in outputs_";
     }
   }
 #endif
@@ -100,27 +108,37 @@ void OperatorBase<GPU_CL>::Run() {
   DLOG << "-------------" << type_ << "----------------------------";
   vector<string> input_keys = GetInputKeys();
   for (const auto key : input_keys) {
-    auto var_vec_in = inputs_.at(key);
-    for (int i = 0; i < var_vec_in.size(); ++i) {
-      auto var = scope_->FindVar(var_vec_in[i]);
-      if (var->IsInitialized() && var->template IsType<framework::CLImage>()) {
-        const CLImage *cl_image = var->template Get<framework::CLImage>();
-        if (cl_image) {
-          DLOG << type_ << " input- " << key << "=" << *cl_image;
+    if (inputs_.count(key) > 0) {
+      auto var_vec_in = inputs_.at(key);
+      for (int i = 0; i < var_vec_in.size(); ++i) {
+        auto var = scope_->FindVar(var_vec_in[i]);
+        if (var->IsInitialized() &&
+            var->template IsType<framework::CLImage>()) {
+          const CLImage *cl_image = var->template Get<framework::CLImage>();
+          if (cl_image) {
+            DLOG << type_ << " input- " << key << "=" << *cl_image;
+          }
         }
       }
+    } else {
+      DLOG << "did not find key (" << key << ") in inputs_";
     }
   }
   for (const auto key : GetOutKeys()) {
-    auto var_vec_out = outputs_.at(key);
-    for (int i = 0; i < var_vec_out.size(); ++i) {
-      auto var = scope_->FindVar(var_vec_out[i]);
-      if (var->IsInitialized() && var->template IsType<framework::CLImage>()) {
-        const CLImage *cl_image = var->template Get<framework::CLImage>();
-        if (cl_image) {
-          DLOG << type_ << " output- " << key << "=" << *cl_image;
+    if (outputs_.count(key) > 0) {
+      auto var_vec_out = outputs_.at(key);
+      for (int i = 0; i < var_vec_out.size(); ++i) {
+        auto var = scope_->FindVar(var_vec_out[i]);
+        if (var->IsInitialized() &&
+            var->template IsType<framework::CLImage>()) {
+          const CLImage *cl_image = var->template Get<framework::CLImage>();
+          if (cl_image) {
+            DLOG << type_ << " output- " << key << "=" << *cl_image;
+          }
         }
       }
+    } else {
+      DLOG << "did not find key (" << key << ") in outputs_";
     }
   }
 #endif
diff --git a/mobile/src/operators/op_param.h b/mobile/src/operators/op_param.h
index e58159fbb74e7a91a88c3e76f8aa713b679d94b8..85dabe3bcd009c8c00a59ccf74b7651d907b6dc2 100644
--- a/mobile/src/operators/op_param.h
+++ b/mobile/src/operators/op_param.h
@@ -344,10 +344,14 @@ class OpParam {
   template <typename T>
   static const T GetAttr(const string &key, const AttributeMap &map) {
+    PADDLE_MOBILE_ENFORCE(HasAttr(key, map), "%s is not contained in attr map",
+                          key.c_str())
     return ((Attribute)map.at(key)).Get<T>();
   }
   static const std::string GetStringAttr(const string &key,
                                          const AttributeMap &map) {
+    PADDLE_MOBILE_ENFORCE(HasAttr(key, map), "%s is not contained in attr map",
+                          key.c_str())
     return ((Attribute)map.at(key)).GetString();
   }
 
@@ -355,6 +359,10 @@ class OpParam {
     return map.count(key) > 0;
   }
 
+  static const bool HasVar(const string &key, const VariableNameMap &var_map) {
+    return var_map.count(key) > 0;
+  }
+
   template <typename T>
   static T *GetVarValue(const string &key, const VariableNameMap &var_map,
                         const Scope &scope) {
@@ -3100,16 +3108,37 @@ class NearestInterpolationParam : public OpParam {
                             const AttributeMap &attrs, Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
     input_x_ = InputXFrom<GType>(inputs, *scope);
-    input_outsize_ = InputOutSizeFrom<GType>(inputs, *scope);
+    const bool has_out_size = HasVar("OutSize", inputs);
+
+    if (has_out_size) {
+      input_outsize_ = InputOutSizeFrom<GType>(inputs, *scope);
+    }
+
     out_ = OutFrom<GType>(outputs, *scope);
-    out_h_ = GetAttr<int>("out_h", attrs);
-    out_w_ = GetAttr<int>("out_w", attrs);
+
+    if (HasAttr("out_h", attrs)) {
+      out_h_ = GetAttr<int>("out_h", attrs);
+    } else if (HasAttr("out_h ", attrs)) {
+      // some models are broken: the attr name carries a trailing space
+      out_h_ = GetAttr<int>("out_h ", attrs);
+    }
+
+    if (HasAttr("out_w", attrs)) {
+      out_w_ = GetAttr<int>("out_w", attrs);
+    } else if (HasAttr("out_w ", attrs)) {
+      // some models are broken: the attr name carries a trailing space
+      out_w_ = GetAttr<int>("out_w ", attrs);
+    }
+
+    LOG(kLOG_DEBUG1) << "out_h_: " << out_h_;
+    LOG(kLOG_DEBUG1) << "out_w_: " << out_w_;
+
     if (HasAttr("scale", attrs)) {
       has_scale_ = true;
       scale_ = GetAttr<float>("scale", attrs);
     }
-    DLOG << "has_scale_: " << has_scale_;
-    DLOG << "scale_: " << scale_;
+    LOG(kLOG_DEBUG1) << "has_scale_: " << has_scale_;
+    LOG(kLOG_DEBUG1) << "scale_: " << scale_;
   }
   const GType *InputX() const { return input_x_; }
   const GType *InputOutPutSize() const { return input_outsize_; }
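The NearestInterpolationParam change above reads out_h and out_w defensively: it checks HasAttr first and falls back to the attribute name with a trailing space, which some exported models carry. A self-contained sketch of that lookup pattern, with AttributeMap simplified to a std::map of ints (an assumption standing in for paddle-mobile's real attribute type):

```cpp
#include <iostream>
#include <map>
#include <string>

// Simplified stand-in for paddle-mobile's AttributeMap (assumption).
using AttributeMap = std::map<std::string, int>;

static bool HasAttr(const std::string &key, const AttributeMap &map) {
  return map.count(key) > 0;
}

// Look up `key`, then retry with a trailing space ("out_h" -> "out_h "),
// mirroring the fallback added in NearestInterpolationParam.
static int GetIntAttrLenient(const std::string &key,
                             const AttributeMap &attrs, int default_value) {
  if (HasAttr(key, attrs)) return attrs.at(key);
  if (HasAttr(key + " ", attrs)) return attrs.at(key + " ");
  return default_value;
}

int main() {
  AttributeMap attrs = {{"out_h ", 224}};  // note the trailing space
  std::cout << GetIntAttrLenient("out_h", attrs, -1) << std::endl;  // 224
  return 0;
}
```

The same HasAttr guard is what lets the new PADDLE_MOBILE_ENFORCE in GetAttr fail with the attribute name instead of an opaque std::out_of_range from map::at.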
diff --git a/mobile/src/pass/memory_optimize_cl.cpp b/mobile/src/pass/memory_optimize_cl.cpp
index 355123349d645075fd2ccc37144144da7d332a8f..53bb675f17b2bae9c3954fa57894b8f73fc611fe 100644
--- a/mobile/src/pass/memory_optimize_cl.cpp
+++ b/mobile/src/pass/memory_optimize_cl.cpp
@@ -14,6 +14,7 @@ limitations under the License.
 */
 #ifdef PADDLE_MOBILE_CL
 #include "pass/memory_optimize_cl.h"
 #include <algorithm>
+#include
 #include "framework/cl/cl_image.h"
 #include "framework/lod_tensor.h"
 
 namespace paddle_mobile {
@@ -79,7 +80,7 @@ void MemoryOptPassCl::operator()(
   std::vector<ClVarNode *> fetch_var_nodes;
 
   for (const auto &op : block->Ops()) {
-    DLOG << "op_desc->Type(): " << op->Type();
+    LOG(kNO_LOG) << "op_desc->Type(): " << op->Type();
     for (const auto &outputs : op->GetOutputs()) {
       for (const auto &output : outputs.second) {
         // if it is neither persistable nor excluded, add it to the
         // analysis nodes
         if (!IsPersistable(output) &&
             std::find(exclude_var_names.begin(), exclude_var_names.end(),
                       output) == exclude_var_names.end()) {
-          DLOG << "output: " << output;
+          LOG(kNO_LOG) << "output: " << output;
           ClVarNode *node = CreateNode(output);
           analysis_nodes_.push(node);
         }
@@ -100,7 +101,7 @@ void MemoryOptPassCl::operator()(
         if (!IsPersistable(input) &&
             std::find(exclude_var_names.begin(), exclude_var_names.end(),
                       input) == exclude_var_names.end()) {
-          DLOG << "input: " << input;
+          LOG(kNO_LOG) << "input: " << input;
           ClVarNode *node = CreateNode(input);
           analysis_nodes_.push(node);
           if (op->Type() == "fetch") {
@@ -114,7 +115,7 @@ void MemoryOptPassCl::operator()(
         if (!IsPersistable(output) &&
             std::find(exclude_var_names.begin(), exclude_var_names.end(),
                       output) == exclude_var_names.end()) {
-          DLOG << "output: " << output;
+          LOG(kNO_LOG) << "output: " << output;
           ClVarNode *node = CreateNode(output);
           analysis_nodes_.push(node);
         }
@@ -164,8 +165,8 @@ void MemoryOptPassCl::ShareData(
   cl_command_queue command_queue = scope->GetCLScpoe()->CommandQueue();
 
   for (const auto &list : reused_nodes_) {
-    DLOG << "\n";
-    DLOG << "gpu . share memory within these variables";
+    LOG(kNO_LOG) << "\n";
+    LOG(kNO_LOG) << "gpu . share memory within these variables";
     int64_t x_based_max_numl = -1;
     int64_t y_based_max_numl = -1;
     int64_t x_based_max_x = -1;