Unverified · Commit 0ef51804 · authored by chalsliu · committed by GitHub

Reduce inference library size and compile time (#53369)

* Reduce inference library size and compile time

* resolve conflicts
Parent commit: 972daa46
......@@ -64,6 +64,11 @@ function(generate_unify_header DIR_NAME)
endif()
endif()
endforeach()
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
if(${kernel_name} MATCHES ".*_grad")
continue()
endif()
endif()
# append header into extension.h
string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header_file "${header_file}")
file(APPEND ${phi_extension_header_file} "#include \"${header_file}\"\n")
......
......@@ -118,6 +118,11 @@ file(
"strings/gpu/*.cu"
"fusion/gpu/*.cu")
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cc$")
list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cu$")
endif()
if(WITH_CUTLASS)
execute_process(
COMMAND ${CMAKE_COMMAND} -E make_directory
......@@ -184,6 +189,10 @@ else()
"fusion/cpu/*.cc")
endif()
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
list(FILTER kernel_cc EXCLUDE REGEX ".*_grad_kernel\\.cc$")
endif()
file(
GLOB
kernel_xpu
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script simply removes grad kernels. You should use this script
when cmake ON_INFER=ON, which can greatly reduce the volume of the inference library.
"""
import glob
import os
def is_balanced(content):
    """Report whether *content* holds a balanced mix of ``()`` and ``{}``.

    A string containing no ``{`` at all is treated as unbalanced, because a
    complete kernel registration must include a braced body.

    Args:
        content (str): text to scan.

    Returns:
        boolean: True when at least one ``{`` appears and every bracket is
            properly matched and nested.
    """
    if '{' not in content:
        return False
    openers = '({'
    closers = ')}'
    pending = []
    for ch in content:
        if ch in openers:
            pending.append(ch)
        elif ch in closers:
            # A closer with nothing open, or closing the wrong opener,
            # makes the string unbalanced.
            if not pending or pending.pop() != openers[closers.index(ch)]:
                return False
    return not pending
def grad_kernel_definition(content, kernel_pattern, grad_pattern):
    """Collect complete grad-kernel registration spans from *content*.

    Starting at each occurrence of *kernel_pattern*, the text is scanned
    forward until every bracket opened since the match is closed again and
    at least one ``{`` has been seen; that balanced span is one complete
    registration.  Spans containing *grad_pattern* are collected.

    PERF: the original re-checked every growing prefix with is_balanced,
    making each span quadratic in its length.  This version tracks the
    bracket stack incrementally, one pass per span.

    Args:
        content (str): file content.
        kernel_pattern (str): registration macro name to anchor on.
        grad_pattern (str): substring marking a grad kernel (e.g. '_grad,').

    Returns:
        (list, int): grad kernel definitions in file and count.
    """
    results = []
    count = 0
    start = 0
    lens = len(content)
    while True:
        index = content.find(kernel_pattern, start)
        if index == -1:
            return results, count
        stack = []
        has_brace = False
        end = -1  # exclusive end of the balanced span; -1 while not found
        for i in range(index, lens):
            c = content[i]
            if c in '({':
                stack.append(c)
                if c == '{':
                    has_brace = True
            elif c in ')}':
                expected = '(' if c == ')' else '{'
                if not stack or stack.pop() != expected:
                    # Mismatched bracket: no longer prefix can ever balance,
                    # so stop scanning (matches the original early return).
                    break
            if has_brace and not stack:
                end = i + 1
                break
        if end == -1:
            # No balanced registration follows this match; abandon the rest
            # of the file, exactly as the original did.
            return results, count
        check_str = content[index:end]
        if grad_pattern in check_str:
            results.append(check_str)
            count += 1
        start = end
def remove_grad_kernels(dry_run=False):
    """Strip PD_REGISTER_STRUCT_KERNEL grad registrations from operator files.

    Scans every ``.cc``/``.cu`` file under ``../paddle/fluid/operators``
    (relative to this script) and removes grad-kernel registration blocks
    in place.

    Args:
        dry_run (bool): when True, only print what would be removed and do
            not rewrite any file.

    Returns:
        int: number of kernel(grad) removed
    """
    pd_kernel_pattern = 'PD_REGISTER_STRUCT_KERNEL'
    register_op_pd_kernel_count = 0
    tool_dir = os.path.dirname(os.path.abspath(__file__))
    all_op = glob.glob(
        os.path.join(tool_dir, '../paddle/fluid/operators/**/*.cc'),
        recursive=True,
    )
    all_op += glob.glob(
        os.path.join(tool_dir, '../paddle/fluid/operators/**/*.cu'),
        recursive=True,
    )
    for op_file in all_op:
        with open(op_file, 'r', encoding='utf-8') as f:
            content = f.read()
        pd_kernel, pd_kernel_count = grad_kernel_definition(
            content, pd_kernel_pattern, '_grad,'
        )
        register_op_pd_kernel_count += pd_kernel_count
        # BUG FIX: the original accumulated matches across files and then
        # replayed the entire accumulated list against every file, so
        # dry_run printed earlier files' definitions paired with the
        # current file name (and did quadratic redundant replaces).
        # Only this file's own matches are relevant here.
        for to_remove in pd_kernel:
            content = content.replace(to_remove, '')
            if dry_run:
                print(op_file, to_remove)
        if not dry_run:
            with open(op_file, 'w', encoding='utf-8') as f:
                f.write(content)
    return register_op_pd_kernel_count
......@@ -12,14 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script simply removes all grad ops and kernels. You should use this script
when cmake ON_INFER=ON, which can greatly reduce the volume of the prediction library.
This script simply removes grad ops and kernels. You should use this script
when cmake ON_INFER=ON, which can greatly reduce the volume of the inference library.
"""
import argparse
import glob
import os
import re
import reduce_lib_size_util
def parse_args():
    """Parse and return the command-line options for this script.

    Returns:
        argparse.Namespace: parsed flags ``only_kernel`` and ``dry_run``,
            both False unless given on the command line.
    """
    cli = argparse.ArgumentParser(description='Remove grad op and kernels.')
    for flag in ('--only_kernel', '--dry_run'):
        cli.add_argument(flag, action='store_true', default=False)
    return cli.parse_args()
def find_type_files(cur_dir, file_type, file_list=[]):
next_level_dirs = os.listdir(cur_dir)
......@@ -42,6 +55,10 @@ def remove_grad_op_and_kernel(content, pattern1, pattern2):
def update_operator_cmake(cmake_file):
"""Update operator cmake.
Args:
cmake_file (str): cmake file path.
"""
pat1 = 'add_subdirectory(optimizers)'
pat2 = r'register_operators\(EXCLUDES.*?py_func_op.*?\)'
......@@ -66,6 +83,8 @@ def update_operator_cmake(cmake_file):
if __name__ == '__main__':
args = parse_args()
tool_dir = os.path.dirname(os.path.abspath(__file__))
all_op = glob.glob(
......@@ -92,14 +111,17 @@ if __name__ == '__main__':
# remove all grad op
op_pattern1 = r'REGISTER_OPERATOR\(.*?\);?'
op_pattern2 = r'REGISTER_OPERATOR\(.*?_grad,.*?\);?'
if args.only_kernel:
op_pattern1 = 'DISABLE_REMOVE_GRAD_OP_' + op_pattern1
op_pattern2 = 'DISABLE_REMOVE_GRAD_OP_' + op_pattern2
# remove all cpu grad kernel
cpu_kernel_pattern1 = r'REGISTER_OP_CPU_KERNEL\(.*?\);?'
cpu_kernel_pattern2 = r'REGISTER_OP_CPU_KERNEL\(.*?_grad,.*?\);?'
cpu_kernel_pattern1 = r'REGISTER_OP_CPU_KERNEL\(.*?\);?|REGISTER_OP_CPU_KERNEL_FUNCTOR\(.*?\);?'
cpu_kernel_pattern2 = r'REGISTER_OP_CPU_KERNEL\(.*?_grad,.*?\);?|REGISTER_OP_CPU_KERNEL_FUNCTOR\(.*?_grad,.*?\);?'
# remove all gpu grad kernel
gpu_kernel_pattern1 = r'REGISTER_OP_CUDA_KERNEL\(.*?\);?'
gpu_kernel_pattern2 = r'REGISTER_OP_CUDA_KERNEL\(.*?_grad,.*?\);?'
gpu_kernel_pattern1 = r'REGISTER_OP_CUDA_KERNEL\(.*?\);?|REGISTER_OP_CUDA_KERNEL_FUNCTOR\(.*?\);?'
gpu_kernel_pattern2 = r'REGISTER_OP_CUDA_KERNEL\(.*?_grad,.*?\);?|REGISTER_OP_CUDA_KERNEL_FUNCTOR\(.*?_grad,.*?\);?'
# remove all xpu grad kernel
xpu_kernel_pattern1 = r'REGISTER_OP_XPU_KERNEL\(.*?\);?'
......@@ -166,11 +188,14 @@ if __name__ == '__main__':
all_matches.extend(op_kernel)
all_matches.extend(custom_kernel)
for i in all_matches:
content = content.replace(i, '')
for to_remove in all_matches:
content = content.replace(to_remove, '')
if args.dry_run:
print(op_file, to_remove)
with open(op_file, 'w', encoding='utf-8') as f:
f.write(content)
if not args.dry_run:
with open(op_file, 'w', encoding='utf-8') as f:
f.write(content)
# 2. update operators/CMakeLists.txt
cmake_file = os.path.join(
......@@ -178,6 +203,10 @@ if __name__ == '__main__':
)
update_operator_cmake(cmake_file)
register_pd_kernel_count = reduce_lib_size_util.remove_grad_kernels(
args.dry_run
)
print('We erase all grad op and kernel for Paddle-Inference lib.')
print('%50s%10s' % ('type', 'count'))
print('%50s%10s' % ('REGISTER_OPERATOR', register_op_count))
......@@ -194,3 +223,4 @@ if __name__ == '__main__':
register_op_kernel_with_custom_type_count,
)
)
print('%50s%10s' % ('REGISTER_OP_PD_KERNEL', register_pd_kernel_count))
Markdown is supported.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register or sign in.