Unverified commit 0ef51804, authored by chalsliu and committed by GitHub

Reduce inference library size and compile time (#53369)

* Reduce inference library size and compile time

* resolve conflicts
Parent 972daa46
@@ -64,6 +64,11 @@ function(generate_unify_header DIR_NAME)
        endif()
      endif()
    endforeach()
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
if(${kernel_name} MATCHES ".*_grad")
continue()
endif()
endif()
    # append header into extension.h
    string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header_file "${header_file}")
    file(APPEND ${phi_extension_header_file} "#include \"${header_file}\"\n")
...
@@ -118,6 +118,11 @@ file(
  "strings/gpu/*.cu"
  "fusion/gpu/*.cu")
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cc$")
list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cu$")
endif()
if(WITH_CUTLASS)
  execute_process(
    COMMAND ${CMAKE_COMMAND} -E make_directory
@@ -184,6 +189,10 @@ else()
    "fusion/cpu/*.cc")
endif()
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
list(FILTER kernel_cc EXCLUDE REGEX ".*_grad_kernel\\.cc$")
endif()
file(
  GLOB
  kernel_xpu
...
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script simply removes grad kernels. You should use this script when building
with cmake ON_INFER=ON, which can greatly reduce the size of the inference library.
"""
import glob
import os
def is_balanced(content):
"""
    Check whether the string contains balanced parentheses and braces.
    Args:
        content (str): string to check.
    Returns:
        boolean: True if the string contains a '{' and all parentheses and braces are balanced.
"""
if content.find('{') == -1:
return False
stack = []
push_chars, pop_chars = '({', ')}'
for c in content:
if c in push_chars:
stack.append(c)
elif c in pop_chars:
if not len(stack):
return False
else:
stack_top = stack.pop()
balancing_bracket = push_chars[pop_chars.index(c)]
if stack_top != balancing_bracket:
return False
return not stack
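def _demo_is_balanced():
    # Illustration only (not part of the original commit): how is_balanced behaves on
    # registration-like snippets. A block counts as balanced only once its braces appear.
    assert is_balanced('PD_REGISTER_STRUCT_KERNEL(relu_grad, CPU, ALL_LAYOUT, K, float) {}')
    assert not is_balanced('PD_REGISTER_STRUCT_KERNEL(relu_grad, CPU')  # no '{' yet
    assert not is_balanced('REGISTER(a, b) {')  # '{' never closed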
def grad_kernel_definition(content, kernel_pattern, grad_pattern):
"""
    Find kernel_pattern registration blocks that contain grad_pattern.
    Args:
content(str): file content
kernel_pattern(str): kernel pattern
grad_pattern(str): grad pattern
Returns:
(list, int): grad kernel definitions in file and count.
"""
results = []
count = 0
start = 0
lens = len(content)
while True:
index = content.find(kernel_pattern, start)
if index == -1:
return results, count
i = index + 1
while i <= lens:
check_str = content[index:i]
if is_balanced(check_str):
if check_str.find(grad_pattern) != -1:
results.append(check_str)
count += 1
start = i
break
i += 1
else:
return results, count
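def _demo_grad_kernel_definition():
    # Illustration only (not part of the original commit): extract the grad registration
    # from a fabricated file content; the forward registration is left alone.
    sample = (
        'PD_REGISTER_STRUCT_KERNEL(foo, CPU, ALL_LAYOUT, FooKernel, float) {}\n'
        'PD_REGISTER_STRUCT_KERNEL(foo_grad, CPU, ALL_LAYOUT, FooGradKernel, float) {}\n'
    )
    defs, count = grad_kernel_definition(sample, 'PD_REGISTER_STRUCT_KERNEL', '_grad,')
    assert count == 1 and '_grad,' in defs[0]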
def remove_grad_kernels(dry_run=False):
"""
    Remove PD_REGISTER_STRUCT_KERNEL registrations of grad kernels from operator sources.
    Args:
        dry_run(bool): if True, only print what would be removed.
    Returns:
        int: number of grad kernel registrations removed (only reported when dry_run is True).
"""
pd_kernel_pattern = 'PD_REGISTER_STRUCT_KERNEL'
register_op_pd_kernel_count = 0
matches = []
tool_dir = os.path.dirname(os.path.abspath(__file__))
all_op = glob.glob(
os.path.join(tool_dir, '../paddle/fluid/operators/**/*.cc'),
recursive=True,
)
all_op += glob.glob(
os.path.join(tool_dir, '../paddle/fluid/operators/**/*.cu'),
recursive=True,
)
for op_file in all_op:
with open(op_file, 'r', encoding='utf-8') as f:
content = ''.join(f.readlines())
pd_kernel, pd_kernel_count = grad_kernel_definition(
content, pd_kernel_pattern, '_grad,'
)
register_op_pd_kernel_count += pd_kernel_count
matches.extend(pd_kernel)
for to_remove in matches:
content = content.replace(to_remove, '')
if dry_run:
print(op_file, to_remove)
if not dry_run:
with open(op_file, 'w', encoding='utf-8') as f:
f.write(content)
return register_op_pd_kernel_count
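def _demo_remove_grad_kernels():
    # Illustration only (not part of the original commit): a dry run just prints each
    # grad registration that would be stripped and leaves the operator sources untouched.
    found = remove_grad_kernels(dry_run=True)
    print('PD_REGISTER_STRUCT_KERNEL grad registrations found:', found)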
@@ -12,14 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
-This script simply removes all grad ops and kernels. You should use this script
-when cmake ON_INFER=ON, which can greatly reduce the volume of the prediction library.
+This script simply removes grad ops and kernels. You should use this script
+when cmake ON_INFER=ON, which can greatly reduce the volume of the inference library.
"""
import argparse
import glob
import os
import re
import reduce_lib_size_util
def parse_args():
"""Parse input arguments."""
parser = argparse.ArgumentParser(description='Remove grad op and kernels.')
parser.add_argument('--only_kernel', action='store_true', default=False)
parser.add_argument('--dry_run', action='store_true', default=False)
args = parser.parse_args()
return args
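def _demo_parse_args():
    # Illustration only (not part of the original commit): --only_kernel keeps grad op
    # registrations and removes grad kernels only; --dry_run prints matches instead of
    # rewriting the source files.
    import sys
    sys.argv = [sys.argv[0], '--only_kernel', '--dry_run']
    args = parse_args()
    assert args.only_kernel and args.dry_run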
def find_type_files(cur_dir, file_type, file_list=[]):
    next_level_dirs = os.listdir(cur_dir)
@@ -42,6 +55,10 @@ def remove_grad_op_and_kernel(content, pattern1, pattern2):
def update_operator_cmake(cmake_file):
"""Update operator cmake.
Args:
cmake_file (str): cmake file path.
"""
    pat1 = 'add_subdirectory(optimizers)'
    pat2 = r'register_operators\(EXCLUDES.*?py_func_op.*?\)'
@@ -66,6 +83,8 @@ def update_operator_cmake(cmake_file):
if __name__ == '__main__':
args = parse_args()
    tool_dir = os.path.dirname(os.path.abspath(__file__))
    all_op = glob.glob(
@@ -92,14 +111,17 @@ if __name__ == '__main__':
        # remove all grad op
        op_pattern1 = r'REGISTER_OPERATOR\(.*?\);?'
        op_pattern2 = r'REGISTER_OPERATOR\(.*?_grad,.*?\);?'
if args.only_kernel:
op_pattern1 = 'DISABLE_REMOVE_GRAD_OP_' + op_pattern1
op_pattern2 = 'DISABLE_REMOVE_GRAD_OP_' + op_pattern2
        # remove all cpu grad kernel
-        cpu_kernel_pattern1 = r'REGISTER_OP_CPU_KERNEL\(.*?\);?'
-        cpu_kernel_pattern2 = r'REGISTER_OP_CPU_KERNEL\(.*?_grad,.*?\);?'
+        cpu_kernel_pattern1 = r'REGISTER_OP_CPU_KERNEL\(.*?\);?|REGISTER_OP_CPU_KERNEL_FUNCTOR\(.*?\);?'
+        cpu_kernel_pattern2 = r'REGISTER_OP_CPU_KERNEL\(.*?_grad,.*?\);?|REGISTER_OP_CPU_KERNEL_FUNCTOR\(.*?_grad,.*?\);?'
        # remove all gpu grad kernel
-        gpu_kernel_pattern1 = r'REGISTER_OP_CUDA_KERNEL\(.*?\);?'
-        gpu_kernel_pattern2 = r'REGISTER_OP_CUDA_KERNEL\(.*?_grad,.*?\);?'
+        gpu_kernel_pattern1 = r'REGISTER_OP_CUDA_KERNEL\(.*?\);?|REGISTER_OP_CUDA_KERNEL_FUNCTOR\(.*?\);?'
+        gpu_kernel_pattern2 = r'REGISTER_OP_CUDA_KERNEL\(.*?_grad,.*?\);?|REGISTER_OP_CUDA_KERNEL_FUNCTOR\(.*?_grad,.*?\);?'
        # remove all xpu grad kernel
        xpu_kernel_pattern1 = r'REGISTER_OP_XPU_KERNEL\(.*?\);?'
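        # Illustration (not part of the original commit): the widened patterns above also
        # match the *_FUNCTOR registration macros. Applied with re.DOTALL (so '.*?' may
        # span newlines), a call such as
        #   re.findall(cpu_kernel_pattern2,
        #              'REGISTER_OP_CPU_KERNEL_FUNCTOR(foo_grad,\n    FooGradKernel<float>);',
        #              flags=re.DOTALL)
        # returns the single full registration text; the pattern1 variants presumably count
        # every kernel registration, grad or not.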
...@@ -166,11 +188,14 @@ if __name__ == '__main__': ...@@ -166,11 +188,14 @@ if __name__ == '__main__':
        all_matches.extend(op_kernel)
        all_matches.extend(custom_kernel)
-        for i in all_matches:
-            content = content.replace(i, '')
+        for to_remove in all_matches:
+            content = content.replace(to_remove, '')
+            if args.dry_run:
+                print(op_file, to_remove)

-        with open(op_file, 'w', encoding='utf-8') as f:
-            f.write(content)
+        if not args.dry_run:
+            with open(op_file, 'w', encoding='utf-8') as f:
+                f.write(content)
    # 2. update operators/CMakeLists.txt
    cmake_file = os.path.join(
@@ -178,6 +203,10 @@ if __name__ == '__main__':
    )
    update_operator_cmake(cmake_file)
register_pd_kernel_count = reduce_lib_size_util.remove_grad_kernels(
args.dry_run
)
    print('We erase all grad op and kernel for Paddle-Inference lib.')
    print('%50s%10s' % ('type', 'count'))
    print('%50s%10s' % ('REGISTER_OPERATOR', register_op_count))
@@ -194,3 +223,4 @@ if __name__ == '__main__':
            register_op_kernel_with_custom_type_count,
        )
    )
print('%50s%10s' % ('REGISTER_OP_PD_KERNEL', register_pd_kernel_count))