From 0ff59df1a486d180dbc44182f49f06b4838d1635 Mon Sep 17 00:00:00 2001 From: test Date: Fri, 20 Sep 2019 06:57:23 +0000 Subject: [PATCH] MS-575 Add Clang-format & Clang-tidy & Cpplint Former-commit-id: e62b3b42b0b27c3bbbb6d62cfff4a518913298d7 --- cpp/.gitignore | 1 + cpp/CMakeLists.txt | 49 +++++- ...mat_exclusions.txt => lint_exclusions.txt} | 0 cpp/build-support/lintutils.py | 110 ++++++++++++ cpp/build-support/run_clang_format.py | 161 ++++++++++++------ cpp/cmake/FindClangTools.cmake | 7 +- 6 files changed, 274 insertions(+), 54 deletions(-) rename cpp/build-support/{clang_format_exclusions.txt => lint_exclusions.txt} (100%) create mode 100755 cpp/build-support/lintutils.py diff --git a/cpp/.gitignore b/cpp/.gitignore index 71ee3f37..f7243a0c 100644 --- a/cpp/.gitignore +++ b/cpp/.gitignore @@ -8,3 +8,4 @@ output.info output_new.info server.info thirdparty/knowhere/ +*.pyc diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 01007bf0..d9267be7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -90,6 +90,8 @@ else() set(MILVUS_BUILD_ARCH unknown) endif() +find_package (Python COMPONENTS Interpreter Development) + find_package(CUDA) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -fPIC -std=c++11 -D_FORCE_INLINES --expt-extended-lambda") @@ -157,8 +159,47 @@ install(FILES DESTINATION conf) -add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py - ${CLANG_FORMAT_BIN} - ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt - ${CMAKE_CURRENT_SOURCE_DIR}/src) + +# +# "make lint" target +# +if(NOT MILVUS_VERBOSE_LINT) + set(MILVUS_LINT_QUIET "--quiet") +endif() + +if(NOT LINT_EXCLUSIONS_FILE) + # source files matching a glob from a line in this file + # will be excluded from linting (cpplint, clang-tidy, clang-format) + set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt) +endif() + +# +# "make format" and "make check-format" targets +# +if(${CLANG_FORMAT_FOUND}) + # runs clang format and updates files in place. + add_custom_target(format + ${PYTHON_EXECUTABLE} + ${BUILD_SUPPORT_DIR}/run_clang_format.py + --clang_format_binary + ${CLANG_FORMAT_BIN} + --exclude_globs + ${LINT_EXCLUSIONS_FILE} + --source_dir + ${CMAKE_CURRENT_SOURCE_DIR}/src + --fix + ${MILVUS_LINT_QUIET}) + + # runs clang format and exits with a non-zero exit code if any files need to be reformatted + add_custom_target(check-format + ${PYTHON_EXECUTABLE} + ${BUILD_SUPPORT_DIR}/run_clang_format.py + --clang_format_binary + ${CLANG_FORMAT_BIN} + --exclude_globs + ${LINT_EXCLUSIONS_FILE} + --source_dir + ${CMAKE_CURRENT_SOURCE_DIR}/src + ${MILVUS_LINT_QUIET}) +endif() diff --git a/cpp/build-support/clang_format_exclusions.txt b/cpp/build-support/lint_exclusions.txt similarity index 100% rename from cpp/build-support/clang_format_exclusions.txt rename to cpp/build-support/lint_exclusions.txt diff --git a/cpp/build-support/lintutils.py b/cpp/build-support/lintutils.py new file mode 100755 index 00000000..b4b01da3 --- /dev/null +++ b/cpp/build-support/lintutils.py @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import multiprocessing as mp +import os +from fnmatch import fnmatch +from subprocess import Popen + + +def chunk(seq, n): + """ + divide a sequence into equal sized chunks + (the last chunk may be smaller, but won't be empty) + """ + chunks = [] + some = [] + for element in seq: + if len(some) == n: + chunks.append(some) + some = [] + some.append(element) + if len(some) > 0: + chunks.append(some) + return chunks + + +def dechunk(chunks): + "flatten chunks into a single list" + seq = [] + for chunk in chunks: + seq.extend(chunk) + return seq + + +def run_parallel(cmds, **kwargs): + """ + Run each of cmds (with shared **kwargs) using subprocess.Popen + then wait for all of them to complete. + Runs batches of multiprocessing.cpu_count() * 2 from cmds + returns a list of tuples containing each process' + returncode, stdout, stderr + """ + complete = [] + for cmds_batch in chunk(cmds, mp.cpu_count() * 2): + procs_batch = [Popen(cmd, **kwargs) for cmd in cmds_batch] + for proc in procs_batch: + stdout, stderr = proc.communicate() + complete.append((proc.returncode, stdout, stderr)) + return complete + + +_source_extensions = ''' +.h +.cc +.cpp +'''.split() + + +def get_sources(source_dir, exclude_globs=[]): + sources = [] + for directory, subdirs, basenames in os.walk(source_dir): + for path in [os.path.join(directory, basename) + for basename in basenames]: + # filter out non-source files + if os.path.splitext(path)[1] not in _source_extensions: + continue + + path = os.path.abspath(path) + + # filter out files that match the globs in the globs file + if any([fnmatch(path, glob) for glob in exclude_globs]): + continue + + sources.append(path) + return sources + + +def stdout_pathcolonline(completed_process, filenames): + """ + given a completed process which may have reported some files as problematic + by printing the path name followed by ':' then a line number, examine + stdout and return the set of actually reported file names + """ + returncode, stdout, stderr = completed_process + bfilenames = set() + for filename in filenames: + bfilenames.add(filename.encode('utf-8') + b':') + problem_files = set() + for line in stdout.splitlines(): + for filename in bfilenames: + if line.startswith(filename): + problem_files.add(filename.decode('utf-8')) + bfilenames.remove(filename) + break + return problem_files, stdout + diff --git a/cpp/build-support/run_clang_format.py b/cpp/build-support/run_clang_format.py index 4235b2d4..4c7fdfb2 100755 --- a/cpp/build-support/run_clang_format.py +++ b/cpp/build-support/run_clang_format.py @@ -16,64 +16,127 @@ # specific language governing permissions and limitations # under the License. -import fnmatch -import os -import subprocess +from __future__ import print_function +import lintutils +from subprocess import PIPE +import argparse +import difflib +import multiprocessing as mp import sys +from functools import partial -if len(sys.argv) < 4: - sys.stderr.write("Usage: %s $CLANG_FORMAT_VERSION exclude_globs.txt " - "$source_dir\n" % - sys.argv[0]) - sys.exit(1) -CLANG_FORMAT = sys.argv[1] -EXCLUDE_GLOBS_FILENAME = sys.argv[2] -SOURCE_DIR = sys.argv[3] +# examine the output of clang-format and if changes are +# present assemble a (unified)patch of the difference +def _check_one_file(completed_processes, filename): + with open(filename, "rb") as reader: + original = reader.read() -if len(sys.argv) > 4: - CHECK_FORMAT = int(sys.argv[4]) == 1 -else: - CHECK_FORMAT = False + returncode, stdout, stderr = completed_processes[filename] + formatted = stdout + if formatted != original: + # Run the equivalent of diff -u + diff = list(difflib.unified_diff( + original.decode('utf8').splitlines(True), + formatted.decode('utf8').splitlines(True), + fromfile=filename, + tofile="{} (after clang format)".format( + filename))) + else: + diff = None + return filename, diff -exclude_globs = [line.strip() for line in open(EXCLUDE_GLOBS_FILENAME, "r")] -files_to_format = [] -matches = [] -for directory, subdirs, files in os.walk(SOURCE_DIR): - for name in files: - name = os.path.join(directory, name) - if not (name.endswith('.h') or name.endswith('.cpp') or name.endswith('.cuh') or name.endswith('.cu')): - continue +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Runs clang-format on all of the source " + "files. If --fix is specified enforce format by " + "modifying in place, otherwise compare the output " + "with the existing file and output any necessary " + "changes as a patch in unified diff format") + parser.add_argument("--clang_format_binary", + required=True, + help="Path to the clang-format binary") + parser.add_argument("--exclude_globs", + help="Filename containing globs for files " + "that should be excluded from the checks") + parser.add_argument("--source_dir", + required=True, + help="Root directory of the source code") + parser.add_argument("--fix", default=False, + action="store_true", + help="If specified, will re-format the source " + "code instead of comparing the re-formatted " + "output, defaults to %(default)s") + parser.add_argument("--quiet", default=False, + action="store_true", + help="If specified, only print errors") + arguments = parser.parse_args() - excluded = False - for g in exclude_globs: - if fnmatch.fnmatch(name, g): - excluded = True - break - if not excluded: - files_to_format.append(name) + exclude_globs = [] + if arguments.exclude_globs: + for line in open(arguments.exclude_globs): + exclude_globs.append(line.strip()) -if CHECK_FORMAT: - output = subprocess.check_output([CLANG_FORMAT, '-output-replacements-xml'] - + files_to_format, - stderr=subprocess.STDOUT).decode('utf8') + formatted_filenames = [] + for path in lintutils.get_sources(arguments.source_dir, exclude_globs): + formatted_filenames.append(str(path)) - to_fix = [] - for line in output.split('\n'): - if 'offset' in line: - to_fix.append(line) + if arguments.fix: + if not arguments.quiet: + print("\n".join(map(lambda x: "Formatting {}".format(x), + formatted_filenames))) - if len(to_fix) > 0: - print("clang-format checks failed, run 'make format' to fix") - sys.exit(-1) -else: - try: - cmd = [CLANG_FORMAT, '-i'] + files_to_format - subprocess.check_output(cmd, stderr=subprocess.STDOUT) - except Exception as e: - print(e) - print(' '.join(cmd)) - raise + # Break clang-format invocations into chunks: each invocation formats + # 16 files. Wait for all processes to complete + results = lintutils.run_parallel([ + [arguments.clang_format_binary, "-i"] + some + for some in lintutils.chunk(formatted_filenames, 16) + ]) + for returncode, stdout, stderr in results: + # if any clang-format reported a parse error, bubble it + if returncode != 0: + sys.exit(returncode) + + else: + # run an instance of clang-format for each source file in parallel, + # then wait for all processes to complete + results = lintutils.run_parallel([ + [arguments.clang_format_binary, filename] + for filename in formatted_filenames + ], stdout=PIPE, stderr=PIPE) + for returncode, stdout, stderr in results: + # if any clang-format reported a parse error, bubble it + if returncode != 0: + sys.exit(returncode) + + error = False + checker = partial(_check_one_file, { + filename: result + for filename, result in zip(formatted_filenames, results) + }) + pool = mp.Pool() + try: + # check the output from each invocation of clang-format in parallel + for filename, diff in pool.imap(checker, formatted_filenames): + if not arguments.quiet: + print("Checking {}".format(filename)) + if diff: + print("{} had clang-format style issues".format(filename)) + # Print out the diff to stderr + error = True + # pad with a newline + print(file=sys.stderr) + diff_out = [] + for diff_str in diff: + diff_out.append(diff_str.encode('raw_unicode_escape')) + sys.stderr.writelines(diff_out) + except Exception: + error = True + raise + finally: + pool.terminate() + pool.join() + sys.exit(1 if error else 0) diff --git a/cpp/cmake/FindClangTools.cmake b/cpp/cmake/FindClangTools.cmake index 075d9381..83e71d75 100644 --- a/cpp/cmake/FindClangTools.cmake +++ b/cpp/cmake/FindClangTools.cmake @@ -32,6 +32,7 @@ find_program(CLANG_TIDY_BIN NAMES + clang-tidy-7.0 clang-tidy-6.0 clang-tidy-5.0 clang-tidy-4.0 @@ -83,7 +84,11 @@ if (CLANG_FORMAT_VERSION) endif() else() find_program(CLANG_FORMAT_BIN - NAMES clang-format-4.0 + NAMES + clang-format-7.0 + clang-format-6.0 + clang-format-5.0 + clang-format-4.0 clang-format-3.9 clang-format-3.8 clang-format-3.7 -- GitLab