From 9d723b8c58e9b04f99ece057a2d0ed35a120dd52 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Mon, 28 May 2018 01:40:04 -0500 Subject: [PATCH] Add docstring checker. (#9848) --- .pre-commit-config.yaml | 8 + .travis.yml | 2 + Dockerfile | 3 + tools/codestyle/docstring_checker.py | 334 ++++++++++++++++++++++ tools/codestyle/pylint_pre_commit.hook | 19 ++ tools/codestyle/test_docstring_checker.py | 232 +++++++++++++++ 6 files changed, 598 insertions(+) create mode 100644 tools/codestyle/docstring_checker.py create mode 100755 tools/codestyle/pylint_pre_commit.hook create mode 100644 tools/codestyle/test_docstring_checker.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6140340890c..eeda759ff18 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,14 @@ repos: entry: bash ./tools/codestyle/cpplint_pre_commit.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$ +- repo: local + hooks: + - id: pylint-doc-string + name: pylint + description: Check python docstring style using docstring_checker. + entry: bash ./tools/codestyle/pylint_pre_commit.hook + language: system + files: \.(py)$ - repo: https://github.com/PaddlePaddle/pre-commit-golang sha: 8337620115c25ff8333f1b1a493bd031049bd7c0 hooks: diff --git a/.travis.yml b/.travis.yml index 3391e2c3cab..8c772030925 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,8 @@ env: addons: ssh_known_hosts: 13.229.163.131 before_install: + # For pylint dockstring checker + - sudo pip install pylint pytest astroid isort - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: diff --git a/Dockerfile b/Dockerfile index e5508486d6d..80a96983ec1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -79,6 +79,9 @@ RUN pip install pre-commit 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ pip install opencv-python +#For docstring checker +RUN pip install pylint pytest astroid isort + COPY ./python/requirements.txt /root/ RUN pip install -r /root/requirements.txt diff --git a/tools/codestyle/docstring_checker.py b/tools/codestyle/docstring_checker.py new file mode 100644 index 00000000000..48100e5bf98 --- /dev/null +++ b/tools/codestyle/docstring_checker.py @@ -0,0 +1,334 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""DocstringChecker is used to check python doc string's style.""" + +import six +import astroid + +from pylint.checkers import BaseChecker, utils +from pylint.interfaces import IAstroidChecker + +from collections import defaultdict +import re + + +def register(linter): + """Register checkers.""" + linter.register_checker(DocstringChecker(linter)) + + +class Docstring(object): + """Docstring class holds the parsed doc string elements. + """ + + def __init__(self): + self.d = defaultdict(list) #name->[] + self.clear() + + def clear(self): + self.d['Args'] = [] + self.d['Examples'] = [] + self.d['Returns'] = [] + self.d['Raises'] = [] + self.args = {} #arg_name->arg_type + + def get_level(self, string, indent=' '): + level = 0 + unit_size = len(indent) + while string[:unit_size] == indent: + string = string[unit_size:] + level += 1 + + return level + + def parse(self, doc): + """parse gets sections from doc + Such as Args, Returns, Raises, Examples s + Args: + doc (string): is the astroid node doc string. + Returns: + True if doc is parsed successfully. + """ + self.clear() + + lines = doc.splitlines() + state = ("others", -1) + for l in lines: + c = l.strip() + if len(c) <= 0: + continue + + level = self.get_level(l) + if c.startswith("Args:"): + state = ("Args", level) + elif c.startswith("Returns:"): + state = ("Returns", level) + elif c.startswith("Raises:"): + state = ("Raises", level) + elif c.startswith("Examples:"): + state = ("Examples", level) + else: + if level > state[1]: + self.d[state[0]].append(c) + continue + + state = ("others", -1) + self.d[state[0]].append(c) + + self._arg_with_type() + return True + + def get_returns(self): + return self.d['Returns'] + + def get_raises(self): + return self.d['Raises'] + + def get_examples(self): + return self.d['Examples'] + + def _arg_with_type(self): + + for t in self.d['Args']: + m = re.search('([A-Za-z0-9_-]+)\s{0,4}(\(.+\))\s{0,4}:', t) + if m: + self.args[m.group(1)] = m.group(2) + + return self.args + + +class DocstringChecker(BaseChecker): + """DosstringChecker is pylint checker to + check docstring style. + """ + __implements__ = (IAstroidChecker, ) + + POSITIONAL_MESSAGE_ID = 'str-used-on-positional-format-argument' + KEYWORD_MESSAGE_ID = 'str-used-on-keyword-format-argument' + + name = 'doc-string-checker' + symbol = "doc-string" + priority = -1 + msgs = { + 'W9001': ('One line doc string on > 1 lines', symbol + "-one-line", + 'Used when a short doc string is on multiple lines'), + 'W9002': + ('Doc string does not end with "." period', symbol + "-end-with", + 'Used when a doc string does not end with a period'), + 'W9003': ('All args with their types must be mentioned in doc string', + symbol + "-with-all-args", + 'Used when not all arguments are in the doc string '), + 'W9005': ('Missing docstring or docstring is too short', + symbol + "-missing", 'Add docstring longer >=10'), + 'W9006': ('Docstring indent error, use 4 space for indent', + symbol + "-indent-error", 'Use 4 space for indent'), + 'W9007': ('You should add `Returns` in comments', + symbol + "-with-returns", + 'There should be a `Returns` section in comments'), + 'W9008': ('You should add `Raises` section in comments', + symbol + "-with-raises", + 'There should be a `Raises` section in comments'), + } + options = () + + def visit_functiondef(self, node): + """visit_functiondef checks Function node docstring style. + Args: + node (astroid.node): The visiting node. + Returns: + True if successful other wise False. + """ + + self.check_doc_string(node) + + if node.tolineno - node.fromlineno <= 10: + return True + + if not node.doc: + return True + + doc = Docstring() + doc.parse(node.doc) + + self.all_args_in_doc(node, doc) + self.with_returns(node, doc) + self.with_raises(node, doc) + + def visit_module(self, node): + self.check_doc_string(node) + + def visit_classdef(self, node): + self.check_doc_string(node) + + def check_doc_string(self, node): + self.missing_doc_string(node) + self.one_line(node) + self.has_period(node) + self.indent_style(node) + + def missing_doc_string(self, node): + if node.tolineno - node.fromlineno <= 10: + return True + + if node.doc is None or len(node.doc) < 10: + self.add_message('W9005', node=node, line=node.fromlineno) + return False + + # FIXME(gongwb): give the docstring line-no + def indent_style(self, node, indent=4): + """indent_style checks docstring's indent style + Args: + node (astroid.node): The visiting node. + indent (int): The default indent of style + Returns: + True if successful other wise False. + """ + if node.doc is None: + return True + + doc = node.doc + lines = doc.splitlines() + + for l in lines: + cur_indent = len(l) - len(l.lstrip()) + if cur_indent % indent != 0: + self.add_message('W9006', node=node, line=node.fromlineno) + return False + + return True + + def one_line(self, node): + """one_line checks if docstring (len < 40) is on one line. + Args: + node (astroid.node): The node visiting. + Returns: + True if successful otherwise False. + """ + + doc = node.doc + if doc is None: + return True + + if len(doc) > 40: + return True + elif sum(doc.find(nl) for nl in ('\n', '\r', '\n\r')) == -3: + return True + else: + self.add_message('W9001', node=node, line=node.fromlineno) + return False + + return True + + def has_period(self, node): + """has_period checks if one line doc end-with '.' . + Args: + node (astroid.node): the node is visiting. + Returns: + True if successful otherwise False. + """ + if node.doc is None: + return True + + if len(node.doc.splitlines()) > 1: + return True + + if not node.doc.strip().endswith('.'): + self.add_message('W9002', node=node, line=node.fromlineno) + return False + + return True + + def with_raises(self, node, doc): + """with_raises checks if one line doc end-with '.' . + Args: + node (astroid.node): the node is visiting. + doc (Docstring): Docstring object. + Returns: + True if successful otherwise False. + """ + + find = False + for t in node.body: + if not isinstance(t, astroid.Raise): + continue + + find = True + break + + if not find: + return True + + if len(doc.get_raises()) == 0: + self.add_message('W9008', node=node, line=node.fromlineno) + return False + + return True + + def with_returns(self, node, doc): + """with_returns checks if docstring comments what are returned . + Args: + node (astroid.node): the node is visiting. + doc (Docstring): Docstring object. + Returns: + True if successful otherwise False. + """ + + find = False + for t in node.body: + if not isinstance(t, astroid.Return): + continue + + find = True + break + + if not find: + return True + + if len(doc.get_returns()) == 0: + self.add_message('W9007', node=node, line=node.fromlineno) + return False + + return True + + def all_args_in_doc(self, node, doc): + """all_args_in_doc checks if arguments are mentioned in doc + Args: + node (astroid.node): the node is visiting. + doc (Docstring): Docstring object + Returns: + True if successful otherwise False. + """ + args = [] + for arg in node.args.get_children(): + if (not isinstance(arg, astroid.AssignName)) \ + or arg.name == "self": + continue + args.append(arg.name) + + if len(args) <= 0: + return True + + parsed_args = doc.args + if len(args) > 0 and len(parsed_args) <= 0: + print "debug:parsed args: ", parsed_args + self.add_message('W9003', node=node, line=node.fromlineno) + return False + + for t in args: + if t not in parsed_args: + print t, " with (type) not in ", parsed_args + self.add_message('W9003', node=node, line=node.fromlineno) + return False + + return True diff --git a/tools/codestyle/pylint_pre_commit.hook b/tools/codestyle/pylint_pre_commit.hook new file mode 100755 index 00000000000..e7c92ba671e --- /dev/null +++ b/tools/codestyle/pylint_pre_commit.hook @@ -0,0 +1,19 @@ +#!/bin/bash + +TOTAL_ERRORS=0 + + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +export PYTHONPATH=$DIR:$PYTHONPATH + +# The trick to remove deleted files: https://stackoverflow.com/a/2413151 +for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do + pylint --disable=all --load-plugins=docstring_checker \ + --enable=doc-string-one-line,doc-string-end-with,doc-string-with-all-args,doc-string-triple-quotes,doc-string-missing,doc-string-indent-error,doc-string-with-returns,doc-string-with-raises $file; + TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); +done + +#exit $TOTAL_ERRORS +#For now, just warning: +exit 0 + diff --git a/tools/codestyle/test_docstring_checker.py b/tools/codestyle/test_docstring_checker.py new file mode 100644 index 00000000000..0547f7d1610 --- /dev/null +++ b/tools/codestyle/test_docstring_checker.py @@ -0,0 +1,232 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import docstring_checker +import pylint.testutils +import astroid +import pytest +import sys + + +class TestDocstring(pylint.testutils.CheckerTestCase): + CHECKER_CLASS = docstring_checker.DocstringChecker + + def test_one_line(self): + func_node = astroid.extract_node(''' + def test(): + """get + news. + """ + if True: + return 5 + return 5 + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9001' == got[0][0] + + def test_one_line(self): + func_node = astroid.extract_node(''' + def test(): + """get news""" + if True: + return 5 + return 5 + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9002' == got[0][0] + + def test_args(self): + func_node = astroid.extract_node(''' + def test(scale, mean): + """get news. + Args: + scale (int): scale is the number. + """ + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9003' == got[0][0] + + def test_missing(self): + func_node = astroid.extract_node(''' + def test(): + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9005' == got[0][0] + + def test_indent(self): + func_node = astroid.extract_node(''' + def test(): + """ get get get get get get get get + get get get get get get get get. + """ + pass + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9006' == got[0][0] + + def test_with_resturns(self): + func_node = astroid.extract_node(''' + def test(): + """get news. + Args: + scale (int): scale is the number. + """ + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + return mean + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9007' == got[0][0] + + def test_with_raises(self): + func_node = astroid.extract_node(''' + def test(): + """get news. + Args: + scale (int): scale is the number. + """ + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + mean=scale + raise ValueError('A very specific bad thing happened.') + ''') + + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 1 + assert 'W9008' == got[0][0] + + def test_no_message(self): + p = ''' +def fc(input, + size, + num_flatten_dims=1, + param_attr=None, + bias_attr=None, + act=None, + name=None): + """ + **Fully Connected Layer** + The fully connected layer can take multiple tensors as its inputs. It + creates a variable called weights for each input tensor, which represents + a fully connected weight matrix from each input unit to each output unit. + The fully connected layer multiplies each input tensor with its coresponding + weight to produce an output Tensor. If multiple input tensors are given, + the results of multiple multiplications will be sumed up. If bias_attr is + not None, a bias variable will be created and added to the output. Finally, + if activation is not None, it will be applied to the output as well. + This process can be formulated as follows: + + Args: + input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of + the input tensor(s) is at least 2. + size(int): The number of output units in this layer. + num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than + two dimensions. If this happens, the multidimensional tensor will first be flattened + into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input + tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1) + dimensions will be flatten to form the first dimension of the final matrix (height of + the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to + form the second dimension of the final matrix (width of the matrix). For example, suppose + `X` is a 6-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3. + Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. + param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable + parameters/weights of this layer. + bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias + of this layer. If it is set to None, no bias will be added to the output units. + act (str, default None): Activation to be applied to the output of this layer. + name (str, default None): The name of this layer. + Returns: + A tensor variable storing the transformation result. + Raises: + ValueError: If rank of the input tensor is less than 2. + Examples: + .. code-block:: python + data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") + fc = fluid.layers.fc(input=data, size=1000, act="tanh") + """ + raise ValueError('A very specific bad thing happened.') + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + size = 1 + return size + ''' + + func_node = astroid.extract_node(p) + self.checker.visit_functiondef(func_node) + got = self.linter.release_messages() + assert len(got) == 0 -- GitLab