From 72e8cea8afb85f6704fcf6f5648b0a01f1abaab3 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta <1731933+elacuesta@users.noreply.github.com> Date: Mon, 22 Mar 2021 11:51:11 -0300 Subject: [PATCH] Avoid exceptions in is_generator_with_return_value (#4935) --- scrapy/utils/misc.py | 27 ++- ...t_return_with_argument_inside_generator.py | 169 +++++++++++++++--- 2 files changed, 162 insertions(+), 34 deletions(-) diff --git a/scrapy/utils/misc.py b/scrapy/utils/misc.py index 081cd33f1..5c986eedc 100644 --- a/scrapy/utils/misc.py +++ b/scrapy/utils/misc.py @@ -9,7 +9,6 @@ from collections import deque from contextlib import contextmanager from importlib import import_module from pkgutil import iter_modules -from textwrap import dedent from w3lib.html import replace_entities @@ -227,7 +226,8 @@ def is_generator_with_return_value(callable): return value is None or isinstance(value, ast.NameConstant) and value.value is None if inspect.isgeneratorfunction(callable): - tree = ast.parse(dedent(inspect.getsource(callable))) + code = re.sub(r"^[\t ]+", "", inspect.getsource(callable)) + tree = ast.parse(code) for node in walk_callable(tree): if isinstance(node, ast.Return) and not returns_none(node): _generator_callbacks_cache[callable] = True @@ -242,12 +242,23 @@ def warn_on_generator_with_return_value(spider, callable): Logs a warning if a callable is a generator function and includes a 'return' statement with a value different than None """ - if is_generator_with_return_value(callable): + try: + if is_generator_with_return_value(callable): + warnings.warn( + f'The "{spider.__class__.__name__}.{callable.__name__}" method is ' + 'a generator and includes a "return" statement with a value ' + 'different than None. This could lead to unexpected behaviour. Please see ' + 'https://docs.python.org/3/reference/simple_stmts.html#the-return-statement ' + 'for details about the semantics of the "return" statement within generators', + stacklevel=2, + ) + except IndentationError: + callable_name = spider.__class__.__name__ + "." + callable.__name__ warnings.warn( - f'The "{spider.__class__.__name__}.{callable.__name__}" method is ' - 'a generator and includes a "return" statement with a value ' - 'different than None. This could lead to unexpected behaviour. Please see ' - 'https://docs.python.org/3/reference/simple_stmts.html#the-return-statement ' - 'for details about the semantics of the "return" statement within generators', + f'Unable to determine whether or not "{callable_name}" is a generator with a return value. ' + 'This will not prevent your code from working, but it prevents Scrapy from detecting ' + f'potential issues in your implementation of "{callable_name}". Please, report this in the ' + 'Scrapy issue tracker (https://github.com/scrapy/scrapy/issues), ' + f'including the code of "{callable_name}"', stacklevel=2, ) diff --git a/tests/test_utils_misc/test_return_with_argument_inside_generator.py b/tests/test_utils_misc/test_return_with_argument_inside_generator.py index 2be38620c..1c85ca353 100644 --- a/tests/test_utils_misc/test_return_with_argument_inside_generator.py +++ b/tests/test_utils_misc/test_return_with_argument_inside_generator.py @@ -1,35 +1,116 @@ import unittest +import warnings +from unittest import mock -from scrapy.utils.misc import is_generator_with_return_value +from scrapy.utils.misc import is_generator_with_return_value, warn_on_generator_with_return_value + + +def _indentation_error(*args, **kwargs): + raise IndentationError() + + +def top_level_return_something(): + """ +docstring + """ + url = """ +https://example.org +""" + yield url + return 1 + + +def top_level_return_none(): + """ +docstring + """ + url = """ +https://example.org +""" + yield url + return + + +def generator_that_returns_stuff(): + yield 1 + yield 2 + return 3 class UtilsMiscPy3TestCase(unittest.TestCase): - def test_generators_with_return_statements(self): - def f(): + def test_generators_return_something(self): + def f1(): yield 1 return 2 - def g(): + def g1(): + yield 1 + return "asdf" + + def h1(): yield 1 - return 'asdf' - def h(): + def helper(): + return 0 + + yield helper() + return 2 + + def i1(): + """ +docstring + """ + url = """ +https://example.org + """ + yield url + return 1 + + assert is_generator_with_return_value(top_level_return_something) + assert is_generator_with_return_value(f1) + assert is_generator_with_return_value(g1) + assert is_generator_with_return_value(h1) + assert is_generator_with_return_value(i1) + + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, top_level_return_something) + self.assertEqual(len(w), 1) + self.assertIn('The "NoneType.top_level_return_something" method is a generator', str(w[0].message)) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, f1) + self.assertEqual(len(w), 1) + self.assertIn('The "NoneType.f1" method is a generator', str(w[0].message)) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, g1) + self.assertEqual(len(w), 1) + self.assertIn('The "NoneType.g1" method is a generator', str(w[0].message)) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, h1) + self.assertEqual(len(w), 1) + self.assertIn('The "NoneType.h1" method is a generator', str(w[0].message)) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, i1) + self.assertEqual(len(w), 1) + self.assertIn('The "NoneType.i1" method is a generator', str(w[0].message)) + + def test_generators_return_none(self): + def f2(): yield 1 return None - def i(): + def g2(): yield 1 return - def j(): + def h2(): yield 1 - def k(): + def i2(): yield 1 - yield from g() + yield from generator_that_returns_stuff() - def m(): + def j2(): yield 1 def helper(): @@ -37,20 +118,56 @@ class UtilsMiscPy3TestCase(unittest.TestCase): yield helper() - def n(): - yield 1 - - def helper(): - return 0 + def k2(): + """ +docstring + """ + url = """ +https://example.org + """ + yield url + return - yield helper() - return 2 + def l2(): + return - assert is_generator_with_return_value(f) - assert is_generator_with_return_value(g) - assert not is_generator_with_return_value(h) - assert not is_generator_with_return_value(i) - assert not is_generator_with_return_value(j) - assert not is_generator_with_return_value(k) # not recursive - assert not is_generator_with_return_value(m) - assert is_generator_with_return_value(n) + assert not is_generator_with_return_value(top_level_return_none) + assert not is_generator_with_return_value(f2) + assert not is_generator_with_return_value(g2) + assert not is_generator_with_return_value(h2) + assert not is_generator_with_return_value(i2) + assert not is_generator_with_return_value(j2) # not recursive + assert not is_generator_with_return_value(k2) # not recursive + assert not is_generator_with_return_value(l2) + + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, top_level_return_none) + self.assertEqual(len(w), 0) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, f2) + self.assertEqual(len(w), 0) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, g2) + self.assertEqual(len(w), 0) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, h2) + self.assertEqual(len(w), 0) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, i2) + self.assertEqual(len(w), 0) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, j2) + self.assertEqual(len(w), 0) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, k2) + self.assertEqual(len(w), 0) + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, l2) + self.assertEqual(len(w), 0) + + @mock.patch("scrapy.utils.misc.is_generator_with_return_value", new=_indentation_error) + def test_indentation_error(self): + with warnings.catch_warnings(record=True) as w: + warn_on_generator_with_return_value(None, top_level_return_none) + self.assertEqual(len(w), 1) + self.assertIn('Unable to determine', str(w[0].message)) -- GitLab