diff --git a/maint/circlerefs/circlerefs.py b/maint/circlerefs/circlerefs.py deleted file mode 100755 index bd8214aa82f211db3d515388644fa04e86d09acb..0000000000000000000000000000000000000000 --- a/maint/circlerefs/circlerefs.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -"""Test script to find circular references. - -Circular references are not leaks per se, because they will eventually -be GC'd. However, on CPython, they prevent the reference-counting fast -path from being used and instead rely on the slower full GC. This -increases memory footprint and CPU overhead, so we try to eliminate -circular references created by normal operation. -""" - -import gc -import traceback -import types -from tornado import web, ioloop, gen, httpclient - - -def find_circular_references(garbage=None): - def inner(level): - for item in level: - item_id = id(item) - if item_id not in garbage_ids: - continue - if item_id in visited_ids: - continue - if item_id in stack_ids: - candidate = stack[stack.index(item):] - candidate.append(item) - found.append(candidate) - continue - - stack.append(item) - stack_ids.add(item_id) - inner(gc.get_referents(item)) - stack.pop() - stack_ids.remove(item_id) - visited_ids.add(item_id) - - garbage = garbage or gc.garbage - found = [] - stack = [] - stack_ids = set() - garbage_ids = set(map(id, garbage)) - visited_ids = set() - - inner(garbage) - inner = None - return found - - -class CollectHandler(web.RequestHandler): - @gen.coroutine - def get(self): - self.write("Collected: {}\n".format(gc.collect())) - self.write("Garbage: {}\n".format(len(gc.garbage))) - for circular in find_circular_references(): - print('\n==========\n Circular \n==========') - for item in circular: - print(' ', repr(item)) - for item in circular: - if isinstance(item, types.FrameType): - print('\nLocals:', item.f_locals) - print('\nTraceback:', repr(item)) - traceback.print_stack(item) - - -class DummyHandler(web.RequestHandler): - @gen.coroutine - def get(self): - self.write('ok\n') - - -class DummyAsyncHandler(web.RequestHandler): - @gen.coroutine - def get(self): - raise web.Finish('ok\n') - - -application = web.Application([ - (r'/dummy/', DummyHandler), - (r'/dummyasync/', DummyAsyncHandler), - (r'/collect/', CollectHandler), -], debug=True) - - -@gen.coroutine -def main(): - gc.disable() - gc.collect() - gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_SAVEALL) - print('GC disabled') - - print("Start on 8888") - application.listen(8888, '127.0.0.1') - - # Do a little work. Alternately, could leave this script running and - # poke at it with a browser. - client = httpclient.AsyncHTTPClient() - yield client.fetch('http://127.0.0.1:8888/dummy/') - yield client.fetch('http://127.0.0.1:8888/dummyasync/', raise_error=False) - - # Now report on the results. - resp = yield client.fetch('http://127.0.0.1:8888/collect/') - print(resp.body) - - -if __name__ == "__main__": - ioloop.IOLoop.current().run_sync(main) diff --git a/tornado/concurrent.py b/tornado/concurrent.py index 6e05346b56d00048910752201dc110edd7df9aae..f0bbf6231763bb8beb957ee6c3d82e9c4ce88be2 100644 --- a/tornado/concurrent.py +++ b/tornado/concurrent.py @@ -150,8 +150,7 @@ def chain_future(a: "Future[_T]", b: "Future[_T]") -> None: """ - def copy(future: "Future[_T]") -> None: - assert future is a + def copy(a: "Future[_T]") -> None: if b.done(): return if hasattr(a, "exc_info") and a.exc_info() is not None: # type: ignore diff --git a/tornado/ioloop.py b/tornado/ioloop.py index cdfb4a630fd063f61b2818ea5d5f9dd1b38636fd..3fb1359aae174162e6d4aa8b12e877c0582d9e0e 100644 --- a/tornado/ioloop.py +++ b/tornado/ioloop.py @@ -696,15 +696,13 @@ class IOLoop(Configurable): # the error logging (i.e. it goes to tornado.log.app_log # instead of asyncio's log). future.add_done_callback( - lambda f: self._run_callback(functools.partial(callback, future)) + lambda f: self._run_callback(functools.partial(callback, f)) ) else: assert is_future(future) # For concurrent futures, we use self.add_callback, so # it's fine if future_add_done_callback inlines that call. - future_add_done_callback( - future, lambda f: self.add_callback(callback, future) - ) + future_add_done_callback(future, lambda f: self.add_callback(callback, f)) def run_in_executor( self, diff --git a/tornado/test/circlerefs_test.py b/tornado/test/circlerefs_test.py new file mode 100644 index 0000000000000000000000000000000000000000..5c71858ff72d1ee57588903174d5854e25e7f480 --- /dev/null +++ b/tornado/test/circlerefs_test.py @@ -0,0 +1,217 @@ +"""Test script to find circular references. + +Circular references are not leaks per se, because they will eventually +be GC'd. However, on CPython, they prevent the reference-counting fast +path from being used and instead rely on the slower full GC. This +increases memory footprint and CPU overhead, so we try to eliminate +circular references created by normal operation. +""" + +import asyncio +import contextlib +import gc +import io +import sys +import traceback +import types +import typing +import unittest + +import tornado +from tornado import web, gen, httpclient +from tornado.test.util import skipNotCPython + + +def find_circular_references(garbage): + """Find circular references in a list of objects. + + The garbage list contains objects that participate in a cycle, + but also the larger set of objects kept alive by that cycle. + This function finds subsets of those objects that make up + the cycle(s). + """ + + def inner(level): + for item in level: + item_id = id(item) + if item_id not in garbage_ids: + continue + if item_id in visited_ids: + continue + if item_id in stack_ids: + candidate = stack[stack.index(item) :] + candidate.append(item) + found.append(candidate) + continue + + stack.append(item) + stack_ids.add(item_id) + inner(gc.get_referents(item)) + stack.pop() + stack_ids.remove(item_id) + visited_ids.add(item_id) + + found: typing.List[object] = [] + stack = [] + stack_ids = set() + garbage_ids = set(map(id, garbage)) + visited_ids = set() + + inner(garbage) + return found + + +@contextlib.contextmanager +def assert_no_cycle_garbage(): + """Raise AssertionError if the wrapped code creates garbage with cycles.""" + gc.disable() + gc.collect() + gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_SAVEALL) + yield + try: + # We have DEBUG_STATS on which causes gc.collect to write to stderr. + # Capture the output instead of spamming the logs on passing runs. + f = io.StringIO() + old_stderr = sys.stderr + sys.stderr = f + try: + gc.collect() + finally: + sys.stderr = old_stderr + garbage = gc.garbage[:] + # Must clear gc.garbage (the same object, not just replacing it with a + # new list) to avoid warnings at shutdown. + gc.garbage[:] = [] + if len(garbage) == 0: + return + for circular in find_circular_references(garbage): + f.write("\n==========\n Circular \n==========") + for item in circular: + f.write(f"\n {repr(item)}") + for item in circular: + if isinstance(item, types.FrameType): + f.write(f"\nLocals: {item.f_locals}") + f.write(f"\nTraceback: {repr(item)}") + traceback.print_stack(item) + del garbage + raise AssertionError(f.getvalue()) + finally: + gc.set_debug(0) + gc.enable() + + +# GC behavior is cpython-specific +@skipNotCPython +class CircleRefsTest(unittest.TestCase): + def test_known_leak(self): + # Construct a known leak scenario to make sure the test harness works. + class C(object): + def __init__(self, name): + self.name = name + self.a: typing.Optional[C] = None + self.b: typing.Optional[C] = None + self.c: typing.Optional[C] = None + + def __repr__(self): + return f"name={self.name}" + + with self.assertRaises(AssertionError) as cm: + with assert_no_cycle_garbage(): + # a and b form a reference cycle. c is not part of the cycle, + # but it cannot be GC'd while a and b are alive. + a = C("a") + b = C("b") + c = C("c") + a.b = b + a.c = c + b.a = a + b.c = c + del a, b + self.assertIn("Circular", str(cm.exception)) + # Leading spaces ensure we only catch these at the beginning of a line, meaning they are a + # cycle participant and not simply the contents of a locals dict or similar container. (This + # depends on the formatting above which isn't ideal but this test evolved from a + # command-line script) Note that the behavior here changed in python 3.11; in newer pythons + # locals are handled a bit differently and the test passes without the spaces. + self.assertIn(" name=a", str(cm.exception)) + self.assertIn(" name=b", str(cm.exception)) + self.assertNotIn(" name=c", str(cm.exception)) + + async def run_handler(self, handler_class): + app = web.Application( + [ + (r"/", handler_class), + ] + ) + socket, port = tornado.testing.bind_unused_port() + server = tornado.httpserver.HTTPServer(app) + server.add_socket(socket) + + client = httpclient.AsyncHTTPClient() + with assert_no_cycle_garbage(): + # Only the fetch (and the corresponding server-side handler) + # are being tested for cycles. In particular, the Application + # object has internal cycles (as of this writing) which we don't + # care to fix since in real world usage the Application object + # is effectively a global singleton. + await client.fetch(f"http://127.0.0.1:{port}/") + client.close() + server.stop() + socket.close() + + def test_sync_handler(self): + class Handler(web.RequestHandler): + def get(self): + self.write("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_finish_exception_handler(self): + class Handler(web.RequestHandler): + def get(self): + raise web.Finish("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_coro_handler(self): + class Handler(web.RequestHandler): + @gen.coroutine + def get(self): + yield asyncio.sleep(0.01) + self.write("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_async_handler(self): + class Handler(web.RequestHandler): + async def get(self): + await asyncio.sleep(0.01) + self.write("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_run_on_executor(self): + # From https://github.com/tornadoweb/tornado/issues/2620 + # + # When this test was introduced it found cycles in IOLoop.add_future + # and tornado.concurrent.chain_future. + import concurrent.futures + + thread_pool = concurrent.futures.ThreadPoolExecutor(1) + + class Factory(object): + executor = thread_pool + + @tornado.concurrent.run_on_executor + def run(self): + return None + + factory = Factory() + + async def main(): + # The cycle is not reported on the first call. It's not clear why. + for i in range(2): + await factory.run() + + with assert_no_cycle_garbage(): + asyncio.run(main()) diff --git a/tornado/test/runtests.py b/tornado/test/runtests.py index 58cecd383e0f417c734f8cb687fd54c7b404c52e..f35b3725453eb5fa81f7752d1aed90857293fcd8 100644 --- a/tornado/test/runtests.py +++ b/tornado/test/runtests.py @@ -22,6 +22,7 @@ TEST_MODULES = [ "tornado.test.asyncio_test", "tornado.test.auth_test", "tornado.test.autoreload_test", + "tornado.test.circlerefs_test", "tornado.test.concurrent_test", "tornado.test.curl_httpclient_test", "tornado.test.escape_test",