[PATCH] __block_write_full_page race fix

When running fsstress -v -d $DIR/tmp -n 1000 -p 1000 -l 2 on an ext2 filesystem with 1024 byte block size, on SMP i386 with 4096 byte page size over loopback to an image file on a tmpfs filesystem, I would very quickly hit BUG_ON(!buffer_async_write(bh)); in fs/buffer.c:end_buffer_async_write It seems that more than one request would be submitted for a given bh at a time. What would happen is the following: 2 threads doing __mpage_writepages on the same page. Thread 1 - lock the page first, and enter __block_write_full_page. Thread 1 - (eg.) mark_buffer_async_write on the first 2 buffers. Thread 1 - set page writeback, unlock page. Thread 2 - lock page, wait on page writeback Thread 1 - submit_bh on the first 2 buffers. => both requests complete, none of the page buffers are async_write, end_page_writeback is called. Thread 2 - wakes up. enters __block_write_full_page. Thread 2 - mark_buffer_async_write on (eg.) the last buffer Thread 1 - finds the last buffer has async_write set, submit_bh on that. Thread 2 - submit_bh on the last buffer. => oops. So change __block_write_full_page to explicitly keep track of the last bh we need to issue, so we don't touch anything after issuing the last request. Signed-off-by: N Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: N Andrew Morton <akpm@osdl.org> Signed-off-by: N Linus Torvalds <torvalds@osdl.org>

[PATCH] __block_write_full_page race fix
When running fsstress -v -d $DIR/tmp -n 1000 -p 1000 -l 2 on an ext2 filesystem with 1024 byte block size, on SMP i386 with 4096 byte page size over loopback to an image file on a tmpfs filesystem, I would very quickly hit BUG_ON(!buffer_async_write(bh)); in fs/buffer.c:end_buffer_async_write It seems that more than one request would be submitted for a given bh at a time. What would happen is the following: 2 threads doing __mpage_writepages on the same page. Thread 1 - lock the page first, and enter __block_write_full_page. Thread 1 - (eg.) mark_buffer_async_write on the first 2 buffers. Thread 1 - set page writeback, unlock page. Thread 2 - lock page, wait on page writeback Thread 1 - submit_bh on the first 2 buffers. => both requests complete, none of the page buffers are async_write, end_page_writeback is called. Thread 2 - wakes up. enters __block_write_full_page. Thread 2 - mark_buffer_async_write on (eg.) the last buffer Thread 1 - finds the last buffer has async_write set, submit_bh on that. Thread 2 - submit_bh on the last buffer. => oops. So change __block_write_full_page to explicitly keep track of the last bh we need to issue, so we don't touch anything after issuing the last request. Signed-off-by: N Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: N Andrew Morton <akpm@osdl.org> Signed-off-by: N Linus Torvalds <torvalds@osdl.org>
ad576e63 · Nick Piggin · Linus Torvalds · f3ddbdc6 · ad576e63
隐藏空白更改
内联并排

Showing with 13 addition and 5 deletion

fs/buffer.c fs/buffer.c +13 -5

未找到文件。
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1751,7 +1751,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	int err;
 	sector_t block;
 	sector_t last_block;
-	struct buffer_head *bh, *head;
+	struct buffer_head *bh, *head, *last_bh = NULL;
 	int nr_underway = 0;

 	BUG_ON(!PageLocked(page));
@@ -1809,7 +1809,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	} while (bh != head);

 	do {
-		get_bh(bh);
 		if (!buffer_mapped(bh))
 			continue;
 		/*
@@ -1827,6 +1826,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		}
 		if (test_clear_buffer_dirty(bh)) {
 			mark_buffer_async_write(bh);
+			get_bh(bh);
+			last_bh = bh;
 		} else {
 			unlock_buffer(bh);
 		}
@@ -1845,10 +1846,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		if (buffer_async_write(bh)) {
 			submit_bh(WRITE, bh);
 			nr_underway++;
+			put_bh(bh);
+			if (bh == last_bh)
+				break;
 		}
-		put_bh(bh);
 		bh = next;
 	} while (bh != head);
+	bh = head;

 	err = 0;
 done:
@@ -1887,10 +1891,11 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	bh = head;
 	/* Recovery: lock and submit the mapped buffers */
 	do {
-		get_bh(bh);
 		if (buffer_mapped(bh) && buffer_dirty(bh)) {
 			lock_buffer(bh);
 			mark_buffer_async_write(bh);
+			get_bh(bh);
+			last_bh = bh;
 		} else {
 			/*
 			 * The buffer may have been set dirty during
@@ -1909,10 +1914,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 			clear_buffer_dirty(bh);
 			submit_bh(WRITE, bh);
 			nr_underway++;
+			put_bh(bh);
+			if (bh == last_bh)
+				break;
 		}
-		put_bh(bh);
 		bh = next;
 	} while (bh != head);
+	bh = head;
 	goto done;
 }