diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9a9158b19205d4fbc81f8e14defbbf194668c00a..c6243d242bc912e3d07c7f6092950d60f995d488 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3043,24 +3043,10 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, struct page *page, u64 start, u64 end, int uptodate) { - struct btrfs_fs_info *fs_info = inode->root->fs_info; - struct btrfs_ordered_extent *ordered_extent = NULL; - struct btrfs_workqueue *wq; - trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate); - ClearPagePrivate2(page); - if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, - end - start + 1, uptodate)) - return; - - if (btrfs_is_free_space_inode(inode)) - wq = fs_info->endio_freespace_worker; - else - wq = fs_info->endio_write_workers; - - btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); - btrfs_queue_work(wq, &ordered_extent->work); + btrfs_mark_ordered_io_finished(inode, page, start, end + 1 - start, + finish_ordered_fn, uptodate); } /* @@ -7959,41 +7945,8 @@ static void __endio_write_update_ordered(struct btrfs_inode *inode, const u64 offset, const u64 bytes, const bool uptodate) { - struct btrfs_fs_info *fs_info = inode->root->fs_info; - struct btrfs_ordered_extent *ordered = NULL; - struct btrfs_workqueue *wq; - u64 ordered_offset = offset; - u64 ordered_bytes = bytes; - u64 last_offset; - - if (btrfs_is_free_space_inode(inode)) - wq = fs_info->endio_freespace_worker; - else - wq = fs_info->endio_write_workers; - - while (ordered_offset < offset + bytes) { - last_offset = ordered_offset; - if (btrfs_dec_test_first_ordered_pending(inode, &ordered, - &ordered_offset, - ordered_bytes, - uptodate)) { - btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, - NULL); - btrfs_queue_work(wq, &ordered->work); - } - - /* No ordered extent found in the range, exit */ - if (ordered_offset == last_offset) - return; - /* - * Our bio might span multiple ordered extents. In this case - * we keep going until we have accounted the whole dio. - */ - if (ordered_offset < offset + bytes) { - ordered_bytes = offset + bytes - ordered_offset; - ordered = NULL; - } - } + btrfs_mark_ordered_io_finished(inode, NULL, offset, bytes, + finish_ordered_fn, uptodate); } static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 6c413bb451a3dc2f59296b274100d83deeb7a38e..e7ecce2c1bd8e124a99b1fa27e0a558122a77bfb 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -300,81 +300,142 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, } /* - * Finish IO for one ordered extent across a given range. The range can - * contain several ordered extents. + * Mark all ordered extents io inside the specified range finished. * - * @found_ret: Return the finished ordered extent - * @file_offset: File offset for the finished IO - * Will also be updated to one byte past the range that is - * recordered as finished. This allows caller to walk forward. - * @io_size: Length of the finish IO range - * @uptodate: If the IO finished without problem - * - * Return true if any ordered extent is finished in the range, and update - * @found_ret and @file_offset. - * Return false otherwise. + * @page: The invovled page for the opeartion. + * For uncompressed buffered IO, the page status also needs to be + * updated to indicate whether the pending ordered io is finished. + * Can be NULL for direct IO and compressed write. + * For these cases, callers are ensured they won't execute the + * endio function twice. + * @finish_func: The function to be executed when all the IO of an ordered + * extent are finished. * - * NOTE: Although The range can cross multiple ordered extents, only one - * ordered extent will be updated during one call. The caller is responsible to - * iterate all ordered extents in the range. + * This function is called for endio, thus the range must have ordered + * extent(s) coveri it. */ -bool btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode, - struct btrfs_ordered_extent **finished_ret, - u64 *file_offset, u64 io_size, int uptodate) +void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, + struct page *page, u64 file_offset, + u64 num_bytes, btrfs_func_t finish_func, + bool uptodate) { - struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct btrfs_workqueue *wq; struct rb_node *node; struct btrfs_ordered_extent *entry = NULL; - bool finished = false; unsigned long flags; - u64 dec_end; - u64 dec_start; - u64 to_dec; + u64 cur = file_offset; + + if (btrfs_is_free_space_inode(inode)) + wq = fs_info->endio_freespace_worker; + else + wq = fs_info->endio_write_workers; + + if (page) + ASSERT(page->mapping && page_offset(page) <= file_offset && + file_offset + num_bytes <= page_offset(page) + PAGE_SIZE); spin_lock_irqsave(&tree->lock, flags); - node = tree_search(tree, *file_offset); - if (!node) - goto out; + while (cur < file_offset + num_bytes) { + u64 entry_end; + u64 end; + u32 len; - entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); - if (!in_range(*file_offset, entry->file_offset, entry->num_bytes)) - goto out; + node = tree_search(tree, cur); + /* No ordered extents at all */ + if (!node) + break; - dec_start = max(*file_offset, entry->file_offset); - dec_end = min(*file_offset + io_size, - entry->file_offset + entry->num_bytes); - *file_offset = dec_end; - if (dec_start > dec_end) { - btrfs_crit(fs_info, "bad ordering dec_start %llu end %llu", - dec_start, dec_end); - } - to_dec = dec_end - dec_start; - if (to_dec > entry->bytes_left) { - btrfs_crit(fs_info, - "bad ordered accounting left %llu size %llu", - entry->bytes_left, to_dec); - } - entry->bytes_left -= to_dec; - if (!uptodate) - set_bit(BTRFS_ORDERED_IOERR, &entry->flags); + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + entry_end = entry->file_offset + entry->num_bytes; + /* + * |<-- OE --->| | + * cur + * Go to next OE. + */ + if (cur >= entry_end) { + node = rb_next(node); + /* No more ordered extents, exit */ + if (!node) + break; + entry = rb_entry(node, struct btrfs_ordered_extent, + rb_node); + + /* Go to next ordered extent and continue */ + cur = entry->file_offset; + continue; + } + /* + * | |<--- OE --->| + * cur + * Go to the start of OE. + */ + if (cur < entry->file_offset) { + cur = entry->file_offset; + continue; + } - if (entry->bytes_left == 0) { /* - * Ensure only one caller can set the flag and finished_ret - * accordingly + * Now we are definitely inside one ordered extent. + * + * |<--- OE --->| + * | + * cur */ - finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); - /* test_and_set_bit implies a barrier */ - cond_wake_up_nomb(&entry->wait); - } -out: - if (finished && finished_ret && entry) { - *finished_ret = entry; - refcount_inc(&entry->refs); + end = min(entry->file_offset + entry->num_bytes, + file_offset + num_bytes) - 1; + ASSERT(end + 1 - cur < U32_MAX); + len = end + 1 - cur; + + if (page) { + /* + * Private2 bit indicates whether we still have pending + * io unfinished for the ordered extent. + * + * If there's no such bit, we need to skip to next range. + */ + if (!PagePrivate2(page)) { + cur += len; + continue; + } + ClearPagePrivate2(page); + } + + /* Now we're fine to update the accounting */ + if (unlikely(len > entry->bytes_left)) { + WARN_ON(1); + btrfs_crit(fs_info, +"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%u left=%llu", + inode->root->root_key.objectid, + btrfs_ino(inode), + entry->file_offset, + entry->num_bytes, + len, entry->bytes_left); + entry->bytes_left = 0; + } else { + entry->bytes_left -= len; + } + + if (!uptodate) + set_bit(BTRFS_ORDERED_IOERR, &entry->flags); + + /* + * All the IO of the ordered extent is finished, we need to queue + * the finish_func to be executed. + */ + if (entry->bytes_left == 0) { + set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); + cond_wake_up(&entry->wait); + refcount_inc(&entry->refs); + spin_unlock_irqrestore(&tree->lock, flags); + btrfs_init_work(&entry->work, finish_func, NULL, NULL); + btrfs_queue_work(wq, &entry->work); + spin_lock_irqsave(&tree->lock, flags); + } + cur += len; } spin_unlock_irqrestore(&tree->lock, flags); - return finished; } /* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index e60c07f364276deadc9cc554e2a5bed1d2b849f4..72eb4b8cbb88141233e920bccffb0a09e9bf3159 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -172,13 +172,13 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, struct btrfs_ordered_extent *entry); +void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, + struct page *page, u64 file_offset, + u64 num_bytes, btrfs_func_t finish_func, + bool uptodate); bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode, struct btrfs_ordered_extent **cached, u64 file_offset, u64 io_size, int uptodate); -bool btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode, - struct btrfs_ordered_extent **finished_ret, - u64 *file_offset, u64 io_size, - int uptodate); int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset, u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes, int type);