diff --git a/block/blk-core.c b/block/blk-core.c index 71c5cf508127d4d32e18163eb8586010d694c4a0..f0e28624ef9c8abe7c5d2592a5658cea3c25e8e6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1374,15 +1374,18 @@ void blk_account_io_start(struct request *rq) } static unsigned long __part_start_io_acct(struct hd_struct *part, - unsigned int sectors, unsigned int op) + unsigned int sectors, unsigned int op, + bool precise) { const int sgrp = op_stat_group(op); unsigned long now = READ_ONCE(jiffies); part_stat_lock(); update_io_ticks(part, now, false); - part_stat_inc(part, ios[sgrp]); - part_stat_add(part, sectors[sgrp], sectors); + if (!precise) { + part_stat_inc(part, ios[sgrp]); + part_stat_add(part, sectors[sgrp], sectors); + } part_stat_local_inc(part, in_flight[op_is_write(op)]); part_stat_unlock(); @@ -1394,19 +1397,21 @@ unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, { *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector); - return __part_start_io_acct(*part, bio_sectors(bio), bio_op(bio)); + return __part_start_io_acct(*part, bio_sectors(bio), bio_op(bio), + false); } EXPORT_SYMBOL_GPL(part_start_io_acct); unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op) { - return __part_start_io_acct(&disk->part0, sectors, op); + return __part_start_io_acct(&disk->part0, sectors, op, false); } EXPORT_SYMBOL(disk_start_io_acct); -static void __part_end_io_acct(struct hd_struct *part, unsigned int op, - unsigned long start_time) +static void __part_end_io_acct(struct hd_struct *part, unsigned int sectors, + unsigned int op, unsigned long start_time, + bool precise) { const int sgrp = op_stat_group(op); unsigned long now = READ_ONCE(jiffies); @@ -1414,6 +1419,10 @@ static void __part_end_io_acct(struct hd_struct *part, unsigned int op, part_stat_lock(); update_io_ticks(part, now, true); + if (precise) { + part_stat_inc(part, ios[sgrp]); + part_stat_add(part, sectors[sgrp], sectors); + } part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); part_stat_local_dec(part, in_flight[op_is_write(op)]); part_stat_unlock(); @@ -1422,7 +1431,7 @@ static void __part_end_io_acct(struct hd_struct *part, unsigned int op, void part_end_io_acct(struct hd_struct *part, struct bio *bio, unsigned long start_time) { - __part_end_io_acct(part, bio_op(bio), start_time); + __part_end_io_acct(part, 0, bio_op(bio), start_time, false); hd_struct_put(part); } EXPORT_SYMBOL_GPL(part_end_io_acct); @@ -1430,10 +1439,42 @@ EXPORT_SYMBOL_GPL(part_end_io_acct); void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time) { - __part_end_io_acct(&disk->part0, op, start_time); + __part_end_io_acct(&disk->part0, 0, op, start_time, false); } EXPORT_SYMBOL(disk_end_io_acct); +unsigned long part_start_precise_io_acct(struct gendisk *disk, + struct hd_struct **part, + struct bio *bio) +{ + *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector); + + return __part_start_io_acct(*part, 0, bio_op(bio), true); +} +EXPORT_SYMBOL_GPL(part_start_precise_io_acct); + +unsigned long disk_start_precise_io_acct(struct gendisk *disk, unsigned int op) +{ + return __part_start_io_acct(&disk->part0, 0, op, true); +} +EXPORT_SYMBOL(disk_start_precise_io_acct); + +void part_end_precise_io_acct(struct hd_struct *part, struct bio *bio, + unsigned long start_time) +{ + __part_end_io_acct(part, bio_sectors(bio), bio_op(bio), start_time, + true); + hd_struct_put(part); +} +EXPORT_SYMBOL_GPL(part_end_precise_io_acct); + +void 
disk_end_precise_io_acct(struct gendisk *disk, unsigned int sectors, + unsigned int op, unsigned long start_time) +{ + __part_end_io_acct(&disk->part0, sectors, op, start_time, true); +} +EXPORT_SYMBOL(disk_end_precise_io_acct); + /* * Steal bios from a request and add them to a bio list. * The request must not have been partially completed before. diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7c01f848742716b9e56cfe862475383935fcbb64..8780c95f9b86ecddd24b038d286c182fc18f34c2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -301,7 +301,7 @@ static void call_bio_endio(struct r1bio *r1_bio) bio->bi_status = BLK_STS_IOERR; if (blk_queue_io_stat(bio->bi_disk->queue)) - bio_end_io_acct(bio, r1_bio->start_time); + bio_end_precise_io_acct(bio, r1_bio->start_time); bio_endio(bio); } @@ -1295,7 +1295,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, r1_bio->read_disk = rdisk; if (!r1bio_existed && blk_queue_io_stat(bio->bi_disk->queue)) - r1_bio->start_time = bio_start_io_acct(bio); + r1_bio->start_time = bio_start_precise_io_acct(bio); read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); @@ -1487,7 +1487,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, } if (blk_queue_io_stat(bio->bi_disk->queue)) - r1_bio->start_time = bio_start_io_acct(bio); + r1_bio->start_time = bio_start_precise_io_acct(bio); atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->behind_remaining, 0); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index beaada3e87bab1eff27bf75676e58a1e97f5817d..04869394e3458e038a7993c54e327bef9c576536 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -298,7 +298,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio) bio->bi_status = BLK_STS_IOERR; if (blk_queue_io_stat(bio->bi_disk->queue)) - bio_end_io_acct(bio, r10_bio->start_time); + bio_end_precise_io_acct(bio, r10_bio->start_time); bio_endio(bio); /* * Wake up any possible resync thread that waits for the device @@ -1188,7 +1188,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, slot = r10_bio->read_slot; if (!handle_error && blk_queue_io_stat(bio->bi_disk->queue)) - r10_bio->start_time = bio_start_io_acct(bio); + r10_bio->start_time = bio_start_precise_io_acct(bio); read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); r10_bio->devs[slot].bio = read_bio; @@ -1473,7 +1473,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, } if (blk_queue_io_stat(bio->bi_disk->queue)) - r10_bio->start_time = bio_start_io_acct(bio); + r10_bio->start_time = bio_start_precise_io_acct(bio); atomic_set(&r10_bio->remaining, 1); md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); @@ -3624,6 +3624,20 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) return nc*fc; } +static void raid10_free_conf(struct r10conf *conf) +{ + if (!conf) + return; + + mempool_exit(&conf->r10bio_pool); + kfree(conf->mirrors); + kfree(conf->mirrors_old); + kfree(conf->mirrors_new); + safe_put_page(conf->tmppage); + bioset_exit(&conf->bio_split); + kfree(conf); +} + static struct r10conf *setup_conf(struct mddev *mddev) { struct r10conf *conf = NULL; @@ -3706,13 +3720,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) return conf; out: - if (conf) { - mempool_exit(&conf->r10bio_pool); - kfree(conf->mirrors); - safe_put_page(conf->tmppage); - bioset_exit(&conf->bio_split); - kfree(conf); - } + raid10_free_conf(conf); return ERR_PTR(err); } @@ -3918,10 +3926,7 @@ static int 
raid10_run(struct mddev *mddev) out_free_conf: md_unregister_thread(&mddev->thread); - mempool_exit(&conf->r10bio_pool); - safe_put_page(conf->tmppage); - kfree(conf->mirrors); - kfree(conf); + raid10_free_conf(conf); mddev->private = NULL; out: return -EIO; @@ -3929,15 +3934,7 @@ static int raid10_run(struct mddev *mddev) static void raid10_free(struct mddev *mddev, void *priv) { - struct r10conf *conf = priv; - - mempool_exit(&conf->r10bio_pool); - safe_put_page(conf->tmppage); - kfree(conf->mirrors); - kfree(conf->mirrors_old); - kfree(conf->mirrors_new); - bioset_exit(&conf->bio_split); - kfree(conf); + raid10_free_conf(priv); } static void raid10_quiesce(struct mddev *mddev, int quiesce) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index a226a040647a470cb41ac2f53abbf16bb2bbab80..4575d0f0dd6c064a00c3b825bfd8e198abd96819 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -783,7 +783,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, char *buf) { struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(shost); - struct iscsi_session *session = tcp_sw_host->session; + struct iscsi_session *session; struct iscsi_conn *conn; struct iscsi_tcp_conn *tcp_conn; struct iscsi_sw_tcp_conn *tcp_sw_conn; @@ -793,6 +793,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, switch (param) { case ISCSI_HOST_PARAM_IPADDRESS: + session = tcp_sw_host->session; if (!session) return -ENOTCONN; @@ -889,11 +890,13 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, if (!cls_session) goto remove_host; session = cls_session->dd_data; - tcp_sw_host = iscsi_host_priv(shost); - tcp_sw_host->session = session; if (iscsi_tcp_r2tpool_alloc(session)) goto remove_session; + + /* We are now fully setup so expose the session to sysfs. */ + tcp_sw_host = iscsi_host_priv(shost); + tcp_sw_host->session = session; return cls_session; remove_session: @@ -913,10 +916,17 @@ static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) if (WARN_ON_ONCE(session->leadconn)) return; + iscsi_session_remove(cls_session); + /* + * Our get_host_param needs to access the session, so remove the + * host from sysfs before freeing the session to make sure userspace + * is no longer accessing the callout. + */ + iscsi_host_remove(shost); + iscsi_tcp_r2tpool_free(cls_session->dd_data); - iscsi_session_teardown(cls_session); - iscsi_host_remove(shost); + iscsi_session_free(cls_session); iscsi_host_free(shost); } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 176842a869f178e2c1714175fbead91e8720d493..39d5067f804bc22e185b02edefa1f7ebbd3e862e 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2983,20 +2983,34 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost, } EXPORT_SYMBOL_GPL(iscsi_session_setup); +/* + * issi_session_remove - Remove session from iSCSI class. + */ +void iscsi_session_remove(struct iscsi_cls_session *cls_session) +{ + struct iscsi_session *session = cls_session->dd_data; + struct Scsi_Host *shost = session->host; + + iscsi_remove_session(cls_session); + /* + * host removal only has to wait for its children to be removed from + * sysfs, and iscsi_tcp needs to do iscsi_host_remove before freeing + * the session, so drop the session count here. 
+ */ + iscsi_host_dec_session_cnt(shost); +} +EXPORT_SYMBOL_GPL(iscsi_session_remove); + /** - * iscsi_session_teardown - destroy session, host, and cls_session + * iscsi_session_free - Free iscsi session and it's resources * @cls_session: iscsi session */ -void iscsi_session_teardown(struct iscsi_cls_session *cls_session) +void iscsi_session_free(struct iscsi_cls_session *cls_session) { struct iscsi_session *session = cls_session->dd_data; struct module *owner = cls_session->transport->owner; - struct Scsi_Host *shost = session->host; iscsi_pool_free(&session->cmdpool); - - iscsi_remove_session(cls_session); - kfree(session->password); kfree(session->password_in); kfree(session->username); @@ -3012,10 +3026,19 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session) kfree(session->discovery_parent_type); iscsi_free_session(cls_session); - - iscsi_host_dec_session_cnt(shost); module_put(owner); } +EXPORT_SYMBOL_GPL(iscsi_session_free); + +/** + * iscsi_session_teardown - destroy session and cls_session + * @cls_session: iscsi session + */ +void iscsi_session_teardown(struct iscsi_cls_session *cls_session) +{ + iscsi_session_remove(cls_session); + iscsi_session_free(cls_session); +} EXPORT_SYMBOL_GPL(iscsi_session_teardown); /** diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 42db9c52208e60ff1a834ffc170645da845b68cb..8a24d144395de790852a1b1c68f285af935f1c88 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1503,6 +1503,40 @@ void scsi_remove_device(struct scsi_device *sdev) } EXPORT_SYMBOL(scsi_remove_device); +/* Cancel the inflight async probe for scsi_device */ +static void __scsi_kill_devices(struct scsi_target *starget) +{ + struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); + struct scsi_device *sdev, *to_put = NULL; + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + list_for_each_entry(sdev, &shost->__devices, siblings) { + if (sdev->channel != starget->channel || + sdev->id != starget->id) + continue; + + if ((sdev->sdev_state != SDEV_DEL && + sdev->sdev_state != SDEV_CANCEL) || !sdev->is_visible) + continue; + if (!kobject_get_unless_zero(&sdev->sdev_gendev.kobj)) + continue; + spin_unlock_irqrestore(shost->host_lock, flags); + + if (to_put) + put_device(&to_put->sdev_gendev); + device_lock(&sdev->sdev_gendev); + kill_device(&sdev->sdev_gendev); + device_unlock(&sdev->sdev_gendev); + to_put = sdev; + + spin_lock_irqsave(shost->host_lock, flags); + } + spin_unlock_irqrestore(shost->host_lock, flags); + if (to_put) + put_device(&to_put->sdev_gendev); +} + static void __scsi_remove_target(struct scsi_target *starget) { struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); @@ -1532,6 +1566,8 @@ static void __scsi_remove_target(struct scsi_target *starget) goto restart; } spin_unlock_irqrestore(shost->host_lock, flags); + + __scsi_kill_devices(starget); } /** @@ -1556,7 +1592,16 @@ void scsi_remove_target(struct device *dev) starget->state == STARGET_CREATED_REMOVE) continue; if (starget->dev.parent == dev || &starget->dev == dev) { - kref_get(&starget->reap_ref); + /* + * If the reference count is already zero, skip + * this target. Calling kref_get_unless_zero() if + * the reference count is zero is safe because + * scsi_target_destroy() will wait until the host + * lock has been released before freeing starget. 
+ */ + if (!kref_get_unless_zero(&starget->reap_ref)) + continue; + if (starget->state == STARGET_CREATED) starget->state = STARGET_CREATED_REMOVE; else diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1afd60fcd7723e04fea15354bcf6b38eb15e38b7..50a0e90e8af9b1ca854e834b579b8165a80b61c6 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -303,6 +303,22 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, return desc; } +static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb, + ext4_group_t block_group, + struct buffer_head *bh) +{ + ext4_grpblk_t next_zero_bit; + unsigned long bitmap_size = sb->s_blocksize * 8; + unsigned int offset = num_clusters_in_group(sb, block_group); + + if (bitmap_size <= offset) + return 0; + + next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset); + + return (next_zero_bit < bitmap_size ? next_zero_bit : 0); +} + /* * Return the block number which was discovered to be invalid, or 0 if * the block bitmap is valid. @@ -401,6 +417,15 @@ static int ext4_validate_block_bitmap(struct super_block *sb, EXT4_GROUP_INFO_BBITMAP_CORRUPT); return -EFSCORRUPTED; } + blk = ext4_valid_block_bitmap_padding(sb, block_group, bh); + if (unlikely(blk != 0)) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set", + block_group, blk); + ext4_mark_group_bitmap_corrupted(sb, block_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); + return -EFSCORRUPTED; + } set_buffer_verified(bh); verified: ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c470a5fb2f205624326cbce5a487fe75875e46a1..7e5abaa31fea0924675e3fd0d4e6ba300782bf6d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1459,6 +1459,7 @@ struct ext4_sb_info { unsigned int s_mount_opt2; unsigned long s_mount_flags; unsigned int s_def_mount_opt; + unsigned int s_def_mount_opt2; ext4_fsblk_t s_sb_block; atomic64_t s_resv_clusters; kuid_t s_resuid; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dd3b72ba67e8c8709ccb2196067519b4cffaa006..da8bd8031119020c720b46190388ebd343eab7b7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2585,7 +2585,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - int def_errors, def_mount_opt = sbi->s_def_mount_opt; + int def_errors; const struct mount_opts *m; char sep = nodefs ? 
'\n' : ','; @@ -2597,15 +2597,28 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, for (m = ext4_mount_opts; m->token != Opt_err; m++) { int want_set = m->flags & MOPT_SET; + int opt_2 = m->flags & MOPT_2; + unsigned int mount_opt, def_mount_opt; + if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP) continue; - if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) - continue; /* skip if same as the default */ + + if (opt_2) { + mount_opt = sbi->s_mount_opt2; + def_mount_opt = sbi->s_def_mount_opt2; + } else { + mount_opt = sbi->s_mount_opt; + def_mount_opt = sbi->s_def_mount_opt; + } + /* skip if same as the default */ + if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt))) + continue; + /* select Opt_noFoo vs Opt_Foo */ if ((want_set && - (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || - (!want_set && (sbi->s_mount_opt & m->mount_opt))) - continue; /* select Opt_noFoo vs Opt_Foo */ + (mount_opt & m->mount_opt) != m->mount_opt) || + (!want_set && (mount_opt & m->mount_opt))) + continue; SEQ_OPTS_PRINT("%s", token2str(m->token)); } @@ -2635,7 +2648,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); if (nodefs || EXT4_MOUNT_DATA_FLAGS & - (sbi->s_mount_opt ^ def_mount_opt)) { + (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) SEQ_OPTS_PUTS("data=journal"); else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) @@ -4340,6 +4353,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) kfree(s_mount_opts); } sbi->s_def_mount_opt = sbi->s_mount_opt; + sbi->s_def_mount_opt2 = sbi->s_mount_opt2; if (!parse_options((char *) data, sb, &journal_devnum, &journal_ioprio, 0)) goto failed_mount; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 019e5f019468a2b572a047d0cd428961a700dbc5..cc1e8bd4ae51368fda17776949cf0814761ee414 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -885,7 +885,7 @@ xfs_reclaim_inode( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_iunpin_wait(ip); - xfs_iflush_abort(ip); + xfs_iflush_shutdown_abort(ip); goto reclaim; } if (xfs_ipincount(ip)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c9cf34a4fee8be5cb41f9dd12db4523bc6571ab2..623fa48912616e1978a0bde63f8bec31e9efa238 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3723,7 +3723,7 @@ xfs_iflush_cluster( /* * We must use the safe variant here as on shutdown xfs_iflush_abort() - * can remove itself from the list. + * will remove itself from the list. */ list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) { iip = (struct xfs_inode_log_item *)lip; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 3aba4559469f1eaa6f5d08298200a6d13bcd6de9..fec0a75e8121a4aa59e443f0e77c1b75a9199158 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -517,10 +517,17 @@ xfs_inode_item_push( uint rval = XFS_ITEM_SUCCESS; int error; - ASSERT(iip->ili_item.li_buf); + if (!bp || (ip->i_flags & XFS_ISTALE)) { + /* + * Inode item/buffer is being being aborted due to cluster + * buffer deletion. Trigger a log force to have that operation + * completed and items removed from the AIL before the next push + * attempt. 
+ */ + return XFS_ITEM_PINNED; + } - if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp) || - (ip->i_flags & XFS_ISTALE)) + if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) return XFS_ITEM_PINNED; if (xfs_iflags_test(ip, XFS_IFLUSHING)) @@ -796,46 +803,143 @@ xfs_buf_inode_io_fail( } /* - * This is the inode flushing abort routine. It is called when - * the filesystem is shutting down to clean up the inode state. It is - * responsible for removing the inode item from the AIL if it has not been - * re-logged and clearing the inode's flush state. + * Clear the inode logging fields so no more flushes are attempted. If we are + * on a buffer list, it is now safe to remove it because the buffer is + * guaranteed to be locked. The caller will drop the reference to the buffer + * the log item held. + */ +static void +xfs_iflush_abort_clean( + struct xfs_inode_log_item *iip) +{ + iip->ili_last_fields = 0; + iip->ili_fields = 0; + iip->ili_fsync_fields = 0; + iip->ili_flush_lsn = 0; + iip->ili_item.li_buf = NULL; + list_del_init(&iip->ili_item.li_bio_list); +} + +/* + * Abort flushing the inode from a context holding the cluster buffer locked. + * + * This is the normal runtime method of aborting writeback of an inode that is + * attached to a cluster buffer. It occurs when the inode and the backing + * cluster buffer have been freed (i.e. inode is XFS_ISTALE), or when cluster + * flushing or buffer IO completion encounters a log shutdown situation. + * + * If we need to abort inode writeback and we don't already hold the buffer + * locked, call xfs_iflush_shutdown_abort() instead as this should only ever be + * necessary in a shutdown situation. */ void xfs_iflush_abort( struct xfs_inode *ip) { struct xfs_inode_log_item *iip = ip->i_itemp; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp; - if (iip) { - /* - * Clear the failed bit before removing the item from the AIL so - * xfs_trans_ail_delete() doesn't try to clear and release the - * buffer attached to the log item before we are done with it. - */ - clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags); - xfs_trans_ail_delete(&iip->ili_item, 0); + if (!iip) { + /* clean inode, nothing to do */ + xfs_iflags_clear(ip, XFS_IFLUSHING); + return; + } + + /* + * Remove the inode item from the AIL before we clear its internal + * state. Whilst the inode is in the AIL, it should have a valid buffer + * pointer for push operations to access - it is only safe to remove the + * inode from the buffer once it has been removed from the AIL. + * + * We also clear the failed bit before removing the item from the AIL + * as xfs_trans_ail_delete()->xfs_clear_li_failed() will release buffer + * references the inode item owns and needs to hold until we've fully + * aborted the inode log item and detached it from the buffer. + */ + clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags); + xfs_trans_ail_delete(&iip->ili_item, 0); + + /* + * Grab the inode buffer so can we release the reference the inode log + * item holds on it. + */ + spin_lock(&iip->ili_lock); + bp = iip->ili_item.li_buf; + xfs_iflush_abort_clean(iip); + spin_unlock(&iip->ili_lock); + xfs_iflags_clear(ip, XFS_IFLUSHING); + if (bp) + xfs_buf_rele(bp); +} + +/* + * Abort an inode flush in the case of a shutdown filesystem. This can be called + * from anywhere with just an inode reference and does not require holding the + * inode cluster buffer locked. If the inode is attached to a cluster buffer, + * it will grab and lock it safely, then abort the inode flush. 
+ */ +void +xfs_iflush_shutdown_abort( + struct xfs_inode *ip) +{ + struct xfs_inode_log_item *iip = ip->i_itemp; + struct xfs_buf *bp; + + if (!iip) { + /* clean inode, nothing to do */ + xfs_iflags_clear(ip, XFS_IFLUSHING); + return; + } + + spin_lock(&iip->ili_lock); + bp = iip->ili_item.li_buf; + if (!bp) { + spin_unlock(&iip->ili_lock); + xfs_iflush_abort(ip); + return; + } + + /* + * We have to take a reference to the buffer so that it doesn't get + * freed when we drop the ili_lock and then wait to lock the buffer. + * We'll clean up the extra reference after we pick up the ili_lock + * again. + */ + xfs_buf_hold(bp); + spin_unlock(&iip->ili_lock); + xfs_buf_lock(bp); + + spin_lock(&iip->ili_lock); + if (!iip->ili_item.li_buf) { /* - * Clear the inode logging fields so no more flushes are - * attempted. + * Raced with another removal, hold the only reference + * to bp now. Inode should not be in the AIL now, so just clean + * up and return; */ - spin_lock(&iip->ili_lock); - iip->ili_last_fields = 0; - iip->ili_fields = 0; - iip->ili_fsync_fields = 0; - iip->ili_flush_lsn = 0; - bp = iip->ili_item.li_buf; - iip->ili_item.li_buf = NULL; - list_del_init(&iip->ili_item.li_bio_list); + ASSERT(list_empty(&iip->ili_item.li_bio_list)); + ASSERT(!test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)); + xfs_iflush_abort_clean(iip); spin_unlock(&iip->ili_lock); + xfs_iflags_clear(ip, XFS_IFLUSHING); + xfs_buf_relse(bp); + return; } - xfs_iflags_clear(ip, XFS_IFLUSHING); - if (bp) - xfs_buf_rele(bp); + + /* + * Got two references to bp. The first will get dropped by + * xfs_iflush_abort() when the item is removed from the buffer list, but + * we can't drop our reference until _abort() returns because we have to + * unlock the buffer as well. Hence we abort and then unlock and release + * our reference to the buffer. 
+ */ + ASSERT(iip->ili_item.li_buf == bp); + spin_unlock(&iip->ili_lock); + xfs_iflush_abort(ip); + xfs_buf_relse(bp); } + /* * convert an xfs_inode_log_format struct from the old 32 bit version * (which can have different field alignments) to the native 64 bit version diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 403b45ab9aa283916e93bd5645d5665c958d0a9d..9c829cf5c839f2dea7193b133205fa9e588d2c7d 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -44,6 +44,7 @@ static inline int xfs_inode_clean(struct xfs_inode *ip) extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); extern void xfs_inode_item_destroy(struct xfs_inode *); extern void xfs_iflush_abort(struct xfs_inode *); +extern void xfs_iflush_shutdown_abort(struct xfs_inode *); extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, struct xfs_inode_log_format *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 171884608cada3145b9e3fc53c1804f0c9b9a3e1..b04613bc3ed57092ac056f1380b30f0da5c58812 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2027,6 +2027,27 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time); } +unsigned long disk_start_precise_io_acct(struct gendisk *disk, unsigned int op); +void disk_end_precise_io_acct(struct gendisk *disk, unsigned int sectors, + unsigned int op, unsigned long start_time); +unsigned long part_start_precise_io_acct(struct gendisk *disk, + struct hd_struct **part, + struct bio *bio); +void part_end_precise_io_acct(struct hd_struct *part, struct bio *bio, + unsigned long start_time); + +static inline unsigned long bio_start_precise_io_acct(struct bio *bio) +{ + return disk_start_precise_io_acct(bio->bi_disk, bio_op(bio)); +} + +static inline void bio_end_precise_io_acct(struct bio *bio, + unsigned long start_time) +{ + return disk_end_precise_io_acct(bio->bi_disk, bio_sectors(bio), + bio_op(bio), start_time); +} + int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 881e4762d626b4f9547bb69adbf297967a1aac07..fb7c5f82dcf33c98f5a423e0cad60f3324e60f80 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -404,6 +404,8 @@ extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost, extern struct iscsi_cls_session * iscsi_session_setup(struct iscsi_transport *, struct Scsi_Host *shost, uint16_t, int, int, uint32_t, unsigned int); +void iscsi_session_remove(struct iscsi_cls_session *cls_session); +void iscsi_session_free(struct iscsi_cls_session *cls_session); extern void iscsi_session_teardown(struct iscsi_cls_session *); extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *); extern int iscsi_set_param(struct iscsi_cls_conn *cls_conn, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 4a9b16dfb5cdf75ca1451d9d9123c87adbea49e0..c64a654e213eabf519e6609301ba68fa38ebfadb 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2469,6 +2469,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* Mark the rest of the page with padding */ rb_event_set_padding(event); + /* Make sure the padding is visible before the write update */ + smp_wmb(); + /* Set the write back to the previous setting */ local_sub(length, &tail_page->write); return; @@ -2480,6 +2483,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time 
delta must be non zero */ event->time_delta = 1; + /* Make sure the padding is visible before the tail_page->write update */ + smp_wmb(); + /* Set write to end of buffer */ length = (tail + length) - BUF_PAGE_SIZE; local_sub(length, &tail_page->write); @@ -4294,6 +4300,33 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); + /* + * The writer has preempt disable, wait for it. But not forever + * Although, 1 second is pretty much "forever" + */ +#define USECS_WAIT 1000000 + for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) { + /* If the write is past the end of page, a writer is still updating it */ + if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE)) + break; + + udelay(1); + + /* Get the latest version of the reader write value */ + smp_rmb(); + } + + /* The writer is not moving forward? Something is wrong */ + if (RB_WARN_ON(cpu_buffer, nr_loops == USECS_WAIT)) + reader = NULL; + + /* + * Make sure we see any padding after the write update + * (see rb_reset_tail()) + */ + smp_rmb(); + + return reader; } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 59e653b528b1faec6c6fcf73f0dd42633880e08d..6b95d3ba8fe1cecf4d75956bf87546b1f1a81c4f 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, } } + /* If somehow no addresses were found that can be used with this + * scope, it's an error. + */ + if (list_empty(&dest->address_list)) + error = -ENETUNREACH; + out: if (error) sctp_bind_addr_clean(dest);
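
A usage sketch for the new precise block accounting helpers (a hypothetical example, not part of the patches above): the block layer hunks declare a "precise" variant in include/linux/blkdev.h that defers the ios[] and sectors[] updates from submission to completion time, while still updating in_flight and io_ticks at start; the md/raid1 and raid10 hunks switch to this pair. A minimal sketch of how another bio-based driver could adopt the same helpers, modeled on the raid1 usage in this series (my_start_acct/my_end_acct are illustrative names, not from the patches):

	#include <linux/blkdev.h>

	/*
	 * Submission side: record the start time only. With the precise
	 * variant, ios[] and sectors[] are not bumped here; they are
	 * accounted at completion instead.
	 */
	static unsigned long my_start_acct(struct bio *bio)
	{
		if (!blk_queue_io_stat(bio->bi_disk->queue))
			return 0;
		return bio_start_precise_io_acct(bio);
	}

	/*
	 * Completion side: account ios, sectors and latency in one step,
	 * mirroring the raid1/raid10 call_bio_endio()/raid_end_bio_io()
	 * hunks above.
	 */
	static void my_end_acct(struct bio *bio, unsigned long start_time)
	{
		if (blk_queue_io_stat(bio->bi_disk->queue))
			bio_end_precise_io_acct(bio, start_time);
	}

As in the raid1/raid10 hunks, the returned start time would be stashed in the driver's per-I/O state (r1_bio->start_time, r10_bio->start_time in the patches) and passed back when the parent bio completes.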