diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 53ef53d5b4143c3d50110c3d237b39e621c1e45e..b2ce17f29c75ea278f5f255e1b08a2998822114a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4692,12 +4692,11 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
 {
 	unsigned long val;
 
+	memset(dbgregs, 0, sizeof(*dbgregs));
 	memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
 	kvm_get_dr(vcpu, 6, &val);
 	dbgregs->dr6 = val;
 	dbgregs->dr7 = vcpu->arch.dr7;
-	dbgregs->flags = 0;
-	memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
 }
 
 static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 447e2a688113bc813dce802d62b09d7dc636465a..432aeb7051fa74f819e8a7a030355ecc3210b459 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -41,8 +41,8 @@
 #define CNTACR_RWVT	BIT(4)
 #define CNTACR_RWPT	BIT(5)
 
-#define CNTVCT_LO	0x00
-#define CNTPCT_LO	0x08
+#define CNTPCT_LO	0x00
+#define CNTVCT_LO	0x08
 #define CNTFRQ		0x10
 #define CNTP_CVAL_LO	0x20
 #define CNTP_CTL	0x2c
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index aa8b0f86b2be1f3f4056748b31856978e37b326b..d4eacb0b43ce718d363620f632efde73ce31e4af 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -98,7 +98,7 @@ TRACE_EVENT(nvme_complete_rq,
 	TP_fast_assign(
 		__entry->ctrl_id = nvme_req(req)->ctrl->instance;
 		__entry->qid = nvme_req_qid(req);
-		__entry->cid = nvme_req(req)->cmd->common.command_id;
+		__entry->cid = nvme_cid(req);
 		__entry->result = le64_to_cpu(nvme_req(req)->result.u64);
 		__entry->retries = nvme_req(req)->retries;
 		__entry->flags = nvme_req(req)->flags;
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index fe8a5e5c0df84f2c7a057f7fe46a42b3fd697491..bf0b3178f84d075e3d3161233b990aba3a71409f 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -1036,10 +1036,12 @@ static int alua_activate(struct scsi_device *sdev,
 	rcu_read_unlock();
 	mutex_unlock(&h->init_mutex);
 
-	if (alua_rtpg_queue(pg, sdev, qdata, true))
+	if (alua_rtpg_queue(pg, sdev, qdata, true)) {
 		fn = NULL;
-	else
+	} else {
+		kfree(qdata);
 		err = SCSI_DH_DEV_OFFLINED;
+	}
 	kref_put(&pg->kref, release_port_group);
 out:
 	if (fn)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f1d36671bc2be2b7628c52f53f42451de4bdc03b..5d5ae6f44510a712aa45f622bbd0fb6ee92cbeea 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1618,7 +1618,8 @@ struct ext4_sb_info {
 
 	/*
 	 * Barrier between writepages ops and changing any inode's JOURNAL_DATA
-	 * or EXTENTS flag.
+	 * or EXTENTS flag or between writepages ops and changing DELALLOC or
+	 * DIOREAD_NOLOCK mount options on remount.
 	 */
 	struct percpu_rw_semaphore s_writepages_rwsem;
 	struct dax_device *s_daxdev;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6f78f7fbf419fb9958ee71b86a767cb447702d79..32de10ef895b052ee057f2d0894155fe1bfc8b96 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -190,8 +190,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
 	return false;
 }
 
-/* Is IO overwriting allocated and initialized blocks? */
-static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+/* Is IO overwriting allocated or initialized blocks? */
+static bool ext4_overwrite_io(struct inode *inode,
+			      loff_t pos, loff_t len, bool *unwritten)
 {
 	struct ext4_map_blocks map;
 	unsigned int blkbits = inode->i_blkbits;
@@ -205,12 +206,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
 	blklen = map.m_len;
 
 	err = ext4_map_blocks(NULL, inode, &map, 0);
+	if (err != blklen)
+		return false;
 	/*
 	 * 'err==len' means that all of the blocks have been preallocated,
-	 * regardless of whether they have been initialized or not. To exclude
-	 * unwritten extents, we need to check m_flags.
+	 * regardless of whether they have been initialized or not. We need to
+	 * check m_flags to distinguish the unwritten extents.
 	 */
-	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+	*unwritten = !(map.m_flags & EXT4_MAP_MAPPED);
+	return true;
 }
 
 static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
@@ -421,11 +425,16 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
  * - For extending writes case we don't take the shared lock, since it requires
  *   updating inode i_disksize and/or orphan handling with exclusive lock.
  *
- * - shared locking will only be true mostly with overwrites. Otherwise we will
- *   switch to exclusive i_rwsem lock.
+ * - shared locking will mostly be used for overwrites, including both
+ *   initialized and unwritten blocks. For overwrites of unwritten blocks,
+ *   extent splitting is protected by i_data_sem in ext4_inode_info, so we
+ *   can take the shared i_rwsem lock there as well.
+ *
+ * - Otherwise we will switch to the exclusive i_rwsem lock.
  */
 static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
-			     bool *ilock_shared, bool *extend)
+			     bool *ilock_shared, bool *extend,
+			     bool *unwritten)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
@@ -449,7 +458,7 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
 	 * in file_modified().
 	 */
 	if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
-	     !ext4_overwrite_io(inode, offset, count))) {
+	     !ext4_overwrite_io(inode, offset, count, unwritten))) {
 		if (iocb->ki_flags & IOCB_NOWAIT) {
 			ret = -EAGAIN;
 			goto out;
@@ -481,7 +490,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	loff_t offset = iocb->ki_pos;
 	size_t count = iov_iter_count(from);
 	const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
-	bool extend = false, unaligned_io = false;
+	bool extend = false, unaligned_io = false, unwritten = false;
 	bool ilock_shared = true;
 
 	/*
@@ -524,7 +533,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		return ext4_buffered_write_iter(iocb, from);
 	}
 
-	ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
+	ret = ext4_dio_write_checks(iocb, from,
+				    &ilock_shared, &extend, &unwritten);
 	if (ret <= 0)
 		return ret;
 
@@ -568,7 +578,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		ext4_journal_stop(handle);
 	}
 
-	if (ilock_shared)
+	if (ilock_shared && !unwritten)
 		iomap_ops = &ext4_iomap_overwrite_ops;
 	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
 			   is_sync_kiocb(iocb) || unaligned_io || extend);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9e34f90552eef71dc3b13272573ac351fea63666..3278d46a7d652f6f6344b3a29e2f31d8cd4d02bf 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5984,10 +5984,20 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	vfs_flags = SB_LAZYTIME | SB_I_VERSION;
 	sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags);
 
+	/*
+	 * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
+	 * two calls to ext4_should_dioread_nolock() to return inconsistent
+	 * values, triggering a WARN_ON in ext4_add_complete_io(). Grab
+	 * s_writepages_rwsem here to avoid races between writepages ops
+	 * and remount.
+	 */
+	percpu_down_write(&sbi->s_writepages_rwsem);
 	if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
 		err = -EINVAL;
+		percpu_up_write(&sbi->s_writepages_rwsem);
 		goto restore_opts;
 	}
+	percpu_up_write(&sbi->s_writepages_rwsem);
 
 	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
 	    test_opt(sb, JOURNAL_CHECKSUM)) {
@@ -6205,6 +6215,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 
 restore_opts:
+	percpu_down_write(&sbi->s_writepages_rwsem);
 	sb->s_flags = old_sb_flags;
 	sbi->s_mount_opt = old_opts.s_mount_opt;
 	sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -6213,6 +6224,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	sbi->s_commit_interval = old_opts.s_commit_interval;
 	sbi->s_min_batch_time = old_opts.s_min_batch_time;
 	sbi->s_max_batch_time = old_opts.s_max_batch_time;
+	percpu_up_write(&sbi->s_writepages_rwsem);
+
 	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
 		ext4_release_system_zone(sb);
 #ifdef CONFIG_QUOTA
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 6f54f639913fc81333ff21eed7f21ec7911d03ba..e18f3c089eeae680217ebeff710b6146ab60ea0f 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2983,6 +2983,7 @@ xfs_alloc_read_agf(
 	struct xfs_agf		*agf;		/* ag freelist header */
 	struct xfs_perag	*pag;		/* per allocation group data */
 	int			error;
+	int			allocbt_blks;
 
 	trace_xfs_alloc_read_agf(mp, agno);
 
@@ -3013,6 +3014,19 @@ xfs_alloc_read_agf(
 		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
 		pag->pagf_init = 1;
 		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
+
+		/*
+		 * Update the in-core allocbt counter. Filter out the rmapbt
+		 * subset of the btreeblks counter because the rmapbt is managed
+		 * by perag reservation. Subtract one for the rmapbt root block
+		 * because the rmap counter includes it while the btreeblks
+		 * counter only tracks non-root blocks.
+		 */
+		allocbt_blks = pag->pagf_btreeblks;
+		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
+		if (allocbt_blks > 0)
+			atomic64_add(allocbt_blks, &mp->m_allocbt_blks);
 	}
 #ifdef DEBUG
 	else if (!XFS_FORCED_SHUTDOWN(mp)) {
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index dbe302d1cb8de526dc09eb2356683b0a470d0242..a43e4c50e69b7ba9bd828e742125c2b75f4334d3 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -71,6 +71,7 @@ xfs_allocbt_alloc_block(
 		return 0;
 	}
 
+	atomic64_inc(&cur->bc_mp->m_allocbt_blks);
 	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, false);
 
 	new->s = cpu_to_be32(bno);
@@ -94,6 +95,7 @@ xfs_allocbt_free_block(
 	if (error)
 		return error;
 
+	atomic64_dec(&cur->bc_mp->m_allocbt_blks);
 	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
 			      XFS_EXTENT_BUSY_SKIP_DISCARD);
 	return 0;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 81cb2b3b20f2df0a9795815fd2267024369ee524..0e899eafbc1634bf9faa44d1f3b11100f73833e3 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -291,7 +291,7 @@ xfs_fs_counts(
 	cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
 	cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
 	cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
-						mp->m_alloc_set_aside;
+						xfs_fdblocks_unavailable(mp);
 
 	spin_lock(&mp->m_sb_lock);
 	cnt->freertx = mp->m_sb.sb_frextents;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6504503f07b3aae241e4a8601424b13cc9b0ffea..743c7ffdc31683d14af8e479de08cdaf84f4496a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1244,6 +1244,7 @@ xfs_mod_fdblocks(
 	int64_t			lcounter;
 	long long		res_used;
 	s32			batch;
+	uint64_t		set_aside;
 
 	if (delta > 0) {
 		/*
@@ -1283,8 +1284,20 @@ xfs_mod_fdblocks(
 	else
 		batch = XFS_FDBLOCKS_BATCH;
 
+	/*
+	 * Set aside allocbt blocks because these blocks are tracked as free
+	 * space but not available for allocation. Technically this means that a
+	 * single reservation cannot consume all remaining free space, but the
+	 * ratio of allocbt blocks to usable free blocks should be rather small.
+	 * The tradeoff without this is that filesystems that maintain high
+	 * perag block reservations can over reserve physical block availability
+	 * and fail physical allocation, which leads to much more serious
+	 * problems (i.e. transaction abort, pagecache discards, etc.) than
+	 * slightly premature -ENOSPC.
+	 */
+	set_aside = xfs_fdblocks_unavailable(mp);
 	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
-	if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
+	if (__percpu_counter_compare(&mp->m_fdblocks, set_aside,
 				     XFS_FDBLOCKS_BATCH) >= 0) {
 		/* we had space! */
 		return 0;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 91a66b7b88151f1e825f98e9569248257a1208f4..3dc54ed4315acb9a0a46cfd2385f96cd0bd7cb43 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -187,6 +187,12 @@ typedef struct xfs_mount {
 	 * extents or anything related to the rt device.
 	 */
 	struct percpu_counter	m_delalloc_blks;
+	/*
+	 * Global count of allocation btree blocks in use across all AGs. Only
+	 * used when perag reservation is enabled. Helps prevent block
+	 * reservation from attempting to reserve allocation btree blocks.
+	 */
+	atomic64_t		m_allocbt_blks;
 
 	struct radix_tree_root	m_perag_tree;	/* per-ag accounting info */
 	spinlock_t		m_perag_lock;	/* lock for m_perag_tree */
@@ -472,7 +478,7 @@ static inline uint64_t
 xfs_fdblocks_unavailable(
 	struct xfs_mount	*mp)
 {
-	return mp->m_alloc_set_aside;
+	return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
 }
 
 extern int	xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bc4ea6f13dad56a0aa4dc88cc84cf2e228c09b39..9b4ff8247de29c41bca6e106ffef778bcc3128f8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -809,7 +809,8 @@ xfs_fs_statfs(
 	spin_unlock(&mp->m_sb_lock);
 
 	/* make sure statp->f_bfree does not underflow */
-	statp->f_bfree = max_t(int64_t, fdblocks - mp->m_alloc_set_aside, 0);
+	statp->f_bfree = max_t(int64_t, 0,
+				fdblocks - xfs_fdblocks_unavailable(mp));
 	statp->f_bavail = statp->f_bfree;
 
 	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h
index ddadf28037429a37473bced527391530e7886921..79228a1b2f0b8f21641794133baf499bbd3dab08 100644
--- a/include/linux/mem_reliable.h
+++ b/include/linux/mem_reliable.h
@@ -135,6 +135,14 @@ static inline void reliable_page_counter(struct page *page,
 	if (page_reliable(page))
 		atomic_long_add(val, &mm->reliable_nr_page);
 }
+
+static inline void reliable_clear_page_counter(struct mm_struct *mm)
+{
+	if (!mem_reliable_is_enabled())
+		return;
+
+	atomic_long_set(&mm->reliable_nr_page, 0);
+}
 #else
 #define reliable_enabled 0
 #define pagecache_use_reliable_mem 0
@@ -178,6 +186,7 @@ static inline void reliable_page_counter(struct page *page,
 					 struct mm_struct *mm, int val) {}
 static inline void reliable_report_usage(struct seq_file *m,
 					 struct mm_struct *mm) {}
+static inline void reliable_clear_page_counter(struct mm_struct *mm) {}
 
 #endif
 #endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 73c6993554706e7e66376a158feb706474e961a9..8271b10bc30e4a5745ae61b9740579b299aa244f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1329,11 +1329,6 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
 void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
 				struct tcf_block *block);
 
-static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
-{
-	return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb);
-}
-
 /* Make sure qdisc is no longer in SCHED state. */
 static inline void qdisc_synchronize(const struct Qdisc *q)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index f0aa2da990b827330b17f7d6752e6b82e17310a3..908c4e2e7896e821101b9048de46d337335eea5d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1049,6 +1049,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	atomic_set(&mm->has_pinned, 0);
 	atomic64_set(&mm->pinned_vm, 0);
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
+	reliable_clear_page_counter(mm);
 	spin_lock_init(&mm->page_table_lock);
 	spin_lock_init(&mm->arg_lock);
 	mm_init_cpumask(mm);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 24d561d8d9c97efecf668c376a873342f8d6d74c..296af520817d5f8c07ab6845fc375663aa451b1d 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -28,8 +28,8 @@
 static LIST_HEAD(mirred_list);
 static DEFINE_SPINLOCK(mirred_list_lock);
 
-#define MIRRED_RECURSION_LIMIT	4
-static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
+#define MIRRED_NEST_LIMIT	4
+static DEFINE_PER_CPU(unsigned int, mirred_nest_level);
 
 static bool tcf_mirred_is_act_redirect(int action)
 {
@@ -206,6 +206,25 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	return err;
 }
 
+static bool is_mirred_nested(void)
+{
+	return unlikely(__this_cpu_read(mirred_nest_level) > 1);
+}
+
+static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+{
+	int err;
+
+	if (!want_ingress)
+		err = dev_queue_xmit(skb);
+	else if (is_mirred_nested())
+		err = netif_rx(skb);
+	else
+		err = netif_receive_skb(skb);
+
+	return err;
+}
+
 static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 			  struct tcf_result *res)
 {
@@ -213,7 +232,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 	struct sk_buff *skb2 = skb;
 	bool m_mac_header_xmit;
 	struct net_device *dev;
-	unsigned int rec_level;
+	unsigned int nest_level;
 	int retval, err = 0;
 	bool use_reinsert;
 	bool want_ingress;
@@ -224,11 +243,11 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 	int mac_len;
 	bool at_nh;
 
-	rec_level = __this_cpu_inc_return(mirred_rec_level);
-	if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
+	nest_level = __this_cpu_inc_return(mirred_nest_level);
+	if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
 		net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
 				     netdev_name(skb->dev));
-		__this_cpu_dec(mirred_rec_level);
+		__this_cpu_dec(mirred_nest_level);
 		return TC_ACT_SHOT;
 	}
 
@@ -295,25 +314,22 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 		/* let's the caller reinsert the packet, if possible */
 		if (use_reinsert) {
 			res->ingress = want_ingress;
-			if (skb_tc_reinsert(skb, res))
+			err = tcf_mirred_forward(res->ingress, skb);
+			if (err)
 				tcf_action_inc_overlimit_qstats(&m->common);
-			__this_cpu_dec(mirred_rec_level);
+			__this_cpu_dec(mirred_nest_level);
 			return TC_ACT_CONSUMED;
 		}
 	}
 
-	if (!want_ingress)
-		err = dev_queue_xmit(skb2);
-	else
-		err = netif_receive_skb(skb2);
-
+	err = tcf_mirred_forward(want_ingress, skb2);
 	if (err) {
 out:
 		tcf_action_inc_overlimit_qstats(&m->common);
 		if (tcf_mirred_is_act_redirect(m_eaction))
 			retval = TC_ACT_SHOT;
 	}
-	__this_cpu_dec(mirred_rec_level);
+	__this_cpu_dec(mirred_nest_level);
 
 	return retval;
 }
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 54e2309315eb56020ed2620d427c623b8be8a33f..b12a3502fca5509098728728c6010bc0ee60beac 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -507,7 +507,8 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
 	list_for_each_entry(stab, &qdisc_stab_list, list) {
 		if (memcmp(&stab->szopts, s, sizeof(*s)))
 			continue;
-		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
+		if (tsize > 0 &&
+		    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
 			continue;
 		stab->refcnt++;
 		return stab;
@@ -519,14 +520,14 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
 		return ERR_PTR(-EINVAL);
 	}
 
-	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
+	stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
 	if (!stab)
 		return ERR_PTR(-ENOMEM);
 
 	stab->refcnt = 1;
 	stab->szopts = *s;
 	if (tsize > 0)
-		memcpy(stab->data, tab, tsize * sizeof(u16));
+		memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
 
 	list_add_tail(&stab->list, &qdisc_stab_list);
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index adc5407fd5d589f3194e9d82e8f2983231bf6d2f..b72b308fe406baebd6c5db4a08238967c5709df2 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -785,7 +785,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
 	if (!n || n > NETEM_DIST_MAX)
 		return -EINVAL;
 
-	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
+	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
 	if (!d)
 		return -ENOMEM;
 
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index d9eca227136bb6d4a1c7921251190c65d76fcb04..e396e24d30e087e1e0c742be93e405593ae32390 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -3,7 +3,8 @@
 
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
 	mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
-	gact_trap_test"
+	gact_trap_test \
+	mirred_egress_to_ingress_tcp_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -153,6 +154,52 @@ gact_trap_test()
 	log_test "trap ($tcflags)"
 }
 
+mirred_egress_to_ingress_tcp_test()
+{
+	local tmpfile=$(mktemp) tmpfile1=$(mktemp)
+
+	RET=0
+	dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$tmpfile
+	tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
+		$tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
+			action ct commit nat src addr 192.0.2.2 pipe \
+			action ct clear pipe \
+			action ct commit nat dst addr 192.0.2.1 pipe \
+			action ct clear pipe \
+			action skbedit ptype host pipe \
+			action mirred ingress redirect dev $h1
+	tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \
+		$tcflags ip_proto icmp \
+			action mirred ingress redirect dev $h1
+	tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \
+		ip_proto icmp \
+			action drop
+
+	ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $tmpfile1 &
+	local rpid=$!
+	ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$tmpfile
+	wait -n $rpid
+	cmp -s $tmpfile $tmpfile1
+	check_err $? "server output check failed"
+
+	$MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
+		-t icmp "ping,id=42,seq=5" -q
+	tc_check_packets "dev $h1 egress" 101 10
+	check_err $? "didn't mirred redirect ICMP"
+	tc_check_packets "dev $h1 ingress" 102 10
+	check_err $? "didn't drop mirred ICMP"
+	local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
+	test ${overlimits} = 10
+	check_err $? "wrong overlimits, expected 10 got ${overlimits}"
"wrong overlimits, expected 10 got ${overlimits}" + + tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower + tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower + tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower + + rm -f $tmpfile $tmpfile1 + log_test "mirred_egress_to_ingress_tcp ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]}