Unverified · Commit 7a9bd93b authored by openeuler-ci-bot, committed by Gitee

!563 Backport CVEs and bugfixes

Merge Pull Request from: @zhangjialin11 
 
Pull new CVEs:
CVE-2023-1513
CVE-2022-4269

net bugfixes from Zhengchao Shao
ext4 bugfixes from Zhihao Cheng and Baokun Li
mm bugfix from Ma Wupeng
timer bugfix from Yu Liao
nvme bugfix from Li Lingfeng
xfs bugfixes from Zhihao Cheng
scsi bugfix from Yu Kuai 
 
Link:https://gitee.com/openeuler/kernel/pulls/563 

Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com> 
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> 
@@ -4692,12 +4692,11 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
 {
 	unsigned long val;
 
+	memset(dbgregs, 0, sizeof(*dbgregs));
 	memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
 	kvm_get_dr(vcpu, 6, &val);
 	dbgregs->dr6 = val;
 	dbgregs->dr7 = vcpu->arch.dr7;
-	dbgregs->flags = 0;
-	memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
 }
 
 static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
......
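A note on the CVE-2023-1513 hunk above: zeroing the whole output structure up front, rather than clearing selected trailing fields, guarantees that compiler-inserted padding and any field the function never assigns cannot carry stale kernel stack bytes out to userspace. A minimal user-space sketch of the same pattern (field names follow the hunk; the array sizes and helper are illustrative, not the kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative analogue of the kernel structure; sizes chosen for the demo. */
struct debug_regs {
	uint64_t db[4];
	uint64_t dr6;
	uint64_t dr7;
	uint64_t flags;
	uint64_t reserved[9];   /* size illustrative */
};

static void fill_debug_regs(struct debug_regs *out, const uint64_t db[4],
			    uint64_t dr6, uint64_t dr7)
{
	/* One memset covers flags, reserved and all padding bytes. */
	memset(out, 0, sizeof(*out));
	memcpy(out->db, db, sizeof(out->db));
	out->dr6 = dr6;
	out->dr7 = dr7;
}

int main(void)
{
	struct debug_regs regs;
	const uint64_t db[4] = { 0x1, 0x2, 0x3, 0x4 };

	fill_debug_regs(&regs, db, 0xffff0ff0, 0x400);
	printf("dr6=%#llx dr7=%#llx flags=%#llx\n",
	       (unsigned long long)regs.dr6, (unsigned long long)regs.dr7,
	       (unsigned long long)regs.flags);
	return 0;
}

The point of the single memset over sizeof(*out) is that it stays correct if fields are added later, which per-field clearing does not.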
@@ -41,8 +41,8 @@
 #define CNTACR_RWVT	BIT(4)
 #define CNTACR_RWPT	BIT(5)
 
-#define CNTVCT_LO	0x00
-#define CNTPCT_LO	0x08
+#define CNTPCT_LO	0x00
+#define CNTVCT_LO	0x08
 #define CNTFRQ		0x10
 #define CNTP_CVAL_LO	0x20
 #define CNTP_CTL	0x2c
......
@@ -98,7 +98,7 @@ TRACE_EVENT(nvme_complete_rq,
 	    TP_fast_assign(
 		__entry->ctrl_id = nvme_req(req)->ctrl->instance;
 		__entry->qid = nvme_req_qid(req);
-		__entry->cid = nvme_req(req)->cmd->common.command_id;
+		__entry->cid = nvme_cid(req);
 		__entry->result = le64_to_cpu(nvme_req(req)->result.u64);
 		__entry->retries = nvme_req(req)->retries;
 		__entry->flags = nvme_req(req)->flags;
......
@@ -1036,10 +1036,12 @@ static int alua_activate(struct scsi_device *sdev,
 	rcu_read_unlock();
 	mutex_unlock(&h->init_mutex);
 
-	if (alua_rtpg_queue(pg, sdev, qdata, true))
+	if (alua_rtpg_queue(pg, sdev, qdata, true)) {
 		fn = NULL;
-	else
+	} else {
+		kfree(qdata);
 		err = SCSI_DH_DEV_OFFLINED;
+	}
 	kref_put(&pg->kref, release_port_group);
 out:
 	if (fn)
......
@@ -1618,7 +1618,8 @@ struct ext4_sb_info {
 
 	/*
 	 * Barrier between writepages ops and changing any inode's JOURNAL_DATA
-	 * or EXTENTS flag.
+	 * or EXTENTS flag or between writepages ops and changing DELALLOC or
+	 * DIOREAD_NOLOCK mount options on remount.
 	 */
 	struct percpu_rw_semaphore s_writepages_rwsem;
 	struct dax_device *s_daxdev;
......
@@ -190,8 +190,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
 	return false;
 }
 
-/* Is IO overwriting allocated and initialized blocks? */
-static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+/* Is IO overwriting allocated or initialized blocks? */
+static bool ext4_overwrite_io(struct inode *inode,
+			      loff_t pos, loff_t len, bool *unwritten)
 {
 	struct ext4_map_blocks map;
 	unsigned int blkbits = inode->i_blkbits;
@@ -205,12 +206,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
 	blklen = map.m_len;
 
 	err = ext4_map_blocks(NULL, inode, &map, 0);
+	if (err != blklen)
+		return false;
 	/*
 	 * 'err==len' means that all of the blocks have been preallocated,
-	 * regardless of whether they have been initialized or not. To exclude
-	 * unwritten extents, we need to check m_flags.
+	 * regardless of whether they have been initialized or not. We need to
+	 * check m_flags to distinguish the unwritten extents.
 	 */
-	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+	*unwritten = !(map.m_flags & EXT4_MAP_MAPPED);
+	return true;
 }
 
 static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
@@ -421,11 +425,16 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
  * - For extending writes case we don't take the shared lock, since it requires
  *   updating inode i_disksize and/or orphan handling with exclusive lock.
  *
- * - shared locking will only be true mostly with overwrites. Otherwise we will
- *   switch to exclusive i_rwsem lock.
+ * - shared locking will only be true mostly with overwrites, including
+ *   initialized blocks and unwritten blocks. For overwrite unwritten blocks
+ *   we protect splitting extents by i_data_sem in ext4_inode_info, so we can
+ *   also release exclusive i_rwsem lock.
+ *
+ * - Otherwise we will switch to exclusive i_rwsem lock.
  */
 static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
-				     bool *ilock_shared, bool *extend)
+				     bool *ilock_shared, bool *extend,
+				     bool *unwritten)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
@@ -449,7 +458,7 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
 	 * in file_modified().
 	 */
 	if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
-	     !ext4_overwrite_io(inode, offset, count))) {
+	     !ext4_overwrite_io(inode, offset, count, unwritten))) {
 		if (iocb->ki_flags & IOCB_NOWAIT) {
 			ret = -EAGAIN;
 			goto out;
@@ -481,7 +490,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	loff_t offset = iocb->ki_pos;
 	size_t count = iov_iter_count(from);
 	const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
-	bool extend = false, unaligned_io = false;
+	bool extend = false, unaligned_io = false, unwritten = false;
 	bool ilock_shared = true;
 
 	/*
@@ -524,7 +533,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		return ext4_buffered_write_iter(iocb, from);
 	}
 
-	ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
+	ret = ext4_dio_write_checks(iocb, from,
+				    &ilock_shared, &extend, &unwritten);
 	if (ret <= 0)
 		return ret;
 
@@ -568,7 +578,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		ext4_journal_stop(handle);
 	}
 
-	if (ilock_shared)
+	if (ilock_shared && !unwritten)
 		iomap_ops = &ext4_iomap_overwrite_ops;
 	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
 			   is_sync_kiocb(iocb) || unaligned_io || extend);
......
@@ -5984,10 +5984,20 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	vfs_flags = SB_LAZYTIME | SB_I_VERSION;
 	sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags);
 
+	/*
+	 * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
+	 * two calls to ext4_should_dioread_nolock() to return inconsistent
+	 * values, triggering WARN_ON in ext4_add_complete_io(). we grab
+	 * here s_writepages_rwsem to avoid race between writepages ops and
+	 * remount.
+	 */
+	percpu_down_write(&sbi->s_writepages_rwsem);
 	if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
 		err = -EINVAL;
+		percpu_up_write(&sbi->s_writepages_rwsem);
 		goto restore_opts;
 	}
+	percpu_up_write(&sbi->s_writepages_rwsem);
 
 	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
 	    test_opt(sb, JOURNAL_CHECKSUM)) {
@@ -6205,6 +6215,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 
 restore_opts:
+	percpu_down_write(&sbi->s_writepages_rwsem);
 	sb->s_flags = old_sb_flags;
 	sbi->s_mount_opt = old_opts.s_mount_opt;
 	sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -6213,6 +6224,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	sbi->s_commit_interval = old_opts.s_commit_interval;
 	sbi->s_min_batch_time = old_opts.s_min_batch_time;
 	sbi->s_max_batch_time = old_opts.s_max_batch_time;
+	percpu_up_write(&sbi->s_writepages_rwsem);
+
 	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
 		ext4_release_system_zone(sb);
 #ifdef CONFIG_QUOTA
......
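For the remount hunks above, the ordering is the interesting part: the write side of s_writepages_rwsem is held only while the mount options are parsed (and, on the error path, while they are restored), so a writepages pass that reads the options under the read side always sees one consistent snapshot. A rough user-space analogue of that reader/writer split, using a pthread rwlock in place of the percpu_rw_semaphore (illustrative names only, not the ext4 code):

#include <pthread.h>
#include <stdbool.h>

static pthread_rwlock_t opts_rwsem = PTHREAD_RWLOCK_INITIALIZER;
static bool dioread_nolock;   /* stands in for the DIOREAD_NOLOCK option */

/* "Remount" flips the option only under the write lock. */
static void remount_set_dioread_nolock(bool value)
{
	pthread_rwlock_wrlock(&opts_rwsem);
	dioread_nolock = value;
	pthread_rwlock_unlock(&opts_rwsem);
}

/* "Writeback" reads the option under the read lock, so two checks made
 * within one pass can never disagree. */
static void writepages_pass(void)
{
	pthread_rwlock_rdlock(&opts_rwsem);
	bool nolock_at_start = dioread_nolock;
	/* ... submit IO ... */
	bool nolock_at_end = dioread_nolock;   /* guaranteed == nolock_at_start */
	(void)nolock_at_start;
	(void)nolock_at_end;
	pthread_rwlock_unlock(&opts_rwsem);
}

int main(void)
{
	remount_set_dioread_nolock(true);
	writepages_pass();
	return 0;
}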
@@ -2983,6 +2983,7 @@ xfs_alloc_read_agf(
 	struct xfs_agf		*agf;		/* ag freelist header */
 	struct xfs_perag	*pag;		/* per allocation group data */
 	int			error;
+	int			allocbt_blks;
 
 	trace_xfs_alloc_read_agf(mp, agno);
 
@@ -3013,6 +3014,19 @@ xfs_alloc_read_agf(
 		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
 		pag->pagf_init = 1;
 		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
+
+		/*
+		 * Update the in-core allocbt counter. Filter out the rmapbt
+		 * subset of the btreeblks counter because the rmapbt is managed
+		 * by perag reservation. Subtract one for the rmapbt root block
+		 * because the rmap counter includes it while the btreeblks
+		 * counter only tracks non-root blocks.
+		 */
+		allocbt_blks = pag->pagf_btreeblks;
+		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
+		if (allocbt_blks > 0)
+			atomic64_add(allocbt_blks, &mp->m_allocbt_blks);
 	}
 #ifdef DEBUG
 	else if (!XFS_FORCED_SHUTDOWN(mp)) {
......
@@ -71,6 +71,7 @@ xfs_allocbt_alloc_block(
 		return 0;
 	}
 
+	atomic64_inc(&cur->bc_mp->m_allocbt_blks);
 	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, false);
 
 	new->s = cpu_to_be32(bno);
@@ -94,6 +95,7 @@ xfs_allocbt_free_block(
 	if (error)
 		return error;
 
+	atomic64_dec(&cur->bc_mp->m_allocbt_blks);
 	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
 			      XFS_EXTENT_BUSY_SKIP_DISCARD);
 	return 0;
......
@@ -291,7 +291,7 @@ xfs_fs_counts(
 	cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
 	cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
 	cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
-						mp->m_alloc_set_aside;
+						xfs_fdblocks_unavailable(mp);
 
 	spin_lock(&mp->m_sb_lock);
 	cnt->freertx = mp->m_sb.sb_frextents;
......
@@ -1244,6 +1244,7 @@ xfs_mod_fdblocks(
 	int64_t			lcounter;
 	long long		res_used;
 	s32			batch;
+	uint64_t		set_aside;
 
 	if (delta > 0) {
 		/*
@@ -1283,8 +1284,20 @@ xfs_mod_fdblocks(
 	else
 		batch = XFS_FDBLOCKS_BATCH;
 
+	/*
+	 * Set aside allocbt blocks because these blocks are tracked as free
+	 * space but not available for allocation. Technically this means that a
+	 * single reservation cannot consume all remaining free space, but the
+	 * ratio of allocbt blocks to usable free blocks should be rather small.
+	 * The tradeoff without this is that filesystems that maintain high
+	 * perag block reservations can over reserve physical block availability
+	 * and fail physical allocation, which leads to much more serious
+	 * problems (i.e. transaction abort, pagecache discards, etc.) than
+	 * slightly premature -ENOSPC.
+	 */
+	set_aside = xfs_fdblocks_unavailable(mp);
 	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
-	if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
+	if (__percpu_counter_compare(&mp->m_fdblocks, set_aside,
 				     XFS_FDBLOCKS_BATCH) >= 0) {
 		/* we had space! */
 		return 0;
......
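The long comment in xfs_mod_fdblocks() above boils down to a simple accounting rule: blocks held by the allocation btrees are still counted in the free-block counter, so they have to be folded into the set-aside before deciding whether a reservation fits. A simplified sketch of that arithmetic (hypothetical structure and helper names, not the XFS code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mount_counters {
	uint64_t fdblocks;        /* "free" data blocks, includes allocbt blocks */
	uint64_t alloc_set_aside; /* static per-AG set-aside */
	uint64_t allocbt_blks;    /* blocks currently held by alloc btrees */
};

/* Blocks that look free on disk but can never satisfy a user allocation. */
static uint64_t fdblocks_unavailable(const struct mount_counters *m)
{
	return m->alloc_set_aside + m->allocbt_blks;
}

/* A reservation of 'delta' blocks succeeds only if it leaves at least the
 * unavailable set-aside behind. */
static bool reserve_blocks(struct mount_counters *m, uint64_t delta)
{
	if (m->fdblocks < delta + fdblocks_unavailable(m))
		return false;
	m->fdblocks -= delta;
	return true;
}

int main(void)
{
	struct mount_counters m = {
		.fdblocks = 1000, .alloc_set_aside = 32, .allocbt_blks = 10,
	};

	printf("reserve 950: %s\n", reserve_blocks(&m, 950) ? "ok" : "ENOSPC");
	/* Only 50 blocks remain and 42 of them are set aside, so even a
	 * small reservation now fails. */
	printf("reserve 10:  %s\n", reserve_blocks(&m, 10) ? "ok" : "ENOSPC");
	return 0;
}

With allocbt blocks in the set-aside, a reservation may fail slightly early with -ENOSPC, which the comment argues is far cheaper than overcommitting and aborting a transaction later.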
@@ -187,6 +187,12 @@ typedef struct xfs_mount {
 	 * extents or anything related to the rt device.
 	 */
 	struct percpu_counter	m_delalloc_blks;
+	/*
+	 * Global count of allocation btree blocks in use across all AGs. Only
+	 * used when perag reservation is enabled. Helps prevent block
+	 * reservation from attempting to reserve allocation btree blocks.
+	 */
+	atomic64_t		m_allocbt_blks;
 
 	struct radix_tree_root	m_perag_tree;	/* per-ag accounting info */
 	spinlock_t		m_perag_lock;	/* lock for m_perag_tree */
@@ -472,7 +478,7 @@ static inline uint64_t
 xfs_fdblocks_unavailable(
 	struct xfs_mount	*mp)
 {
-	return mp->m_alloc_set_aside;
+	return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
 }
 
 extern int	xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
......
@@ -809,7 +809,8 @@ xfs_fs_statfs(
 	spin_unlock(&mp->m_sb_lock);
 
 	/* make sure statp->f_bfree does not underflow */
-	statp->f_bfree = max_t(int64_t, fdblocks - mp->m_alloc_set_aside, 0);
+	statp->f_bfree = max_t(int64_t, 0,
+				fdblocks - xfs_fdblocks_unavailable(mp));
 	statp->f_bavail = statp->f_bfree;
 
 	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
......
@@ -135,6 +135,14 @@ static inline void reliable_page_counter(struct page *page,
 	if (page_reliable(page))
 		atomic_long_add(val, &mm->reliable_nr_page);
 }
+
+static inline void reliable_clear_page_counter(struct mm_struct *mm)
+{
+	if (!mem_reliable_is_enabled())
+		return;
+
+	atomic_long_set(&mm->reliable_nr_page, 0);
+}
 #else
 #define reliable_enabled 0
 #define pagecache_use_reliable_mem 0
@@ -178,6 +186,7 @@ static inline void reliable_page_counter(struct page *page,
 					 struct mm_struct *mm, int val) {}
 static inline void reliable_report_usage(struct seq_file *m,
 					 struct mm_struct *mm) {}
+static inline void reliable_clear_page_counter(struct mm_struct *mm) {}
 
 #endif
 #endif
@@ -1329,11 +1329,6 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
 void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
 				struct tcf_block *block);
 
-static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
-{
-	return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb);
-}
-
 /* Make sure qdisc is no longer in SCHED state. */
 static inline void qdisc_synchronize(const struct Qdisc *q)
 {
......
@@ -1049,6 +1049,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	atomic_set(&mm->has_pinned, 0);
 	atomic64_set(&mm->pinned_vm, 0);
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
+	reliable_clear_page_counter(mm);
 	spin_lock_init(&mm->page_table_lock);
 	spin_lock_init(&mm->arg_lock);
 	mm_init_cpumask(mm);
......
@@ -28,8 +28,8 @@
 static LIST_HEAD(mirred_list);
 static DEFINE_SPINLOCK(mirred_list_lock);
 
-#define MIRRED_RECURSION_LIMIT    4
-static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
+#define MIRRED_NEST_LIMIT    4
+static DEFINE_PER_CPU(unsigned int, mirred_nest_level);
 
 static bool tcf_mirred_is_act_redirect(int action)
 {
@@ -206,6 +206,25 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	return err;
 }
 
+static bool is_mirred_nested(void)
+{
+	return unlikely(__this_cpu_read(mirred_nest_level) > 1);
+}
+
+static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+{
+	int err;
+
+	if (!want_ingress)
+		err = dev_queue_xmit(skb);
+	else if (is_mirred_nested())
+		err = netif_rx(skb);
+	else
+		err = netif_receive_skb(skb);
+
+	return err;
+}
+
 static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 			  struct tcf_result *res)
 {
@@ -213,7 +232,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 	struct sk_buff *skb2 = skb;
 	bool m_mac_header_xmit;
 	struct net_device *dev;
-	unsigned int rec_level;
+	unsigned int nest_level;
 	int retval, err = 0;
 	bool use_reinsert;
 	bool want_ingress;
@@ -224,11 +243,11 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 	int mac_len;
 	bool at_nh;
 
-	rec_level = __this_cpu_inc_return(mirred_rec_level);
-	if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
+	nest_level = __this_cpu_inc_return(mirred_nest_level);
+	if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
 		net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
 				     netdev_name(skb->dev));
-		__this_cpu_dec(mirred_rec_level);
+		__this_cpu_dec(mirred_nest_level);
 		return TC_ACT_SHOT;
 	}
 
@@ -295,25 +314,22 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 		/* let's the caller reinsert the packet, if possible */
 		if (use_reinsert) {
 			res->ingress = want_ingress;
-			if (skb_tc_reinsert(skb, res))
+			err = tcf_mirred_forward(res->ingress, skb);
+			if (err)
 				tcf_action_inc_overlimit_qstats(&m->common);
-			__this_cpu_dec(mirred_rec_level);
+			__this_cpu_dec(mirred_nest_level);
 			return TC_ACT_CONSUMED;
 		}
 	}
 
-	if (!want_ingress)
-		err = dev_queue_xmit(skb2);
-	else
-		err = netif_receive_skb(skb2);
-
+	err = tcf_mirred_forward(want_ingress, skb2);
 	if (err) {
 out:
 		tcf_action_inc_overlimit_qstats(&m->common);
 		if (tcf_mirred_is_act_redirect(m_eaction))
 			retval = TC_ACT_SHOT;
 	}
-	__this_cpu_dec(mirred_rec_level);
+	__this_cpu_dec(mirred_nest_level);
 
 	return retval;
 }
......
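The act_mirred change above (the CVE-2022-4269 side of this pull) bounds redirect nesting with a per-CPU counter and, once a packet is already nested, hands it to the backlog via netif_rx() instead of reprocessing it inline with netif_receive_skb(), which breaks the re-entrant processing an egress-to-ingress mirred loop otherwise triggers. A compact sketch of that guard pattern in plain C, with a thread-local counter standing in for the per-CPU variable (all helper names are hypothetical, not the kernel API):

#include <stdbool.h>
#include <stdio.h>

#define NEST_LIMIT 4

static _Thread_local unsigned int nest_level;

static bool queue_for_later(const char *pkt)   /* analogue of netif_rx() */
{
	printf("deferred: %s\n", pkt);
	return true;
}

static bool process_inline(const char *pkt);   /* analogue of netif_receive_skb() */

/* Redirect a packet back to ingress: process inline at the first level,
 * defer to a queue once already nested, drop past the limit. */
static bool redirect_to_ingress(const char *pkt)
{
	bool ok;

	if (++nest_level > NEST_LIMIT) {
		nest_level--;
		return false;                  /* drop, like TC_ACT_SHOT */
	}
	ok = (nest_level > 1) ? queue_for_later(pkt) : process_inline(pkt);
	nest_level--;
	return ok;
}

static bool process_inline(const char *pkt)
{
	/* inline ingress processing may redirect again, re-entering the guard */
	return redirect_to_ingress(pkt);
}

int main(void)
{
	return redirect_to_ingress("test packet") ? 0 : 1;
}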
@@ -507,7 +507,8 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
 	list_for_each_entry(stab, &qdisc_stab_list, list) {
 		if (memcmp(&stab->szopts, s, sizeof(*s)))
 			continue;
-		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
+		if (tsize > 0 &&
+		    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
 			continue;
 		stab->refcnt++;
 		return stab;
@@ -519,14 +520,14 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
 		return ERR_PTR(-EINVAL);
 	}
 
-	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
+	stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
 	if (!stab)
 		return ERR_PTR(-ENOMEM);
 
 	stab->refcnt = 1;
 	stab->szopts = *s;
 	if (tsize > 0)
-		memcpy(stab->data, tab, tsize * sizeof(u16));
+		memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
 
 	list_add_tail(&stab->list, &qdisc_stab_list);
 
......
@@ -785,7 +785,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
 	if (!n || n > NETEM_DIST_MAX)
 		return -EINVAL;
 
-	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
+	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
 	if (!d)
 		return -ENOMEM;
 
......
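The sch_api and sch_netem hunks above replace open-coded sizeof(*p) + n * sizeof(elem) arithmetic with the kernel's struct_size()/flex_array_size() helpers, which size a structure with a trailing flexible array member and saturate instead of wrapping on integer overflow. A self-contained user-space illustration of the same computation (simplified helper, not the kernel macros):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct size_table {
	unsigned int refcnt;
	uint16_t data[];          /* flexible array member */
};

/* sizeof(struct) + n * sizeof(element), with a crude overflow check; the
 * kernel helpers instead saturate to SIZE_MAX so the allocation fails. */
static size_t table_size(size_t n)
{
	if (n > (SIZE_MAX - sizeof(struct size_table)) / sizeof(uint16_t))
		return SIZE_MAX;
	return sizeof(struct size_table) + n * sizeof(uint16_t);
}

int main(void)
{
	size_t n = 8;
	struct size_table *t = malloc(table_size(n));

	if (!t)
		return 1;
	t->refcnt = 1;
	memset(t->data, 0, n * sizeof(t->data[0]));
	printf("allocated %zu bytes for %zu entries\n", table_size(n), n);
	free(t);
	return 0;
}

Saturating to an impossible size turns a would-be integer overflow into a failed allocation, which is the safer failure mode for attacker-influenced table sizes.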
@@ -3,7 +3,8 @@
 
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
 	mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
-	gact_trap_test"
+	gact_trap_test \
+	mirred_egress_to_ingress_tcp_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -153,6 +154,52 @@ gact_trap_test()
 	log_test "trap ($tcflags)"
 }
 
+mirred_egress_to_ingress_tcp_test()
+{
+	local tmpfile=$(mktemp) tmpfile1=$(mktemp)
+
+	RET=0
+	dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$tmpfile
+	tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
+		$tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
+			action ct commit nat src addr 192.0.2.2 pipe \
+			action ct clear pipe \
+			action ct commit nat dst addr 192.0.2.1 pipe \
+			action ct clear pipe \
+			action skbedit ptype host pipe \
+			action mirred ingress redirect dev $h1
+	tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \
+		$tcflags ip_proto icmp \
+			action mirred ingress redirect dev $h1
+	tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \
+		ip_proto icmp \
+			action drop
+
+	ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $tmpfile1 &
+	local rpid=$!
+	ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$tmpfile
+	wait -n $rpid
+	cmp -s $tmpfile $tmpfile1
+	check_err $? "server output check failed"
+
+	$MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
+		-t icmp "ping,id=42,seq=5" -q
+	tc_check_packets "dev $h1 egress" 101 10
+	check_err $? "didn't mirred redirect ICMP"
+	tc_check_packets "dev $h1 ingress" 102 10
+	check_err $? "didn't drop mirred ICMP"
+	local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
+	test ${overlimits} = 10
+	check_err $? "wrong overlimits, expected 10 got ${overlimits}"
+
+	tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
+	tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
+	tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower
+
+	rm -f $tmpfile $tmpfile1
+	log_test "mirred_egress_to_ingress_tcp ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
......