Commit 5bb8e279 authored by openeuler-ci-bot, committed by Gitee

!590 Backport CVEs and bugfixes

Merge Pull Request from: @zhangjialin11 
 
Pull new CVEs:
CVE-2023-1829
CVE-2022-36280
CVE-2022-1015
CVE-2023-1989
CVE-2023-30456
CVE-2023-1990

xfs bugfixes from Long Li and yangerkun 
 
Link: https://gitee.com/openeuler/kernel/pulls/590

Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com> 
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com> 
...@@ -296,7 +296,7 @@ The following sysctls are available for the XFS filesystem: ...@@ -296,7 +296,7 @@ The following sysctls are available for the XFS filesystem:
XFS_ERRLEVEL_LOW: 1 XFS_ERRLEVEL_LOW: 1
XFS_ERRLEVEL_HIGH: 5 XFS_ERRLEVEL_HIGH: 5
fs.xfs.panic_mask (Min: 0 Default: 0 Max: 256) fs.xfs.panic_mask (Min: 0 Default: 0 Max: 511)
Causes certain error conditions to call BUG(). Value is a bitmask; Causes certain error conditions to call BUG(). Value is a bitmask;
OR together the tags which represent errors which should cause panics: OR together the tags which represent errors which should cause panics:
......
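
The documentation change above raises the fs.xfs.panic_mask ceiling from 256 to 511, which is exactly the OR of the nine defined panic tags (the XFS_PTAG_MASK introduced in the xfs_error.h hunk later in this series). A minimal user-space sketch of that arithmetic; three of the tag values are visible in this diff, the rest are assumed from the upstream xfs_error.h:

    /* Sketch only: panic-tag values as in fs/xfs/xfs_error.h. The IFLUSH,
     * FSBLOCK_ZERO and VERIFIER_ERROR values appear in this diff; the
     * others are assumed from the upstream header. */
    #include <stdio.h>

    #define XFS_PTAG_IFLUSH            0x00000001
    #define XFS_PTAG_LOGRES            0x00000002
    #define XFS_PTAG_AILDELETE         0x00000004
    #define XFS_PTAG_ERROR_REPORT      0x00000008
    #define XFS_PTAG_SHUTDOWN_CORRUPT  0x00000010
    #define XFS_PTAG_SHUTDOWN_IOERROR  0x00000020
    #define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040
    #define XFS_PTAG_FSBLOCK_ZERO      0x00000080
    #define XFS_PTAG_VERIFIER_ERROR    0x00000100

    int main(void)
    {
        unsigned int mask = XFS_PTAG_IFLUSH | XFS_PTAG_LOGRES |
                            XFS_PTAG_AILDELETE | XFS_PTAG_ERROR_REPORT |
                            XFS_PTAG_SHUTDOWN_CORRUPT |
                            XFS_PTAG_SHUTDOWN_IOERROR |
                            XFS_PTAG_SHUTDOWN_LOGERROR |
                            XFS_PTAG_FSBLOCK_ZERO |
                            XFS_PTAG_VERIFIER_ERROR;

        /* Prints 511 (0x1ff): the new fs.xfs.panic_mask maximum. */
        printf("%u (0x%x)\n", mask, mask);
        return 0;
    }

Writing that value back, e.g. with sysctl -w fs.xfs.panic_mask=511, makes every taggable XFS alert call BUG().
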
...@@ -2995,7 +2995,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, ...@@ -2995,7 +2995,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12, struct vmcs12 *vmcs12,
enum vm_entry_failure_code *entry_failure_code) enum vm_entry_failure_code *entry_failure_code)
{ {
bool ia32e; bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
*entry_failure_code = ENTRY_FAIL_DEFAULT; *entry_failure_code = ENTRY_FAIL_DEFAULT;
...@@ -3021,6 +3021,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, ...@@ -3021,6 +3021,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
vmcs12->guest_ia32_perf_global_ctrl))) vmcs12->guest_ia32_perf_global_ctrl)))
return -EINVAL; return -EINVAL;
if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
return -EINVAL;
if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
return -EINVAL;
/* /*
* If the load IA32_EFER VM-entry control is 1, the following checks * If the load IA32_EFER VM-entry control is 1, the following checks
* are performed on the field for the IA32_EFER MSR: * are performed on the field for the IA32_EFER MSR:
...@@ -3032,7 +3039,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, ...@@ -3032,7 +3039,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
*/ */
if (to_vmx(vcpu)->nested.nested_run_pending && if (to_vmx(vcpu)->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) || if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) || CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
CC(((vmcs12->guest_cr0 & X86_CR0_PG) && CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
......
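
The two checks added above are the backported fix for CVE-2023-30456: nested VM-entry must now present an architecturally consistent paging configuration. A rough stand-alone illustration of the invariants being enforced (CR bit positions assumed from the arch/x86 headers; this is a sketch, not the kernel's helper):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed control-register bit positions (arch/x86 definitions). */
    #define X86_CR0_PE  (1u << 0)   /* protected mode enable */
    #define X86_CR0_PG  (1u << 31)  /* paging enable */
    #define X86_CR4_PAE (1u << 5)   /* physical address extension */

    /* Mirrors the new consistency checks: CR0.PG requires CR0.PE, and
     * IA-32e (long) mode requires both CR4.PAE and CR0.PG. */
    static bool guest_state_consistent(uint32_t cr0, uint32_t cr4, bool ia32e)
    {
        if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG)
            return false;            /* paging without protected mode */
        if (ia32e && !(cr4 & X86_CR4_PAE))
            return false;            /* long mode without PAE */
        if (ia32e && !(cr0 & X86_CR0_PG))
            return false;            /* long mode without paging */
        return true;
    }

    int main(void)
    {
        /* A guest claiming IA-32e mode with CR4.PAE clear is rejected (0). */
        printf("%d\n", guest_state_consistent(X86_CR0_PE | X86_CR0_PG, 0, true));
        return 0;
    }
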
...@@ -355,6 +355,7 @@ static void btsdio_remove(struct sdio_func *func) ...@@ -355,6 +355,7 @@ static void btsdio_remove(struct sdio_func *func)
if (!data) if (!data)
return; return;
cancel_work_sync(&data->work);
hdev = data->hdev; hdev = data->hdev;
sdio_set_drvdata(func, NULL); sdio_set_drvdata(func, NULL);
......
...@@ -182,7 +182,8 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf, ...@@ -182,7 +182,8 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf,
if (cmd->dma.guest.ptr.offset % PAGE_SIZE || if (cmd->dma.guest.ptr.offset % PAGE_SIZE ||
box->x != 0 || box->y != 0 || box->z != 0 || box->x != 0 || box->y != 0 || box->z != 0 ||
box->srcx != 0 || box->srcy != 0 || box->srcz != 0 || box->srcx != 0 || box->srcy != 0 || box->srcz != 0 ||
box->d != 1 || box_count != 1) { box->d != 1 || box_count != 1 ||
box->w > 64 || box->h > 64) {
/* TODO handle none page aligned offsets */ /* TODO handle none page aligned offsets */
/* TODO handle more dst & src != 0 */ /* TODO handle more dst & src != 0 */
/* TODO handle more then one copy */ /* TODO handle more then one copy */
......
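
The extra bound above is the CVE-2022-36280 fix: the snooped cursor image that the DMA data is copied into is a fixed 64x64 buffer, so a box wider or taller than that would write past its end. A simplified sketch of the copy being guarded (names and layout are illustrative, not the actual vmwgfx structures):

    #include <stdint.h>
    #include <string.h>

    #define SNOOP_DIM 64          /* destination cursor image is 64x64 pixels */

    struct copy_box {
        uint32_t w, h;            /* size of the region to copy, in pixels */
        uint32_t src_pitch;       /* source row stride, in pixels */
    };

    /* Without the w/h check, a box larger than 64x64 overruns image[]. */
    static int snoop_copy(uint32_t image[SNOOP_DIM * SNOOP_DIM],
                          const struct copy_box *box, const uint32_t *src)
    {
        uint32_t row;

        if (box->w > SNOOP_DIM || box->h > SNOOP_DIM)
            return -1;            /* reject, as the new check does */

        for (row = 0; row < box->h; row++)
            memcpy(&image[row * SNOOP_DIM], &src[row * box->src_pitch],
                   box->w * sizeof(uint32_t));
        return 0;
    }
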
...@@ -286,13 +286,15 @@ EXPORT_SYMBOL(ndlc_probe); ...@@ -286,13 +286,15 @@ EXPORT_SYMBOL(ndlc_probe);
void ndlc_remove(struct llt_ndlc *ndlc) void ndlc_remove(struct llt_ndlc *ndlc)
{ {
st_nci_remove(ndlc->ndev);
/* cancel timers */ /* cancel timers */
del_timer_sync(&ndlc->t1_timer); del_timer_sync(&ndlc->t1_timer);
del_timer_sync(&ndlc->t2_timer); del_timer_sync(&ndlc->t2_timer);
ndlc->t2_active = false; ndlc->t2_active = false;
ndlc->t1_active = false; ndlc->t1_active = false;
/* cancel work */
cancel_work_sync(&ndlc->sm_work);
st_nci_remove(ndlc->ndev);
skb_queue_purge(&ndlc->rcv_q); skb_queue_purge(&ndlc->rcv_q);
skb_queue_purge(&ndlc->send_q); skb_queue_purge(&ndlc->send_q);
......
...@@ -926,13 +926,15 @@ xfs_bmap_add_attrfork_btree( ...@@ -926,13 +926,15 @@ xfs_bmap_add_attrfork_btree(
xfs_inode_t *ip, /* incore inode pointer */ xfs_inode_t *ip, /* incore inode pointer */
int *flags) /* inode logging flags */ int *flags) /* inode logging flags */
{ {
struct xfs_btree_block *block = ip->i_df.if_broot;
xfs_btree_cur_t *cur; /* btree cursor */ xfs_btree_cur_t *cur; /* btree cursor */
int error; /* error return value */ int error; /* error return value */
xfs_mount_t *mp; /* file system mount struct */ xfs_mount_t *mp; /* file system mount struct */
int stat; /* newroot status */ int stat; /* newroot status */
mp = ip->i_mount; mp = ip->i_mount;
if (ip->i_df.if_broot_bytes <= xfs_inode_data_fork_size(ip))
if (XFS_BMAP_BMDR_SPACE(block) <= xfs_inode_data_fork_size(ip))
*flags |= XFS_ILOG_DBROOT; *flags |= XFS_ILOG_DBROOT;
else { else {
cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
...@@ -4053,7 +4055,7 @@ xfs_bmap_alloc_userdata( ...@@ -4053,7 +4055,7 @@ xfs_bmap_alloc_userdata(
* the busy list. * the busy list.
*/ */
bma->datatype = XFS_ALLOC_NOBUSY; bma->datatype = XFS_ALLOC_NOBUSY;
if (whichfork == XFS_DATA_FORK) { if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
bma->datatype |= XFS_ALLOC_USERDATA; bma->datatype |= XFS_ALLOC_USERDATA;
if (bma->offset == 0) if (bma->offset == 0)
bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
......
...@@ -2840,9 +2840,22 @@ xfs_btree_split_worker( ...@@ -2840,9 +2840,22 @@ xfs_btree_split_worker(
} }
/* /*
* BMBT split requests often come in with little stack to work on. Push * BMBT split requests often come in with little stack to work on so we push
* them off to a worker thread so there is lots of stack to use. For the other * them off to a worker thread so there is lots of stack to use. For the other
* btree types, just call directly to avoid the context switch overhead here. * btree types, just call directly to avoid the context switch overhead here.
*
* Care must be taken here - the work queue rescuer thread introduces potential
* AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
* AGFs to allocate blocks. A task being run by the rescuer could attempt to
* lock an AGF that is already locked by a task queued to run by the rescuer,
* resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
* release it until the current thread it is running gains the lock.
*
* To avoid this issue, we only ever queue BMBT splits that don't have an AGF
* already locked to allocate from. The only place that doesn't hold an AGF
* locked is unwritten extent conversion at IO completion, but that has already
* been offloaded to a worker thread and hence has no stack consumption issues
* we have to worry about.
*/ */
STATIC int /* error */ STATIC int /* error */
xfs_btree_split( xfs_btree_split(
...@@ -2856,7 +2869,8 @@ xfs_btree_split( ...@@ -2856,7 +2869,8 @@ xfs_btree_split(
struct xfs_btree_split_args args; struct xfs_btree_split_args args;
DECLARE_COMPLETION_ONSTACK(done); DECLARE_COMPLETION_ONSTACK(done);
if (cur->bc_btnum != XFS_BTNUM_BMAP) if (cur->bc_btnum != XFS_BTNUM_BMAP ||
cur->bc_tp->t_firstblock == NULLFSBLOCK)
return __xfs_btree_split(cur, level, ptrp, key, curp, stat); return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
args.cur = cur; args.cur = cur;
......
...@@ -580,7 +580,7 @@ typedef struct xfs_efi_log_format { ...@@ -580,7 +580,7 @@ typedef struct xfs_efi_log_format {
uint16_t efi_size; /* size of this item */ uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */ uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */ uint64_t efi_id; /* efi identifier */
xfs_extent_t efi_extents[1]; /* array of extents to free */ xfs_extent_t efi_extents[]; /* array of extents to free */
} xfs_efi_log_format_t; } xfs_efi_log_format_t;
typedef struct xfs_efi_log_format_32 { typedef struct xfs_efi_log_format_32 {
...@@ -588,7 +588,7 @@ typedef struct xfs_efi_log_format_32 { ...@@ -588,7 +588,7 @@ typedef struct xfs_efi_log_format_32 {
uint16_t efi_size; /* size of this item */ uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */ uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */ uint64_t efi_id; /* efi identifier */
xfs_extent_32_t efi_extents[1]; /* array of extents to free */ xfs_extent_32_t efi_extents[]; /* array of extents to free */
} __attribute__((packed)) xfs_efi_log_format_32_t; } __attribute__((packed)) xfs_efi_log_format_32_t;
typedef struct xfs_efi_log_format_64 { typedef struct xfs_efi_log_format_64 {
...@@ -596,7 +596,7 @@ typedef struct xfs_efi_log_format_64 { ...@@ -596,7 +596,7 @@ typedef struct xfs_efi_log_format_64 {
uint16_t efi_size; /* size of this item */ uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */ uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */ uint64_t efi_id; /* efi identifier */
xfs_extent_64_t efi_extents[1]; /* array of extents to free */ xfs_extent_64_t efi_extents[]; /* array of extents to free */
} xfs_efi_log_format_64_t; } xfs_efi_log_format_64_t;
/* /*
...@@ -609,7 +609,7 @@ typedef struct xfs_efd_log_format { ...@@ -609,7 +609,7 @@ typedef struct xfs_efd_log_format {
uint16_t efd_size; /* size of this item */ uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */ uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */ uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_t efd_extents[1]; /* array of extents freed */ xfs_extent_t efd_extents[]; /* array of extents freed */
} xfs_efd_log_format_t; } xfs_efd_log_format_t;
typedef struct xfs_efd_log_format_32 { typedef struct xfs_efd_log_format_32 {
...@@ -617,7 +617,7 @@ typedef struct xfs_efd_log_format_32 { ...@@ -617,7 +617,7 @@ typedef struct xfs_efd_log_format_32 {
uint16_t efd_size; /* size of this item */ uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */ uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */ uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_32_t efd_extents[1]; /* array of extents freed */ xfs_extent_32_t efd_extents[]; /* array of extents freed */
} __attribute__((packed)) xfs_efd_log_format_32_t; } __attribute__((packed)) xfs_efd_log_format_32_t;
typedef struct xfs_efd_log_format_64 { typedef struct xfs_efd_log_format_64 {
...@@ -625,7 +625,7 @@ typedef struct xfs_efd_log_format_64 { ...@@ -625,7 +625,7 @@ typedef struct xfs_efd_log_format_64 {
uint16_t efd_size; /* size of this item */ uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */ uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */ uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_64_t efd_extents[1]; /* array of extents freed */ xfs_extent_64_t efd_extents[]; /* array of extents freed */
} xfs_efd_log_format_64_t; } xfs_efd_log_format_64_t;
/* /*
......
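
The hunks above convert the one-element trailing arrays in the EFI/EFD log formats into C99 flexible array members. That changes the sizing convention: with efi_extents[1] the struct size already covered one extent, so callers subtracted one ((nextents - 1) * sizeof(...)); with efi_extents[] every extent is counted explicitly, which is why the "- 1" terms disappear in the xfs_extfree_item.c and xfs_super.c hunks below and why the expected struct sizes in the xfs_ondisk.h hunk drop from 28/32 to 16. A small sketch with simplified stand-in structs (not the real xfs_efi_log_format layout) showing that both conventions describe the same number of bytes:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct ext { uint64_t start; uint32_t len; };   /* 16 bytes once padded */

    /* Old style: one-element array; sizeof() already includes extents[0]. */
    struct efi_old { uint32_t nextents; uint64_t id; struct ext extents[1]; };

    /* New style: flexible array member; sizeof() covers the header only. */
    struct efi_new { uint32_t nextents; uint64_t id; struct ext extents[]; };

    int main(void)
    {
        unsigned int n = 4;

        size_t old_len = sizeof(struct efi_old) + (n - 1) * sizeof(struct ext);
        size_t new_len = sizeof(struct efi_new) + n * sizeof(struct ext);

        /* Both print 80 on a typical LP64 build: the log-item layout is
         * unchanged, only the C expression used to size it differs. */
        printf("old=%zu new=%zu\n", old_len, new_len);
        return 0;
    }
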
...@@ -108,12 +108,6 @@ struct xlog_recover { ...@@ -108,12 +108,6 @@ struct xlog_recover {
#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr) #define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr)
/*
* This is the number of entries in the l_buf_cancel_table used during
* recovery.
*/
#define XLOG_BC_TABLE_SIZE 64
#define XLOG_RECOVER_CRCPASS 0 #define XLOG_RECOVER_CRCPASS 0
#define XLOG_RECOVER_PASS1 1 #define XLOG_RECOVER_PASS1 1
#define XLOG_RECOVER_PASS2 2 #define XLOG_RECOVER_PASS2 2
...@@ -124,5 +118,13 @@ bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len); ...@@ -124,5 +118,13 @@ bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type, void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
uint64_t intent_id); uint64_t intent_id);
int xlog_alloc_buf_cancel_table(struct xlog *log);
void xlog_free_buf_cancel_table(struct xlog *log);
#ifdef DEBUG
void xlog_check_buf_cancel_table(struct xlog *log);
#else
#define xlog_check_buf_cancel_table(log) do { } while (0)
#endif
#endif /* __XFS_LOG_RECOVER_H__ */ #endif /* __XFS_LOG_RECOVER_H__ */
...@@ -590,28 +590,18 @@ static const struct xfs_item_ops xfs_bui_item_ops = { ...@@ -590,28 +590,18 @@ static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_relog = xfs_bui_item_relog, .iop_relog = xfs_bui_item_relog,
}; };
/* static inline void
* Copy an BUI format buffer from the given buf, and into the destination
* BUI format structure. The BUI/BUD items were designed not to need any
* special alignment handling.
*/
static int
xfs_bui_copy_format( xfs_bui_copy_format(
struct xfs_log_iovec *buf, struct xfs_bui_log_format *dst,
struct xfs_bui_log_format *dst_bui_fmt) const struct xfs_bui_log_format *src)
{ {
struct xfs_bui_log_format *src_bui_fmt; unsigned int i;
uint len;
src_bui_fmt = buf->i_addr; memcpy(dst, src, offsetof(struct xfs_bui_log_format, bui_extents));
len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
if (buf->i_len == len) { for (i = 0; i < src->bui_nextents; i++)
memcpy(dst_bui_fmt, src_bui_fmt, len); memcpy(&dst->bui_extents[i], &src->bui_extents[i],
return 0; sizeof(struct xfs_map_extent));
}
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
return -EFSCORRUPTED;
} }
/* /*
...@@ -628,23 +618,31 @@ xlog_recover_bui_commit_pass2( ...@@ -628,23 +618,31 @@ xlog_recover_bui_commit_pass2(
struct xlog_recover_item *item, struct xlog_recover_item *item,
xfs_lsn_t lsn) xfs_lsn_t lsn)
{ {
int error;
struct xfs_mount *mp = log->l_mp; struct xfs_mount *mp = log->l_mp;
struct xfs_bui_log_item *buip; struct xfs_bui_log_item *buip;
struct xfs_bui_log_format *bui_formatp; struct xfs_bui_log_format *bui_formatp;
size_t len;
bui_formatp = item->ri_buf[0].i_addr; bui_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
}
if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
buip = xfs_bui_init(mp);
error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents);
if (error) { if (item->ri_buf[0].i_len != len) {
xfs_bui_item_free(buip); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return error; return -EFSCORRUPTED;
} }
buip = xfs_bui_init(mp);
xfs_bui_copy_format(&buip->bui_format, bui_formatp);
atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
/* /*
* Insert the intent into the AIL directly and drop one reference so * Insert the intent into the AIL directly and drop one reference so
......
...@@ -1947,6 +1947,7 @@ xfs_free_buftarg( ...@@ -1947,6 +1947,7 @@ xfs_free_buftarg(
list_lru_destroy(&btp->bt_lru); list_lru_destroy(&btp->bt_lru);
xfs_blkdev_issue_flush(btp); xfs_blkdev_issue_flush(btp);
invalidate_bdev(btp->bt_bdev);
kmem_free(btp); kmem_free(btp);
} }
......
...@@ -23,6 +23,15 @@ ...@@ -23,6 +23,15 @@
#include "xfs_dir2.h" #include "xfs_dir2.h"
#include "xfs_quota.h" #include "xfs_quota.h"
/*
* This is the number of entries in the l_buf_cancel_table used during
* recovery.
*/
#define XLOG_BC_TABLE_SIZE 64
#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
/* /*
* This structure is used during recovery to record the buf log items which * This structure is used during recovery to record the buf log items which
* have been canceled and should not be replayed. * have been canceled and should not be replayed.
...@@ -993,3 +1002,60 @@ const struct xlog_recover_item_ops xlog_buf_item_ops = { ...@@ -993,3 +1002,60 @@ const struct xlog_recover_item_ops xlog_buf_item_ops = {
.commit_pass1 = xlog_recover_buf_commit_pass1, .commit_pass1 = xlog_recover_buf_commit_pass1,
.commit_pass2 = xlog_recover_buf_commit_pass2, .commit_pass2 = xlog_recover_buf_commit_pass2,
}; };
#ifdef DEBUG
void
xlog_check_buf_cancel_table(
struct xlog *log)
{
int i;
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
ASSERT(list_empty(&log->l_buf_cancel_table[i]));
}
#endif
int
xlog_alloc_buf_cancel_table(
struct xlog *log)
{
void *p;
int i;
ASSERT(log->l_buf_cancel_table == NULL);
p = kmalloc_array(XLOG_BC_TABLE_SIZE, sizeof(struct list_head),
GFP_KERNEL);
if (!p)
return -ENOMEM;
log->l_buf_cancel_table = p;
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
return 0;
}
void
xlog_free_buf_cancel_table(
struct xlog *log)
{
int i;
if (!log->l_buf_cancel_table)
return;
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) {
struct xfs_buf_cancel *bc;
while ((bc = list_first_entry_or_null(
&log->l_buf_cancel_table[i],
struct xfs_buf_cancel, bc_list))) {
list_del(&bc->bc_list);
kmem_free(bc);
}
}
kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
}
...@@ -62,7 +62,7 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp); ...@@ -62,7 +62,7 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp);
/* /*
* XFS panic tags -- allow a call to xfs_alert_tag() be turned into * XFS panic tags -- allow a call to xfs_alert_tag() be turned into
* a panic by setting xfs_panic_mask in a sysctl. * a panic by setting fs.xfs.panic_mask in a sysctl.
*/ */
#define XFS_NO_PTAG 0 #define XFS_NO_PTAG 0
#define XFS_PTAG_IFLUSH 0x00000001 #define XFS_PTAG_IFLUSH 0x00000001
...@@ -75,4 +75,14 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp); ...@@ -75,4 +75,14 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp);
#define XFS_PTAG_FSBLOCK_ZERO 0x00000080 #define XFS_PTAG_FSBLOCK_ZERO 0x00000080
#define XFS_PTAG_VERIFIER_ERROR 0x00000100 #define XFS_PTAG_VERIFIER_ERROR 0x00000100
#define XFS_PTAG_MASK (XFS_PTAG_IFLUSH | \
XFS_PTAG_LOGRES | \
XFS_PTAG_AILDELETE | \
XFS_PTAG_ERROR_REPORT | \
XFS_PTAG_SHUTDOWN_CORRUPT | \
XFS_PTAG_SHUTDOWN_IOERROR | \
XFS_PTAG_SHUTDOWN_LOGERROR | \
XFS_PTAG_FSBLOCK_ZERO | \
XFS_PTAG_VERIFIER_ERROR)
#endif /* __XFS_ERROR_H__ */ #endif /* __XFS_ERROR_H__ */
...@@ -244,6 +244,7 @@ xfs_extent_busy_update_extent( ...@@ -244,6 +244,7 @@ xfs_extent_busy_update_extent(
* *
*/ */
busyp->bno = fend; busyp->bno = fend;
busyp->length = bend - fend;
} else if (bbno < fbno) { } else if (bbno < fbno) {
/* /*
* Case 8: * Case 8:
......
...@@ -74,7 +74,7 @@ xfs_efi_item_sizeof( ...@@ -74,7 +74,7 @@ xfs_efi_item_sizeof(
struct xfs_efi_log_item *efip) struct xfs_efi_log_item *efip)
{ {
return sizeof(struct xfs_efi_log_format) + return sizeof(struct xfs_efi_log_format) +
(efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t); efip->efi_format.efi_nextents * sizeof(xfs_extent_t);
} }
STATIC void STATIC void
...@@ -158,7 +158,7 @@ xfs_efi_init( ...@@ -158,7 +158,7 @@ xfs_efi_init(
ASSERT(nextents > 0); ASSERT(nextents > 0);
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
size = (uint)(sizeof(struct xfs_efi_log_item) + size = (uint)(sizeof(struct xfs_efi_log_item) +
((nextents - 1) * sizeof(xfs_extent_t))); (nextents * sizeof(xfs_extent_t)));
efip = kmem_zalloc(size, 0); efip = kmem_zalloc(size, 0);
} else { } else {
efip = kmem_cache_zalloc(xfs_efi_zone, efip = kmem_cache_zalloc(xfs_efi_zone,
...@@ -187,14 +187,19 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) ...@@ -187,14 +187,19 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
uint i; uint i;
uint len = sizeof(xfs_efi_log_format_t) + uint len = sizeof(xfs_efi_log_format_t) +
(src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); src_efi_fmt->efi_nextents * sizeof(xfs_extent_t);
uint len32 = sizeof(xfs_efi_log_format_32_t) + uint len32 = sizeof(xfs_efi_log_format_32_t) +
(src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t); src_efi_fmt->efi_nextents * sizeof(xfs_extent_32_t);
uint len64 = sizeof(xfs_efi_log_format_64_t) + uint len64 = sizeof(xfs_efi_log_format_64_t) +
(src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t); src_efi_fmt->efi_nextents * sizeof(xfs_extent_64_t);
if (buf->i_len == len) { if (buf->i_len == len) {
memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); memcpy(dst_efi_fmt, src_efi_fmt,
offsetof(struct xfs_efi_log_format, efi_extents));
for (i = 0; i < src_efi_fmt->efi_nextents; i++)
memcpy(&dst_efi_fmt->efi_extents[i],
&src_efi_fmt->efi_extents[i],
sizeof(struct xfs_extent));
return 0; return 0;
} else if (buf->i_len == len32) { } else if (buf->i_len == len32) {
xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr; xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr;
...@@ -254,7 +259,7 @@ xfs_efd_item_sizeof( ...@@ -254,7 +259,7 @@ xfs_efd_item_sizeof(
struct xfs_efd_log_item *efdp) struct xfs_efd_log_item *efdp)
{ {
return sizeof(xfs_efd_log_format_t) + return sizeof(xfs_efd_log_format_t) +
(efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t); efdp->efd_format.efd_nextents * sizeof(xfs_extent_t);
} }
STATIC void STATIC void
...@@ -330,7 +335,7 @@ xfs_trans_get_efd( ...@@ -330,7 +335,7 @@ xfs_trans_get_efd(
if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
(nextents - 1) * sizeof(struct xfs_extent), nextents * sizeof(struct xfs_extent),
0); 0);
} else { } else {
efdp = kmem_cache_zalloc(xfs_efd_zone, efdp = kmem_cache_zalloc(xfs_efd_zone,
...@@ -701,6 +706,12 @@ xlog_recover_efi_commit_pass2( ...@@ -701,6 +706,12 @@ xlog_recover_efi_commit_pass2(
efi_formatp = item->ri_buf[0].i_addr; efi_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len <
offsetof(struct xfs_efi_log_format, efi_extents)) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
}
efip = xfs_efi_init(mp, efi_formatp->efi_nextents); efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
if (error) { if (error) {
...@@ -740,9 +751,9 @@ xlog_recover_efd_commit_pass2( ...@@ -740,9 +751,9 @@ xlog_recover_efd_commit_pass2(
efd_formatp = item->ri_buf[0].i_addr; efd_formatp = item->ri_buf[0].i_addr;
ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || (efd_formatp->efd_nextents * sizeof(xfs_extent_32_t)))) ||
(item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); (efd_formatp->efd_nextents * sizeof(xfs_extent_64_t)))));
xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id);
return 0; return 0;
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* All Rights Reserved. * All Rights Reserved.
*/ */
#include "xfs.h" #include "xfs.h"
#include "xfs_error.h"
/* /*
* Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n,
...@@ -15,7 +16,7 @@ xfs_param_t xfs_params = { ...@@ -15,7 +16,7 @@ xfs_param_t xfs_params = {
/* MIN DFLT MAX */ /* MIN DFLT MAX */
.sgid_inherit = { 0, 0, 1 }, .sgid_inherit = { 0, 0, 1 },
.symlink_mode = { 0, 0, 1 }, .symlink_mode = { 0, 0, 1 },
.panic_mask = { 0, 0, 256 }, .panic_mask = { 0, 0, XFS_PTAG_MASK},
.error_level = { 0, 3, 11 }, .error_level = { 0, 3, 11 },
.syncd_timer = { 1*100, 30*100, 7200*100}, .syncd_timer = { 1*100, 30*100, 7200*100},
.stats_clear = { 0, 0, 1 }, .stats_clear = { 0, 0, 1 },
......
...@@ -991,11 +991,11 @@ xfs_reclaim_inodes( ...@@ -991,11 +991,11 @@ xfs_reclaim_inodes(
long long
xfs_reclaim_inodes_nr( xfs_reclaim_inodes_nr(
struct xfs_mount *mp, struct xfs_mount *mp,
int nr_to_scan) unsigned long nr_to_scan)
{ {
struct xfs_icwalk icw = { struct xfs_icwalk icw = {
.icw_flags = XFS_ICWALK_FLAG_SCAN_LIMIT, .icw_flags = XFS_ICWALK_FLAG_SCAN_LIMIT,
.icw_scan_limit = nr_to_scan, .icw_scan_limit = min_t(unsigned long, LONG_MAX, nr_to_scan),
}; };
if (xfs_want_reclaim_sick(mp)) if (xfs_want_reclaim_sick(mp))
...@@ -1013,13 +1013,13 @@ xfs_reclaim_inodes_nr( ...@@ -1013,13 +1013,13 @@ xfs_reclaim_inodes_nr(
* Return the number of reclaimable inodes in the filesystem for * Return the number of reclaimable inodes in the filesystem for
* the shrinker to determine how much to reclaim. * the shrinker to determine how much to reclaim.
*/ */
int long
xfs_reclaim_inodes_count( xfs_reclaim_inodes_count(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
struct xfs_perag *pag; struct xfs_perag *pag;
xfs_agnumber_t ag = 0; xfs_agnumber_t ag = 0;
int reclaimable = 0; long reclaimable = 0;
while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
ag = pag->pag_agno + 1; ag = pag->pag_agno + 1;
......
...@@ -15,7 +15,7 @@ struct xfs_icwalk { ...@@ -15,7 +15,7 @@ struct xfs_icwalk {
kgid_t icw_gid; kgid_t icw_gid;
prid_t icw_prid; prid_t icw_prid;
__u64 icw_min_file_size; __u64 icw_min_file_size;
int icw_scan_limit; long icw_scan_limit;
}; };
/* Flags that reflect xfs_fs_eofblocks functionality. */ /* Flags that reflect xfs_fs_eofblocks functionality. */
...@@ -49,8 +49,8 @@ void xfs_inode_free(struct xfs_inode *ip); ...@@ -49,8 +49,8 @@ void xfs_inode_free(struct xfs_inode *ip);
void xfs_reclaim_worker(struct work_struct *work); void xfs_reclaim_worker(struct work_struct *work);
void xfs_reclaim_inodes(struct xfs_mount *mp); void xfs_reclaim_inodes(struct xfs_mount *mp);
int xfs_reclaim_inodes_count(struct xfs_mount *mp); long xfs_reclaim_inodes_count(struct xfs_mount *mp);
long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); long xfs_reclaim_inodes_nr(struct xfs_mount *mp, unsigned long nr_to_scan);
void xfs_inode_mark_reclaimable(struct xfs_inode *ip); void xfs_inode_mark_reclaimable(struct xfs_inode *ip);
......
...@@ -3183,16 +3183,27 @@ xfs_cross_rename( ...@@ -3183,16 +3183,27 @@ xfs_cross_rename(
*/ */
static int static int
xfs_rename_alloc_whiteout( xfs_rename_alloc_whiteout(
struct xfs_name *src_name,
struct xfs_inode *dp, struct xfs_inode *dp,
struct xfs_inode **wip) struct xfs_inode **wip)
{ {
struct xfs_inode *tmpfile; struct xfs_inode *tmpfile;
struct qstr name;
int error; int error;
error = xfs_create_tmpfile(dp, S_IFCHR | WHITEOUT_MODE, &tmpfile); error = xfs_create_tmpfile(dp, S_IFCHR | WHITEOUT_MODE, &tmpfile);
if (error) if (error)
return error; return error;
name.name = src_name->name;
name.len = src_name->len;
error = xfs_inode_init_security(VFS_I(tmpfile), VFS_I(dp), &name);
if (error) {
xfs_finish_inode_setup(tmpfile);
xfs_irele(tmpfile);
return error;
}
/* /*
* Prepare the tmpfile inode as if it were created through the VFS. * Prepare the tmpfile inode as if it were created through the VFS.
* Complete the inode setup and flag it as linkable. nlink is already * Complete the inode setup and flag it as linkable. nlink is already
...@@ -3241,7 +3252,7 @@ xfs_rename( ...@@ -3241,7 +3252,7 @@ xfs_rename(
* appropriately. * appropriately.
*/ */
if (flags & RENAME_WHITEOUT) { if (flags & RENAME_WHITEOUT) {
error = xfs_rename_alloc_whiteout(target_dp, &wip); error = xfs_rename_alloc_whiteout(src_name, target_dp, &wip);
if (error) if (error)
return error; return error;
......
...@@ -830,7 +830,7 @@ xfs_bulkstat_fmt( ...@@ -830,7 +830,7 @@ xfs_bulkstat_fmt(
static int static int
xfs_bulk_ireq_setup( xfs_bulk_ireq_setup(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_bulk_ireq *hdr, const struct xfs_bulk_ireq *hdr,
struct xfs_ibulk *breq, struct xfs_ibulk *breq,
void __user *ubuffer) void __user *ubuffer)
{ {
...@@ -856,7 +856,7 @@ xfs_bulk_ireq_setup( ...@@ -856,7 +856,7 @@ xfs_bulk_ireq_setup(
switch (hdr->ino) { switch (hdr->ino) {
case XFS_BULK_IREQ_SPECIAL_ROOT: case XFS_BULK_IREQ_SPECIAL_ROOT:
hdr->ino = mp->m_sb.sb_rootino; breq->startino = mp->m_sb.sb_rootino;
break; break;
default: default:
return -EINVAL; return -EINVAL;
......
...@@ -871,6 +871,10 @@ xfs_buffered_write_iomap_begin( ...@@ -871,6 +871,10 @@ xfs_buffered_write_iomap_begin(
ASSERT(!XFS_IS_REALTIME_INODE(ip)); ASSERT(!XFS_IS_REALTIME_INODE(ip));
error = xfs_qm_dqattach(ip);
if (error)
return error;
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) || if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
...@@ -974,10 +978,6 @@ xfs_buffered_write_iomap_begin( ...@@ -974,10 +978,6 @@ xfs_buffered_write_iomap_begin(
allocfork = XFS_COW_FORK; allocfork = XFS_COW_FORK;
} }
error = xfs_qm_dqattach_locked(ip, false);
if (error)
goto out_unlock;
if (eof && offset + count > XFS_ISIZE(ip)) { if (eof && offset + count > XFS_ISIZE(ip)) {
/* /*
* Determine the initial size of the preallocation. * Determine the initial size of the preallocation.
......
...@@ -71,9 +71,8 @@ xfs_initxattrs( ...@@ -71,9 +71,8 @@ xfs_initxattrs(
* these attrs can be journalled at inode creation time (along with the * these attrs can be journalled at inode creation time (along with the
* inode, of course, such that log replay can't cause these to be lost). * inode, of course, such that log replay can't cause these to be lost).
*/ */
int
STATIC int xfs_inode_init_security(
xfs_init_security(
struct inode *inode, struct inode *inode,
struct inode *dir, struct inode *dir,
const struct qstr *qstr) const struct qstr *qstr)
...@@ -118,7 +117,7 @@ xfs_cleanup_inode( ...@@ -118,7 +117,7 @@ xfs_cleanup_inode(
/* Oh, the horror. /* Oh, the horror.
* If we can't add the ACL or we fail in * If we can't add the ACL or we fail in
* xfs_init_security we must back out. * xfs_inode_init_security we must back out.
* ENOSPC can hit here, among other things. * ENOSPC can hit here, among other things.
*/ */
xfs_dentry_to_name(&teardown, dentry); xfs_dentry_to_name(&teardown, dentry);
...@@ -170,7 +169,7 @@ xfs_generic_create( ...@@ -170,7 +169,7 @@ xfs_generic_create(
inode = VFS_I(ip); inode = VFS_I(ip);
error = xfs_init_security(inode, dir, &dentry->d_name); error = xfs_inode_init_security(inode, dir, &dentry->d_name);
if (unlikely(error)) if (unlikely(error))
goto out_cleanup_inode; goto out_cleanup_inode;
...@@ -385,7 +384,7 @@ xfs_vn_symlink( ...@@ -385,7 +384,7 @@ xfs_vn_symlink(
inode = VFS_I(cip); inode = VFS_I(cip);
error = xfs_init_security(inode, dir, &dentry->d_name); error = xfs_inode_init_security(inode, dir, &dentry->d_name);
if (unlikely(error)) if (unlikely(error))
goto out_cleanup_inode; goto out_cleanup_inode;
......
...@@ -24,4 +24,7 @@ extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, ...@@ -24,4 +24,7 @@ extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,
extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap); extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap);
extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap); extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap);
int xfs_inode_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr);
#endif /* __XFS_IOPS_H__ */ #endif /* __XFS_IOPS_H__ */
...@@ -349,13 +349,15 @@ xfs_log_writable( ...@@ -349,13 +349,15 @@ xfs_log_writable(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
/* /*
* Never write to the log on norecovery mounts, if the block device is * Do not write to the log on norecovery mounts, if the data or log
* read-only, or if the filesystem is shutdown. Read-only mounts still * devices are read-only, or if the filesystem is shutdown. Read-only
* allow internal writes for log recovery and unmount purposes, so don't * mounts allow internal writes for log recovery and unmount purposes,
* restrict that case here. * so don't restrict that case.
*/ */
if (mp->m_flags & XFS_MOUNT_NORECOVERY) if (mp->m_flags & XFS_MOUNT_NORECOVERY)
return false; return false;
if (xfs_readonly_buftarg(mp->m_ddev_targp))
return false;
if (xfs_readonly_buftarg(mp->m_log->l_targ)) if (xfs_readonly_buftarg(mp->m_log->l_targ))
return false; return false;
if (XFS_FORCED_SHUTDOWN(mp)) if (XFS_FORCED_SHUTDOWN(mp))
......
...@@ -444,9 +444,6 @@ struct xlog { ...@@ -444,9 +444,6 @@ struct xlog {
uint32_t l_iclog_roundoff;/* padding roundoff */ uint32_t l_iclog_roundoff;/* padding roundoff */
}; };
#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
/* /*
* Bits for operational state * Bits for operational state
*/ */
......
...@@ -3229,7 +3229,7 @@ xlog_do_log_recovery( ...@@ -3229,7 +3229,7 @@ xlog_do_log_recovery(
xfs_daddr_t head_blk, xfs_daddr_t head_blk,
xfs_daddr_t tail_blk) xfs_daddr_t tail_blk)
{ {
int error, i; int error;
ASSERT(head_blk != tail_blk); ASSERT(head_blk != tail_blk);
...@@ -3237,37 +3237,25 @@ xlog_do_log_recovery( ...@@ -3237,37 +3237,25 @@ xlog_do_log_recovery(
* First do a pass to find all of the cancelled buf log items. * First do a pass to find all of the cancelled buf log items.
* Store them in the buf_cancel_table for use in the second pass. * Store them in the buf_cancel_table for use in the second pass.
*/ */
log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * error = xlog_alloc_buf_cancel_table(log);
sizeof(struct list_head), if (error)
0); return error;
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
error = xlog_do_recovery_pass(log, head_blk, tail_blk, error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS1, NULL); XLOG_RECOVER_PASS1, NULL);
if (error != 0) { if (error != 0)
kmem_free(log->l_buf_cancel_table); goto out_cancel;
log->l_buf_cancel_table = NULL;
return error;
}
/* /*
* Then do a second pass to actually recover the items in the log. * Then do a second pass to actually recover the items in the log.
* When it is complete free the table of buf cancel items. * When it is complete free the table of buf cancel items.
*/ */
error = xlog_do_recovery_pass(log, head_blk, tail_blk, error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS2, NULL); XLOG_RECOVER_PASS2, NULL);
#ifdef DEBUG if (!error)
if (!error) { xlog_check_buf_cancel_table(log);
int i; out_cancel:
xlog_free_buf_cancel_table(log);
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
ASSERT(list_empty(&log->l_buf_cancel_table[i]));
}
#endif /* DEBUG */
kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
return error; return error;
} }
......
...@@ -118,10 +118,10 @@ xfs_check_ondisk_structs(void) ...@@ -118,10 +118,10 @@ xfs_check_ondisk_structs(void)
/* log structures */ /* log structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24); XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24);
XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28); XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32); XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 28); XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 32); XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12);
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176); XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176);
...@@ -132,6 +132,21 @@ xfs_check_ondisk_structs(void) ...@@ -132,6 +132,21 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20); XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20);
XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_rui_log_format, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16);
XFS_CHECK_OFFSET(struct xfs_efi_log_format, efi_extents, 16);
XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16);
XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16);
/* /*
* The v5 superblock format extended several v4 header structures with * The v5 superblock format extended several v4 header structures with
......
...@@ -598,28 +598,18 @@ static const struct xfs_item_ops xfs_cui_item_ops = { ...@@ -598,28 +598,18 @@ static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_relog = xfs_cui_item_relog, .iop_relog = xfs_cui_item_relog,
}; };
/* static inline void
* Copy an CUI format buffer from the given buf, and into the destination
* CUI format structure. The CUI/CUD items were designed not to need any
* special alignment handling.
*/
static int
xfs_cui_copy_format( xfs_cui_copy_format(
struct xfs_log_iovec *buf, struct xfs_cui_log_format *dst,
struct xfs_cui_log_format *dst_cui_fmt) const struct xfs_cui_log_format *src)
{ {
struct xfs_cui_log_format *src_cui_fmt; unsigned int i;
uint len;
src_cui_fmt = buf->i_addr; memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents));
len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
if (buf->i_len == len) { for (i = 0; i < src->cui_nextents; i++)
memcpy(dst_cui_fmt, src_cui_fmt, len); memcpy(&dst->cui_extents[i], &src->cui_extents[i],
return 0; sizeof(struct xfs_phys_extent));
}
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
return -EFSCORRUPTED;
} }
/* /*
...@@ -636,19 +626,26 @@ xlog_recover_cui_commit_pass2( ...@@ -636,19 +626,26 @@ xlog_recover_cui_commit_pass2(
struct xlog_recover_item *item, struct xlog_recover_item *item,
xfs_lsn_t lsn) xfs_lsn_t lsn)
{ {
int error;
struct xfs_mount *mp = log->l_mp; struct xfs_mount *mp = log->l_mp;
struct xfs_cui_log_item *cuip; struct xfs_cui_log_item *cuip;
struct xfs_cui_log_format *cui_formatp; struct xfs_cui_log_format *cui_formatp;
size_t len;
cui_formatp = item->ri_buf[0].i_addr; cui_formatp = item->ri_buf[0].i_addr;
cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) {
error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
if (error) { return -EFSCORRUPTED;
xfs_cui_item_free(cuip); }
return error;
len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents);
if (item->ri_buf[0].i_len != len) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
} }
cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
xfs_cui_copy_format(&cuip->cui_format, cui_formatp);
atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
/* /*
* Insert the intent into the AIL directly and drop one reference so * Insert the intent into the AIL directly and drop one reference so
......
...@@ -154,31 +154,6 @@ xfs_rui_init( ...@@ -154,31 +154,6 @@ xfs_rui_init(
return ruip; return ruip;
} }
/*
* Copy an RUI format buffer from the given buf, and into the destination
* RUI format structure. The RUI/RUD items were designed not to need any
* special alignment handling.
*/
STATIC int
xfs_rui_copy_format(
struct xfs_log_iovec *buf,
struct xfs_rui_log_format *dst_rui_fmt)
{
struct xfs_rui_log_format *src_rui_fmt;
uint len;
src_rui_fmt = buf->i_addr;
len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);
if (buf->i_len != len) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
return -EFSCORRUPTED;
}
memcpy(dst_rui_fmt, src_rui_fmt, len);
return 0;
}
static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip) static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
{ {
return container_of(lip, struct xfs_rud_log_item, rud_item); return container_of(lip, struct xfs_rud_log_item, rud_item);
...@@ -621,6 +596,20 @@ static const struct xfs_item_ops xfs_rui_item_ops = { ...@@ -621,6 +596,20 @@ static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_relog = xfs_rui_item_relog, .iop_relog = xfs_rui_item_relog,
}; };
static inline void
xfs_rui_copy_format(
struct xfs_rui_log_format *dst,
const struct xfs_rui_log_format *src)
{
unsigned int i;
memcpy(dst, src, offsetof(struct xfs_rui_log_format, rui_extents));
for (i = 0; i < src->rui_nextents; i++)
memcpy(&dst->rui_extents[i], &src->rui_extents[i],
sizeof(struct xfs_map_extent));
}
/* /*
* This routine is called to create an in-core extent rmap update * This routine is called to create an in-core extent rmap update
* item from the rui format structure which was logged on disk. * item from the rui format structure which was logged on disk.
...@@ -635,19 +624,26 @@ xlog_recover_rui_commit_pass2( ...@@ -635,19 +624,26 @@ xlog_recover_rui_commit_pass2(
struct xlog_recover_item *item, struct xlog_recover_item *item,
xfs_lsn_t lsn) xfs_lsn_t lsn)
{ {
int error;
struct xfs_mount *mp = log->l_mp; struct xfs_mount *mp = log->l_mp;
struct xfs_rui_log_item *ruip; struct xfs_rui_log_item *ruip;
struct xfs_rui_log_format *rui_formatp; struct xfs_rui_log_format *rui_formatp;
size_t len;
rui_formatp = item->ri_buf[0].i_addr; rui_formatp = item->ri_buf[0].i_addr;
ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) {
error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
if (error) { return -EFSCORRUPTED;
xfs_rui_item_free(ruip);
return error;
} }
len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents);
if (item->ri_buf[0].i_len != len) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
}
ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
xfs_rui_copy_format(&ruip->rui_format, rui_formatp);
atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
/* /*
* Insert the intent into the AIL directly and drop one reference so * Insert the intent into the AIL directly and drop one reference so
......
...@@ -2023,7 +2023,7 @@ xfs_init_zones(void) ...@@ -2023,7 +2023,7 @@ xfs_init_zones(void)
xfs_efd_zone = kmem_cache_create("xfs_efd_item", xfs_efd_zone = kmem_cache_create("xfs_efd_item",
(sizeof(struct xfs_efd_log_item) + (sizeof(struct xfs_efd_log_item) +
(XFS_EFD_MAX_FAST_EXTENTS - 1) * XFS_EFD_MAX_FAST_EXTENTS *
sizeof(struct xfs_extent)), sizeof(struct xfs_extent)),
0, 0, NULL); 0, 0, NULL);
if (!xfs_efd_zone) if (!xfs_efd_zone)
...@@ -2031,7 +2031,7 @@ xfs_init_zones(void) ...@@ -2031,7 +2031,7 @@ xfs_init_zones(void)
xfs_efi_zone = kmem_cache_create("xfs_efi_item", xfs_efi_zone = kmem_cache_create("xfs_efi_item",
(sizeof(struct xfs_efi_log_item) + (sizeof(struct xfs_efi_log_item) +
(XFS_EFI_MAX_FAST_EXTENTS - 1) * XFS_EFI_MAX_FAST_EXTENTS *
sizeof(struct xfs_extent)), sizeof(struct xfs_extent)),
0, 0, NULL); 0, 0, NULL);
if (!xfs_efi_zone) if (!xfs_efi_zone)
......
...@@ -3962,7 +3962,7 @@ DECLARE_EVENT_CLASS(xfs_icwalk_class, ...@@ -3962,7 +3962,7 @@ DECLARE_EVENT_CLASS(xfs_icwalk_class,
__field(uint32_t, gid) __field(uint32_t, gid)
__field(prid_t, prid) __field(prid_t, prid)
__field(__u64, min_file_size) __field(__u64, min_file_size)
__field(int, scan_limit) __field(long, scan_limit)
__field(unsigned long, caller_ip) __field(unsigned long, caller_ip)
), ),
TP_fast_assign( TP_fast_assign(
...@@ -3977,7 +3977,7 @@ DECLARE_EVENT_CLASS(xfs_icwalk_class, ...@@ -3977,7 +3977,7 @@ DECLARE_EVENT_CLASS(xfs_icwalk_class,
__entry->scan_limit = icw ? icw->icw_scan_limit : 0; __entry->scan_limit = icw ? icw->icw_scan_limit : 0;
__entry->caller_ip = caller_ip; __entry->caller_ip = caller_ip;
), ),
TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %d caller %pS", TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %ld caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->flags, __entry->flags,
__entry->uid, __entry->uid,
......
...@@ -608,19 +608,12 @@ xfs_trans_unreserve_and_mod_sb( ...@@ -608,19 +608,12 @@ xfs_trans_unreserve_and_mod_sb(
ASSERT(!error); ASSERT(!error);
} }
if (idelta) { if (idelta)
percpu_counter_add_batch(&mp->m_icount, idelta, percpu_counter_add_batch(&mp->m_icount, idelta,
XFS_ICOUNT_BATCH); XFS_ICOUNT_BATCH);
if (idelta < 0)
ASSERT(__percpu_counter_compare(&mp->m_icount, 0,
XFS_ICOUNT_BATCH) >= 0);
}
if (ifreedelta) { if (ifreedelta)
percpu_counter_add(&mp->m_ifree, ifreedelta); percpu_counter_add(&mp->m_ifree, ifreedelta);
if (ifreedelta < 0)
ASSERT(percpu_counter_compare(&mp->m_ifree, 0) >= 0);
}
if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY)) if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
return; return;
......
...@@ -205,7 +205,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) ...@@ -205,7 +205,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
} }
int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
unsigned int nft_parse_register(const struct nlattr *attr); int nft_parse_register(const struct nlattr *attr, u32 *preg);
int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len);
......
...@@ -8481,17 +8481,23 @@ EXPORT_SYMBOL_GPL(nft_parse_u32_check); ...@@ -8481,17 +8481,23 @@ EXPORT_SYMBOL_GPL(nft_parse_u32_check);
* Registers used to be 128 bit wide, these register numbers will be * Registers used to be 128 bit wide, these register numbers will be
* mapped to the corresponding 32 bit register numbers. * mapped to the corresponding 32 bit register numbers.
*/ */
unsigned int nft_parse_register(const struct nlattr *attr) int nft_parse_register(const struct nlattr *attr, u32 *preg)
{ {
unsigned int reg; unsigned int reg;
reg = ntohl(nla_get_be32(attr)); reg = ntohl(nla_get_be32(attr));
switch (reg) { switch (reg) {
case NFT_REG_VERDICT...NFT_REG_4: case NFT_REG_VERDICT...NFT_REG_4:
return reg * NFT_REG_SIZE / NFT_REG32_SIZE; *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
break;
case NFT_REG32_00...NFT_REG32_15:
*preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
break;
default: default:
return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; return -ERANGE;
} }
return 0;
} }
EXPORT_SYMBOL_GPL(nft_parse_register); EXPORT_SYMBOL_GPL(nft_parse_register);
...@@ -8543,7 +8549,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) ...@@ -8543,7 +8549,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
u32 reg; u32 reg;
int err; int err;
reg = nft_parse_register(attr); err = nft_parse_register(attr, &reg);
if (err < 0)
return err;
err = nft_validate_register_load(reg, len); err = nft_validate_register_load(reg, len);
if (err < 0) if (err < 0)
return err; return err;
...@@ -8612,7 +8621,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx, ...@@ -8612,7 +8621,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx,
int err; int err;
u32 reg; u32 reg;
reg = nft_parse_register(attr); err = nft_parse_register(attr, &reg);
if (err < 0)
return err;
err = nft_validate_register_store(ctx, reg, data, type, len); err = nft_validate_register_store(ctx, reg, data, type, len);
if (err < 0) if (err < 0)
return err; return err;
......
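
The nf_tables change above (pulled in with CVE-2022-1015) makes nft_parse_register() return an error for register numbers outside the two valid ranges instead of blindly translating them. A stand-alone sketch of the mapping it performs; the enum values and the NFT_REG_SIZE/NFT_REG32_SIZE constants are assumed from include/uapi/linux/netfilter/nf_tables.h:

    #include <errno.h>
    #include <stdio.h>

    /* Assumed UAPI values: legacy 128-bit registers 0..4, 32-bit registers
     * 8..23, register sizes of 16 and 4 bytes respectively. */
    enum nft_registers {
        NFT_REG_VERDICT,
        NFT_REG_1, NFT_REG_2, NFT_REG_3, NFT_REG_4,
        NFT_REG32_00 = 8,
        NFT_REG32_15 = 23,
    };
    #define NFT_REG_SIZE   16
    #define NFT_REG32_SIZE  4

    /* Same arithmetic as the patched nft_parse_register(): both register
     * families map onto 32-bit slot indices; anything else is -ERANGE. */
    static int parse_register(unsigned int reg, unsigned int *preg)
    {
        if (reg <= NFT_REG_4)
            *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
        else if (reg >= NFT_REG32_00 && reg <= NFT_REG32_15)
            *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
        else
            return -ERANGE;
        return 0;
    }

    int main(void)
    {
        unsigned int slot;

        parse_register(NFT_REG_1, &slot);
        printf("NFT_REG_1    -> slot %u\n", slot);    /* 4 */
        parse_register(NFT_REG32_00, &slot);
        printf("NFT_REG32_00 -> slot %u\n", slot);    /* 4, same slot */
        printf("reg 200      -> %d\n", parse_register(200, &slot)); /* -ERANGE */
        return 0;
    }

As the later hunks show, nft_parse_register_load() and nft_parse_register_store() now propagate that error, so an out-of-range register attribute fails the netlink request instead of producing a bogus slot index.
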
...@@ -503,17 +503,6 @@ config NET_CLS_BASIC ...@@ -503,17 +503,6 @@ config NET_CLS_BASIC
To compile this code as a module, choose M here: the To compile this code as a module, choose M here: the
module will be called cls_basic. module will be called cls_basic.
config NET_CLS_TCINDEX
tristate "Traffic-Control Index (TCINDEX)"
select NET_CLS
help
Say Y here if you want to be able to classify packets based on
traffic control indices. You will want this feature if you want
to implement Differentiated Services together with DSMARK.
To compile this code as a module, choose M here: the
module will be called cls_tcindex.
config NET_CLS_ROUTE4 config NET_CLS_ROUTE4
tristate "Routing decision (ROUTE)" tristate "Routing decision (ROUTE)"
depends on INET depends on INET
......
...@@ -69,7 +69,6 @@ obj-$(CONFIG_NET_CLS_U32) += cls_u32.o ...@@ -69,7 +69,6 @@ obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o
obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* net/sched/cls_tcindex.c Packet classifier for skb->tc_index
*
* Written 1998,1999 by Werner Almesberger, EPFL ICA
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/refcount.h>
#include <linux/rcupdate.h>
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
/*
* Passing parameters to the root seems to be done more awkwardly than really
* necessary. At least, u32 doesn't seem to use such dirty hacks. To be
* verified. FIXME.
*/
#define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */
#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
struct tcindex_data;
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
struct tcindex_data *p;
struct rcu_work rwork;
};
struct tcindex_filter {
u16 key;
struct tcindex_filter_result result;
struct tcindex_filter __rcu *next;
struct rcu_work rwork;
};
struct tcindex_data {
struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
struct tcindex_filter __rcu **h; /* imperfect hash; */
struct tcf_proto *tp;
u16 mask; /* AND key with mask */
u32 shift; /* shift ANDed key to the right */
u32 hash; /* hash table size; 0 if undefined */
u32 alloc_hash; /* allocated size */
u32 fall_through; /* 0: only classify if explicit match */
refcount_t refcnt; /* a temporary refcnt for perfect hash */
struct rcu_work rwork;
};
static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
{
return tcf_exts_has_actions(&r->exts) || r->res.classid;
}
static void tcindex_data_get(struct tcindex_data *p)
{
refcount_inc(&p->refcnt);
}
static void tcindex_data_put(struct tcindex_data *p)
{
if (refcount_dec_and_test(&p->refcnt)) {
kfree(p->perfect);
kfree(p->h);
kfree(p);
}
}
static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
u16 key)
{
if (p->perfect) {
struct tcindex_filter_result *f = p->perfect + key;
return tcindex_filter_is_set(f) ? f : NULL;
} else if (p->h) {
struct tcindex_filter __rcu **fp;
struct tcindex_filter *f;
fp = &p->h[key % p->hash];
for (f = rcu_dereference_bh_rtnl(*fp);
f;
fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
if (f->key == key)
return &f->result;
}
return NULL;
}
static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct tcindex_data *p = rcu_dereference_bh(tp->root);
struct tcindex_filter_result *f;
int key = (skb->tc_index & p->mask) >> p->shift;
pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
skb, tp, res, p);
f = tcindex_lookup(p, key);
if (!f) {
struct Qdisc *q = tcf_block_q(tp->chain->block);
if (!p->fall_through)
return -1;
res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
res->class = 0;
pr_debug("alg 0x%x\n", res->classid);
return 0;
}
*res = f->res;
pr_debug("map 0x%x\n", res->classid);
return tcf_exts_exec(skb, &f->exts, res);
}
static void *tcindex_get(struct tcf_proto *tp, u32 handle)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r;
pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
if (p->perfect && handle >= p->alloc_hash)
return NULL;
r = tcindex_lookup(p, handle);
return r && tcindex_filter_is_set(r) ? r : NULL;
}
static int tcindex_init(struct tcf_proto *tp)
{
struct tcindex_data *p;
pr_debug("tcindex_init(tp %p)\n", tp);
p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
if (!p)
return -ENOMEM;
p->mask = 0xffff;
p->hash = DEFAULT_HASH_SIZE;
p->fall_through = 1;
refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */
rcu_assign_pointer(tp->root, p);
return 0;
}
static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
{
tcf_exts_destroy(&r->exts);
tcf_exts_put_net(&r->exts);
tcindex_data_put(r->p);
}
static void tcindex_destroy_rexts_work(struct work_struct *work)
{
struct tcindex_filter_result *r;
r = container_of(to_rcu_work(work),
struct tcindex_filter_result,
rwork);
rtnl_lock();
__tcindex_destroy_rexts(r);
rtnl_unlock();
}
static void __tcindex_destroy_fexts(struct tcindex_filter *f)
{
tcf_exts_destroy(&f->result.exts);
tcf_exts_put_net(&f->result.exts);
kfree(f);
}
static void tcindex_destroy_fexts_work(struct work_struct *work)
{
struct tcindex_filter *f = container_of(to_rcu_work(work),
struct tcindex_filter,
rwork);
rtnl_lock();
__tcindex_destroy_fexts(f);
rtnl_unlock();
}
static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = arg;
struct tcindex_filter __rcu **walk;
struct tcindex_filter *f = NULL;
pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
if (p->perfect) {
if (!r->res.class)
return -ENOENT;
} else {
int i;
for (i = 0; i < p->hash; i++) {
walk = p->h + i;
for (f = rtnl_dereference(*walk); f;
walk = &f->next, f = rtnl_dereference(*walk)) {
if (&f->result == r)
goto found;
}
}
return -ENOENT;
found:
rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
/* all classifiers are required to call tcf_exts_destroy() after rcu
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
if (f) {
if (tcf_exts_get_net(&f->result.exts))
tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work);
else
__tcindex_destroy_fexts(f);
} else {
tcindex_data_get(p);
if (tcf_exts_get_net(&r->exts))
tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
else
__tcindex_destroy_rexts(r);
}
*last = false;
return 0;
}
static void tcindex_destroy_work(struct work_struct *work)
{
struct tcindex_data *p = container_of(to_rcu_work(work),
struct tcindex_data,
rwork);
tcindex_data_put(p);
}
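/* A perfect hash can be used only if every possible key value
 * (mask >> shift) indexes inside the allocated table.
 */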
static inline int
valid_perfect_hash(struct tcindex_data *p)
{
return p->hash > (p->mask >> p->shift);
}
static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
[TCA_TCINDEX_HASH] = { .type = NLA_U32 },
[TCA_TCINDEX_MASK] = { .type = NLA_U16 },
[TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
[TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
[TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
};
static int tcindex_filter_result_init(struct tcindex_filter_result *r,
struct tcindex_data *p,
struct net *net)
{
memset(r, 0, sizeof(*r));
r->p = p;
return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
TCA_TCINDEX_POLICE);
}
static void tcindex_free_perfect_hash(struct tcindex_data *cp);
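/* Frees a tcindex_data that was replaced in tcindex_set_parms(): only the
 * perfect hash and the structure itself are released, because the
 * imperfect hash table (p->h) is inherited by the new copy.
 */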
static void tcindex_partial_destroy_work(struct work_struct *work)
{
struct tcindex_data *p = container_of(to_rcu_work(work),
struct tcindex_data,
rwork);
rtnl_lock();
if (p->perfect)
tcindex_free_perfect_hash(p);
kfree(p);
rtnl_unlock();
}
static void tcindex_free_perfect_hash(struct tcindex_data *cp)
{
int i;
for (i = 0; i < cp->hash; i++)
tcf_exts_destroy(&cp->perfect[i].exts);
kfree(cp->perfect);
}
static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
{
int i, err = 0;
cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
GFP_KERNEL | __GFP_NOWARN);
if (!cp->perfect)
return -ENOMEM;
for (i = 0; i < cp->hash; i++) {
err = tcf_exts_init(&cp->perfect[i].exts, net,
TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
goto errout;
cp->perfect[i].p = cp;
}
return 0;
errout:
tcindex_free_perfect_hash(cp);
return err;
}
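/* Build a new tcindex_data from the old one plus the netlink attributes,
 * RCU-swap it onto tp->root and queue the old copy for partial
 * destruction. The imperfect hash table is shared with the old copy; when
 * the handle already exists there, the entry is replaced in place under
 * RCU rather than reused.
 */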
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
{
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_data *cp = NULL, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
struct tcf_result cr = {};
int err, balloc = 0;
struct tcf_exts e;
bool update_h = false;
err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
if (err < 0)
goto errout;
err = -ENOMEM;
/* tcindex_data attributes must look atomic to classifier/lookup so
* allocate new tcindex data and RCU assign it onto root. Keeping
* perfect hash and hash pointers from old data.
*/
cp = kzalloc(sizeof(*cp), GFP_KERNEL);
if (!cp)
goto errout;
cp->mask = p->mask;
cp->shift = p->shift;
cp->hash = p->hash;
cp->alloc_hash = p->alloc_hash;
cp->fall_through = p->fall_through;
cp->tp = tp;
refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */
if (tb[TCA_TCINDEX_HASH])
cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
if (tb[TCA_TCINDEX_MASK])
cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
if (tb[TCA_TCINDEX_SHIFT]) {
cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
if (cp->shift > 16) {
err = -EINVAL;
goto errout;
}
}
if (!cp->hash) {
/* Hash not specified, use perfect hash if the upper limit
* of the hashing index is below the threshold.
*/
if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
cp->hash = (cp->mask >> cp->shift) + 1;
else
cp->hash = DEFAULT_HASH_SIZE;
}
if (p->perfect) {
int i;
if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
cp->alloc_hash = cp->hash;
for (i = 0; i < min(cp->hash, p->hash); i++)
cp->perfect[i].res = p->perfect[i].res;
balloc = 1;
}
cp->h = p->h;
err = tcindex_filter_result_init(&new_filter_result, cp, net);
if (err < 0)
goto errout_alloc;
if (old_r)
cr = r->res;
err = -EBUSY;
/* Hash already allocated, make sure that we still meet the
* requirements for the allocated hash.
*/
if (cp->perfect) {
if (!valid_perfect_hash(cp) ||
cp->hash > cp->alloc_hash)
goto errout_alloc;
} else if (cp->h && cp->hash != cp->alloc_hash) {
goto errout_alloc;
}
err = -EINVAL;
if (tb[TCA_TCINDEX_FALL_THROUGH])
cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
if (!cp->perfect && !cp->h)
cp->alloc_hash = cp->hash;
/* Note: this could be as restrictive as if (handle & ~(mask >> shift))
* but then, we'd fail handles that may become valid after some future
* mask change. While this is extremely unlikely to ever matter,
* the check below is safer (and also more backwards-compatible).
*/
if (cp->perfect || valid_perfect_hash(cp))
if (handle >= cp->alloc_hash)
goto errout_alloc;
err = -ENOMEM;
if (!cp->perfect && !cp->h) {
if (valid_perfect_hash(cp)) {
if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout_alloc;
balloc = 1;
} else {
struct tcindex_filter __rcu **hash;
hash = kcalloc(cp->hash,
sizeof(struct tcindex_filter *),
GFP_KERNEL);
if (!hash)
goto errout_alloc;
cp->h = hash;
balloc = 2;
}
}
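/* Pick the slot that will receive the new parameters: a perfect-hash
 * handle maps directly into the array; otherwise note whether the handle
 * already exists in the imperfect hash so the existing entry can be
 * replaced (not reused) under RCU.
 */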
if (cp->perfect) {
r = cp->perfect + handle;
} else {
/* imperfect area is updated in-place using rcu */
update_h = !!tcindex_lookup(cp, handle);
r = &new_filter_result;
}
if (r == &new_filter_result) {
f = kzalloc(sizeof(*f), GFP_KERNEL);
if (!f)
goto errout_alloc;
f->key = handle;
f->next = NULL;
err = tcindex_filter_result_init(&f->result, cp, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
}
}
if (tb[TCA_TCINDEX_CLASSID]) {
cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
tcf_bind_filter(tp, &cr, base);
}
if (old_r && old_r != r) {
err = tcindex_filter_result_init(old_r, cp, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
}
}
oldp = p;
r->res = cr;
tcf_exts_change(&r->exts, &e);
rcu_assign_pointer(tp->root, cp);
if (update_h) {
struct tcindex_filter __rcu **fp;
struct tcindex_filter *cf;
f->result.res = r->res;
tcf_exts_change(&f->result.exts, &r->exts);
/* imperfect area bucket */
fp = cp->h + (handle % cp->hash);
/* lookup the filter, guaranteed to exist */
for (cf = rcu_dereference_bh_rtnl(*fp); cf;
fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp))
if (cf->key == (u16)handle)
break;
f->next = cf->next;
cf = rcu_replace_pointer(*fp, f, 1);
tcf_exts_get_net(&cf->result.exts);
tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work);
} else if (r == &new_filter_result) {
struct tcindex_filter *nfp;
struct tcindex_filter __rcu **fp;
f->result.res = r->res;
tcf_exts_change(&f->result.exts, &r->exts);
fp = cp->h + (handle % cp->hash);
for (nfp = rtnl_dereference(*fp);
nfp;
fp = &nfp->next, nfp = rtnl_dereference(*fp))
; /* nothing */
rcu_assign_pointer(*fp, f);
} else {
tcf_exts_destroy(&new_filter_result.exts);
}
if (oldp)
tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
return 0;
errout_alloc:
if (balloc == 1)
tcindex_free_perfect_hash(cp);
else if (balloc == 2)
kfree(cp->h);
tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
tcf_exts_destroy(&e);
return err;
}
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = *arg;
int err;
pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
"p %p,r %p,*arg %p\n",
tp, handle, tca, arg, opt, p, r, *arg);
if (!opt)
return 0;
err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt,
tcindex_policy, NULL);
if (err < 0)
return err;
return tcindex_set_parms(net, tp, base, handle, p, r, tb,
tca[TCA_RATE], ovr, extack);
}
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter *f, *next;
int i;
pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
if (p->perfect) {
for (i = 0; i < p->hash; i++) {
if (!p->perfect[i].res.class)
continue;
if (walker->count >= walker->skip) {
if (walker->fn(tp, p->perfect + i, walker) < 0) {
walker->stop = 1;
return;
}
}
walker->count++;
}
}
if (!p->h)
return;
for (i = 0; i < p->hash; i++) {
for (f = rtnl_dereference(p->h[i]); f; f = next) {
next = rtnl_dereference(f->next);
if (walker->count >= walker->skip) {
if (walker->fn(tp, &f->result, walker) < 0) {
walker->stop = 1;
return;
}
}
walker->count++;
}
}
}
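/* Tear down the whole classifier: unbind and queue teardown for every
 * perfect-hash result (taking a temporary reference on 'p' so it outlives
 * them), delete every imperfect-hash entry, then drop the initial
 * reference via tcindex_destroy_work().
 */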
static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
int i;
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
if (p->perfect) {
for (i = 0; i < p->hash; i++) {
struct tcindex_filter_result *r = p->perfect + i;
/* tcf_queue_work() does not guarantee the ordering we
* want, so we have to take this refcnt temporarily to
* ensure 'p' is freed after all tcindex_filter_result
* here. Imperfect hash does not need this, because it
* uses linked lists rather than an array.
*/
tcindex_data_get(p);
tcf_unbind_filter(tp, &r->res);
if (tcf_exts_get_net(&r->exts))
tcf_queue_work(&r->rwork,
tcindex_destroy_rexts_work);
else
__tcindex_destroy_rexts(r);
}
}
for (i = 0; p->h && i < p->hash; i++) {
struct tcindex_filter *f, *next;
bool last;
for (f = rtnl_dereference(p->h[i]); f; f = next) {
next = rtnl_dereference(f->next);
tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
}
}
tcf_queue_work(&p->rwork, tcindex_destroy_work);
}
static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = fh;
struct nlattr *nest;
pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
tp, fh, skb, t, p, r);
pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
if (!fh) {
t->tcm_handle = ~0; /* whatever ... */
if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
goto nla_put_failure;
nla_nest_end(skb, nest);
} else {
if (p->perfect) {
t->tcm_handle = r - p->perfect;
} else {
struct tcindex_filter *f;
struct tcindex_filter __rcu **fp;
int i;
t->tcm_handle = 0;
for (i = 0; !t->tcm_handle && i < p->hash; i++) {
fp = &p->h[i];
for (f = rtnl_dereference(*fp);
!t->tcm_handle && f;
fp = &f->next, f = rtnl_dereference(*fp)) {
if (&f->result == r)
t->tcm_handle = f->key;
}
}
}
pr_debug("handle = %d\n", t->tcm_handle);
if (r->res.class &&
nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
goto nla_put_failure;
if (tcf_exts_dump(skb, &r->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
if (tcf_exts_dump_stats(skb, &r->exts) < 0)
goto nla_put_failure;
}
return skb->len;
nla_put_failure:
nla_nest_cancel(skb, nest);
return -1;
}
static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
void *q, unsigned long base)
{
struct tcindex_filter_result *r = fh;
if (r && r->res.classid == classid) {
if (cl)
__tcf_bind_filter(q, &r->res, base);
else
__tcf_unbind_filter(q, &r->res);
}
}
static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
.kind = "tcindex",
.classify = tcindex_classify,
.init = tcindex_init,
.destroy = tcindex_destroy,
.get = tcindex_get,
.change = tcindex_change,
.delete = tcindex_delete,
.walk = tcindex_walk,
.dump = tcindex_dump,
.bind_class = tcindex_bind_class,
.owner = THIS_MODULE,
};
static int __init init_tcindex(void)
{
return register_tcf_proto_ops(&cls_tcindex_ops);
}
static void __exit exit_tcindex(void)
{
unregister_tcf_proto_ops(&cls_tcindex_ops);
}
module_init(init_tcindex)
module_exit(exit_tcindex)
MODULE_LICENSE("GPL");