提交 b1aa812b 编写于 作者: J Jan Kara 提交者: Linus Torvalds

mm: move handling of COW faults into DAX code

Move final handling of COW faults from generic code into DAX fault
handler.  That way generic code doesn't have to be aware of
peculiarities of DAX locking so remove that knowledge and make locking
functions private to fs/dax.c.

Link: http://lkml.kernel.org/r/1479460644-25076-11-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 9118c0cb
...@@ -240,6 +240,23 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping, ...@@ -240,6 +240,23 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
} }
} }
/*
 * Unlock the radix tree entry stored at @index in @mapping and notify a
 * waiter.
 *
 * The lookup and the unlock are both done under mapping->tree_lock. If
 * there is no entry at @index, the entry is not an exceptional entry, or
 * its slot is not currently locked, a one-time warning is emitted and the
 * function returns without modifying the slot or waking anyone.
 */
static void dax_unlock_mapping_entry(struct address_space *mapping,
pgoff_t index)
{
void *entry, **slot;
/* Look the slot up and inspect its lock state under tree_lock. */
spin_lock_irq(&mapping->tree_lock);
entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
/* Unexpected state: warn once, drop tree_lock and bail out. */
if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
!slot_locked(mapping, slot))) {
spin_unlock_irq(&mapping->tree_lock);
return;
}
unlock_slot(mapping, slot);
spin_unlock_irq(&mapping->tree_lock);
/* Wake-up is issued after tree_lock is released; last argument is false. */
dax_wake_mapping_entry_waiter(mapping, index, entry, false);
}
static void put_locked_mapping_entry(struct address_space *mapping, static void put_locked_mapping_entry(struct address_space *mapping,
pgoff_t index, void *entry) pgoff_t index, void *entry)
{ {
...@@ -433,22 +450,6 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, ...@@ -433,22 +450,6 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
} }
/*
 * Unlock the radix tree entry stored at @index in @mapping and notify a
 * waiter.
 *
 * The lookup and the unlock are both done under mapping->tree_lock. If
 * there is no entry at @index, the entry is not an exceptional entry, or
 * its slot is not currently locked, a one-time warning is emitted and the
 * function returns without modifying the slot or waking anyone.
 */
void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
{
void *entry, **slot;
/* Look the slot up and inspect its lock state under tree_lock. */
spin_lock_irq(&mapping->tree_lock);
entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
/* Unexpected state: warn once, drop tree_lock and bail out. */
if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
!slot_locked(mapping, slot))) {
spin_unlock_irq(&mapping->tree_lock);
return;
}
unlock_slot(mapping, slot);
spin_unlock_irq(&mapping->tree_lock);
/* Wake-up is issued after tree_lock is released; last argument is false. */
dax_wake_mapping_entry_waiter(mapping, index, entry, false);
}
/* /*
* Delete exceptional DAX entry at @index from @mapping. Wait for radix tree * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
* entry to get unlocked before deleting it. * entry to get unlocked before deleting it.
...@@ -500,10 +501,8 @@ static int dax_load_hole(struct address_space *mapping, void *entry, ...@@ -500,10 +501,8 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
/* This will replace locked radix tree entry with a hole page */ /* This will replace locked radix tree entry with a hole page */
page = find_or_create_page(mapping, vmf->pgoff, page = find_or_create_page(mapping, vmf->pgoff,
vmf->gfp_mask | __GFP_ZERO); vmf->gfp_mask | __GFP_ZERO);
if (!page) { if (!page)
put_locked_mapping_entry(mapping, vmf->pgoff, entry);
return VM_FAULT_OOM; return VM_FAULT_OOM;
}
vmf->page = page; vmf->page = page;
return VM_FAULT_LOCKED; return VM_FAULT_LOCKED;
} }
...@@ -954,7 +953,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, ...@@ -954,7 +953,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
struct iomap iomap = { 0 }; struct iomap iomap = { 0 };
unsigned flags = IOMAP_FAULT; unsigned flags = IOMAP_FAULT;
int error, major = 0; int error, major = 0;
int locked_status = 0; int vmf_ret = 0;
void *entry; void *entry;
/* /*
...@@ -1007,13 +1006,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, ...@@ -1007,13 +1006,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if (error) if (error)
goto finish_iomap; goto finish_iomap;
if (!radix_tree_exceptional_entry(entry)) {
vmf->page = entry; __SetPageUptodate(vmf->cow_page);
locked_status = VM_FAULT_LOCKED; vmf_ret = finish_fault(vmf);
} else { if (!vmf_ret)
vmf->entry = entry; vmf_ret = VM_FAULT_DONE_COW;
locked_status = VM_FAULT_DAX_LOCKED;
}
goto finish_iomap; goto finish_iomap;
} }
...@@ -1030,7 +1027,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, ...@@ -1030,7 +1027,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
case IOMAP_UNWRITTEN: case IOMAP_UNWRITTEN:
case IOMAP_HOLE: case IOMAP_HOLE:
if (!(vmf->flags & FAULT_FLAG_WRITE)) { if (!(vmf->flags & FAULT_FLAG_WRITE)) {
locked_status = dax_load_hole(mapping, entry, vmf); vmf_ret = dax_load_hole(mapping, entry, vmf);
break; break;
} }
/*FALLTHRU*/ /*FALLTHRU*/
...@@ -1042,7 +1039,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, ...@@ -1042,7 +1039,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
finish_iomap: finish_iomap:
if (ops->iomap_end) { if (ops->iomap_end) {
if (error) { if (error || (vmf_ret & VM_FAULT_ERROR)) {
/* keep previous error */ /* keep previous error */
ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags, ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
&iomap); &iomap);
...@@ -1052,7 +1049,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, ...@@ -1052,7 +1049,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
} }
} }
unlock_entry: unlock_entry:
if (!locked_status || error) if (vmf_ret != VM_FAULT_LOCKED || error)
put_locked_mapping_entry(mapping, vmf->pgoff, entry); put_locked_mapping_entry(mapping, vmf->pgoff, entry);
out: out:
if (error == -ENOMEM) if (error == -ENOMEM)
...@@ -1060,9 +1057,9 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, ...@@ -1060,9 +1057,9 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
/* -EBUSY is fine, somebody else faulted on the same PTE */ /* -EBUSY is fine, somebody else faulted on the same PTE */
if (error < 0 && error != -EBUSY) if (error < 0 && error != -EBUSY)
return VM_FAULT_SIGBUS | major; return VM_FAULT_SIGBUS | major;
if (locked_status) { if (vmf_ret) {
WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */ WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */
return locked_status; return vmf_ret;
} }
return VM_FAULT_NOPAGE | major; return VM_FAULT_NOPAGE | major;
} }
......
...@@ -46,7 +46,6 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, ...@@ -46,7 +46,6 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
#ifdef CONFIG_FS_DAX #ifdef CONFIG_FS_DAX
struct page *read_dax_sector(struct block_device *bdev, sector_t n); struct page *read_dax_sector(struct block_device *bdev, sector_t n);
void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index);
int __dax_zero_page_range(struct block_device *bdev, sector_t sector, int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
unsigned int offset, unsigned int length); unsigned int offset, unsigned int length);
#else #else
...@@ -55,12 +54,6 @@ static inline struct page *read_dax_sector(struct block_device *bdev, ...@@ -55,12 +54,6 @@ static inline struct page *read_dax_sector(struct block_device *bdev,
{ {
return ERR_PTR(-ENXIO); return ERR_PTR(-ENXIO);
} }
/*
 * Stub used when CONFIG_FS_DAX is not set (this is the #else branch of
 * the CONFIG_FS_DAX conditional). Shouldn't ever be called when dax is
 * disabled, so reaching it triggers BUG().
 */
static inline void dax_unlock_mapping_entry(struct address_space *mapping,
pgoff_t index)
{
BUG();
}
static inline int __dax_zero_page_range(struct block_device *bdev, static inline int __dax_zero_page_range(struct block_device *bdev,
sector_t sector, unsigned int offset, unsigned int length) sector_t sector, unsigned int offset, unsigned int length)
{ {
......
...@@ -308,12 +308,6 @@ struct vm_fault { ...@@ -308,12 +308,6 @@ struct vm_fault {
* is set (which is also implied by * is set (which is also implied by
* VM_FAULT_ERROR). * VM_FAULT_ERROR).
*/ */
void *entry; /* ->fault handler can alternatively
* return locked DAX entry. In that
* case handler should return
* VM_FAULT_DAX_LOCKED and fill in
* entry here.
*/
/* These three entries are valid only while holding ptl lock */ /* These three entries are valid only while holding ptl lock */
pte_t *pte; /* Pointer to pte entry matching pte_t *pte; /* Pointer to pte entry matching
* the 'address'. NULL if the page * the 'address'. NULL if the page
...@@ -1104,8 +1098,7 @@ static inline void clear_page_pfmemalloc(struct page *page) ...@@ -1104,8 +1098,7 @@ static inline void clear_page_pfmemalloc(struct page *page)
#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */
#define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */
#define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ #define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */
#define VM_FAULT_DAX_LOCKED 0x1000 /* ->fault has locked DAX entry */ #define VM_FAULT_DONE_COW 0x1000 /* ->fault has fully handled COW */
#define VM_FAULT_DONE_COW 0x2000 /* ->fault has fully handled COW */
#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
......
...@@ -2845,7 +2845,7 @@ static int __do_fault(struct vm_fault *vmf) ...@@ -2845,7 +2845,7 @@ static int __do_fault(struct vm_fault *vmf)
ret = vma->vm_ops->fault(vma, vmf); ret = vma->vm_ops->fault(vma, vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
VM_FAULT_DAX_LOCKED | VM_FAULT_DONE_COW))) VM_FAULT_DONE_COW)))
return ret; return ret;
if (unlikely(PageHWPoison(vmf->page))) { if (unlikely(PageHWPoison(vmf->page))) {
...@@ -3276,17 +3276,12 @@ static int do_cow_fault(struct vm_fault *vmf) ...@@ -3276,17 +3276,12 @@ static int do_cow_fault(struct vm_fault *vmf)
if (ret & VM_FAULT_DONE_COW) if (ret & VM_FAULT_DONE_COW)
return ret; return ret;
if (!(ret & VM_FAULT_DAX_LOCKED)) copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma);
copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma);
__SetPageUptodate(vmf->cow_page); __SetPageUptodate(vmf->cow_page);
ret |= finish_fault(vmf); ret |= finish_fault(vmf);
if (!(ret & VM_FAULT_DAX_LOCKED)) { unlock_page(vmf->page);
unlock_page(vmf->page); put_page(vmf->page);
put_page(vmf->page);
} else {
dax_unlock_mapping_entry(vma->vm_file->f_mapping, vmf->pgoff);
}
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
goto uncharge_out; goto uncharge_out;
return ret; return ret;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册