提交 29a8ea4f 编写于 作者: L Linus Torvalds

Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
 "1/ Fixes to the libnvdimm 'pfn' device that establishes a reserved
     area for storing a struct page array.

  2/ Fixes for dax operations on a raw block device to prevent pagecache
     collisions with dax mappings.

  3/ A fix for pfn_t usage in vm_insert_mixed that lead to a null
     pointer de-reference.

  These have received build success notification from the kbuild robot
  across 153 configs and pass the latest ndctl tests"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  phys_to_pfn_t: use phys_addr_t
  mm: fix pfn_t to page conversion in vm_insert_mixed
  block: use DAX for partition table reads
  block: revert runtime dax control of the raw block device
  fs, block: force direct-I/O for dax-enabled block devices
  devm_memremap_pages: fix vmem_altmap lifetime + alignment handling
  libnvdimm, pfn: fix restoring memmap location
  libnvdimm: fix mode determination for e820 devices
......@@ -434,42 +434,6 @@ bool blkdev_dax_capable(struct block_device *bdev)
return true;
}
static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
{
unsigned long arg;
int rc = 0;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (get_user(arg, (int __user *)(argp)))
return -EFAULT;
arg = !!arg;
if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
return 0;
if (arg)
arg = S_DAX;
if (arg && !blkdev_dax_capable(bdev))
return -ENOTTY;
inode_lock(bdev->bd_inode);
if (bdev->bd_map_count == 0)
inode_set_flags(bdev->bd_inode, arg, S_DAX);
else
rc = -EBUSY;
inode_unlock(bdev->bd_inode);
return rc;
}
#else
static int blkdev_daxset(struct block_device *bdev, int arg)
{
if (arg)
return -ENOTTY;
return 0;
}
#endif
static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
......@@ -634,8 +598,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKTRACESETUP:
case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, argp);
case BLKDAXSET:
return blkdev_daxset(bdev, arg);
case BLKDAXGET:
return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
break;
......
......@@ -16,6 +16,7 @@
#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
#include <linux/dax.h>
#include <linux/blktrace_api.h>
#include "partitions/check.h"
......@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
return 0;
}
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
NULL);
}
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
{
struct page *page;
page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
NULL);
/* don't populate page cache for dax capable devices */
if (IS_DAX(bdev->bd_inode))
page = read_dax_sector(bdev, n);
else
page = read_pagecache_sector(bdev, n);
if (!IS_ERR(page)) {
if (PageError(page))
goto fail;
......
......@@ -1277,10 +1277,12 @@ static ssize_t mode_show(struct device *dev,
device_lock(dev);
claim = ndns->claim;
if (pmem_should_map_pages(dev) || (claim && is_nd_pfn(claim)))
mode = "memory";
else if (claim && is_nd_btt(claim))
if (claim && is_nd_btt(claim))
mode = "safe";
else if (claim && is_nd_pfn(claim))
mode = "memory";
else if (!claim && pmem_should_map_pages(dev))
mode = "memory";
else
mode = "raw";
rc = sprintf(buf, "%s\n", mode);
......
......@@ -301,10 +301,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
switch (le32_to_cpu(pfn_sb->mode)) {
case PFN_MODE_RAM:
break;
case PFN_MODE_PMEM:
/* TODO: allocate from PMEM support */
return -ENOTTY;
break;
default:
return -ENXIO;
}
......
......@@ -1736,37 +1736,13 @@ static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
}
static void blkdev_vm_open(struct vm_area_struct *vma)
{
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
struct block_device *bdev = I_BDEV(bd_inode);
inode_lock(bd_inode);
bdev->bd_map_count++;
inode_unlock(bd_inode);
}
static void blkdev_vm_close(struct vm_area_struct *vma)
{
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
struct block_device *bdev = I_BDEV(bd_inode);
inode_lock(bd_inode);
bdev->bd_map_count--;
inode_unlock(bd_inode);
}
static const struct vm_operations_struct blkdev_dax_vm_ops = {
.open = blkdev_vm_open,
.close = blkdev_vm_close,
.fault = blkdev_dax_fault,
.pmd_fault = blkdev_dax_pmd_fault,
.pfn_mkwrite = blkdev_dax_fault,
};
static const struct vm_operations_struct blkdev_default_vm_ops = {
.open = blkdev_vm_open,
.close = blkdev_vm_close,
.fault = filemap_fault,
.map_pages = filemap_map_pages,
};
......@@ -1774,18 +1750,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = {
static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *bd_inode = bdev_file_inode(file);
struct block_device *bdev = I_BDEV(bd_inode);
file_accessed(file);
inode_lock(bd_inode);
bdev->bd_map_count++;
if (IS_DAX(bd_inode)) {
vma->vm_ops = &blkdev_dax_vm_ops;
vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
} else {
vma->vm_ops = &blkdev_default_vm_ops;
}
inode_unlock(bd_inode);
return 0;
}
......
......@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
blk_queue_exit(bdev->bd_queue);
}
struct page *read_dax_sector(struct block_device *bdev, sector_t n)
{
struct page *page = alloc_pages(GFP_KERNEL, 0);
struct blk_dax_ctl dax = {
.size = PAGE_SIZE,
.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
};
long rc;
if (!page)
return ERR_PTR(-ENOMEM);
rc = dax_map_atomic(bdev, &dax);
if (rc < 0)
return ERR_PTR(rc);
memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
dax_unmap_atomic(bdev, &dax);
return page;
}
/*
* dax_clear_blocks() is called from within transaction context from XFS,
* and hence this means the stack from this point must follow GFP_NOFS
......
......@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t);
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t);
#ifdef CONFIG_FS_DAX
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
#else
static inline struct page *read_dax_sector(struct block_device *bdev,
sector_t n)
{
return ERR_PTR(-ENXIO);
}
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
unsigned int flags, get_block_t, dax_iodone_t);
......
......@@ -484,9 +484,6 @@ struct block_device {
int bd_fsfreeze_count;
/* Mutex for freeze */
struct mutex bd_fsfreeze_mutex;
#ifdef CONFIG_FS_DAX
int bd_map_count;
#endif
};
/*
......@@ -2907,7 +2904,7 @@ extern void replace_mount_options(struct super_block *sb, char *options);
static inline bool io_is_direct(struct file *filp)
{
return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
}
static inline int iocb_flags(struct file *file)
......
......@@ -29,7 +29,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
return __pfn_to_pfn_t(pfn, 0);
}
extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
static inline bool pfn_t_has_page(pfn_t pfn)
{
......@@ -48,7 +48,7 @@ static inline struct page *pfn_t_to_page(pfn_t pfn)
return NULL;
}
static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
{
return PFN_PHYS(pfn_t_to_pfn(pfn));
}
......
......@@ -222,7 +222,6 @@ struct fsxattr {
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
#define BLKDAXSET _IO(0x12,128)
#define BLKDAXGET _IO(0x12,129)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
......
......@@ -150,7 +150,7 @@ void devm_memunmap(struct device *dev, void *addr)
}
EXPORT_SYMBOL(devm_memunmap);
pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
{
return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
}
......@@ -183,7 +183,11 @@ EXPORT_SYMBOL(put_zone_device_page);
static void pgmap_radix_release(struct resource *res)
{
resource_size_t key;
resource_size_t key, align_start, align_size, align_end;
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
align_end = align_start + align_size - 1;
mutex_lock(&pgmap_lock);
for (key = res->start; key <= res->end; key += SECTION_SIZE)
......@@ -226,12 +230,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
percpu_ref_put(pgmap->ref);
}
pgmap_radix_release(res);
/* pages are dead and unused, undo the arch mapping */
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
arch_remove_memory(align_start, align_size);
pgmap_radix_release(res);
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
"%s: failed to free all reserved pages\n", __func__);
}
......@@ -267,7 +270,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
{
int is_ram = region_intersects(res->start, resource_size(res),
"System RAM");
resource_size_t key, align_start, align_size;
resource_size_t key, align_start, align_size, align_end;
struct dev_pagemap *pgmap;
struct page_map *page_map;
unsigned long pfn;
......@@ -309,7 +312,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
mutex_lock(&pgmap_lock);
error = 0;
for (key = res->start; key <= res->end; key += SECTION_SIZE) {
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
align_end = align_start + align_size - 1;
for (key = align_start; key <= align_end; key += SECTION_SIZE) {
struct dev_pagemap *dup;
rcu_read_lock();
......@@ -336,8 +342,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (nid < 0)
nid = numa_mem_id();
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
error = arch_add_memory(nid, align_start, align_size, true);
if (error)
goto err_add_memory;
......
......@@ -1591,10 +1591,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
* than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
* without pte special, it would there be refcounted as a normal page.
*/
if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
struct page *page;
page = pfn_t_to_page(pfn);
/*
* At this point we are committed to insert_page()
* regardless of whether the caller specified flags that
* result in pfn_t_has_page() == false.
*/
page = pfn_to_page(pfn_t_to_pfn(pfn));
return insert_page(vma, addr, page, vma->vm_page_prot);
}
return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
......
......@@ -113,7 +113,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
}
EXPORT_SYMBOL(__wrap_devm_memremap_pages);
pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
{
struct nfit_test_resource *nfit_res = get_nfit_res(addr);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册