提交 a5516438 编写于 作者: A Andi Kleen 提交者: Linus Torvalds

hugetlb: modular state for hugetlb page size

The goal of this patchset is to support multiple hugetlb page sizes.  This
is achieved by introducing a new struct hstate structure, which
encapsulates the important hugetlb state and constants (eg.  huge page
size, number of huge pages currently allocated, etc).

The hstate structure is then passed around the code which requires these
fields, they will do the right thing regardless of the exact hstate they
are operating on.

This patch adds the hstate structure, with a single global instance of it
(default_hstate), and does the basic work of converting hugetlb to use the
hstate.

Future patches will add more hstate structures to allow for different
hugetlbfs mounts to have different page sizes.

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: NAdam Litke <agl@us.ibm.com>
Acked-by: NNishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: NAndi Kleen <ak@suse.de>
Signed-off-by: NNick Piggin <npiggin@suse.de>
Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
上级 b7ba30c6
......@@ -24,7 +24,7 @@
unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
pte_t *
huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
unsigned long taddr = htlbpage_to_page(addr);
pgd_t *pgd;
......@@ -75,7 +75,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
* Don't actually need to do any preparation, but need to make sure
* the address is in the right region.
*/
int prepare_hugepage_range(unsigned long addr, unsigned long len)
int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
return -EINVAL;
......@@ -149,7 +150,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
/* Handle MAP_FIXED */
if (flags & MAP_FIXED) {
if (prepare_hugepage_range(addr, len))
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
return addr;
}
......
......@@ -128,7 +128,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
return NULL;
}
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pg;
pud_t *pu;
......
......@@ -72,7 +72,8 @@ void arch_release_hugepage(struct page *page)
page[1].index = 0;
}
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgdp;
pud_t *pudp;
......
......@@ -22,7 +22,8 @@
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
......
......@@ -175,7 +175,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return -ENOMEM;
if (flags & MAP_FIXED) {
if (prepare_hugepage_range(addr, len))
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
return addr;
}
......@@ -195,7 +195,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
pgoff, flags);
}
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
......
......@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
return 1;
}
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
......@@ -368,7 +369,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return -ENOMEM;
if (flags & MAP_FIXED) {
if (prepare_hugepage_range(addr, len))
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
return addr;
}
......
......@@ -80,6 +80,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
struct inode *inode = file->f_path.dentry->d_inode;
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
/*
* vma address alignment (but not the pgoff alignment) has
......@@ -92,7 +93,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
vma->vm_ops = &hugetlb_vm_ops;
if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT))
if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
......@@ -104,8 +105,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
if (hugetlb_reserve_pages(inode,
vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
len >> HPAGE_SHIFT, vma))
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma))
goto out;
ret = 0;
......@@ -130,20 +131,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long start_addr;
struct hstate *h = hstate_file(file);
if (len & ~HPAGE_MASK)
if (len & ~huge_page_mask(h))
return -EINVAL;
if (len > TASK_SIZE)
return -ENOMEM;
if (flags & MAP_FIXED) {
if (prepare_hugepage_range(addr, len))
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
return addr;
}
if (addr) {
addr = ALIGN(addr, HPAGE_SIZE);
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
......@@ -156,7 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
start_addr = TASK_UNMAPPED_BASE;
full_search:
addr = ALIGN(start_addr, HPAGE_SIZE);
addr = ALIGN(start_addr, huge_page_size(h));
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
......@@ -174,7 +176,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (!vma || addr + len <= vma->vm_start)
return addr;
addr = ALIGN(vma->vm_end, HPAGE_SIZE);
addr = ALIGN(vma->vm_end, huge_page_size(h));
}
}
#endif
......@@ -225,10 +227,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
size_t len, loff_t *ppos)
{
struct hstate *h = hstate_file(filp);
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
unsigned long index = *ppos >> HPAGE_SHIFT;
unsigned long offset = *ppos & ~HPAGE_MASK;
unsigned long index = *ppos >> huge_page_shift(h);
unsigned long offset = *ppos & ~huge_page_mask(h);
unsigned long end_index;
loff_t isize;
ssize_t retval = 0;
......@@ -243,17 +246,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
if (!isize)
goto out;
end_index = (isize - 1) >> HPAGE_SHIFT;
end_index = (isize - 1) >> huge_page_shift(h);
for (;;) {
struct page *page;
int nr, ret;
unsigned long nr, ret;
/* nr is the maximum number of bytes to copy from this page */
nr = HPAGE_SIZE;
nr = huge_page_size(h);
if (index >= end_index) {
if (index > end_index)
goto out;
nr = ((isize - 1) & ~HPAGE_MASK) + 1;
nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
if (nr <= offset) {
goto out;
}
......@@ -287,8 +290,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
offset += ret;
retval += ret;
len -= ret;
index += offset >> HPAGE_SHIFT;
offset &= ~HPAGE_MASK;
index += offset >> huge_page_shift(h);
offset &= ~huge_page_mask(h);
if (page)
page_cache_release(page);
......@@ -298,7 +301,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
break;
}
out:
*ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
*ppos = ((loff_t)index << huge_page_shift(h)) + offset;
mutex_unlock(&inode->i_mutex);
return retval;
}
......@@ -339,8 +342,9 @@ static void truncate_huge_page(struct page *page)
static void truncate_hugepages(struct inode *inode, loff_t lstart)
{
struct hstate *h = hstate_inode(inode);
struct address_space *mapping = &inode->i_data;
const pgoff_t start = lstart >> HPAGE_SHIFT;
const pgoff_t start = lstart >> huge_page_shift(h);
struct pagevec pvec;
pgoff_t next;
int i, freed = 0;
......@@ -449,8 +453,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
pgoff_t pgoff;
struct address_space *mapping = inode->i_mapping;
struct hstate *h = hstate_inode(inode);
BUG_ON(offset & ~HPAGE_MASK);
BUG_ON(offset & ~huge_page_mask(h));
pgoff = offset >> PAGE_SHIFT;
i_size_write(inode, offset);
......@@ -465,6 +470,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
struct hstate *h = hstate_inode(inode);
int error;
unsigned int ia_valid = attr->ia_valid;
......@@ -476,7 +482,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
if (ia_valid & ATTR_SIZE) {
error = -EINVAL;
if (!(attr->ia_size & ~HPAGE_MASK))
if (!(attr->ia_size & ~huge_page_mask(h)))
error = hugetlb_vmtruncate(inode, attr->ia_size);
if (error)
goto out;
......@@ -610,9 +616,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
struct hstate *h = hstate_inode(dentry->d_inode);
buf->f_type = HUGETLBFS_MAGIC;
buf->f_bsize = HPAGE_SIZE;
buf->f_bsize = huge_page_size(h);
if (sbinfo) {
spin_lock(&sbinfo->stat_lock);
/* If no limits set, just report 0 for max/free/used
......@@ -942,7 +949,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
goto out_dentry;
error = -ENOMEM;
if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT, NULL))
if (hugetlb_reserve_pages(inode, 0,
size >> huge_page_shift(hstate_inode(inode)), NULL))
goto out_inode;
d_instantiate(dentry, inode);
......
......@@ -8,7 +8,8 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor,
unsigned long ceiling);
int prepare_hugepage_range(unsigned long addr, unsigned long len);
int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len);
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
......
......@@ -21,7 +21,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
* If the arch doesn't supply something else, assume that hugepage
* size aligned regions are ok without further preparation.
*/
static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
return -EINVAL;
......
......@@ -22,7 +22,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
* If the arch doesn't supply something else, assume that hugepage
* size aligned regions are ok without further preparation.
*/
static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
return -EINVAL;
......
......@@ -14,7 +14,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
* If the arch doesn't supply something else, assume that hugepage
* size aligned regions are ok without further preparation.
*/
static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
return -EINVAL;
......
......@@ -22,7 +22,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
* If the arch doesn't supply something else, assume that hugepage
* size aligned regions are ok without further preparation.
*/
static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
return -EINVAL;
......
......@@ -14,11 +14,13 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
* If the arch doesn't supply something else, assume that hugepage
* size aligned regions are ok without further preparation.
*/
static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
struct hstate *h = hstate_file(file);
if (len & ~huge_page_mask(h))
return -EINVAL;
if (addr & ~HPAGE_MASK)
if (addr & ~huge_page_mask(h))
return -EINVAL;
return 0;
}
......
......@@ -8,7 +8,6 @@
#include <linux/mempolicy.h>
#include <linux/shm.h>
#include <asm/tlbflush.h>
#include <asm/hugetlb.h>
struct ctl_table;
......@@ -45,7 +44,8 @@ extern int sysctl_hugetlb_shm_group;
/* arch callbacks */
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
......@@ -80,7 +80,7 @@ static inline unsigned long hugetlb_total_pages(void)
#define hugetlb_report_meminfo(buf) 0
#define hugetlb_report_node_meminfo(n, buf) 0
#define follow_huge_pmd(mm, addr, pmd, write) NULL
#define prepare_hugepage_range(addr,len) (-EINVAL)
#define prepare_hugepage_range(file, addr, len) (-EINVAL)
#define pmd_huge(x) 0
#define is_hugepage_only_range(mm, addr, len) 0
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
......@@ -134,8 +134,6 @@ struct file *hugetlb_file_setup(const char *name, size_t);
int hugetlb_get_quota(struct address_space *mapping, long delta);
void hugetlb_put_quota(struct address_space *mapping, long delta);
#define BLOCKS_PER_HUGEPAGE (HPAGE_SIZE / 512)
static inline int is_file_hugepages(struct file *file)
{
if (file->f_op == &hugetlbfs_file_operations)
......@@ -164,4 +162,84 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long flags);
#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
#ifdef CONFIG_HUGETLB_PAGE
/* Defines one hugetlb page size */
struct hstate {
int hugetlb_next_nid;
unsigned int order;
unsigned long mask;
unsigned long max_huge_pages;
unsigned long nr_huge_pages;
unsigned long free_huge_pages;
unsigned long resv_huge_pages;
unsigned long surplus_huge_pages;
unsigned long nr_overcommit_huge_pages;
struct list_head hugepage_freelists[MAX_NUMNODES];
unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
};
extern struct hstate default_hstate;
static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
return &default_hstate;
}
static inline struct hstate *hstate_file(struct file *f)
{
return &default_hstate;
}
static inline struct hstate *hstate_inode(struct inode *i)
{
return &default_hstate;
}
static inline unsigned long huge_page_size(struct hstate *h)
{
return (unsigned long)PAGE_SIZE << h->order;
}
static inline unsigned long huge_page_mask(struct hstate *h)
{
return h->mask;
}
static inline unsigned int huge_page_order(struct hstate *h)
{
return h->order;
}
static inline unsigned huge_page_shift(struct hstate *h)
{
return h->order + PAGE_SHIFT;
}
static inline unsigned int pages_per_huge_page(struct hstate *h)
{
return 1 << h->order;
}
static inline unsigned int blocks_per_huge_page(struct hstate *h)
{
return huge_page_size(h) / 512;
}
#include <asm/hugetlb.h>
#else
struct hstate {};
#define hstate_file(f) NULL
#define hstate_vma(v) NULL
#define hstate_inode(i) NULL
#define huge_page_size(h) PAGE_SIZE
#define huge_page_mask(h) PAGE_MASK
#define huge_page_order(h) 0
#define huge_page_shift(h) PAGE_SHIFT
#define pages_per_huge_page(h) 1
#endif
#endif /* _LINUX_HUGETLB_H */
......@@ -577,7 +577,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
if (is_file_hugepages(shp->shm_file)) {
struct address_space *mapping = inode->i_mapping;
*rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages;
struct hstate *h = hstate_file(shp->shm_file);
*rss += pages_per_huge_page(h) * mapping->nrpages;
} else {
struct shmem_inode_info *info = SHMEM_I(inode);
spin_lock(&info->lock);
......
此差异已折叠。
......@@ -903,7 +903,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
if (unlikely(is_vm_hugetlb_page(vma))) {
unmap_hugepage_range(vma, start, end, NULL);
zap_work -= (end - start) /
(HPAGE_SIZE / PAGE_SIZE);
pages_per_huge_page(hstate_vma(vma));
start = end;
} else
start = unmap_page_range(*tlbp, vma,
......
......@@ -1481,7 +1481,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) {
zl = node_zonelist(interleave_nid(*mpol, vma, addr,
HPAGE_SHIFT), gfp_flags);
huge_page_shift(hstate_vma(vma))), gfp_flags);
} else {
zl = policy_zonelist(gfp_flags, *mpol);
if ((*mpol)->mode == MPOL_BIND)
......@@ -2220,9 +2220,12 @@ static void check_huge_range(struct vm_area_struct *vma,
{
unsigned long addr;
struct page *page;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
for (addr = start; addr < end; addr += HPAGE_SIZE) {
pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK);
for (addr = start; addr < end; addr += sz) {
pte_t *ptep = huge_pte_offset(vma->vm_mm,
addr & huge_page_mask(h));
pte_t pte;
if (!ptep)
......
......@@ -1812,7 +1812,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
struct mempolicy *pol;
struct vm_area_struct *new;
if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
if (is_vm_hugetlb_page(vma) && (addr &
~(huge_page_mask(hstate_vma(vma)))))
return -EINVAL;
if (mm->map_count >= sysctl_max_map_count)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册