提交 aa2e7100 编写于 作者: L Linus Torvalds

Merge branch 'akpm' (patches from Andrew Morton)

Merge misc fixes from Andrew Morton:
 "A few hotfixes and various leftovers which were awaiting other merges.

  Mainly movement of zram into mm/"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (25 commits)
  memcg: fix mutex not unlocked on memcg_create_kmem_cache fail path
  Documentation/filesystems/vfs.txt: update file_operations documentation
  mm, oom: base root bonus on current usage
  mm: don't lose the SOFT_DIRTY flag on mprotect
  mm/slub.c: fix page->_count corruption (again)
  mm/mempolicy.c: fix mempolicy printing in numa_maps
  zram: remove zram->lock in read path and change it with mutex
  zram: remove workqueue for freeing removed pending slot
  zram: introduce zram->tb_lock
  zram: use atomic operation for stat
  zram: remove unnecessary free
  zram: delay pending free request in read path
  zram: fix race between reset and flushing pending work
  zsmalloc: add maintainers
  zram: add zram maintainers
  zsmalloc: add copyright
  zram: add copyright
  zram: remove old private project comment
  zram: promote zram from staging
  zsmalloc: move it under mm
  ...
zram: Compressed RAM based block devices
----------------------------------------
Project home: http://compcache.googlecode.com/
* Introduction
The zram module creates RAM based block devices named /dev/zram<id>
......@@ -69,9 +67,5 @@ Following shows a typical sequence of steps for using zram.
resets the disksize to zero. You must set the disksize again
before reusing the device.
Please report any problems at:
- Mailing list: linux-mm-cc at laptop dot org
- Issue tracker: http://code.google.com/p/compcache/issues/list
Nitin Gupta
ngupta@vflare.org
......@@ -1386,8 +1386,8 @@ may allocate from based on an estimation of its current memory and swap use.
For example, if a task is using all allowed memory, its badness score will be
1000. If it is using half of its allowed memory, its score will be 500.
There is an additional factor included in the badness score: root
processes are given 3% extra memory over other tasks.
There is an additional factor included in the badness score: the current memory
and swap usage is discounted by 3% for root processes.
The amount of "allowed" memory depends on the context in which the oom killer
was called. If it is due to the memory assigned to the allocating task's cpuset
......
......@@ -782,7 +782,7 @@ struct file_operations
----------------------
This describes how the VFS can manipulate an open file. As of kernel
3.5, the following members are defined:
3.12, the following members are defined:
struct file_operations {
struct module *owner;
......@@ -803,9 +803,6 @@ struct file_operations {
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
......@@ -814,6 +811,7 @@ struct file_operations {
ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long arg, struct file_lock **);
long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
int (*show_fdinfo)(struct seq_file *m, struct file *f);
};
Again, all methods are called without any locks being held, unless
......@@ -864,12 +862,6 @@ otherwise noted.
lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
commands
readv: called by the readv(2) system call
writev: called by the writev(2) system call
sendfile: called by the sendfile(2) system call
get_unmapped_area: called by the mmap(2) system call
check_flags: called by the fcntl(2) system call for F_SETFL command
......
......@@ -9740,11 +9740,27 @@ T: Mercurial http://linuxtv.org/hg/v4l-dvb
S: Odd Fixes
F: drivers/media/pci/zoran/
ZRAM COMPRESSED RAM BLOCK DEVICE DRIVER
M: Minchan Kim <minchan@kernel.org>
M: Nitin Gupta <ngupta@vflare.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: drivers/block/zram/
F: Documentation/blockdev/zram.txt
ZS DECSTATION Z85C30 SERIAL DRIVER
M: "Maciej W. Rozycki" <macro@linux-mips.org>
S: Maintained
F: drivers/tty/serial/zs.*
ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR
M: Minchan Kim <minchan@kernel.org>
M: Nitin Gupta <ngupta@vflare.org>
L: linux-mm@kvack.org
S: Maintained
F: mm/zsmalloc.c
F: include/linux/zsmalloc.h
ZSWAP COMPRESSED SWAP CACHING
M: Seth Jennings <sjenning@linux.vnet.ibm.com>
L: linux-mm@kvack.org
......
......@@ -121,7 +121,8 @@
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
_PAGE_SOFT_DIRTY)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
......
......@@ -108,6 +108,8 @@ source "drivers/block/paride/Kconfig"
source "drivers/block/mtip32xx/Kconfig"
source "drivers/block/zram/Kconfig"
config BLK_CPQ_DA
tristate "Compaq SMART2 support"
depends on PCI && VIRT_TO_BUS && 0
......
......@@ -42,6 +42,7 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
obj-$(CONFIG_ZRAM) += zram/
nvme-y := nvme-core.o nvme-scsi.o
skd-y := skd_main.o
......
......@@ -14,7 +14,6 @@ config ZRAM
disks and maybe many more.
See zram.txt for more information.
Project home: <https://compcache.googlecode.com/>
config ZRAM_DEBUG
bool "Compressed RAM block device debug support"
......
......@@ -2,6 +2,7 @@
* Compressed RAM block device
*
* Copyright (C) 2008, 2009, 2010 Nitin Gupta
* 2012, 2013 Minchan Kim
*
* This code is released using a dual license strategy: BSD/GPL
* You can choose the licence that better fits your requirements.
......@@ -9,7 +10,6 @@
* Released under the terms of 3-clause BSD License
* Released under the terms of GNU General Public License Version 2.0
*
* Project home: http://compcache.googlecode.com
*/
#define KMSG_COMPONENT "zram"
......@@ -104,7 +104,7 @@ static ssize_t zero_pages_show(struct device *dev,
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%u\n", zram->stats.pages_zero);
return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero));
}
static ssize_t orig_data_size_show(struct device *dev,
......@@ -113,7 +113,7 @@ static ssize_t orig_data_size_show(struct device *dev,
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%llu\n",
(u64)(zram->stats.pages_stored) << PAGE_SHIFT);
(u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
}
static ssize_t compr_data_size_show(struct device *dev,
......@@ -140,6 +140,7 @@ static ssize_t mem_used_total_show(struct device *dev,
return sprintf(buf, "%llu\n", val);
}
/* flag operations need meta->tb_lock */
static int zram_test_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
......@@ -228,6 +229,8 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
goto free_table;
}
rwlock_init(&meta->tb_lock);
mutex_init(&meta->buffer_lock);
return meta;
free_table:
......@@ -280,6 +283,7 @@ static void handle_zero_page(struct bio_vec *bvec)
flush_dcache_page(page);
}
/* NOTE: caller should hold meta->tb_lock with write-side */
static void zram_free_page(struct zram *zram, size_t index)
{
struct zram_meta *meta = zram->meta;
......@@ -293,21 +297,21 @@ static void zram_free_page(struct zram *zram, size_t index)
*/
if (zram_test_flag(meta, index, ZRAM_ZERO)) {
zram_clear_flag(meta, index, ZRAM_ZERO);
zram->stats.pages_zero--;
atomic_dec(&zram->stats.pages_zero);
}
return;
}
if (unlikely(size > max_zpage_size))
zram->stats.bad_compress--;
atomic_dec(&zram->stats.bad_compress);
zs_free(meta->mem_pool, handle);
if (size <= PAGE_SIZE / 2)
zram->stats.good_compress--;
atomic_dec(&zram->stats.good_compress);
atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
zram->stats.pages_stored--;
atomic_dec(&zram->stats.pages_stored);
meta->table[index].handle = 0;
meta->table[index].size = 0;
......@@ -319,20 +323,26 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
size_t clen = PAGE_SIZE;
unsigned char *cmem;
struct zram_meta *meta = zram->meta;
unsigned long handle = meta->table[index].handle;
unsigned long handle;
u16 size;
read_lock(&meta->tb_lock);
handle = meta->table[index].handle;
size = meta->table[index].size;
if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
read_unlock(&meta->tb_lock);
clear_page(mem);
return 0;
}
cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
if (meta->table[index].size == PAGE_SIZE)
if (size == PAGE_SIZE)
copy_page(mem, cmem);
else
ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
mem, &clen);
ret = lzo1x_decompress_safe(cmem, size, mem, &clen);
zs_unmap_object(meta->mem_pool, handle);
read_unlock(&meta->tb_lock);
/* Should NEVER happen. Return bio error if it does. */
if (unlikely(ret != LZO_E_OK)) {
......@@ -353,11 +363,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
struct zram_meta *meta = zram->meta;
page = bvec->bv_page;
read_lock(&meta->tb_lock);
if (unlikely(!meta->table[index].handle) ||
zram_test_flag(meta, index, ZRAM_ZERO)) {
read_unlock(&meta->tb_lock);
handle_zero_page(bvec);
return 0;
}
read_unlock(&meta->tb_lock);
if (is_partial_io(bvec))
/* Use a temporary buffer to decompress the page */
......@@ -400,6 +413,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
struct page *page;
unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
struct zram_meta *meta = zram->meta;
bool locked = false;
page = bvec->bv_page;
src = meta->compress_buffer;
......@@ -419,6 +433,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
goto out;
}
mutex_lock(&meta->buffer_lock);
locked = true;
user_mem = kmap_atomic(page);
if (is_partial_io(bvec)) {
......@@ -433,25 +449,18 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
if (page_zero_filled(uncmem)) {
kunmap_atomic(user_mem);
/* Free memory associated with this sector now. */
write_lock(&zram->meta->tb_lock);
zram_free_page(zram, index);
zram->stats.pages_zero++;
zram_set_flag(meta, index, ZRAM_ZERO);
write_unlock(&zram->meta->tb_lock);
atomic_inc(&zram->stats.pages_zero);
ret = 0;
goto out;
}
/*
* zram_slot_free_notify could miss free so that let's
* double check.
*/
if (unlikely(meta->table[index].handle ||
zram_test_flag(meta, index, ZRAM_ZERO)))
zram_free_page(zram, index);
ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
meta->compress_workmem);
if (!is_partial_io(bvec)) {
kunmap_atomic(user_mem);
user_mem = NULL;
......@@ -464,7 +473,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
}
if (unlikely(clen > max_zpage_size)) {
zram->stats.bad_compress++;
atomic_inc(&zram->stats.bad_compress);
clen = PAGE_SIZE;
src = NULL;
if (is_partial_io(bvec))
......@@ -494,18 +503,22 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
* Free memory associated with this sector
* before overwriting unused sectors.
*/
write_lock(&zram->meta->tb_lock);
zram_free_page(zram, index);
meta->table[index].handle = handle;
meta->table[index].size = clen;
write_unlock(&zram->meta->tb_lock);
/* Update stats */
atomic64_add(clen, &zram->stats.compr_size);
zram->stats.pages_stored++;
atomic_inc(&zram->stats.pages_stored);
if (clen <= PAGE_SIZE / 2)
zram->stats.good_compress++;
atomic_inc(&zram->stats.good_compress);
out:
if (locked)
mutex_unlock(&meta->buffer_lock);
if (is_partial_io(bvec))
kfree(uncmem);
......@@ -514,36 +527,15 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
return ret;
}
static void handle_pending_slot_free(struct zram *zram)
{
struct zram_slot_free *free_rq;
spin_lock(&zram->slot_free_lock);
while (zram->slot_free_rq) {
free_rq = zram->slot_free_rq;
zram->slot_free_rq = free_rq->next;
zram_free_page(zram, free_rq->index);
kfree(free_rq);
}
spin_unlock(&zram->slot_free_lock);
}
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset, struct bio *bio, int rw)
{
int ret;
if (rw == READ) {
down_read(&zram->lock);
handle_pending_slot_free(zram);
if (rw == READ)
ret = zram_bvec_read(zram, bvec, index, offset, bio);
up_read(&zram->lock);
} else {
down_write(&zram->lock);
handle_pending_slot_free(zram);
else
ret = zram_bvec_write(zram, bvec, index, offset);
up_write(&zram->lock);
}
return ret;
}
......@@ -553,8 +545,6 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
size_t index;
struct zram_meta *meta;
flush_work(&zram->free_work);
down_write(&zram->init_lock);
if (!zram->init_done) {
up_write(&zram->init_lock);
......@@ -762,40 +752,19 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
bio_io_error(bio);
}
static void zram_slot_free(struct work_struct *work)
{
struct zram *zram;
zram = container_of(work, struct zram, free_work);
down_write(&zram->lock);
handle_pending_slot_free(zram);
up_write(&zram->lock);
}
static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq)
{
spin_lock(&zram->slot_free_lock);
free_rq->next = zram->slot_free_rq;
zram->slot_free_rq = free_rq;
spin_unlock(&zram->slot_free_lock);
}
static void zram_slot_free_notify(struct block_device *bdev,
unsigned long index)
{
struct zram *zram;
struct zram_slot_free *free_rq;
struct zram_meta *meta;
zram = bdev->bd_disk->private_data;
atomic64_inc(&zram->stats.notify_free);
free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC);
if (!free_rq)
return;
meta = zram->meta;
free_rq->index = index;
add_slot_free(zram, free_rq);
schedule_work(&zram->free_work);
write_lock(&meta->tb_lock);
zram_free_page(zram, index);
write_unlock(&meta->tb_lock);
atomic64_inc(&zram->stats.notify_free);
}
static const struct block_device_operations zram_devops = {
......@@ -839,13 +808,8 @@ static int create_device(struct zram *zram, int device_id)
{
int ret = -ENOMEM;
init_rwsem(&zram->lock);
init_rwsem(&zram->init_lock);
INIT_WORK(&zram->free_work, zram_slot_free);
spin_lock_init(&zram->slot_free_lock);
zram->slot_free_rq = NULL;
zram->queue = blk_alloc_queue(GFP_KERNEL);
if (!zram->queue) {
pr_err("Error allocating disk queue for device %d\n",
......
......@@ -2,6 +2,7 @@
* Compressed RAM block device
*
* Copyright (C) 2008, 2009, 2010 Nitin Gupta
* 2012, 2013 Minchan Kim
*
* This code is released using a dual license strategy: BSD/GPL
* You can choose the licence that better fits your requirements.
......@@ -9,7 +10,6 @@
* Released under the terms of 3-clause BSD License
* Released under the terms of GNU General Public License Version 2.0
*
* Project home: http://compcache.googlecode.com
*/
#ifndef _ZRAM_DRV_H_
......@@ -17,8 +17,7 @@
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include "../zsmalloc/zsmalloc.h"
#include <linux/zsmalloc.h>
/*
* Some arbitrary value. This is just to catch
......@@ -69,10 +68,6 @@ struct table {
u8 flags;
} __aligned(4);
/*
* All 64bit fields should only be manipulated by 64bit atomic accessors.
* All modifications to 32bit counter should be protected by zram->lock.
*/
struct zram_stats {
atomic64_t compr_size; /* compressed size of pages stored */
atomic64_t num_reads; /* failed + successful */
......@@ -81,33 +76,23 @@ struct zram_stats {
atomic64_t failed_writes; /* can happen when memory is too low */
atomic64_t invalid_io; /* non-page-aligned I/O requests */
atomic64_t notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 pages_stored; /* no. of pages currently stored */
u32 good_compress; /* % of pages with compression ratio<=50% */
u32 bad_compress; /* % of pages with compression ratio>=75% */
atomic_t pages_zero; /* no. of zero filled pages */
atomic_t pages_stored; /* no. of pages currently stored */
atomic_t good_compress; /* % of pages with compression ratio<=50% */
atomic_t bad_compress; /* % of pages with compression ratio>=75% */
};
struct zram_meta {
rwlock_t tb_lock; /* protect table */
void *compress_workmem;
void *compress_buffer;
struct table *table;
struct zs_pool *mem_pool;
};
struct zram_slot_free {
unsigned long index;
struct zram_slot_free *next;
struct mutex buffer_lock; /* protect compress buffers */
};
struct zram {
struct zram_meta *meta;
struct rw_semaphore lock; /* protect compression buffers, table,
* 32bit stat counters against concurrent
* notifications, reads and writes */
struct work_struct free_work; /* handle pending free request */
struct zram_slot_free *slot_free_rq; /* list head of free request */
struct request_queue *queue;
struct gendisk *disk;
int init_done;
......@@ -118,7 +103,6 @@ struct zram {
* we can store in a disk.
*/
u64 disksize; /* bytes */
spinlock_t slot_free_lock;
struct zram_stats stats;
};
......
......@@ -150,6 +150,7 @@ int mdiobus_register(struct mii_bus *bus)
err = device_register(&bus->dev);
if (err) {
pr_err("mii_bus %s failed to register\n", bus->id);
put_device(&bus->dev);
return -EINVAL;
}
......
......@@ -76,10 +76,6 @@ source "drivers/staging/sep/Kconfig"
source "drivers/staging/iio/Kconfig"
source "drivers/staging/zsmalloc/Kconfig"
source "drivers/staging/zram/Kconfig"
source "drivers/staging/wlags49_h2/Kconfig"
source "drivers/staging/wlags49_h25/Kconfig"
......
......@@ -32,8 +32,6 @@ obj-$(CONFIG_VT6656) += vt6656/
obj-$(CONFIG_VME_BUS) += vme/
obj-$(CONFIG_DX_SEP) += sep/
obj-$(CONFIG_IIO) += iio/
obj-$(CONFIG_ZRAM) += zram/
obj-$(CONFIG_ZSMALLOC) += zsmalloc/
obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/
obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/
obj-$(CONFIG_FB_SM7XX) += sm7xxfb/
......
config ZSMALLOC
bool "Memory allocator for compressed pages"
depends on MMU
default n
help
zsmalloc is a slab-based memory allocator designed to store
compressed RAM pages. zsmalloc uses virtual memory mapping
in order to reduce fragmentation. However, this results in a
non-standard allocator interface where a handle, not a pointer, is
returned by an alloc(). This handle must be mapped in order to
access the allocated space.
config PGTABLE_MAPPING
bool "Use page table mapping to access object in zsmalloc"
depends on ZSMALLOC
help
By default, zsmalloc uses a copy-based object mapping method to
access allocations that span two pages. However, if a particular
architecture (ex, ARM) performs VM mapping faster than copying,
then you should select this. This causes zsmalloc to use page table
mapping rather than copying for object mapping.
You can check speed with zsmalloc benchmark[1].
[1] https://github.com/spartacus06/zsmalloc
zsmalloc-y := zsmalloc-main.o
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
......@@ -228,7 +228,7 @@ struct lcd_device *lcd_device_register(const char *name, struct device *parent,
rc = device_register(&new_ld->dev);
if (rc) {
kfree(new_ld);
put_device(&new_ld->dev);
return ERR_PTR(rc);
}
......
......@@ -95,10 +95,7 @@ enum rq_cmd_type_bits {
* as well!
*/
struct request {
union {
struct list_head queuelist;
struct llist_node ll_list;
};
struct list_head queuelist;
union {
struct call_single_data csd;
struct work_struct mq_flush_data;
......
......@@ -264,7 +264,7 @@ static inline void * __init memblock_virt_alloc_low(
{
if (!align)
align = SMP_CACHE_BYTES;
return __alloc_bootmem_low(size, align, BOOTMEM_LOW_LIMIT);
return __alloc_bootmem_low(size, align, 0);
}
static inline void * __init memblock_virt_alloc_low_nopanic(
......@@ -272,7 +272,7 @@ static inline void * __init memblock_virt_alloc_low_nopanic(
{
if (!align)
align = SMP_CACHE_BYTES;
return __alloc_bootmem_low_nopanic(size, align, BOOTMEM_LOW_LIMIT);
return __alloc_bootmem_low_nopanic(size, align, 0);
}
static inline void * __init memblock_virt_alloc_from_nopanic(
......
......@@ -11,12 +11,16 @@
#include <linux/list.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/llist.h>
extern void cpu_idle(void);
typedef void (*smp_call_func_t)(void *info);
struct call_single_data {
struct list_head list;
union {
struct list_head list;
struct llist_node llist;
};
smp_call_func_t func;
void *info;
u16 flags;
......
......@@ -2,6 +2,7 @@
* zsmalloc memory allocator
*
* Copyright (C) 2011 Nitin Gupta
* Copyright (C) 2012, 2013 Minchan Kim
*
* This code is released using a dual license strategy: BSD/GPL
* You can choose the license that better fits your requirements.
......
......@@ -23,17 +23,11 @@ enum {
struct call_function_data {
struct call_single_data __percpu *csd;
cpumask_var_t cpumask;
cpumask_var_t cpumask_ipi;
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
struct call_single_queue {
struct list_head list;
raw_spinlock_t lock;
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
static int
hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
......@@ -47,14 +41,8 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
cpu_to_node(cpu)))
return notifier_from_errno(-ENOMEM);
if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
cpu_to_node(cpu))) {
free_cpumask_var(cfd->cpumask);
return notifier_from_errno(-ENOMEM);
}
cfd->csd = alloc_percpu(struct call_single_data);
if (!cfd->csd) {
free_cpumask_var(cfd->cpumask_ipi);
free_cpumask_var(cfd->cpumask);
return notifier_from_errno(-ENOMEM);
}
......@@ -67,7 +55,6 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DEAD:
case CPU_DEAD_FROZEN:
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
free_percpu(cfd->csd);
break;
#endif
......@@ -85,12 +72,8 @@ void __init call_function_init(void)
void *cpu = (void *)(long)smp_processor_id();
int i;
for_each_possible_cpu(i) {
struct call_single_queue *q = &per_cpu(call_single_queue, i);
raw_spin_lock_init(&q->lock);
INIT_LIST_HEAD(&q->list);
}
for_each_possible_cpu(i)
init_llist_head(&per_cpu(call_single_queue, i));
hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
register_cpu_notifier(&hotplug_cfd_notifier);
......@@ -141,18 +124,9 @@ static void csd_unlock(struct call_single_data *csd)
*/
static void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
{
struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
unsigned long flags;
int ipi;
if (wait)
csd->flags |= CSD_FLAG_WAIT;
raw_spin_lock_irqsave(&dst->lock, flags);
ipi = list_empty(&dst->list);
list_add_tail(&csd->list, &dst->list);
raw_spin_unlock_irqrestore(&dst->lock, flags);
/*
* The list addition should be visible before sending the IPI
* handler locks the list to pull the entry off it because of
......@@ -164,7 +138,7 @@ static void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
* locking and barrier primitives. Generic code isn't really
* equipped to do the right thing...
*/
if (ipi)
if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
arch_send_call_function_single_ipi(cpu);
if (wait)
......@@ -177,27 +151,26 @@ static void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
*/
void generic_smp_call_function_single_interrupt(void)
{
struct call_single_queue *q = &__get_cpu_var(call_single_queue);
LIST_HEAD(list);
struct llist_node *entry, *next;
/*
* Shouldn't receive this interrupt on a cpu that is not yet online.
*/
WARN_ON_ONCE(!cpu_online(smp_processor_id()));
raw_spin_lock(&q->lock);
list_replace_init(&q->list, &list);
raw_spin_unlock(&q->lock);
entry = llist_del_all(&__get_cpu_var(call_single_queue));
entry = llist_reverse_order(entry);
while (!list_empty(&list)) {
while (entry) {
struct call_single_data *csd;
csd = list_entry(list.next, struct call_single_data, list);
list_del(&csd->list);
next = entry->next;
csd = llist_entry(entry, struct call_single_data, llist);
csd->func(csd->info);
csd_unlock(csd);
entry = next;
}
}
......@@ -402,30 +375,17 @@ void smp_call_function_many(const struct cpumask *mask,
if (unlikely(!cpumask_weight(cfd->cpumask)))
return;
/*
* After we put an entry into the list, cfd->cpumask may be cleared
* again when another CPU sends another IPI for a SMP function call, so
* cfd->cpumask will be zero.
*/
cpumask_copy(cfd->cpumask_ipi, cfd->cpumask);
for_each_cpu(cpu, cfd->cpumask) {
struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
struct call_single_queue *dst =
&per_cpu(call_single_queue, cpu);
unsigned long flags;
csd_lock(csd);
csd->func = func;
csd->info = info;
raw_spin_lock_irqsave(&dst->lock, flags);
list_add_tail(&csd->list, &dst->list);
raw_spin_unlock_irqrestore(&dst->lock, flags);
llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
}
/* Send a message to all CPUs in the map */
arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
arch_send_call_function_ipi_mask(cfd->cpumask);
if (wait) {
for_each_cpu(cpu, cfd->cpumask) {
......
......@@ -552,3 +552,28 @@ config MEM_SOFT_DIRTY
it can be cleared by hands.
See Documentation/vm/soft-dirty.txt for more details.
config ZSMALLOC
bool "Memory allocator for compressed pages"
depends on MMU
default n
help
zsmalloc is a slab-based memory allocator designed to store
compressed RAM pages. zsmalloc uses virtual memory mapping
in order to reduce fragmentation. However, this results in a
non-standard allocator interface where a handle, not a pointer, is
returned by an alloc(). This handle must be mapped in order to
access the allocated space.
config PGTABLE_MAPPING
bool "Use page table mapping to access object in zsmalloc"
depends on ZSMALLOC
help
By default, zsmalloc uses a copy-based object mapping method to
access allocations that span two pages. However, if a particular
architecture (ex, ARM) performs VM mapping faster than copying,
then you should select this. This causes zsmalloc to use page table
mapping rather than copying for object mapping.
You can check speed with zsmalloc benchmark[1].
[1] https://github.com/spartacus06/zsmalloc
......@@ -60,3 +60,4 @@ obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
obj-$(CONFIG_ZBUD) += zbud.o
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
......@@ -3400,7 +3400,7 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
struct kmem_cache *s)
{
struct kmem_cache *new;
struct kmem_cache *new = NULL;
static char *tmp_name = NULL;
static DEFINE_MUTEX(mutex); /* protects tmp_name */
......@@ -3416,7 +3416,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
if (!tmp_name) {
tmp_name = kmalloc(PATH_MAX, GFP_KERNEL);
if (!tmp_name)
return NULL;
goto out;
}
rcu_read_lock();
......@@ -3426,12 +3426,11 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
(s->flags & ~SLAB_PANIC), s->ctor, s);
if (new)
new->allocflags |= __GFP_KMEMCG;
else
new = s;
out:
mutex_unlock(&mutex);
return new;
}
......
......@@ -2930,7 +2930,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
unsigned short mode = MPOL_DEFAULT;
unsigned short flags = 0;
if (pol && pol != &default_policy) {
if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) {
mode = pol->mode;
flags = pol->flags;
}
......
......@@ -178,7 +178,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
* implementation used by LSMs.
*/
if (has_capability_noaudit(p, CAP_SYS_ADMIN))
adj -= 30;
points -= (points * 3) / 100;
/* Normalize to oom_score_adj units */
adj *= totalpages / 1000;
......
......@@ -355,6 +355,21 @@ static __always_inline void slab_unlock(struct page *page)
__bit_spin_unlock(PG_locked, &page->flags);
}
static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
{
struct page tmp;
tmp.counters = counters_new;
/*
* page->counters can cover frozen/inuse/objects as well
* as page->_count. If we assign to ->counters directly
* we run the risk of losing updates to page->_count, so
* be careful and only assign to the fields we need.
*/
page->frozen = tmp.frozen;
page->inuse = tmp.inuse;
page->objects = tmp.objects;
}
/* Interrupts must be disabled (for the fallback code to work right) */
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
void *freelist_old, unsigned long counters_old,
......@@ -376,7 +391,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
if (page->freelist == freelist_old &&
page->counters == counters_old) {
page->freelist = freelist_new;
page->counters = counters_new;
set_page_slub_counters(page, counters_new);
slab_unlock(page);
return 1;
}
......@@ -415,7 +430,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
if (page->freelist == freelist_old &&
page->counters == counters_old) {
page->freelist = freelist_new;
page->counters = counters_new;
set_page_slub_counters(page, counters_new);
slab_unlock(page);
local_irq_restore(flags);
return 1;
......
......@@ -2,6 +2,7 @@
* zsmalloc memory allocator
*
* Copyright (C) 2011 Nitin Gupta
* Copyright (C) 2012, 2013 Minchan Kim
*
* This code is released using a dual license strategy: BSD/GPL
* You can choose the license that better fits your requirements.
......@@ -90,8 +91,7 @@
#include <linux/hardirq.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include "zsmalloc.h"
#include <linux/zsmalloc.h>
/*
* This must be a power of 2 and greater than or equal to sizeof(link_free).
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册