提交 bde17b90 编写于 作者: L Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "12 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  dmapool: fix overflow condition in pool_find_page()
  thermal: avoid division by zero in power allocator
  memcg: remove pcp_counter_lock
  kprobes: use _do_fork() in samples to make them work again
  drivers/input/joystick/Kconfig: zhenhua.c needs BITREVERSE
  memcg: make mem_cgroup_read_stat() unsigned
  memcg: fix dirty page migration
  dax: fix NULL pointer in __dax_pmd_fault()
  mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault
  mm/slab: fix unexpected index mapping result of kmalloc_size(INDEX_NODE+1)
  userfaultfd: remove kernel header include from uapi header
  arch/x86/include/asm/efi.h: fix build failure
......@@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...);
extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
u32 type, u64 attribute);
#ifdef CONFIG_KASAN
/*
* CONFIG_KASAN may redefine memset to __memset. __memset function is present
* only in kernel binary. Since the EFI stub is linked into a separate binary it
......@@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
#undef memcpy
#undef memset
#undef memmove
#endif
#endif /* CONFIG_X86_32 */
......
......@@ -196,6 +196,7 @@ config JOYSTICK_TWIDJOY
config JOYSTICK_ZHENHUA
tristate "5-byte Zhenhua RC transmitter"
select SERIO
select BITREVERSE
help
Say Y here if you have a Zhen Hua PPM-4CH transmitter which is
supplied with a ready to fly micro electric indoor helicopters
......
......@@ -144,6 +144,16 @@ static void estimate_pid_constants(struct thermal_zone_device *tz,
switch_on_temp = 0;
temperature_threshold = control_temp - switch_on_temp;
/*
* estimate_pid_constants() tries to find appropriate default
* values for thermal zones that don't provide them. If a
* system integrator has configured a thermal zone with two
* passive trip points at the same temperature, that person
* hasn't put any effort to set up the thermal zone properly
* so just give up.
*/
if (!temperature_threshold)
return;
if (!tz->tzp->k_po || force)
tz->tzp->k_po = int_to_frac(sustainable_power) /
......
......@@ -569,8 +569,20 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
goto fallback;
sector = bh.b_blocknr << (blkbits - 9);
if (buffer_unwritten(&bh) || buffer_new(&bh)) {
int i;
length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
bh.b_size);
if (length < 0) {
result = VM_FAULT_SIGBUS;
goto out;
}
if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
goto fallback;
for (i = 0; i < PTRS_PER_PMD; i++)
clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
wmb_pmem();
......@@ -623,7 +635,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
result = VM_FAULT_NOPAGE;
spin_unlock(ptl);
} else {
sector = bh.b_blocknr << (blkbits - 9);
length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
bh.b_size);
if (length < 0) {
......
......@@ -242,7 +242,6 @@ struct mem_cgroup {
* percpu counter.
*/
struct mem_cgroup_stat_cpu __percpu *stat;
spinlock_t pcp_counter_lock;
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
struct cg_proto tcp_mem;
......
......@@ -905,6 +905,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
#endif
}
/*
 * Accessors for the memory cgroup pointer stored in struct page.
 *
 * With CONFIG_MEMCG enabled they read and write page->mem_cgroup directly.
 * Without it, stub versions with the same signatures are provided so that
 * callers compile unchanged: the getter returns NULL and the setter does
 * nothing.
 */
#ifdef CONFIG_MEMCG
/* Return the value currently stored in page->mem_cgroup. */
static inline struct mem_cgroup *page_memcg(struct page *page)
{
return page->mem_cgroup;
}
/* Store @memcg in page->mem_cgroup, overwriting the previous value. */
static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg)
{
page->mem_cgroup = memcg;
}
#else
/* Stub used when CONFIG_MEMCG is not defined: always returns NULL. */
static inline struct mem_cgroup *page_memcg(struct page *page)
{
return NULL;
}
/* Stub used when CONFIG_MEMCG is not defined: intentionally a no-op. */
static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg)
{
}
#endif
/*
* Some inline functions in vmstat.h depend on page_zone()
*/
......
......@@ -11,8 +11,6 @@
#include <linux/types.h>
#include <linux/compiler.h>
#define UFFD_API ((__u64)0xAA)
/*
* After implementing the respective features it will become:
......
......@@ -394,7 +394,7 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
list_for_each_entry(page, &pool->page_list, page_list) {
if (dma < page->dma)
continue;
if (dma < (page->dma + pool->allocation))
if ((dma - page->dma) < pool->allocation)
return page;
}
return NULL;
......
......@@ -3201,6 +3201,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
if (iter_vma == vma)
continue;
/*
* Shared VMAs have their own reserves and do not affect
* MAP_PRIVATE accounting but it is possible that a shared
* VMA is using the same page so check and skip such VMAs.
*/
if (iter_vma->vm_flags & VM_MAYSHARE)
continue;
/*
* Unmap the page from other VMAs without their own reserves.
* They get marked to be SIGKILLed if they fault in these
......
......@@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
}
/*
* Return page count for single (non recursive) @memcg.
*
* Implementation Note: reading percpu statistics for memcg.
*
* Both of vmstat[] and percpu_counter has threshold and do periodic
* synchronization to implement "quick" read. There are trade-off between
* reading cost and precision of value. Then, we may have a chance to implement
* a periodic synchronizion of counter in memcg's counter.
* a periodic synchronization of counter in memcg's counter.
*
* But this _read() function is used for user interface now. The user accounts
* memory usage by memory cgroup and he _always_ requires exact value because
......@@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
*
* If there are kernel internal actions which can make use of some not-exact
* value, and reading all cpu value can be performance bottleneck in some
* common workload, threashold and synchonization as vmstat[] should be
* common workload, threshold and synchronization as vmstat[] should be
* implemented.
*/
static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
static unsigned long
mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
{
long val = 0;
int cpu;
/* Per-cpu values can be negative, use a signed accumulator */
for_each_possible_cpu(cpu)
val += per_cpu(memcg->stat->count[idx], cpu);
/*
* Summing races with updates, so val may be negative. Avoid exposing
* transient negative values.
*/
if (val < 0)
val = 0;
return val;
}
......@@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i],
pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
K(mem_cgroup_read_stat(iter, i)));
}
......@@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
struct mem_cgroup *iter;
long val = 0;
unsigned long val = 0;
/* Per-cpu values can be negative, use a signed accumulator */
for_each_mem_cgroup_tree(iter, memcg)
val += mem_cgroup_read_stat(iter, idx);
if (val < 0) /* race ? */
val = 0;
return val;
}
......@@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
}
......@@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v)
(u64)memsw * PAGE_SIZE);
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
long long val = 0;
unsigned long long val = 0;
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
for_each_mem_cgroup_tree(mi, memcg)
val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val);
}
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
......@@ -4179,7 +4185,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
goto out_free_stat;
spin_lock_init(&memcg->pcp_counter_lock);
return memcg;
out_free_stat:
......
......@@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page,
if (PageSwapBacked(page))
SetPageSwapBacked(newpage);
/*
* Indirectly called below, migrate_page_copy() copies PG_dirty and thus
* needs newpage's memcg set to transfer memcg dirty page accounting.
* So perform memcg migration in two steps:
* 1. set newpage->mem_cgroup (here)
* 2. clear page->mem_cgroup (below)
*/
set_page_memcg(newpage, page_memcg(page));
mapping = page_mapping(page);
if (!mapping)
rc = migrate_page(mapping, newpage, page, mode);
......@@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page,
rc = fallback_migrate_page(mapping, newpage, page, mode);
if (rc != MIGRATEPAGE_SUCCESS) {
set_page_memcg(newpage, NULL);
newpage->mapping = NULL;
} else {
mem_cgroup_migrate(page, newpage, false);
set_page_memcg(page, NULL);
if (page_was_mapped)
remove_migration_ptes(page, newpage);
page->mapping = NULL;
......
......@@ -2190,9 +2190,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
size += BYTES_PER_WORD;
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
if (size >= kmalloc_size(INDEX_NODE + 1)
&& cachep->object_size > cache_line_size()
&& ALIGN(size, cachep->align) < PAGE_SIZE) {
/*
* To activate debug pagealloc, off-slab management is a necessary
* requirement. In the early phase of initialization, small-sized slabs
* are not yet initialized, so off-slab management would not be possible.
* So, we need to check size >= 256. It guarantees that all necessary
* small-sized slabs are initialized in the current slab initialization
* sequence.
*/
if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
size >= 256 && cachep->object_size > cache_line_size() &&
ALIGN(size, cachep->align) < PAGE_SIZE) {
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
size = PAGE_SIZE;
}
......
/*
* Here's a sample kernel module showing the use of jprobes to dump
* the arguments of do_fork().
* the arguments of _do_fork().
*
* For more information on theory of operation of jprobes, see
* Documentation/kprobes.txt
*
* Build and insert the kernel module as done in the kprobe example.
* You will see the trace data in /var/log/messages and on the
* console whenever do_fork() is invoked to create a new process.
* console whenever _do_fork() is invoked to create a new process.
* (Some messages may be suppressed if syslogd is configured to
* eliminate duplicate messages.)
*/
......@@ -17,13 +17,13 @@
#include <linux/kprobes.h>
/*
* Jumper probe for do_fork.
* Jumper probe for _do_fork.
* Mirror principle enables access to arguments of the probed routine
* from the probe handler.
*/
/* Proxy routine having the same arguments as actual do_fork() routine */
static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
/* Proxy routine having the same arguments as actual _do_fork() routine */
static long j_do_fork(unsigned long clone_flags, unsigned long stack_start,
unsigned long stack_size, int __user *parent_tidptr,
int __user *child_tidptr)
{
......@@ -36,9 +36,9 @@ static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
}
static struct jprobe my_jprobe = {
.entry = jdo_fork,
.entry = j_do_fork,
.kp = {
.symbol_name = "do_fork",
.symbol_name = "_do_fork",
},
};
......
/*
* NOTE: This example works on x86 and powerpc.
* Here's a sample kernel module showing the use of kprobes to dump a
* stack trace and selected registers when do_fork() is called.
* stack trace and selected registers when _do_fork() is called.
*
* For more information on theory of operation of kprobes, see
* Documentation/kprobes.txt
*
* You will see the trace data in /var/log/messages and on the console
* whenever do_fork() is invoked to create a new process.
* whenever _do_fork() is invoked to create a new process.
*/
#include <linux/kernel.h>
......@@ -16,7 +16,7 @@
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
.symbol_name = "do_fork",
.symbol_name = "_do_fork",
};
/* kprobe pre_handler: called just before the probed instruction is executed */
......
......@@ -7,7 +7,7 @@
*
* usage: insmod kretprobe_example.ko func=<func_name>
*
* If no func_name is specified, do_fork is instrumented
* If no func_name is specified, _do_fork is instrumented
*
* For more information on theory of operation of kretprobes, see
* Documentation/kprobes.txt
......@@ -25,7 +25,7 @@
#include <linux/limits.h>
#include <linux/sched.h>
static char func_name[NAME_MAX] = "do_fork";
static char func_name[NAME_MAX] = "_do_fork";
module_param_string(func, func_name, NAME_MAX, S_IRUGO);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
" function's execution time");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册