1. 08 3月, 2017 1 次提交
  2. 03 3月, 2017 1 次提交
    • I
      sched/headers: Move task->mm handling methods to <linux/sched/mm.h> · 68e21be2
      Ingo Molnar 提交于
      Move the following task->mm helper APIs into a new header file,
      <linux/sched/mm.h>, to further reduce the size and complexity
      of <linux/sched.h>.
      
      Here are how the APIs are used in various kernel files:
      
        # mm_alloc():
        arch/arm/mach-rpc/ecard.c
        fs/exec.c
        include/linux/sched/mm.h
        kernel/fork.c
      
        # __mmdrop():
        arch/arc/include/asm/mmu_context.h
        include/linux/sched/mm.h
        kernel/fork.c
      
        # mmdrop():
        arch/arm/mach-rpc/ecard.c
        arch/m68k/sun3/mmu_emu.c
        arch/x86/mm/tlb.c
        drivers/gpu/drm/amd/amdkfd/kfd_process.c
        drivers/gpu/drm/i915/i915_gem_userptr.c
        drivers/infiniband/hw/hfi1/file_ops.c
        drivers/vfio/vfio_iommu_spapr_tce.c
        fs/exec.c
        fs/proc/base.c
        fs/proc/task_mmu.c
        fs/proc/task_nommu.c
        fs/userfaultfd.c
        include/linux/mmu_notifier.h
        include/linux/sched/mm.h
        kernel/fork.c
        kernel/futex.c
        kernel/sched/core.c
        mm/khugepaged.c
        mm/ksm.c
        mm/mmu_context.c
        mm/mmu_notifier.c
        mm/oom_kill.c
        virt/kvm/kvm_main.c
      
        # mmdrop_async_fn():
        include/linux/sched/mm.h
      
        # mmdrop_async():
        include/linux/sched/mm.h
        kernel/fork.c
      
        # mmget_not_zero():
        fs/userfaultfd.c
        include/linux/sched/mm.h
        mm/oom_kill.c
      
        # mmput():
        arch/arc/include/asm/mmu_context.h
        arch/arc/kernel/troubleshoot.c
        arch/frv/mm/mmu-context.c
        arch/powerpc/platforms/cell/spufs/context.c
        arch/sparc/include/asm/mmu_context_32.h
        drivers/android/binder.c
        drivers/gpu/drm/etnaviv/etnaviv_gem.c
        drivers/gpu/drm/i915/i915_gem_userptr.c
        drivers/infiniband/core/umem.c
        drivers/infiniband/core/umem_odp.c
        drivers/infiniband/core/uverbs_main.c
        drivers/infiniband/hw/mlx4/main.c
        drivers/infiniband/hw/mlx5/main.c
        drivers/infiniband/hw/usnic/usnic_uiom.c
        drivers/iommu/amd_iommu_v2.c
        drivers/iommu/intel-svm.c
        drivers/lguest/lguest_user.c
        drivers/misc/cxl/fault.c
        drivers/misc/mic/scif/scif_rma.c
        drivers/oprofile/buffer_sync.c
        drivers/vfio/vfio_iommu_type1.c
        drivers/vhost/vhost.c
        drivers/xen/gntdev.c
        fs/exec.c
        fs/proc/array.c
        fs/proc/base.c
        fs/proc/task_mmu.c
        fs/proc/task_nommu.c
        fs/userfaultfd.c
        include/linux/sched/mm.h
        kernel/cpuset.c
        kernel/events/core.c
        kernel/events/uprobes.c
        kernel/exit.c
        kernel/fork.c
        kernel/ptrace.c
        kernel/sys.c
        kernel/trace/trace_output.c
        kernel/tsacct.c
        mm/memcontrol.c
        mm/memory.c
        mm/mempolicy.c
        mm/migrate.c
        mm/mmu_notifier.c
        mm/nommu.c
        mm/oom_kill.c
        mm/process_vm_access.c
        mm/rmap.c
        mm/swapfile.c
        mm/util.c
        virt/kvm/async_pf.c
      
        # mmput_async():
        include/linux/sched/mm.h
        kernel/fork.c
        mm/oom_kill.c
      
        # get_task_mm():
        arch/arc/kernel/troubleshoot.c
        arch/powerpc/platforms/cell/spufs/context.c
        drivers/android/binder.c
        drivers/gpu/drm/etnaviv/etnaviv_gem.c
        drivers/infiniband/core/umem.c
        drivers/infiniband/core/umem_odp.c
        drivers/infiniband/hw/mlx4/main.c
        drivers/infiniband/hw/mlx5/main.c
        drivers/infiniband/hw/usnic/usnic_uiom.c
        drivers/iommu/amd_iommu_v2.c
        drivers/iommu/intel-svm.c
        drivers/lguest/lguest_user.c
        drivers/misc/cxl/fault.c
        drivers/misc/mic/scif/scif_rma.c
        drivers/oprofile/buffer_sync.c
        drivers/vfio/vfio_iommu_type1.c
        drivers/vhost/vhost.c
        drivers/xen/gntdev.c
        fs/proc/array.c
        fs/proc/base.c
        fs/proc/task_mmu.c
        include/linux/sched/mm.h
        kernel/cpuset.c
        kernel/events/core.c
        kernel/exit.c
        kernel/fork.c
        kernel/ptrace.c
        kernel/sys.c
        kernel/trace/trace_output.c
        kernel/tsacct.c
        mm/memcontrol.c
        mm/memory.c
        mm/mempolicy.c
        mm/migrate.c
        mm/mmu_notifier.c
        mm/nommu.c
        mm/util.c
      
        # mm_access():
        fs/proc/base.c
        include/linux/sched/mm.h
        kernel/fork.c
        mm/process_vm_access.c
      
        # mm_release():
        arch/arc/include/asm/mmu_context.h
        fs/exec.c
        include/linux/sched/mm.h
        include/uapi/linux/sched.h
        kernel/exit.c
        kernel/fork.c
      Acked-by: NLinus Torvalds <torvalds@linux-foundation.org>
      Cc: Mike Galbraith <efault@gmx.de>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: linux-kernel@vger.kernel.org
      Signed-off-by: NIngo Molnar <mingo@kernel.org>
      68e21be2
  3. 02 3月, 2017 18 次提交
  4. 01 3月, 2017 2 次提交
    • P
      KVM: PPC: Book3S HV: Don't use ASDR for real-mode HPT faults on POWER9 · 4e5acdc2
      Paul Mackerras 提交于
      In HPT mode on POWER9, the ASDR register is supposed to record
      segment information for hypervisor page faults.  It turns out that
      POWER9 DD1 does not record the page size information in the ASDR
      for faults in guest real mode.  We have the necessary information
      in memory already, so by moving the checks for real mode that already
      existed, we can use the in-memory copy.  Since a load is likely to
      be faster than reading an SPR, we do this unconditionally (not just
      for POWER9 DD1).
      Signed-off-by: NPaul Mackerras <paulus@ozlabs.org>
      4e5acdc2
    • P
      KVM: PPC: Book3S HV: Fix software walk of guest process page tables · 70cd4c10
      Paul Mackerras 提交于
      This fixes some bugs in the code that walks the guest's page tables.
      These bugs cause MMIO emulation to fail whenever the guest is in
      virtial mode (MMU on), leading to the guest hanging if it tried to
      access a virtio device.
      
      The first bug was that when reading the guest's process table, we were
      using the whole of arch->process_table, not just the field that contains
      the process table base address.  The second bug was that the mask used
      when reading the process table entry to get the radix tree base address,
      RPDB_MASK, had the wrong value.
      
      Fixes: 9e04ba69 ("KVM: PPC: Book3S HV: Add basic infrastructure for radix guests")
      Fixes: e9983344 ("powerpc/mm/radix: Add partition table format & callback")
      Signed-off-by: NPaul Mackerras <paulus@ozlabs.org>
      70cd4c10
  5. 28 2月, 2017 5 次提交
  6. 25 2月, 2017 4 次提交
  7. 23 2月, 2017 5 次提交
    • M
      lib/show_mem.c: teach show_mem to work with the given nodemask · 9af744d7
      Michal Hocko 提交于
      show_mem() allows to filter out node specific data which is irrelevant
      to the allocation request via SHOW_MEM_FILTER_NODES.  The filtering is
      done in skip_free_areas_node which skips all nodes which are not in the
      mems_allowed of the current process.  This works most of the time as
      expected because the nodemask shouldn't be outside of the allocating
      task but there are some exceptions.  E.g.  memory hotplug might want to
      request allocations from outside of the allowed nodes (see
      new_node_page).
      
      Get rid of this hardcoded behavior and push the allocation mask down the
      show_mem path and use it instead of cpuset_current_mems_allowed.  NULL
      nodemask is interpreted as cpuset_current_mems_allowed.
      
      [akpm@linux-foundation.org: coding-style fixes]
      Link: http://lkml.kernel.org/r/20170117091543.25850-5-mhocko@kernel.orgSigned-off-by: NMichal Hocko <mhocko@suse.com>
      Acked-by: NMel Gorman <mgorman@suse.de>
      Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
      Cc: Johannes Weiner <hannes@cmpxchg.org>
      Cc: Vlastimil Babka <vbabka@suse.cz>
      Cc: David Rientjes <rientjes@google.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      9af744d7
    • D
      powerpc: do not make the entire heap executable · 16e72e9b
      Denys Vlasenko 提交于
      On 32-bit powerpc the ELF PLT sections of binaries (built with
      --bss-plt, or with a toolchain which defaults to it) look like this:
      
        [17] .sbss             NOBITS          0002aff8 01aff8 000014 00  WA  0   0  4
        [18] .plt              NOBITS          0002b00c 01aff8 000084 00 WAX  0   0  4
        [19] .bss              NOBITS          0002b090 01aff8 0000a4 00  WA  0   0  4
      
      Which results in an ELF load header:
      
        Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
        LOAD           0x019c70 0x00029c70 0x00029c70 0x01388 0x014c4 RWE 0x10000
      
      This is all correct, the load region containing the PLT is marked as
      executable.  Note that the PLT starts at 0002b00c but the file mapping
      ends at 0002aff8, so the PLT falls in the 0 fill section described by
      the load header, and after a page boundary.
      
      Unfortunately the generic ELF loader ignores the X bit in the load
      headers when it creates the 0 filled non-file backed mappings.  It
      assumes all of these mappings are RW BSS sections, which is not the case
      for PPC.
      
      gcc/ld has an option (--secure-plt) to not do this, this is said to
      incur a small performance penalty.
      
      Currently, to support 32-bit binaries with PLT in BSS kernel maps
      *entire brk area* with executable rights for all binaries, even
      --secure-plt ones.
      
      Stop doing that.
      
      Teach the ELF loader to check the X bit in the relevant load header and
      create 0 filled anonymous mappings that are executable if the load
      header requests that.
      
      Test program showing the difference in /proc/$PID/maps:
      
      int main() {
      	char buf[16*1024];
      	char *p = malloc(123); /* make "[heap]" mapping appear */
      	int fd = open("/proc/self/maps", O_RDONLY);
      	int len = read(fd, buf, sizeof(buf));
      	write(1, buf, len);
      	printf("%p\n", p);
      	return 0;
      }
      
      Compiled using: gcc -mbss-plt -m32 -Os test.c -otest
      
      Unpatched ppc64 kernel:
      00100000-00120000 r-xp 00000000 00:00 0                                  [vdso]
      0fe10000-0ffd0000 r-xp 00000000 fd:00 67898094                           /usr/lib/libc-2.17.so
      0ffd0000-0ffe0000 r--p 001b0000 fd:00 67898094                           /usr/lib/libc-2.17.so
      0ffe0000-0fff0000 rw-p 001c0000 fd:00 67898094                           /usr/lib/libc-2.17.so
      10000000-10010000 r-xp 00000000 fd:00 100674505                          /home/user/test
      10010000-10020000 r--p 00000000 fd:00 100674505                          /home/user/test
      10020000-10030000 rw-p 00010000 fd:00 100674505                          /home/user/test
      10690000-106c0000 rwxp 00000000 00:00 0                                  [heap]
      f7f70000-f7fa0000 r-xp 00000000 fd:00 67898089                           /usr/lib/ld-2.17.so
      f7fa0000-f7fb0000 r--p 00020000 fd:00 67898089                           /usr/lib/ld-2.17.so
      f7fb0000-f7fc0000 rw-p 00030000 fd:00 67898089                           /usr/lib/ld-2.17.so
      ffa90000-ffac0000 rw-p 00000000 00:00 0                                  [stack]
      0x10690008
      
      Patched ppc64 kernel:
      00100000-00120000 r-xp 00000000 00:00 0                                  [vdso]
      0fe10000-0ffd0000 r-xp 00000000 fd:00 67898094                           /usr/lib/libc-2.17.so
      0ffd0000-0ffe0000 r--p 001b0000 fd:00 67898094                           /usr/lib/libc-2.17.so
      0ffe0000-0fff0000 rw-p 001c0000 fd:00 67898094                           /usr/lib/libc-2.17.so
      10000000-10010000 r-xp 00000000 fd:00 100674505                          /home/user/test
      10010000-10020000 r--p 00000000 fd:00 100674505                          /home/user/test
      10020000-10030000 rw-p 00010000 fd:00 100674505                          /home/user/test
      10180000-101b0000 rw-p 00000000 00:00 0                                  [heap]
                        ^^^^ this has changed
      f7c60000-f7c90000 r-xp 00000000 fd:00 67898089                           /usr/lib/ld-2.17.so
      f7c90000-f7ca0000 r--p 00020000 fd:00 67898089                           /usr/lib/ld-2.17.so
      f7ca0000-f7cb0000 rw-p 00030000 fd:00 67898089                           /usr/lib/ld-2.17.so
      ff860000-ff890000 rw-p 00000000 00:00 0                                  [stack]
      0x10180008
      
      The patch was originally posted in 2012 by Jason Gunthorpe
      and apparently ignored:
      
      https://lkml.org/lkml/2012/9/30/138
      
      Lightly run-tested.
      
      Link: http://lkml.kernel.org/r/20161215131950.23054-1-dvlasenk@redhat.comSigned-off-by: NJason Gunthorpe <jgunthorpe@obsidianresearch.com>
      Signed-off-by: NDenys Vlasenko <dvlasenk@redhat.com>
      Acked-by: NKees Cook <keescook@chromium.org>
      Acked-by: NMichael Ellerman <mpe@ellerman.id.au>
      Tested-by: NJason Gunthorpe <jgunthorpe@obsidianresearch.com>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
      Cc: Oleg Nesterov <oleg@redhat.com>
      Cc: Florian Weimer <fweimer@redhat.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      16e72e9b
    • F
      powerpc: Remove leftover cputime_to_nsecs call causing build error · 9f3768e0
      Frédéric Weisbecker 提交于
      This type conversion is a leftover that got ignored during the kcpustat
      conversion to nanosecs, resulting in build breakage with config having
      CONFIG_NO_HZ_FULL=y.
      
      	arch/powerpc/kernel/time.c: In function 'running_clock':
      	arch/powerpc/kernel/time.c:712:2: error: implicit declaration of function 'cputime_to_nsecs' [-Werror=implicit-function-declaration]
      	  return local_clock() - cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]);
      
      All we need is to remove it.
      
      Fixes: e7f340ca ("powerpc, sched/cputime: Remove unused cputime definitions")
      Reported-by: NAbdul Haleem <abdhalee@linux.vnet.ibm.com>
      Signed-off-by: NFrederic Weisbecker <fweisbec@gmail.com>
      Signed-off-by: NMichael Ellerman <mpe@ellerman.id.au>
      9f3768e0
    • A
      powerpc/mm/hash: Always clear UPRT and Host Radix bits when setting up CPU · fda2d27d
      Aneesh Kumar K.V 提交于
      We will set LPCR with correct value for radix during int. This make sure we
      start with a sanitized value of LPCR. In case of kexec, cpus can have LPCR
      value based on the previous translation mode we were running.
      
      Fixes: fe036a06 ("powerpc/64/kexec: Fix MMU cleanup on radix")
      Cc: stable@vger.kernel.org # v4.9+
      Acked-by: NMichael Neuling <mikey@neuling.org>
      Signed-off-by: NAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
      Signed-off-by: NMichael Ellerman <mpe@ellerman.id.au>
      fda2d27d
    • N
      powerpc/optprobes: Fix TOC handling in optprobes trampoline · f558b37b
      Naveen N. Rao 提交于
      Optprobes on powerpc are limited to kernel text area. We decided to also
      optimize kretprobe_trampoline since that is also in kernel text area.
      However,we failed to take into consideration the fact that the same
      trampoline is also used to catch function returns from kernel modules.
      As an example:
      
        $ sudo modprobe kobject-example
        $ sudo bash -c "echo 'r foo_show+8' > /sys/kernel/debug/tracing/kprobe_events"
        $ sudo bash -c "echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable"
        $ sudo cat /sys/kernel/debug/kprobes/list
        c000000000041350  k  kretprobe_trampoline+0x0    [OPTIMIZED]
        d000000000e00200  r  foo_show+0x8  kobject_example
        $ cat /sys/kernel/kobject_example/foo
        Segmentation fault
      
      With the below (trimmed) splat in dmesg:
      
        Unable to handle kernel paging request for data at address 0xfec40000
        Faulting instruction address: 0xc000000000041540
        Oops: Kernel access of bad area, sig: 11 [#1]
        ...
        NIP [c000000000041540] optimized_callback+0x70/0xe0
        LR [c000000000041e60] optinsn_slot+0xf8/0x10000
        Call Trace:
        [c0000000c7327850] [c000000000289af4] alloc_set_pte+0x1c4/0x860 (unreliable)
        [c0000000c7327890] [c000000000041e60] optinsn_slot+0xf8/0x10000
        --- interrupt: 700 at 0xc0000000c7327a80
      	       LR = kretprobe_trampoline+0x0/0x10
        [c0000000c7327ba0] [c0000000003a30d4] sysfs_kf_seq_show+0x104/0x1d0
        [c0000000c7327bf0] [c0000000003a0bb4] kernfs_seq_show+0x44/0x60
        [c0000000c7327c10] [c000000000330578] seq_read+0xf8/0x560
        [c0000000c7327cb0] [c0000000003a1e64] kernfs_fop_read+0x194/0x260
        [c0000000c7327d00] [c0000000002f9954] __vfs_read+0x44/0x1a0
        [c0000000c7327d90] [c0000000002fb4cc] vfs_read+0xbc/0x1b0
        [c0000000c7327de0] [c0000000002fd138] SyS_read+0x68/0x110
        [c0000000c7327e30] [c00000000000b8e0] system_call+0x38/0xfc
      
      Fix this by loading up the kernel TOC before calling into the kernel.
      The original TOC gets restored as part of the usual pt_regs restore.
      
      Fixes: 762df10b ("powerpc/kprobes: Optimize kprobe in kretprobe_trampoline()")
      Signed-off-by: NNaveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
      Signed-off-by: NMichael Ellerman <mpe@ellerman.id.au>
      f558b37b
  8. 22 2月, 2017 2 次提交
  9. 21 2月, 2017 2 次提交