1. 13 6月, 2018 1 次提交
    • K
      treewide: kzalloc() -> kcalloc() · 6396bb22
      Kees Cook 提交于
      The kzalloc() function has a 2-factor argument form, kcalloc(). This
      patch replaces cases of:
      
              kzalloc(a * b, gfp)
      
      with:
              kcalloc(a, b, gfp)
      
      as well as handling cases of:
      
              kzalloc(a * b * c, gfp)
      
      with:
      
              kzalloc(array3_size(a, b, c), gfp)
      
      as it's slightly less ugly than:
      
              kcalloc(array_size(a, b), c, gfp)
      
      This does, however, attempt to ignore constant size factors like:
      
              kzalloc(4 * 1024, gfp)
      
      though any constants defined via macros get caught up in the conversion.
      
      Any factors with a sizeof() of "unsigned char", "char", and "u8" were
      dropped, since they're redundant.
      
      The Coccinelle script used for this was:
      
      // Fix redundant parens around sizeof().
      @@
      type TYPE;
      expression THING, E;
      @@
      
      (
        kzalloc(
      -	(sizeof(TYPE)) * E
      +	sizeof(TYPE) * E
        , ...)
      |
        kzalloc(
      -	(sizeof(THING)) * E
      +	sizeof(THING) * E
        , ...)
      )
      
      // Drop single-byte sizes and redundant parens.
      @@
      expression COUNT;
      typedef u8;
      typedef __u8;
      @@
      
      (
        kzalloc(
      -	sizeof(u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(__u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(char) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(unsigned char) * (COUNT)
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(u8) * COUNT
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(__u8) * COUNT
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(char) * COUNT
      +	COUNT
        , ...)
      |
        kzalloc(
      -	sizeof(unsigned char) * COUNT
      +	COUNT
        , ...)
      )
      
      // 2-factor product with sizeof(type/expression) and identifier or constant.
      @@
      type TYPE;
      expression THING;
      identifier COUNT_ID;
      constant COUNT_CONST;
      @@
      
      (
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * (COUNT_ID)
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * COUNT_ID
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * COUNT_CONST
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * (COUNT_ID)
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * COUNT_ID
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * COUNT_CONST
      +	COUNT_CONST, sizeof(THING)
        , ...)
      )
      
      // 2-factor product, only identifiers.
      @@
      identifier SIZE, COUNT;
      @@
      
      - kzalloc
      + kcalloc
        (
      -	SIZE * COUNT
      +	COUNT, SIZE
        , ...)
      
      // 3-factor product with 1 sizeof(type) or sizeof(expression), with
      // redundant parens removed.
      @@
      expression THING;
      identifier STRIDE, COUNT;
      type TYPE;
      @@
      
      (
        kzalloc(
      -	sizeof(TYPE) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kzalloc(
      -	sizeof(THING) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      )
      
      // 3-factor product with 2 sizeof(variable), with redundant parens removed.
      @@
      expression THING1, THING2;
      identifier COUNT;
      type TYPE1, TYPE2;
      @@
      
      (
        kzalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kzalloc(
      -	sizeof(THING1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kzalloc(
      -	sizeof(THING1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      |
        kzalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      )
      
      // 3-factor product, only identifiers, with redundant parens removed.
      @@
      identifier STRIDE, SIZE, COUNT;
      @@
      
      (
        kzalloc(
      -	(COUNT) * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	(COUNT) * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	(COUNT) * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	(COUNT) * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kzalloc(
      -	COUNT * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      )
      
      // Any remaining multi-factor products, first at least 3-factor products,
      // when they're not all constants...
      @@
      expression E1, E2, E3;
      constant C1, C2, C3;
      @@
      
      (
        kzalloc(C1 * C2 * C3, ...)
      |
        kzalloc(
      -	(E1) * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kzalloc(
      -	(E1) * (E2) * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kzalloc(
      -	(E1) * (E2) * (E3)
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kzalloc(
      -	E1 * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      )
      
      // And then all remaining 2 factors products when they're not all constants,
      // keeping sizeof() as the second factor argument.
      @@
      expression THING, E1, E2;
      type TYPE;
      constant C1, C2, C3;
      @@
      
      (
        kzalloc(sizeof(THING) * C2, ...)
      |
        kzalloc(sizeof(TYPE) * C2, ...)
      |
        kzalloc(C1 * C2 * C3, ...)
      |
        kzalloc(C1 * C2, ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * (E2)
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(TYPE) * E2
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * (E2)
      +	E2, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	sizeof(THING) * E2
      +	E2, sizeof(THING)
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	(E1) * E2
      +	E1, E2
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	(E1) * (E2)
      +	E1, E2
        , ...)
      |
      - kzalloc
      + kcalloc
        (
      -	E1 * E2
      +	E1, E2
        , ...)
      )
      Signed-off-by: Kees Cook <keescook@chromium.org>
      6396bb22
  2. 30 3月, 2018 2 次提交
  3. 08 2月, 2018 1 次提交
    • N
      powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove · 1d9a0907
      Nathan Fontenot 提交于
      When DLPAR removing a CPU, the unmapping of the cpu from a node in
      unmap_cpu_from_node() should also invalidate the CPU's entry in the
      numa_cpu_lookup_table. There is not a guarantee that on a subsequent
      DLPAR add of the CPU the associativity will be the same and thus
      could be in a different node. Invalidating the entry in the
      numa_cpu_lookup_table causes the associativity to be read from the
      device tree at the time of the add.
      
      The current behavior of not invalidating the CPU's entry in the
      numa_cpu_lookup_table can result in scenarios where the topology
      layout of CPUs in the partition does not match the device tree
      or the topology reported by the HMC.
      
      This bug looks like it was introduced in 2004 in the commit titled
      "ppc64: cpu hotplug notifier for numa", which is 6b15e4e87e32 in the
      linux-fullhist tree. Hence tag it for all stable releases.
      
      Cc: stable@vger.kernel.org
      Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
      Reviewed-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      1d9a0907
  4. 27 1月, 2018 3 次提交
    • M
      powerpc/pseries: Fix cpu hotplug crash with memoryless nodes · e67e02a5
      Michael Bringmann 提交于
      On powerpc systems with shared configurations of CPUs and memory and
      memoryless nodes at boot, an event ordering problem was observed on
      SLES12 build platforms with the hot-add of CPUs to the memoryless
      nodes.
      
      * The most common error occurred when the memory SLAB driver attempted
        to reference the memoryless node to which a CPU was being added
        before the kernel had finished initializing all of the data
        structures for the CPU and exited 'device_online' under
        DLPAR/hot-add.
      
        Normally the memoryless node would be initialized through the call
        path device_online ... arch_update_cpu_topology ... find_cpu_nid ...
        try_online_node. This patch ensures that the powerpc node will be
        initialized as early as possible, even if it was memoryless and
        CPU-less at the point when we are trying to hot-add a new CPU to it.
      Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
      Reviewed-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      e67e02a5
    • M
      powerpc/numa: Ensure nodes initialized for hotplug · ea05ba7c
      Michael Bringmann 提交于
      This patch fixes some problems encountered at runtime with
      configurations that support memory-less nodes, or that hot-add CPUs
      into nodes that are memoryless during system execution after boot. The
      problems of interest include:
      
      * Nodes known to powerpc to be memoryless at boot, but to have CPUs in
        them are allowed to be 'possible' and 'online'. Memory allocations
        for those nodes are taken from another node that does have memory
        until and if memory is hot-added to the node.
      
      * Nodes which have no resources assigned at boot, but which may still
        be referenced subsequently by affinity or associativity attributes,
        are kept in the list of 'possible' nodes for powerpc. Hot-add of
        memory or CPUs to the system can reference these nodes and bring
        them online instead of redirecting the references to one of the set
        of nodes known to have memory at boot.
      
      Note that this software operates under the context of CPU hotplug. We
      are not doing memory hotplug in this code, but rather updating the
      kernel's CPU topology (i.e. arch_update_cpu_topology /
      numa_update_cpu_topology). We are initializing a node that may be used
      by CPUs or memory before it can be referenced as invalid by a CPU
      hotplug operation. CPU hotplug operations are protected by a range of
      APIs including cpu_maps_update_begin/cpu_maps_update_done,
      cpus_read/write_lock / cpus_read/write_unlock, device locks, and more.
      Memory hotplug operations, including try_online_node, are protected by
      mem_hotplug_begin/mem_hotplug_done, device locks, and more. In the
      case of CPUs being hot-added to a previously memoryless node, the
      try_online_node operation occurs wholly within the CPU locks with no
      overlap. Using HMC hot-add/hot-remove operations, we have been able to
      add and remove CPUs to any possible node without failures. HMC
      operations involve a degree of self-serialization, though.
      Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
      Reviewed-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      ea05ba7c
    • M
      powerpc/numa: Use ibm,max-associativity-domains to discover possible nodes · a346137e
      Michael Bringmann 提交于
      On powerpc systems which allow 'hot-add' of CPU or memory resources,
      it may occur that the new resources are to be inserted into nodes that
      were not used for these resources at bootup. In the kernel, any node
      that is used must be defined and initialized. These empty nodes may
      occur when,
      
      * Dedicated vs. shared resources. Shared resources require information
        such as the VPHN hcall for CPU assignment to nodes. Associativity
        decisions made based on dedicated resource rules, such as
        associativity properties in the device tree, may vary from decisions
        made using the values returned by the VPHN hcall.
      
      * memoryless nodes at boot. Nodes need to be defined as 'possible' at
        boot for operation with other code modules. Previously, the powerpc
        code would limit the set of possible nodes to those which have
        memory assigned at boot, and were thus online. Subsequent add/remove
        of CPUs or memory would only work with this subset of possible
        nodes.
      
      * memoryless nodes with CPUs at boot. Due to the previous restriction
        on nodes, nodes that had CPUs but no memory were being collapsed
        into other nodes that did have memory at boot. In practice this
        meant that the node assignment presented by the runtime kernel
        differed from the affinity and associativity attributes presented by
        the device tree or VPHN hcalls. Nodes that might be known to the
        pHyp were not 'possible' in the runtime kernel because they did not
        have memory at boot.
      
      This patch ensures that sufficient nodes are defined to support
      configuration requirements after boot, as well as at boot. This patch
      set fixes a couple of problems.
      
      * Nodes known to powerpc to be memoryless at boot, but to have CPUs in
        them are allowed to be 'possible' and 'online'. Memory allocations
        for those nodes are taken from another node that does have memory
        until and if memory is hot-added to the node.
      
      * Nodes which have no resources assigned at boot, but which may still
        be referenced subsequently by affinity or associativity attributes,
        are kept in the list of 'possible' nodes for powerpc. Hot-add of
        memory or CPUs to the system can reference these nodes and bring
        them online instead of redirecting to one of the set of nodes that
        were known to have memory at boot.
      
      This patch extracts the value of the lowest domain level (number of
      allocable resources) from the device tree property
      "ibm,max-associativity-domains" to use as the maximum number of nodes
      to setup as possibly available in the system. This new setting will
      override the instruction:
      
          nodes_and(node_possible_map, node_possible_map, node_online_map);
      
      presently seen in the function arch/powerpc/mm/numa.c:initmem_init().
      
      If the "ibm,max-associativity-domains" property is not present at
      boot, no operation will be performed to define or enable additional
      nodes, or enable the above 'nodes_and()'.
      Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
      Reviewed-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      a346137e
  5. 16 1月, 2018 4 次提交
  6. 16 10月, 2017 4 次提交
  7. 10 10月, 2017 1 次提交
    • T
      powerpc: Don't call lockdep_assert_cpus_held() from arch_update_cpu_topology() · 6b2c08f9
      Thiago Jung Bauermann 提交于
      It turns out that not all paths calling arch_update_cpu_topology() hold
      cpu_hotplug_lock, but that's OK because those paths can't race with
      any concurrent hotplug events.
      
      Warnings were reported with the following trace:
      
        lockdep_assert_cpus_held
        arch_update_cpu_topology
        sched_init_domains
        sched_init_smp
        kernel_init_freeable
        kernel_init
        ret_from_kernel_thread
      
      Which is safe because it's called early in boot when hotplug is not
      live yet.
      
      And also this trace:
      
        lockdep_assert_cpus_held
        arch_update_cpu_topology
        partition_sched_domains
        cpuset_update_active_cpus
        sched_cpu_deactivate
        cpuhp_invoke_callback
        cpuhp_down_callbacks
        cpuhp_thread_fun
        smpboot_thread_fn
        kthread
        ret_from_kernel_thread
      
      Which is safe because it's called as part of CPU hotplug, so although
      we don't hold the CPU hotplug lock, there is another thread driving
      the CPU hotplug operation which does hold the lock, and there is no
      race.
      
      Thanks to tglx for deciphering it for us.
      
      Fixes: 3e401f7a ("powerpc: Only obtain cpu_hotplug_lock if called by rtasd")
      Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      6b2c08f9
  8. 05 10月, 2017 1 次提交
    • K
      timer: Remove init_timer_deferrable() in favor of timer_setup() · df7e828c
      Kees Cook 提交于
      This refactors the only users of init_timer_deferrable() to use
      the new timer_setup() and from_timer(). Removes definition of
      init_timer_deferrable().
      Signed-off-by: Kees Cook <keescook@chromium.org>
      Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
      Acked-by: David S. Miller <davem@davemloft.net> # for networking parts
      Acked-by: Sebastian Reichel <sre@kernel.org> # for drivers/hsi parts
      Cc: linux-mips@linux-mips.org
      Cc: Petr Mladek <pmladek@suse.com>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Lai Jiangshan <jiangshanlai@gmail.com>
      Cc: Oleg Nesterov <oleg@redhat.com>
      Cc: Kalle Valo <kvalo@qca.qualcomm.com>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: Pavel Machek <pavel@ucw.cz>
      Cc: linux1394-devel@lists.sourceforge.net
      Cc: Chris Metcalf <cmetcalf@mellanox.com>
      Cc: linux-s390@vger.kernel.org
      Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
      Cc: Wim Van Sebroeck <wim@iguana.be>
      Cc: Michael Ellerman <mpe@ellerman.id.au>
      Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
      Cc: Geert Uytterhoeven <geert@linux-m68k.org>
      Cc: Viresh Kumar <viresh.kumar@linaro.org>
      Cc: Harish Patil <harish.patil@cavium.com>
      Cc: Stephen Boyd <sboyd@codeaurora.org>
      Cc: Guenter Roeck <linux@roeck-us.net>
      Cc: Manish Chopra <manish.chopra@cavium.com>
      Cc: Len Brown <len.brown@intel.com>
      Cc: Arnd Bergmann <arnd@arndb.de>
      Cc: linux-pm@vger.kernel.org
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Julian Wiedmann <jwi@linux.vnet.ibm.com>
      Cc: John Stultz <john.stultz@linaro.org>
      Cc: Mark Gross <mark.gross@intel.com>
      Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
      Cc: linux-watchdog@vger.kernel.org
      Cc: linux-scsi@vger.kernel.org
      Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
      Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
      Cc: linux-wireless@vger.kernel.org
      Cc: Sebastian Reichel <sre@kernel.org>
      Cc: Ralf Baechle <ralf@linux-mips.org>
      Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
      Cc: Michael Reed <mdr@sgi.com>
      Cc: netdev@vger.kernel.org
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: linuxppc-dev@lists.ozlabs.org
      Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
      Link: https://lkml.kernel.org/r/1507159627-127660-6-git-send-email-keescook@chromium.org
      df7e828c
  9. 23 6月, 2017 1 次提交
  10. 11 4月, 2017 1 次提交
    • A
      powerpc/mm: Remove redundant initmem information from log · ea614555
      Anshuman Khandual 提交于
      Generic core VM already prints this information in the log
      buffer, hence there is no need for a second print. This just
      removes the second print from arch powerpc NUMA init path.
      
      Before the patch:
      
        $ dmesg | grep "Initmem"
      
        numa: Initmem setup node 0 [mem 0x00000000-0xffffffff]
        numa: Initmem setup node 1 [mem 0x100000000-0x1ffffffff]
        numa: Initmem setup node 2 [mem 0x200000000-0x2ffffffff]
        numa: Initmem setup node 3 [mem 0x300000000-0x3ffffffff]
        numa: Initmem setup node 4 [mem 0x400000000-0x4ffffffff]
        numa: Initmem setup node 5 [mem 0x500000000-0x5ffffffff]
        numa: Initmem setup node 6 [mem 0x600000000-0x6ffffffff]
        numa: Initmem setup node 7 [mem 0x700000000-0x7ffffffff]
        Initmem setup node 0 [mem 0x0000000000000000-0x00000000ffffffff]
        Initmem setup node 1 [mem 0x0000000100000000-0x00000001ffffffff]
        Initmem setup node 2 [mem 0x0000000200000000-0x00000002ffffffff]
        Initmem setup node 3 [mem 0x0000000300000000-0x00000003ffffffff]
        Initmem setup node 4 [mem 0x0000000400000000-0x00000004ffffffff]
        Initmem setup node 5 [mem 0x0000000500000000-0x00000005ffffffff]
        Initmem setup node 6 [mem 0x0000000600000000-0x00000006ffffffff]
        Initmem setup node 7 [mem 0x0000000700000000-0x00000007ffffffff]
      
      After the patch just the latter set is printed.
      Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      ea614555
  11. 10 2月, 2017 1 次提交
  12. 30 1月, 2017 2 次提交
    • R
      powerpc/mm: Allow memory hotplug into an offline node · 2a8628d4
      Reza Arbab 提交于
      Relax the check preventing us from hotplugging into an offline node.
      
      This limitation was added in commit 482ec7c4 ("[PATCH] powerpc numa:
      Support sparse online node map") to prevent adding resources to an
      uninitialized node.
      
      These days, there is no harm in doing so. The addition will actually
      cause the node to be initialized and onlined; add_memory_resource()
      calls hotadd_new_pgdat() (if necessary) and node_set_online().
      Signed-off-by: Reza Arbab <arbab@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      2a8628d4
    • R
      powerpc/mm: Simplify loop control in parse_numa_properties() · 7656cd8e
      Reza Arbab 提交于
      The flow of the main loop in parse_numa_properties() is overly
      complicated. Simplify it to be less confusing and easier to read.
      No functional change.
      
      The end of the main loop in parse_numa_properties() looks like this:
      
      	for_each_node_by_type(...) {
      		...
      		if (!condition) {
      			if (--ranges)
      				goto new_range;
      			else
      				continue;
      		}
      
      		statement();
      
      		if (--ranges)
      			goto new_range;
      		/* else
      		 *	continue; <- implicit, this is the end of the loop
      		 */
      	}
      
      The only effect of !condition is to skip execution of statement(). This
      can be rewritten in a simpler way:
      
      	for_each_node_by_type(...) {
      		...
      		if (condition)
      			statement();
      
      		if (--ranges)
      			goto new_range;
      	}
      Signed-off-by: Reza Arbab <arbab@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      7656cd8e
  13. 25 12月, 2016 1 次提交
  14. 13 12月, 2016 1 次提交
    • R
      powerpc/mm: allow memory hotplug into a memoryless node · 4a3bac4e
      Reza Arbab 提交于
      Patch series "enable movable nodes on non-x86 configs", v7.
      
      This patchset allows more configs to make use of movable nodes.  When
      CONFIG_MOVABLE_NODE is selected, there are two ways to introduce such
      nodes into the system:
      
      1. Discover movable nodes at boot. Currently this is only possible on
         x86, but we will enable configs supporting fdt to do the same.
      
      2. Hotplug and online all of a node's memory using online_movable. This
         is already possible on any config supporting memory hotplug, not
         just x86, but the Kconfig doesn't say so. We will fix that.
      
      We'll also remove some cruft on power which would prevent (2).
      
      This patch (of 5):
      
      Remove the check which prevents us from hotplugging into an empty node.
      
      The original commit b226e462 ("[PATCH] powerpc: don't add memory to
      empty node/zone"), states that this was intended to be a temporary measure.
      It is a workaround for an oops which no longer occurs.
      
      Link: http://lkml.kernel.org/r/1479160961-25840-2-git-send-email-arbab@linux.vnet.ibm.com
      Signed-off-by: Reza Arbab <arbab@linux.vnet.ibm.com>
      Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
      Acked-by: Balbir Singh <bsingharora@gmail.com>
      Acked-by: Michael Ellerman <mpe@ellerman.id.au>
      Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
      Cc: "H. Peter Anvin" <hpa@zytor.com>
      Cc: Alistair Popple <apopple@au1.ibm.com>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
      Cc: Frank Rowand <frowand.list@gmail.com>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Nathan Fontenot <nfont@linux.vnet.ibm.com>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: Rob Herring <robh+dt@kernel.org>
      Cc: Stewart Smith <stewart@linux.vnet.ibm.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      4a3bac4e
  15. 19 10月, 2016 2 次提交
    • A
      powerpc: Fix numa topology console print · 8467801c
      Aneesh Kumar K.V 提交于
      With recent update to printk, we get console output like below:
      
      [    0.550639] Brought up 160 CPUs
      [    0.550718] Node 0 CPUs:
      [    0.550721]  0
      [    0.550754] -39
      
      [    0.550794] Node 1 CPUs:
      [    0.550798]  40
      [    0.550817] -79
      
      [    0.550856] Node 16 CPUs:
      [    0.550860]  80
      [    0.550880] -119
      
      [    0.550917] Node 17 CPUs:
      [    0.550923]  120
      [    0.550942] -159
      
      Fix this by properly using pr_cont(), ie. KERN_CONT.
      Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      8467801c
    • M
      powerpc/mm: Drop dump_numa_memory_topology() · 08b5e79e
      Michael Ellerman 提交于
      At boot we dump the NUMA memory topology in dump_numa_memory_topology(),
      at KERN_DEBUG level, resulting in output like:
      
        Node 0 Memory: 0x0-0x100000000
        Node 1 Memory: 0x100000000-0x200000000
      
      Which is nice enough, but immediately after that we iterate over each
      node and call setup_node_data(), which also prints out the node ranges,
      at KERN_INFO, giving eg:
      
        numa: Initmem setup node 0 [mem 0x00000000-0xffffffff]
        numa: Initmem setup node 1 [mem 0x100000000-0x1ffffffff]
      
      Additionally dump_numa_memory_topology() does not use KERN_CONT
      correctly, resulting in split output lines on recent kernels.
      
      So drop dump_numa_memory_topology() as superfluous chatter.
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      Acked-by: Balbir Singh <bsingharora@gmail.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      08b5e79e
  16. 23 7月, 2016 1 次提交
    • S
      powerpc/numa: Convert to hotplug state machine · bdab88e0
      Sebastian Andrzej Siewior 提交于
      Install the callbacks via the state machine. On the boot cpu the callback is
      invoked manually because cpuhp is not up yet and everything must be
      preinitialized before additional CPUs are up.
      Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
      Cc: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
      Cc: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
      Cc: Anton Blanchard <anton@samba.org>
      Cc: Michael Ellerman <mpe@ellerman.id.au>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: linuxppc-dev@lists.ozlabs.org
      Cc: rt@linutronix.de
      Link: http://lkml.kernel.org/r/20160718140727.GA13132@linutronix.de
      Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
      bdab88e0
  17. 14 6月, 2016 2 次提交
    • B
      powerpc/numa: Fix multiple bugs in memory_hotplug_max() · 45b64ee6
      Bharata B Rao 提交于
      memory_hotplug_max() uses hot_add_drconf_memory_max() to get maximum
      addressable memory by referring to ibm,dynamic-memory property. There
      are three problems with the current approach:
      
      1 hot_add_drconf_memory_max() assumes that ibm,dynamic-memory includes
        all the LMBs of the guest, but that is not true for PowerKVM which
        populates only DR LMBs (LMBs that can be hotplugged/removed) in that
        property.
      2 hot_add_drconf_memory_max() multiplies lmb-size with lmb-count to arrive
        at the max possible address. Since ibm,dynamic-memory doesn't include
        RMA LMBs, the address thus obtained will be less than the actual max
        address. For example, if max possible memory size is 32G, with lmb-size
        of 256MB there can be 127 LMBs in ibm,dynamic-memory (1 LMB for RMA
        which won't be present here).  hot_add_drconf_memory_max() would then
        return the max addressable memory as 127 * 256MB = 31.75GB, the max
        address should have been 32G which is what ibm,lrdr-capacity shows.
      3 In PowerKVM, there can be a gap between the end of boot time RAM and
        beginning of hotplug RAM area. So just multiplying lmb-count with
        lmb-size will not provide the correct max possible address for PowerKVM.
      
      This patch fixes 1 by using ibm,lrdr-capacity property to return the max
      addressable memory whenever the property is present. Then it fixes 2 & 3
      by fetching the address of the last LMB in ibm,dynamic-memory property.
      
      Fixes: cd34206e ("powerpc: Add memory_hotplug_max()")
      Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
      Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      45b64ee6
    • B
  18. 06 11月, 2015 1 次提交
    • R
      arch/powerpc/mm/numa.c: do not allocate bootmem memory for non existing nodes · c118baf8
      Raghavendra K T 提交于
      With the setup_nr_nodes(), we have already initialized
      node_possible_map.  So it is safe to use for_each_node here.
      
      There are many places in the kernel that use hardcoded 'for' loop with
      nr_node_ids, because all other architectures have numa nodes populated
      serially.  That should be the reason we had maintained the same for
      powerpc.
      
      But, since sparse numa node ids are possible on powerpc, we unnecessarily
      allocate memory for non existent numa nodes.
      
      For e.g., on a system with 0,1,16,17 as numa nodes nr_node_ids=18 and
      we allocate memory for nodes 2-14.  With this patch, we allocate memory
      only for existing numa nodes.
      
      The patch is boot tested on a 4 node tuleta, confirming with printks
      that it works as expected.
      Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
      Cc: Vladimir Davydov <vdavydov@parallels.com>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: Michael Ellerman <mpe@ellerman.id.au>
      Cc: Anton Blanchard <anton@samba.org>
      Cc: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
      Cc: Greg Kurz <gkurz@linux.vnet.ibm.com>
      Cc: Grant Likely <grant.likely@linaro.org>
      Cc: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
      Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      c118baf8
  19. 15 10月, 2015 1 次提交
  20. 18 8月, 2015 1 次提交
    • N
      powerpc/numa: initialize distance lookup table from drconf path · 1d805440
      Nikunj A Dadhania 提交于
      In some situations, a NUMA guest that supports the
      ibm,dynamic-reconfiguration-memory node will end up having flat NUMA
      distances between nodes. This is because of two problems in the
      current code.
      
      1) Different representations of associativity lists.
      
         There is an assumption about the associativity list in
         initialize_distance_lookup_table(). Associativity list has two forms:
      
         a) [cpu,memory]@x/ibm,associativity has the following
            format:
                 <N> <N integers>
      
         b) ibm,dynamic-reconfiguration-memory/ibm,associativity-lookup-arrays
      
                 <M> <N> <M associativity lists each having N integers>
                 M = the number of associativity lists
                 N = the number of entries per associativity list
      
         Fix initialize_distance_lookup_table() so that it does not assume
         "case a". And update the caller to skip the length field before
         sending the associativity list.
      
      2) Distance table not getting updated from drconf path.
      
         Node distance table will not get initialized in certain cases as
         ibm,dynamic-reconfiguration-memory path does not initialize the
         lookup table.
      
         Call initialize_distance_lookup_table() from drconf path with
         appropriate associativity list.
      Reported-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
      Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
      Acked-by: Anton Blanchard <anton@samba.org>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      1d805440
  21. 23 3月, 2015 1 次提交
    • N
      powerpc/numa: Reset node_possible_map to only node_online_map · 3af229f2
      Nishanth Aravamudan 提交于
      Raghu noticed an issue with excessive memory allocation on power with a
      simple cgroup test, specifically, in mem_cgroup_css_alloc ->
      for_each_node -> alloc_mem_cgroup_per_zone_info(), which ends up blowing
      up the kmalloc-2048 slab (to the order of 200MB for 400 cgroup
      directories).
      
      The underlying issue is that NODES_SHIFT on power is 8 (256 NUMA nodes
      possible), which defines node_possible_map, which in turn defines the
      value of nr_node_ids in setup_nr_node_ids and the iteration of
      for_each_node.
      
      In practice, we never see a system with 256 NUMA nodes, and in fact, we
      do not support node hotplug on power in the first place, so the nodes
      that are online when we come up are the nodes that will be present for
      the lifetime of this kernel. So let's, at least, drop the NUMA possible
      map down to the online map at runtime. This is similar to what x86 does
      in its initialization routines.
      
      mem_cgroup_css_alloc should also be fixed to only iterate over
      memory-populated nodes and handle hotplug, but that is a separate
      change.
      Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: David Rientjes <rientjes@google.com>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: Anton Blanchard <anton@samba.org>
      Cc: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
      Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
      3af229f2
  22. 18 3月, 2015 3 次提交
  23. 25 11月, 2014 1 次提交
    • G
      of/reconfig: Always use the same structure for notifiers · f5242e5a
      Grant Likely 提交于
      The OF_RECONFIG notifier callback uses a different structure depending
      on whether it is a node change or a property change. This is silly, and
      not very safe. Rework the code to use the same data structure regardless
      of the type of notifier.
      Signed-off-by: Grant Likely <grant.likely@linaro.org>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Rob Herring <robh+dt@kernel.org>
      Cc: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
      Cc: <linuxppc-dev@lists.ozlabs.org>
      f5242e5a
  24. 10 11月, 2014 2 次提交
  25. 29 10月, 2014 1 次提交