1. 05 6月, 2019 1 次提交
  2. 22 3月, 2019 1 次提交
  3. 25 9月, 2018 1 次提交
  4. 22 11月, 2017 1 次提交
    • K
      treewide: setup_timer() -> timer_setup() · e99e88a9
      Kees Cook 提交于
      This converts all remaining cases of the old setup_timer() API into using
      timer_setup(), where the callback argument is the structure already
      holding the struct timer_list. These should have no behavioral changes,
      since they just change which pointer is passed into the callback with
      the same available pointers after conversion. It handles the following
      examples, in addition to some other variations.
      
      Casting from unsigned long:
      
          void my_callback(unsigned long data)
          {
              struct something *ptr = (struct something *)data;
          ...
          }
          ...
          setup_timer(&ptr->my_timer, my_callback, ptr);
      
      and forced object casts:
      
          void my_callback(struct something *ptr)
          {
          ...
          }
          ...
          setup_timer(&ptr->my_timer, my_callback, (unsigned long)ptr);
      
      become:
      
          void my_callback(struct timer_list *t)
          {
              struct something *ptr = from_timer(ptr, t, my_timer);
          ...
          }
          ...
          timer_setup(&ptr->my_timer, my_callback, 0);
      
      Direct function assignments:
      
          void my_callback(unsigned long data)
          {
              struct something *ptr = (struct something *)data;
          ...
          }
          ...
          ptr->my_timer.function = my_callback;
      
      have a temporary cast added, along with converting the args:
      
          void my_callback(struct timer_list *t)
          {
              struct something *ptr = from_timer(ptr, t, my_timer);
          ...
          }
          ...
          ptr->my_timer.function = (TIMER_FUNC_TYPE)my_callback;
      
      And finally, callbacks without a data assignment:
      
          void my_callback(unsigned long data)
          {
          ...
          }
          ...
          setup_timer(&ptr->my_timer, my_callback, 0);
      
      have their argument renamed to verify they're unused during conversion:
      
          void my_callback(struct timer_list *unused)
          {
          ...
          }
          ...
          timer_setup(&ptr->my_timer, my_callback, 0);
      
      The conversion is done with the following Coccinelle script:
      
      spatch --very-quiet --all-includes --include-headers \
      	-I ./arch/x86/include -I ./arch/x86/include/generated \
      	-I ./include -I ./arch/x86/include/uapi \
      	-I ./arch/x86/include/generated/uapi -I ./include/uapi \
      	-I ./include/generated/uapi --include ./include/linux/kconfig.h \
      	--dir . \
      	--cocci-file ~/src/data/timer_setup.cocci
      
      @fix_address_of@
      expression e;
      @@
      
       setup_timer(
      -&(e)
      +&e
       , ...)
      
      // Update any raw setup_timer() usages that have a NULL callback, but
      // would otherwise match change_timer_function_usage, since the latter
      // will update all function assignments done in the face of a NULL
      // function initialization in setup_timer().
      @change_timer_function_usage_NULL@
      expression _E;
      identifier _timer;
      type _cast_data;
      @@
      
      (
      -setup_timer(&_E->_timer, NULL, _E);
      +timer_setup(&_E->_timer, NULL, 0);
      |
      -setup_timer(&_E->_timer, NULL, (_cast_data)_E);
      +timer_setup(&_E->_timer, NULL, 0);
      |
      -setup_timer(&_E._timer, NULL, &_E);
      +timer_setup(&_E._timer, NULL, 0);
      |
      -setup_timer(&_E._timer, NULL, (_cast_data)&_E);
      +timer_setup(&_E._timer, NULL, 0);
      )
      
      @change_timer_function_usage@
      expression _E;
      identifier _timer;
      struct timer_list _stl;
      identifier _callback;
      type _cast_func, _cast_data;
      @@
      
      (
      -setup_timer(&_E->_timer, _callback, _E);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E->_timer, &_callback, _E);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E->_timer, _callback, (_cast_data)_E);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E->_timer, &_callback, (_cast_data)_E);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E->_timer, (_cast_func)_callback, _E);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E->_timer, (_cast_func)&_callback, _E);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E->_timer, (_cast_func)_callback, (_cast_data)_E);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E->_timer, (_cast_func)&_callback, (_cast_data)_E);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E._timer, _callback, (_cast_data)_E);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_E._timer, _callback, (_cast_data)&_E);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_E._timer, &_callback, (_cast_data)_E);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_E._timer, &_callback, (_cast_data)&_E);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)_E);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)&_E);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)_E);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)&_E);
      +timer_setup(&_E._timer, _callback, 0);
      |
       _E->_timer@_stl.function = _callback;
      |
       _E->_timer@_stl.function = &_callback;
      |
       _E->_timer@_stl.function = (_cast_func)_callback;
      |
       _E->_timer@_stl.function = (_cast_func)&_callback;
      |
       _E._timer@_stl.function = _callback;
      |
       _E._timer@_stl.function = &_callback;
      |
       _E._timer@_stl.function = (_cast_func)_callback;
      |
       _E._timer@_stl.function = (_cast_func)&_callback;
      )
      
      // callback(unsigned long arg)
      @change_callback_handle_cast
       depends on change_timer_function_usage@
      identifier change_timer_function_usage._callback;
      identifier change_timer_function_usage._timer;
      type _origtype;
      identifier _origarg;
      type _handletype;
      identifier _handle;
      @@
      
       void _callback(
      -_origtype _origarg
      +struct timer_list *t
       )
       {
      (
      	... when != _origarg
      	_handletype *_handle =
      -(_handletype *)_origarg;
      +from_timer(_handle, t, _timer);
      	... when != _origarg
      |
      	... when != _origarg
      	_handletype *_handle =
      -(void *)_origarg;
      +from_timer(_handle, t, _timer);
      	... when != _origarg
      |
      	... when != _origarg
      	_handletype *_handle;
      	... when != _handle
      	_handle =
      -(_handletype *)_origarg;
      +from_timer(_handle, t, _timer);
      	... when != _origarg
      |
      	... when != _origarg
      	_handletype *_handle;
      	... when != _handle
      	_handle =
      -(void *)_origarg;
      +from_timer(_handle, t, _timer);
      	... when != _origarg
      )
       }
      
      // callback(unsigned long arg) without existing variable
      @change_callback_handle_cast_no_arg
       depends on change_timer_function_usage &&
                           !change_callback_handle_cast@
      identifier change_timer_function_usage._callback;
      identifier change_timer_function_usage._timer;
      type _origtype;
      identifier _origarg;
      type _handletype;
      @@
      
       void _callback(
      -_origtype _origarg
      +struct timer_list *t
       )
       {
      +	_handletype *_origarg = from_timer(_origarg, t, _timer);
      +
      	... when != _origarg
      -	(_handletype *)_origarg
      +	_origarg
      	... when != _origarg
       }
      
      // Avoid already converted callbacks.
      @match_callback_converted
       depends on change_timer_function_usage &&
                  !change_callback_handle_cast &&
      	    !change_callback_handle_cast_no_arg@
      identifier change_timer_function_usage._callback;
      identifier t;
      @@
      
       void _callback(struct timer_list *t)
       { ... }
      
      // callback(struct something *handle)
      @change_callback_handle_arg
       depends on change_timer_function_usage &&
      	    !match_callback_converted &&
                  !change_callback_handle_cast &&
                  !change_callback_handle_cast_no_arg@
      identifier change_timer_function_usage._callback;
      identifier change_timer_function_usage._timer;
      type _handletype;
      identifier _handle;
      @@
      
       void _callback(
      -_handletype *_handle
      +struct timer_list *t
       )
       {
      +	_handletype *_handle = from_timer(_handle, t, _timer);
      	...
       }
      
      // If change_callback_handle_arg ran on an empty function, remove
      // the added handler.
      @unchange_callback_handle_arg
       depends on change_timer_function_usage &&
      	    change_callback_handle_arg@
      identifier change_timer_function_usage._callback;
      identifier change_timer_function_usage._timer;
      type _handletype;
      identifier _handle;
      identifier t;
      @@
      
       void _callback(struct timer_list *t)
       {
      -	_handletype *_handle = from_timer(_handle, t, _timer);
       }
      
      // We only want to refactor the setup_timer() data argument if we've found
      // the matching callback. This undoes changes in change_timer_function_usage.
      @unchange_timer_function_usage
       depends on change_timer_function_usage &&
                  !change_callback_handle_cast &&
                  !change_callback_handle_cast_no_arg &&
      	    !change_callback_handle_arg@
      expression change_timer_function_usage._E;
      identifier change_timer_function_usage._timer;
      identifier change_timer_function_usage._callback;
      type change_timer_function_usage._cast_data;
      @@
      
      (
      -timer_setup(&_E->_timer, _callback, 0);
      +setup_timer(&_E->_timer, _callback, (_cast_data)_E);
      |
      -timer_setup(&_E._timer, _callback, 0);
      +setup_timer(&_E._timer, _callback, (_cast_data)&_E);
      )
      
      // If we fixed a callback from a .function assignment, fix the
      // assignment cast now.
      @change_timer_function_assignment
       depends on change_timer_function_usage &&
                  (change_callback_handle_cast ||
                   change_callback_handle_cast_no_arg ||
                   change_callback_handle_arg)@
      expression change_timer_function_usage._E;
      identifier change_timer_function_usage._timer;
      identifier change_timer_function_usage._callback;
      type _cast_func;
      typedef TIMER_FUNC_TYPE;
      @@
      
      (
       _E->_timer.function =
      -_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E->_timer.function =
      -&_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E->_timer.function =
      -(_cast_func)_callback;
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E->_timer.function =
      -(_cast_func)&_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E._timer.function =
      -_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E._timer.function =
      -&_callback;
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E._timer.function =
      -(_cast_func)_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E._timer.function =
      -(_cast_func)&_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      )
      
      // Sometimes timer functions are called directly. Replace matched args.
      @change_timer_function_calls
       depends on change_timer_function_usage &&
                  (change_callback_handle_cast ||
                   change_callback_handle_cast_no_arg ||
                   change_callback_handle_arg)@
      expression _E;
      identifier change_timer_function_usage._timer;
      identifier change_timer_function_usage._callback;
      type _cast_data;
      @@
      
       _callback(
      (
      -(_cast_data)_E
      +&_E->_timer
      |
      -(_cast_data)&_E
      +&_E._timer
      |
      -_E
      +&_E->_timer
      )
       )
      
      // If a timer has been configured without a data argument, it can be
      // converted without regard to the callback argument, since it is unused.
      @match_timer_function_unused_data@
      expression _E;
      identifier _timer;
      identifier _callback;
      @@
      
      (
      -setup_timer(&_E->_timer, _callback, 0);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E->_timer, _callback, 0L);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E->_timer, _callback, 0UL);
      +timer_setup(&_E->_timer, _callback, 0);
      |
      -setup_timer(&_E._timer, _callback, 0);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_E._timer, _callback, 0L);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_E._timer, _callback, 0UL);
      +timer_setup(&_E._timer, _callback, 0);
      |
      -setup_timer(&_timer, _callback, 0);
      +timer_setup(&_timer, _callback, 0);
      |
      -setup_timer(&_timer, _callback, 0L);
      +timer_setup(&_timer, _callback, 0);
      |
      -setup_timer(&_timer, _callback, 0UL);
      +timer_setup(&_timer, _callback, 0);
      |
      -setup_timer(_timer, _callback, 0);
      +timer_setup(_timer, _callback, 0);
      |
      -setup_timer(_timer, _callback, 0L);
      +timer_setup(_timer, _callback, 0);
      |
      -setup_timer(_timer, _callback, 0UL);
      +timer_setup(_timer, _callback, 0);
      )
      
      @change_callback_unused_data
       depends on match_timer_function_unused_data@
      identifier match_timer_function_unused_data._callback;
      type _origtype;
      identifier _origarg;
      @@
      
       void _callback(
      -_origtype _origarg
      +struct timer_list *unused
       )
       {
      	... when != _origarg
       }
      Signed-off-by: Kees Cook <keescook@chromium.org>
      e99e88a9
  5. 07 11月, 2017 1 次提交
  6. 12 10月, 2017 1 次提交
    • T
      iommu/iova: Make rcache flush optional on IOVA allocation failure · 538d5b33
      Tomasz Nowicki 提交于
      Since IOVA allocation failure is not an unusual case, we need to flush
      the CPUs' rcaches in the hope that we will succeed in the next round.
      
      However, it is useful to decide whether we need rcache flush step because
      of two reasons:
      - Poor scalability. On a large system with ~100 CPUs, iterating over and
        flushing the rcache for each CPU becomes a serious bottleneck, so we may
        want to defer it.
      - free_cpu_cached_iovas() does not care about max PFN we are interested in.
        Thus we may flush our rcaches and still get no new IOVA like in the
        commonly used scenario:
      
          if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
              iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
      
          if (!iova)
              iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
      
         1. First alloc_iova_fast() call is limited to DMA_BIT_MASK(32) to get
            PCI devices a SAC address
         2. alloc_iova() fails due to full 32-bit space
         3. rcaches contain PFNs out of 32-bit space so free_cpu_cached_iovas()
            throws entries away for nothing and alloc_iova() fails again
         4. Next alloc_iova_fast() call cannot take advantage of rcache since we
            have just defeated caches. In this case we pick the slowest option
            to proceed.
      
      This patch reworks flushed_rcache local flag to be additional function
      argument instead and control rcache flush step. Also, it updates all users
      to do the flush as the last chance.
      Signed-off-by: Tomasz Nowicki <Tomasz.Nowicki@caviumnetworks.com>
      Reviewed-by: Robin Murphy <robin.murphy@arm.com>
      Tested-by: Nate Watterson <nwatters@codeaurora.org>
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      538d5b33
  7. 02 10月, 2017 1 次提交
  8. 28 9月, 2017 3 次提交
    • R
      iommu/iova: Try harder to allocate from rcache magazine · e8b19840
      Robin Murphy 提交于
      When devices with different DMA masks are using the same domain, or for
      PCI devices where we usually try a speculative 32-bit allocation first,
      there is a fair possibility that the top PFN of the rcache stack at any
      given time may be unsuitable for the lower limit, prompting a fallback
      to allocating anew from the rbtree. Consequently, we may end up
      artificially increasing pressure on the 32-bit IOVA space as unused IOVAs
      accumulate lower down in the rcache stacks, while callers with 32-bit
      masks also impose unnecessary rbtree overhead.
      
      In such cases, let's try a bit harder to satisfy the allocation locally
      first - scanning the whole stack should still be relatively inexpensive.
      Signed-off-by: Robin Murphy <robin.murphy@arm.com>
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      e8b19840
    • R
      iommu/iova: Make rcache limit_pfn handling more robust · b826ee9a
      Robin Murphy 提交于
      When popping a pfn from an rcache, we are currently checking it directly
      against limit_pfn for viability. Since this represents iova->pfn_lo, it
      is technically possible for the corresponding iova->pfn_hi to be greater
      than limit_pfn. Although we generally get away with it in practice since
      limit_pfn is typically a power-of-two boundary and the IOVAs are
      size-aligned, it's pretty trivial to make the iova_rcache_get() path
      take the allocation size into account for complete safety.
      Signed-off-by: Robin Murphy <robin.murphy@arm.com>
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      b826ee9a
    • R
      iommu/iova: Simplify domain destruction · 7595dc58
      Robin Murphy 提交于
      All put_iova_domain() should have to worry about is freeing memory - by
      that point the domain must no longer be live, so the act of cleaning up
      doesn't need to be concurrency-safe or maintain the rbtree in a
      self-consistent state. There's no need to waste time with locking or
      emptying the rcache magazines, and we can just use the postorder
      traversal helper to clear out the remaining rbtree entries in-place.
      Signed-off-by: Robin Murphy <robin.murphy@arm.com>
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      7595dc58
  9. 27 9月, 2017 6 次提交
  10. 16 8月, 2017 5 次提交
    • J
      iommu/iova: Add flush timer · 9a005a80
      Joerg Roedel 提交于
      Add a timer to flush entries from the Flush-Queues every
      10ms. This makes sure that no stale TLB entries remain for
      too long after an IOVA has been unmapped.
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      9a005a80
    • J
      iommu/iova: Add locking to Flush-Queues · 8109c2a2
      Joerg Roedel 提交于
      The lock is taken from the same CPU most of the time. But
      having it allows to flush the queue also from another CPU if
      necessary.
      
      This will be used by a timer to regularly flush any pending
      IOVAs from the Flush-Queues.
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      8109c2a2
    • J
      iommu/iova: Add flush counters to Flush-Queue implementation · fb418dab
      Joerg Roedel 提交于
      There are two counters:
      
      	* fq_flush_start_cnt  - Increased when a TLB flush
      	                        is started.
      
      	* fq_flush_finish_cnt - Increased when a TLB flush
      				is finished.
      
      The fq_flush_start_cnt is assigned to every Flush-Queue
      entry on its creation. When freeing entries from the
      Flush-Queue, the value in the entry is compared to the
      fq_flush_finish_cnt. The entry can only be freed when its
      value is less than the value of fq_flush_finish_cnt.
      
      The reason for these counters is to take advantage of IOMMU
      TLB flushes that happened on other CPUs. These already
      flushed the TLB for Flush-Queue entries on other CPUs so
      that they can already be freed without flushing the TLB
      again.
      
      This makes it less likely that the Flush-Queue is full and
      saves IOMMU TLB flushes.
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      fb418dab
    • J
      iommu/iova: Implement Flush-Queue ring buffer · 19282101
      Joerg Roedel 提交于
      Add a function to add entries to the Flush-Queue ring
      buffer. If the buffer is full, call the flush-callback and
      free the entries.
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      19282101
    • J
      iommu/iova: Add flush-queue data structures · 42f87e71
      Joerg Roedel 提交于
      This patch adds the basic data-structures to implement
      flush-queues in the generic IOVA code. It also adds the
      initialization and destroy routines for these data
      structures.
      
      The initialization routine is designed so that the use of
      this feature is optional for the users of IOVA code.
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      42f87e71
  11. 28 6月, 2017 1 次提交
    • S
      iommu/iova: Don't disable preempt around this_cpu_ptr() · aaffaa8a
      Sebastian Andrzej Siewior 提交于
      Commit 583248e6 ("iommu/iova: Disable preemption around use of
      this_cpu_ptr()") disables preemption while accessing a per-CPU variable.
      This does keep lockdep quiet. However I don't see the point why it is
      bad if we get migrated after its access to another CPU.
      __iova_rcache_insert() and __iova_rcache_get() immediately locks the
      variable after obtaining it - before accessing its members.
      _If_ we get migrated away after retrieving the address of cpu_rcache
      before taking the lock then the *other* task on the same CPU will
      retrieve the same address of cpu_rcache and will spin on the lock.
      
      alloc_iova_fast() disables preemption while invoking
      free_cpu_cached_iovas() on each CPU. The function itself uses
      per_cpu_ptr() which does not trigger a warning (like this_cpu_ptr()
      does). It _could_ make sense to use get_online_cpus() instead but then we
      have a hotplug notifier for CPU down (and none for up) so we are good.
      
      Cc: Joerg Roedel <joro@8bytes.org>
      Cc: iommu@lists.linux-foundation.org
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      aaffaa8a
  12. 17 5月, 2017 1 次提交
    • R
      iommu/iova: Sort out rbtree limit_pfn handling · 757c370f
      Robin Murphy 提交于
      When walking the rbtree, the fact that iovad->start_pfn and limit_pfn
      are both inclusive limits creates an ambiguity once limit_pfn reaches
      the bottom of the address space and they overlap. Commit 5016bdb7
      ("iommu/iova: Fix underflow bug in __alloc_and_insert_iova_range") fixed
      the worst side-effect of this, that of underflow wraparound leading to
      bogus allocations, but the remaining fallout is that any attempt to
      allocate start_pfn itself erroneously fails.
      
      The cleanest way to resolve the ambiguity is to simply make limit_pfn an
      exclusive limit when inside the guts of the rbtree. Since we're working
      with PFNs, representing one past the top of the address space is always
      possible without fear of overflow, and elsewhere it just makes life a
      little more straightforward.
      Reported-by: Aaron Sierra <asierra@xes-inc.com>
      Signed-off-by: Robin Murphy <robin.murphy@arm.com>
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      757c370f
  13. 07 4月, 2017 1 次提交
    • N
      iommu/iova: Fix underflow bug in __alloc_and_insert_iova_range · 5016bdb7
      Nate Watterson 提交于
      Normally, calling alloc_iova() using an iova_domain with insufficient
      pfns remaining between start_pfn and dma_limit will fail and return a
      NULL pointer. Unexpectedly, if such a "full" iova_domain contains an
      iova with pfn_lo == 0, the alloc_iova() call will instead succeed and
      return an iova containing invalid pfns.
      
      This is caused by an underflow bug in __alloc_and_insert_iova_range()
      that occurs after walking the "full" iova tree when the search ends
      at the iova with pfn_lo == 0 and limit_pfn is then adjusted to be just
      below that (-1). This (now huge) limit_pfn gives the impression that a
      vast amount of space is available between it and start_pfn and thus
      a new iova is allocated with the invalid pfn_hi value, 0xFFF.... .
      
      To remedy this, a check is introduced to ensure that adjustments to
      limit_pfn will not underflow.
      
      This issue has been observed in the wild, and is easily reproduced with
      the following sample code.
      
      	struct iova_domain *iovad = kzalloc(sizeof(*iovad), GFP_KERNEL);
      	struct iova *rsvd_iova, *good_iova, *bad_iova;
      	unsigned long limit_pfn = 3;
      	unsigned long start_pfn = 1;
      	unsigned long va_size = 2;
      
      	init_iova_domain(iovad, SZ_4K, start_pfn, limit_pfn);
      	rsvd_iova = reserve_iova(iovad, 0, 0);
      	good_iova = alloc_iova(iovad, va_size, limit_pfn, true);
      	bad_iova = alloc_iova(iovad, va_size, limit_pfn, true);
      
      Prior to the patch, this yielded:
      	*rsvd_iova == {0, 0}   /* Expected */
      	*good_iova == {2, 3}   /* Expected */
      	*bad_iova  == {-2, -1} /* Oh no... */
      
      After the patch, bad_iova is NULL as expected since inadequate
      space remains between limit_pfn and start_pfn after allocating
      good_iova.
      Signed-off-by: Nate Watterson <nwatters@codeaurora.org>
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      5016bdb7
  14. 21 3月, 2017 1 次提交
  15. 04 1月, 2017 1 次提交
  16. 15 11月, 2016 1 次提交
    • R
      iommu/iova: Extend cached node lookup condition · 62280cf2
      Robin Murphy 提交于
      When searching for a free IOVA range, we optimise the tree traversal
      by starting from the cached32_node, instead of the last node, when
      limit_pfn is equal to dma_32bit_pfn. However, if limit_pfn happens to
      be smaller, then we'll go ahead and start from the top even though
      dma_32bit_pfn is still a more suitable upper bound. Since this is
      clearly a silly thing to do, adjust the lookup condition appropriately.
      Signed-off-by: Robin Murphy <robin.murphy@arm.com>
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      62280cf2
  17. 27 6月, 2016 1 次提交
    • C
      iommu/iova: Disable preemption around use of this_cpu_ptr() · 583248e6
      Chris Wilson 提交于
      Between acquiring the this_cpu_ptr() and using it, ideally we don't want
      to be preempted and work on another CPU's private data. this_cpu_ptr()
      checks whether or not preemption is disabled, and get_cpu_ptr() provides
      a convenient wrapper for operating on the cpu ptr inside a preemption
      disabled critical section (which currently is provided by the
      spinlock).
      
      [  167.997877] BUG: using smp_processor_id() in preemptible [00000000] code: usb-storage/216
      [  167.997940] caller is debug_smp_processor_id+0x17/0x20
      [  167.997945] CPU: 7 PID: 216 Comm: usb-storage Tainted: G     U          4.7.0-rc1-gfxbench-RO_Patchwork_1057+ #1
      [  167.997948] Hardware name: Hewlett-Packard HP Pro 3500 Series/2ABF, BIOS 8.11 10/24/2012
      [  167.997951]  0000000000000000 ffff880118b7f9c8 ffffffff8140dca5 0000000000000007
      [  167.997958]  ffffffff81a3a7e9 ffff880118b7f9f8 ffffffff8142a927 0000000000000000
      [  167.997965]  ffff8800d499ed58 0000000000000001 00000000000fffff ffff880118b7fa08
      [  167.997971] Call Trace:
      [  167.997977]  [<ffffffff8140dca5>] dump_stack+0x67/0x92
      [  167.997981]  [<ffffffff8142a927>] check_preemption_disabled+0xd7/0xe0
      [  167.997985]  [<ffffffff8142a947>] debug_smp_processor_id+0x17/0x20
      [  167.997990]  [<ffffffff81507e17>] alloc_iova_fast+0xb7/0x210
      [  167.997994]  [<ffffffff8150c55f>] intel_alloc_iova+0x7f/0xd0
      [  167.997998]  [<ffffffff8151021d>] intel_map_sg+0xbd/0x240
      [  167.998002]  [<ffffffff810e5efd>] ? debug_lockdep_rcu_enabled+0x1d/0x20
      [  167.998009]  [<ffffffff81596059>] usb_hcd_map_urb_for_dma+0x4b9/0x5a0
      [  167.998013]  [<ffffffff81596d19>] usb_hcd_submit_urb+0xe9/0xaa0
      [  167.998017]  [<ffffffff810cff2f>] ? mark_held_locks+0x6f/0xa0
      [  167.998022]  [<ffffffff810d525c>] ? __raw_spin_lock_init+0x1c/0x50
      [  167.998025]  [<ffffffff810e5efd>] ? debug_lockdep_rcu_enabled+0x1d/0x20
      [  167.998028]  [<ffffffff815988f3>] usb_submit_urb+0x3f3/0x5a0
      [  167.998032]  [<ffffffff810d0082>] ? trace_hardirqs_on_caller+0x122/0x1b0
      [  167.998035]  [<ffffffff81599ae7>] usb_sg_wait+0x67/0x150
      [  167.998039]  [<ffffffff815dc202>] usb_stor_bulk_transfer_sglist.part.3+0x82/0xd0
      [  167.998042]  [<ffffffff815dc29c>] usb_stor_bulk_srb+0x4c/0x60
      [  167.998045]  [<ffffffff815dc42e>] usb_stor_Bulk_transport+0x17e/0x420
      [  167.998049]  [<ffffffff815dcf32>] usb_stor_invoke_transport+0x242/0x540
      [  167.998052]  [<ffffffff810e5efd>] ? debug_lockdep_rcu_enabled+0x1d/0x20
      [  167.998058]  [<ffffffff815dba19>] usb_stor_transparent_scsi_command+0x9/0x10
      [  167.998061]  [<ffffffff815de518>] usb_stor_control_thread+0x158/0x260
      [  167.998064]  [<ffffffff815de3c0>] ? fill_inquiry_response+0x20/0x20
      [  167.998067]  [<ffffffff815de3c0>] ? fill_inquiry_response+0x20/0x20
      [  167.998071]  [<ffffffff8109ddfa>] kthread+0xea/0x100
      [  167.998078]  [<ffffffff817ac6af>] ret_from_fork+0x1f/0x40
      [  167.998081]  [<ffffffff8109dd10>] ? kthread_create_on_node+0x1f0/0x1f0
      
      Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96293
      Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
      Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
      Cc: Joerg Roedel <joro@8bytes.org>
      Cc: iommu@lists.linux-foundation.org
      Cc: linux-kernel@vger.kernel.org
      Fixes: 9257b4a2 ('iommu/iova: introduce per-cpu caching to iova allocation')
      Signed-off-by: Joerg Roedel <jroedel@suse.de>
      583248e6
  18. 21 4月, 2016 1 次提交
    • O
      iommu/iova: introduce per-cpu caching to iova allocation · 9257b4a2
      Omer Peleg 提交于
      IOVA allocation has two problems that impede high-throughput I/O.
      First, it can do a linear search over the allocated IOVA ranges.
      Second, the rbtree spinlock that serializes IOVA allocations becomes
      contended.
      
      Address these problems by creating an API for caching allocated IOVA
      ranges, so that the IOVA allocator isn't accessed frequently.  This
      patch adds a per-CPU cache, from which CPUs can alloc/free IOVAs
      without taking the rbtree spinlock.  The per-CPU caches are backed by
      a global cache, to avoid invoking the (linear-time) IOVA allocator
      without needing to make the per-CPU cache size excessive.  This design
      is based on magazines, as described in "Magazines and Vmem: Extending
      the Slab Allocator to Many CPUs and Arbitrary Resources" (currently
      available at https://www.usenix.org/legacy/event/usenix01/bonwick.html)
      
      Adding caching on top of the existing rbtree allocator maintains the
      property that IOVAs are densely packed in the IO virtual address space,
      which is important for keeping IOMMU page table usage low.
      
      To keep the cache size reasonable, we bound the IOVA space a CPU can
      cache by 32 MiB (we cache a bounded number of IOVA ranges, and only
      ranges of size <= 128 KiB).  The shared global cache is bounded at
      4 MiB of IOVA space.
      Signed-off-by: Omer Peleg <omer@cs.technion.ac.il>
      [mad@cs.technion.ac.il: rebased, cleaned up and reworded the commit message]
      Signed-off-by: Adam Morrison <mad@cs.technion.ac.il>
      Reviewed-by: Shaohua Li <shli@fb.com>
      Reviewed-by: Ben Serebrin <serebrin@google.com>
      [dwmw2: split out VT-d part into a separate patch]
      Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
      9257b4a2
  19. 04 10月, 2015 3 次提交
  20. 28 7月, 2015 4 次提交
  21. 05 5月, 2015 1 次提交
  22. 19 1月, 2015 3 次提交