1. 22 Nov 2017, 1 commit
      treewide: setup_timer() -> timer_setup() (2 field) · 86cb30ec
      Authored by Kees Cook
      This converts all remaining setup_timer() calls that use a nested field
      to reach a struct timer_list. Coccinelle does not have an easy way to
      match multiple fields, so a new script is needed to change the matches of
      "&_E->_timer" into "&_E->_field1._timer" in all the rules.
      
      spatch --very-quiet --all-includes --include-headers \
      	-I ./arch/x86/include -I ./arch/x86/include/generated \
      	-I ./include -I ./arch/x86/include/uapi \
      	-I ./arch/x86/include/generated/uapi -I ./include/uapi \
      	-I ./include/generated/uapi --include ./include/linux/kconfig.h \
      	--dir . \
      	--cocci-file ~/src/data/timer_setup-2fields.cocci
      
      @fix_address_of@
      expression e;
      @@
      
       setup_timer(
      -&(e)
      +&e
       , ...)
      
      // Update any raw setup_timer() usages that have a NULL callback, but
      // would otherwise match change_timer_function_usage, since the latter
      // will update all function assignments done in the face of a NULL
      // function initialization in setup_timer().
      @change_timer_function_usage_NULL@
      expression _E;
      identifier _field1;
      identifier _timer;
      type _cast_data;
      @@
      
      (
      -setup_timer(&_E->_field1._timer, NULL, _E);
      +timer_setup(&_E->_field1._timer, NULL, 0);
      |
      -setup_timer(&_E->_field1._timer, NULL, (_cast_data)_E);
      +timer_setup(&_E->_field1._timer, NULL, 0);
      |
      -setup_timer(&_E._field1._timer, NULL, &_E);
      +timer_setup(&_E._field1._timer, NULL, 0);
      |
      -setup_timer(&_E._field1._timer, NULL, (_cast_data)&_E);
      +timer_setup(&_E._field1._timer, NULL, 0);
      )
      
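      // Match setup_timer() on a timer_list reached through one nested
      // field, covering every combination of casts and address-of on the
      // callback and data arguments; the bare .function assignment
      // branches let the callback rules below find handlers that were
      // hooked up directly.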
      @change_timer_function_usage@
      expression _E;
      identifier _field1;
      identifier _timer;
      struct timer_list _stl;
      identifier _callback;
      type _cast_func, _cast_data;
      @@
      
      (
      -setup_timer(&_E->_field1._timer, _callback, _E);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E->_field1._timer, &_callback, _E);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E->_field1._timer, _callback, (_cast_data)_E);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E->_field1._timer, &_callback, (_cast_data)_E);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E->_field1._timer, (_cast_func)_callback, _E);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E->_field1._timer, (_cast_func)&_callback, _E);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E->_field1._timer, (_cast_func)_callback, (_cast_data)_E);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E->_field1._timer, (_cast_func)&_callback, (_cast_data)_E);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, _callback, (_cast_data)_E);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, _callback, (_cast_data)&_E);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, &_callback, (_cast_data)_E);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, &_callback, (_cast_data)&_E);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, (_cast_func)_callback, (_cast_data)_E);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, (_cast_func)_callback, (_cast_data)&_E);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, (_cast_func)&_callback, (_cast_data)_E);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, (_cast_func)&_callback, (_cast_data)&_E);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
       _E->_field1._timer@_stl.function = _callback;
      |
       _E->_field1._timer@_stl.function = &_callback;
      |
       _E->_field1._timer@_stl.function = (_cast_func)_callback;
      |
       _E->_field1._timer@_stl.function = (_cast_func)&_callback;
      |
       _E._field1._timer@_stl.function = _callback;
      |
       _E._field1._timer@_stl.function = &_callback;
      |
       _E._field1._timer@_stl.function = (_cast_func)_callback;
      |
       _E._field1._timer@_stl.function = (_cast_func)&_callback;
      )
      
      // callback(unsigned long arg)
      @change_callback_handle_cast
       depends on change_timer_function_usage@
      identifier change_timer_function_usage._callback;
      identifier change_timer_function_usage._field1;
      identifier change_timer_function_usage._timer;
      type _origtype;
      identifier _origarg;
      type _handletype;
      identifier _handle;
      @@
      
       void _callback(
      -_origtype _origarg
      +struct timer_list *t
       )
       {
      (
      	... when != _origarg
      	_handletype *_handle =
      -(_handletype *)_origarg;
      +from_timer(_handle, t, _field1._timer);
      	... when != _origarg
      |
      	... when != _origarg
      	_handletype *_handle =
      -(void *)_origarg;
      +from_timer(_handle, t, _field1._timer);
      	... when != _origarg
      |
      	... when != _origarg
      	_handletype *_handle;
      	... when != _handle
      	_handle =
      -(_handletype *)_origarg;
      +from_timer(_handle, t, _field1._timer);
      	... when != _origarg
      |
      	... when != _origarg
      	_handletype *_handle;
      	... when != _handle
      	_handle =
      -(void *)_origarg;
      +from_timer(_handle, t, _field1._timer);
      	... when != _origarg
      )
       }
      
      // callback(unsigned long arg) without existing variable
      @change_callback_handle_cast_no_arg
       depends on change_timer_function_usage &&
                           !change_callback_handle_cast@
      identifier change_timer_function_usage._callback;
      identifier change_timer_function_usage._field1;
      identifier change_timer_function_usage._timer;
      type _origtype;
      identifier _origarg;
      type _handletype;
      @@
      
       void _callback(
      -_origtype _origarg
      +struct timer_list *t
       )
       {
      +	_handletype *_origarg = from_timer(_origarg, t, _field1._timer);
      +
      	... when != _origarg
      -	(_handletype *)_origarg
      +	_origarg
      	... when != _origarg
       }
      
      // Avoid already converted callbacks.
      @match_callback_converted
       depends on change_timer_function_usage &&
                  !change_callback_handle_cast &&
      	    !change_callback_handle_cast_no_arg@
      identifier change_timer_function_usage._callback;
      identifier t;
      @@
      
       void _callback(struct timer_list *t)
       { ... }
      
      // callback(struct something *handle)
      @change_callback_handle_arg
       depends on change_timer_function_usage &&
      	    !match_callback_converted &&
                  !change_callback_handle_cast &&
                  !change_callback_handle_cast_no_arg@
      identifier change_timer_function_usage._callback;
      identifier change_timer_function_usage._field1;
      identifier change_timer_function_usage._timer;
      type _handletype;
      identifier _handle;
      @@
      
       void _callback(
      -_handletype *_handle
      +struct timer_list *t
       )
       {
      +	_handletype *_handle = from_timer(_handle, t, _field1._timer);
      	...
       }
      
      // If change_callback_handle_arg ran on an empty function, remove
      // the added handler.
      @unchange_callback_handle_arg
       depends on change_timer_function_usage &&
      	    change_callback_handle_arg@
      identifier change_timer_function_usage._callback;
      identifier change_timer_function_usage._field1;
      identifier change_timer_function_usage._timer;
      type _handletype;
      identifier _handle;
      identifier t;
      @@
      
       void _callback(struct timer_list *t)
       {
      -	_handletype *_handle = from_timer(_handle, t, _field1._timer);
       }
      
      // We only want to refactor the setup_timer() data argument if we've found
      // the matching callback. This undoes changes in change_timer_function_usage.
      @unchange_timer_function_usage
       depends on change_timer_function_usage &&
                  !change_callback_handle_cast &&
                  !change_callback_handle_cast_no_arg &&
      	    !change_callback_handle_arg@
      expression change_timer_function_usage._E;
      identifier change_timer_function_usage._field1;
      identifier change_timer_function_usage._timer;
      identifier change_timer_function_usage._callback;
      type change_timer_function_usage._cast_data;
      @@
      
      (
      -timer_setup(&_E->_field1._timer, _callback, 0);
      +setup_timer(&_E->_field1._timer, _callback, (_cast_data)_E);
      |
      -timer_setup(&_E._field1._timer, _callback, 0);
      +setup_timer(&_E._field1._timer, _callback, (_cast_data)&_E);
      )
      
      // If we fixed a callback from a .function assignment, fix the
      // assignment cast now.
      @change_timer_function_assignment
       depends on change_timer_function_usage &&
                  (change_callback_handle_cast ||
                   change_callback_handle_cast_no_arg ||
                   change_callback_handle_arg)@
      expression change_timer_function_usage._E;
      identifier change_timer_function_usage._field1;
      identifier change_timer_function_usage._timer;
      identifier change_timer_function_usage._callback;
      type _cast_func;
      typedef TIMER_FUNC_TYPE;
      @@
      
      (
       _E->_field1._timer.function =
      -_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E->_field1._timer.function =
      -&_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E->_field1._timer.function =
      -(_cast_func)_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E->_field1._timer.function =
      -(_cast_func)&_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E._field1._timer.function =
      -_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E._field1._timer.function =
      -&_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E._field1._timer.function =
      -(_cast_func)_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      |
       _E._field1._timer.function =
      -(_cast_func)&_callback
      +(TIMER_FUNC_TYPE)_callback
       ;
      )
      
      // Sometimes timer functions are called directly. Replace matched args.
      @change_timer_function_calls
       depends on change_timer_function_usage &&
                  (change_callback_handle_cast ||
                   change_callback_handle_cast_no_arg ||
                   change_callback_handle_arg)@
      expression _E;
      identifier change_timer_function_usage._field1;
      identifier change_timer_function_usage._timer;
      identifier change_timer_function_usage._callback;
      type _cast_data;
      @@
      
       _callback(
      (
      -(_cast_data)_E
      +&_E->_field1._timer
      |
      -(_cast_data)&_E
      +&_E._field1._timer
      |
      -_E
      +&_E->_field1._timer
      )
       )
      
      // If a timer has been configured without a data argument, it can be
      // converted without regard to the callback argument, since it is unused.
      @match_timer_function_unused_data@
      expression _E;
      identifier _field1;
      identifier _timer;
      identifier _callback;
      @@
      
      (
      -setup_timer(&_E->_field1._timer, _callback, 0);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E->_field1._timer, _callback, 0L);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E->_field1._timer, _callback, 0UL);
      +timer_setup(&_E->_field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, _callback, 0);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, _callback, 0L);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_E._field1._timer, _callback, 0UL);
      +timer_setup(&_E._field1._timer, _callback, 0);
      |
      -setup_timer(&_field1._timer, _callback, 0);
      +timer_setup(&_field1._timer, _callback, 0);
      |
      -setup_timer(&_field1._timer, _callback, 0L);
      +timer_setup(&_field1._timer, _callback, 0);
      |
      -setup_timer(&_field1._timer, _callback, 0UL);
      +timer_setup(&_field1._timer, _callback, 0);
      |
      -setup_timer(_field1._timer, _callback, 0);
      +timer_setup(_field1._timer, _callback, 0);
      |
      -setup_timer(_field1._timer, _callback, 0L);
      +timer_setup(_field1._timer, _callback, 0);
      |
      -setup_timer(_field1._timer, _callback, 0UL);
      +timer_setup(_field1._timer, _callback, 0);
      )
      
      @change_callback_unused_data
       depends on match_timer_function_unused_data@
      identifier match_timer_function_unused_data._callback;
      type _origtype;
      identifier _origarg;
      @@
      
       void _callback(
      -_origtype _origarg
      +struct timer_list *unused
       )
       {
      	... when != _origarg
       }
      Signed-off-by: Kees Cook <keescook@chromium.org>
  2. 04 Jun 2017, 1 commit
  3. 27 Apr 2017, 1 commit
  4. 20 Apr 2017, 1 commit
  5. 25 Dec 2016, 1 commit
  6. 27 Sep 2016, 1 commit
  7. 01 Jul 2016, 1 commit
  8. 13 May 2016, 1 commit
      KVM: halt_polling: provide a way to qualify wakeups during poll · 3491caf2
      Authored by Christian Borntraeger
      Some wakeups should not be considered successful polls. For example,
      on s390 I/O interrupts are usually floating, which means that _ALL_
      CPUs would be considered runnable, letting all vCPUs poll all the
      time for transaction-like workloads, even if one vCPU would be
      enough. This can result in huge CPU usage for large guests.
      This patch lets architectures provide a way to qualify whether a
      wakeup should be considered good or bad with regard to polling.

      For s390, the implementation will fence off halt polling for anything
      but known-good, single-vCPU events. The s390 implementation for
      floating interrupts does a wakeup for one vCPU, but the interrupt
      will be delivered by whichever CPU checks first for a pending
      interrupt. We favor the woken-up CPU by marking its poll as a "good"
      poll. This code will also mark several other wakeup reasons, such as
      IPIs or expired timers, as "good". It will of course also mark some
      events as not successful. Since KVM on z always runs as a
      second-level hypervisor, we prefer not to poll unless we are really
      sure.
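
      A minimal, hypothetical sketch of what such accounting could look
      like in the common halt-polling code; the helper name is an
      assumption for illustration, and only the stat names follow the text
      above:

      	/* vcpu_wakeup_is_valid() is an assumed arch hook, not real code. */
      	static void account_poll_wakeup(struct kvm_vcpu *vcpu)
      	{
      		if (vcpu_wakeup_is_valid(vcpu))
      			++vcpu->stat.halt_successful_poll;
      		else
      			/* excluded from any halt_poll_ns tuning */
      			++vcpu->stat.halt_poll_no_tuning;
      	}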
      
      This patch successfully limits the CPU usage for cases like uperf 1byte
      transactional ping pong workload or wakeup heavy workload like OLTP
      while still providing a proper speedup.
      
      This also introduces a new vcpu stat, "halt_poll_no_tuning", which
      marks wakeups that are considered not good for polling.
      Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
      Acked-by: Radim Krčmář <rkrcmar@redhat.com> (for an earlier version)
      Cc: David Matlack <dmatlack@google.com>
      Cc: Wanpeng Li <kernellwp@gmail.com>
      [Rename config symbol. - Paolo]
      Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  9. 01 Mar 2016, 1 commit
  10. 01 Dec 2015, 1 commit
  11. 16 Sep 2015, 1 commit
      KVM: add halt_attempted_poll to VCPU stats · 62bea5bf
      Authored by Paolo Bonzini
      This new statistic can help diagnose VCPUs that, for any reason,
      trigger bad behavior of the halt_poll_ns autotuning.

      For example, say halt_poll_ns = 480000, and wakeups are spaced
      exactly like 479us, 481us, 479us, 481us. Then KVM always fails
      polling and wastes 10+20+40+80+160+320+480 = 1110 microseconds out
      of every 479+481+479+481+479+481+479 = 3359 microseconds. The VCPU
      is then consuming about 30% more CPU than it would without polling.
      This shows up as an abnormally high number of attempted polls
      compared to successful polls.
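
      The arithmetic above assumes autotuning that doubles the poll window
      after each failed poll, starting at 10us and capped by halt_poll_ns;
      a hedged sketch of that growth (constants illustrative, values in ns):

      	/* Sketch only, not the actual tuning code. */
      	static unsigned int grow_halt_poll_ns(unsigned int cur_ns)
      	{
      		unsigned int next = cur_ns ? cur_ns * 2 : 10000;

      		return next > 480000 ? 480000 : next;
      	}

      With wakeups arriving just past each window, the window walks
      10, 20, 40, 80, 160, 320, 480 microseconds, failing every time,
      which is exactly the 1110us of waste per 3359us computed above.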
      
      Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
      Reviewed-by: David Matlack <dmatlack@google.com>
      Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  12. 22 Aug 2015, 1 commit
  13. 28 May 2015, 1 commit
  14. 26 May 2015, 1 commit
  15. 07 May 2015, 1 commit
  16. 06 Feb 2015, 1 commit
      kvm: add halt_poll_ns module parameter · f7819512
      Authored by Paolo Bonzini
      This patch introduces a new module parameter for the KVM module; when it
      is present, KVM attempts a bit of polling on every HLT before scheduling
      itself out via kvm_vcpu_block.
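
      As a rough sketch of the mechanism (simplified and hypothetical, not
      the actual kvm_vcpu_block() code; the helper and stat are used here
      illustratively):

      	/* Poll for up to halt_poll_ns before giving up the physical CPU. */
      	static void vcpu_block_sketch(struct kvm_vcpu *vcpu)
      	{
      		ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);

      		do {
      			if (kvm_vcpu_check_block(vcpu) < 0) {
      				/* a wakeup arrived while polling: no reschedule */
      				++vcpu->stat.halt_successful_poll;
      				return;
      			}
      			cpu_relax();
      		} while (ktime_before(ktime_get(), stop));

      		schedule();	/* poll failed: sleep as before */
      	}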
      
      This parameter helps a lot for latency-bound workloads; in
      particular, I tested it with O_DSYNC writes with a battery-backed
      disk in the host. In this case, writes are fast (because the data
      doesn't have to go all the way to the platters) but they cannot be
      merged by either the host or the guest. KVM's performance here is
      usually around 30% of bare metal, or 50% if you use cache=directsync
      or cache=writethrough (these settings prevent the guest from sending
      pointless flush requests and, at the same time, are not slow thanks
      to the battery-backed cache).
      The bad performance happens because on every halt the host CPU decides
      to halt itself too.  When the interrupt comes, the vCPU thread is then
      migrated to a new physical CPU, and in general the latency is horrible
      because the vCPU thread has to be scheduled back in.
      
      With this patch, performance reaches 60-65% of bare metal and, more
      importantly, 99% of what you get if you use idle=poll in the guest.  This
      means that the tunable gets rid of this particular bottleneck, and more
      work can be done to improve performance in the kernel or QEMU.
      
      Of course there is some price to pay; every time an otherwise idle
      vCPU is interrupted, it will poll unnecessarily and thus impose a
      little load on the host.  The above results were obtained with a
      mostly random value of the parameter (500000), and the load was
      around 1.5-2.5% CPU usage on one of the host's cores for each idle
      guest vCPU.
      
      The patch also adds a new stat, /sys/kernel/debug/kvm/halt_successful_poll,
      that can be used to tune the parameter.  It counts how many HLT
      instructions received an interrupt during the polling period; each
      successful poll saves Linux from scheduling the VCPU thread out and
      back in, and may also avoid a likely trip to C1 and back for the
      physical CPU.
      
      While idle, a 4-VCPU Linux VM halts around 10 times per second.
      Of these halts, almost all are failed polls.  During the benchmark,
      instead, basically all halts end within the polling period, except
      for a more or less constant stream of 50 per second coming from vCPUs
      that are not running the benchmark.  The wasted time is thus very
      low.  Things may be slightly different for Windows VMs, which have a
      ~10 ms timer tick.
      
      The effect is also visible in Marcelo's recently introduced latency
      test for the TSC deadline timer.  Though of course a non-RT kernel
      has awful latency bounds, the latency of the timer is around
      8000-10000 clock cycles, compared to 20000-120000 without setting
      halt_poll_ns.  Thus, for the TSC deadline timer, the effect is both
      a smaller average latency and a smaller variance.
      Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  17. 22 Sep 2014, 11 commits
  18. 29 Jul 2014, 2 commits
  19. 28 Jul 2014, 8 commits
  20. 27 Jan 2014, 1 commit
      kvm/ppc: IRQ disabling cleanup · 6c85f52b
      Authored by Scott Wood
      Simplify the handling of lazy EE by going directly from fully-enabled
      to hard-disabled.  This replaces the lazy_irq_pending() check
      (including its misplaced kvm_guest_exit() call).
      
      As suggested by Tiejun Chen, move the interrupt disabling into
      kvmppc_prepare_to_enter() rather than have each caller do it.  Also
      move the IRQ enabling on heavyweight exit into
      kvmppc_prepare_to_enter().
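
      A hedged sketch of the shape of the change; the function name follows
      the commit message, but the body is illustrative rather than the real
      code:

      	/* Interrupts are hard-disabled once here, instead of by every
      	 * caller, and re-enabled only on the heavyweight-exit path. */
      	static int kvmppc_prepare_to_enter_sketch(struct kvm_vcpu *vcpu)
      	{
      		hard_irq_disable();	/* fully enabled -> hard-disabled */

      		if (signal_pending(current)) {	/* heavyweight exit */
      			local_irq_enable();
      			return -EINTR;
      		}

      		return 0;	/* enter the guest with IRQs hard-disabled */
      	}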
      Signed-off-by: Scott Wood <scottwood@freescale.com>
      Signed-off-by: Alexander Graf <agraf@suse.de>
  21. 09 Jan 2014, 2 commits