diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 44d5c7fdcd9704d1b294f070799ae4d7409cf521..213bf983242f53392edd8ff247f9f65bc207c174 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -81,8 +81,8 @@ extern void find_udbg_vterm(void); int fwnmi_active; /* TRUE if an FWNMI handler is present */ -static void pseries_shared_idle(void); -static void pseries_dedicated_idle(void); +static void pseries_shared_idle_sleep(void); +static void pseries_dedicated_idle_sleep(void); struct mpic *pSeries_mpic; @@ -236,14 +236,13 @@ static void __init pSeries_setup_arch(void) vpa_init(boot_cpuid); if (get_lppaca()->shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); - ppc_md.idle_loop = pseries_shared_idle; + ppc_md.power_save = pseries_shared_idle_sleep; } else { printk(KERN_INFO "Using dedicated idle loop\n"); - ppc_md.idle_loop = pseries_dedicated_idle; + ppc_md.power_save = pseries_dedicated_idle_sleep; } } else { printk(KERN_INFO "Using default idle loop\n"); - ppc_md.idle_loop = default_idle; } if (firmware_has_feature(FW_FEATURE_LPAR)) @@ -393,136 +392,87 @@ static int __init pSeries_probe(int platform) DECLARE_PER_CPU(unsigned long, smt_snooze_delay); -static inline void dedicated_idle_sleep(unsigned int cpu) -{ - struct lppaca *plppaca = &lppaca[cpu ^ 1]; - - /* Only sleep if the other thread is not idle */ - if (!(plppaca->idle)) { - local_irq_disable(); - - /* - * We are about to sleep the thread and so wont be polling any - * more. - */ - clear_thread_flag(TIF_POLLING_NRFLAG); - smp_mb__after_clear_bit(); - - /* - * SMT dynamic mode. Cede will result in this thread going - * dormant, if the partner thread is still doing work. Thread - * wakes up if partner goes idle, an interrupt is presented, or - * a prod occurs. Returning from the cede enables external - * interrupts. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); - } else { - /* - * Give the HV an opportunity at the processor, since we are - * not doing any work. - */ - poll_pending(); - } -} - -static void pseries_dedicated_idle(void) +static void pseries_dedicated_idle_sleep(void) { unsigned int cpu = smp_processor_id(); unsigned long start_snooze; unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); - set_thread_flag(TIF_POLLING_NRFLAG); - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - get_lppaca()->idle = 1; - - if (!need_resched()) { - start_snooze = get_tb() + - *smt_snooze_delay * tb_ticks_per_usec; - - while (!need_resched() && !cpu_is_offline(cpu)) { - ppc64_runlatch_off(); - - /* - * Go into low thread priority and possibly - * low power mode. - */ - HMT_low(); - HMT_very_low(); - - if (*smt_snooze_delay != 0 && - get_tb() > start_snooze) { - HMT_medium(); - dedicated_idle_sleep(cpu); - } - - } - - HMT_medium(); - } - get_lppaca()->idle = 0; - ppc64_runlatch_on(); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + /* + * We come in with interrupts disabled, and need_resched() + * has been checked recently. If we should poll for a little + * while, do so. + */ + if (*smt_snooze_delay) { + start_snooze = get_tb() + + *smt_snooze_delay * tb_ticks_per_usec; + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); + while (get_tb() < start_snooze) { + if (need_resched() || cpu_is_offline(cpu)) + goto out; + ppc64_runlatch_off(); + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + local_irq_disable(); + if (need_resched() || cpu_is_offline(cpu)) + goto out; } + + /* + * Cede if the other thread is not idle, so that it can + * go single-threaded. If the other thread is idle, + * we ask the hypervisor if it has pending work it + * wants to do and cede if it does. Otherwise we keep + * polling in order to reduce interrupt latency. + * + * Doing the cede when the other thread is active will + * result in this thread going dormant, meaning the other + * thread gets to run in single-threaded (ST) mode, which + * is slightly faster than SMT mode with this thread at + * very low priority. The cede enables interrupts, which + * doesn't matter here. + */ + if (!lppaca[cpu ^ 1].idle || poll_pending() == H_Pending) + cede_processor(); + +out: + HMT_medium(); + get_lppaca()->idle = 0; } -static void pseries_shared_idle(void) +static void pseries_shared_idle_sleep(void) { unsigned int cpu = smp_processor_id(); - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - get_lppaca()->idle = 1; - - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); - ppc64_runlatch_off(); - - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - * - * Check need_resched() again with interrupts disabled - * to avoid a race. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - - HMT_medium(); - } - - get_lppaca()->idle = 0; - ppc64_runlatch_on(); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + cede_processor(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } + get_lppaca()->idle = 0; } static int pSeries_pci_probe_mode(struct pci_bus *bus) diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h index 38ca9ad6110d6640b37a1b0e06bd8ed50599abbc..b72c04f3f55195ea46a29040430e413c29a33686 100644 --- a/include/asm-powerpc/hvcall.h +++ b/include/asm-powerpc/hvcall.h @@ -9,6 +9,7 @@ #define H_Closed 2 /* Resource closed */ #define H_Constrained 4 /* Resource request constrained to max allowed */ #define H_InProgress 14 /* Kind of like busy */ +#define H_Pending 17 /* returned from H_POLL_PENDING */ #define H_Continue 18 /* Returned from H_Join on success */ #define H_LongBusyStartRange 9900 /* Start of long busy range */ #define H_LongBusyOrder1msec 9900 /* Long busy, hint that 1msec is a good time to retry */