提交 1c32cdc6 编写于 作者: D David Vrabel 提交者: Konrad Rzeszutek Wilk

xen/x86: avoid updating TLS descriptors if they haven't changed

When switching tasks in a Xen PV guest, avoid updating the TLS
descriptors if they haven't changed.  This improves the speed of
context switches by almost 10% as much of the time the descriptors are
the same or only one is different.

The descriptors written into the GDT by Xen are modified from the
values passed in the update_descriptor hypercall so we keep shadow
copies of the three TLS descriptors to compare against.

lmbench3 test     Before  After  Improvement
--------------------------------------------
lat_ctx -s 32 24   7.19    6.52  9%
lat_pipe          12.56   11.66  7%
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
上级 59290362
...@@ -125,6 +125,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; ...@@ -125,6 +125,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
*/ */
static int have_vcpu_info_placement = 1; static int have_vcpu_info_placement = 1;
/* Holds one copy of each of the three TLS descriptors. */
struct tls_descs {
struct desc_struct desc[3];
};
/*
 * Updating the 3 TLS descriptors in the GDT on every task switch is
 * surprisingly expensive, so we avoid updating them if they haven't
 * changed.  Since Xen writes different descriptors than the ones
 * passed in the update_descriptor hypercall, we keep per-CPU shadow
 * copies of the values we passed in and compare against those.
 */
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
static void clamp_max_cpus(void) static void clamp_max_cpus(void)
{ {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -548,9 +561,19 @@ static inline bool desc_equal(const struct desc_struct *d1, ...@@ -548,9 +561,19 @@ static inline bool desc_equal(const struct desc_struct *d1,
static void load_TLS_descriptor(struct thread_struct *t, static void load_TLS_descriptor(struct thread_struct *t,
unsigned int cpu, unsigned int i) unsigned int cpu, unsigned int i)
{ {
struct desc_struct *gdt = get_cpu_gdt_table(cpu); struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); struct desc_struct *gdt;
struct multicall_space mc = __xen_mc_entry(0); xmaddr_t maddr;
struct multicall_space mc;
if (desc_equal(shadow, &t->tls_array[i]))
return;
*shadow = t->tls_array[i];
gdt = get_cpu_gdt_table(cpu);
maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
mc = __xen_mc_entry(0);
MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册