提交 a39f69d2 编写于 作者: O Osier Yang 提交者: Peter Krempa

qemu: Set cpuset.cpus for domain process

When either the "cpuset" of <vcpu> is specified, or the "placement" of
<vcpu> is "auto", setting only cpuset.mems might cause the guest to
fail to start. E.g. (the "placement" of both <vcpu> and <numatune> is
"auto"):

1) Related XMLs
  <vcpu placement='auto'>4</vcpu>
  <numatune>
    <memory mode='strict' placement='auto'/>
  </numatune>

2) Host NUMA topology
  % numactl --hardware
  available: 8 nodes (0-7)
  node 0 cpus: 0 4 8 12 16 20 24 28
  node 0 size: 16374 MB
  node 0 free: 11899 MB
  node 1 cpus: 32 36 40 44 48 52 56 60
  node 1 size: 16384 MB
  node 1 free: 15318 MB
  node 2 cpus: 2 6 10 14 18 22 26 30
  node 2 size: 16384 MB
  node 2 free: 15766 MB
  node 3 cpus: 34 38 42 46 50 54 58 62
  node 3 size: 16384 MB
  node 3 free: 15347 MB
  node 4 cpus: 3 7 11 15 19 23 27 31
  node 4 size: 16384 MB
  node 4 free: 15041 MB
  node 5 cpus: 35 39 43 47 51 55 59 63
  node 5 size: 16384 MB
  node 5 free: 15202 MB
  node 6 cpus: 1 5 9 13 17 21 25 29
  node 6 size: 16384 MB
  node 6 free: 15197 MB
  node 7 cpus: 33 37 41 45 49 53 57 61
  node 7 size: 16368 MB
  node 7 free: 15669 MB

4) cpuset.cpus will be set as: (from debug log)

2013-05-09 16:50:17.296+0000: 417: debug : virCgroupSetValueStr:331 :
Set value '/sys/fs/cgroup/cpuset/libvirt/qemu/toy/cpuset.cpus'
to '0-63'

5) The advisory nodeset got from querying numad (from debug log)

2013-05-09 16:50:17.295+0000: 417: debug : qemuProcessStart:3614 :
Nodeset returned from numad: 1

6) cpuset.mems will be set as: (from debug log)

2013-05-09 16:50:17.296+0000: 417: debug : virCgroupSetValueStr:331 :
Set value '/sys/fs/cgroup/cpuset/libvirt/qemu/toy/cpuset.mems'
to '0-7'

I.e., the domain process's memory is restricted to the first NUMA node,
yet it can use all of the CPUs, which will likely cause the domain
process to fail to start because the kernel fails to allocate
memory with the memory policy set to "strict".

% tail -n 20 /var/log/libvirt/qemu/toy.log
...
2013-05-09 05:53:32.972+0000: 7318: debug : virCommandHandshakeChild:377 :
Handshake with parent is done
char device redirected to /dev/pts/2 (label charserial0)
kvm_init_vcpu failed: Cannot allocate memory
...
Signed-off-by: Peter Krempa <pkrempa@redhat.com>
上级 b8b38321
......@@ -615,10 +615,12 @@ cleanup:
static int
qemuSetupCpusetCgroup(virDomainObjPtr vm,
virBitmapPtr nodemask)
virBitmapPtr nodemask,
virCapsPtr caps)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
char *mask = NULL;
char *mem_mask = NULL;
char *cpu_mask = NULL;
int rc;
int ret = -1;
......@@ -632,17 +634,17 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm,
if (vm->def->numatune.memory.placement_mode ==
VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO)
mask = virBitmapFormat(nodemask);
mem_mask = virBitmapFormat(nodemask);
else
mask = virBitmapFormat(vm->def->numatune.memory.nodemask);
mem_mask = virBitmapFormat(vm->def->numatune.memory.nodemask);
if (!mask) {
if (!mem_mask) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("failed to convert memory nodemask"));
goto cleanup;
}
rc = virCgroupSetCpusetMems(priv->cgroup, mask);
rc = virCgroupSetCpusetMems(priv->cgroup, mem_mask);
if (rc != 0) {
virReportSystemError(-rc,
......@@ -652,9 +654,39 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm,
}
}
if (vm->def->cpumask ||
(vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) {
if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
virBitmapPtr cpumap;
if (!(cpumap = virCapabilitiesGetCpusForNodemask(caps, nodemask)))
goto cleanup;
cpu_mask = virBitmapFormat(cpumap);
virBitmapFree(cpumap);
} else {
cpu_mask = virBitmapFormat(vm->def->cpumask);
}
if (!cpu_mask) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("failed to convert cpu mask"));
goto cleanup;
}
rc = virCgroupSetCpusetCpus(priv->cgroup, cpu_mask);
if (rc != 0) {
virReportSystemError(-rc,
_("Unable to set cpuset.cpus for domain %s"),
vm->def->name);
goto cleanup;
}
}
ret = 0;
cleanup:
VIR_FREE(mask);
VIR_FREE(mem_mask);
VIR_FREE(cpu_mask);
return ret;
}
......@@ -801,6 +833,7 @@ int qemuSetupCgroup(virQEMUDriverPtr driver,
virBitmapPtr nodemask)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
virCapsPtr caps = NULL;
int ret = -1;
if (qemuInitCgroup(driver, vm, true) < 0)
......@@ -809,6 +842,9 @@ int qemuSetupCgroup(virQEMUDriverPtr driver,
if (!priv->cgroup)
return 0;
if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
goto cleanup;
if (qemuSetupDevicesCgroup(driver, vm) < 0)
goto cleanup;
......@@ -821,11 +857,12 @@ int qemuSetupCgroup(virQEMUDriverPtr driver,
if (qemuSetupCpuCgroup(vm) < 0)
goto cleanup;
if (qemuSetupCpusetCgroup(vm, nodemask) < 0)
if (qemuSetupCpusetCgroup(vm, nodemask, caps) < 0)
goto cleanup;
ret = 0;
cleanup:
virObjectUnref(caps);
return ret;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册