numa_conf.c 38.5 KB
Newer Older
1
/*
2
 * numa_conf.c
3
 *
4
 * Copyright (C) 2014-2015 Red Hat, Inc.
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>

23
#include "numa_conf.h"
24

25 26
#include "domain_conf.h"
#include "viralloc.h"
27
#include "virnuma.h"
28 29
#include "virstring.h"

30 31 32 33 34 35 36 37 38
/*
 * Distance definitions conforming to the ACPI 2.0 SLIT.
 * See include/linux/topology.h
 */
#define LOCAL_DISTANCE          10
#define REMOTE_DISTANCE         20
/* SLIT entry value is a one-byte unsigned integer. */
#define UNREACHABLE            255

39 40
#define VIR_FROM_THIS VIR_FROM_DOMAIN

41 42 43 44
/* String names for the virDomainNumatuneMemMode values. */
VIR_ENUM_IMPL(virDomainNumatuneMemMode,
              VIR_DOMAIN_NUMATUNE_MEM_LAST,
              "strict", "preferred", "interleave",
);
47

48 49
VIR_ENUM_IMPL(virDomainNumatunePlacement,
              VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST,
50 51
              "default",
              "static",
52 53
              "auto",
);
54

55 56
VIR_ENUM_IMPL(virDomainMemoryAccess,
              VIR_DOMAIN_MEMORY_ACCESS_LAST,
57 58
              "default",
              "shared",
59 60
              "private",
);
61

62 63
typedef struct _virDomainNumaDistance virDomainNumaDistance;
typedef virDomainNumaDistance *virDomainNumaDistancePtr;
64

65 66
typedef struct _virDomainNumaNode virDomainNumaNode;
typedef virDomainNumaNode *virDomainNumaNodePtr;
67

68
struct _virDomainNuma {
69
    struct {
70
        bool specified;
71 72 73 74 75
        virBitmapPtr nodeset;
        virDomainNumatuneMemMode mode;
        virDomainNumatunePlacement placement;
    } memory;               /* pinning for all the memory */

76
    struct _virDomainNumaNode {
77 78 79 80
        unsigned long long mem; /* memory size in KiB */
        virBitmapPtr cpumask;   /* bitmap of vCPUs corresponding to the node */
        virBitmapPtr nodeset;   /* host memory nodes where this guest node resides */
        virDomainNumatuneMemMode mode;  /* memory mode selection */
81
        virDomainMemoryAccess memAccess; /* shared memory access configuration */
82
        virTristateBool discard; /* discard-data for memory-backend-file */
83 84 85 86 87 88

        struct _virDomainNumaDistance {
            unsigned int value; /* locality value for node i->j or j->i */
            unsigned int cellid;
        } *distances;           /* remote node distances */
        size_t ndistances;
89
    } *mem_nodes;           /* guest node configuration */
90 91
    size_t nmem_nodes;

92 93 94 95
    /* Future NUMA tuning related stuff should go here. */
};


96
bool
97
virDomainNumatuneNodeSpecified(virDomainNumaPtr numatune,
98 99 100 101 102 103 104 105 106 107
                               int cellid)
{
    if (numatune &&
        cellid >= 0 &&
        cellid < numatune->nmem_nodes)
        return numatune->mem_nodes[cellid].nodeset;

    return false;
}

108
static int
109
virDomainNumatuneNodeParseXML(virDomainNumaPtr numa,
110 111 112
                              xmlXPathContextPtr ctxt)
{
    char *tmp = NULL;
E
Eric Blake 已提交
113
    int n = 0;
114 115 116 117 118 119 120 121 122 123 124 125 126
    int ret = -1;
    size_t i = 0;
    xmlNodePtr *nodes = NULL;

    if ((n = virXPathNodeSet("./numatune/memnode", ctxt, &nodes)) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot extract memnode nodes"));
        goto cleanup;
    }

    if (!n)
        return 0;

127 128
    if (numa->memory.specified &&
        numa->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) {
129 130 131 132 133 134
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Per-node binding is not compatible with "
                         "automatic NUMA placement."));
        goto cleanup;
    }

135
    if (!numa->nmem_nodes) {
136 137 138 139 140 141 142 143 144
        virReportError(VIR_ERR_XML_ERROR, "%s",
                       _("Element 'memnode' is invalid without "
                         "any guest NUMA cells"));
        goto cleanup;
    }

    for (i = 0; i < n; i++) {
        int mode = 0;
        unsigned int cellid = 0;
145
        virDomainNumaNodePtr mem_node = NULL;
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
        xmlNodePtr cur_node = nodes[i];

        tmp = virXMLPropString(cur_node, "cellid");
        if (!tmp) {
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Missing required cellid attribute "
                             "in memnode element"));
            goto cleanup;
        }
        if (virStrToLong_uip(tmp, NULL, 10, &cellid) < 0) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid cellid attribute in memnode element: %s"),
                           tmp);
            goto cleanup;
        }
        VIR_FREE(tmp);

163
        if (cellid >= numa->nmem_nodes) {
164 165 166 167 168 169
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Argument 'cellid' in memnode element must "
                             "correspond to existing guest's NUMA cell"));
            goto cleanup;
        }

170
        mem_node = &numa->mem_nodes[cellid];
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198

        if (mem_node->nodeset) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Multiple memnode elements with cellid %u"),
                           cellid);
            goto cleanup;
        }

        tmp = virXMLPropString(cur_node, "mode");
        if (!tmp) {
            mem_node->mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT;
        } else {
            if ((mode = virDomainNumatuneMemModeTypeFromString(tmp)) < 0) {
                virReportError(VIR_ERR_XML_ERROR, "%s",
                               _("Invalid mode attribute in memnode element"));
                goto cleanup;
            }
            VIR_FREE(tmp);
            mem_node->mode = mode;
        }

        tmp = virXMLPropString(cur_node, "nodeset");
        if (!tmp) {
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Missing required nodeset attribute "
                             "in memnode element"));
            goto cleanup;
        }
199
        if (virBitmapParse(tmp, &mem_node->nodeset, VIR_DOMAIN_CPUMASK_LEN) < 0)
200
            goto cleanup;
201 202 203 204 205 206

        if (virBitmapIsAllClear(mem_node->nodeset)) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Invalid value of 'nodeset': %s"), tmp);
            goto cleanup;
        }
207 208 209 210 211 212 213 214 215 216
        VIR_FREE(tmp);
    }

    ret = 0;
 cleanup:
    VIR_FREE(nodes);
    VIR_FREE(tmp);
    return ret;
}

217
int
218
virDomainNumatuneParseXML(virDomainNumaPtr numa,
219
                          bool placement_static,
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
                          xmlXPathContextPtr ctxt)
{
    char *tmp = NULL;
    int mode = -1;
    int n = 0;
    int placement = -1;
    int ret = -1;
    virBitmapPtr nodeset = NULL;
    xmlNodePtr node = NULL;

    if (virXPathInt("count(./numatune)", ctxt, &n) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("cannot extract numatune nodes"));
        goto cleanup;
    } else if (n > 1) {
        virReportError(VIR_ERR_XML_ERROR, "%s",
                       _("only one numatune is supported"));
        goto cleanup;
    }

    node = virXPathNode("./numatune/memory[1]", ctxt);

242 243 244 245 246 247 248 249
    if (!placement_static && !node)
        placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO;

    if (node) {
        if ((tmp = virXMLPropString(node, "mode")) &&
            (mode = virDomainNumatuneMemModeTypeFromString(tmp)) < 0) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Unsupported NUMA memory tuning mode '%s'"), tmp);
250
            goto cleanup;
251 252
        }
        VIR_FREE(tmp);
253

254 255 256 257 258 259 260
        if ((tmp = virXMLPropString(node, "placement")) &&
            (placement = virDomainNumatunePlacementTypeFromString(tmp)) < 0) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Unsupported NUMA memory placement mode '%s'"), tmp);
            goto cleanup;
        }
        VIR_FREE(tmp);
261

262 263
        tmp = virXMLPropString(node, "nodeset");
        if (tmp) {
264
            if (virBitmapParse(tmp, &nodeset, VIR_DOMAIN_CPUMASK_LEN) < 0)
265 266 267 268 269 270 271 272 273 274
                goto cleanup;

            if (virBitmapIsAllClear(nodeset)) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("Invalid value of 'nodeset': %s"), tmp);
                goto cleanup;
            }

            VIR_FREE(tmp);
        }
275 276
    }

277
    if (virDomainNumatuneSet(numa,
278 279 280 281
                             placement_static,
                             placement,
                             mode,
                             nodeset) < 0)
282 283
        goto cleanup;

284
    if (virDomainNumatuneNodeParseXML(numa, ctxt) < 0)
285
        goto cleanup;
286

287 288 289 290 291 292 293 294 295
    ret = 0;
 cleanup:
    virBitmapFree(nodeset);
    VIR_FREE(tmp);
    return ret;
}

int
virDomainNumatuneFormatXML(virBufferPtr buf,
296
                           virDomainNumaPtr numatune)
297 298 299
{
    const char *tmp = NULL;
    char *nodeset = NULL;
300
    bool nodesetSpecified = false;
301
    size_t i = 0;
302 303 304 305

    if (!numatune)
        return 0;

306 307 308 309 310 311 312 313 314 315
    for (i = 0; i < numatune->nmem_nodes; i++) {
        if (numatune->mem_nodes[i].nodeset) {
            nodesetSpecified = true;
            break;
        }
    }

    if (!nodesetSpecified && !numatune->memory.specified)
        return 0;

316 317 318
    virBufferAddLit(buf, "<numatune>\n");
    virBufferAdjustIndent(buf, 2);

319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
    if (numatune->memory.specified) {
        tmp = virDomainNumatuneMemModeTypeToString(numatune->memory.mode);
        virBufferAsprintf(buf, "<memory mode='%s' ", tmp);

        if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) {
            if (!(nodeset = virBitmapFormat(numatune->memory.nodeset)))
                return -1;
            virBufferAsprintf(buf, "nodeset='%s'/>\n", nodeset);
            VIR_FREE(nodeset);
        } else if (numatune->memory.placement) {
            tmp = virDomainNumatunePlacementTypeToString(numatune->memory.placement);
            virBufferAsprintf(buf, "placement='%s'/>\n", tmp);
        }
    }

    for (i = 0; i < numatune->nmem_nodes; i++) {
335
        virDomainNumaNodePtr mem_node = &numatune->mem_nodes[i];
336

337 338 339 340
        if (!mem_node->nodeset)
            continue;

        if (!(nodeset = virBitmapFormat(mem_node->nodeset)))
341
            return -1;
342 343 344 345 346 347

        virBufferAsprintf(buf,
                          "<memnode cellid='%zu' mode='%s' nodeset='%s'/>\n",
                          i,
                          virDomainNumatuneMemModeTypeToString(mem_node->mode),
                          nodeset);
348 349 350 351 352 353 354 355 356
        VIR_FREE(nodeset);
    }

    virBufferAdjustIndent(buf, -2);
    virBufferAddLit(buf, "</numatune>\n");
    return 0;
}

void
357
virDomainNumaFree(virDomainNumaPtr numa)
358
{
359 360
    size_t i = 0;

361
    if (!numa)
362 363
        return;

364
    virBitmapFree(numa->memory.nodeset);
365 366
    for (i = 0; i < numa->nmem_nodes; i++) {
        virBitmapFree(numa->mem_nodes[i].cpumask);
367
        virBitmapFree(numa->mem_nodes[i].nodeset);
368 369 370

        if (numa->mem_nodes[i].ndistances > 0)
            VIR_FREE(numa->mem_nodes[i].distances);
371
    }
372
    VIR_FREE(numa->mem_nodes);
373

374
    VIR_FREE(numa);
375 376
}

377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
/**
 * virDomainNumatuneGetMode:
 * @numatune: pointer to numatune definition, may be NULL
 * @cellid: guest cell whose mode is wanted
 * @mode: output location for the mode, may be NULL
 *
 * Looks up the memory mode for @cellid. A per-cell binding takes
 * precedence; otherwise the domain-wide mode is used if one was
 * specified. Passing NULL as @mode is allowed when only the return
 * value is of interest.
 *
 * Returns: 0 on success (with @mode updated when non-NULL)
 *         -1 if no mode was defined
 */
int virDomainNumatuneGetMode(virDomainNumaPtr numatune,
                             int cellid,
                             virDomainNumatuneMemMode *mode)
{
    virDomainNumatuneMemMode found;

    if (!numatune)
        return -1;

    if (virDomainNumatuneNodeSpecified(numatune, cellid)) {
        found = numatune->mem_nodes[cellid].mode;
    } else {
        if (!numatune->memory.specified)
            return -1;
        found = numatune->memory.mode;
    }

    if (mode)
        *mode = found;

    return 0;
}

virBitmapPtr
412
virDomainNumatuneGetNodeset(virDomainNumaPtr numatune,
413 414
                            virBitmapPtr auto_nodeset,
                            int cellid)
415 416 417 418
{
    if (!numatune)
        return NULL;

419 420
    if (numatune->memory.specified &&
        numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)
421 422
        return auto_nodeset;

423 424 425 426
    if (virDomainNumatuneNodeSpecified(numatune, cellid))
        return numatune->mem_nodes[cellid].nodeset;

    if (!numatune->memory.specified)
427 428 429 430 431 432
        return NULL;

    return numatune->memory.nodeset;
}

char *
433
virDomainNumatuneFormatNodeset(virDomainNumaPtr numatune,
434 435
                               virBitmapPtr auto_nodeset,
                               int cellid)
436 437
{
    return virBitmapFormat(virDomainNumatuneGetNodeset(numatune,
438 439
                                                       auto_nodeset,
                                                       cellid));
440 441
}

442

443
int
444
virDomainNumatuneMaybeGetNodeset(virDomainNumaPtr numatune,
445 446 447
                                 virBitmapPtr auto_nodeset,
                                 virBitmapPtr *retNodeset,
                                 int cellid)
448
{
449
    *retNodeset = NULL;
450 451 452 453

    if (!numatune)
        return 0;

454 455 456 457 458 459
    if (!virDomainNumatuneNodeSpecified(numatune, cellid) &&
        !numatune->memory.specified)
        return 0;

    if (numatune->memory.specified &&
        numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO &&
460 461 462 463 464 465 466
        !auto_nodeset) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Advice from numad is needed in case of "
                         "automatic numa placement"));
        return -1;
    }

467 468 469 470 471 472 473
    *retNodeset = virDomainNumatuneGetNodeset(numatune, auto_nodeset, cellid);

    return 0;
}


int
474
virDomainNumatuneMaybeFormatNodeset(virDomainNumaPtr numatune,
475 476 477 478 479 480 481 482 483 484 485 486
                                    virBitmapPtr auto_nodeset,
                                    char **mask,
                                    int cellid)
{
    virBitmapPtr nodeset;

    if (virDomainNumatuneMaybeGetNodeset(numatune, auto_nodeset, &nodeset,
                                         cellid) < 0)
        return -1;

    if (nodeset &&
        !(*mask = virBitmapFormat(nodeset)))
487 488 489 490 491 492
        return -1;

    return 0;
}

int
493
virDomainNumatuneSet(virDomainNumaPtr numa,
494
                     bool placement_static,
495 496 497 498 499 500 501 502
                     int placement,
                     int mode,
                     virBitmapPtr nodeset)
{
    /* No need to do anything in this case */
    if (mode == -1 && placement == -1 && !nodeset)
        return 0;

503
    if (!numa->memory.specified) {
504 505 506 507 508 509
        if (mode == -1)
            mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT;
        if (placement == -1)
            placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT;
    }

510 511 512 513 514 515
    /* Range checks */
    if (mode != -1 &&
        (mode < 0 || mode >= VIR_DOMAIN_NUMATUNE_MEM_LAST)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Unsupported numatune mode '%d'"),
                       mode);
516
        return -1;
517
    }
518

519 520 521 522 523
    if (placement != -1 &&
        (placement < 0 || placement >= VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Unsupported numatune placement '%d'"),
                       mode);
524
        return -1;
525 526 527
    }

    if (mode != -1)
528
        numa->memory.mode = mode;
529

530
    if (nodeset) {
531 532
        virBitmapFree(numa->memory.nodeset);
        if (!(numa->memory.nodeset = virBitmapNewCopy(nodeset)))
533
            return -1;
534 535 536 537 538
        if (placement == -1)
            placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC;
    }

    if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT) {
539
        if (numa->memory.nodeset || placement_static)
540 541 542 543 544 545
            placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC;
        else
            placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO;
    }

    if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC &&
546
        !numa->memory.nodeset) {
547 548 549
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("nodeset for NUMA memory tuning must be set "
                         "if 'placement' is 'static'"));
550
        return -1;
551 552
    }

553 554
    /* setting nodeset when placement auto is invalid */
    if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO &&
555 556 557
        numa->memory.nodeset) {
        virBitmapFree(numa->memory.nodeset);
        numa->memory.nodeset = NULL;
558 559
    }

560
    if (placement != -1)
561
        numa->memory.placement = placement;
562

563
    numa->memory.specified = true;
564

565
    return 0;
566 567
}

568
static bool
569 570
virDomainNumaNodesEqual(virDomainNumaPtr n1,
                        virDomainNumaPtr n2)
571 572 573 574 575 576 577
{
    size_t i = 0;

    if (n1->nmem_nodes != n2->nmem_nodes)
        return false;

    for (i = 0; i < n1->nmem_nodes; i++) {
578 579
        virDomainNumaNodePtr nd1 = &n1->mem_nodes[i];
        virDomainNumaNodePtr nd2 = &n2->mem_nodes[i];
580 581 582 583 584 585 586 587 588 589 590 591 592 593

        if (!nd1->nodeset && !nd2->nodeset)
            continue;

        if (nd1->mode != nd2->mode)
            return false;

        if (!virBitmapEqual(nd1->nodeset, nd2->nodeset))
            return false;
    }

    return true;
}

594
bool
595 596
virDomainNumaEquals(virDomainNumaPtr n1,
                    virDomainNumaPtr n2)
597 598 599 600 601 602 603
{
    if (!n1 && !n2)
        return true;

    if (!n1 || !n2)
        return false;

604
    if (!n1->memory.specified && !n2->memory.specified)
605
        return virDomainNumaNodesEqual(n1, n2);
606 607 608 609

    if (!n1->memory.specified || !n2->memory.specified)
        return false;

610 611 612 613 614 615
    if (n1->memory.mode != n2->memory.mode)
        return false;

    if (n1->memory.placement != n2->memory.placement)
        return false;

616 617 618
    if (!virBitmapEqual(n1->memory.nodeset, n2->memory.nodeset))
        return false;

619
    return virDomainNumaNodesEqual(n1, n2);
620 621 622
}

bool
623
virDomainNumatuneHasPlacementAuto(virDomainNumaPtr numatune)
624 625 626 627
{
    if (!numatune)
        return false;

628 629 630
    if (!numatune->memory.specified)
        return false;

631 632 633 634 635
    if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)
        return true;

    return false;
}
636 637

bool
638
virDomainNumatuneHasPerNodeBinding(virDomainNumaPtr numatune)
639 640 641 642 643 644 645 646 647 648 649 650 651
{
    size_t i = 0;

    if (!numatune)
        return false;

    for (i = 0; i < numatune->nmem_nodes; i++) {
        if (numatune->mem_nodes[i].nodeset)
            return true;
    }

    return false;
}
652 653

int
654
virDomainNumatuneSpecifiedMaxNode(virDomainNumaPtr numatune)
655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679
{
    int ret = -1;
    virBitmapPtr nodemask = NULL;
    size_t i;
    int bit;

    if (!numatune)
        return ret;

    nodemask = virDomainNumatuneGetNodeset(numatune, NULL, -1);
    if (nodemask)
        ret = virBitmapLastSetBit(nodemask);

    for (i = 0; i < numatune->nmem_nodes; i++) {
        nodemask = numatune->mem_nodes[i].nodeset;
        if (!nodemask)
            continue;

        bit = virBitmapLastSetBit(nodemask);
        if (bit > ret)
            ret = bit;
    }

    return ret;
}
680 681

bool
682
virDomainNumatuneNodesetIsAvailable(virDomainNumaPtr numatune,
683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
                                    virBitmapPtr auto_nodeset)
{
    size_t i = 0;
    virBitmapPtr b = NULL;

    if (!numatune)
        return true;

    b = virDomainNumatuneGetNodeset(numatune, auto_nodeset, -1);
    if (!virNumaNodesetIsAvailable(b))
        return false;

    for (i = 0; i < numatune->nmem_nodes; i++) {
        b = virDomainNumatuneGetNodeset(numatune, auto_nodeset, i);
        if (!virNumaNodesetIsAvailable(b))
            return false;
    }

    return true;
}
703 704


705 706 707 708 709 710 711 712 713 714 715
/**
 * virDomainNumaDefNodeDistanceParseXML:
 * @def: NUMA definition whose cells receive the distance tables
 * @ctxt: XPath context whose current node is the <cell> being parsed
 * @cur_cell: id of the cell being parsed
 *
 * Parses the optional <distances> child of a NUMA <cell>. Each
 * <sibling> needs an 'id' within the cell range and a 'value' between
 * LOCAL_DISTANCE and UNREACHABLE; LOCAL_DISTANCE is required for the
 * cell itself and rejected for any other cell. The value is stored in
 * the table of @cur_cell and, unless the sibling already has a value
 * for @cur_cell, mirrored into the sibling's table.
 *
 * On failure the distance tables of all cells are released and their
 * counters reset.
 *
 * Returns 0 on success (including when no <distances> exists), -1 with
 * an error reported on failure.
 */
static int
virDomainNumaDefNodeDistanceParseXML(virDomainNumaPtr def,
                                     xmlXPathContextPtr ctxt,
                                     unsigned int cur_cell)
{
    int ret = -1;
    int sibling;
    char *tmp = NULL;
    xmlNodePtr *nodes = NULL;
    size_t i, ndistances = def->nmem_nodes;

    if (ndistances == 0)
        return 0;

    /* check if NUMA distances definition is present */
    if (!virXPathNode("./distances[1]", ctxt))
        return 0;

    if ((sibling = virXPathNodeSet("./distances[1]/sibling", ctxt, &nodes)) <= 0) {
        virReportError(VIR_ERR_XML_ERROR, "%s",
                       _("NUMA distances defined without siblings"));
        goto cleanup;
    }

    for (i = 0; i < sibling; i++) {
        virDomainNumaDistancePtr ldist, rdist;
        unsigned int sibling_id, sibling_value;

        /* siblings are in order of parsing or explicitly numbered */
        if (!(tmp = virXMLPropString(nodes[i], "id"))) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Missing 'id' attribute in NUMA "
                             "distances under 'cell id %d'"),
                           cur_cell);
            goto cleanup;
        }

        /* The "id" needs to be applicable */
        if (virStrToLong_uip(tmp, NULL, 10, &sibling_id) < 0) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid 'id' attribute in NUMA "
                             "distances for sibling: '%s'"),
                           tmp);
            goto cleanup;
        }
        VIR_FREE(tmp);

        /* The "id" needs to be within numa/cell range */
        if (sibling_id >= ndistances) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("'sibling_id %d' does not refer to a "
                             "valid cell within NUMA 'cell id %d'"),
                           sibling_id, cur_cell);
            goto cleanup;
        }

        /* We need a locality value. Check and correct
         * distance to local and distance to remote node.
         */
        if (!(tmp = virXMLPropString(nodes[i], "value"))) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Missing 'value' attribute in NUMA distances "
                             "under 'cell id %d' for 'sibling id %d'"),
                           cur_cell, sibling_id);
            goto cleanup;
        }

        /* The "value" needs to be applicable */
        if (virStrToLong_uip(tmp, NULL, 10, &sibling_value) < 0) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("'value %s' is invalid for "
                             "'sibling id %d' under NUMA 'cell id %d'"),
                           tmp, sibling_id, cur_cell);
            goto cleanup;
        }
        VIR_FREE(tmp);

        /* Assure LOCAL_DISTANCE <= "value" <= UNREACHABLE
         * and correct LOCAL_DISTANCE setting if such applies.
         */
        if ((sibling_value < LOCAL_DISTANCE ||
             sibling_value > UNREACHABLE) ||
            (sibling_id == cur_cell &&
             sibling_value != LOCAL_DISTANCE) ||
            (sibling_id != cur_cell &&
             sibling_value == LOCAL_DISTANCE)) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("'value %d' is invalid for "
                             "'sibling id %d' under NUMA 'cell id %d'"),
                           sibling_value, sibling_id, cur_cell);
            goto cleanup;
        }

        /* Apply the local / remote distance */
        ldist = def->mem_nodes[cur_cell].distances;
        if (!ldist) {
            if (VIR_ALLOC_N(ldist, ndistances) < 0)
                goto cleanup;

            ldist[cur_cell].value = LOCAL_DISTANCE;
            ldist[cur_cell].cellid = cur_cell;
            def->mem_nodes[cur_cell].ndistances = ndistances;
            def->mem_nodes[cur_cell].distances = ldist;
        }

        ldist[sibling_id].cellid = sibling_id;
        ldist[sibling_id].value = sibling_value;

        /* Apply symmetry if none given */
        rdist = def->mem_nodes[sibling_id].distances;
        if (!rdist) {
            if (VIR_ALLOC_N(rdist, ndistances) < 0)
                goto cleanup;

            rdist[sibling_id].value = LOCAL_DISTANCE;
            rdist[sibling_id].cellid = sibling_id;
            def->mem_nodes[sibling_id].ndistances = ndistances;
            def->mem_nodes[sibling_id].distances = rdist;
        }

        rdist[cur_cell].cellid = cur_cell;
        if (!rdist[cur_cell].value)
            rdist[cur_cell].value = sibling_value;
    }

    ret = 0;

 cleanup:
    if (ret < 0) {
        /* Both statements must run for every cell: without the braces
         * the counter reset executed once after the loop, writing one
         * element past the end of mem_nodes. */
        for (i = 0; i < ndistances; i++) {
            VIR_FREE(def->mem_nodes[i].distances);
            def->mem_nodes[i].ndistances = 0;
        }
    }
    VIR_FREE(nodes);
    VIR_FREE(tmp);

    return ret;
}

844
int
845
virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
846 847 848 849
                            xmlXPathContextPtr ctxt)
{
    xmlNodePtr *nodes = NULL;
    xmlNodePtr oldNode = ctxt->node;
850
    char *tmp = NULL;
851
    int n;
852
    size_t i, j;
853 854
    int ret = -1;

855
    /* check if NUMA definition is present */
856
    if (!virXPathNode("./cpu/numa[1]", ctxt))
857
        return 0;
858

859
    if ((n = virXPathNodeSet("./cpu/numa[1]/cell", ctxt, &nodes)) <= 0) {
860 861 862 863 864
        virReportError(VIR_ERR_XML_ERROR, "%s",
                       _("NUMA topology defined without NUMA cells"));
        goto cleanup;
    }

865
    if (VIR_ALLOC_N(def->mem_nodes, n) < 0)
866
        goto cleanup;
867
    def->nmem_nodes = n;
868

869
    for (i = 0; i < n; i++) {
870
        int rc;
871 872 873 874 875 876 877 878 879
        unsigned int cur_cell = i;

        /* cells are in order of parsing or explicitly numbered */
        if ((tmp = virXMLPropString(nodes[i], "id"))) {
            if (virStrToLong_uip(tmp, NULL, 10, &cur_cell) < 0) {
                virReportError(VIR_ERR_XML_ERROR,
                               _("Invalid 'id' attribute in NUMA cell: '%s'"),
                               tmp);
                goto cleanup;
880 881 882 883 884 885 886
            }

            if (cur_cell >= n) {
                virReportError(VIR_ERR_XML_ERROR, "%s",
                               _("Exactly one 'cell' element per guest "
                                 "NUMA cell allowed, non-contiguous ranges or "
                                 "ranges not starting from 0 are not allowed"));
887
                goto cleanup;
888
            }
889 890
        }
        VIR_FREE(tmp);
891

892
        if (def->mem_nodes[cur_cell].cpumask) {
893 894 895 896 897
            virReportError(VIR_ERR_XML_ERROR,
                           _("Duplicate NUMA cell info for cell id '%u'"),
                           cur_cell);
            goto cleanup;
        }
898

899 900 901 902 903
        if (!(tmp = virXMLPropString(nodes[i], "cpus"))) {
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Missing 'cpus' attribute in NUMA cell"));
            goto cleanup;
        }
904

905
        if (virBitmapParse(tmp, &def->mem_nodes[cur_cell].cpumask,
906
                           VIR_DOMAIN_CPUMASK_LEN) < 0)
907
            goto cleanup;
908

909
        if (virBitmapIsAllClear(def->mem_nodes[cur_cell].cpumask)) {
910 911 912 913
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                          _("NUMA cell %d has no vCPUs assigned"), cur_cell);
            goto cleanup;
        }
914
        VIR_FREE(tmp);
915

916 917 918 919
        for (j = 0; j < n; j++) {
            if (j == cur_cell || !def->mem_nodes[j].cpumask)
                continue;

920
            if (virBitmapOverlaps(def->mem_nodes[j].cpumask,
921
                                  def->mem_nodes[cur_cell].cpumask)) {
922
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
923 924
                               _("NUMA cells %u and %zu have overlapping vCPU ids"),
                               cur_cell, j);
925 926 927 928
                goto cleanup;
            }
        }

929 930
        ctxt->node = nodes[i];
        if (virDomainParseMemory("./@memory", "./@unit", ctxt,
931
                                 &def->mem_nodes[cur_cell].mem, true, false) < 0)
932
            goto cleanup;
933

934
        if ((tmp = virXMLPropString(nodes[i], "memAccess"))) {
935
            if ((rc = virDomainMemoryAccessTypeFromString(tmp)) <= 0) {
936 937 938 939
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("Invalid 'memAccess' attribute value '%s'"),
                               tmp);
                goto cleanup;
940
            }
941

942
            def->mem_nodes[cur_cell].memAccess = rc;
943
            VIR_FREE(tmp);
944
        }
945

946 947 948 949 950 951 952 953 954 955 956 957
        if ((tmp = virXMLPropString(nodes[i], "discard"))) {
            if ((rc = virTristateBoolTypeFromString(tmp)) <= 0) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("Invalid 'discard' attribute value '%s'"),
                               tmp);
                goto cleanup;
            }

            def->mem_nodes[cur_cell].discard = rc;
            VIR_FREE(tmp);
        }

958 959 960
        /* Parse NUMA distances info */
        if (virDomainNumaDefNodeDistanceParseXML(def, ctxt, cur_cell) < 0)
                goto cleanup;
961 962 963 964 965 966 967
    }

    ret = 0;

 cleanup:
    ctxt->node = oldNode;
    VIR_FREE(nodes);
968
    VIR_FREE(tmp);
969 970
    return ret;
}
971 972 973


int
974 975
virDomainNumaDefCPUFormatXML(virBufferPtr buf,
                             virDomainNumaPtr def)
976
{
977
    virDomainMemoryAccess memAccess;
978
    virTristateBool discard;
979
    char *cpustr;
980
    size_t ncells = virDomainNumaGetNodeCount(def);
981 982
    size_t i;

983
    if (ncells == 0)
984 985 986 987
        return 0;

    virBufferAddLit(buf, "<numa>\n");
    virBufferAdjustIndent(buf, 2);
988
    for (i = 0; i < ncells; i++) {
989 990
        int ndistances;

991
        memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i);
992
        discard = virDomainNumaGetNodeDiscard(def, i);
993

994
        if (!(cpustr = virBitmapFormat(virDomainNumaGetNodeCpumask(def, i))))
995 996 997 998 999
            return -1;

        virBufferAddLit(buf, "<cell");
        virBufferAsprintf(buf, " id='%zu'", i);
        virBufferAsprintf(buf, " cpus='%s'", cpustr);
1000 1001
        virBufferAsprintf(buf, " memory='%llu'",
                          virDomainNumaGetNodeMemorySize(def, i));
1002 1003 1004
        virBufferAddLit(buf, " unit='KiB'");
        if (memAccess)
            virBufferAsprintf(buf, " memAccess='%s'",
1005
                              virDomainMemoryAccessTypeToString(memAccess));
1006

1007 1008 1009 1010
        if (discard)
            virBufferAsprintf(buf, " discard='%s'",
                              virTristateBoolTypeToString(discard));

1011
        ndistances = def->mem_nodes[i].ndistances;
1012
        if (ndistances == 0) {
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
            virBufferAddLit(buf, "/>\n");
        } else {
            size_t j;
            virDomainNumaDistancePtr distances = def->mem_nodes[i].distances;

            virBufferAddLit(buf, ">\n");
            virBufferAdjustIndent(buf, 2);
            virBufferAddLit(buf, "<distances>\n");
            virBufferAdjustIndent(buf, 2);
            for (j = 0; j < ndistances; j++) {
                if (distances[j].value) {
                    virBufferAddLit(buf, "<sibling");
                    virBufferAsprintf(buf, " id='%d'", distances[j].cellid);
                    virBufferAsprintf(buf, " value='%d'", distances[j].value);
                    virBufferAddLit(buf, "/>\n");
                }
            }
            virBufferAdjustIndent(buf, -2);
            virBufferAddLit(buf, "</distances>\n");
            virBufferAdjustIndent(buf, -2);
            virBufferAddLit(buf, "</cell>\n");
        }

1036 1037 1038 1039 1040 1041 1042
        VIR_FREE(cpustr);
    }
    virBufferAdjustIndent(buf, -2);
    virBufferAddLit(buf, "</numa>\n");

    return 0;
}
1043 1044 1045


unsigned int
1046
virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa)
1047 1048 1049 1050
{
    size_t i;
    unsigned int ret = 0;

1051
    for (i = 0; i < numa->nmem_nodes; i++)
1052
        ret += virBitmapCountBits(virDomainNumaGetNodeCpumask(numa, i));
1053 1054 1055

    return ret;
}
1056

1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073
/**
 * virDomainNumaGetMaxCPUID:
 * @numa: NUMA definition to inspect
 *
 * Walks all NUMA nodes and looks up the last set bit of each node's
 * cpumask.
 *
 * Returns the highest bit position found, or 0 when no node yields a
 * non-negative position.
 */
unsigned int
virDomainNumaGetMaxCPUID(virDomainNumaPtr numa)
{
    size_t i;
    unsigned int ret = 0;

    for (i = 0; i < numa->nmem_nodes; i++) {
        int bit;

        bit = virBitmapLastSetBit(virDomainNumaGetNodeCpumask(numa, i));

        /* 'bit' is signed while 'ret' is unsigned: a negative result
         * (presumably "no bit set") would be converted to a huge unsigned
         * number by the comparison and wrongly win. Reject it first. */
        if (bit >= 0 && (unsigned int) bit > ret)
            ret = (unsigned int) bit;
    }

    return ret;
}

1074 1075 1076 1077 1078 1079 1080 1081 1082 1083

/**
 * virDomainNumaNew:
 *
 * Allocates a fresh NUMA definition via VIR_ALLOC.
 *
 * Returns the new object, or NULL when VIR_ALLOC reports a failure.
 */
virDomainNumaPtr
virDomainNumaNew(void)
{
    virDomainNumaPtr numa = NULL;

    if (VIR_ALLOC(numa) < 0)
        return NULL;

    return numa;
}
1084 1085


1086 1087 1088 1089 1090
/**
 * virDomainNumaCheckABIStability:
 * @src: source NUMA definition
 * @tgt: target NUMA definition
 *
 * Compares the node count of both definitions and then, node by node,
 * the memory size, the CPU mask and the distance to every node. An error
 * is reported for the first mismatch encountered.
 *
 * Returns true when everything compared matches, false otherwise.
 */
bool
virDomainNumaCheckABIStability(virDomainNumaPtr src,
                               virDomainNumaPtr tgt)
{
    size_t nnodes = virDomainNumaGetNodeCount(src);
    size_t node;
    size_t sibling;

    if (nnodes != virDomainNumaGetNodeCount(tgt)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Target NUMA node count '%zu' doesn't match "
                         "source '%zu'"),
                       virDomainNumaGetNodeCount(tgt),
                       nnodes);
        return false;
    }

    for (node = 0; node < nnodes; node++) {
        unsigned long long srcSize = virDomainNumaGetNodeMemorySize(src, node);
        unsigned long long tgtSize = virDomainNumaGetNodeMemorySize(tgt, node);

        if (srcSize != tgtSize) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Size of target NUMA node %zu (%llu) doesn't "
                             "match source (%llu)"), node,
                           tgtSize,
                           srcSize);
            return false;
        }

        if (!virBitmapEqual(virDomainNumaGetNodeCpumask(src, node),
                            virDomainNumaGetNodeCpumask(tgt, node))) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Processor mask of target NUMA node %zu doesn't "
                             "match source"), node);
            return false;
        }

        /* Distances are compared against every node, including itself. */
        for (sibling = 0; sibling < nnodes; sibling++) {
            if (virDomainNumaGetNodeDistance(src, node, sibling) ==
                virDomainNumaGetNodeDistance(tgt, node, sibling))
                continue;

            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Target NUMA distance from %zu to %zu "
                             "doesn't match source"), node, sibling);
            return false;
        }
    }

    return true;
}


1137
size_t
1138
virDomainNumaGetNodeCount(virDomainNumaPtr numa)
1139 1140 1141 1142
{
    if (!numa)
        return 0;

1143
    return numa->nmem_nodes;
1144
}
1145 1146


1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169
/**
 * virDomainNumaSetNodeCount:
 * @numa: NUMA definition to modify
 * @nmem_nodes: number of NUMA nodes to allocate
 *
 * Allocates the array of NUMA nodes for @numa. A count of zero is
 * rejected, and so is a definition that already has a node array.
 *
 * Returns the stored node count on success, 0 on failure.
 */
size_t
virDomainNumaSetNodeCount(virDomainNumaPtr numa, size_t nmem_nodes)
{
    if (nmem_nodes == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot set an empty mem_nodes set"));
        return 0;
    }

    /* The node array may only be set up once. */
    if (numa->mem_nodes != NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot alter an existing mem_nodes set"));
        return 0;
    }

    if (VIR_ALLOC_N(numa->mem_nodes, nmem_nodes) < 0)
        return 0;

    numa->nmem_nodes = nmem_nodes;
    return numa->nmem_nodes;
}

1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190

/**
 * virDomainNumaNodeDistanceIsUsingDefaults:
 * @numa: NUMA definition to inspect
 * @node: index of the NUMA node
 * @sibling: index of the sibling node
 *
 * Returns false when @node or @sibling is not below the node count.
 * Returns true when @node has no distance table, or when the stored
 * value for @sibling equals LOCAL_DISTANCE or REMOTE_DISTANCE; false
 * for any other stored value.
 */
bool
virDomainNumaNodeDistanceIsUsingDefaults(virDomainNumaPtr numa,
                                         size_t node,
                                         size_t sibling)
{
    virDomainNumaDistancePtr row;

    if (!(node < numa->nmem_nodes && sibling < numa->nmem_nodes))
        return false;

    row = numa->mem_nodes[node].distances;
    if (row == NULL)
        return true;

    switch (row[sibling].value) {
    case LOCAL_DISTANCE:
    case REMOTE_DISTANCE:
        return true;
    default:
        return false;
    }
}


1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206
/**
 * virDomainNumaGetNodeDistance:
 * @numa: NUMA definition to inspect
 * @node: index of the NUMA node
 * @cellid: index of the node the distance is measured to
 *
 * Returns the configured distance from @node to @cellid. When either
 * index is out of range, @node has no distance table, or the stored
 * value is zero, the default is returned instead: LOCAL_DISTANCE if
 * @node equals @cellid, REMOTE_DISTANCE otherwise.
 */
size_t
virDomainNumaGetNodeDistance(virDomainNumaPtr numa,
                             size_t node,
                             size_t cellid)
{
    size_t fallback = REMOTE_DISTANCE;
    virDomainNumaDistancePtr row;

    if (node == cellid)
        fallback = LOCAL_DISTANCE;

    if (node >= numa->nmem_nodes || cellid >= numa->nmem_nodes)
        return fallback;

    row = numa->mem_nodes[node].distances;
    if (row && row[cellid].value)
        return row[cellid].value;

    return fallback;
}


/**
 * virDomainNumaSetNodeDistance:
 * @numa: NUMA definition to modify
 * @node: index of the NUMA node
 * @cellid: index of the node the distance is measured to
 * @value: distance value to store
 *
 * Stores @value as the distance from @node to @cellid. The call fails
 * when @node is out of range, when @node has no distance table or
 * @cellid is outside of it, when @value is not within
 * LOCAL_DISTANCE..UNREACHABLE, or when @value is LOCAL_DISTANCE while
 * @node differs from @cellid.
 *
 * Returns the stored distance value on success, -1 with an error
 * reported on failure.
 */
int
virDomainNumaSetNodeDistance(virDomainNumaPtr numa,
                             size_t node,
                             size_t cellid,
                             unsigned int value)
{
    virDomainNumaDistancePtr row;
    virDomainNumaDistancePtr entry;

    if (node >= numa->nmem_nodes) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Argument 'node' %zu outranges "
                         "defined number of NUMA nodes"),
                       node);
        return -1;
    }

    row = numa->mem_nodes[node].distances;
    if (row == NULL || cellid >= numa->mem_nodes[node].ndistances) {
        virReportError(VIR_ERR_XML_ERROR, "%s",
                       _("Arguments under memnode element do not "
                         "correspond with existing guest's NUMA cell"));
        return -1;
    }

    /*
     * ACPI specification version 6.1, chapter 5.2.17 (System Locality
     * Distance Information Table): distance values of 0-9 are reserved.
     */
    if (!(value >= LOCAL_DISTANCE && value <= UNREACHABLE)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Distance value of %d is not in valid range"),
                       value);
        return -1;
    }

    /* LOCAL_DISTANCE is only accepted for a node's distance to itself. */
    if (node != cellid && value == LOCAL_DISTANCE) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Distance value %d under node %zu is "
                         "LOCAL_DISTANCE and should be set to 10"),
                       value, node);
        return -1;
    }

    entry = &row[cellid];
    entry->cellid = cellid;
    entry->value = value;

    return entry->value;
}


/**
 * virDomainNumaSetNodeDistanceCount:
 * @numa: NUMA definition to modify
 * @node: index of the NUMA node
 * @ndistances: number of distance entries to allocate
 *
 * Allocates the distance table of @node with @ndistances entries. A node
 * that already has a distance table is rejected with an error.
 *
 * Returns the stored number of distances on success, 0 on failure.
 */
size_t
virDomainNumaSetNodeDistanceCount(virDomainNumaPtr numa,
                                  size_t node,
                                  size_t ndistances)
{
    virDomainNumaDistancePtr table = NULL;

    /* The distance table of a node may only be set up once. */
    if (numa->mem_nodes[node].distances != NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cannot alter an existing nmem_nodes distances set for node: %zu"),
                       node);
        return 0;
    }

    if (VIR_ALLOC_N(table, ndistances) < 0)
        return 0;

    numa->mem_nodes[node].ndistances = ndistances;
    numa->mem_nodes[node].distances = table;

    return numa->mem_nodes[node].ndistances;
}


1294
virBitmapPtr
1295
virDomainNumaGetNodeCpumask(virDomainNumaPtr numa,
1296 1297
                            size_t node)
{
1298
    return numa->mem_nodes[node].cpumask;
1299
}
1300 1301


1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312
/**
 * virDomainNumaSetNodeCpumask:
 * @numa: NUMA definition to modify
 * @node: index of the NUMA node (not range-checked here)
 * @cpumask: bitmap to store for @node
 *
 * Stores @cpumask as the cpumask of @node; any previously stored
 * pointer is simply overwritten.
 *
 * Returns the cpumask now stored for @node.
 */
virBitmapPtr
virDomainNumaSetNodeCpumask(virDomainNumaPtr numa,
                            size_t node,
                            virBitmapPtr cpumask)
{
    return numa->mem_nodes[node].cpumask = cpumask;
}


1313
virDomainMemoryAccess
1314
virDomainNumaGetNodeMemoryAccessMode(virDomainNumaPtr numa,
1315 1316
                                     size_t node)
{
1317
    return numa->mem_nodes[node].memAccess;
1318
}
1319 1320


1321 1322 1323 1324 1325 1326 1327 1328
/**
 * virDomainNumaGetNodeDiscard:
 * @numa: NUMA definition to inspect
 * @node: index of the NUMA node (not range-checked here)
 *
 * Returns the discard setting stored for @node.
 */
virTristateBool
virDomainNumaGetNodeDiscard(virDomainNumaPtr numa,
                            size_t node)
{
    virTristateBool discard = numa->mem_nodes[node].discard;

    return discard;
}


1329
unsigned long long
1330
virDomainNumaGetNodeMemorySize(virDomainNumaPtr numa,
1331 1332
                               size_t node)
{
1333
    return numa->mem_nodes[node].mem;
1334 1335 1336 1337
}


void
1338
virDomainNumaSetNodeMemorySize(virDomainNumaPtr numa,
1339 1340 1341
                               size_t node,
                               unsigned long long size)
{
1342
    numa->mem_nodes[node].mem = size;
1343
}
1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356


/**
 * virDomainNumaGetMemorySize:
 * @numa: NUMA definition to inspect
 *
 * Returns the sum of the memory sizes stored for all NUMA nodes.
 */
unsigned long long
virDomainNumaGetMemorySize(virDomainNumaPtr numa)
{
    unsigned long long total = 0;
    size_t node = 0;

    while (node < numa->nmem_nodes) {
        total += numa->mem_nodes[node].mem;
        node++;
    }

    return total;
}