virpci.c 79.6 KB
Newer Older
1
/*
2 3
 * virpci.c: helper APIs for managing host PCI devices
 *
4
 * Copyright (C) 2009-2015 Red Hat, Inc.
5 6 7 8 9 10 11 12 13 14 15 16
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library.  If not, see
O
Osier Yang 已提交
18
 * <http://www.gnu.org/licenses/>.
19 20 21 22
 */

#include <config.h>

23
#include "virpci.h"
24
#include "virnetdev.h"
25 26 27 28 29 30 31 32

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

33
#include "dirname.h"
34
#include "virlog.h"
35
#include "vircommand.h"
36
#include "virerror.h"
E
Eric Blake 已提交
37
#include "virfile.h"
38
#include "virkmod.h"
39 40
#include "virstring.h"
#include "virutil.h"
41
#include "viralloc.h"
42

43 44
VIR_LOG_INIT("util.pci");

45 46 47
#define PCI_SYSFS "/sys/bus/pci/"
#define PCI_ID_LEN 10   /* "XXXX XXXX" */

48 49
VIR_ENUM_IMPL(virPCIELinkSpeed,
              VIR_PCIE_LINK_SPEED_LAST,
50 51
              "", "2.5", "5", "8", "16",
);
52

53 54
VIR_ENUM_IMPL(virPCIStubDriver,
              VIR_PCI_STUB_DRIVER_LAST,
55 56 57
              "none",
              "pciback", /* XEN */
              "vfio-pci", /* VFIO */
58
);
59

60 61
VIR_ENUM_IMPL(virPCIHeader,
              VIR_PCI_HEADER_LAST,
62 63 64
              "endpoint",
              "pci-bridge",
              "cardbus-bridge",
65
);
66

67
struct _virPCIDevice {
68
    virPCIDeviceAddress address;
69

70
    char          *name;              /* domain:bus:slot.function */
71
    char          id[PCI_ID_LEN];     /* product vendor */
E
Eric Blake 已提交
72
    char          *path;
C
Chunyan Liu 已提交
73 74 75 76

    /* The driver:domain which uses the device */
    char          *used_by_drvname;
    char          *used_by_domname;
77

78 79
    unsigned int  pcie_cap_pos;
    unsigned int  pci_pm_cap_pos;
80 81
    bool          has_flr;
    bool          has_pm_reset;
82
    bool          managed;
83 84

    virPCIStubDriver stubDriver;
85 86

    /* used by reattach function */
87 88 89
    bool          unbind_from_stub;
    bool          remove_slot;
    bool          reprobe;
90 91
};

92
struct _virPCIDeviceList {
93 94
    virObjectLockable parent;

95
    size_t count;
96
    virPCIDevicePtr *devs;
97 98 99
};


100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
/* For virReportOOMError()  and virReportSystemError() */
#define VIR_FROM_THIS VIR_FROM_NONE

/* Specifications referenced in comments:
 *  PCI30  - PCI Local Bus Specification 3.0
 *  PCIe20 - PCI Express Base Specification 2.0
 *  BR12   - PCI-to-PCI Bridge Architecture Specification 1.2
 *  PM12   - PCI Bus Power Management Interface Specification 1.2
 *  ECN_AF - Advanced Capabilities for Conventional PCI ECN
 */

/* Type 0 config space header length; PCI30 Section 6.1 Configuration Space Organization */
#define PCI_CONF_LEN            0x100
#define PCI_CONF_HEADER_LEN     0x40

/* PCI30 6.2.1 */
#define PCI_HEADER_TYPE         0x0e    /* Header type */
117 118 119
#define PCI_HEADER_TYPE_BRIDGE 0x1
#define PCI_HEADER_TYPE_MASK   0x7f
#define PCI_HEADER_TYPE_MULTI  0x80
120 121 122 123 124 125 126 127 128

/* PCI30 6.2.1  Device Identification */
#define PCI_CLASS_DEVICE        0x0a    /* Device class */

/* Class Code for bridge; PCI30 D.7  Base Class 06h */
#define PCI_CLASS_BRIDGE_PCI    0x0604

/* PCI30 6.2.3  Device Status */
#define PCI_STATUS              0x06    /* 16 bits */
129
#define PCI_STATUS_CAP_LIST    0x10    /* Support Capability List */
130 131 132

/* PCI30 6.7  Capabilities List */
#define PCI_CAPABILITY_LIST     0x34    /* Offset of first capability list entry */
133
#define PCI_CAP_FLAGS           2       /* Capability defined flags (16 bits) */
134 135 136 137 138 139 140 141 142 143

/* PM12 3.2.1  Capability Identifier */
#define PCI_CAP_ID_PM           0x01    /* Power Management */
/* PCI30 H Capability IDs */
#define PCI_CAP_ID_EXP          0x10    /* PCI Express */
/* ECN_AF 6.x.1.1  Capability ID for AF */
#define PCI_CAP_ID_AF           0x13    /* Advanced Features */

/* PCIe20 7.8.3  Device Capabilities Register (Offset 04h) */
#define PCI_EXP_DEVCAP          0x4     /* Device capabilities */
144 145
#define PCI_EXP_DEVCAP_FLR     (1<<28)  /* Function Level Reset */
#define PCI_EXP_LNKCAP          0xc     /* Link Capabilities */
146
#define PCI_EXP_LNKCAP_SPEED    0x0000f /* Maximum Link Speed */
147 148 149 150
#define PCI_EXP_LNKCAP_WIDTH    0x003f0 /* Maximum Link Width */
#define PCI_EXP_LNKSTA          0x12    /* Link Status */
#define PCI_EXP_LNKSTA_SPEED    0x000f  /* Negotiated Link Speed */
#define PCI_EXP_LNKSTA_WIDTH    0x03f0  /* Negotiated Link Width */
151 152 153 154 155 156 157

/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */
#define PCI_PRIMARY_BUS         0x18    /* BR12 3.2.5.2 Primary bus number */
#define PCI_SECONDARY_BUS       0x19    /* BR12 3.2.5.3 Secondary bus number */
#define PCI_SUBORDINATE_BUS     0x1a    /* BR12 3.2.5.4 Highest bus number behind the bridge */
#define PCI_BRIDGE_CONTROL      0x3e
/* BR12 3.2.5.18  Bridge Control Register */
158
#define PCI_BRIDGE_CTL_RESET   0x40    /* Secondary bus reset */
159 160 161

/* PM12 3.2.4  Power Management Control/Status (Offset = 4) */
#define PCI_PM_CTRL                4    /* PM control and status register */
162 163 164 165
#define PCI_PM_CTRL_STATE_MASK    0x3  /* Current power state (D0 to D3) */
#define PCI_PM_CTRL_STATE_D0      0x0  /* D0 state */
#define PCI_PM_CTRL_STATE_D3hot   0x3  /* D3 state */
#define PCI_PM_CTRL_NO_SOFT_RESET 0x8  /* No reset for D3hot->D0 */
166 167 168

/* ECN_AF 6.x.1  Advanced Features Capability Structure */
#define PCI_AF_CAP              0x3     /* Advanced features capabilities */
169
#define PCI_AF_CAP_FLR         0x2     /* Function Level Reset */
170

J
Jiri Denemark 已提交
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
#define PCI_EXP_FLAGS           0x2
#define PCI_EXP_FLAGS_TYPE      0x00f0
#define PCI_EXP_TYPE_DOWNSTREAM 0x6

#define PCI_EXT_CAP_BASE          0x100
#define PCI_EXT_CAP_LIMIT         0x1000
#define PCI_EXT_CAP_ID_MASK       0x0000ffff
#define PCI_EXT_CAP_OFFSET_SHIFT  20
#define PCI_EXT_CAP_OFFSET_MASK   0x00000ffc

#define PCI_EXT_CAP_ID_ACS      0x000d
#define PCI_EXT_ACS_CTRL        0x06

#define PCI_EXT_CAP_ACS_SV      0x01
#define PCI_EXT_CAP_ACS_RR      0x04
#define PCI_EXT_CAP_ACS_CR      0x08
#define PCI_EXT_CAP_ACS_UF      0x10
188 189 190
#define PCI_EXT_CAP_ACS_ENABLED (PCI_EXT_CAP_ACS_SV | \
                                 PCI_EXT_CAP_ACS_RR | \
                                 PCI_EXT_CAP_ACS_CR | \
J
Jiri Denemark 已提交
191 192
                                 PCI_EXT_CAP_ACS_UF)

193 194 195
#define PCI_EXP_TYPE_ROOT_INT_EP 0x9    /* Root Complex Integrated Endpoint */
#define PCI_EXP_TYPE_ROOT_EC 0xa        /* Root Complex Event Collector */

196 197 198 199 200 201
static virClassPtr virPCIDeviceListClass;

static void virPCIDeviceListDispose(void *obj);

static int virPCIOnceInit(void)
{
202
    if (!VIR_CLASS_NEW(virPCIDeviceList, virClassForObjectLockable()))
203 204 205 206 207
        return -1;

    return 0;
}

208
VIR_ONCE_GLOBAL_INIT(virPCI);
209

L
Laine Stump 已提交
210

211 212
static char *
virPCIDriverDir(const char *driver)
L
Laine Stump 已提交
213
{
214
    char *buffer;
L
Laine Stump 已提交
215

216 217
    ignore_value(virAsprintf(&buffer, PCI_SYSFS "drivers/%s", driver));
    return buffer;
L
Laine Stump 已提交
218 219 220
}


221 222
static char *
virPCIFile(const char *device, const char *file)
L
Laine Stump 已提交
223
{
224
    char *buffer;
L
Laine Stump 已提交
225

226 227
    ignore_value(virAsprintf(&buffer, PCI_SYSFS "devices/%s/%s", device, file));
    return buffer;
L
Laine Stump 已提交
228 229 230 231 232 233 234 235 236 237
}


/* virPCIDeviceGetDriverPathAndName - put the path to the driver
 * directory of the driver in use for this device in @path and the
 * name of the driver in @name. Both could be NULL if it's not bound
 * to any driver.
 *
 * Return 0 for success, -1 for error.
 */
238
int
L
Laine Stump 已提交
239 240 241
virPCIDeviceGetDriverPathAndName(virPCIDevicePtr dev, char **path, char **name)
{
    int ret = -1;
242
    g_autofree char *drvlink = NULL;
L
Laine Stump 已提交
243 244 245

    *path = *name = NULL;
    /* drvlink = "/sys/bus/pci/dddd:bb:ss.ff/driver" */
246
    if (!(drvlink = virPCIFile(dev->name, "driver")))
L
Laine Stump 已提交
247 248
        goto cleanup;

249 250 251 252 253
    if (!virFileExists(drvlink)) {
        ret = 0;
        goto cleanup;
    }

L
Laine Stump 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266 267
    if (virFileIsLink(drvlink) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s driver file %s is not a symlink"),
                       dev->name, drvlink);
        goto cleanup;
    }
    if (virFileResolveLink(drvlink, path) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s driver symlink %s"),
                       dev->name, drvlink);
        goto cleanup;
    }
    /* path = "/sys/bus/pci/drivers/${drivername}" */

268
    *name = g_strdup(last_component(*path));
L
Laine Stump 已提交
269 270 271
    /* name = "${drivername}" */

    ret = 0;
272
 cleanup:
L
Laine Stump 已提交
273 274 275 276 277 278 279 280
    if (ret < 0) {
        VIR_FREE(*path);
        VIR_FREE(*name);
    }
    return ret;
}


281
static int
282
virPCIDeviceConfigOpenInternal(virPCIDevicePtr dev, bool readonly, bool fatal)
283 284 285
{
    int fd;

286
    fd = open(dev->path, readonly ? O_RDONLY : O_RDWR);
287

288
    if (fd < 0) {
289 290 291 292 293 294 295 296 297
        if (fatal) {
            virReportSystemError(errno,
                                 _("Failed to open config space file '%s'"),
                                 dev->path);
        } else {
            char ebuf[1024];
            VIR_WARN("Failed to open config space file '%s': %s",
                     dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
        }
298 299
        return -1;
    }
300

301
    VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path);
302
    return fd;
303 304
}

305
static int
306
virPCIDeviceConfigOpen(virPCIDevicePtr dev)
307
{
308
    return virPCIDeviceConfigOpenInternal(dev, true, true);
309 310
}

311 312 313 314 315 316
static int
virPCIDeviceConfigOpenTry(virPCIDevicePtr dev)
{
    return virPCIDeviceConfigOpenInternal(dev, true, false);
}

317 318 319
static int
virPCIDeviceConfigOpenWrite(virPCIDevicePtr dev)
{
320
    return virPCIDeviceConfigOpenInternal(dev, false, true);
321 322
}

323
static void
324
virPCIDeviceConfigClose(virPCIDevicePtr dev, int cfgfd)
325
{
326 327 328 329 330
    if (VIR_CLOSE(cfgfd) < 0) {
        char ebuf[1024];
        VIR_WARN("Failed to close config space file '%s': %s",
                 dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
    }
331 332
}

333

334
static int
335 336
virPCIDeviceRead(virPCIDevicePtr dev,
                 int cfgfd,
337
                 unsigned int pos,
338
                 uint8_t *buf,
339
                 unsigned int buflen)
340 341 342
{
    memset(buf, 0, buflen);

343 344
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        saferead(cfgfd, buf, buflen) != buflen) {
345
        char ebuf[1024];
346
        VIR_WARN("Failed to read from '%s' : %s", dev->path,
347 348 349 350 351 352 353
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static uint8_t
354
virPCIDeviceRead8(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
355 356
{
    uint8_t buf;
357
    virPCIDeviceRead(dev, cfgfd, pos, &buf, sizeof(buf));
358 359 360 361
    return buf;
}

static uint16_t
362
virPCIDeviceRead16(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
363 364
{
    uint8_t buf[2];
365
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
366 367 368 369
    return (buf[0] << 0) | (buf[1] << 8);
}

static uint32_t
370
virPCIDeviceRead32(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
371 372
{
    uint8_t buf[4];
373
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
374 375 376
    return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
}

377 378 379
static int
virPCIDeviceReadClass(virPCIDevicePtr dev, uint16_t *device_class)
{
380 381
    g_autofree char *path = NULL;
    g_autofree char *id_str = NULL;
382 383
    unsigned int value;

384
    if (!(path = virPCIFile(dev->name, "class")))
385
        return -1;
386 387 388

    /* class string is '0xNNNNNN\n' ... i.e. 9 bytes */
    if (virFileReadAll(path, 9, &id_str) < 0)
389
        return -1;
390 391 392 393 394 395

    id_str[8] = '\0';
    if (virStrToLong_ui(id_str, NULL, 16, &value) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unusual value in %s/devices/%s/class: %s"),
                       PCI_SYSFS, dev->name, id_str);
396
        return -1;
397 398 399
    }

    *device_class = (value >> 8) & 0xFFFF;
400
    return 0;
401 402
}

403
static int
404 405
virPCIDeviceWrite(virPCIDevicePtr dev,
                  int cfgfd,
406
                  unsigned int pos,
407
                  uint8_t *buf,
408
                  unsigned int buflen)
409
{
410 411
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        safewrite(cfgfd, buf, buflen) != buflen) {
412
        char ebuf[1024];
413
        VIR_WARN("Failed to write to '%s' : %s", dev->path,
414 415 416 417 418 419 420
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static void
421
virPCIDeviceWrite16(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint16_t val)
422 423
{
    uint8_t buf[2] = { (val >> 0), (val >> 8) };
424
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
425 426 427
}

static void
428
virPCIDeviceWrite32(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint32_t val)
429
{
430
    uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val >> 24) };
431
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
432 433
}

E
Eric Blake 已提交
434 435
typedef int (*virPCIDeviceIterPredicate)(virPCIDevicePtr, virPCIDevicePtr,
                                         void *);
436 437 438 439 440 441 442

/* Iterate over available PCI devices calling @predicate
 * to compare each one to @dev.
 * Return -1 on error since we don't want to assume it is
 * safe to reset if there is an error.
 */
static int
443 444 445 446
virPCIDeviceIterDevices(virPCIDeviceIterPredicate predicate,
                        virPCIDevicePtr dev,
                        virPCIDevicePtr *matched,
                        void *data)
447 448 449
{
    DIR *dir;
    struct dirent *entry;
450
    int ret = 0;
451
    int rc;
452 453 454 455 456

    *matched = NULL;

    VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices", dev->id, dev->name);

J
Ján Tomko 已提交
457
    if (virDirOpen(&dir, PCI_SYSFS "devices") < 0)
458 459
        return -1;

E
Eric Blake 已提交
460
    while ((ret = virDirRead(dir, &entry, PCI_SYSFS "devices")) > 0) {
461
        unsigned int domain, bus, slot, function;
J
Ján Tomko 已提交
462
        g_autoptr(virPCIDevice) check = NULL;
463
        char *tmp;
464

465 466 467 468 469 470 471 472 473
        /* expected format: <domain>:<bus>:<slot>.<function> */
        if (/* domain */
            virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp != ':' ||
            /* bus */
            virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' ||
            /* slot */
            virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' ||
            /* function */
            virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) {
474 475 476 477
            VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s", entry->d_name);
            continue;
        }

478
        check = virPCIDeviceNew(domain, bus, slot, function);
479
        if (!check) {
480 481 482
            ret = -1;
            break;
        }
483

484 485 486 487 488
        rc = predicate(dev, check, data);
        if (rc < 0) {
            /* the predicate returned an error, bail */
            ret = -1;
            break;
489
        } else if (rc == 1) {
490
            VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name, check->name);
491
            *matched = g_steal_pointer(&check);
492
            ret = 1;
493 494 495
            break;
        }
    }
J
Ján Tomko 已提交
496
    VIR_DIR_CLOSE(dir);
497
    return ret;
498 499 500
}

static uint8_t
501 502 503
virPCIDeviceFindCapabilityOffset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 unsigned int capability)
504 505 506 507
{
    uint16_t status;
    uint8_t pos;

508
    status = virPCIDeviceRead16(dev, cfgfd, PCI_STATUS);
509 510 511
    if (!(status & PCI_STATUS_CAP_LIST))
        return 0;

512
    pos = virPCIDeviceRead8(dev, cfgfd, PCI_CAPABILITY_LIST);
513 514 515 516 517 518 519 520 521

    /* Zero indicates last capability, capabilities can't
     * be in the config space header and 0xff is returned
     * by the kernel if we don't have access to this region
     *
     * Note: we're not handling loops or extended
     * capabilities here.
     */
    while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) {
522
        uint8_t capid = virPCIDeviceRead8(dev, cfgfd, pos);
523 524 525 526 527 528
        if (capid == capability) {
            VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x",
                      dev->id, dev->name, capability, pos);
            return pos;
        }

529
        pos = virPCIDeviceRead8(dev, cfgfd, pos + 1);
530 531 532 533 534 535 536
    }

    VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name, capability);

    return 0;
}

J
Jiri Denemark 已提交
537
static unsigned int
538 539
virPCIDeviceFindExtendedCapabilityOffset(virPCIDevicePtr dev,
                                         int cfgfd,
540
                                         unsigned int capability)
J
Jiri Denemark 已提交
541 542 543 544 545 546 547 548 549 550
{
    int ttl;
    unsigned int pos;
    uint32_t header;

    /* minimum 8 bytes per capability */
    ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8;
    pos = PCI_EXT_CAP_BASE;

    while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) {
551
        header = virPCIDeviceRead32(dev, cfgfd, pos);
J
Jiri Denemark 已提交
552 553 554 555 556 557 558 559 560 561 562

        if ((header & PCI_EXT_CAP_ID_MASK) == capability)
            return pos;

        pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK;
        ttl--;
    }

    return 0;
}

563 564 565 566
/* detects whether this device has FLR.  Returns 0 if the device does
 * not have FLR, 1 if it does, and -1 on error
 */
static int
567
virPCIDeviceDetectFunctionLevelReset(virPCIDevicePtr dev, int cfgfd)
568
{
M
Mark McLoughlin 已提交
569
    uint32_t caps;
570
    uint8_t pos;
571
    g_autofree char *path = NULL;
572
    int found;
573 574 575 576 577 578 579 580

    /* The PCIe Function Level Reset capability allows
     * individual device functions to be reset without
     * affecting any other functions on the device or
     * any other devices on the bus. This is only common
     * on SR-IOV NICs at the moment.
     */
    if (dev->pcie_cap_pos) {
581
        caps = virPCIDeviceRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP);
582 583 584 585 586 587 588 589 590 591
        if (caps & PCI_EXP_DEVCAP_FLR) {
            VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id, dev->name);
            return 1;
        }
    }

    /* The PCI AF Function Level Reset capability is
     * the same thing, except for conventional PCI
     * devices. This is not common yet.
     */
592
    pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF);
593
    if (pos) {
594
        caps = virPCIDeviceRead16(dev, cfgfd, pos + PCI_AF_CAP);
595 596 597 598 599 600
        if (caps & PCI_AF_CAP_FLR) {
            VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id, dev->name);
            return 1;
        }
    }

601 602 603 604 605 606
    /* there are some buggy devices that do support FLR, but forget to
     * advertise that fact in their capabilities.  However, FLR is *required*
     * to be present for virtual functions (VFs), so if we see that this
     * device is a VF, we just assume FLR works
     */

607
    if (virAsprintf(&path, PCI_SYSFS "devices/%s/physfn", dev->name) < 0)
608 609 610 611 612 613 614 615 616
        return -1;

    found = virFileExists(path);
    if (found) {
        VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF; forcing flr on",
                  dev->id, dev->name);
        return 1;
    }

617 618 619 620 621 622 623 624 625
    VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name);

    return 0;
}

/* Require the device has the PCI Power Management capability
 * and that a D3hot->D0 transition will results in a full
 * internal reset, not just a soft reset.
 */
626
static unsigned int
627
virPCIDeviceDetectPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
628 629 630 631 632
{
    if (dev->pci_pm_cap_pos) {
        uint32_t ctl;

        /* require the NO_SOFT_RESET bit is clear */
633
        ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
634 635 636 637 638 639 640 641 642 643 644
        if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) {
            VIR_DEBUG("%s %s: detected PM reset capability", dev->id, dev->name);
            return 1;
        }
    }

    VIR_DEBUG("%s %s: no PM reset capability found", dev->id, dev->name);

    return 0;
}

645
/* Any active devices on the same domain/bus ? */
646
static int
647
virPCIDeviceSharesBusWithActive(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
648
{
649
    virPCIDeviceList *inactiveDevs = data;
650

651
    /* Different domain, different bus, or simply identical device */
652 653 654 655
    if (dev->address.domain != check->address.domain ||
        dev->address.bus != check->address.bus ||
        (dev->address.slot == check->address.slot &&
         dev->address.function == check->address.function))
656 657
        return 0;

658
    /* same bus, but inactive, i.e. about to be assigned to guest */
659
    if (inactiveDevs && virPCIDeviceListFind(inactiveDevs, check))
660
        return 0;
661

662
    return 1;
663 664
}

665 666 667
static virPCIDevicePtr
virPCIDeviceBusContainsActiveDevices(virPCIDevicePtr dev,
                                     virPCIDeviceList *inactiveDevs)
668
{
669 670 671
    virPCIDevicePtr active = NULL;
    if (virPCIDeviceIterDevices(virPCIDeviceSharesBusWithActive,
                                dev, &active, inactiveDevs) < 0)
672 673 674 675 676
        return NULL;
    return active;
}

/* Is @check the parent of @dev ? */
677
static int
678
virPCIDeviceIsParent(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
679 680 681
{
    uint16_t device_class;
    uint8_t header_type, secondary, subordinate;
682
    virPCIDevicePtr *best = data;
683 684
    int ret = 0;
    int fd;
685

686
    if (dev->address.domain != check->address.domain)
687 688
        return 0;

689
    if ((fd = virPCIDeviceConfigOpenTry(check)) < 0)
690 691
        return 0;

692
    /* Is it a bridge? */
693 694
    ret = virPCIDeviceReadClass(check, &device_class);
    if (ret < 0 || device_class != PCI_CLASS_BRIDGE_PCI)
695
        goto cleanup;
696 697

    /* Is it a plane? */
698
    header_type = virPCIDeviceRead8(check, fd, PCI_HEADER_TYPE);
699
    if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
700
        goto cleanup;
701

702 703
    secondary   = virPCIDeviceRead8(check, fd, PCI_SECONDARY_BUS);
    subordinate = virPCIDeviceRead8(check, fd, PCI_SUBORDINATE_BUS);
704

705
    VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name, check->name);
706

707 708 709
    /* if the secondary bus exactly equals the device's bus, then we found
     * the direct parent.  No further work is necessary
     */
710
    if (dev->address.bus == secondary) {
711 712 713
        ret = 1;
        goto cleanup;
    }
714

715
    /* otherwise, SRIOV allows VFs to be on different buses than their PFs.
716 717 718
     * In this case, what we need to do is look for the "best" match; i.e.
     * the most restrictive match that still satisfies all of the conditions.
     */
719
    if (dev->address.bus > secondary && dev->address.bus <= subordinate) {
720
        if (*best == NULL) {
721 722 723 724
            *best = virPCIDeviceNew(check->address.domain,
                                    check->address.bus,
                                    check->address.slot,
                                    check->address.function);
725 726 727 728 729
            if (*best == NULL) {
                ret = -1;
                goto cleanup;
            }
        } else {
730 731 732 733
            /* OK, we had already recorded a previous "best" match for the
             * parent.  See if the current device is more restrictive than the
             * best, and if so, make it the new best
             */
734 735 736
            int bestfd;
            uint8_t best_secondary;

737
            if ((bestfd = virPCIDeviceConfigOpenTry(*best)) < 0)
738
                goto cleanup;
739 740
            best_secondary = virPCIDeviceRead8(*best, bestfd, PCI_SECONDARY_BUS);
            virPCIDeviceConfigClose(*best, bestfd);
741 742

            if (secondary > best_secondary) {
743
                virPCIDeviceFree(*best);
744 745 746 747
                *best = virPCIDeviceNew(check->address.domain,
                                        check->address.bus,
                                        check->address.slot,
                                        check->address.function);
748 749 750 751
                if (*best == NULL) {
                    ret = -1;
                    goto cleanup;
                }
752 753 754 755
            }
        }
    }

756
 cleanup:
757
    virPCIDeviceConfigClose(check, fd);
758
    return ret;
759 760
}

761
static int
762
virPCIDeviceGetParent(virPCIDevicePtr dev, virPCIDevicePtr *parent)
763
{
764
    virPCIDevicePtr best = NULL;
765 766 767
    int ret;

    *parent = NULL;
768
    ret = virPCIDeviceIterDevices(virPCIDeviceIsParent, dev, parent, &best);
769
    if (ret == 1)
770
        virPCIDeviceFree(best);
771 772 773
    else if (ret == 0)
        *parent = best;
    return ret;
774 775 776 777 778 779
}

/* Secondary Bus Reset is our sledgehammer - it resets all
 * devices behind a bus.
 */
static int
780 781 782
virPCIDeviceTrySecondaryBusReset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 virPCIDeviceList *inactiveDevs)
783
{
J
Ján Tomko 已提交
784 785
    g_autoptr(virPCIDevice) parent = NULL;
    g_autoptr(virPCIDevice) conflict = NULL;
786 787 788
    uint8_t config_space[PCI_CONF_LEN];
    uint16_t ctl;
    int ret = -1;
789
    int parentfd;
790

791 792 793
    /* Refuse to do a secondary bus reset if there are other
     * devices/functions behind the bus are used by the host
     * or other guests.
794
     */
795
    if ((conflict = virPCIDeviceBusContainsActiveDevices(dev, inactiveDevs))) {
796
        virReportError(VIR_ERR_INTERNAL_ERROR,
797 798
                       _("Active %s devices on bus with %s, not doing bus reset"),
                       conflict->name, dev->name);
799 800 801 802
        return -1;
    }

    /* Find the parent bus */
803
    if (virPCIDeviceGetParent(dev, &parent) < 0)
804
        return -1;
805
    if (!parent) {
806
        virReportError(VIR_ERR_INTERNAL_ERROR,
807 808
                       _("Failed to find parent device for %s"),
                       dev->name);
809 810
        return -1;
    }
811
    if ((parentfd = virPCIDeviceConfigOpenWrite(parent)) < 0)
812
        goto out;
813 814 815 816 817 818 819

    VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name);

    /* Save and restore the device's config space; we only do this
     * for the supplied device since we refuse to do a reset if there
     * are multiple devices/functions
     */
820
    if (virPCIDeviceRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
821
        virReportError(VIR_ERR_INTERNAL_ERROR,
822
                       _("Failed to read PCI config space for %s"),
823
                       dev->name);
824 825 826 827 828 829
        goto out;
    }

    /* Read the control register, set the reset flag, wait 200ms,
     * unset the reset flag and wait 200ms.
     */
H
hexin 已提交
830
    ctl = virPCIDeviceRead16(dev, parentfd, PCI_BRIDGE_CONTROL);
831

832 833
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL,
                        ctl | PCI_BRIDGE_CTL_RESET);
834

835
    g_usleep(200 * 1000); /* sleep 200ms */
836

837
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl);
838

839
    g_usleep(200 * 1000); /* sleep 200ms */
840

841
    if (virPCIDeviceWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
842
        virReportError(VIR_ERR_INTERNAL_ERROR,
843 844 845 846
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        goto out;
    }
847
    ret = 0;
848

849
 out:
850
    virPCIDeviceConfigClose(parent, parentfd);
851 852 853 854 855 856 857 858
    return ret;
}

/* Power management reset attempts to reset a device using a
 * D-state transition from D3hot to D0. Note, in detect_pm_reset()
 * above we require the device supports a full internal reset.
 */
static int
859
virPCIDeviceTryPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
860 861 862 863 864 865 866 867
{
    uint8_t config_space[PCI_CONF_LEN];
    uint32_t ctl;

    if (!dev->pci_pm_cap_pos)
        return -1;

    /* Save and restore the device's config space. */
868
    if (virPCIDeviceRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
869
        virReportError(VIR_ERR_INTERNAL_ERROR,
870
                       _("Failed to read PCI config space for %s"),
871
                       dev->name);
872 873 874 875 876
        return -1;
    }

    VIR_DEBUG("%s %s: doing a power management reset", dev->id, dev->name);

877
    ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
878 879
    ctl &= ~PCI_PM_CTRL_STATE_MASK;

880 881
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D3hot);
882

883
    g_usleep(10 * 1000); /* sleep 10ms */
884

885 886
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D0);
887

888
    g_usleep(10 * 1000); /* sleep 10ms */
889

890
    if (virPCIDeviceWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
891
        virReportError(VIR_ERR_INTERNAL_ERROR,
892 893 894 895
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        return -1;
    }
896 897 898 899 900

    return 0;
}

static int
901
virPCIDeviceInit(virPCIDevicePtr dev, int cfgfd)
902
{
903 904
    int flr;

905 906 907
    dev->pcie_cap_pos   = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP);
    dev->pci_pm_cap_pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM);
    flr = virPCIDeviceDetectFunctionLevelReset(dev, cfgfd);
908
    if (flr < 0)
909
        return flr;
910 911
    dev->has_flr        = !!flr;
    dev->has_pm_reset   = !!virPCIDeviceDetectPowerManagementReset(dev, cfgfd);
912

913 914 915 916
    return 0;
}

int
917 918 919
virPCIDeviceReset(virPCIDevicePtr dev,
                  virPCIDeviceList *activeDevs,
                  virPCIDeviceList *inactiveDevs)
920
{
921 922
    g_autofree char *drvPath = NULL;
    g_autofree char *drvName = NULL;
923
    int ret = -1;
924
    int fd = -1;
925 926 927 928 929 930 931 932 933 934 935 936
    int hdrType = -1;

    if (virPCIGetHeaderType(dev, &hdrType) < 0)
        return -1;

    if (hdrType != VIR_PCI_HEADER_ENDPOINT) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid attempt to reset PCI device %s. "
                         "Only PCI endpoint devices can be reset"),
                       dev->name);
        return -1;
    }
937

938
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
939
        virReportError(VIR_ERR_INTERNAL_ERROR,
940 941 942 943
                       _("Not resetting active device %s"), dev->name);
        return -1;
    }

944 945 946 947 948 949 950 951
    /* If the device is currently bound to vfio-pci, ignore all
     * requests to reset it, since the vfio-pci driver will always
     * reset it whenever appropriate, so doing it ourselves would just
     * be redundant.
     */
    if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, &drvName) < 0)
        goto cleanup;

952
    if (virPCIStubDriverTypeFromString(drvName) == VIR_PCI_STUB_DRIVER_VFIO) {
953 954 955 956 957 958 959
        VIR_DEBUG("Device %s is bound to vfio-pci - skip reset",
                  dev->name);
        ret = 0;
        goto cleanup;
    }
    VIR_DEBUG("Resetting device %s", dev->name);

960
    if ((fd = virPCIDeviceConfigOpenWrite(dev)) < 0)
961
        goto cleanup;
962

963
    if (virPCIDeviceInit(dev, fd) < 0)
964 965
        goto cleanup;

966 967 968
    /* KVM will perform FLR when starting and stopping
     * a guest, so there is no need for us to do it here.
     */
969 970 971 972
    if (dev->has_flr) {
        ret = 0;
        goto cleanup;
    }
973

974 975 976 977 978
    /* If the device supports PCI power management reset,
     * that's the next best thing because it only resets
     * the function, not the whole device.
     */
    if (dev->has_pm_reset)
979
        ret = virPCIDeviceTryPowerManagementReset(dev, fd);
980

981
    /* Bus reset is not an option with the root bus */
982
    if (ret < 0 && dev->address.bus != 0)
983
        ret = virPCIDeviceTrySecondaryBusReset(dev, fd, inactiveDevs);
984

985 986
    if (ret < 0) {
        virErrorPtr err = virGetLastError();
987
        virReportError(VIR_ERR_INTERNAL_ERROR,
988 989
                       _("Unable to reset PCI device %s: %s"),
                       dev->name,
990 991
                       err ? err->message :
                       _("no FLR, PM reset or bus reset available"));
992 993
    }

994
 cleanup:
995
    virPCIDeviceConfigClose(dev, fd);
996 997 998
    return ret;
}

999

1000
static int
1001
virPCIProbeStubDriver(virPCIStubDriver driver)
1002
{
1003
    const char *drvname = NULL;
1004
    g_autofree char *drvpath = NULL;
1005
    bool probed = false;
1006

1007 1008 1009 1010 1011 1012 1013 1014
    if (driver == VIR_PCI_STUB_DRIVER_NONE ||
        !(drvname = virPCIStubDriverTypeToString(driver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s",
                       _("Attempting to use unknown stub driver"));
        return -1;
    }

1015
 recheck:
1016
    if ((drvpath = virPCIDriverDir(drvname)) && virFileExists(drvpath))
1017 1018
        /* driver already loaded, return */
        return 0;
1019 1020

    if (!probed) {
1021
        g_autofree char *errbuf = NULL;
1022
        probed = true;
1023 1024
        if ((errbuf = virKModLoad(drvname, true))) {
            VIR_WARN("failed to load driver %s: %s", drvname, errbuf);
1025
            goto cleanup;
1026
        }
1027 1028

        goto recheck;
1029 1030
    }

1031
 cleanup:
1032 1033 1034
    /* If we know failure was because of blacklist, let's report that;
     * otherwise, report a more generic failure message
     */
1035
    if (virKModIsBlacklisted(drvname)) {
1036 1037 1038
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s: "
                         "administratively prohibited"),
1039
                       drvname);
1040 1041 1042
    } else {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s"),
1043
                       drvname);
1044 1045
    }

1046
    return -1;
1047 1048
}

1049
int
1050
virPCIDeviceUnbind(virPCIDevicePtr dev)
1051
{
1052 1053 1054
    g_autofree char *path = NULL;
    g_autofree char *drvpath = NULL;
    g_autofree char *driver = NULL;
1055 1056

    if (virPCIDeviceGetDriverPathAndName(dev, &drvpath, &driver) < 0)
1057
        return -1;
1058

1059
    if (!driver)
1060
        /* The device is not bound to any driver */
1061
        return 0;
1062

1063
    if (!(path = virPCIFile(dev->name, "driver/unbind")))
1064
        return -1;
1065 1066 1067 1068 1069 1070

    if (virFileExists(path)) {
        if (virFileWriteStr(path, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to unbind PCI device '%s' from %s"),
                                 dev->name, driver);
1071
            return -1;
1072 1073 1074
        }
    }

1075
    return 0;
1076 1077
}

1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102

/**
 * virPCIDeviceRebind:
 *  @dev: virPCIDevice object describing the device to rebind
 *
 * unbind a device from its driver, then immediately rebind it.
 *
 * Returns 0 on success, -1 on failure
 */
int virPCIDeviceRebind(virPCIDevicePtr dev)
{
    if (virPCIDeviceUnbind(dev) < 0)
        return -1;

    if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to trigger a probe for PCI device '%s'"),
                             dev->name);
        return -1;
    }

    return 0;
}


1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113
/*
 * Bind a PCI device to a driver using driver_override sysfs interface.
 * E.g.
 *
 *  echo driver-name > /sys/bus/pci/devices/0000:03:00.0/driver_override
 *  echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
 *  echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
 *
 * An empty driverName will cause the device to be bound to its
 * preferred driver.
 */
1114
static int
1115 1116 1117
virPCIDeviceBindWithDriverOverride(virPCIDevicePtr dev,
                                   const char *driverName)
{
1118
    g_autofree char *path = NULL;
1119 1120 1121 1122 1123 1124 1125 1126 1127

    if (!(path = virPCIFile(dev->name, "driver_override")))
        return -1;

    if (virFileWriteStr(path, driverName, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to add driver '%s' to driver_override "
                               " interface of PCI device '%s'"),
                             driverName, dev->name);
1128
        return -1;
1129 1130
    }

1131
    if (virPCIDeviceRebind(dev) < 0)
1132
        return -1;
1133

1134
    return 0;
1135 1136 1137
}

static int
1138
virPCIDeviceUnbindFromStub(virPCIDevicePtr dev)
1139 1140 1141 1142 1143 1144 1145 1146
{
    if (!dev->unbind_from_stub) {
        VIR_DEBUG("Unbind from stub skipped for PCI device %s", dev->name);
        return 0;
    }

    return virPCIDeviceBindWithDriverOverride(dev, "\n");
}
1147 1148

static int
1149
virPCIDeviceBindToStub(virPCIDevicePtr dev)
1150 1151
{
    const char *stubDriverName;
1152 1153
    g_autofree char *stubDriverPath = NULL;
    g_autofree char *driverLink = NULL;
1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169

    /* Check the device is configured to use one of the known stub drivers */
    if (dev->stubDriver == VIR_PCI_STUB_DRIVER_NONE) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    } else if (!(stubDriverName = virPCIStubDriverTypeToString(dev->stubDriver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unknown stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    if (!(stubDriverPath = virPCIDriverDir(stubDriverName))  ||
        !(driverLink = virPCIFile(dev->name, "driver")))
1170
        return -1;
1171 1172 1173 1174 1175 1176

    if (virFileExists(driverLink)) {
        if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
            /* The device is already bound to the correct driver */
            VIR_DEBUG("Device %s is already bound to %s",
                      dev->name, stubDriverName);
1177
            return 0;
1178 1179 1180 1181
        }
    }

    if (virPCIDeviceBindWithDriverOverride(dev, stubDriverName) < 0)
1182
        return -1;
1183 1184

    dev->unbind_from_stub = true;
1185
    return 0;
1186 1187
}

1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205
/* virPCIDeviceDetach:
 *
 * Detach this device from the host driver, attach it to the stub
 * driver (previously set with virPCIDeviceSetStubDriver(), and add *a
 * copy* of the object to the inactiveDevs list (if provided). This
 * function will *never* consume dev, so the caller should free it.
 *
 * Returns 0 on success, -1 on failure (will fail if the device is
 * already in the activeDevs list, but will be a NOP if the device is
 * already bound to the stub).
 *
 * GENERAL NOTE: activeDevs should be a list of all PCI devices
 * currently in use by a domain. inactiveDevs is a list of all PCI
 * devices that libvirt has detached from the host driver + attached
 * to the stub driver, but hasn't yet assigned to a domain. Any device
 * that is still attached to its host driver should not be on either
 * list.
 */
1206
int
1207 1208
virPCIDeviceDetach(virPCIDevicePtr dev,
                   virPCIDeviceList *activeDevs,
1209
                   virPCIDeviceList *inactiveDevs)
1210
{
1211
    if (virPCIProbeStubDriver(dev->stubDriver) < 0)
1212 1213
        return -1;

1214
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1215
        virReportError(VIR_ERR_INTERNAL_ERROR,
1216 1217 1218 1219
                       _("Not detaching active device %s"), dev->name);
        return -1;
    }

1220
    if (virPCIDeviceBindToStub(dev) < 0)
1221 1222
        return -1;

1223 1224 1225
    /* Add *a copy of* the dev into list inactiveDevs, if
     * it's not already there.
     */
1226 1227 1228 1229
    if (inactiveDevs && !virPCIDeviceListFind(inactiveDevs, dev)) {
        VIR_DEBUG("Adding PCI device %s to inactive list", dev->name);
        if (virPCIDeviceListAddCopy(inactiveDevs, dev) < 0)
            return -1;
1230 1231 1232
    }

    return 0;
1233 1234
}

1235 1236 1237 1238
/*
 * Pre-condition: inactivePCIHostdevs & activePCIHostdevs
 * are locked
 */
1239
int
1240 1241
virPCIDeviceReattach(virPCIDevicePtr dev,
                     virPCIDeviceListPtr activeDevs,
1242
                     virPCIDeviceListPtr inactiveDevs)
1243
{
1244
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1245
        virReportError(VIR_ERR_INTERNAL_ERROR,
1246 1247 1248 1249
                       _("Not reattaching active device %s"), dev->name);
        return -1;
    }

1250
    if (virPCIDeviceUnbindFromStub(dev) < 0)
1251 1252 1253
        return -1;

    /* Steal the dev from list inactiveDevs */
1254 1255
    if (inactiveDevs) {
        VIR_DEBUG("Removing PCI device %s from inactive list", dev->name);
1256
        virPCIDeviceListDel(inactiveDevs, dev);
1257
    }
1258 1259

    return 0;
1260 1261 1262
}

static char *
1263
virPCIDeviceReadID(virPCIDevicePtr dev, const char *id_name)
1264
{
1265
    g_autofree char *path = NULL;
1266 1267
    char *id_str;

1268
    if (!(path = virPCIFile(dev->name, id_name)))
1269
        return NULL;
1270 1271

    /* ID string is '0xNNNN\n' ... i.e. 7 bytes */
1272
    if (virFileReadAll(path, 7, &id_str) < 0)
1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286
        return NULL;

    /* Check for 0x suffix */
    if (id_str[0] != '0' || id_str[1] != 'x') {
        VIR_FREE(id_str);
        return NULL;
    }

    /* Chop off the newline; we know the string is 7 bytes */
    id_str[6] = '\0';

    return id_str;
}

1287 1288 1289 1290
bool
virPCIDeviceAddressIsValid(virPCIDeviceAddressPtr addr,
                           bool report)
{
1291
    if (addr->domain > 0xFFFFFFFF) {
1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339
        if (report)
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address domain='0x%x', "
                             "must be <= 0xFFFF"),
                           addr->domain);
        return false;
    }
    if (addr->bus > 0xFF) {
        if (report)
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address bus='0x%x', "
                             "must be <= 0xFF"),
                           addr->bus);
        return false;
    }
    if (addr->slot > 0x1F) {
        if (report)
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address slot='0x%x', "
                             "must be <= 0x1F"),
                           addr->slot);
        return false;
    }
    if (addr->function > 7) {
        if (report)
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address function=0x%x, "
                             "must be <= 7"),
                           addr->function);
        return false;
    }
    if (virPCIDeviceAddressIsEmpty(addr)) {
        if (report)
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Invalid PCI address 0000:00:00, at least "
                             "one of domain, bus, or slot must be > 0"));
        return false;
    }
    return true;
}

bool
virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress *addr)
{
    return !(addr->domain || addr->bus || addr->slot);
}

bool
1340 1341
virPCIDeviceAddressEqual(const virPCIDeviceAddress *addr1,
                         const virPCIDeviceAddress *addr2)
1342 1343 1344 1345 1346 1347 1348 1349 1350 1351
{
    if (addr1->domain == addr2->domain &&
        addr1->bus == addr2->bus &&
        addr1->slot == addr2->slot &&
        addr1->function == addr2->function) {
        return true;
    }
    return false;
}

1352
char *
1353
virPCIDeviceAddressAsString(const virPCIDeviceAddress *addr)
1354 1355 1356
{
    char *str;

1357 1358
    ignore_value(virAsprintf(&str,
                             VIR_PCI_DEVICE_ADDRESS_FMT,
1359 1360 1361 1362 1363 1364 1365
                             addr->domain,
                             addr->bus,
                             addr->slot,
                             addr->function));
    return str;
}

1366
virPCIDevicePtr
1367 1368 1369 1370
virPCIDeviceNew(unsigned int domain,
                unsigned int bus,
                unsigned int slot,
                unsigned int function)
1371
{
J
Ján Tomko 已提交
1372
    g_autoptr(virPCIDevice) dev = NULL;
1373 1374
    g_autofree char *vendor = NULL;
    g_autofree char *product = NULL;
1375

1376
    if (VIR_ALLOC(dev) < 0)
1377 1378
        return NULL;

1379 1380 1381 1382
    dev->address.domain = domain;
    dev->address.bus = bus;
    dev->address.slot = slot;
    dev->address.function = function;
1383

1384
    if (virAsprintf(&dev->name,
1385
                    VIR_PCI_DEVICE_ADDRESS_FMT,
1386
                    domain, bus, slot, function) < 0)
1387
        return NULL;
1388

E
Eric Blake 已提交
1389
    if (virAsprintf(&dev->path, PCI_SYSFS "devices/%s/config",
1390
                    dev->name) < 0)
1391
        return NULL;
1392

1393
    if (!virFileExists(dev->path)) {
1394 1395 1396
        virReportSystemError(errno,
                             _("Device %s not found: could not access %s"),
                             dev->name, dev->path);
1397
        return NULL;
1398 1399
    }

1400 1401
    vendor  = virPCIDeviceReadID(dev, "vendor");
    product = virPCIDeviceReadID(dev, "device");
1402 1403

    if (!vendor || !product) {
1404
        virReportError(VIR_ERR_INTERNAL_ERROR,
1405 1406
                       _("Failed to read product/vendor ID for %s"),
                       dev->name);
1407
        return NULL;
1408 1409 1410
    }

    /* strings contain '0x' prefix */
E
Eric Blake 已提交
1411 1412
    if (snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2],
                 &product[2]) >= sizeof(dev->id)) {
1413
        virReportError(VIR_ERR_INTERNAL_ERROR,
E
Eric Blake 已提交
1414 1415
                       _("dev->id buffer overflow: %s %s"),
                       &vendor[2], &product[2]);
1416
        return NULL;
E
Eric Blake 已提交
1417
    }
1418 1419 1420

    VIR_DEBUG("%s %s: initialized", dev->id, dev->name);

J
Ján Tomko 已提交
1421
    return g_steal_pointer(&dev);
1422 1423
}

L
Laine Stump 已提交
1424 1425 1426 1427 1428 1429

virPCIDevicePtr
virPCIDeviceCopy(virPCIDevicePtr dev)
{
    virPCIDevicePtr copy;

1430
    if (VIR_ALLOC(copy) < 0)
L
Laine Stump 已提交
1431 1432 1433 1434
        return NULL;

    /* shallow copy to take care of most attributes */
    *copy = *dev;
1435
    copy->path = NULL;
C
Chunyan Liu 已提交
1436
    copy->used_by_drvname = copy->used_by_domname = NULL;
1437 1438 1439 1440
    copy->name = g_strdup(dev->name);
    copy->path = g_strdup(dev->path);
    copy->used_by_drvname = g_strdup(dev->used_by_drvname);
    copy->used_by_domname = g_strdup(dev->used_by_domname);
L
Laine Stump 已提交
1441 1442 1443 1444
    return copy;
}


1445
void
1446
virPCIDeviceFree(virPCIDevicePtr dev)
1447
{
1448 1449
    if (!dev)
        return;
1450
    VIR_DEBUG("%s %s: freeing", dev->id, dev->name);
1451
    VIR_FREE(dev->name);
E
Eric Blake 已提交
1452
    VIR_FREE(dev->path);
C
Chunyan Liu 已提交
1453 1454
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);
1455 1456
    VIR_FREE(dev);
}
1457

1458 1459 1460 1461 1462
/**
 * virPCIDeviceGetAddress:
 * @dev: device to get address from
 *
 * Take a PCI device on input and return its PCI address. The
1463
 * returned object is owned by the device and must not be freed.
1464
 *
1465
 * Returns: a pointer to the address, which can never be NULL.
1466 1467 1468 1469
 */
virPCIDeviceAddressPtr
virPCIDeviceGetAddress(virPCIDevicePtr dev)
{
1470
    return &(dev->address);
1471 1472
}

1473
const char *
1474
virPCIDeviceGetName(virPCIDevicePtr dev)
1475 1476 1477 1478
{
    return dev->name;
}

1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490
/**
 * virPCIDeviceGetConfigPath:
 *
 * Returns a pointer to a string containing the path of @dev's PCI
 * config file.
 */
const char *
virPCIDeviceGetConfigPath(virPCIDevicePtr dev)
{
    return dev->path;
}

1491
void virPCIDeviceSetManaged(virPCIDevicePtr dev, bool managed)
1492
{
1493
    dev->managed = managed;
1494 1495
}

1496
bool
1497
virPCIDeviceGetManaged(virPCIDevicePtr dev)
1498 1499 1500 1501
{
    return dev->managed;
}

1502 1503
void
virPCIDeviceSetStubDriver(virPCIDevicePtr dev, virPCIStubDriver driver)
1504
{
1505
    dev->stubDriver = driver;
1506 1507
}

1508
virPCIStubDriver
1509 1510 1511 1512 1513
virPCIDeviceGetStubDriver(virPCIDevicePtr dev)
{
    return dev->stubDriver;
}

1514
bool
1515
virPCIDeviceGetUnbindFromStub(virPCIDevicePtr dev)
1516 1517 1518 1519 1520
{
    return dev->unbind_from_stub;
}

void
1521
virPCIDeviceSetUnbindFromStub(virPCIDevicePtr dev, bool unbind)
1522
{
1523
    dev->unbind_from_stub = unbind;
1524 1525
}

1526
bool
1527
virPCIDeviceGetRemoveSlot(virPCIDevicePtr dev)
1528 1529 1530 1531 1532
{
    return dev->remove_slot;
}

void
1533
virPCIDeviceSetRemoveSlot(virPCIDevicePtr dev, bool remove_slot)
1534
{
1535
    dev->remove_slot = remove_slot;
1536 1537
}

1538
bool
1539
virPCIDeviceGetReprobe(virPCIDevicePtr dev)
1540 1541 1542 1543 1544
{
    return dev->reprobe;
}

void
1545
virPCIDeviceSetReprobe(virPCIDevicePtr dev, bool reprobe)
1546
{
1547
    dev->reprobe = reprobe;
1548 1549
}

C
Chunyan Liu 已提交
1550 1551 1552 1553
int
virPCIDeviceSetUsedBy(virPCIDevicePtr dev,
                      const char *drv_name,
                      const char *dom_name)
1554
{
C
Chunyan Liu 已提交
1555 1556
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);
1557 1558
    dev->used_by_drvname = g_strdup(drv_name);
    dev->used_by_domname = g_strdup(dom_name);
C
Chunyan Liu 已提交
1559 1560

    return 0;
1561 1562
}

C
Chunyan Liu 已提交
1563 1564 1565 1566
void
virPCIDeviceGetUsedBy(virPCIDevicePtr dev,
                      const char **drv_name,
                      const char **dom_name)
1567
{
C
Chunyan Liu 已提交
1568 1569
    *drv_name = dev->used_by_drvname;
    *dom_name = dev->used_by_domname;
1570 1571
}

1572 1573
virPCIDeviceListPtr
virPCIDeviceListNew(void)
1574
{
1575
    virPCIDeviceListPtr list;
1576

1577 1578 1579 1580
    if (virPCIInitialize() < 0)
        return NULL;

    if (!(list = virObjectLockableNew(virPCIDeviceListClass)))
1581 1582 1583 1584 1585
        return NULL;

    return list;
}

1586 1587
static void
virPCIDeviceListDispose(void *obj)
1588
{
1589
    virPCIDeviceListPtr list = obj;
1590
    size_t i;
1591 1592

    for (i = 0; i < list->count; i++) {
1593
        virPCIDeviceFree(list->devs[i]);
1594 1595 1596 1597 1598 1599 1600 1601
        list->devs[i] = NULL;
    }

    list->count = 0;
    VIR_FREE(list->devs);
}

int
1602 1603
virPCIDeviceListAdd(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
1604
{
1605
    if (virPCIDeviceListFind(list, dev)) {
1606
        virReportError(VIR_ERR_INTERNAL_ERROR,
1607 1608 1609
                       _("Device %s is already in use"), dev->name);
        return -1;
    }
1610
    return VIR_APPEND_ELEMENT(list->devs, list->count, dev);
1611 1612
}

L
Laine Stump 已提交
1613 1614 1615 1616 1617

/* virPCIDeviceListAddCopy - add a *copy* of the device to this list */
int
virPCIDeviceListAddCopy(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
J
Ján Tomko 已提交
1618
    g_autoptr(virPCIDevice) copy = virPCIDeviceCopy(dev);
L
Laine Stump 已提交
1619 1620 1621

    if (!copy)
        return -1;
1622
    if (virPCIDeviceListAdd(list, copy) < 0)
L
Laine Stump 已提交
1623
        return -1;
1624 1625

    copy = NULL;
L
Laine Stump 已提交
1626 1627 1628 1629
    return 0;
}


1630 1631 1632
virPCIDevicePtr
virPCIDeviceListGet(virPCIDeviceListPtr list,
                    int idx)
1633 1634 1635 1636 1637 1638 1639 1640 1641
{
    if (idx >= list->count)
        return NULL;
    if (idx < 0)
        return NULL;

    return list->devs[idx];
}

1642
size_t
1643
virPCIDeviceListCount(virPCIDeviceListPtr list)
1644
{
1645 1646 1647
    return list->count;
}

1648 1649 1650
virPCIDevicePtr
virPCIDeviceListStealIndex(virPCIDeviceListPtr list,
                           int idx)
1651
{
1652
    virPCIDevicePtr ret;
1653

1654 1655
    if (idx < 0 || idx >= list->count)
        return NULL;
1656

1657
    ret = list->devs[idx];
1658
    VIR_DELETE_ELEMENT(list->devs, idx, list->count);
1659 1660 1661
    return ret;
}

1662 1663 1664
virPCIDevicePtr
virPCIDeviceListSteal(virPCIDeviceListPtr list,
                      virPCIDevicePtr dev)
1665
{
1666
    return virPCIDeviceListStealIndex(list, virPCIDeviceListFindIndex(list, dev));
1667 1668
}

1669
void
1670 1671
virPCIDeviceListDel(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
1672
{
1673
    virPCIDeviceFree(virPCIDeviceListSteal(list, dev));
1674 1675
}

1676
int
1677
virPCIDeviceListFindIndex(virPCIDeviceListPtr list, virPCIDevicePtr dev)
1678
{
1679
    size_t i;
1680

1681 1682 1683 1684 1685 1686
    for (i = 0; i < list->count; i++) {
        virPCIDevicePtr other = list->devs[i];
        if (other->address.domain   == dev->address.domain &&
            other->address.bus      == dev->address.bus    &&
            other->address.slot     == dev->address.slot   &&
            other->address.function == dev->address.function)
1687
            return i;
1688
    }
1689 1690 1691
    return -1;
}

L
Laine Stump 已提交
1692 1693 1694 1695 1696 1697 1698 1699

virPCIDevicePtr
virPCIDeviceListFindByIDs(virPCIDeviceListPtr list,
                          unsigned int domain,
                          unsigned int bus,
                          unsigned int slot,
                          unsigned int function)
{
1700
    size_t i;
L
Laine Stump 已提交
1701 1702

    for (i = 0; i < list->count; i++) {
1703 1704 1705 1706 1707
        virPCIDevicePtr other = list->devs[i];
        if (other->address.domain   == domain &&
            other->address.bus      == bus    &&
            other->address.slot     == slot   &&
            other->address.function == function)
L
Laine Stump 已提交
1708 1709 1710 1711 1712 1713
            return list->devs[i];
    }
    return NULL;
}


1714 1715
virPCIDevicePtr
virPCIDeviceListFind(virPCIDeviceListPtr list, virPCIDevicePtr dev)
1716
{
1717
    int idx;
1718

1719 1720
    if ((idx = virPCIDeviceListFindIndex(list, dev)) >= 0)
        return list->devs[idx];
1721 1722
    else
        return NULL;
1723
}
1724 1725


1726 1727 1728
int virPCIDeviceFileIterate(virPCIDevicePtr dev,
                            virPCIDeviceFileActor actor,
                            void *opaque)
1729
{
1730
    g_autofree char *pcidir = NULL;
1731 1732 1733
    DIR *dir = NULL;
    int ret = -1;
    struct dirent *ent;
E
Eric Blake 已提交
1734
    int direrr;
1735

1736
    if (virAsprintf(&pcidir, "/sys/bus/pci/devices/" VIR_PCI_DEVICE_ADDRESS_FMT,
1737 1738
                    dev->address.domain, dev->address.bus,
                    dev->address.slot, dev->address.function) < 0)
1739 1740
        goto cleanup;

J
Ján Tomko 已提交
1741
    if (virDirOpen(&dir, pcidir) < 0)
1742 1743
        goto cleanup;

E
Eric Blake 已提交
1744
    while ((direrr = virDirRead(dir, &ent, pcidir)) > 0) {
1745
        g_autofree char *file = NULL;
1746
        /* Device assignment requires:
A
Alex Williamson 已提交
1747
         *   $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN,
1748
         *   $PCIDIR/rom, $PCIDIR/reset, $PCIDIR/vendor, $PCIDIR/device
1749 1750 1751
         */
        if (STREQ(ent->d_name, "config") ||
            STRPREFIX(ent->d_name, "resource") ||
A
Alex Williamson 已提交
1752
            STREQ(ent->d_name, "rom") ||
1753 1754
            STREQ(ent->d_name, "vendor") ||
            STREQ(ent->d_name, "device") ||
A
Alex Williamson 已提交
1755
            STREQ(ent->d_name, "reset")) {
1756
            if (virAsprintf(&file, "%s/%s", pcidir, ent->d_name) < 0)
1757
                goto cleanup;
1758
            if ((actor)(dev, file, opaque) < 0)
1759 1760 1761
                goto cleanup;
        }
    }
E
Eric Blake 已提交
1762 1763
    if (direrr < 0)
        goto cleanup;
1764 1765 1766

    ret = 0;

1767
 cleanup:
J
Ján Tomko 已提交
1768
    VIR_DIR_CLOSE(dir);
1769 1770
    return ret;
}
J
Jiri Denemark 已提交
1771

L
Laine Stump 已提交
1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782

/* virPCIDeviceAddressIOMMUGroupIterate:
 *   Call @actor for all devices in the same iommu_group as orig
 *   (including orig itself) Even if there is no iommu_group for the
 *   device, call @actor once for orig.
 */
int
virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddressPtr orig,
                                     virPCIDeviceAddressActor actor,
                                     void *opaque)
{
1783
    g_autofree char *groupPath = NULL;
L
Laine Stump 已提交
1784 1785 1786
    DIR *groupDir = NULL;
    int ret = -1;
    struct dirent *ent;
E
Eric Blake 已提交
1787
    int direrr;
L
Laine Stump 已提交
1788 1789

    if (virAsprintf(&groupPath,
1790
                    PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT "/iommu_group/devices",
1791
                    orig->domain, orig->bus, orig->slot, orig->function) < 0)
L
Laine Stump 已提交
1792 1793
        goto cleanup;

J
Ján Tomko 已提交
1794
    if (virDirOpenQuiet(&groupDir, groupPath) < 0) {
L
Laine Stump 已提交
1795 1796 1797 1798 1799
        /* just process the original device, nothing more */
        ret = (actor)(orig, opaque);
        goto cleanup;
    }

E
Eric Blake 已提交
1800
    while ((direrr = virDirRead(groupDir, &ent, groupPath)) > 0) {
L
Laine Stump 已提交
1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812
        virPCIDeviceAddress newDev;

        if (virPCIDeviceAddressParse(ent->d_name, &newDev) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Found invalid device link '%s' in '%s'"),
                           ent->d_name, groupPath);
            goto cleanup;
        }

        if ((actor)(&newDev, opaque) < 0)
            goto cleanup;
    }
E
Eric Blake 已提交
1813
    if (direrr < 0)
L
Laine Stump 已提交
1814 1815 1816 1817
        goto cleanup;

    ret = 0;

1818
 cleanup:
J
Ján Tomko 已提交
1819
    VIR_DIR_CLOSE(groupDir);
L
Laine Stump 已提交
1820 1821 1822 1823 1824 1825 1826 1827
    return ret;
}


static int
virPCIDeviceGetIOMMUGroupAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceListPtr groupList = opaque;
J
Ján Tomko 已提交
1828
    g_autoptr(virPCIDevice) newDev = NULL;
L
Laine Stump 已提交
1829 1830 1831

    if (!(newDev = virPCIDeviceNew(newDevAddr->domain, newDevAddr->bus,
                                   newDevAddr->slot, newDevAddr->function)))
1832
        return -1;
L
Laine Stump 已提交
1833 1834

    if (virPCIDeviceListAdd(groupList, newDev) < 0)
1835
        return -1;
L
Laine Stump 已提交
1836 1837

    newDev = NULL; /* it's now on the list */
1838
    return 0;
L
Laine Stump 已提交
1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855
}


/*
 * virPCIDeviceGetIOMMUGroupList - return a virPCIDeviceList containing
 * all of the devices in the same iommu_group as @dev.
 *
 * Return the new list, or NULL on failure
 */
virPCIDeviceListPtr
virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev)
{
    virPCIDeviceListPtr groupList = virPCIDeviceListNew();

    if (!groupList)
        goto error;

1856
    if (virPCIDeviceAddressIOMMUGroupIterate(&(dev->address),
L
Laine Stump 已提交
1857 1858 1859 1860 1861 1862
                                             virPCIDeviceGetIOMMUGroupAddOne,
                                             groupList) < 0)
        goto error;

    return groupList;

1863
 error:
L
Laine Stump 已提交
1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887
    virObjectUnref(groupList);
    return NULL;
}


typedef struct {
    virPCIDeviceAddressPtr **iommuGroupDevices;
    size_t *nIommuGroupDevices;
} virPCIDeviceAddressList;
typedef virPCIDeviceAddressList *virPCIDeviceAddressListPtr;

static int
virPCIGetIOMMUGroupAddressesAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    int ret = -1;
    virPCIDeviceAddressListPtr addrList = opaque;
    virPCIDeviceAddressPtr copyAddr;

    /* make a copy to insert onto the list */
    if (VIR_ALLOC(copyAddr) < 0)
        goto cleanup;

    *copyAddr = *newDevAddr;

1888 1889
    if (VIR_APPEND_ELEMENT(*addrList->iommuGroupDevices,
                           *addrList->nIommuGroupDevices, copyAddr) < 0)
L
Laine Stump 已提交
1890 1891 1892
        goto cleanup;

    ret = 0;
1893
 cleanup:
L
Laine Stump 已提交
1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920
    VIR_FREE(copyAddr);
    return ret;
}


/*
 * virPCIDeviceAddressGetIOMMUGroupAddresses - return a
 * virPCIDeviceList containing all of the devices in the same
 * iommu_group as @dev.
 *
 * Return the new list, or NULL on failure
 */
int
virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddressPtr devAddr,
                                          virPCIDeviceAddressPtr **iommuGroupDevices,
                                          size_t *nIommuGroupDevices)
{
    int ret = -1;
    virPCIDeviceAddressList addrList = { iommuGroupDevices,
                                         nIommuGroupDevices };

    if (virPCIDeviceAddressIOMMUGroupIterate(devAddr,
                                             virPCIGetIOMMUGroupAddressesAddOne,
                                             &addrList) < 0)
        goto cleanup;

    ret = 0;
1921
 cleanup:
L
Laine Stump 已提交
1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932
    return ret;
}


/* virPCIDeviceAddressGetIOMMUGroupNum - return the group number of
 * this PCI device's iommu_group, or -2 if there is no iommu_group for
 * the device (or -1 if there was any other error)
 */
int
virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddressPtr addr)
{
1933 1934 1935
    g_autofree char *devName = NULL;
    g_autofree char *devPath = NULL;
    g_autofree char *groupPath = NULL;
L
Laine Stump 已提交
1936 1937 1938
    const char *groupNumStr;
    unsigned int groupNum;

1939 1940 1941
    if (virAsprintf(&devName,
                    VIR_PCI_DEVICE_ADDRESS_FMT,
                    addr->domain, addr->bus, addr->slot, addr->function) < 0)
1942
        return -1;
L
Laine Stump 已提交
1943

1944
    if (!(devPath = virPCIFile(devName, "iommu_group")))
1945 1946 1947
        return -1;
    if (virFileIsLink(devPath) != 1)
        return -2;
L
Laine Stump 已提交
1948 1949 1950 1951
    if (virFileResolveLink(devPath, &groupPath) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       devName, devPath);
1952
        return -1;
L
Laine Stump 已提交
1953 1954 1955 1956 1957 1958 1959 1960
    }

    groupNumStr = last_component(groupPath);
    if (virStrToLong_ui(groupNumStr, NULL, 10, &groupNum) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("device %s iommu_group symlink %s has "
                         "invalid group number %s"),
                       devName, groupPath, groupNumStr);
1961
        return -1;
L
Laine Stump 已提交
1962 1963
    }

1964
    return groupNum;
L
Laine Stump 已提交
1965 1966 1967
}


1968 1969
/* virPCIDeviceGetIOMMUGroupDev - return the name of the device used
 * to control this PCI device's group (e.g. "/dev/vfio/15")
1970 1971
 */
char *
1972
virPCIDeviceGetIOMMUGroupDev(virPCIDevicePtr dev)
1973
{
1974 1975
    g_autofree char *devPath = NULL;
    g_autofree char *groupPath = NULL;
1976 1977
    char *groupDev = NULL;

1978
    if (!(devPath = virPCIFile(dev->name, "iommu_group")))
1979
        return NULL;
1980 1981 1982 1983
    if (virFileIsLink(devPath) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s iommu_group file %s is not a symlink"),
                       dev->name, devPath);
1984
        return NULL;
1985 1986 1987 1988 1989
    }
    if (virFileResolveLink(devPath, &groupPath) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       dev->name, devPath);
1990
        return NULL;
1991 1992
    }
    if (virAsprintf(&groupDev, "/dev/vfio/%s",
1993
                    last_component(groupPath)) < 0)
1994 1995
        return NULL;

1996 1997 1998
    return groupDev;
}

J
Jiri Denemark 已提交
1999
static int
2000
virPCIDeviceDownstreamLacksACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2001 2002 2003 2004
{
    uint16_t flags;
    uint16_t ctrl;
    unsigned int pos;
2005 2006
    int fd;
    int ret = 0;
2007
    uint16_t device_class;
J
Jiri Denemark 已提交
2008

2009
    if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
J
Jiri Denemark 已提交
2010 2011
        return -1;

2012
    if (virPCIDeviceInit(dev, fd) < 0) {
2013 2014 2015 2016
        ret = -1;
        goto cleanup;
    }

2017 2018 2019
    if (virPCIDeviceReadClass(dev, &device_class) < 0)
        goto cleanup;

J
Jiri Denemark 已提交
2020
    pos = dev->pcie_cap_pos;
2021
    if (!pos || device_class != PCI_CLASS_BRIDGE_PCI)
2022
        goto cleanup;
J
Jiri Denemark 已提交
2023

2024
    flags = virPCIDeviceRead16(dev, fd, pos + PCI_EXP_FLAGS);
J
Jiri Denemark 已提交
2025
    if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM)
2026
        goto cleanup;
J
Jiri Denemark 已提交
2027

2028
    pos = virPCIDeviceFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS);
J
Jiri Denemark 已提交
2029 2030
    if (!pos) {
        VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name);
2031 2032
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2033 2034
    }

2035
    ctrl = virPCIDeviceRead16(dev, fd, pos + PCI_EXT_ACS_CTRL);
J
Jiri Denemark 已提交
2036 2037 2038
    if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) {
        VIR_DEBUG("%s %s: downstream port has ACS disabled",
                  dev->id, dev->name);
2039 2040
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2041 2042
    }

2043
 cleanup:
2044
    virPCIDeviceConfigClose(dev, fd);
2045
    return ret;
J
Jiri Denemark 已提交
2046 2047 2048
}

static int
2049
virPCIDeviceIsBehindSwitchLackingACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2050
{
J
Ján Tomko 已提交
2051
    g_autoptr(virPCIDevice) parent = NULL;
J
Jiri Denemark 已提交
2052

2053
    if (virPCIDeviceGetParent(dev, &parent) < 0)
2054
        return -1;
2055 2056 2057 2058 2059
    if (!parent) {
        /* if we have no parent, and this is the root bus, ACS doesn't come
         * into play since devices on the root bus can't P2P without going
         * through the root IOMMU.
         */
2060
        if (dev->address.bus == 0) {
2061
            return 0;
2062
        } else {
2063
            virReportError(VIR_ERR_INTERNAL_ERROR,
2064 2065 2066 2067
                           _("Failed to find parent device for %s"),
                           dev->name);
            return -1;
        }
J
Jiri Denemark 已提交
2068 2069 2070 2071 2072 2073 2074
    }

    /* XXX we should rather fail when we can't find device's parent and
     * stop the loop when we get to root instead of just stopping when no
     * parent can be found
     */
    do {
J
Ján Tomko 已提交
2075
        g_autoptr(virPCIDevice) tmp = NULL;
J
Jiri Denemark 已提交
2076
        int acs;
2077
        int ret;
J
Jiri Denemark 已提交
2078

2079
        acs = virPCIDeviceDownstreamLacksACS(parent);
J
Jiri Denemark 已提交
2080 2081 2082 2083 2084 2085 2086 2087 2088

        if (acs) {
            if (acs < 0)
                return -1;
            else
                return 1;
        }

        tmp = parent;
2089
        ret = virPCIDeviceGetParent(parent, &parent);
2090 2091
        if (ret < 0)
            return -1;
J
Jiri Denemark 已提交
2092 2093 2094 2095 2096
    } while (parent);

    return 0;
}

2097 2098
int virPCIDeviceIsAssignable(virPCIDevicePtr dev,
                             int strict_acs_check)
J
Jiri Denemark 已提交
2099 2100 2101 2102 2103 2104 2105 2106
{
    int ret;

    /* XXX This could be a great place to actually check that a non-managed
     * device isn't in use, e.g. by checking that device is either un-bound
     * or bound to a stub driver.
     */

2107
    ret = virPCIDeviceIsBehindSwitchLackingACS(dev);
J
Jiri Denemark 已提交
2108 2109 2110 2111 2112 2113 2114 2115
    if (ret < 0)
        return 0;

    if (ret) {
        if (!strict_acs_check) {
            VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed",
                      dev->id, dev->name);
        } else {
2116
            virReportError(VIR_ERR_INTERNAL_ERROR,
J
Jiri Denemark 已提交
2117 2118 2119 2120 2121 2122 2123 2124 2125
                           _("Device %s is behind a switch lacking ACS and "
                             "cannot be assigned"),
                           dev->name);
            return 0;
        }
    }

    return 1;
}
2126 2127 2128 2129 2130 2131 2132 2133 2134 2135

static int
logStrToLong_ui(char const *s,
                char **end_ptr,
                int base,
                unsigned int *result)
{
    int ret = 0;

    ret = virStrToLong_ui(s, end_ptr, base, result);
2136
    if (ret != 0)
2137 2138 2139 2140
        VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s);
    return ret;
}

2141 2142
int
virPCIDeviceAddressParse(char *address,
2143
                         virPCIDeviceAddressPtr bdf)
2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169
{
    char *p = NULL;
    int ret = -1;

    if ((address == NULL) || (logStrToLong_ui(address, &p, 16,
                                              &bdf->domain) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->bus) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->slot) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->function) == -1)) {
        goto out;
    }

    ret = 0;

2170
 out:
2171 2172 2173
    return ret;
}

2174

2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199
bool
virZPCIDeviceAddressIsValid(virZPCIDeviceAddressPtr zpci)
{
    /* We don't need to check fid because fid covers
     * all range of uint32 type.
     */
    if (zpci->uid > VIR_DOMAIN_DEVICE_ZPCI_MAX_UID ||
        zpci->uid == 0) {
        virReportError(VIR_ERR_XML_ERROR,
                       _("Invalid PCI address uid='0x%.4x', "
                         "must be > 0x0000 and <= 0x%.4x"),
                       zpci->uid,
                       VIR_DOMAIN_DEVICE_ZPCI_MAX_UID);
        return false;
    }

    return true;
}

bool
virZPCIDeviceAddressIsEmpty(const virZPCIDeviceAddress *addr)
{
    return !(addr->uid || addr->fid);
}

2200
#ifdef __linux__
2201

2202
virPCIDeviceAddressPtr
2203
virPCIGetDeviceAddressFromSysfsLink(const char *device_link)
2204
{
2205
    virPCIDeviceAddressPtr bdf = NULL;
2206
    char *config_address = NULL;
2207
    g_autofree char *device_path = NULL;
2208 2209

    if (!virFileExists(device_link)) {
2210
        VIR_DEBUG("'%s' does not exist", device_link);
2211
        return NULL;
2212 2213
    }

2214
    device_path = virFileCanonicalizePath(device_link);
2215
    if (device_path == NULL) {
2216 2217 2218
        virReportSystemError(errno,
                             _("Failed to resolve device link '%s'"),
                             device_link);
2219
        return NULL;
2220 2221
    }

2222
    config_address = last_component(device_path);
2223
    if (VIR_ALLOC(bdf) < 0)
2224
        return NULL;
2225

2226
    if (virPCIDeviceAddressParse(config_address, bdf) < 0) {
2227
        virReportError(VIR_ERR_INTERNAL_ERROR,
2228 2229
                       _("Failed to parse PCI config address '%s'"),
                       config_address);
2230
        VIR_FREE(bdf);
2231
        return NULL;
2232 2233
    }

2234
    return bdf;
2235 2236
}

2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249
/**
 * virPCIGetPhysicalFunction:
 * @vf_sysfs_path: sysfs path for the virtual function
 * @pf: where to store the physical function's address
 *
 * Given @vf_sysfs_path, this function will store the pointer
 * to a newly-allocated virPCIDeviceAddress in @pf.
 *
 * @pf might be NULL if @vf_sysfs_path does not point to a
 * virtual function. If it's not NULL, then it should be
 * freed by the caller when no longer needed.
 *
 * Returns: >=0 on success, <0 on failure
2250 2251
 */
int
2252
virPCIGetPhysicalFunction(const char *vf_sysfs_path,
2253
                          virPCIDeviceAddressPtr *pf)
2254
{
2255
    g_autofree char *device_link = NULL;
2256

2257 2258
    *pf = NULL;

2259 2260
    if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) {
        virReportOOMError();
2261
        return -1;
2262 2263
    }

2264
    if ((*pf = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2265 2266
        VIR_DEBUG("PF for VF device '%s': " VIR_PCI_DEVICE_ADDRESS_FMT,
                  vf_sysfs_path,
2267 2268
                  (*pf)->domain, (*pf)->bus, (*pf)->slot, (*pf)->function);
    }
2269

2270
    return 0;
2271 2272
}

2273

2274 2275 2276 2277
/*
 * Returns virtual functions of a physical function
 */
int
2278 2279
virPCIGetVirtualFunctions(const char *sysfs_path,
                          virPCIDeviceAddressPtr **virtual_functions,
2280 2281
                          size_t *num_virtual_functions,
                          unsigned int *max_virtual_functions)
2282 2283
{
    int ret = -1;
2284
    size_t i;
2285 2286
    g_autofree char *totalvfs_file = NULL;
    g_autofree char *totalvfs_str = NULL;
2287
    virPCIDeviceAddressPtr config_addr = NULL;
2288

2289 2290
    *virtual_functions = NULL;
    *num_virtual_functions = 0;
2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306
    *max_virtual_functions = 0;

    if (virAsprintf(&totalvfs_file, "%s/sriov_totalvfs", sysfs_path) < 0)
       goto error;
    if (virFileExists(totalvfs_file)) {
        char *end = NULL; /* so that terminating \n doesn't create error */

        if (virFileReadAll(totalvfs_file, 16, &totalvfs_str) < 0)
            goto error;
        if (virStrToLong_ui(totalvfs_str, &end, 10, max_virtual_functions) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Unrecognized value in %s: %s"),
                           totalvfs_file, totalvfs_str);
            goto error;
        }
    }
2307

2308
    do {
2309
        g_autofree char *device_link = NULL;
2310 2311 2312
        /* look for virtfn%d links until one isn't found */
        if (virAsprintf(&device_link, "%s/virtfn%zu", sysfs_path, *num_virtual_functions) < 0)
            goto error;
2313

2314 2315
        if (!virFileExists(device_link))
            break;
2316

2317
        if (!(config_addr = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2318 2319 2320 2321 2322
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Failed to get SRIOV function from device link '%s'"),
                           device_link);
            goto error;
        }
2323

2324 2325
        if (VIR_APPEND_ELEMENT(*virtual_functions, *num_virtual_functions,
                               config_addr) < 0)
2326 2327
            goto error;
    } while (1);
2328

2329 2330
    VIR_DEBUG("Found %zu virtual functions for %s",
              *num_virtual_functions, sysfs_path);
2331
    ret = 0;
2332
 cleanup:
2333
    VIR_FREE(config_addr);
2334
    return ret;
2335

2336
 error:
2337 2338 2339
    for (i = 0; i < *num_virtual_functions; i++)
        VIR_FREE((*virtual_functions)[i]);
    VIR_FREE(*virtual_functions);
2340
    *num_virtual_functions = 0;
2341
    goto cleanup;
2342
}
2343

2344

2345 2346 2347 2348
/*
 * Returns 1 if vf device is a virtual function, 0 if not, -1 on error
 */
int
2349
virPCIIsVirtualFunction(const char *vf_sysfs_device_link)
2350
{
2351
    g_autofree char *vf_sysfs_physfn_link = NULL;
2352 2353

    if (virAsprintf(&vf_sysfs_physfn_link, "%s/physfn",
2354
                    vf_sysfs_device_link) < 0)
2355
        return -1;
2356

2357
    return virFileExists(vf_sysfs_physfn_link);
2358 2359 2360 2361 2362 2363
}

/*
 * Returns the sriov virtual function index of vf given its pf
 */
int
2364 2365 2366
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
                              const char *vf_sysfs_device_link,
                              int *vf_index)
2367
{
2368 2369
    int ret = -1;
    size_t i;
2370
    size_t num_virt_fns = 0;
2371
    unsigned int max_virt_fns = 0;
2372 2373
    virPCIDeviceAddressPtr vf_bdf = NULL;
    virPCIDeviceAddressPtr *virt_fns = NULL;
2374

2375
    if (!(vf_bdf = virPCIGetDeviceAddressFromSysfsLink(vf_sysfs_device_link)))
2376 2377
        return ret;

2378
    if (virPCIGetVirtualFunctions(pf_sysfs_device_link, &virt_fns,
2379
                                  &num_virt_fns, &max_virt_fns) < 0) {
2380
        virReportError(VIR_ERR_INTERNAL_ERROR,
2381
                       _("Error getting physical function's '%s' "
2382
                         "virtual_functions"), pf_sysfs_device_link);
2383 2384 2385 2386
        goto out;
    }

    for (i = 0; i < num_virt_fns; i++) {
2387
        if (virPCIDeviceAddressEqual(vf_bdf, virt_fns[i])) {
2388 2389 2390 2391
            *vf_index = i;
            ret = 0;
            break;
        }
2392 2393
    }

2394
 out:
2395 2396 2397

    /* free virtual functions */
    for (i = 0; i < num_virt_fns; i++)
2398
        VIR_FREE(virt_fns[i]);
2399

A
ajia@redhat.com 已提交
2400
    VIR_FREE(virt_fns);
2401 2402 2403 2404 2405
    VIR_FREE(vf_bdf);

    return ret;
}

2406 2407 2408 2409 2410
/*
 * Returns a path to the PCI sysfs file given the BDF of the PCI function
 */

int
2411
virPCIGetSysfsFile(char *virPCIDeviceName, char **pci_sysfs_device_link)
2412
{
2413 2414 2415 2416
    if (virAsprintf(pci_sysfs_device_link, PCI_SYSFS "devices/%s",
                    virPCIDeviceName) < 0)
        return -1;
    return 0;
2417 2418
}

R
Roopa Prabhu 已提交
2419
int
2420
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
2421
                                char **pci_sysfs_device_link)
R
Roopa Prabhu 已提交
2422
{
2423
    if (virAsprintf(pci_sysfs_device_link,
2424
                    PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT,
2425 2426
                    addr->domain, addr->bus,
                    addr->slot, addr->function) < 0)
2427 2428
        return -1;
    return 0;
R
Roopa Prabhu 已提交
2429 2430
}

2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441
/**
 * virPCIGetNetName:
 * @device_link_sysfs_path: sysfs path to the PCI device
 * @idx: used to choose which netdev when there are several
 *       (ignored if physPortID is set)
 * @physPortID: match this string in the netdev's phys_port_id
 *       (or NULL to ignore and use idx instead)
 * @netname: used to return the name of the netdev
 *       (set to NULL (but returns success) if there is no netdev)
 *
 * Returns 0 on success, -1 on error (error has been logged)
2442 2443
 */
int
2444 2445 2446 2447
virPCIGetNetName(const char *device_link_sysfs_path,
                 size_t idx,
                 char *physPortID,
                 char **netname)
2448
{
2449 2450 2451
    g_autofree char *pcidev_sysfs_net_path = NULL;
    g_autofree char *firstEntryName = NULL;
    g_autofree char *thisPhysPortID = NULL;
2452 2453 2454
    int ret = -1;
    DIR *dir = NULL;
    struct dirent *entry = NULL;
2455
    size_t i = 0;
2456

2457 2458
    *netname = NULL;

2459 2460 2461 2462 2463 2464
    if (virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path,
                     "net") == -1) {
        virReportOOMError();
        return -1;
    }

2465 2466 2467
    if (virDirOpenQuiet(&dir, pcidev_sysfs_net_path) < 0) {
        /* this *isn't* an error - caller needs to check for netname == NULL */
        ret = 0;
2468
        goto cleanup;
2469
    }
2470

E
Eric Blake 已提交
2471
    while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) {
2472 2473 2474 2475 2476 2477 2478 2479 2480 2481
        /* if the caller sent a physPortID, compare it to the
         * physportID of this netdev. If not, look for entry[idx].
         */
        if (physPortID) {
            if (virNetDevGetPhysPortID(entry->d_name, &thisPhysPortID) < 0)
                goto cleanup;

            /* if this one doesn't match, keep looking */
            if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) {
                VIR_FREE(thisPhysPortID);
2482 2483 2484 2485 2486
                /* save the first entry we find to use as a failsafe
                 * in case we don't match the phys_port_id. This is
                 * needed because some NIC drivers (e.g. i40e)
                 * implement phys_port_id for PFs, but not for VFs
                 */
2487 2488
                if (!firstEntryName)
                    firstEntryName = g_strdup(entry->d_name);
2489

2490 2491 2492 2493 2494 2495 2496
                continue;
            }
        } else {
            if (i++ < idx)
                continue;
        }

2497
        *netname = g_strdup(entry->d_name);
2498 2499

        ret = 0;
2500 2501 2502
        break;
    }

2503 2504
    if (ret < 0) {
        if (physPortID) {
2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519
            if (firstEntryName) {
                /* we didn't match the provided phys_port_id, but this
                 * is probably because phys_port_id isn't implemented
                 * for this NIC driver, so just return the first
                 * (probably only) netname we found.
                 */
                *netname = firstEntryName;
                firstEntryName = NULL;
                ret = 0;
            } else {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Could not find network device with "
                                 "phys_port_id '%s' under PCI device at %s"),
                               physPortID, device_link_sysfs_path);
            }
2520 2521 2522 2523 2524
        } else {
            ret = 0; /* no netdev at the given index is *not* an error */
        }
    }
 cleanup:
J
Ján Tomko 已提交
2525
    VIR_DIR_CLOSE(dir);
2526
    return ret;
2527
}
R
Roopa Prabhu 已提交
2528 2529

int
2530
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
2531 2532 2533
                             int pfNetDevIdx,
                             char **pfname,
                             int *vf_index)
R
Roopa Prabhu 已提交
2534
{
2535
    virPCIDeviceAddressPtr pf_config_address = NULL;
2536 2537 2538
    g_autofree char *pf_sysfs_device_path = NULL;
    g_autofree char *vfname = NULL;
    g_autofree char *vfPhysPortID = NULL;
R
Roopa Prabhu 已提交
2539 2540
    int ret = -1;

2541
    if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
2542
        goto cleanup;
R
Roopa Prabhu 已提交
2543

2544
    if (!pf_config_address)
2545
        goto cleanup;
2546

2547 2548
    if (virPCIDeviceAddressGetSysfsFile(pf_config_address,
                                        &pf_sysfs_device_path) < 0) {
2549 2550
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2551

2552 2553 2554
    if (virPCIGetVirtualFunctionIndex(pf_sysfs_device_path,
                                      vf_sysfs_device_path, vf_index) < 0) {
        goto cleanup;
R
Roopa Prabhu 已提交
2555 2556
    }

2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576
    /* If the caller hasn't asked for a specific pfNetDevIdx, and VF
     * is bound to a netdev, learn that netdev's phys_port_id (if
     * available). This can be used to disambiguate when the PF has
     * multiple netdevs. If the VF isn't bound to a netdev, then we
     * return netdev[pfNetDevIdx] on the PF, which may or may not be
     * correct.
     */
    if (pfNetDevIdx == -1) {
        if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
            goto cleanup;

        if (vfname) {
            if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0)
                goto cleanup;
        }
        pfNetDevIdx = 0;
    }

    if (virPCIGetNetName(pf_sysfs_device_path,
                         pfNetDevIdx, vfPhysPortID, pfname) < 0) {
R
Roopa Prabhu 已提交
2577
        goto cleanup;
2578
    }
R
Roopa Prabhu 已提交
2579

2580 2581 2582 2583 2584 2585 2586 2587 2588
    if (!*pfname) {
        /* this shouldn't be possible. A VF can't exist unless its
         * PF device is bound to a network driver
         */
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("The PF device for VF %s has no network device name"),
                       vf_sysfs_device_path);
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2589

2590
    ret = 0;
2591
 cleanup:
R
Roopa Prabhu 已提交
2592 2593 2594 2595 2596
    VIR_FREE(pf_config_address);

    return ret;
}

2597 2598 2599 2600 2601 2602 2603 2604 2605

ssize_t
virPCIGetMdevTypes(const char *sysfspath,
                   virMediatedDeviceTypePtr **types)
{
    ssize_t ret = -1;
    int dirret = -1;
    DIR *dir = NULL;
    struct dirent *entry;
2606
    g_autofree char *types_path = NULL;
J
Ján Tomko 已提交
2607
    g_autoptr(virMediatedDeviceType) mdev_type = NULL;
2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623
    virMediatedDeviceTypePtr *mdev_types = NULL;
    size_t ntypes = 0;
    size_t i;

    if (virAsprintf(&types_path, "%s/mdev_supported_types", sysfspath) < 0)
        return -1;

    if ((dirret = virDirOpenIfExists(&dir, types_path)) < 0)
        goto cleanup;

    if (dirret == 0) {
        ret = 0;
        goto cleanup;
    }

    while ((dirret = virDirRead(dir, &entry, types_path)) > 0) {
2624
        g_autofree char *tmppath = NULL;
2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638
        /* append the type id to the path and read the attributes from there */
        if (virAsprintf(&tmppath, "%s/%s", types_path, entry->d_name) < 0)
            goto cleanup;

        if (virMediatedDeviceTypeReadAttrs(tmppath, &mdev_type) < 0)
            goto cleanup;

        if (VIR_APPEND_ELEMENT(mdev_types, ntypes, mdev_type) < 0)
            goto cleanup;
    }

    if (dirret < 0)
        goto cleanup;

2639
    *types = g_steal_pointer(&mdev_types);
2640 2641 2642 2643 2644 2645 2646 2647 2648 2649
    ret = ntypes;
    ntypes = 0;
 cleanup:
    for (i = 0; i < ntypes; i++)
        virMediatedDeviceTypeFree(mdev_types[i]);
    VIR_FREE(mdev_types);
    VIR_DIR_CLOSE(dir);
    return ret;
}

2650
#else
2651 2652
static const char *unsupported = N_("not supported on non-linux platforms");

2653
virPCIDeviceAddressPtr
J
Ján Tomko 已提交
2654
virPCIGetDeviceAddressFromSysfsLink(const char *device_link G_GNUC_UNUSED)
2655 2656
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2657
    return NULL;
2658 2659 2660
}


2661
int
J
Ján Tomko 已提交
2662 2663
virPCIGetPhysicalFunction(const char *vf_sysfs_path G_GNUC_UNUSED,
                          virPCIDeviceAddressPtr *pf G_GNUC_UNUSED)
2664
{
2665
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2666 2667 2668 2669
    return -1;
}

int
J
Ján Tomko 已提交
2670 2671 2672 2673
virPCIGetVirtualFunctions(const char *sysfs_path G_GNUC_UNUSED,
                          virPCIDeviceAddressPtr **virtual_functions G_GNUC_UNUSED,
                          size_t *num_virtual_functions G_GNUC_UNUSED,
                          unsigned int *max_virtual_functions G_GNUC_UNUSED)
2674
{
2675
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2676 2677
    return -1;
}
2678 2679

int
J
Ján Tomko 已提交
2680
virPCIIsVirtualFunction(const char *vf_sysfs_device_link G_GNUC_UNUSED)
2681
{
2682
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2683 2684 2685 2686
    return -1;
}

int
J
Ján Tomko 已提交
2687 2688 2689
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link G_GNUC_UNUSED,
                              const char *vf_sysfs_device_link G_GNUC_UNUSED,
                              int *vf_index G_GNUC_UNUSED)
2690
{
2691
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2692 2693 2694 2695
    return -1;

}

2696
int
J
Ján Tomko 已提交
2697 2698
virPCIGetSysfsFile(char *virPCIDeviceName G_GNUC_UNUSED,
                   char **pci_sysfs_device_link G_GNUC_UNUSED)
2699 2700 2701 2702 2703
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}

2704
int
J
Ján Tomko 已提交
2705 2706
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr dev G_GNUC_UNUSED,
                                char **pci_sysfs_device_link G_GNUC_UNUSED)
2707
{
2708
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2709 2710 2711
    return -1;
}

2712
int
J
Ján Tomko 已提交
2713 2714 2715 2716
virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED,
                 size_t idx G_GNUC_UNUSED,
                 char *physPortID G_GNUC_UNUSED,
                 char **netname G_GNUC_UNUSED)
2717
{
2718
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2719 2720
    return -1;
}
R
Roopa Prabhu 已提交
2721 2722

int
J
Ján Tomko 已提交
2723 2724 2725 2726
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path G_GNUC_UNUSED,
                             int pfNetDevIdx G_GNUC_UNUSED,
                             char **pfname G_GNUC_UNUSED,
                             int *vf_index G_GNUC_UNUSED)
R
Roopa Prabhu 已提交
2727
{
2728
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
R
Roopa Prabhu 已提交
2729 2730
    return -1;
}
2731 2732 2733


ssize_t
J
Ján Tomko 已提交
2734 2735
virPCIGetMdevTypes(const char *sysfspath G_GNUC_UNUSED,
                   virMediatedDeviceTypePtr **types G_GNUC_UNUSED)
2736 2737 2738 2739
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}
2740
#endif /* __linux__ */
2741 2742 2743 2744 2745 2746 2747

int
virPCIDeviceIsPCIExpress(virPCIDevicePtr dev)
{
    int fd;
    int ret = -1;

2748
    if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767
        return ret;

    if (virPCIDeviceInit(dev, fd) < 0)
        goto cleanup;

    ret = dev->pcie_cap_pos != 0;

 cleanup:
    virPCIDeviceConfigClose(dev, fd);
    return ret;
}

int
virPCIDeviceHasPCIExpressLink(virPCIDevicePtr dev)
{
    int fd;
    int ret = -1;
    uint16_t cap, type;

2768
    if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795
        return ret;

    if (virPCIDeviceInit(dev, fd) < 0)
        goto cleanup;

    cap = virPCIDeviceRead16(dev, fd, dev->pcie_cap_pos + PCI_CAP_FLAGS);
    type = (cap & PCI_EXP_FLAGS_TYPE) >> 4;

    ret = type != PCI_EXP_TYPE_ROOT_INT_EP && type != PCI_EXP_TYPE_ROOT_EC;

 cleanup:
    virPCIDeviceConfigClose(dev, fd);
    return ret;
}

int
virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
                          int *cap_port,
                          unsigned int *cap_speed,
                          unsigned int *cap_width,
                          unsigned int *sta_speed,
                          unsigned int *sta_width)
{
    uint32_t t;
    int fd;
    int ret = -1;

2796
    if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824
        return ret;

    if (virPCIDeviceInit(dev, fd) < 0)
        goto cleanup;

    if (!dev->pcie_cap_pos) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("pci device %s is not a PCI-Express device"),
                       dev->name);
        goto cleanup;
    }

    t = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKCAP);

    *cap_port = t >> 24;
    *cap_speed = t & PCI_EXP_LNKCAP_SPEED;
    *cap_width = (t & PCI_EXP_LNKCAP_WIDTH) >> 4;

    t = virPCIDeviceRead16(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKSTA);

    *sta_speed = t & PCI_EXP_LNKSTA_SPEED;
    *sta_width = (t & PCI_EXP_LNKSTA_WIDTH) >> 4;
    ret = 0;

 cleanup:
    virPCIDeviceConfigClose(dev, fd);
    return ret;
}
2825 2826


2827 2828 2829 2830 2831 2832 2833
int virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType)
{
    int fd;
    uint8_t type;

    *hdrType = -1;

2834
    if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
2835 2836 2837 2838 2839 2840 2841 2842 2843
        return -1;

    type = virPCIDeviceRead8(dev, fd, PCI_HEADER_TYPE);

    virPCIDeviceConfigClose(dev, fd);

    type &= PCI_HEADER_TYPE_MASK;
    if (type >= VIR_PCI_HEADER_LAST) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
2844 2845
                       _("Unknown PCI header type '%d' for device '%s'"),
                       type, dev->name);
2846 2847 2848 2849 2850 2851 2852 2853 2854
        return -1;
    }

    *hdrType = type;

    return 0;
}


2855 2856 2857 2858 2859 2860 2861 2862 2863 2864
void
virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev)
{
    if (!dev)
        return;

    VIR_FREE(dev->link_cap);
    VIR_FREE(dev->link_sta);
    VIR_FREE(dev);
}
2865 2866 2867 2868 2869 2870

void
virPCIDeviceAddressFree(virPCIDeviceAddressPtr address)
{
    VIR_FREE(address);
}