/*
 * virpci.c: helper APIs for managing host PCI devices
 *
 * Copyright (C) 2009-2015 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>

23
#include "virpci.h"
24
#include "virnetdev.h"
25 26 27 28 29 30 31 32

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

33
#include "dirname.h"
34
#include "virlog.h"
35
#include "vircommand.h"
36
#include "virerror.h"
E
Eric Blake 已提交
37
#include "virfile.h"
38
#include "virkmod.h"
39 40
#include "virstring.h"
#include "virutil.h"
41
#include "viralloc.h"
42

43 44
VIR_LOG_INIT("util.pci");

45 46 47
/* Root of the PCI hierarchy in sysfs; note the trailing slash */
#define PCI_SYSFS "/sys/bus/pci/"
/* Size of the buffer that holds an id string of the form "XXXX XXXX" */
#define PCI_ID_LEN 10

48 49
VIR_ENUM_IMPL(virPCIELinkSpeed,
              VIR_PCIE_LINK_SPEED_LAST,
50 51
              "", "2.5", "5", "8", "16",
);
52

53 54
VIR_ENUM_IMPL(virPCIStubDriver,
              VIR_PCI_STUB_DRIVER_LAST,
55 56 57
              "none",
              "pciback", /* XEN */
              "vfio-pci", /* VFIO */
58
);
59

60 61
VIR_ENUM_IMPL(virPCIHeader,
              VIR_PCI_HEADER_LAST,
62 63 64
              "endpoint",
              "pci-bridge",
              "cardbus-bridge",
65
);
66

67
struct _virPCIDevice {
68
    virPCIDeviceAddress address;
69

70
    char          *name;              /* domain:bus:slot.function */
71
    char          id[PCI_ID_LEN];     /* product vendor */
E
Eric Blake 已提交
72
    char          *path;
C
Chunyan Liu 已提交
73 74 75 76

    /* The driver:domain which uses the device */
    char          *used_by_drvname;
    char          *used_by_domname;
77

78 79
    unsigned int  pcie_cap_pos;
    unsigned int  pci_pm_cap_pos;
80 81
    bool          has_flr;
    bool          has_pm_reset;
82
    bool          managed;
83 84

    virPCIStubDriver stubDriver;
85 86

    /* used by reattach function */
87 88 89
    bool          unbind_from_stub;
    bool          remove_slot;
    bool          reprobe;
90 91
};

92
struct _virPCIDeviceList {
93 94
    virObjectLockable parent;

95
    size_t count;
96
    virPCIDevicePtr *devs;
97 98 99
};


100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
/* Needed by virReportOOMError() and virReportSystemError() */
#define VIR_FROM_THIS VIR_FROM_NONE

/*
 * Abbreviations for the specifications cited below:
 *
 *  PCI30  - PCI Local Bus Specification 3.0
 *  PCIe20 - PCI Express Base Specification 2.0
 *  BR12   - PCI-to-PCI Bridge Architecture Specification 1.2
 *  PM12   - PCI Bus Power Management Interface Specification 1.2
 *  ECN_AF - Advanced Capabilities for Conventional PCI ECN
 */

/* PCI30 6.1  Configuration Space Organization (type 0 header) */
#define PCI_CONF_LEN              0x100
#define PCI_CONF_HEADER_LEN       0x40

/* PCI30 6.2.1  Header type register and its fields */
#define PCI_HEADER_TYPE           0x0e
#define PCI_HEADER_TYPE_BRIDGE    0x1
#define PCI_HEADER_TYPE_MASK      0x7f
#define PCI_HEADER_TYPE_MULTI     0x80

/* PCI30 6.2.1  Device Identification: device class register */
#define PCI_CLASS_DEVICE          0x0a

/* PCI30 D.7  Base Class 06h: class code of a PCI bridge */
#define PCI_CLASS_BRIDGE_PCI      0x0604

/* PCI30 6.2.3  Device Status (16 bits) */
#define PCI_STATUS                0x06
#define PCI_STATUS_CAP_LIST       0x10    /* capability list supported */

/* PCI30 6.7  Capabilities List */
#define PCI_CAPABILITY_LIST       0x34    /* offset of the first list entry */
#define PCI_CAP_FLAGS             2       /* capability defined flags (16 bits) */

/* Capability IDs */
#define PCI_CAP_ID_PM             0x01    /* PM12 3.2.1: Power Management */
#define PCI_CAP_ID_EXP            0x10    /* PCI30 H: PCI Express */
#define PCI_CAP_ID_AF             0x13    /* ECN_AF 6.x.1.1: Advanced Features */

/* PCIe20 7.8.3  Device Capabilities Register (Offset 04h) and link registers */
#define PCI_EXP_DEVCAP            0x4     /* device capabilities */
#define PCI_EXP_DEVCAP_FLR        (1<<28) /* Function Level Reset */
#define PCI_EXP_LNKCAP            0xc     /* link capabilities */
#define PCI_EXP_LNKCAP_SPEED      0x0000f /* maximum link speed */
#define PCI_EXP_LNKCAP_WIDTH      0x003f0 /* maximum link width */
#define PCI_EXP_LNKSTA            0x12    /* link status */
#define PCI_EXP_LNKSTA_SPEED      0x000f  /* negotiated link speed */
#define PCI_EXP_LNKSTA_WIDTH      0x03f0  /* negotiated link width */

/* BR12 3.2  PCI-to-PCI Bridge Configuration Space Header Format (type 1) */
#define PCI_PRIMARY_BUS           0x18    /* BR12 3.2.5.2: primary bus number */
#define PCI_SECONDARY_BUS         0x19    /* BR12 3.2.5.3: secondary bus number */
#define PCI_SUBORDINATE_BUS       0x1a    /* BR12 3.2.5.4: highest bus number behind the bridge */
#define PCI_BRIDGE_CONTROL        0x3e    /* BR12 3.2.5.18: bridge control register */
#define PCI_BRIDGE_CTL_RESET      0x40    /* secondary bus reset */

/* PM12 3.2.4  Power Management Control/Status (Offset = 4) */
#define PCI_PM_CTRL               4
#define PCI_PM_CTRL_STATE_MASK    0x3     /* current power state (D0 to D3) */
#define PCI_PM_CTRL_STATE_D0      0x0     /* D0 state */
#define PCI_PM_CTRL_STATE_D3hot   0x3     /* D3 state */
#define PCI_PM_CTRL_NO_SOFT_RESET 0x8     /* no reset for D3hot->D0 */

/* ECN_AF 6.x.1  Advanced Features Capability Structure */
#define PCI_AF_CAP                0x3     /* advanced features capabilities */
#define PCI_AF_CAP_FLR            0x2     /* Function Level Reset */

/* PCI Express capability flags */
#define PCI_EXP_FLAGS             0x2
#define PCI_EXP_FLAGS_TYPE        0x00f0
#define PCI_EXP_TYPE_DOWNSTREAM   0x6

/* Extended capability list layout */
#define PCI_EXT_CAP_BASE          0x100
#define PCI_EXT_CAP_LIMIT         0x1000
#define PCI_EXT_CAP_ID_MASK       0x0000ffff
#define PCI_EXT_CAP_OFFSET_SHIFT  20
#define PCI_EXT_CAP_OFFSET_MASK   0x00000ffc

/* ACS extended capability and its control bits */
#define PCI_EXT_CAP_ID_ACS        0x000d
#define PCI_EXT_ACS_CTRL          0x06

#define PCI_EXT_CAP_ACS_SV        0x01
#define PCI_EXT_CAP_ACS_RR        0x04
#define PCI_EXT_CAP_ACS_CR        0x08
#define PCI_EXT_CAP_ACS_UF        0x10
#define PCI_EXT_CAP_ACS_ENABLED   (PCI_EXT_CAP_ACS_SV | \
                                   PCI_EXT_CAP_ACS_RR | \
                                   PCI_EXT_CAP_ACS_CR | \
                                   PCI_EXT_CAP_ACS_UF)

#define PCI_EXP_TYPE_ROOT_INT_EP  0x9     /* Root Complex Integrated Endpoint */
#define PCI_EXP_TYPE_ROOT_EC      0xa     /* Root Complex Event Collector */

196 197 198 199 200 201
static virClassPtr virPCIDeviceListClass;

static void virPCIDeviceListDispose(void *obj);

static int virPCIOnceInit(void)
{
202
    if (!VIR_CLASS_NEW(virPCIDeviceList, virClassForObjectLockable()))
203 204 205 206 207
        return -1;

    return 0;
}

208
VIR_ONCE_GLOBAL_INIT(virPCI);
209

L
Laine Stump 已提交
210

211 212
static char *
virPCIDriverDir(const char *driver)
L
Laine Stump 已提交
213
{
214
    char *buffer;
L
Laine Stump 已提交
215

216
    buffer = g_strdup_printf(PCI_SYSFS "drivers/%s", driver);
217
    return buffer;
L
Laine Stump 已提交
218 219 220
}


221 222
static char *
virPCIFile(const char *device, const char *file)
L
Laine Stump 已提交
223
{
224
    char *buffer;
L
Laine Stump 已提交
225

226
    buffer = g_strdup_printf(PCI_SYSFS "devices/%s/%s", device, file);
227
    return buffer;
L
Laine Stump 已提交
228 229 230 231 232 233 234 235 236 237
}


/* virPCIDeviceGetDriverPathAndName - put the path to the driver
 * directory of the driver in use for this device in @path and the
 * name of the driver in @name. Both could be NULL if it's not bound
 * to any driver.
 *
 * Return 0 for success, -1 for error.
 */
238
int
L
Laine Stump 已提交
239 240 241
virPCIDeviceGetDriverPathAndName(virPCIDevicePtr dev, char **path, char **name)
{
    int ret = -1;
242
    g_autofree char *drvlink = NULL;
L
Laine Stump 已提交
243 244 245

    *path = *name = NULL;
    /* drvlink = "/sys/bus/pci/dddd:bb:ss.ff/driver" */
246
    if (!(drvlink = virPCIFile(dev->name, "driver")))
L
Laine Stump 已提交
247 248
        goto cleanup;

249 250 251 252 253
    if (!virFileExists(drvlink)) {
        ret = 0;
        goto cleanup;
    }

L
Laine Stump 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266 267
    if (virFileIsLink(drvlink) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s driver file %s is not a symlink"),
                       dev->name, drvlink);
        goto cleanup;
    }
    if (virFileResolveLink(drvlink, path) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s driver symlink %s"),
                       dev->name, drvlink);
        goto cleanup;
    }
    /* path = "/sys/bus/pci/drivers/${drivername}" */

268
    *name = g_strdup(last_component(*path));
L
Laine Stump 已提交
269 270 271
    /* name = "${drivername}" */

    ret = 0;
272
 cleanup:
L
Laine Stump 已提交
273 274 275 276 277 278 279 280
    if (ret < 0) {
        VIR_FREE(*path);
        VIR_FREE(*name);
    }
    return ret;
}


281
static int
282
virPCIDeviceConfigOpenInternal(virPCIDevicePtr dev, bool readonly, bool fatal)
283 284 285
{
    int fd;

286
    fd = open(dev->path, readonly ? O_RDONLY : O_RDWR);
287

288
    if (fd < 0) {
289 290 291 292 293 294 295 296 297
        if (fatal) {
            virReportSystemError(errno,
                                 _("Failed to open config space file '%s'"),
                                 dev->path);
        } else {
            char ebuf[1024];
            VIR_WARN("Failed to open config space file '%s': %s",
                     dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
        }
298 299
        return -1;
    }
300

301
    VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path);
302
    return fd;
303 304
}

305
static int
306
virPCIDeviceConfigOpen(virPCIDevicePtr dev)
307
{
308
    return virPCIDeviceConfigOpenInternal(dev, true, true);
309 310
}

311 312 313 314 315 316
/* Open the config space of @dev read-only; failure is only logged as a warning */
static int
virPCIDeviceConfigOpenTry(virPCIDevicePtr dev)
{
    const bool readonly = true;
    const bool fatal = false;

    return virPCIDeviceConfigOpenInternal(dev, readonly, fatal);
}

317 318 319
/* Open the config space of @dev read-write; failure is reported as an error */
static int
virPCIDeviceConfigOpenWrite(virPCIDevicePtr dev)
{
    const bool readonly = false;
    const bool fatal = true;

    return virPCIDeviceConfigOpenInternal(dev, readonly, fatal);
}

323
static void
324
virPCIDeviceConfigClose(virPCIDevicePtr dev, int cfgfd)
325
{
326 327 328 329 330
    if (VIR_CLOSE(cfgfd) < 0) {
        char ebuf[1024];
        VIR_WARN("Failed to close config space file '%s': %s",
                 dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
    }
331 332
}

333

334
static int
335 336
virPCIDeviceRead(virPCIDevicePtr dev,
                 int cfgfd,
337
                 unsigned int pos,
338
                 uint8_t *buf,
339
                 unsigned int buflen)
340 341 342
{
    memset(buf, 0, buflen);

343 344
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        saferead(cfgfd, buf, buflen) != buflen) {
345
        char ebuf[1024];
346
        VIR_WARN("Failed to read from '%s' : %s", dev->path,
347 348 349 350 351 352 353
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static uint8_t
354
virPCIDeviceRead8(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
355 356
{
    uint8_t buf;
357
    virPCIDeviceRead(dev, cfgfd, pos, &buf, sizeof(buf));
358 359 360 361
    return buf;
}

static uint16_t
362
virPCIDeviceRead16(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
363 364
{
    uint8_t buf[2];
365
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
366 367 368 369
    return (buf[0] << 0) | (buf[1] << 8);
}

static uint32_t
370
virPCIDeviceRead32(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
371 372
{
    uint8_t buf[4];
373
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
374 375 376
    return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
}

377 378 379
/* Read the device class of @dev from its sysfs "class" file.
 *
 * On success the upper 16 bits of the 24-bit class value are stored in
 * @device_class.
 *
 * Returns 0 on success, -1 if the file cannot be read or does not hold a
 * hexadecimal number (an error is reported for the latter).
 */
static int
virPCIDeviceReadClass(virPCIDevicePtr dev, uint16_t *device_class)
{
    g_autofree char *path = NULL;
    g_autofree char *id_str = NULL;
    unsigned int value;
    int len;

    if (!(path = virPCIFile(dev->name, "class")))
        return -1;

    /* class string is '0xNNNNNN\n' ... i.e. 9 bytes */
    if ((len = virFileReadAll(path, 9, &id_str)) < 0)
        return -1;

    /* Cut off the trailing newline, but only when that many bytes were
     * actually read: virFileReadAll() NUL-terminates what it read, and
     * writing index 8 of a shorter buffer would be out of bounds.
     */
    if (len > 8)
        id_str[8] = '\0';

    if (virStrToLong_ui(id_str, NULL, 16, &value) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unusual value in %s/devices/%s/class: %s"),
                       PCI_SYSFS, dev->name, id_str);
        return -1;
    }

    /* drop the low byte, keep the following 16 bits */
    *device_class = (value >> 8) & 0xFFFF;
    return 0;
}

403
static int
404 405
virPCIDeviceWrite(virPCIDevicePtr dev,
                  int cfgfd,
406
                  unsigned int pos,
407
                  uint8_t *buf,
408
                  unsigned int buflen)
409
{
410 411
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        safewrite(cfgfd, buf, buflen) != buflen) {
412
        char ebuf[1024];
413
        VIR_WARN("Failed to write to '%s' : %s", dev->path,
414 415 416 417 418 419 420
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static void
421
virPCIDeviceWrite16(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint16_t val)
422 423
{
    uint8_t buf[2] = { (val >> 0), (val >> 8) };
424
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
425 426 427
}

static void
428
virPCIDeviceWrite32(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint32_t val)
429
{
430
    uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val >> 24) };
431
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
432 433
}

E
Eric Blake 已提交
434 435
typedef int (*virPCIDeviceIterPredicate)(virPCIDevicePtr, virPCIDevicePtr,
                                         void *);
436 437 438 439 440 441 442

/* Iterate over available PCI devices calling @predicate
 * to compare each one to @dev.
 * Return -1 on error since we don't want to assume it is
 * safe to reset if there is an error.
 */
static int
443 444 445 446
virPCIDeviceIterDevices(virPCIDeviceIterPredicate predicate,
                        virPCIDevicePtr dev,
                        virPCIDevicePtr *matched,
                        void *data)
447 448 449
{
    DIR *dir;
    struct dirent *entry;
450
    int ret = 0;
451
    int rc;
452 453 454 455 456

    *matched = NULL;

    VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices", dev->id, dev->name);

J
Ján Tomko 已提交
457
    if (virDirOpen(&dir, PCI_SYSFS "devices") < 0)
458 459
        return -1;

E
Eric Blake 已提交
460
    while ((ret = virDirRead(dir, &entry, PCI_SYSFS "devices")) > 0) {
461
        unsigned int domain, bus, slot, function;
J
Ján Tomko 已提交
462
        g_autoptr(virPCIDevice) check = NULL;
463
        char *tmp;
464

465 466 467 468 469 470 471 472 473
        /* expected format: <domain>:<bus>:<slot>.<function> */
        if (/* domain */
            virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp != ':' ||
            /* bus */
            virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' ||
            /* slot */
            virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' ||
            /* function */
            virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) {
474 475 476 477
            VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s", entry->d_name);
            continue;
        }

478
        check = virPCIDeviceNew(domain, bus, slot, function);
479
        if (!check) {
480 481 482
            ret = -1;
            break;
        }
483

484 485 486 487 488
        rc = predicate(dev, check, data);
        if (rc < 0) {
            /* the predicate returned an error, bail */
            ret = -1;
            break;
489
        } else if (rc == 1) {
490
            VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name, check->name);
491
            *matched = g_steal_pointer(&check);
492
            ret = 1;
493 494 495
            break;
        }
    }
J
Ján Tomko 已提交
496
    VIR_DIR_CLOSE(dir);
497
    return ret;
498 499 500
}

static uint8_t
501 502 503
virPCIDeviceFindCapabilityOffset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 unsigned int capability)
504 505 506 507
{
    uint16_t status;
    uint8_t pos;

508
    status = virPCIDeviceRead16(dev, cfgfd, PCI_STATUS);
509 510 511
    if (!(status & PCI_STATUS_CAP_LIST))
        return 0;

512
    pos = virPCIDeviceRead8(dev, cfgfd, PCI_CAPABILITY_LIST);
513 514 515 516 517 518 519 520 521

    /* Zero indicates last capability, capabilities can't
     * be in the config space header and 0xff is returned
     * by the kernel if we don't have access to this region
     *
     * Note: we're not handling loops or extended
     * capabilities here.
     */
    while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) {
522
        uint8_t capid = virPCIDeviceRead8(dev, cfgfd, pos);
523 524 525 526 527 528
        if (capid == capability) {
            VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x",
                      dev->id, dev->name, capability, pos);
            return pos;
        }

529
        pos = virPCIDeviceRead8(dev, cfgfd, pos + 1);
530 531 532 533 534 535 536
    }

    VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name, capability);

    return 0;
}

J
Jiri Denemark 已提交
537
static unsigned int
538 539
virPCIDeviceFindExtendedCapabilityOffset(virPCIDevicePtr dev,
                                         int cfgfd,
540
                                         unsigned int capability)
J
Jiri Denemark 已提交
541 542 543 544 545 546 547 548 549 550
{
    int ttl;
    unsigned int pos;
    uint32_t header;

    /* minimum 8 bytes per capability */
    ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8;
    pos = PCI_EXT_CAP_BASE;

    while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) {
551
        header = virPCIDeviceRead32(dev, cfgfd, pos);
J
Jiri Denemark 已提交
552 553 554 555 556 557 558 559 560 561 562

        if ((header & PCI_EXT_CAP_ID_MASK) == capability)
            return pos;

        pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK;
        ttl--;
    }

    return 0;
}

563 564 565 566
/* detects whether this device has FLR.  Returns 0 if the device does
 * not have FLR, 1 if it does, and -1 on error
 */
static int
567
virPCIDeviceDetectFunctionLevelReset(virPCIDevicePtr dev, int cfgfd)
568
{
M
Mark McLoughlin 已提交
569
    uint32_t caps;
570
    uint8_t pos;
571
    g_autofree char *path = NULL;
572
    int found;
573 574 575 576 577 578 579 580

    /* The PCIe Function Level Reset capability allows
     * individual device functions to be reset without
     * affecting any other functions on the device or
     * any other devices on the bus. This is only common
     * on SR-IOV NICs at the moment.
     */
    if (dev->pcie_cap_pos) {
581
        caps = virPCIDeviceRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP);
582 583 584 585 586 587 588 589 590 591
        if (caps & PCI_EXP_DEVCAP_FLR) {
            VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id, dev->name);
            return 1;
        }
    }

    /* The PCI AF Function Level Reset capability is
     * the same thing, except for conventional PCI
     * devices. This is not common yet.
     */
592
    pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF);
593
    if (pos) {
594
        caps = virPCIDeviceRead16(dev, cfgfd, pos + PCI_AF_CAP);
595 596 597 598 599 600
        if (caps & PCI_AF_CAP_FLR) {
            VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id, dev->name);
            return 1;
        }
    }

601 602 603 604 605 606
    /* there are some buggy devices that do support FLR, but forget to
     * advertise that fact in their capabilities.  However, FLR is *required*
     * to be present for virtual functions (VFs), so if we see that this
     * device is a VF, we just assume FLR works
     */

607
    path = g_strdup_printf(PCI_SYSFS "devices/%s/physfn", dev->name);
608 609 610 611 612 613 614 615

    found = virFileExists(path);
    if (found) {
        VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF; forcing flr on",
                  dev->id, dev->name);
        return 1;
    }

616 617 618 619 620 621 622 623 624
    VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name);

    return 0;
}

/* Require the device has the PCI Power Management capability
 * and that a D3hot->D0 transition will results in a full
 * internal reset, not just a soft reset.
 */
625
static unsigned int
626
virPCIDeviceDetectPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
627 628 629 630 631
{
    if (dev->pci_pm_cap_pos) {
        uint32_t ctl;

        /* require the NO_SOFT_RESET bit is clear */
632
        ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
633 634 635 636 637 638 639 640 641 642 643
        if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) {
            VIR_DEBUG("%s %s: detected PM reset capability", dev->id, dev->name);
            return 1;
        }
    }

    VIR_DEBUG("%s %s: no PM reset capability found", dev->id, dev->name);

    return 0;
}

644
/* Any active devices on the same domain/bus ? */
645
static int
646
virPCIDeviceSharesBusWithActive(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
647
{
648
    virPCIDeviceList *inactiveDevs = data;
649

650
    /* Different domain, different bus, or simply identical device */
651 652 653 654
    if (dev->address.domain != check->address.domain ||
        dev->address.bus != check->address.bus ||
        (dev->address.slot == check->address.slot &&
         dev->address.function == check->address.function))
655 656
        return 0;

657
    /* same bus, but inactive, i.e. about to be assigned to guest */
658
    if (inactiveDevs && virPCIDeviceListFind(inactiveDevs, check))
659
        return 0;
660

661
    return 1;
662 663
}

664 665 666
/* Look for an active device sharing the bus of @dev, ignoring devices
 * listed in @inactiveDevs.
 *
 * Returns the first such device (owned by the caller), or NULL if there
 * is none or the search failed.
 */
static virPCIDevicePtr
virPCIDeviceBusContainsActiveDevices(virPCIDevicePtr dev,
                                     virPCIDeviceList *inactiveDevs)
{
    virPCIDevicePtr found = NULL;
    int rc = virPCIDeviceIterDevices(virPCIDeviceSharesBusWithActive,
                                     dev, &found, inactiveDevs);

    return rc < 0 ? NULL : found;
}

/* Is @check the parent of @dev ? */
676
static int
677
virPCIDeviceIsParent(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
678 679 680
{
    uint16_t device_class;
    uint8_t header_type, secondary, subordinate;
681
    virPCIDevicePtr *best = data;
682 683
    int ret = 0;
    int fd;
684

685
    if (dev->address.domain != check->address.domain)
686 687
        return 0;

688
    if ((fd = virPCIDeviceConfigOpenTry(check)) < 0)
689 690
        return 0;

691
    /* Is it a bridge? */
692 693
    ret = virPCIDeviceReadClass(check, &device_class);
    if (ret < 0 || device_class != PCI_CLASS_BRIDGE_PCI)
694
        goto cleanup;
695 696

    /* Is it a plane? */
697
    header_type = virPCIDeviceRead8(check, fd, PCI_HEADER_TYPE);
698
    if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
699
        goto cleanup;
700

701 702
    secondary   = virPCIDeviceRead8(check, fd, PCI_SECONDARY_BUS);
    subordinate = virPCIDeviceRead8(check, fd, PCI_SUBORDINATE_BUS);
703

704
    VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name, check->name);
705

706 707 708
    /* if the secondary bus exactly equals the device's bus, then we found
     * the direct parent.  No further work is necessary
     */
709
    if (dev->address.bus == secondary) {
710 711 712
        ret = 1;
        goto cleanup;
    }
713

714
    /* otherwise, SRIOV allows VFs to be on different buses than their PFs.
715 716 717
     * In this case, what we need to do is look for the "best" match; i.e.
     * the most restrictive match that still satisfies all of the conditions.
     */
718
    if (dev->address.bus > secondary && dev->address.bus <= subordinate) {
719
        if (*best == NULL) {
720 721 722 723
            *best = virPCIDeviceNew(check->address.domain,
                                    check->address.bus,
                                    check->address.slot,
                                    check->address.function);
724 725 726 727 728
            if (*best == NULL) {
                ret = -1;
                goto cleanup;
            }
        } else {
729 730 731 732
            /* OK, we had already recorded a previous "best" match for the
             * parent.  See if the current device is more restrictive than the
             * best, and if so, make it the new best
             */
733 734 735
            int bestfd;
            uint8_t best_secondary;

736
            if ((bestfd = virPCIDeviceConfigOpenTry(*best)) < 0)
737
                goto cleanup;
738 739
            best_secondary = virPCIDeviceRead8(*best, bestfd, PCI_SECONDARY_BUS);
            virPCIDeviceConfigClose(*best, bestfd);
740 741

            if (secondary > best_secondary) {
742
                virPCIDeviceFree(*best);
743 744 745 746
                *best = virPCIDeviceNew(check->address.domain,
                                        check->address.bus,
                                        check->address.slot,
                                        check->address.function);
747 748 749 750
                if (*best == NULL) {
                    ret = -1;
                    goto cleanup;
                }
751 752 753 754
            }
        }
    }

755
 cleanup:
756
    virPCIDeviceConfigClose(check, fd);
757
    return ret;
758 759
}

760
static int
761
virPCIDeviceGetParent(virPCIDevicePtr dev, virPCIDevicePtr *parent)
762
{
763
    virPCIDevicePtr best = NULL;
764 765 766
    int ret;

    *parent = NULL;
767
    ret = virPCIDeviceIterDevices(virPCIDeviceIsParent, dev, parent, &best);
768
    if (ret == 1)
769
        virPCIDeviceFree(best);
770 771 772
    else if (ret == 0)
        *parent = best;
    return ret;
773 774 775 776 777 778
}

/* Secondary Bus Reset is our sledgehammer - it resets all
 * devices behind a bus.
 */
static int
779 780 781
virPCIDeviceTrySecondaryBusReset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 virPCIDeviceList *inactiveDevs)
782
{
J
Ján Tomko 已提交
783 784
    g_autoptr(virPCIDevice) parent = NULL;
    g_autoptr(virPCIDevice) conflict = NULL;
785 786 787
    uint8_t config_space[PCI_CONF_LEN];
    uint16_t ctl;
    int ret = -1;
788
    int parentfd;
789

790 791 792
    /* Refuse to do a secondary bus reset if there are other
     * devices/functions behind the bus are used by the host
     * or other guests.
793
     */
794
    if ((conflict = virPCIDeviceBusContainsActiveDevices(dev, inactiveDevs))) {
795
        virReportError(VIR_ERR_INTERNAL_ERROR,
796 797
                       _("Active %s devices on bus with %s, not doing bus reset"),
                       conflict->name, dev->name);
798 799 800 801
        return -1;
    }

    /* Find the parent bus */
802
    if (virPCIDeviceGetParent(dev, &parent) < 0)
803
        return -1;
804
    if (!parent) {
805
        virReportError(VIR_ERR_INTERNAL_ERROR,
806 807
                       _("Failed to find parent device for %s"),
                       dev->name);
808 809
        return -1;
    }
810
    if ((parentfd = virPCIDeviceConfigOpenWrite(parent)) < 0)
811
        goto out;
812 813 814 815 816 817 818

    VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name);

    /* Save and restore the device's config space; we only do this
     * for the supplied device since we refuse to do a reset if there
     * are multiple devices/functions
     */
819
    if (virPCIDeviceRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
820
        virReportError(VIR_ERR_INTERNAL_ERROR,
821
                       _("Failed to read PCI config space for %s"),
822
                       dev->name);
823 824 825 826 827 828
        goto out;
    }

    /* Read the control register, set the reset flag, wait 200ms,
     * unset the reset flag and wait 200ms.
     */
H
hexin 已提交
829
    ctl = virPCIDeviceRead16(dev, parentfd, PCI_BRIDGE_CONTROL);
830

831 832
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL,
                        ctl | PCI_BRIDGE_CTL_RESET);
833

834
    g_usleep(200 * 1000); /* sleep 200ms */
835

836
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl);
837

838
    g_usleep(200 * 1000); /* sleep 200ms */
839

840
    if (virPCIDeviceWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
841
        virReportError(VIR_ERR_INTERNAL_ERROR,
842 843 844 845
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        goto out;
    }
846
    ret = 0;
847

848
 out:
849
    virPCIDeviceConfigClose(parent, parentfd);
850 851 852 853 854 855 856 857
    return ret;
}

/* Power management reset attempts to reset a device using a
 * D-state transition from D3hot to D0. Note, in detect_pm_reset()
 * above we require the device supports a full internal reset.
 */
static int
858
virPCIDeviceTryPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
859 860 861 862 863 864 865 866
{
    uint8_t config_space[PCI_CONF_LEN];
    uint32_t ctl;

    if (!dev->pci_pm_cap_pos)
        return -1;

    /* Save and restore the device's config space. */
867
    if (virPCIDeviceRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
868
        virReportError(VIR_ERR_INTERNAL_ERROR,
869
                       _("Failed to read PCI config space for %s"),
870
                       dev->name);
871 872 873 874 875
        return -1;
    }

    VIR_DEBUG("%s %s: doing a power management reset", dev->id, dev->name);

876
    ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
877 878
    ctl &= ~PCI_PM_CTRL_STATE_MASK;

879 880
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D3hot);
881

882
    g_usleep(10 * 1000); /* sleep 10ms */
883

884 885
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D0);
886

887
    g_usleep(10 * 1000); /* sleep 10ms */
888

889
    if (virPCIDeviceWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
890
        virReportError(VIR_ERR_INTERNAL_ERROR,
891 892 893 894
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        return -1;
    }
895 896 897 898 899

    return 0;
}

static int
900
virPCIDeviceInit(virPCIDevicePtr dev, int cfgfd)
901
{
902 903
    int flr;

904 905 906
    dev->pcie_cap_pos   = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP);
    dev->pci_pm_cap_pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM);
    flr = virPCIDeviceDetectFunctionLevelReset(dev, cfgfd);
907
    if (flr < 0)
908
        return flr;
909 910
    dev->has_flr        = !!flr;
    dev->has_pm_reset   = !!virPCIDeviceDetectPowerManagementReset(dev, cfgfd);
911

912 913 914 915
    return 0;
}

int
916 917 918
virPCIDeviceReset(virPCIDevicePtr dev,
                  virPCIDeviceList *activeDevs,
                  virPCIDeviceList *inactiveDevs)
919
{
920 921
    g_autofree char *drvPath = NULL;
    g_autofree char *drvName = NULL;
922
    int ret = -1;
923
    int fd = -1;
924 925 926 927 928 929 930 931 932 933 934 935
    int hdrType = -1;

    if (virPCIGetHeaderType(dev, &hdrType) < 0)
        return -1;

    if (hdrType != VIR_PCI_HEADER_ENDPOINT) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid attempt to reset PCI device %s. "
                         "Only PCI endpoint devices can be reset"),
                       dev->name);
        return -1;
    }
936

937
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
938
        virReportError(VIR_ERR_INTERNAL_ERROR,
939 940 941 942
                       _("Not resetting active device %s"), dev->name);
        return -1;
    }

943 944 945 946 947 948 949 950
    /* If the device is currently bound to vfio-pci, ignore all
     * requests to reset it, since the vfio-pci driver will always
     * reset it whenever appropriate, so doing it ourselves would just
     * be redundant.
     */
    if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, &drvName) < 0)
        goto cleanup;

951
    if (virPCIStubDriverTypeFromString(drvName) == VIR_PCI_STUB_DRIVER_VFIO) {
952 953 954 955 956 957 958
        VIR_DEBUG("Device %s is bound to vfio-pci - skip reset",
                  dev->name);
        ret = 0;
        goto cleanup;
    }
    VIR_DEBUG("Resetting device %s", dev->name);

959
    if ((fd = virPCIDeviceConfigOpenWrite(dev)) < 0)
960
        goto cleanup;
961

962
    if (virPCIDeviceInit(dev, fd) < 0)
963 964
        goto cleanup;

965 966 967
    /* KVM will perform FLR when starting and stopping
     * a guest, so there is no need for us to do it here.
     */
968 969 970 971
    if (dev->has_flr) {
        ret = 0;
        goto cleanup;
    }
972

973 974 975 976 977
    /* If the device supports PCI power management reset,
     * that's the next best thing because it only resets
     * the function, not the whole device.
     */
    if (dev->has_pm_reset)
978
        ret = virPCIDeviceTryPowerManagementReset(dev, fd);
979

980
    /* Bus reset is not an option with the root bus */
981
    if (ret < 0 && dev->address.bus != 0)
982
        ret = virPCIDeviceTrySecondaryBusReset(dev, fd, inactiveDevs);
983

984 985
    if (ret < 0) {
        virErrorPtr err = virGetLastError();
986
        virReportError(VIR_ERR_INTERNAL_ERROR,
987 988
                       _("Unable to reset PCI device %s: %s"),
                       dev->name,
989 990
                       err ? err->message :
                       _("no FLR, PM reset or bus reset available"));
991 992
    }

993
 cleanup:
994
    virPCIDeviceConfigClose(dev, fd);
995 996 997
    return ret;
}

998

999
static int
1000
virPCIProbeStubDriver(virPCIStubDriver driver)
1001
{
1002
    const char *drvname = NULL;
1003
    g_autofree char *drvpath = NULL;
1004
    bool probed = false;
1005

1006 1007 1008 1009 1010 1011 1012 1013
    if (driver == VIR_PCI_STUB_DRIVER_NONE ||
        !(drvname = virPCIStubDriverTypeToString(driver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s",
                       _("Attempting to use unknown stub driver"));
        return -1;
    }

1014
 recheck:
1015
    if ((drvpath = virPCIDriverDir(drvname)) && virFileExists(drvpath))
1016 1017
        /* driver already loaded, return */
        return 0;
1018 1019

    if (!probed) {
1020
        g_autofree char *errbuf = NULL;
1021
        probed = true;
1022 1023
        if ((errbuf = virKModLoad(drvname, true))) {
            VIR_WARN("failed to load driver %s: %s", drvname, errbuf);
1024
            goto cleanup;
1025
        }
1026 1027

        goto recheck;
1028 1029
    }

1030
 cleanup:
1031 1032 1033
    /* If we know failure was because of blacklist, let's report that;
     * otherwise, report a more generic failure message
     */
1034
    if (virKModIsBlacklisted(drvname)) {
1035 1036 1037
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s: "
                         "administratively prohibited"),
1038
                       drvname);
1039 1040 1041
    } else {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s"),
1042
                       drvname);
1043 1044
    }

1045
    return -1;
1046 1047
}

1048
int
1049
virPCIDeviceUnbind(virPCIDevicePtr dev)
1050
{
1051 1052 1053
    g_autofree char *path = NULL;
    g_autofree char *drvpath = NULL;
    g_autofree char *driver = NULL;
1054 1055

    if (virPCIDeviceGetDriverPathAndName(dev, &drvpath, &driver) < 0)
1056
        return -1;
1057

1058
    if (!driver)
1059
        /* The device is not bound to any driver */
1060
        return 0;
1061

1062
    if (!(path = virPCIFile(dev->name, "driver/unbind")))
1063
        return -1;
1064 1065 1066 1067 1068 1069

    if (virFileExists(path)) {
        if (virFileWriteStr(path, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to unbind PCI device '%s' from %s"),
                                 dev->name, driver);
1070
            return -1;
1071 1072 1073
        }
    }

1074
    return 0;
1075 1076
}

1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101

/**
 * virPCIDeviceRebind:
 *  @dev: virPCIDevice object describing the device to rebind
 *
 * Unbind a device from its driver, then immediately ask the kernel
 * to probe it again by writing the device name to the PCI bus
 * "drivers_probe" file.
 *
 * Returns 0 on success, -1 on failure
 */
int virPCIDeviceRebind(virPCIDevicePtr dev)
{
    if (virPCIDeviceUnbind(dev) < 0)
        return -1;

    if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) >= 0)
        return 0;

    virReportSystemError(errno,
                         _("Failed to trigger a probe for PCI device '%s'"),
                         dev->name);
    return -1;
}


1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112
/*
 * Bind a PCI device to a driver using driver_override sysfs interface.
 * E.g.
 *
 *  echo driver-name > /sys/bus/pci/devices/0000:03:00.0/driver_override
 *  echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
 *  echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
 *
 * An empty driverName will cause the device to be bound to its
 * preferred driver.
 */
1113
static int
1114 1115 1116
virPCIDeviceBindWithDriverOverride(virPCIDevicePtr dev,
                                   const char *driverName)
{
1117
    g_autofree char *path = NULL;
1118 1119 1120 1121 1122 1123 1124 1125 1126

    if (!(path = virPCIFile(dev->name, "driver_override")))
        return -1;

    if (virFileWriteStr(path, driverName, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to add driver '%s' to driver_override "
                               " interface of PCI device '%s'"),
                             driverName, dev->name);
1127
        return -1;
1128 1129
    }

1130
    if (virPCIDeviceRebind(dev) < 0)
1131
        return -1;
1132

1133
    return 0;
1134 1135 1136
}

static int
1137
virPCIDeviceUnbindFromStub(virPCIDevicePtr dev)
1138 1139 1140 1141 1142 1143 1144 1145
{
    if (!dev->unbind_from_stub) {
        VIR_DEBUG("Unbind from stub skipped for PCI device %s", dev->name);
        return 0;
    }

    return virPCIDeviceBindWithDriverOverride(dev, "\n");
}
1146 1147

static int
1148
virPCIDeviceBindToStub(virPCIDevicePtr dev)
1149 1150
{
    const char *stubDriverName;
1151 1152
    g_autofree char *stubDriverPath = NULL;
    g_autofree char *driverLink = NULL;
1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168

    /* Check the device is configured to use one of the known stub drivers */
    if (dev->stubDriver == VIR_PCI_STUB_DRIVER_NONE) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    } else if (!(stubDriverName = virPCIStubDriverTypeToString(dev->stubDriver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unknown stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    if (!(stubDriverPath = virPCIDriverDir(stubDriverName))  ||
        !(driverLink = virPCIFile(dev->name, "driver")))
1169
        return -1;
1170 1171 1172 1173 1174 1175

    if (virFileExists(driverLink)) {
        if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
            /* The device is already bound to the correct driver */
            VIR_DEBUG("Device %s is already bound to %s",
                      dev->name, stubDriverName);
1176
            return 0;
1177 1178 1179 1180
        }
    }

    if (virPCIDeviceBindWithDriverOverride(dev, stubDriverName) < 0)
1181
        return -1;
1182 1183

    dev->unbind_from_stub = true;
1184
    return 0;
1185 1186
}

1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
/* virPCIDeviceDetach:
 *
 * Detach this device from the host driver, attach it to the stub
 * driver (previously set with virPCIDeviceSetStubDriver(), and add *a
 * copy* of the object to the inactiveDevs list (if provided). This
 * function will *never* consume dev, so the caller should free it.
 *
 * Returns 0 on success, -1 on failure (will fail if the device is
 * already in the activeDevs list, but will be a NOP if the device is
 * already bound to the stub).
 *
 * GENERAL NOTE: activeDevs should be a list of all PCI devices
 * currently in use by a domain. inactiveDevs is a list of all PCI
 * devices that libvirt has detached from the host driver + attached
 * to the stub driver, but hasn't yet assigned to a domain. Any device
 * that is still attached to its host driver should not be on either
 * list.
 */
1205
int
1206 1207
virPCIDeviceDetach(virPCIDevicePtr dev,
                   virPCIDeviceList *activeDevs,
1208
                   virPCIDeviceList *inactiveDevs)
1209
{
1210
    if (virPCIProbeStubDriver(dev->stubDriver) < 0)
1211 1212
        return -1;

1213
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1214
        virReportError(VIR_ERR_INTERNAL_ERROR,
1215 1216 1217 1218
                       _("Not detaching active device %s"), dev->name);
        return -1;
    }

1219
    if (virPCIDeviceBindToStub(dev) < 0)
1220 1221
        return -1;

1222 1223 1224
    /* Add *a copy of* the dev into list inactiveDevs, if
     * it's not already there.
     */
1225 1226 1227 1228
    if (inactiveDevs && !virPCIDeviceListFind(inactiveDevs, dev)) {
        VIR_DEBUG("Adding PCI device %s to inactive list", dev->name);
        if (virPCIDeviceListAddCopy(inactiveDevs, dev) < 0)
            return -1;
1229 1230 1231
    }

    return 0;
1232 1233
}

1234 1235 1236 1237
/*
 * Pre-condition: inactivePCIHostdevs & activePCIHostdevs
 * are locked
 */
1238
int
1239 1240
virPCIDeviceReattach(virPCIDevicePtr dev,
                     virPCIDeviceListPtr activeDevs,
1241
                     virPCIDeviceListPtr inactiveDevs)
1242
{
1243
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1244
        virReportError(VIR_ERR_INTERNAL_ERROR,
1245 1246 1247 1248
                       _("Not reattaching active device %s"), dev->name);
        return -1;
    }

1249
    if (virPCIDeviceUnbindFromStub(dev) < 0)
1250 1251 1252
        return -1;

    /* Steal the dev from list inactiveDevs */
1253 1254
    if (inactiveDevs) {
        VIR_DEBUG("Removing PCI device %s from inactive list", dev->name);
1255
        virPCIDeviceListDel(inactiveDevs, dev);
1256
    }
1257 1258

    return 0;
1259 1260 1261
}

static char *
1262
virPCIDeviceReadID(virPCIDevicePtr dev, const char *id_name)
1263
{
1264
    g_autofree char *path = NULL;
1265 1266
    char *id_str;

1267
    if (!(path = virPCIFile(dev->name, id_name)))
1268
        return NULL;
1269 1270

    /* ID string is '0xNNNN\n' ... i.e. 7 bytes */
1271
    if (virFileReadAll(path, 7, &id_str) < 0)
1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285
        return NULL;

    /* Check for 0x suffix */
    if (id_str[0] != '0' || id_str[1] != 'x') {
        VIR_FREE(id_str);
        return NULL;
    }

    /* Chop off the newline; we know the string is 7 bytes */
    id_str[6] = '\0';

    return id_str;
}

1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330
/* virPCIDeviceAddressIsValid:
 * @addr: address to check
 * @report: whether to report an error for an invalid address
 *
 * An address is valid when bus <= 0xFF, slot <= 0x1F, function <= 7
 * and at least one of domain, bus and slot is non-zero.
 *
 * Returns true for a valid address, false otherwise.
 */
bool
virPCIDeviceAddressIsValid(virPCIDeviceAddressPtr addr,
                           bool report)
{
    if (addr->bus > 0xFF) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address bus='0x%x', "
                             "must be <= 0xFF"),
                           addr->bus);
        }
        return false;
    }

    if (addr->slot > 0x1F) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address slot='0x%x', "
                             "must be <= 0x1F"),
                           addr->slot);
        }
        return false;
    }

    if (addr->function > 7) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address function=0x%x, "
                             "must be <= 7"),
                           addr->function);
        }
        return false;
    }

    if (virPCIDeviceAddressIsEmpty(addr)) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Invalid PCI address 0000:00:00, at least "
                             "one of domain, bus, or slot must be > 0"));
        }
        return false;
    }

    return true;
}

/* An address counts as empty when domain, bus and slot are all zero;
 * the function number is not taken into account. */
bool
virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress *addr)
{
    return addr->domain == 0 && addr->bus == 0 && addr->slot == 0;
}

bool
1331 1332
virPCIDeviceAddressEqual(const virPCIDeviceAddress *addr1,
                         const virPCIDeviceAddress *addr2)
1333 1334 1335 1336 1337 1338 1339 1340 1341 1342
{
    if (addr1->domain == addr2->domain &&
        addr1->bus == addr2->bus &&
        addr1->slot == addr2->slot &&
        addr1->function == addr2->function) {
        return true;
    }
    return false;
}

1343
char *
1344
virPCIDeviceAddressAsString(const virPCIDeviceAddress *addr)
1345 1346 1347
{
    char *str;

1348 1349 1350 1351 1352
    str = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT,
                          addr->domain,
                          addr->bus,
                          addr->slot,
                          addr->function);
1353 1354 1355
    return str;
}

1356
virPCIDevicePtr
1357 1358 1359 1360
virPCIDeviceNew(unsigned int domain,
                unsigned int bus,
                unsigned int slot,
                unsigned int function)
1361
{
J
Ján Tomko 已提交
1362
    g_autoptr(virPCIDevice) dev = NULL;
1363 1364
    g_autofree char *vendor = NULL;
    g_autofree char *product = NULL;
1365

1366
    if (VIR_ALLOC(dev) < 0)
1367 1368
        return NULL;

1369 1370 1371 1372
    dev->address.domain = domain;
    dev->address.bus = bus;
    dev->address.slot = slot;
    dev->address.function = function;
1373

1374 1375
    dev->name = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT, domain, bus, slot,
                                function);
1376

1377
    dev->path = g_strdup_printf(PCI_SYSFS "devices/%s/config", dev->name);
1378

1379
    if (!virFileExists(dev->path)) {
1380 1381 1382
        virReportSystemError(errno,
                             _("Device %s not found: could not access %s"),
                             dev->name, dev->path);
1383
        return NULL;
1384 1385
    }

1386 1387
    vendor  = virPCIDeviceReadID(dev, "vendor");
    product = virPCIDeviceReadID(dev, "device");
1388 1389

    if (!vendor || !product) {
1390
        virReportError(VIR_ERR_INTERNAL_ERROR,
1391 1392
                       _("Failed to read product/vendor ID for %s"),
                       dev->name);
1393
        return NULL;
1394 1395 1396
    }

    /* strings contain '0x' prefix */
1397 1398
    if (g_snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2],
                   &product[2]) >= sizeof(dev->id)) {
1399
        virReportError(VIR_ERR_INTERNAL_ERROR,
E
Eric Blake 已提交
1400 1401
                       _("dev->id buffer overflow: %s %s"),
                       &vendor[2], &product[2]);
1402
        return NULL;
E
Eric Blake 已提交
1403
    }
1404 1405 1406

    VIR_DEBUG("%s %s: initialized", dev->id, dev->name);

J
Ján Tomko 已提交
1407
    return g_steal_pointer(&dev);
1408 1409
}

L
Laine Stump 已提交
1410 1411 1412 1413 1414 1415

/* virPCIDeviceCopy:
 * @dev: device to duplicate
 *
 * Create a copy of @dev. Plain members are copied by value, the
 * string members (name, path, used_by_drvname, used_by_domname) are
 * duplicated so that the copy owns its own strings.
 *
 * Returns the copy, or NULL on allocation failure.
 */
virPCIDevicePtr
virPCIDeviceCopy(virPCIDevicePtr dev)
{
    virPCIDevicePtr copy;

    if (VIR_ALLOC(copy) < 0)
        return NULL;

    /* shallow copy to take care of most attributes ... */
    *copy = *dev;

    /* ... then replace every borrowed string pointer by a duplicate */
    copy->name = g_strdup(dev->name);
    copy->path = g_strdup(dev->path);
    copy->used_by_drvname = g_strdup(dev->used_by_drvname);
    copy->used_by_domname = g_strdup(dev->used_by_domname);

    return copy;
}


1431
void
1432
virPCIDeviceFree(virPCIDevicePtr dev)
1433
{
1434 1435
    if (!dev)
        return;
1436
    VIR_DEBUG("%s %s: freeing", dev->id, dev->name);
1437
    VIR_FREE(dev->name);
E
Eric Blake 已提交
1438
    VIR_FREE(dev->path);
C
Chunyan Liu 已提交
1439 1440
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);
1441 1442
    VIR_FREE(dev);
}
1443

1444 1445 1446 1447 1448
/**
 * virPCIDeviceGetAddress:
 * @dev: device to get address from
 *
 * Return the PCI address of @dev. The address is embedded in the
 * device object, so the returned pointer is owned by the device and
 * must not be freed.
 *
 * Returns: a pointer to the address, which can never be NULL.
 */
virPCIDeviceAddressPtr
virPCIDeviceGetAddress(virPCIDevicePtr dev)
{
    return &dev->address;
}

1459
const char *
1460
virPCIDeviceGetName(virPCIDevicePtr dev)
1461 1462 1463 1464
{
    return dev->name;
}

1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476
/**
 * virPCIDeviceGetConfigPath:
 * @dev: device to query
 *
 * Returns a pointer to a string containing the path of @dev's PCI
 * config file. The string stays owned by the device.
 */
const char *
virPCIDeviceGetConfigPath(virPCIDevicePtr dev)
{
    return dev->path;
}

1477
/* Store the "managed" flag in @dev. */
void
virPCIDeviceSetManaged(virPCIDevicePtr dev, bool managed)
{
    dev->managed = managed;
}

1482
bool
1483
virPCIDeviceGetManaged(virPCIDevicePtr dev)
1484 1485 1486 1487
{
    return dev->managed;
}

1488 1489
/* Record which stub driver @dev should be bound to when detached. */
void
virPCIDeviceSetStubDriver(virPCIDevicePtr dev, virPCIStubDriver driver)
{
    dev->stubDriver = driver;
}

1494
virPCIStubDriver
1495 1496 1497 1498 1499
virPCIDeviceGetStubDriver(virPCIDevicePtr dev)
{
    return dev->stubDriver;
}

1500
bool
1501
virPCIDeviceGetUnbindFromStub(virPCIDevicePtr dev)
1502 1503 1504 1505 1506
{
    return dev->unbind_from_stub;
}

void
1507
virPCIDeviceSetUnbindFromStub(virPCIDevicePtr dev, bool unbind)
1508
{
1509
    dev->unbind_from_stub = unbind;
1510 1511
}

1512
bool
1513
virPCIDeviceGetRemoveSlot(virPCIDevicePtr dev)
1514 1515 1516 1517 1518
{
    return dev->remove_slot;
}

void
1519
virPCIDeviceSetRemoveSlot(virPCIDevicePtr dev, bool remove_slot)
1520
{
1521
    dev->remove_slot = remove_slot;
1522 1523
}

1524
bool
1525
virPCIDeviceGetReprobe(virPCIDevicePtr dev)
1526 1527 1528 1529 1530
{
    return dev->reprobe;
}

void
1531
virPCIDeviceSetReprobe(virPCIDevicePtr dev, bool reprobe)
1532
{
1533
    dev->reprobe = reprobe;
1534 1535
}

C
Chunyan Liu 已提交
1536 1537 1538 1539
/* virPCIDeviceSetUsedBy:
 * @dev: device to update
 * @drv_name: name of the driver using the device
 * @dom_name: name of the domain using the device
 *
 * Replace the stored "used by" driver and domain names with copies
 * of the given strings; the previous values are freed.
 *
 * Always returns 0.
 */
int
virPCIDeviceSetUsedBy(virPCIDevicePtr dev,
                      const char *drv_name,
                      const char *dom_name)
{
    /* Release both old values before storing the new ones */
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);

    dev->used_by_drvname = g_strdup(drv_name);
    dev->used_by_domname = g_strdup(dom_name);

    return 0;
}

C
Chunyan Liu 已提交
1549 1550 1551 1552
/* Hand out the stored "used by" driver and domain names through
 * @drv_name and @dom_name. The strings stay owned by @dev. */
void
virPCIDeviceGetUsedBy(virPCIDevicePtr dev,
                      const char **drv_name,
                      const char **dom_name)
{
    *drv_name = dev->used_by_drvname;
    *dom_name = dev->used_by_domname;
}

1558 1559
virPCIDeviceListPtr
virPCIDeviceListNew(void)
1560
{
1561
    virPCIDeviceListPtr list;
1562

1563 1564 1565 1566
    if (virPCIInitialize() < 0)
        return NULL;

    if (!(list = virObjectLockableNew(virPCIDeviceListClass)))
1567 1568 1569 1570 1571
        return NULL;

    return list;
}

1572 1573
static void
virPCIDeviceListDispose(void *obj)
1574
{
1575
    virPCIDeviceListPtr list = obj;
1576
    size_t i;
1577 1578

    for (i = 0; i < list->count; i++) {
1579
        virPCIDeviceFree(list->devs[i]);
1580 1581 1582 1583 1584 1585 1586 1587
        list->devs[i] = NULL;
    }

    list->count = 0;
    VIR_FREE(list->devs);
}

int
1588 1589
virPCIDeviceListAdd(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
1590
{
1591
    if (virPCIDeviceListFind(list, dev)) {
1592
        virReportError(VIR_ERR_INTERNAL_ERROR,
1593 1594 1595
                       _("Device %s is already in use"), dev->name);
        return -1;
    }
1596
    return VIR_APPEND_ELEMENT(list->devs, list->count, dev);
1597 1598
}

L
Laine Stump 已提交
1599 1600 1601 1602 1603

/* virPCIDeviceListAddCopy - add a *copy* of the device to this list
 *
 * @dev itself is left untouched and stays owned by the caller.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virPCIDeviceListAddCopy(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    g_autoptr(virPCIDevice) copy = NULL;

    if (!(copy = virPCIDeviceCopy(dev)) ||
        virPCIDeviceListAdd(list, copy) < 0)
        return -1;

    /* The list holds the copy now; keep the auto-cleanup away from it */
    copy = NULL;
    return 0;
}


1616 1617 1618
/* Returns the device stored at position @idx of @list, or NULL if
 * @idx is out of range. The device stays in the list. */
virPCIDevicePtr
virPCIDeviceListGet(virPCIDeviceListPtr list,
                    int idx)
{
    if (idx < 0 || idx >= list->count)
        return NULL;

    return list->devs[idx];
}

1628
size_t
1629
virPCIDeviceListCount(virPCIDeviceListPtr list)
1630
{
1631 1632 1633
    return list->count;
}

1634 1635 1636
/* Remove the device at position @idx from @list without freeing it.
 *
 * Returns the removed device, or NULL if @idx is out of range. */
virPCIDevicePtr
virPCIDeviceListStealIndex(virPCIDeviceListPtr list,
                           int idx)
{
    virPCIDevicePtr stolen;

    if (idx < 0 || idx >= list->count)
        return NULL;

    /* Grab the pointer before the slot is removed from the array */
    stolen = list->devs[idx];
    VIR_DELETE_ELEMENT(list->devs, idx, list->count);

    return stolen;
}

1648 1649 1650
/* Remove the list entry whose address matches @dev without freeing
 * it.
 *
 * Returns the removed entry, or NULL if no entry matches. */
virPCIDevicePtr
virPCIDeviceListSteal(virPCIDeviceListPtr list,
                      virPCIDevicePtr dev)
{
    int idx = virPCIDeviceListFindIndex(list, dev);

    return virPCIDeviceListStealIndex(list, idx);
}

1655
void
1656 1657
virPCIDeviceListDel(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
1658
{
1659
    virPCIDeviceFree(virPCIDeviceListSteal(list, dev));
1660 1661
}

1662
int
1663
virPCIDeviceListFindIndex(virPCIDeviceListPtr list, virPCIDevicePtr dev)
1664
{
1665
    size_t i;
1666

1667 1668 1669 1670 1671 1672
    for (i = 0; i < list->count; i++) {
        virPCIDevicePtr other = list->devs[i];
        if (other->address.domain   == dev->address.domain &&
            other->address.bus      == dev->address.bus    &&
            other->address.slot     == dev->address.slot   &&
            other->address.function == dev->address.function)
1673
            return i;
1674
    }
1675 1676 1677
    return -1;
}

L
Laine Stump 已提交
1678 1679 1680 1681 1682 1683 1684 1685

/* Look up the entry of @list with the given PCI address.
 *
 * Returns the first matching device (still owned by the list), or
 * NULL if there is none. */
virPCIDevicePtr
virPCIDeviceListFindByIDs(virPCIDeviceListPtr list,
                          unsigned int domain,
                          unsigned int bus,
                          unsigned int slot,
                          unsigned int function)
{
    size_t idx;

    for (idx = 0; idx < list->count; idx++) {
        virPCIDevicePtr candidate = list->devs[idx];

        if (candidate->address.domain != domain ||
            candidate->address.bus != bus ||
            candidate->address.slot != slot ||
            candidate->address.function != function)
            continue;

        return candidate;
    }

    return NULL;
}


1700 1701
/* Returns the entry of @list that has the same PCI address as @dev
 * (still owned by the list), or NULL if there is none. */
virPCIDevicePtr
virPCIDeviceListFind(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    int idx = virPCIDeviceListFindIndex(list, dev);

    return idx >= 0 ? list->devs[idx] : NULL;
}
1710 1711


1712 1713 1714
/* virPCIDeviceFileIterate:
 * @dev: device whose sysfs directory is scanned
 * @actor: callback invoked for every relevant file
 * @opaque: data passed through to @actor
 *
 * Call @actor with the full path of each file in the device's sysfs
 * directory that is needed for device assignment. Iteration stops at
 * the first callback returning a negative value.
 *
 * Returns 0 on success, -1 on failure.
 */
int virPCIDeviceFileIterate(virPCIDevicePtr dev,
                            virPCIDeviceFileActor actor,
                            void *opaque)
{
    g_autofree char *pcidir = NULL;
    DIR *dir = NULL;
    struct dirent *ent;
    int direrr;
    int ret = -1;

    pcidir = g_strdup_printf("/sys/bus/pci/devices/" VIR_PCI_DEVICE_ADDRESS_FMT,
                             dev->address.domain, dev->address.bus,
                             dev->address.slot, dev->address.function);

    if (virDirOpen(&dir, pcidir) < 0)
        goto cleanup;

    while ((direrr = virDirRead(dir, &ent, pcidir)) > 0) {
        g_autofree char *file = NULL;
        const char *entry = ent->d_name;

        /* Device assignment requires:
         *   $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN,
         *   $PCIDIR/rom, $PCIDIR/reset, $PCIDIR/vendor, $PCIDIR/device
         * Everything else is skipped.
         */
        if (!(STREQ(entry, "config") ||
              STRPREFIX(entry, "resource") ||
              STREQ(entry, "rom") ||
              STREQ(entry, "vendor") ||
              STREQ(entry, "device") ||
              STREQ(entry, "reset")))
            continue;

        file = g_strdup_printf("%s/%s", pcidir, entry);
        if ((actor)(dev, file, opaque) < 0)
            goto cleanup;
    }

    /* A negative value means reading the directory itself failed */
    if (direrr < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    VIR_DIR_CLOSE(dir);
    return ret;
}
J
Jiri Denemark 已提交
1755

L
Laine Stump 已提交
1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766

/* virPCIDeviceAddressIOMMUGroupIterate:
 *   Call @actor for all devices in the same iommu_group as orig
 *   (including orig itself). Even if there is no iommu_group for the
 *   device (its iommu_group/devices directory can't be opened), call
 *   @actor once for orig.
 *
 *   Returns 0 on success, -1 on failure; in the "no iommu_group" case
 *   the return value of @actor is passed back unchanged.
 */
int
virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddressPtr orig,
                                     virPCIDeviceAddressActor actor,
                                     void *opaque)
{
    g_autofree char *groupPath = NULL;
    DIR *groupDir = NULL;
    struct dirent *ent;
    int direrr;
    int ret = -1;

    groupPath = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT "/iommu_group/devices",
                                orig->domain, orig->bus,
                                orig->slot, orig->function);

    if (virDirOpenQuiet(&groupDir, groupPath) < 0) {
        /* just process the original device, nothing more */
        ret = (actor)(orig, opaque);
        goto cleanup;
    }

    while ((direrr = virDirRead(groupDir, &ent, groupPath)) > 0) {
        virPCIDeviceAddress member;

        /* Each entry is expected to be named after a PCI address */
        if (virPCIDeviceAddressParse(ent->d_name, &member) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Found invalid device link '%s' in '%s'"),
                           ent->d_name, groupPath);
            goto cleanup;
        }

        if ((actor)(&member, opaque) < 0)
            goto cleanup;
    }

    if (direrr < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    VIR_DIR_CLOSE(groupDir);
    return ret;
}


/* Iteration callback: create a device object for @newDevAddr and
 * append it to the virPCIDeviceList passed in @opaque.
 *
 * Returns 0 on success, -1 on failure. */
static int
virPCIDeviceGetIOMMUGroupAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceListPtr groupList = opaque;
    g_autoptr(virPCIDevice) newDev = virPCIDeviceNew(newDevAddr->domain,
                                                     newDevAddr->bus,
                                                     newDevAddr->slot,
                                                     newDevAddr->function);

    if (!newDev)
        return -1;

    if (virPCIDeviceListAdd(groupList, newDev) < 0)
        return -1;

    newDev = NULL; /* it's now on the list */
    return 0;
}


/*
 * virPCIDeviceGetIOMMUGroupList - return a virPCIDeviceList containing
 * all of the devices in the same iommu_group as @dev.
 *
 * Return the new list, or NULL on failure
 */
virPCIDeviceListPtr
virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev)
{
    virPCIDeviceListPtr groupList = virPCIDeviceListNew();

    if (groupList &&
        virPCIDeviceAddressIOMMUGroupIterate(&(dev->address),
                                             virPCIDeviceGetIOMMUGroupAddOne,
                                             groupList) >= 0)
        return groupList;

    /* Either the list could not be created or filling it failed */
    virObjectUnref(groupList);
    return NULL;
}


/* Helper passed as opaque data to the iommu_group iteration callback:
 * points at the caller's array of addresses and at its element count. */
typedef struct {
    virPCIDeviceAddressPtr **iommuGroupDevices; /* array being filled */
    size_t *nIommuGroupDevices;                 /* number of elements */
} virPCIDeviceAddressList;
typedef virPCIDeviceAddressList *virPCIDeviceAddressListPtr;

/*
 * Iteration callback used by virPCIDeviceAddressGetIOMMUGroupAddresses:
 * appends a heap-allocated copy of @newDevAddr to the array described
 * by the virPCIDeviceAddressList passed in through @opaque.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virPCIGetIOMMUGroupAddressesAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceAddressListPtr list = opaque;
    virPCIDeviceAddressPtr addrCopy;
    int rc = -1;

    /* the array stores its own copy of the address */
    if (VIR_ALLOC(addrCopy) < 0)
        goto cleanup;

    *addrCopy = *newDevAddr;

    if (VIR_APPEND_ELEMENT(*list->iommuGroupDevices,
                           *list->nIommuGroupDevices, addrCopy) >= 0)
        rc = 0;

 cleanup:
    /* NOTE(review): presumably VIR_APPEND_ELEMENT clears addrCopy once the
     * array owns it, so this only releases the copy on failure - confirm */
    VIR_FREE(addrCopy);
    return rc;
}


/*
 * virPCIDeviceAddressGetIOMMUGroupAddresses:
 * @devAddr: address of the device whose iommu_group is enumerated
 * @iommuGroupDevices: array that receives a copy of each address found
 * @nIommuGroupDevices: number of entries in @iommuGroupDevices
 *
 * Iterates over the iommu_group of @devAddr and appends a copy of every
 * address reported by the iteration to @iommuGroupDevices.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddressPtr devAddr,
                                          virPCIDeviceAddressPtr **iommuGroupDevices,
                                          size_t *nIommuGroupDevices)
{
    virPCIDeviceAddressList addrList = {
        .iommuGroupDevices = iommuGroupDevices,
        .nIommuGroupDevices = nIommuGroupDevices,
    };
    int rc = virPCIDeviceAddressIOMMUGroupIterate(devAddr,
                                                  virPCIGetIOMMUGroupAddressesAddOne,
                                                  &addrList);

    return rc < 0 ? -1 : 0;
}


/* virPCIDeviceAddressGetIOMMUGroupNum:
 * @addr: PCI address of the device
 *
 * Resolves the device's iommu_group symlink and parses the last path
 * component of its target as the group number.
 *
 * Returns the group number, -2 if the device has no iommu_group
 * symlink, or -1 on any other error.
 */
int
virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddressPtr addr)
{
    g_autofree char *name = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT,
                                            addr->domain, addr->bus,
                                            addr->slot, addr->function);
    g_autofree char *linkPath = virPCIFile(name, "iommu_group");
    g_autofree char *target = NULL;
    const char *numStr = NULL;
    unsigned int num = 0;

    if (!linkPath)
        return -1;

    /* anything but a symlink means "no group", which is not an error */
    if (virFileIsLink(linkPath) != 1)
        return -2;

    if (virFileResolveLink(linkPath, &target) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       name, linkPath);
        return -1;
    }

    numStr = last_component(target);
    if (virStrToLong_ui(numStr, NULL, 10, &num) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("device %s iommu_group symlink %s has "
                         "invalid group number %s"),
                       name, target, numStr);
        return -1;
    }

    return num;
}


1945 1946
/* virPCIDeviceGetIOMMUGroupDev - return the name of the device used
 * to control this PCI device's group (e.g. "/dev/vfio/15")
1947 1948
 */
char *
1949
virPCIDeviceGetIOMMUGroupDev(virPCIDevicePtr dev)
1950
{
1951 1952
    g_autofree char *devPath = NULL;
    g_autofree char *groupPath = NULL;
1953 1954
    char *groupDev = NULL;

1955
    if (!(devPath = virPCIFile(dev->name, "iommu_group")))
1956
        return NULL;
1957 1958 1959 1960
    if (virFileIsLink(devPath) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s iommu_group file %s is not a symlink"),
                       dev->name, devPath);
1961
        return NULL;
1962 1963 1964 1965 1966
    }
    if (virFileResolveLink(devPath, &groupPath) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       dev->name, devPath);
1967
        return NULL;
1968
    }
1969
    groupDev = g_strdup_printf("/dev/vfio/%s", last_component(groupPath));
1970

1971 1972 1973
    return groupDev;
}

J
Jiri Denemark 已提交
1974
static int
1975
virPCIDeviceDownstreamLacksACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
1976 1977 1978 1979
{
    uint16_t flags;
    uint16_t ctrl;
    unsigned int pos;
1980 1981
    int fd;
    int ret = 0;
1982
    uint16_t device_class;
J
Jiri Denemark 已提交
1983

1984
    if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
J
Jiri Denemark 已提交
1985 1986
        return -1;

1987
    if (virPCIDeviceInit(dev, fd) < 0) {
1988 1989 1990 1991
        ret = -1;
        goto cleanup;
    }

1992 1993 1994
    if (virPCIDeviceReadClass(dev, &device_class) < 0)
        goto cleanup;

J
Jiri Denemark 已提交
1995
    pos = dev->pcie_cap_pos;
1996
    if (!pos || device_class != PCI_CLASS_BRIDGE_PCI)
1997
        goto cleanup;
J
Jiri Denemark 已提交
1998

1999
    flags = virPCIDeviceRead16(dev, fd, pos + PCI_EXP_FLAGS);
J
Jiri Denemark 已提交
2000
    if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM)
2001
        goto cleanup;
J
Jiri Denemark 已提交
2002

2003
    pos = virPCIDeviceFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS);
J
Jiri Denemark 已提交
2004 2005
    if (!pos) {
        VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name);
2006 2007
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2008 2009
    }

2010
    ctrl = virPCIDeviceRead16(dev, fd, pos + PCI_EXT_ACS_CTRL);
J
Jiri Denemark 已提交
2011 2012 2013
    if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) {
        VIR_DEBUG("%s %s: downstream port has ACS disabled",
                  dev->id, dev->name);
2014 2015
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2016 2017
    }

2018
 cleanup:
2019
    virPCIDeviceConfigClose(dev, fd);
2020
    return ret;
J
Jiri Denemark 已提交
2021 2022 2023
}

static int
2024
virPCIDeviceIsBehindSwitchLackingACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2025
{
J
Ján Tomko 已提交
2026
    g_autoptr(virPCIDevice) parent = NULL;
J
Jiri Denemark 已提交
2027

2028
    if (virPCIDeviceGetParent(dev, &parent) < 0)
2029
        return -1;
2030 2031 2032 2033 2034
    if (!parent) {
        /* if we have no parent, and this is the root bus, ACS doesn't come
         * into play since devices on the root bus can't P2P without going
         * through the root IOMMU.
         */
2035
        if (dev->address.bus == 0) {
2036
            return 0;
2037
        } else {
2038
            virReportError(VIR_ERR_INTERNAL_ERROR,
2039 2040 2041 2042
                           _("Failed to find parent device for %s"),
                           dev->name);
            return -1;
        }
J
Jiri Denemark 已提交
2043 2044 2045 2046 2047 2048 2049
    }

    /* XXX we should rather fail when we can't find device's parent and
     * stop the loop when we get to root instead of just stopping when no
     * parent can be found
     */
    do {
J
Ján Tomko 已提交
2050
        g_autoptr(virPCIDevice) tmp = NULL;
J
Jiri Denemark 已提交
2051
        int acs;
2052
        int ret;
J
Jiri Denemark 已提交
2053

2054
        acs = virPCIDeviceDownstreamLacksACS(parent);
J
Jiri Denemark 已提交
2055 2056 2057 2058 2059 2060 2061 2062 2063

        if (acs) {
            if (acs < 0)
                return -1;
            else
                return 1;
        }

        tmp = parent;
2064
        ret = virPCIDeviceGetParent(parent, &parent);
2065 2066
        if (ret < 0)
            return -1;
J
Jiri Denemark 已提交
2067 2068 2069 2070 2071
    } while (parent);

    return 0;
}

2072 2073
/*
 * virPCIDeviceIsAssignable:
 * @dev: device to check
 * @strict_acs_check: when non-zero, refuse devices behind a switch
 *                    lacking ACS
 *
 * Returns 1 if the device may be assigned, 0 if it may not or the ACS
 * check itself failed.
 */
int virPCIDeviceIsAssignable(virPCIDevicePtr dev,
                             int strict_acs_check)
{
    int lacksACS;

    /* XXX This could be a great place to actually check that a non-managed
     * device isn't in use, e.g. by checking that device is either un-bound
     * or bound to a stub driver.
     */

    lacksACS = virPCIDeviceIsBehindSwitchLackingACS(dev);

    if (lacksACS < 0)
        return 0;

    if (lacksACS == 0)
        return 1;

    if (strict_acs_check) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Device %s is behind a switch lacking ACS and "
                         "cannot be assigned"),
                       dev->name);
        return 0;
    }

    VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed",
              dev->id, dev->name);
    return 1;
}
2101 2102 2103 2104 2105 2106 2107 2108 2109 2110

/*
 * Wrapper around virStrToLong_ui() that logs an error message when the
 * conversion fails. Arguments and return value are passed through
 * unchanged.
 */
static int
logStrToLong_ui(char const *s,
                char **end_ptr,
                int base,
                unsigned int *result)
{
    int rc = virStrToLong_ui(s, end_ptr, base, result);

    if (rc != 0)
        VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s);

    return rc;
}

2116 2117
int
virPCIDeviceAddressParse(char *address,
2118
                         virPCIDeviceAddressPtr bdf)
2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144
{
    char *p = NULL;
    int ret = -1;

    if ((address == NULL) || (logStrToLong_ui(address, &p, 16,
                                              &bdf->domain) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->bus) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->slot) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->function) == -1)) {
        goto out;
    }

    ret = 0;

2145
 out:
2146 2147 2148
    return ret;
}

2149

2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174
/*
 * Validates the uid of a zPCI address, reporting an XML error when it
 * is out of range.
 *
 * Returns true if the uid is valid, false otherwise.
 */
bool
virZPCIDeviceAddressIsValid(virZPCIDeviceAddressPtr zpci)
{
    /* We don't need to check fid because fid covers
     * all range of uint32 type.
     */
    if (zpci->uid != 0 && zpci->uid <= VIR_DOMAIN_DEVICE_ZPCI_MAX_UID)
        return true;

    virReportError(VIR_ERR_XML_ERROR,
                   _("Invalid PCI address uid='0x%.4x', "
                     "must be > 0x0000 and <= 0x%.4x"),
                   zpci->uid,
                   VIR_DOMAIN_DEVICE_ZPCI_MAX_UID);
    return false;
}

/* Returns true when neither uid nor fid of @addr is set. */
bool
virZPCIDeviceAddressIsEmpty(const virZPCIDeviceAddress *addr)
{
    return addr->uid == 0 && addr->fid == 0;
}

2175
#ifdef __linux__
2176

2177
virPCIDeviceAddressPtr
2178
virPCIGetDeviceAddressFromSysfsLink(const char *device_link)
2179
{
2180
    virPCIDeviceAddressPtr bdf = NULL;
2181
    char *config_address = NULL;
2182
    g_autofree char *device_path = NULL;
2183 2184

    if (!virFileExists(device_link)) {
2185
        VIR_DEBUG("'%s' does not exist", device_link);
2186
        return NULL;
2187 2188
    }

2189
    device_path = virFileCanonicalizePath(device_link);
2190
    if (device_path == NULL) {
2191 2192 2193
        virReportSystemError(errno,
                             _("Failed to resolve device link '%s'"),
                             device_link);
2194
        return NULL;
2195 2196
    }

2197
    config_address = last_component(device_path);
2198
    if (VIR_ALLOC(bdf) < 0)
2199
        return NULL;
2200

2201
    if (virPCIDeviceAddressParse(config_address, bdf) < 0) {
2202
        virReportError(VIR_ERR_INTERNAL_ERROR,
2203 2204
                       _("Failed to parse PCI config address '%s'"),
                       config_address);
2205
        VIR_FREE(bdf);
2206
        return NULL;
2207 2208
    }

2209
    return bdf;
2210 2211
}

2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224
/**
 * virPCIGetPhysicalFunction:
 * @vf_sysfs_path: sysfs path for the virtual function
 * @pf: where to store the physical function's address
 *
 * Given @vf_sysfs_path, this function will store the pointer
 * to a newly-allocated virPCIDeviceAddress in @pf.
 *
 * @pf might be NULL if @vf_sysfs_path does not point to a
 * virtual function. If it's not NULL, then it should be
 * freed by the caller when no longer needed.
 *
 * Returns: >=0 on success, <0 on failure
2225 2226
 */
int
2227
virPCIGetPhysicalFunction(const char *vf_sysfs_path,
2228
                          virPCIDeviceAddressPtr *pf)
2229
{
2230
    g_autofree char *device_link = NULL;
2231

2232 2233
    *pf = NULL;

2234 2235
    if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) {
        virReportOOMError();
2236
        return -1;
2237 2238
    }

2239
    if ((*pf = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2240 2241
        VIR_DEBUG("PF for VF device '%s': " VIR_PCI_DEVICE_ADDRESS_FMT,
                  vf_sysfs_path,
2242 2243
                  (*pf)->domain, (*pf)->bus, (*pf)->slot, (*pf)->function);
    }
2244

2245
    return 0;
2246 2247
}

2248

2249 2250 2251 2252
/*
 * Returns virtual functions of a physical function
 */
int
2253 2254
virPCIGetVirtualFunctions(const char *sysfs_path,
                          virPCIDeviceAddressPtr **virtual_functions,
2255 2256
                          size_t *num_virtual_functions,
                          unsigned int *max_virtual_functions)
2257 2258
{
    int ret = -1;
2259
    size_t i;
2260 2261
    g_autofree char *totalvfs_file = NULL;
    g_autofree char *totalvfs_str = NULL;
2262
    virPCIDeviceAddressPtr config_addr = NULL;
2263

2264 2265
    *virtual_functions = NULL;
    *num_virtual_functions = 0;
2266 2267
    *max_virtual_functions = 0;

2268
    totalvfs_file = g_strdup_printf("%s/sriov_totalvfs", sysfs_path);
2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280
    if (virFileExists(totalvfs_file)) {
        char *end = NULL; /* so that terminating \n doesn't create error */

        if (virFileReadAll(totalvfs_file, 16, &totalvfs_str) < 0)
            goto error;
        if (virStrToLong_ui(totalvfs_str, &end, 10, max_virtual_functions) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Unrecognized value in %s: %s"),
                           totalvfs_file, totalvfs_str);
            goto error;
        }
    }
2281

2282
    do {
2283
        g_autofree char *device_link = NULL;
2284
        /* look for virtfn%d links until one isn't found */
2285 2286
        device_link = g_strdup_printf("%s/virtfn%zu", sysfs_path,
                                      *num_virtual_functions);
2287

2288 2289
        if (!virFileExists(device_link))
            break;
2290

2291
        if (!(config_addr = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2292 2293 2294 2295 2296
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Failed to get SRIOV function from device link '%s'"),
                           device_link);
            goto error;
        }
2297

2298 2299
        if (VIR_APPEND_ELEMENT(*virtual_functions, *num_virtual_functions,
                               config_addr) < 0)
2300 2301
            goto error;
    } while (1);
2302

2303 2304
    VIR_DEBUG("Found %zu virtual functions for %s",
              *num_virtual_functions, sysfs_path);
2305
    ret = 0;
2306
 cleanup:
2307
    VIR_FREE(config_addr);
2308
    return ret;
2309

2310
 error:
2311 2312 2313
    for (i = 0; i < *num_virtual_functions; i++)
        VIR_FREE((*virtual_functions)[i]);
    VIR_FREE(*virtual_functions);
2314
    *num_virtual_functions = 0;
2315
    goto cleanup;
2316
}
2317

2318

2319 2320 2321 2322
/*
 * Returns 1 if vf device is a virtual function, 0 if not, -1 on error
 */
int
2323
virPCIIsVirtualFunction(const char *vf_sysfs_device_link)
2324
{
2325
    g_autofree char *vf_sysfs_physfn_link = NULL;
2326

2327
    vf_sysfs_physfn_link = g_strdup_printf("%s/physfn", vf_sysfs_device_link);
2328

2329
    return virFileExists(vf_sysfs_physfn_link);
2330 2331 2332 2333 2334 2335
}

/*
 * Returns the sriov virtual function index of vf given its pf
 */
int
2336 2337 2338
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
                              const char *vf_sysfs_device_link,
                              int *vf_index)
2339
{
2340 2341
    int ret = -1;
    size_t i;
2342
    size_t num_virt_fns = 0;
2343
    unsigned int max_virt_fns = 0;
2344 2345
    virPCIDeviceAddressPtr vf_bdf = NULL;
    virPCIDeviceAddressPtr *virt_fns = NULL;
2346

2347
    if (!(vf_bdf = virPCIGetDeviceAddressFromSysfsLink(vf_sysfs_device_link)))
2348 2349
        return ret;

2350
    if (virPCIGetVirtualFunctions(pf_sysfs_device_link, &virt_fns,
2351
                                  &num_virt_fns, &max_virt_fns) < 0) {
2352
        virReportError(VIR_ERR_INTERNAL_ERROR,
2353
                       _("Error getting physical function's '%s' "
2354
                         "virtual_functions"), pf_sysfs_device_link);
2355 2356 2357 2358
        goto out;
    }

    for (i = 0; i < num_virt_fns; i++) {
2359
        if (virPCIDeviceAddressEqual(vf_bdf, virt_fns[i])) {
2360 2361 2362 2363
            *vf_index = i;
            ret = 0;
            break;
        }
2364 2365
    }

2366
 out:
2367 2368 2369

    /* free virtual functions */
    for (i = 0; i < num_virt_fns; i++)
2370
        VIR_FREE(virt_fns[i]);
2371

A
ajia@redhat.com 已提交
2372
    VIR_FREE(virt_fns);
2373 2374 2375 2376 2377
    VIR_FREE(vf_bdf);

    return ret;
}

2378 2379 2380 2381 2382
/*
 * Returns a path to the PCI sysfs file given the BDF of the PCI function
 */

int
2383
virPCIGetSysfsFile(char *virPCIDeviceName, char **pci_sysfs_device_link)
2384
{
2385 2386
    *pci_sysfs_device_link = g_strdup_printf(PCI_SYSFS "devices/%s",
                                             virPCIDeviceName);
2387
    return 0;
2388 2389
}

R
Roopa Prabhu 已提交
2390
int
2391
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
2392
                                char **pci_sysfs_device_link)
R
Roopa Prabhu 已提交
2393
{
2394 2395
    *pci_sysfs_device_link = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT, addr->domain,
                                             addr->bus, addr->slot, addr->function);
2396
    return 0;
R
Roopa Prabhu 已提交
2397 2398
}

2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409
/**
 * virPCIGetNetName:
 * @device_link_sysfs_path: sysfs path to the PCI device
 * @idx: used to choose which netdev when there are several
 *       (ignored if physPortID is set)
 * @physPortID: match this string in the netdev's phys_port_id
 *       (or NULL to ignore and use idx instead)
 * @netname: used to return the name of the netdev
 *       (set to NULL (but returns success) if there is no netdev)
 *
 * Returns 0 on success, -1 on error (error has been logged)
2410 2411
 */
int
2412 2413 2414 2415
virPCIGetNetName(const char *device_link_sysfs_path,
                 size_t idx,
                 char *physPortID,
                 char **netname)
2416
{
2417 2418 2419
    g_autofree char *pcidev_sysfs_net_path = NULL;
    g_autofree char *firstEntryName = NULL;
    g_autofree char *thisPhysPortID = NULL;
2420 2421 2422
    int ret = -1;
    DIR *dir = NULL;
    struct dirent *entry = NULL;
2423
    size_t i = 0;
2424

2425 2426
    *netname = NULL;

2427 2428 2429 2430 2431 2432
    if (virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path,
                     "net") == -1) {
        virReportOOMError();
        return -1;
    }

2433 2434 2435
    if (virDirOpenQuiet(&dir, pcidev_sysfs_net_path) < 0) {
        /* this *isn't* an error - caller needs to check for netname == NULL */
        ret = 0;
2436
        goto cleanup;
2437
    }
2438

E
Eric Blake 已提交
2439
    while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) {
2440 2441 2442 2443 2444 2445 2446 2447 2448 2449
        /* if the caller sent a physPortID, compare it to the
         * physportID of this netdev. If not, look for entry[idx].
         */
        if (physPortID) {
            if (virNetDevGetPhysPortID(entry->d_name, &thisPhysPortID) < 0)
                goto cleanup;

            /* if this one doesn't match, keep looking */
            if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) {
                VIR_FREE(thisPhysPortID);
2450 2451 2452 2453 2454
                /* save the first entry we find to use as a failsafe
                 * in case we don't match the phys_port_id. This is
                 * needed because some NIC drivers (e.g. i40e)
                 * implement phys_port_id for PFs, but not for VFs
                 */
2455 2456
                if (!firstEntryName)
                    firstEntryName = g_strdup(entry->d_name);
2457

2458 2459 2460 2461 2462 2463 2464
                continue;
            }
        } else {
            if (i++ < idx)
                continue;
        }

2465
        *netname = g_strdup(entry->d_name);
2466 2467

        ret = 0;
2468 2469 2470
        break;
    }

2471 2472
    if (ret < 0) {
        if (physPortID) {
2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487
            if (firstEntryName) {
                /* we didn't match the provided phys_port_id, but this
                 * is probably because phys_port_id isn't implemented
                 * for this NIC driver, so just return the first
                 * (probably only) netname we found.
                 */
                *netname = firstEntryName;
                firstEntryName = NULL;
                ret = 0;
            } else {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Could not find network device with "
                                 "phys_port_id '%s' under PCI device at %s"),
                               physPortID, device_link_sysfs_path);
            }
2488 2489 2490 2491 2492
        } else {
            ret = 0; /* no netdev at the given index is *not* an error */
        }
    }
 cleanup:
J
Ján Tomko 已提交
2493
    VIR_DIR_CLOSE(dir);
2494
    return ret;
2495
}
R
Roopa Prabhu 已提交
2496 2497

int
2498
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
2499 2500 2501
                             int pfNetDevIdx,
                             char **pfname,
                             int *vf_index)
R
Roopa Prabhu 已提交
2502
{
2503
    virPCIDeviceAddressPtr pf_config_address = NULL;
2504 2505 2506
    g_autofree char *pf_sysfs_device_path = NULL;
    g_autofree char *vfname = NULL;
    g_autofree char *vfPhysPortID = NULL;
R
Roopa Prabhu 已提交
2507 2508
    int ret = -1;

2509
    if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
2510
        goto cleanup;
R
Roopa Prabhu 已提交
2511

2512
    if (!pf_config_address)
2513
        goto cleanup;
2514

2515 2516
    if (virPCIDeviceAddressGetSysfsFile(pf_config_address,
                                        &pf_sysfs_device_path) < 0) {
2517 2518
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2519

2520 2521 2522
    if (virPCIGetVirtualFunctionIndex(pf_sysfs_device_path,
                                      vf_sysfs_device_path, vf_index) < 0) {
        goto cleanup;
R
Roopa Prabhu 已提交
2523 2524
    }

2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544
    /* If the caller hasn't asked for a specific pfNetDevIdx, and VF
     * is bound to a netdev, learn that netdev's phys_port_id (if
     * available). This can be used to disambiguate when the PF has
     * multiple netdevs. If the VF isn't bound to a netdev, then we
     * return netdev[pfNetDevIdx] on the PF, which may or may not be
     * correct.
     */
    if (pfNetDevIdx == -1) {
        if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
            goto cleanup;

        if (vfname) {
            if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0)
                goto cleanup;
        }
        pfNetDevIdx = 0;
    }

    if (virPCIGetNetName(pf_sysfs_device_path,
                         pfNetDevIdx, vfPhysPortID, pfname) < 0) {
R
Roopa Prabhu 已提交
2545
        goto cleanup;
2546
    }
R
Roopa Prabhu 已提交
2547

2548 2549 2550 2551 2552 2553 2554 2555 2556
    if (!*pfname) {
        /* this shouldn't be possible. A VF can't exist unless its
         * PF device is bound to a network driver
         */
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("The PF device for VF %s has no network device name"),
                       vf_sysfs_device_path);
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2557

2558
    ret = 0;
2559
 cleanup:
R
Roopa Prabhu 已提交
2560 2561 2562 2563 2564
    VIR_FREE(pf_config_address);

    return ret;
}

2565 2566 2567 2568 2569 2570 2571 2572 2573

/*
 * virPCIGetMdevTypes:
 * @sysfspath: sysfs path of the PCI device
 * @types: receives an allocated array of mediated device types
 *
 * Reads the attributes of every entry found in the
 * "mdev_supported_types" directory below @sysfspath.
 *
 * Returns the number of types stored in @types, 0 if the directory
 * does not exist (@types untouched), or -1 on error.
 */
ssize_t
virPCIGetMdevTypes(const char *sysfspath,
                   virMediatedDeviceTypePtr **types)
{
    g_autofree char *types_path = NULL;
    g_autoptr(virMediatedDeviceType) mdev_type = NULL;
    virMediatedDeviceTypePtr *mdev_types = NULL;
    struct dirent *entry;
    DIR *dir = NULL;
    size_t ntypes = 0;
    size_t i;
    ssize_t ret = -1;
    int rc;

    types_path = g_strdup_printf("%s/mdev_supported_types", sysfspath);

    rc = virDirOpenIfExists(&dir, types_path);
    if (rc < 0)
        goto cleanup;

    if (rc == 0) {
        ret = 0;
        goto cleanup;
    }

    for (;;) {
        g_autofree char *attr_dir = NULL;

        rc = virDirRead(dir, &entry, types_path);
        if (rc <= 0)
            break;

        /* append the type id to the path and read the attributes from there */
        attr_dir = g_strdup_printf("%s/%s", types_path, entry->d_name);

        if (virMediatedDeviceTypeReadAttrs(attr_dir, &mdev_type) < 0)
            goto cleanup;

        if (VIR_APPEND_ELEMENT(mdev_types, ntypes, mdev_type) < 0)
            goto cleanup;
    }

    if (rc < 0)
        goto cleanup;

    /* hand the array to the caller; zero the count so the cleanup loop
     * below has nothing left to free */
    *types = g_steal_pointer(&mdev_types);
    ret = ntypes;
    ntypes = 0;

 cleanup:
    for (i = 0; i < ntypes; i++)
        virMediatedDeviceTypeFree(mdev_types[i]);
    VIR_FREE(mdev_types);
    VIR_DIR_CLOSE(dir);
    return ret;
}

2616
#else
2617 2618
static const char *unsupported = N_("not supported on non-linux platforms");

2619
virPCIDeviceAddressPtr
J
Ján Tomko 已提交
2620
virPCIGetDeviceAddressFromSysfsLink(const char *device_link G_GNUC_UNUSED)
2621 2622
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2623
    return NULL;
2624 2625 2626
}


2627
int
J
Ján Tomko 已提交
2628 2629
virPCIGetPhysicalFunction(const char *vf_sysfs_path G_GNUC_UNUSED,
                          virPCIDeviceAddressPtr *pf G_GNUC_UNUSED)
2630
{
2631
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2632 2633 2634 2635
    return -1;
}

int
J
Ján Tomko 已提交
2636 2637 2638 2639
virPCIGetVirtualFunctions(const char *sysfs_path G_GNUC_UNUSED,
                          virPCIDeviceAddressPtr **virtual_functions G_GNUC_UNUSED,
                          size_t *num_virtual_functions G_GNUC_UNUSED,
                          unsigned int *max_virtual_functions G_GNUC_UNUSED)
2640
{
2641
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2642 2643
    return -1;
}
2644 2645

int
J
Ján Tomko 已提交
2646
virPCIIsVirtualFunction(const char *vf_sysfs_device_link G_GNUC_UNUSED)
2647
{
2648
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2649 2650 2651 2652
    return -1;
}

int
J
Ján Tomko 已提交
2653 2654 2655
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link G_GNUC_UNUSED,
                              const char *vf_sysfs_device_link G_GNUC_UNUSED,
                              int *vf_index G_GNUC_UNUSED)
2656
{
2657
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2658 2659 2660 2661
    return -1;

}

2662
int
J
Ján Tomko 已提交
2663 2664
virPCIGetSysfsFile(char *virPCIDeviceName G_GNUC_UNUSED,
                   char **pci_sysfs_device_link G_GNUC_UNUSED)
2665 2666 2667 2668 2669
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}

2670
int
J
Ján Tomko 已提交
2671 2672
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr dev G_GNUC_UNUSED,
                                char **pci_sysfs_device_link G_GNUC_UNUSED)
2673
{
2674
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2675 2676 2677
    return -1;
}

2678
int
J
Ján Tomko 已提交
2679 2680 2681 2682
virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED,
                 size_t idx G_GNUC_UNUSED,
                 char *physPortID G_GNUC_UNUSED,
                 char **netname G_GNUC_UNUSED)
2683
{
2684
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2685 2686
    return -1;
}
R
Roopa Prabhu 已提交
2687 2688

int
J
Ján Tomko 已提交
2689 2690 2691 2692
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path G_GNUC_UNUSED,
                             int pfNetDevIdx G_GNUC_UNUSED,
                             char **pfname G_GNUC_UNUSED,
                             int *vf_index G_GNUC_UNUSED)
R
Roopa Prabhu 已提交
2693
{
2694
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
R
Roopa Prabhu 已提交
2695 2696
    return -1;
}
2697 2698 2699


ssize_t
J
Ján Tomko 已提交
2700 2701
virPCIGetMdevTypes(const char *sysfspath G_GNUC_UNUSED,
                   virMediatedDeviceTypePtr **types G_GNUC_UNUSED)
2702 2703 2704 2705
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}
2706
#endif /* __linux__ */
2707 2708 2709 2710 2711 2712 2713

/*
 * virPCIDeviceIsPCIExpress:
 * @dev: PCI device
 *
 * Returns 1 if @dev has a PCI Express capability, 0 if it does not,
 * -1 if the device's config space could not be opened or initialized.
 */
int
virPCIDeviceIsPCIExpress(virPCIDevicePtr dev)
{
    int ret = -1;
    int fd = virPCIDeviceConfigOpen(dev);

    if (fd < 0)
        return -1;

    if (virPCIDeviceInit(dev, fd) >= 0)
        ret = (dev->pcie_cap_pos != 0);

    virPCIDeviceConfigClose(dev, fd);
    return ret;
}

/*
 * virPCIDeviceHasPCIExpressLink:
 * @dev: PCI device
 *
 * Reads the device type from the flags register at the PCI Express
 * capability position.
 *
 * Returns 0 if the type is a root complex integrated endpoint or a root
 * complex event collector, 1 for any other type, -1 if the device's
 * config space could not be opened or initialized.
 */
int
virPCIDeviceHasPCIExpressLink(virPCIDevicePtr dev)
{
    uint16_t flags;
    int ret = -1;
    int fd = virPCIDeviceConfigOpen(dev);

    if (fd < 0)
        return -1;

    if (virPCIDeviceInit(dev, fd) < 0)
        goto cleanup;

    flags = virPCIDeviceRead16(dev, fd, dev->pcie_cap_pos + PCI_CAP_FLAGS);

    switch ((flags & PCI_EXP_FLAGS_TYPE) >> 4) {
    case PCI_EXP_TYPE_ROOT_INT_EP:
    case PCI_EXP_TYPE_ROOT_EC:
        ret = 0;
        break;
    default:
        ret = 1;
        break;
    }

 cleanup:
    virPCIDeviceConfigClose(dev, fd);
    return ret;
}

/*
 * virPCIDeviceGetLinkCapSta:
 * @dev: PCI device
 * @cap_port: receives bits 24 and up of the link capabilities register
 * @cap_speed: receives the speed field of the link capabilities
 * @cap_width: receives the width field of the link capabilities
 * @sta_speed: receives the speed field of the link status
 * @sta_width: receives the width field of the link status
 *
 * Returns 0 on success, -1 on failure, including when @dev is not a
 * PCI-Express device.
 */
int
virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
                          int *cap_port,
                          unsigned int *cap_speed,
                          unsigned int *cap_width,
                          unsigned int *sta_speed,
                          unsigned int *sta_width)
{
    uint32_t lnkcap;
    uint32_t lnksta;
    int ret = -1;
    int fd = virPCIDeviceConfigOpen(dev);

    if (fd < 0)
        return -1;

    if (virPCIDeviceInit(dev, fd) < 0)
        goto cleanup;

    if (!dev->pcie_cap_pos) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("pci device %s is not a PCI-Express device"),
                       dev->name);
        goto cleanup;
    }

    lnkcap = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKCAP);
    lnksta = virPCIDeviceRead16(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKSTA);

    *cap_port = lnkcap >> 24;
    *cap_speed = lnkcap & PCI_EXP_LNKCAP_SPEED;
    *cap_width = (lnkcap & PCI_EXP_LNKCAP_WIDTH) >> 4;

    *sta_speed = lnksta & PCI_EXP_LNKSTA_SPEED;
    *sta_width = (lnksta & PCI_EXP_LNKSTA_WIDTH) >> 4;
    ret = 0;

 cleanup:
    virPCIDeviceConfigClose(dev, fd);
    return ret;
}
2791 2792


2793 2794 2795 2796 2797 2798 2799
/*
 * virPCIGetHeaderType:
 * @dev: PCI device
 * @hdrType: receives the masked header type, or -1 on failure
 *
 * Returns 0 on success, -1 if the config space could not be opened or
 * the header type is not one known to virPCIHeader.
 */
int virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType)
{
    uint8_t hdr;
    int fd;

    *hdrType = -1;

    fd = virPCIDeviceConfigOpen(dev);
    if (fd < 0)
        return -1;

    hdr = virPCIDeviceRead8(dev, fd, PCI_HEADER_TYPE);
    virPCIDeviceConfigClose(dev, fd);

    hdr &= PCI_HEADER_TYPE_MASK;

    if (hdr < VIR_PCI_HEADER_LAST) {
        *hdrType = hdr;
        return 0;
    }

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("Unknown PCI header type '%d' for device '%s'"),
                   hdr, dev->name);
    return -1;
}


2821 2822 2823 2824 2825 2826 2827 2828 2829 2830
/* Frees @dev together with its link_cap and link_sta members.
 * Does nothing when @dev is NULL.
 */
void
virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev)
{
    if (dev) {
        VIR_FREE(dev->link_cap);
        VIR_FREE(dev->link_sta);
        VIR_FREE(dev);
    }
}
2831 2832 2833 2834 2835 2836

/* Releases @address by passing it to VIR_FREE. */
void
virPCIDeviceAddressFree(virPCIDeviceAddressPtr address)
{
    VIR_FREE(address);
}