virpci.c 78.8 KB
Newer Older
1
/*
2 3
 * virpci.c: helper APIs for managing host PCI devices
 *
4
 * Copyright (C) 2009-2015 Red Hat, Inc.
5 6 7 8 9 10 11 12 13 14 15 16
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library.  If not, see
O
Osier Yang 已提交
18
 * <http://www.gnu.org/licenses/>.
19 20 21 22
 */

#include <config.h>

23
#include "virpci.h"
24
#include "virnetdev.h"
25 26 27 28 29 30 31 32

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

33
#include "dirname.h"
34
#include "virlog.h"
35
#include "vircommand.h"
36
#include "virerror.h"
E
Eric Blake 已提交
37
#include "virfile.h"
38
#include "virkmod.h"
39 40
#include "virstring.h"
#include "virutil.h"
41
#include "viralloc.h"
42

43 44
VIR_LOG_INIT("util.pci");

45 46 47
#define PCI_SYSFS "/sys/bus/pci/"
#define PCI_ID_LEN 10   /* "XXXX XXXX" */

48 49
VIR_ENUM_IMPL(virPCIELinkSpeed,
              VIR_PCIE_LINK_SPEED_LAST,
50 51
              "", "2.5", "5", "8", "16",
);
52

53 54
VIR_ENUM_IMPL(virPCIStubDriver,
              VIR_PCI_STUB_DRIVER_LAST,
55 56 57
              "none",
              "pciback", /* XEN */
              "vfio-pci", /* VFIO */
58
);
59

60 61
VIR_ENUM_IMPL(virPCIHeader,
              VIR_PCI_HEADER_LAST,
62 63 64
              "endpoint",
              "pci-bridge",
              "cardbus-bridge",
65
);
66

67
struct _virPCIDevice {
68
    virPCIDeviceAddress address;
69

70
    char          *name;              /* domain:bus:slot.function */
71
    char          id[PCI_ID_LEN];     /* product vendor */
E
Eric Blake 已提交
72
    char          *path;
C
Chunyan Liu 已提交
73 74 75 76

    /* The driver:domain which uses the device */
    char          *used_by_drvname;
    char          *used_by_domname;
77

78 79
    unsigned int  pcie_cap_pos;
    unsigned int  pci_pm_cap_pos;
80 81
    bool          has_flr;
    bool          has_pm_reset;
82
    bool          managed;
83 84

    virPCIStubDriver stubDriver;
85 86

    /* used by reattach function */
87 88 89
    bool          unbind_from_stub;
    bool          remove_slot;
    bool          reprobe;
90 91
};

92
struct _virPCIDeviceList {
93 94
    virObjectLockable parent;

95
    size_t count;
96
    virPCIDevicePtr *devs;
97 98 99
};


100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
/* For virReportOOMError()  and virReportSystemError() */
#define VIR_FROM_THIS VIR_FROM_NONE

/* Specifications referenced in comments:
 *  PCI30  - PCI Local Bus Specification 3.0
 *  PCIe20 - PCI Express Base Specification 2.0
 *  BR12   - PCI-to-PCI Bridge Architecture Specification 1.2
 *  PM12   - PCI Bus Power Management Interface Specification 1.2
 *  ECN_AF - Advanced Capabilities for Conventional PCI ECN
 */

/* Type 0 config space header length; PCI30 Section 6.1 Configuration Space Organization */
#define PCI_CONF_LEN            0x100
#define PCI_CONF_HEADER_LEN     0x40

/* PCI30 6.2.1 */
#define PCI_HEADER_TYPE         0x0e    /* Header type */
117 118 119
#define PCI_HEADER_TYPE_BRIDGE 0x1
#define PCI_HEADER_TYPE_MASK   0x7f
#define PCI_HEADER_TYPE_MULTI  0x80
120 121 122 123 124 125 126 127 128

/* PCI30 6.2.1  Device Identification */
#define PCI_CLASS_DEVICE        0x0a    /* Device class */

/* Class Code for bridge; PCI30 D.7  Base Class 06h */
#define PCI_CLASS_BRIDGE_PCI    0x0604

/* PCI30 6.2.3  Device Status */
#define PCI_STATUS              0x06    /* 16 bits */
129
#define PCI_STATUS_CAP_LIST    0x10    /* Support Capability List */
130 131 132

/* PCI30 6.7  Capabilities List */
#define PCI_CAPABILITY_LIST     0x34    /* Offset of first capability list entry */
133
#define PCI_CAP_FLAGS           2       /* Capability defined flags (16 bits) */
134 135 136 137 138 139 140 141 142 143

/* PM12 3.2.1  Capability Identifier */
#define PCI_CAP_ID_PM           0x01    /* Power Management */
/* PCI30 H Capability IDs */
#define PCI_CAP_ID_EXP          0x10    /* PCI Express */
/* ECN_AF 6.x.1.1  Capability ID for AF */
#define PCI_CAP_ID_AF           0x13    /* Advanced Features */

/* PCIe20 7.8.3  Device Capabilities Register (Offset 04h) */
#define PCI_EXP_DEVCAP          0x4     /* Device capabilities */
144 145
#define PCI_EXP_DEVCAP_FLR     (1<<28)  /* Function Level Reset */
#define PCI_EXP_LNKCAP          0xc     /* Link Capabilities */
146
#define PCI_EXP_LNKCAP_SPEED    0x0000f /* Maximum Link Speed */
147 148 149 150
#define PCI_EXP_LNKCAP_WIDTH    0x003f0 /* Maximum Link Width */
#define PCI_EXP_LNKSTA          0x12    /* Link Status */
#define PCI_EXP_LNKSTA_SPEED    0x000f  /* Negotiated Link Speed */
#define PCI_EXP_LNKSTA_WIDTH    0x03f0  /* Negotiated Link Width */
151 152 153 154 155 156 157

/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */
#define PCI_PRIMARY_BUS         0x18    /* BR12 3.2.5.2 Primary bus number */
#define PCI_SECONDARY_BUS       0x19    /* BR12 3.2.5.3 Secondary bus number */
#define PCI_SUBORDINATE_BUS     0x1a    /* BR12 3.2.5.4 Highest bus number behind the bridge */
#define PCI_BRIDGE_CONTROL      0x3e
/* BR12 3.2.5.18  Bridge Control Register */
158
#define PCI_BRIDGE_CTL_RESET   0x40    /* Secondary bus reset */
159 160 161

/* PM12 3.2.4  Power Management Control/Status (Offset = 4) */
#define PCI_PM_CTRL                4    /* PM control and status register */
162 163 164 165
#define PCI_PM_CTRL_STATE_MASK    0x3  /* Current power state (D0 to D3) */
#define PCI_PM_CTRL_STATE_D0      0x0  /* D0 state */
#define PCI_PM_CTRL_STATE_D3hot   0x3  /* D3 state */
#define PCI_PM_CTRL_NO_SOFT_RESET 0x8  /* No reset for D3hot->D0 */
166 167 168

/* ECN_AF 6.x.1  Advanced Features Capability Structure */
#define PCI_AF_CAP              0x3     /* Advanced features capabilities */
169
#define PCI_AF_CAP_FLR         0x2     /* Function Level Reset */
170

J
Jiri Denemark 已提交
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
#define PCI_EXP_FLAGS           0x2
#define PCI_EXP_FLAGS_TYPE      0x00f0
#define PCI_EXP_TYPE_DOWNSTREAM 0x6

#define PCI_EXT_CAP_BASE          0x100
#define PCI_EXT_CAP_LIMIT         0x1000
#define PCI_EXT_CAP_ID_MASK       0x0000ffff
#define PCI_EXT_CAP_OFFSET_SHIFT  20
#define PCI_EXT_CAP_OFFSET_MASK   0x00000ffc

#define PCI_EXT_CAP_ID_ACS      0x000d
#define PCI_EXT_ACS_CTRL        0x06

#define PCI_EXT_CAP_ACS_SV      0x01
#define PCI_EXT_CAP_ACS_RR      0x04
#define PCI_EXT_CAP_ACS_CR      0x08
#define PCI_EXT_CAP_ACS_UF      0x10
188 189 190
#define PCI_EXT_CAP_ACS_ENABLED (PCI_EXT_CAP_ACS_SV | \
                                 PCI_EXT_CAP_ACS_RR | \
                                 PCI_EXT_CAP_ACS_CR | \
J
Jiri Denemark 已提交
191 192
                                 PCI_EXT_CAP_ACS_UF)

193 194 195
#define PCI_EXP_TYPE_ROOT_INT_EP 0x9    /* Root Complex Integrated Endpoint */
#define PCI_EXP_TYPE_ROOT_EC 0xa        /* Root Complex Event Collector */

196 197 198 199 200 201
static virClassPtr virPCIDeviceListClass;

static void virPCIDeviceListDispose(void *obj);

static int virPCIOnceInit(void)
{
202
    if (!VIR_CLASS_NEW(virPCIDeviceList, virClassForObjectLockable()))
203 204 205 206 207
        return -1;

    return 0;
}

208
VIR_ONCE_GLOBAL_INIT(virPCI);
209

L
Laine Stump 已提交
210

211 212
static char *
virPCIDriverDir(const char *driver)
L
Laine Stump 已提交
213
{
214
    char *buffer;
L
Laine Stump 已提交
215

216
    buffer = g_strdup_printf(PCI_SYSFS "drivers/%s", driver);
217
    return buffer;
L
Laine Stump 已提交
218 219 220
}


221 222
static char *
virPCIFile(const char *device, const char *file)
L
Laine Stump 已提交
223
{
224
    char *buffer;
L
Laine Stump 已提交
225

226
    buffer = g_strdup_printf(PCI_SYSFS "devices/%s/%s", device, file);
227
    return buffer;
L
Laine Stump 已提交
228 229 230 231 232 233 234 235 236 237
}


/* virPCIDeviceGetDriverPathAndName - put the path to the driver
 * directory of the driver in use for this device in @path and the
 * name of the driver in @name. Both could be NULL if it's not bound
 * to any driver.
 *
 * Return 0 for success, -1 for error.
 */
238
int
L
Laine Stump 已提交
239 240 241
virPCIDeviceGetDriverPathAndName(virPCIDevicePtr dev, char **path, char **name)
{
    int ret = -1;
242
    g_autofree char *drvlink = NULL;
L
Laine Stump 已提交
243 244 245

    *path = *name = NULL;
    /* drvlink = "/sys/bus/pci/dddd:bb:ss.ff/driver" */
246
    if (!(drvlink = virPCIFile(dev->name, "driver")))
L
Laine Stump 已提交
247 248
        goto cleanup;

249 250 251 252 253
    if (!virFileExists(drvlink)) {
        ret = 0;
        goto cleanup;
    }

L
Laine Stump 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266 267
    if (virFileIsLink(drvlink) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s driver file %s is not a symlink"),
                       dev->name, drvlink);
        goto cleanup;
    }
    if (virFileResolveLink(drvlink, path) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s driver symlink %s"),
                       dev->name, drvlink);
        goto cleanup;
    }
    /* path = "/sys/bus/pci/drivers/${drivername}" */

268
    *name = g_strdup(last_component(*path));
L
Laine Stump 已提交
269 270 271
    /* name = "${drivername}" */

    ret = 0;
272
 cleanup:
L
Laine Stump 已提交
273 274 275 276 277 278 279 280
    if (ret < 0) {
        VIR_FREE(*path);
        VIR_FREE(*name);
    }
    return ret;
}


281
static int
282
virPCIDeviceConfigOpenInternal(virPCIDevicePtr dev, bool readonly, bool fatal)
283 284 285
{
    int fd;

286
    fd = open(dev->path, readonly ? O_RDONLY : O_RDWR);
287

288
    if (fd < 0) {
289 290 291 292 293 294 295 296 297
        if (fatal) {
            virReportSystemError(errno,
                                 _("Failed to open config space file '%s'"),
                                 dev->path);
        } else {
            char ebuf[1024];
            VIR_WARN("Failed to open config space file '%s': %s",
                     dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
        }
298 299
        return -1;
    }
300

301
    VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path);
302
    return fd;
303 304
}

305
static int
306
virPCIDeviceConfigOpen(virPCIDevicePtr dev)
307
{
308
    return virPCIDeviceConfigOpenInternal(dev, true, true);
309 310
}

311 312 313 314 315 316
/* Open the config space of @dev read-only; a failure only produces a
 * warning in the log. Returns the file descriptor or -1.
 */
static int
virPCIDeviceConfigOpenTry(virPCIDevicePtr dev)
{
    const bool readonly = true;
    const bool fatal = false;

    return virPCIDeviceConfigOpenInternal(dev, readonly, fatal);
}

317 318 319
/* Open the config space of @dev for reading and writing; a failure is
 * reported as an error. Returns the file descriptor or -1.
 */
static int
virPCIDeviceConfigOpenWrite(virPCIDevicePtr dev)
{
    const bool readonly = false;
    const bool fatal = true;

    return virPCIDeviceConfigOpenInternal(dev, readonly, fatal);
}

323
static void
324
virPCIDeviceConfigClose(virPCIDevicePtr dev, int cfgfd)
325
{
326 327 328 329 330
    if (VIR_CLOSE(cfgfd) < 0) {
        char ebuf[1024];
        VIR_WARN("Failed to close config space file '%s': %s",
                 dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
    }
331 332
}

333

334
static int
335 336
virPCIDeviceRead(virPCIDevicePtr dev,
                 int cfgfd,
337
                 unsigned int pos,
338
                 uint8_t *buf,
339
                 unsigned int buflen)
340 341 342
{
    memset(buf, 0, buflen);

343 344
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        saferead(cfgfd, buf, buflen) != buflen) {
345
        char ebuf[1024];
346
        VIR_WARN("Failed to read from '%s' : %s", dev->path,
347 348 349 350 351 352 353
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static uint8_t
354
virPCIDeviceRead8(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
355 356
{
    uint8_t buf;
357
    virPCIDeviceRead(dev, cfgfd, pos, &buf, sizeof(buf));
358 359 360 361
    return buf;
}

static uint16_t
362
virPCIDeviceRead16(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
363 364
{
    uint8_t buf[2];
365
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
366 367 368 369
    return (buf[0] << 0) | (buf[1] << 8);
}

static uint32_t
370
virPCIDeviceRead32(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
371 372
{
    uint8_t buf[4];
373
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
374 375 376
    return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
}

377 378 379
/* Read the device class of @dev from its sysfs "class" file.
 *
 * The file content is parsed as a hexadecimal number and bits 8..23 of
 * it are stored in @device_class.
 *
 * Returns 0 on success, -1 on failure (path construction, file read or
 * unparsable content).
 */
static int
virPCIDeviceReadClass(virPCIDevicePtr dev, uint16_t *device_class)
{
    g_autofree char *path = NULL;
    g_autofree char *id_str = NULL;
    unsigned int value;
    int len;

    if (!(path = virPCIFile(dev->name, "class")))
        return -1;

    /* class string is '0xNNNNNN\n' ... i.e. 9 bytes */
    if ((len = virFileReadAll(path, 9, &id_str)) < 0)
        return -1;

    /* Cut the string after the 8 characters of "0xNNNNNN", but only when
     * that many bytes were actually read; index 8 is not known to be
     * inside the buffer otherwise. Shorter content is handed to the
     * parser unchanged. */
    if (len >= 8)
        id_str[8] = '\0';

    if (virStrToLong_ui(id_str, NULL, 16, &value) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unusual value in %s/devices/%s/class: %s"),
                       PCI_SYSFS, dev->name, id_str);
        return -1;
    }

    *device_class = (value >> 8) & 0xFFFF;
    return 0;
}

403
static int
404 405
virPCIDeviceWrite(virPCIDevicePtr dev,
                  int cfgfd,
406
                  unsigned int pos,
407
                  uint8_t *buf,
408
                  unsigned int buflen)
409
{
410 411
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        safewrite(cfgfd, buf, buflen) != buflen) {
412
        char ebuf[1024];
413
        VIR_WARN("Failed to write to '%s' : %s", dev->path,
414 415 416 417 418 419 420
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static void
421
virPCIDeviceWrite16(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint16_t val)
422 423
{
    uint8_t buf[2] = { (val >> 0), (val >> 8) };
424
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
425 426 427
}

static void
428
virPCIDeviceWrite32(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint32_t val)
429
{
430
    uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val >> 24) };
431
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
432 433
}

E
Eric Blake 已提交
434 435
typedef int (*virPCIDeviceIterPredicate)(virPCIDevicePtr, virPCIDevicePtr,
                                         void *);
436 437 438 439 440 441 442

/* Iterate over available PCI devices calling @predicate
 * to compare each one to @dev.
 * Return -1 on error since we don't want to assume it is
 * safe to reset if there is an error.
 */
static int
443 444 445 446
virPCIDeviceIterDevices(virPCIDeviceIterPredicate predicate,
                        virPCIDevicePtr dev,
                        virPCIDevicePtr *matched,
                        void *data)
447 448 449
{
    DIR *dir;
    struct dirent *entry;
450
    int ret = 0;
451
    int rc;
452 453 454 455 456

    *matched = NULL;

    VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices", dev->id, dev->name);

J
Ján Tomko 已提交
457
    if (virDirOpen(&dir, PCI_SYSFS "devices") < 0)
458 459
        return -1;

E
Eric Blake 已提交
460
    while ((ret = virDirRead(dir, &entry, PCI_SYSFS "devices")) > 0) {
461
        unsigned int domain, bus, slot, function;
J
Ján Tomko 已提交
462
        g_autoptr(virPCIDevice) check = NULL;
463
        char *tmp;
464

465 466 467 468 469 470 471 472 473
        /* expected format: <domain>:<bus>:<slot>.<function> */
        if (/* domain */
            virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp != ':' ||
            /* bus */
            virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' ||
            /* slot */
            virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' ||
            /* function */
            virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) {
474 475 476 477
            VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s", entry->d_name);
            continue;
        }

478
        check = virPCIDeviceNew(domain, bus, slot, function);
479
        if (!check) {
480 481 482
            ret = -1;
            break;
        }
483

484 485 486 487 488
        rc = predicate(dev, check, data);
        if (rc < 0) {
            /* the predicate returned an error, bail */
            ret = -1;
            break;
489
        } else if (rc == 1) {
490
            VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name, check->name);
491
            *matched = g_steal_pointer(&check);
492
            ret = 1;
493 494 495
            break;
        }
    }
J
Ján Tomko 已提交
496
    VIR_DIR_CLOSE(dir);
497
    return ret;
498 499 500
}

static uint8_t
501 502 503
virPCIDeviceFindCapabilityOffset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 unsigned int capability)
504 505 506 507
{
    uint16_t status;
    uint8_t pos;

508
    status = virPCIDeviceRead16(dev, cfgfd, PCI_STATUS);
509 510 511
    if (!(status & PCI_STATUS_CAP_LIST))
        return 0;

512
    pos = virPCIDeviceRead8(dev, cfgfd, PCI_CAPABILITY_LIST);
513 514 515 516 517 518 519 520 521

    /* Zero indicates last capability, capabilities can't
     * be in the config space header and 0xff is returned
     * by the kernel if we don't have access to this region
     *
     * Note: we're not handling loops or extended
     * capabilities here.
     */
    while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) {
522
        uint8_t capid = virPCIDeviceRead8(dev, cfgfd, pos);
523 524 525 526 527 528
        if (capid == capability) {
            VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x",
                      dev->id, dev->name, capability, pos);
            return pos;
        }

529
        pos = virPCIDeviceRead8(dev, cfgfd, pos + 1);
530 531 532 533 534 535 536
    }

    VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name, capability);

    return 0;
}

J
Jiri Denemark 已提交
537
static unsigned int
538 539
virPCIDeviceFindExtendedCapabilityOffset(virPCIDevicePtr dev,
                                         int cfgfd,
540
                                         unsigned int capability)
J
Jiri Denemark 已提交
541 542 543 544 545 546 547 548 549 550
{
    int ttl;
    unsigned int pos;
    uint32_t header;

    /* minimum 8 bytes per capability */
    ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8;
    pos = PCI_EXT_CAP_BASE;

    while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) {
551
        header = virPCIDeviceRead32(dev, cfgfd, pos);
J
Jiri Denemark 已提交
552 553 554 555 556 557 558 559 560 561 562

        if ((header & PCI_EXT_CAP_ID_MASK) == capability)
            return pos;

        pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK;
        ttl--;
    }

    return 0;
}

563 564 565 566
/* detects whether this device has FLR.  Returns 0 if the device does
 * not have FLR, 1 if it does, and -1 on error
 */
static int
567
virPCIDeviceDetectFunctionLevelReset(virPCIDevicePtr dev, int cfgfd)
568
{
M
Mark McLoughlin 已提交
569
    uint32_t caps;
570
    uint8_t pos;
571
    g_autofree char *path = NULL;
572
    int found;
573 574 575 576 577 578 579 580

    /* The PCIe Function Level Reset capability allows
     * individual device functions to be reset without
     * affecting any other functions on the device or
     * any other devices on the bus. This is only common
     * on SR-IOV NICs at the moment.
     */
    if (dev->pcie_cap_pos) {
581
        caps = virPCIDeviceRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP);
582 583 584 585 586 587 588 589 590 591
        if (caps & PCI_EXP_DEVCAP_FLR) {
            VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id, dev->name);
            return 1;
        }
    }

    /* The PCI AF Function Level Reset capability is
     * the same thing, except for conventional PCI
     * devices. This is not common yet.
     */
592
    pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF);
593
    if (pos) {
594
        caps = virPCIDeviceRead16(dev, cfgfd, pos + PCI_AF_CAP);
595 596 597 598 599 600
        if (caps & PCI_AF_CAP_FLR) {
            VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id, dev->name);
            return 1;
        }
    }

601 602 603 604 605 606
    /* there are some buggy devices that do support FLR, but forget to
     * advertise that fact in their capabilities.  However, FLR is *required*
     * to be present for virtual functions (VFs), so if we see that this
     * device is a VF, we just assume FLR works
     */

607
    path = g_strdup_printf(PCI_SYSFS "devices/%s/physfn", dev->name);
608 609 610 611 612 613 614 615

    found = virFileExists(path);
    if (found) {
        VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF; forcing flr on",
                  dev->id, dev->name);
        return 1;
    }

616 617 618 619 620 621 622 623 624
    VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name);

    return 0;
}

/* Require the device has the PCI Power Management capability
 * and that a D3hot->D0 transition will results in a full
 * internal reset, not just a soft reset.
 */
625
static unsigned int
626
virPCIDeviceDetectPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
627 628 629 630 631
{
    if (dev->pci_pm_cap_pos) {
        uint32_t ctl;

        /* require the NO_SOFT_RESET bit is clear */
632
        ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
633 634 635 636 637 638 639 640 641 642 643
        if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) {
            VIR_DEBUG("%s %s: detected PM reset capability", dev->id, dev->name);
            return 1;
        }
    }

    VIR_DEBUG("%s %s: no PM reset capability found", dev->id, dev->name);

    return 0;
}

644
/* Any active devices on the same domain/bus ? */
645
static int
646
virPCIDeviceSharesBusWithActive(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
647
{
648
    virPCIDeviceList *inactiveDevs = data;
649

650
    /* Different domain, different bus, or simply identical device */
651 652 653 654
    if (dev->address.domain != check->address.domain ||
        dev->address.bus != check->address.bus ||
        (dev->address.slot == check->address.slot &&
         dev->address.function == check->address.function))
655 656
        return 0;

657
    /* same bus, but inactive, i.e. about to be assigned to guest */
658
    if (inactiveDevs && virPCIDeviceListFind(inactiveDevs, check))
659
        return 0;
660

661
    return 1;
662 663
}

664 665 666
/* Look for an active device on the same domain/bus as @dev, ignoring
 * devices listed in @inactiveDevs.
 *
 * Returns the first such device found (to be released by the caller), or
 * NULL when there is none.
 * NOTE(review): an error while iterating also yields NULL, so callers
 * cannot tell it apart from "no active device".
 */
static virPCIDevicePtr
virPCIDeviceBusContainsActiveDevices(virPCIDevicePtr dev,
                                     virPCIDeviceList *inactiveDevs)
{
    virPCIDevicePtr found = NULL;
    int rc = virPCIDeviceIterDevices(virPCIDeviceSharesBusWithActive,
                                     dev, &found, inactiveDevs);

    return rc < 0 ? NULL : found;
}

/* Is @check the parent of @dev ? */
676
static int
677
virPCIDeviceIsParent(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
678 679 680
{
    uint16_t device_class;
    uint8_t header_type, secondary, subordinate;
681
    virPCIDevicePtr *best = data;
682 683
    int ret = 0;
    int fd;
684

685
    if (dev->address.domain != check->address.domain)
686 687
        return 0;

688
    if ((fd = virPCIDeviceConfigOpenTry(check)) < 0)
689 690
        return 0;

691
    /* Is it a bridge? */
692 693
    ret = virPCIDeviceReadClass(check, &device_class);
    if (ret < 0 || device_class != PCI_CLASS_BRIDGE_PCI)
694
        goto cleanup;
695 696

    /* Is it a plane? */
697
    header_type = virPCIDeviceRead8(check, fd, PCI_HEADER_TYPE);
698
    if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
699
        goto cleanup;
700

701 702
    secondary   = virPCIDeviceRead8(check, fd, PCI_SECONDARY_BUS);
    subordinate = virPCIDeviceRead8(check, fd, PCI_SUBORDINATE_BUS);
703

704
    VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name, check->name);
705

706 707 708
    /* if the secondary bus exactly equals the device's bus, then we found
     * the direct parent.  No further work is necessary
     */
709
    if (dev->address.bus == secondary) {
710 711 712
        ret = 1;
        goto cleanup;
    }
713

714
    /* otherwise, SRIOV allows VFs to be on different buses than their PFs.
715 716 717
     * In this case, what we need to do is look for the "best" match; i.e.
     * the most restrictive match that still satisfies all of the conditions.
     */
718
    if (dev->address.bus > secondary && dev->address.bus <= subordinate) {
719
        if (*best == NULL) {
720 721 722 723
            *best = virPCIDeviceNew(check->address.domain,
                                    check->address.bus,
                                    check->address.slot,
                                    check->address.function);
724 725 726 727 728
            if (*best == NULL) {
                ret = -1;
                goto cleanup;
            }
        } else {
729 730 731 732
            /* OK, we had already recorded a previous "best" match for the
             * parent.  See if the current device is more restrictive than the
             * best, and if so, make it the new best
             */
733 734 735
            int bestfd;
            uint8_t best_secondary;

736
            if ((bestfd = virPCIDeviceConfigOpenTry(*best)) < 0)
737
                goto cleanup;
738 739
            best_secondary = virPCIDeviceRead8(*best, bestfd, PCI_SECONDARY_BUS);
            virPCIDeviceConfigClose(*best, bestfd);
740 741

            if (secondary > best_secondary) {
742
                virPCIDeviceFree(*best);
743 744 745 746
                *best = virPCIDeviceNew(check->address.domain,
                                        check->address.bus,
                                        check->address.slot,
                                        check->address.function);
747 748 749 750
                if (*best == NULL) {
                    ret = -1;
                    goto cleanup;
                }
751 752 753 754
            }
        }
    }

755
 cleanup:
756
    virPCIDeviceConfigClose(check, fd);
757
    return ret;
758 759
}

760
static int
761
virPCIDeviceGetParent(virPCIDevicePtr dev, virPCIDevicePtr *parent)
762
{
763
    virPCIDevicePtr best = NULL;
764 765 766
    int ret;

    *parent = NULL;
767
    ret = virPCIDeviceIterDevices(virPCIDeviceIsParent, dev, parent, &best);
768
    if (ret == 1)
769
        virPCIDeviceFree(best);
770 771 772
    else if (ret == 0)
        *parent = best;
    return ret;
773 774 775 776 777 778
}

/* Secondary Bus Reset is our sledgehammer - it resets all
 * devices behind a bus.
 */
static int
779 780 781
virPCIDeviceTrySecondaryBusReset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 virPCIDeviceList *inactiveDevs)
782
{
J
Ján Tomko 已提交
783 784
    g_autoptr(virPCIDevice) parent = NULL;
    g_autoptr(virPCIDevice) conflict = NULL;
785 786 787
    uint8_t config_space[PCI_CONF_LEN];
    uint16_t ctl;
    int ret = -1;
788
    int parentfd;
789

790 791 792
    /* Refuse to do a secondary bus reset if there are other
     * devices/functions behind the bus are used by the host
     * or other guests.
793
     */
794
    if ((conflict = virPCIDeviceBusContainsActiveDevices(dev, inactiveDevs))) {
795
        virReportError(VIR_ERR_INTERNAL_ERROR,
796 797
                       _("Active %s devices on bus with %s, not doing bus reset"),
                       conflict->name, dev->name);
798 799 800 801
        return -1;
    }

    /* Find the parent bus */
802
    if (virPCIDeviceGetParent(dev, &parent) < 0)
803
        return -1;
804
    if (!parent) {
805
        virReportError(VIR_ERR_INTERNAL_ERROR,
806 807
                       _("Failed to find parent device for %s"),
                       dev->name);
808 809
        return -1;
    }
810
    if ((parentfd = virPCIDeviceConfigOpenWrite(parent)) < 0)
811
        goto out;
812 813 814 815 816 817 818

    VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name);

    /* Save and restore the device's config space; we only do this
     * for the supplied device since we refuse to do a reset if there
     * are multiple devices/functions
     */
819
    if (virPCIDeviceRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
820
        virReportError(VIR_ERR_INTERNAL_ERROR,
821
                       _("Failed to read PCI config space for %s"),
822
                       dev->name);
823 824 825 826 827 828
        goto out;
    }

    /* Read the control register, set the reset flag, wait 200ms,
     * unset the reset flag and wait 200ms.
     */
H
hexin 已提交
829
    ctl = virPCIDeviceRead16(dev, parentfd, PCI_BRIDGE_CONTROL);
830

831 832
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL,
                        ctl | PCI_BRIDGE_CTL_RESET);
833

834
    g_usleep(200 * 1000); /* sleep 200ms */
835

836
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl);
837

838
    g_usleep(200 * 1000); /* sleep 200ms */
839

840
    if (virPCIDeviceWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
841
        virReportError(VIR_ERR_INTERNAL_ERROR,
842 843 844 845
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        goto out;
    }
846
    ret = 0;
847

848
 out:
849
    virPCIDeviceConfigClose(parent, parentfd);
850 851 852 853 854 855 856 857
    return ret;
}

/* Power management reset attempts to reset a device using a
 * D-state transition from D3hot to D0. Note, in detect_pm_reset()
 * above we require the device supports a full internal reset.
 */
static int
858
virPCIDeviceTryPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
859 860 861 862 863 864 865 866
{
    uint8_t config_space[PCI_CONF_LEN];
    uint32_t ctl;

    if (!dev->pci_pm_cap_pos)
        return -1;

    /* Save and restore the device's config space. */
867
    if (virPCIDeviceRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
868
        virReportError(VIR_ERR_INTERNAL_ERROR,
869
                       _("Failed to read PCI config space for %s"),
870
                       dev->name);
871 872 873 874 875
        return -1;
    }

    VIR_DEBUG("%s %s: doing a power management reset", dev->id, dev->name);

876
    ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
877 878
    ctl &= ~PCI_PM_CTRL_STATE_MASK;

879 880
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D3hot);
881

882
    g_usleep(10 * 1000); /* sleep 10ms */
883

884 885
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D0);
886

887
    g_usleep(10 * 1000); /* sleep 10ms */
888

889
    if (virPCIDeviceWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
890
        virReportError(VIR_ERR_INTERNAL_ERROR,
891 892 893 894
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        return -1;
    }
895 896 897 898 899

    return 0;
}

static int
900
virPCIDeviceInit(virPCIDevicePtr dev, int cfgfd)
901
{
902 903
    int flr;

904 905 906
    dev->pcie_cap_pos   = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP);
    dev->pci_pm_cap_pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM);
    flr = virPCIDeviceDetectFunctionLevelReset(dev, cfgfd);
907
    if (flr < 0)
908
        return flr;
909 910
    dev->has_flr        = !!flr;
    dev->has_pm_reset   = !!virPCIDeviceDetectPowerManagementReset(dev, cfgfd);
911

912 913 914 915
    return 0;
}

int
916 917 918
virPCIDeviceReset(virPCIDevicePtr dev,
                  virPCIDeviceList *activeDevs,
                  virPCIDeviceList *inactiveDevs)
919
{
920 921
    g_autofree char *drvPath = NULL;
    g_autofree char *drvName = NULL;
922
    int ret = -1;
923
    int fd = -1;
924 925 926 927 928 929 930 931 932 933 934 935
    int hdrType = -1;

    if (virPCIGetHeaderType(dev, &hdrType) < 0)
        return -1;

    if (hdrType != VIR_PCI_HEADER_ENDPOINT) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid attempt to reset PCI device %s. "
                         "Only PCI endpoint devices can be reset"),
                       dev->name);
        return -1;
    }
936

937
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
938
        virReportError(VIR_ERR_INTERNAL_ERROR,
939 940 941 942
                       _("Not resetting active device %s"), dev->name);
        return -1;
    }

943 944 945 946 947 948 949 950
    /* If the device is currently bound to vfio-pci, ignore all
     * requests to reset it, since the vfio-pci driver will always
     * reset it whenever appropriate, so doing it ourselves would just
     * be redundant.
     */
    if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, &drvName) < 0)
        goto cleanup;

951
    if (virPCIStubDriverTypeFromString(drvName) == VIR_PCI_STUB_DRIVER_VFIO) {
952 953 954 955 956 957 958
        VIR_DEBUG("Device %s is bound to vfio-pci - skip reset",
                  dev->name);
        ret = 0;
        goto cleanup;
    }
    VIR_DEBUG("Resetting device %s", dev->name);

959
    if ((fd = virPCIDeviceConfigOpenWrite(dev)) < 0)
960
        goto cleanup;
961

962
    if (virPCIDeviceInit(dev, fd) < 0)
963 964
        goto cleanup;

965 966 967
    /* KVM will perform FLR when starting and stopping
     * a guest, so there is no need for us to do it here.
     */
968 969 970 971
    if (dev->has_flr) {
        ret = 0;
        goto cleanup;
    }
972

973 974 975 976 977
    /* If the device supports PCI power management reset,
     * that's the next best thing because it only resets
     * the function, not the whole device.
     */
    if (dev->has_pm_reset)
978
        ret = virPCIDeviceTryPowerManagementReset(dev, fd);
979

980
    /* Bus reset is not an option with the root bus */
981
    if (ret < 0 && dev->address.bus != 0)
982
        ret = virPCIDeviceTrySecondaryBusReset(dev, fd, inactiveDevs);
983

984 985
    if (ret < 0) {
        virErrorPtr err = virGetLastError();
986
        virReportError(VIR_ERR_INTERNAL_ERROR,
987 988
                       _("Unable to reset PCI device %s: %s"),
                       dev->name,
989 990
                       err ? err->message :
                       _("no FLR, PM reset or bus reset available"));
991 992
    }

993
 cleanup:
994
    virPCIDeviceConfigClose(dev, fd);
995 996 997
    return ret;
}

998

999
static int
1000
virPCIProbeStubDriver(virPCIStubDriver driver)
1001
{
1002
    const char *drvname = NULL;
1003
    g_autofree char *drvpath = NULL;
1004
    bool probed = false;
1005

1006 1007 1008 1009 1010 1011 1012 1013
    if (driver == VIR_PCI_STUB_DRIVER_NONE ||
        !(drvname = virPCIStubDriverTypeToString(driver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s",
                       _("Attempting to use unknown stub driver"));
        return -1;
    }

1014
 recheck:
1015
    if ((drvpath = virPCIDriverDir(drvname)) && virFileExists(drvpath))
1016 1017
        /* driver already loaded, return */
        return 0;
1018 1019

    if (!probed) {
1020
        g_autofree char *errbuf = NULL;
1021
        probed = true;
1022 1023
        if ((errbuf = virKModLoad(drvname, true))) {
            VIR_WARN("failed to load driver %s: %s", drvname, errbuf);
1024
            goto cleanup;
1025
        }
1026 1027

        goto recheck;
1028 1029
    }

1030
 cleanup:
1031 1032 1033
    /* If we know failure was because of blacklist, let's report that;
     * otherwise, report a more generic failure message
     */
1034
    if (virKModIsBlacklisted(drvname)) {
1035 1036 1037
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s: "
                         "administratively prohibited"),
1038
                       drvname);
1039 1040 1041
    } else {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s"),
1042
                       drvname);
1043 1044
    }

1045
    return -1;
1046 1047
}

1048
int
1049
virPCIDeviceUnbind(virPCIDevicePtr dev)
1050
{
1051 1052 1053
    g_autofree char *path = NULL;
    g_autofree char *drvpath = NULL;
    g_autofree char *driver = NULL;
1054 1055

    if (virPCIDeviceGetDriverPathAndName(dev, &drvpath, &driver) < 0)
1056
        return -1;
1057

1058
    if (!driver)
1059
        /* The device is not bound to any driver */
1060
        return 0;
1061

1062
    if (!(path = virPCIFile(dev->name, "driver/unbind")))
1063
        return -1;
1064 1065 1066 1067 1068 1069

    if (virFileExists(path)) {
        if (virFileWriteStr(path, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to unbind PCI device '%s' from %s"),
                                 dev->name, driver);
1070
            return -1;
1071 1072 1073
        }
    }

1074
    return 0;
1075 1076
}

1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101

/**
 * virPCIDeviceRebind:
 *  @dev: virPCIDevice object describing the device to rebind
 *
 * Unbind the device from its current driver, then ask the kernel to
 * probe it again by writing its name to the PCI "drivers_probe" file.
 *
 * Returns 0 on success, -1 on failure
 */
int
virPCIDeviceRebind(virPCIDevicePtr dev)
{
    int unbound = virPCIDeviceUnbind(dev);

    if (unbound < 0)
        return -1;

    if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) >= 0)
        return 0;

    virReportSystemError(errno,
                         _("Failed to trigger a probe for PCI device '%s'"),
                         dev->name);
    return -1;
}


1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112
/*
 * Bind a PCI device to a driver using driver_override sysfs interface.
 * E.g.
 *
 *  echo driver-name > /sys/bus/pci/devices/0000:03:00.0/driver_override
 *  echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
 *  echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
 *
 * An empty driverName will cause the device to be bound to its
 * preferred driver.
 */
1113
static int
1114 1115 1116
virPCIDeviceBindWithDriverOverride(virPCIDevicePtr dev,
                                   const char *driverName)
{
1117
    g_autofree char *path = NULL;
1118 1119 1120 1121 1122 1123 1124 1125 1126

    if (!(path = virPCIFile(dev->name, "driver_override")))
        return -1;

    if (virFileWriteStr(path, driverName, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to add driver '%s' to driver_override "
                               " interface of PCI device '%s'"),
                             driverName, dev->name);
1127
        return -1;
1128 1129
    }

1130
    if (virPCIDeviceRebind(dev) < 0)
1131
        return -1;
1132

1133
    return 0;
1134 1135 1136
}

static int
1137
virPCIDeviceUnbindFromStub(virPCIDevicePtr dev)
1138 1139 1140 1141 1142 1143 1144 1145
{
    if (!dev->unbind_from_stub) {
        VIR_DEBUG("Unbind from stub skipped for PCI device %s", dev->name);
        return 0;
    }

    return virPCIDeviceBindWithDriverOverride(dev, "\n");
}
1146 1147

static int
1148
virPCIDeviceBindToStub(virPCIDevicePtr dev)
1149 1150
{
    const char *stubDriverName;
1151 1152
    g_autofree char *stubDriverPath = NULL;
    g_autofree char *driverLink = NULL;
1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168

    /* Check the device is configured to use one of the known stub drivers */
    if (dev->stubDriver == VIR_PCI_STUB_DRIVER_NONE) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    } else if (!(stubDriverName = virPCIStubDriverTypeToString(dev->stubDriver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unknown stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    if (!(stubDriverPath = virPCIDriverDir(stubDriverName))  ||
        !(driverLink = virPCIFile(dev->name, "driver")))
1169
        return -1;
1170 1171 1172 1173 1174 1175

    if (virFileExists(driverLink)) {
        if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
            /* The device is already bound to the correct driver */
            VIR_DEBUG("Device %s is already bound to %s",
                      dev->name, stubDriverName);
1176
            return 0;
1177 1178 1179 1180
        }
    }

    if (virPCIDeviceBindWithDriverOverride(dev, stubDriverName) < 0)
1181
        return -1;
1182 1183

    dev->unbind_from_stub = true;
1184
    return 0;
1185 1186
}

1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
/* virPCIDeviceDetach:
 *
 * Detach this device from the host driver, attach it to the stub
 * driver (previously set with virPCIDeviceSetStubDriver(), and add *a
 * copy* of the object to the inactiveDevs list (if provided). This
 * function will *never* consume dev, so the caller should free it.
 *
 * Returns 0 on success, -1 on failure (will fail if the device is
 * already in the activeDevs list, but will be a NOP if the device is
 * already bound to the stub).
 *
 * GENERAL NOTE: activeDevs should be a list of all PCI devices
 * currently in use by a domain. inactiveDevs is a list of all PCI
 * devices that libvirt has detached from the host driver + attached
 * to the stub driver, but hasn't yet assigned to a domain. Any device
 * that is still attached to its host driver should not be on either
 * list.
 */
1205
int
1206 1207
virPCIDeviceDetach(virPCIDevicePtr dev,
                   virPCIDeviceList *activeDevs,
1208
                   virPCIDeviceList *inactiveDevs)
1209
{
1210
    if (virPCIProbeStubDriver(dev->stubDriver) < 0)
1211 1212
        return -1;

1213
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1214
        virReportError(VIR_ERR_INTERNAL_ERROR,
1215 1216 1217 1218
                       _("Not detaching active device %s"), dev->name);
        return -1;
    }

1219
    if (virPCIDeviceBindToStub(dev) < 0)
1220 1221
        return -1;

1222 1223 1224
    /* Add *a copy of* the dev into list inactiveDevs, if
     * it's not already there.
     */
1225 1226 1227 1228
    if (inactiveDevs && !virPCIDeviceListFind(inactiveDevs, dev)) {
        VIR_DEBUG("Adding PCI device %s to inactive list", dev->name);
        if (virPCIDeviceListAddCopy(inactiveDevs, dev) < 0)
            return -1;
1229 1230 1231
    }

    return 0;
1232 1233
}

1234 1235 1236 1237
/*
 * Pre-condition: inactivePCIHostdevs & activePCIHostdevs
 * are locked
 */
1238
int
1239 1240
virPCIDeviceReattach(virPCIDevicePtr dev,
                     virPCIDeviceListPtr activeDevs,
1241
                     virPCIDeviceListPtr inactiveDevs)
1242
{
1243
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1244
        virReportError(VIR_ERR_INTERNAL_ERROR,
1245 1246 1247 1248
                       _("Not reattaching active device %s"), dev->name);
        return -1;
    }

1249
    if (virPCIDeviceUnbindFromStub(dev) < 0)
1250 1251 1252
        return -1;

    /* Steal the dev from list inactiveDevs */
1253 1254
    if (inactiveDevs) {
        VIR_DEBUG("Removing PCI device %s from inactive list", dev->name);
1255
        virPCIDeviceListDel(inactiveDevs, dev);
1256
    }
1257 1258

    return 0;
1259 1260 1261
}

static char *
1262
virPCIDeviceReadID(virPCIDevicePtr dev, const char *id_name)
1263
{
1264
    g_autofree char *path = NULL;
1265 1266
    char *id_str;

1267
    if (!(path = virPCIFile(dev->name, id_name)))
1268
        return NULL;
1269 1270

    /* ID string is '0xNNNN\n' ... i.e. 7 bytes */
1271
    if (virFileReadAll(path, 7, &id_str) < 0)
1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285
        return NULL;

    /* Check for 0x suffix */
    if (id_str[0] != '0' || id_str[1] != 'x') {
        VIR_FREE(id_str);
        return NULL;
    }

    /* Chop off the newline; we know the string is 7 bytes */
    id_str[6] = '\0';

    return id_str;
}

1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330
/* Check that @addr is a usable PCI address: bus <= 0xFF,
 * slot <= 0x1F, function <= 7, and not the all-zero (empty) address.
 * When @report is true, an XML error describing the first failed
 * check is reported.
 *
 * Returns true if the address is valid, false otherwise.
 */
bool
virPCIDeviceAddressIsValid(virPCIDeviceAddressPtr addr,
                           bool report)
{
    if (addr->bus > 0xFF) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address bus='0x%x', "
                             "must be <= 0xFF"),
                           addr->bus);
        }
    } else if (addr->slot > 0x1F) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address slot='0x%x', "
                             "must be <= 0x1F"),
                           addr->slot);
        }
    } else if (addr->function > 7) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address function=0x%x, "
                             "must be <= 7"),
                           addr->function);
        }
    } else if (virPCIDeviceAddressIsEmpty(addr)) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Invalid PCI address 0000:00:00, at least "
                             "one of domain, bus, or slot must be > 0"));
        }
    } else {
        return true;
    }

    /* one of the checks above failed */
    return false;
}

/* An address is "empty" when domain, bus and slot are all zero;
 * the function number is not considered.
 */
bool
virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress *addr)
{
    return addr->domain == 0 && addr->bus == 0 && addr->slot == 0;
}

bool
1331 1332
virPCIDeviceAddressEqual(const virPCIDeviceAddress *addr1,
                         const virPCIDeviceAddress *addr2)
1333 1334 1335 1336 1337 1338 1339 1340 1341 1342
{
    if (addr1->domain == addr2->domain &&
        addr1->bus == addr2->bus &&
        addr1->slot == addr2->slot &&
        addr1->function == addr2->function) {
        return true;
    }
    return false;
}

1343
char *
1344
virPCIDeviceAddressAsString(const virPCIDeviceAddress *addr)
1345 1346 1347
{
    char *str;

1348 1349 1350 1351 1352
    str = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT,
                          addr->domain,
                          addr->bus,
                          addr->slot,
                          addr->function);
1353 1354 1355
    return str;
}

1356
virPCIDevicePtr
1357 1358 1359 1360
virPCIDeviceNew(unsigned int domain,
                unsigned int bus,
                unsigned int slot,
                unsigned int function)
1361
{
J
Ján Tomko 已提交
1362
    g_autoptr(virPCIDevice) dev = NULL;
1363 1364
    g_autofree char *vendor = NULL;
    g_autofree char *product = NULL;
1365

1366
    if (VIR_ALLOC(dev) < 0)
1367 1368
        return NULL;

1369 1370 1371 1372
    dev->address.domain = domain;
    dev->address.bus = bus;
    dev->address.slot = slot;
    dev->address.function = function;
1373

1374 1375
    dev->name = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT, domain, bus, slot,
                                function);
1376

1377
    dev->path = g_strdup_printf(PCI_SYSFS "devices/%s/config", dev->name);
1378

1379
    if (!virFileExists(dev->path)) {
1380 1381 1382
        virReportSystemError(errno,
                             _("Device %s not found: could not access %s"),
                             dev->name, dev->path);
1383
        return NULL;
1384 1385
    }

1386 1387
    vendor  = virPCIDeviceReadID(dev, "vendor");
    product = virPCIDeviceReadID(dev, "device");
1388 1389

    if (!vendor || !product) {
1390
        virReportError(VIR_ERR_INTERNAL_ERROR,
1391 1392
                       _("Failed to read product/vendor ID for %s"),
                       dev->name);
1393
        return NULL;
1394 1395 1396
    }

    /* strings contain '0x' prefix */
E
Eric Blake 已提交
1397 1398
    if (snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2],
                 &product[2]) >= sizeof(dev->id)) {
1399
        virReportError(VIR_ERR_INTERNAL_ERROR,
E
Eric Blake 已提交
1400 1401
                       _("dev->id buffer overflow: %s %s"),
                       &vendor[2], &product[2]);
1402
        return NULL;
E
Eric Blake 已提交
1403
    }
1404 1405 1406

    VIR_DEBUG("%s %s: initialized", dev->id, dev->name);

J
Ján Tomko 已提交
1407
    return g_steal_pointer(&dev);
1408 1409
}

L
Laine Stump 已提交
1410 1411 1412 1413 1414 1415

/* Duplicate @dev. All plain members are copied by value, while the
 * string members (name, path, used_by_drvname, used_by_domname) are
 * duplicated so that the copy owns its own strings.
 *
 * Returns the new device, or NULL on allocation failure.
 */
virPCIDevicePtr
virPCIDeviceCopy(virPCIDevicePtr dev)
{
    virPCIDevicePtr dup;

    if (VIR_ALLOC(dup) < 0)
        return NULL;

    /* shallow copy to take care of most attributes */
    *dup = *dev;

    /* ... then replace every borrowed string pointer with a copy */
    dup->name = g_strdup(dev->name);
    dup->path = g_strdup(dev->path);
    dup->used_by_drvname = g_strdup(dev->used_by_drvname);
    dup->used_by_domname = g_strdup(dev->used_by_domname);

    return dup;
}


1431
void
1432
virPCIDeviceFree(virPCIDevicePtr dev)
1433
{
1434 1435
    if (!dev)
        return;
1436
    VIR_DEBUG("%s %s: freeing", dev->id, dev->name);
1437
    VIR_FREE(dev->name);
E
Eric Blake 已提交
1438
    VIR_FREE(dev->path);
C
Chunyan Liu 已提交
1439 1440
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);
1441 1442
    VIR_FREE(dev);
}
1443

1444 1445 1446 1447 1448
/**
 * virPCIDeviceGetAddress:
 * @dev: device to get address from
 *
 * Return the PCI address stored inside @dev. The returned object
 * is owned by the device and must not be freed.
 *
 * Returns: a pointer to the address, which can never be NULL.
 */
virPCIDeviceAddressPtr
virPCIDeviceGetAddress(virPCIDevicePtr dev)
{
    return &dev->address;
}

1459
const char *
1460
virPCIDeviceGetName(virPCIDevicePtr dev)
1461 1462 1463 1464
{
    return dev->name;
}

1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476
/**
 * virPCIDeviceGetConfigPath:
 * @dev: device to query
 *
 * Returns a pointer to the string holding the path of @dev's PCI
 * config file; the string stays owned by the device.
 */
const char *
virPCIDeviceGetConfigPath(virPCIDevicePtr dev)
{
    return dev->path;
}

1477
/* Store the 'managed' flag on @dev. */
void
virPCIDeviceSetManaged(virPCIDevicePtr dev, bool managed)
{
    dev->managed = managed;
}

1482
bool
1483
virPCIDeviceGetManaged(virPCIDevicePtr dev)
1484 1485 1486 1487
{
    return dev->managed;
}

1488 1489
/* Record which stub driver @dev should be bound to. */
void
virPCIDeviceSetStubDriver(virPCIDevicePtr dev, virPCIStubDriver driver)
{
    dev->stubDriver = driver;
}

1494
virPCIStubDriver
1495 1496 1497 1498 1499
virPCIDeviceGetStubDriver(virPCIDevicePtr dev)
{
    return dev->stubDriver;
}

1500
bool
1501
virPCIDeviceGetUnbindFromStub(virPCIDevicePtr dev)
1502 1503 1504 1505 1506
{
    return dev->unbind_from_stub;
}

void
1507
virPCIDeviceSetUnbindFromStub(virPCIDevicePtr dev, bool unbind)
1508
{
1509
    dev->unbind_from_stub = unbind;
1510 1511
}

1512
bool
1513
virPCIDeviceGetRemoveSlot(virPCIDevicePtr dev)
1514 1515 1516 1517 1518
{
    return dev->remove_slot;
}

void
1519
virPCIDeviceSetRemoveSlot(virPCIDevicePtr dev, bool remove_slot)
1520
{
1521
    dev->remove_slot = remove_slot;
1522 1523
}

1524
bool
1525
virPCIDeviceGetReprobe(virPCIDevicePtr dev)
1526 1527 1528 1529 1530
{
    return dev->reprobe;
}

void
1531
virPCIDeviceSetReprobe(virPCIDevicePtr dev, bool reprobe)
1532
{
1533
    dev->reprobe = reprobe;
1534 1535
}

C
Chunyan Liu 已提交
1536 1537 1538 1539
/* Record the driver name and domain name that use @dev. Previously
 * stored names are freed and copies of @drv_name and @dom_name are
 * stored instead.
 *
 * Always returns 0.
 */
int
virPCIDeviceSetUsedBy(virPCIDevicePtr dev,
                      const char *drv_name,
                      const char *dom_name)
{
    VIR_FREE(dev->used_by_drvname);
    dev->used_by_drvname = g_strdup(drv_name);

    VIR_FREE(dev->used_by_domname);
    dev->used_by_domname = g_strdup(dom_name);

    return 0;
}

C
Chunyan Liu 已提交
1549 1550 1551 1552
/* Hand out the driver name and domain name stored on @dev through
 * @drv_name and @dom_name. The pointers refer to the device's own
 * strings, so the caller must not free them.
 */
void
virPCIDeviceGetUsedBy(virPCIDevicePtr dev,
                      const char **drv_name,
                      const char **dom_name)
{
    *dom_name = dev->used_by_domname;
    *drv_name = dev->used_by_drvname;
}

1558 1559
virPCIDeviceListPtr
virPCIDeviceListNew(void)
1560
{
1561
    virPCIDeviceListPtr list;
1562

1563 1564 1565 1566
    if (virPCIInitialize() < 0)
        return NULL;

    if (!(list = virObjectLockableNew(virPCIDeviceListClass)))
1567 1568 1569 1570 1571
        return NULL;

    return list;
}

1572 1573
static void
virPCIDeviceListDispose(void *obj)
1574
{
1575
    virPCIDeviceListPtr list = obj;
1576
    size_t i;
1577 1578

    for (i = 0; i < list->count; i++) {
1579
        virPCIDeviceFree(list->devs[i]);
1580 1581 1582 1583 1584 1585 1586 1587
        list->devs[i] = NULL;
    }

    list->count = 0;
    VIR_FREE(list->devs);
}

int
1588 1589
virPCIDeviceListAdd(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
1590
{
1591
    if (virPCIDeviceListFind(list, dev)) {
1592
        virReportError(VIR_ERR_INTERNAL_ERROR,
1593 1594 1595
                       _("Device %s is already in use"), dev->name);
        return -1;
    }
1596
    return VIR_APPEND_ELEMENT(list->devs, list->count, dev);
1597 1598
}

L
Laine Stump 已提交
1599 1600 1601 1602 1603

/* virPCIDeviceListAddCopy - add a *copy* of the device to this list
 *
 * @dev itself is left untouched and stays owned by the caller.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virPCIDeviceListAddCopy(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    g_autoptr(virPCIDevice) dup = virPCIDeviceCopy(dev);

    if (dup == NULL || virPCIDeviceListAdd(list, dup) < 0)
        return -1;

    /* the list owns the copy now, keep it from being auto-freed */
    dup = NULL;
    return 0;
}


1616 1617 1618
/* Returns the device at position @idx of @list (still owned by the
 * list), or NULL when @idx is out of range.
 */
virPCIDevicePtr
virPCIDeviceListGet(virPCIDeviceListPtr list,
                    int idx)
{
    if (idx < 0 || idx >= list->count)
        return NULL;

    return list->devs[idx];
}

1628
size_t
1629
virPCIDeviceListCount(virPCIDeviceListPtr list)
1630
{
1631 1632 1633
    return list->count;
}

1634 1635 1636
/* Remove the entry at position @idx from @list without freeing it.
 *
 * Returns the removed device (now owned by the caller), or NULL when
 * @idx is out of range.
 */
virPCIDevicePtr
virPCIDeviceListStealIndex(virPCIDeviceListPtr list,
                           int idx)
{
    virPCIDevicePtr stolen = NULL;

    if (idx >= 0 && idx < list->count) {
        stolen = list->devs[idx];
        VIR_DELETE_ELEMENT(list->devs, idx, list->count);
    }

    return stolen;
}

1648 1649 1650
/* Remove the entry of @list that has the same address as @dev,
 * without freeing it.
 *
 * Returns the removed device, or NULL if there is no such entry.
 */
virPCIDevicePtr
virPCIDeviceListSteal(virPCIDeviceListPtr list,
                      virPCIDevicePtr dev)
{
    int idx = virPCIDeviceListFindIndex(list, dev);

    return virPCIDeviceListStealIndex(list, idx);
}

1655
void
1656 1657
virPCIDeviceListDel(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
1658
{
1659
    virPCIDeviceFree(virPCIDeviceListSteal(list, dev));
1660 1661
}

1662
int
1663
virPCIDeviceListFindIndex(virPCIDeviceListPtr list, virPCIDevicePtr dev)
1664
{
1665
    size_t i;
1666

1667 1668 1669 1670 1671 1672
    for (i = 0; i < list->count; i++) {
        virPCIDevicePtr other = list->devs[i];
        if (other->address.domain   == dev->address.domain &&
            other->address.bus      == dev->address.bus    &&
            other->address.slot     == dev->address.slot   &&
            other->address.function == dev->address.function)
1673
            return i;
1674
    }
1675 1676 1677
    return -1;
}

L
Laine Stump 已提交
1678 1679 1680 1681 1682 1683 1684 1685

/* Look up the entry of @list located at
 * @domain:@bus:@slot.@function.
 *
 * Returns the first matching device (still owned by the list), or
 * NULL if there is none.
 */
virPCIDevicePtr
virPCIDeviceListFindByIDs(virPCIDeviceListPtr list,
                          unsigned int domain,
                          unsigned int bus,
                          unsigned int slot,
                          unsigned int function)
{
    size_t pos;

    for (pos = 0; pos < list->count; pos++) {
        virPCIDevicePtr cand = list->devs[pos];

        if (cand->address.domain != domain ||
            cand->address.bus != bus ||
            cand->address.slot != slot ||
            cand->address.function != function)
            continue;

        return cand;
    }

    return NULL;
}


1700 1701
/* Returns the entry of @list that has the same address as @dev
 * (still owned by the list), or NULL if there is none.
 */
virPCIDevicePtr
virPCIDeviceListFind(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    int pos = virPCIDeviceListFindIndex(list, dev);

    return pos >= 0 ? list->devs[pos] : NULL;
}
1710 1711


1712 1713 1714
/* Call @actor (with @opaque) for each file in @dev's sysfs directory
 * that device assignment requires. Iteration stops at the first
 * @actor call that returns a negative value.
 *
 * Returns 0 on success, -1 if the directory could not be opened or
 * read, or if @actor failed.
 */
int
virPCIDeviceFileIterate(virPCIDevicePtr dev,
                        virPCIDeviceFileActor actor,
                        void *opaque)
{
    g_autofree char *devDir = NULL;
    DIR *dirp = NULL;
    struct dirent *entry;
    int rc;
    int ret = -1;

    devDir = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT,
                             dev->address.domain, dev->address.bus,
                             dev->address.slot, dev->address.function);

    if (virDirOpen(&dirp, devDir) < 0)
        goto cleanup;

    while ((rc = virDirRead(dirp, &entry, devDir)) > 0) {
        g_autofree char *filePath = NULL;
        /* Device assignment requires:
         *   $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN,
         *   $PCIDIR/rom, $PCIDIR/reset, $PCIDIR/vendor, $PCIDIR/device
         */
        bool wanted = STREQ(entry->d_name, "config") ||
                      STRPREFIX(entry->d_name, "resource") ||
                      STREQ(entry->d_name, "rom") ||
                      STREQ(entry->d_name, "vendor") ||
                      STREQ(entry->d_name, "device") ||
                      STREQ(entry->d_name, "reset");

        if (!wanted)
            continue;

        filePath = g_strdup_printf("%s/%s", devDir, entry->d_name);
        if ((actor)(dev, filePath, opaque) < 0)
            goto cleanup;
    }

    if (rc >= 0)
        ret = 0;

 cleanup:
    VIR_DIR_CLOSE(dirp);
    return ret;
}
J
Jiri Denemark 已提交
1755

L
Laine Stump 已提交
1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766

/* virPCIDeviceAddressIOMMUGroupIterate:
 *   Call @actor for all devices in the same iommu_group as orig
 *   (including orig itself). Even if there is no iommu_group for the
 *   device, call @actor once for orig.
 *
 *   Returns 0 on success, -1 if a directory entry is not a valid PCI
 *   address, if reading the directory fails, or if @actor fails. When
 *   the group directory cannot be opened, the result of the single
 *   @actor call on @orig is returned.
 */
int
virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddressPtr orig,
                                     virPCIDeviceAddressActor actor,
                                     void *opaque)
{
    g_autofree char *grpDevsPath = NULL;
    DIR *grpDir = NULL;
    struct dirent *entry;
    int rc;
    int ret = -1;

    grpDevsPath = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT "/iommu_group/devices",
                                  orig->domain, orig->bus, orig->slot, orig->function);

    if (virDirOpenQuiet(&grpDir, grpDevsPath) < 0) {
        /* just process the original device, nothing more */
        ret = (actor)(orig, opaque);
        goto cleanup;
    }

    for (;;) {
        virPCIDeviceAddress member;

        rc = virDirRead(grpDir, &entry, grpDevsPath);
        if (rc <= 0)
            break;

        if (virPCIDeviceAddressParse(entry->d_name, &member) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Found invalid device link '%s' in '%s'"),
                           entry->d_name, grpDevsPath);
            goto cleanup;
        }

        if ((actor)(&member, opaque) < 0)
            goto cleanup;
    }

    if (rc >= 0)
        ret = 0;

 cleanup:
    VIR_DIR_CLOSE(grpDir);
    return ret;
}


/* Address iteration callback: create a device object for
 * @newDevAddr and add it to the device list passed in @opaque.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virPCIDeviceGetIOMMUGroupAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceListPtr devList = opaque;
    g_autoptr(virPCIDevice) member = virPCIDeviceNew(newDevAddr->domain,
                                                     newDevAddr->bus,
                                                     newDevAddr->slot,
                                                     newDevAddr->function);

    if (!member)
        return -1;

    if (virPCIDeviceListAdd(devList, member) < 0)
        return -1;

    member = NULL; /* it's now on the list */
    return 0;
}


/*
 * virPCIDeviceGetIOMMUGroupList - build a virPCIDeviceList holding
 * a device object for every device in the same iommu_group as @dev.
 *
 * Return the new list, or NULL on failure
 */
virPCIDeviceListPtr
virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev)
{
    virPCIDeviceListPtr members = virPCIDeviceListNew();

    if (members &&
        virPCIDeviceAddressIOMMUGroupIterate(&dev->address,
                                             virPCIDeviceGetIOMMUGroupAddOne,
                                             members) >= 0)
        return members;

    /* failure: drop our reference to the (possibly partial) list */
    virObjectUnref(members);
    return NULL;
}


/* Bundle of two caller-owned output locations, passed as the opaque
 * argument to virPCIGetIOMMUGroupAddressesAddOne().
 */
typedef struct {
    virPCIDeviceAddressPtr **iommuGroupDevices; /* points at the array of address pointers */
    size_t *nIommuGroupDevices; /* points at that array's element count */
} virPCIDeviceAddressList;
typedef virPCIDeviceAddressList *virPCIDeviceAddressListPtr;

/* Iteration callback: append a freshly allocated copy of @newDevAddr to
 * the address array described by the virPCIDeviceAddressList in @opaque.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virPCIGetIOMMUGroupAddressesAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceAddressListPtr out = opaque;
    virPCIDeviceAddressPtr dup;
    int rc = 0;

    /* the array stores its own copy of each address */
    if (VIR_ALLOC(dup) < 0) {
        VIR_FREE(dup);
        return -1;
    }

    *dup = *newDevAddr;

    if (VIR_APPEND_ELEMENT(*out->iommuGroupDevices,
                           *out->nIommuGroupDevices, dup) < 0)
        rc = -1;

    /* NOTE(review): presumably a no-op after a successful append (the
     * macro is expected to clear @dup) and a real free otherwise */
    VIR_FREE(dup);
    return rc;
}


/*
 * virPCIDeviceAddressGetIOMMUGroupAddresses - collect the addresses of
 * all devices that are in the same iommu_group as @devAddr.
 *
 * @devAddr: address of the device whose iommu_group is walked
 * @iommuGroupDevices: array to which a copy of each address is appended
 * @nIommuGroupDevices: element count of @iommuGroupDevices; updated as
 *                      entries are appended
 *
 * The array and count are not reset here, so entries are appended to
 * whatever they describe on entry. If the iteration fails part way
 * through, entries appended so far are not freed by this function.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddressPtr devAddr,
                                          virPCIDeviceAddressPtr **iommuGroupDevices,
                                          size_t *nIommuGroupDevices)
{
    virPCIDeviceAddressList addrList = { iommuGroupDevices,
                                         nIommuGroupDevices };

    if (virPCIDeviceAddressIOMMUGroupIterate(devAddr,
                                             virPCIGetIOMMUGroupAddressesAddOne,
                                             &addrList) < 0)
        return -1;

    return 0;
}


/* virPCIDeviceAddressGetIOMMUGroupNum - look up the number of the
 * iommu_group that the PCI device at @addr belongs to.
 *
 * Returns the group number, -2 if the device's iommu_group entry is
 * not a symlink (i.e. there is no iommu_group for the device), or -1
 * on any other error.
 */
int
virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddressPtr addr)
{
    g_autofree char *name = NULL;
    g_autofree char *linkPath = NULL;
    g_autofree char *target = NULL;
    const char *numStr;
    unsigned int num;

    name = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT, addr->domain, addr->bus,
                           addr->slot, addr->function);

    linkPath = virPCIFile(name, "iommu_group");
    if (!linkPath)
        return -1;

    if (virFileIsLink(linkPath) != 1)
        return -2;

    if (virFileResolveLink(linkPath, &target) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       name, linkPath);
        return -1;
    }

    /* the group number is the last path component of the link target */
    numStr = last_component(target);
    if (virStrToLong_ui(numStr, NULL, 10, &num) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("device %s iommu_group symlink %s has "
                         "invalid group number %s"),
                       name, target, numStr);
        return -1;
    }

    return num;
}


1948 1949
/* virPCIDeviceGetIOMMUGroupDev - return the name of the device used
 * to control this PCI device's group (e.g. "/dev/vfio/15")
1950 1951
 */
char *
1952
virPCIDeviceGetIOMMUGroupDev(virPCIDevicePtr dev)
1953
{
1954 1955
    g_autofree char *devPath = NULL;
    g_autofree char *groupPath = NULL;
1956 1957
    char *groupDev = NULL;

1958
    if (!(devPath = virPCIFile(dev->name, "iommu_group")))
1959
        return NULL;
1960 1961 1962 1963
    if (virFileIsLink(devPath) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s iommu_group file %s is not a symlink"),
                       dev->name, devPath);
1964
        return NULL;
1965 1966 1967 1968 1969
    }
    if (virFileResolveLink(devPath, &groupPath) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       dev->name, devPath);
1970
        return NULL;
1971
    }
1972
    groupDev = g_strdup_printf("/dev/vfio/%s", last_component(groupPath));
1973

1974 1975 1976
    return groupDev;
}

J
Jiri Denemark 已提交
1977
static int
1978
virPCIDeviceDownstreamLacksACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
1979 1980 1981 1982
{
    uint16_t flags;
    uint16_t ctrl;
    unsigned int pos;
1983 1984
    int fd;
    int ret = 0;
1985
    uint16_t device_class;
J
Jiri Denemark 已提交
1986

1987
    if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
J
Jiri Denemark 已提交
1988 1989
        return -1;

1990
    if (virPCIDeviceInit(dev, fd) < 0) {
1991 1992 1993 1994
        ret = -1;
        goto cleanup;
    }

1995 1996 1997
    if (virPCIDeviceReadClass(dev, &device_class) < 0)
        goto cleanup;

J
Jiri Denemark 已提交
1998
    pos = dev->pcie_cap_pos;
1999
    if (!pos || device_class != PCI_CLASS_BRIDGE_PCI)
2000
        goto cleanup;
J
Jiri Denemark 已提交
2001

2002
    flags = virPCIDeviceRead16(dev, fd, pos + PCI_EXP_FLAGS);
J
Jiri Denemark 已提交
2003
    if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM)
2004
        goto cleanup;
J
Jiri Denemark 已提交
2005

2006
    pos = virPCIDeviceFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS);
J
Jiri Denemark 已提交
2007 2008
    if (!pos) {
        VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name);
2009 2010
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2011 2012
    }

2013
    ctrl = virPCIDeviceRead16(dev, fd, pos + PCI_EXT_ACS_CTRL);
J
Jiri Denemark 已提交
2014 2015 2016
    if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) {
        VIR_DEBUG("%s %s: downstream port has ACS disabled",
                  dev->id, dev->name);
2017 2018
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2019 2020
    }

2021
 cleanup:
2022
    virPCIDeviceConfigClose(dev, fd);
2023
    return ret;
J
Jiri Denemark 已提交
2024 2025 2026
}

static int
2027
virPCIDeviceIsBehindSwitchLackingACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2028
{
J
Ján Tomko 已提交
2029
    g_autoptr(virPCIDevice) parent = NULL;
J
Jiri Denemark 已提交
2030

2031
    if (virPCIDeviceGetParent(dev, &parent) < 0)
2032
        return -1;
2033 2034 2035 2036 2037
    if (!parent) {
        /* if we have no parent, and this is the root bus, ACS doesn't come
         * into play since devices on the root bus can't P2P without going
         * through the root IOMMU.
         */
2038
        if (dev->address.bus == 0) {
2039
            return 0;
2040
        } else {
2041
            virReportError(VIR_ERR_INTERNAL_ERROR,
2042 2043 2044 2045
                           _("Failed to find parent device for %s"),
                           dev->name);
            return -1;
        }
J
Jiri Denemark 已提交
2046 2047 2048 2049 2050 2051 2052
    }

    /* XXX we should rather fail when we can't find device's parent and
     * stop the loop when we get to root instead of just stopping when no
     * parent can be found
     */
    do {
J
Ján Tomko 已提交
2053
        g_autoptr(virPCIDevice) tmp = NULL;
J
Jiri Denemark 已提交
2054
        int acs;
2055
        int ret;
J
Jiri Denemark 已提交
2056

2057
        acs = virPCIDeviceDownstreamLacksACS(parent);
J
Jiri Denemark 已提交
2058 2059 2060 2061 2062 2063 2064 2065 2066

        if (acs) {
            if (acs < 0)
                return -1;
            else
                return 1;
        }

        tmp = parent;
2067
        ret = virPCIDeviceGetParent(parent, &parent);
2068 2069
        if (ret < 0)
            return -1;
J
Jiri Denemark 已提交
2070 2071 2072 2073 2074
    } while (parent);

    return 0;
}

2075 2076
/* virPCIDeviceIsAssignable - decide whether @dev may be assigned.
 *
 * @strict_acs_check: when non-zero, a device behind a switch lacking
 *                    ACS is refused; when zero it is allowed and only a
 *                    debug message is logged
 *
 * Returns 1 if the device is assignable, 0 if it is not or if the ACS
 * check itself failed.
 */
int virPCIDeviceIsAssignable(virPCIDevicePtr dev,
                             int strict_acs_check)
{
    int lacking;

    /* XXX This could be a great place to actually check that a non-managed
     * device isn't in use, e.g. by checking that device is either un-bound
     * or bound to a stub driver.
     */

    lacking = virPCIDeviceIsBehindSwitchLackingACS(dev);
    if (lacking < 0)
        return 0;

    if (lacking == 0)
        return 1;

    if (strict_acs_check) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Device %s is behind a switch lacking ACS and "
                         "cannot be assigned"),
                       dev->name);
        return 0;
    }

    VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed",
              dev->id, dev->name);
    return 1;
}
2104 2105 2106 2107 2108 2109 2110 2111 2112 2113

/* Wrapper around virStrToLong_ui() that logs an error naming the
 * offending string whenever the conversion returns non-zero.
 *
 * Returns whatever virStrToLong_ui() returned.
 */
static int
logStrToLong_ui(char const *s,
                char **end_ptr,
                int base,
                unsigned int *result)
{
    int rc = virStrToLong_ui(s, end_ptr, base, result);

    if (rc != 0)
        VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s);

    return rc;
}

2119 2120
int
virPCIDeviceAddressParse(char *address,
2121
                         virPCIDeviceAddressPtr bdf)
2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147
{
    char *p = NULL;
    int ret = -1;

    if ((address == NULL) || (logStrToLong_ui(address, &p, 16,
                                              &bdf->domain) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->bus) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->slot) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->function) == -1)) {
        goto out;
    }

    ret = 0;

2148
 out:
2149 2150 2151
    return ret;
}

2152

2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177
/* virZPCIDeviceAddressIsValid - check that the uid of @zpci is non-zero
 * and does not exceed VIR_DOMAIN_DEVICE_ZPCI_MAX_UID.
 *
 * Returns true if valid; otherwise reports an XML error and returns
 * false.
 */
bool
virZPCIDeviceAddressIsValid(virZPCIDeviceAddressPtr zpci)
{
    /* fid is not checked: it may take any value in the uint32 range */
    if (zpci->uid != 0 &&
        zpci->uid <= VIR_DOMAIN_DEVICE_ZPCI_MAX_UID)
        return true;

    virReportError(VIR_ERR_XML_ERROR,
                   _("Invalid PCI address uid='0x%.4x', "
                     "must be > 0x0000 and <= 0x%.4x"),
                   zpci->uid,
                   VIR_DOMAIN_DEVICE_ZPCI_MAX_UID);
    return false;
}

/* Returns true when both the uid and the fid of @addr are zero. */
bool
virZPCIDeviceAddressIsEmpty(const virZPCIDeviceAddress *addr)
{
    return addr->uid == 0 && addr->fid == 0;
}

2178
#ifdef __linux__
2179

2180
virPCIDeviceAddressPtr
2181
virPCIGetDeviceAddressFromSysfsLink(const char *device_link)
2182
{
2183
    virPCIDeviceAddressPtr bdf = NULL;
2184
    char *config_address = NULL;
2185
    g_autofree char *device_path = NULL;
2186 2187

    if (!virFileExists(device_link)) {
2188
        VIR_DEBUG("'%s' does not exist", device_link);
2189
        return NULL;
2190 2191
    }

2192
    device_path = virFileCanonicalizePath(device_link);
2193
    if (device_path == NULL) {
2194 2195 2196
        virReportSystemError(errno,
                             _("Failed to resolve device link '%s'"),
                             device_link);
2197
        return NULL;
2198 2199
    }

2200
    config_address = last_component(device_path);
2201
    if (VIR_ALLOC(bdf) < 0)
2202
        return NULL;
2203

2204
    if (virPCIDeviceAddressParse(config_address, bdf) < 0) {
2205
        virReportError(VIR_ERR_INTERNAL_ERROR,
2206 2207
                       _("Failed to parse PCI config address '%s'"),
                       config_address);
2208
        VIR_FREE(bdf);
2209
        return NULL;
2210 2211
    }

2212
    return bdf;
2213 2214
}

2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227
/**
 * virPCIGetPhysicalFunction:
 * @vf_sysfs_path: sysfs path for the virtual function
 * @pf: where to store the physical function's address
 *
 * Given @vf_sysfs_path, this function will store the pointer
 * to a newly-allocated virPCIDeviceAddress in @pf.
 *
 * @pf might be NULL if @vf_sysfs_path does not point to a
 * virtual function. If it's not NULL, then it should be
 * freed by the caller when no longer needed.
 *
 * Returns: >=0 on success, <0 on failure
2228 2229
 */
int
2230
virPCIGetPhysicalFunction(const char *vf_sysfs_path,
2231
                          virPCIDeviceAddressPtr *pf)
2232
{
2233
    g_autofree char *device_link = NULL;
2234

2235 2236
    *pf = NULL;

2237 2238
    if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) {
        virReportOOMError();
2239
        return -1;
2240 2241
    }

2242
    if ((*pf = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2243 2244
        VIR_DEBUG("PF for VF device '%s': " VIR_PCI_DEVICE_ADDRESS_FMT,
                  vf_sysfs_path,
2245 2246
                  (*pf)->domain, (*pf)->bus, (*pf)->slot, (*pf)->function);
    }
2247

2248
    return 0;
2249 2250
}

2251

2252 2253 2254 2255
/*
 * Returns virtual functions of a physical function
 */
int
2256 2257
virPCIGetVirtualFunctions(const char *sysfs_path,
                          virPCIDeviceAddressPtr **virtual_functions,
2258 2259
                          size_t *num_virtual_functions,
                          unsigned int *max_virtual_functions)
2260 2261
{
    int ret = -1;
2262
    size_t i;
2263 2264
    g_autofree char *totalvfs_file = NULL;
    g_autofree char *totalvfs_str = NULL;
2265
    virPCIDeviceAddressPtr config_addr = NULL;
2266

2267 2268
    *virtual_functions = NULL;
    *num_virtual_functions = 0;
2269 2270
    *max_virtual_functions = 0;

2271
    totalvfs_file = g_strdup_printf("%s/sriov_totalvfs", sysfs_path);
2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283
    if (virFileExists(totalvfs_file)) {
        char *end = NULL; /* so that terminating \n doesn't create error */

        if (virFileReadAll(totalvfs_file, 16, &totalvfs_str) < 0)
            goto error;
        if (virStrToLong_ui(totalvfs_str, &end, 10, max_virtual_functions) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Unrecognized value in %s: %s"),
                           totalvfs_file, totalvfs_str);
            goto error;
        }
    }
2284

2285
    do {
2286
        g_autofree char *device_link = NULL;
2287
        /* look for virtfn%d links until one isn't found */
2288 2289
        device_link = g_strdup_printf("%s/virtfn%zu", sysfs_path,
                                      *num_virtual_functions);
2290

2291 2292
        if (!virFileExists(device_link))
            break;
2293

2294
        if (!(config_addr = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2295 2296 2297 2298 2299
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Failed to get SRIOV function from device link '%s'"),
                           device_link);
            goto error;
        }
2300

2301 2302
        if (VIR_APPEND_ELEMENT(*virtual_functions, *num_virtual_functions,
                               config_addr) < 0)
2303 2304
            goto error;
    } while (1);
2305

2306 2307
    VIR_DEBUG("Found %zu virtual functions for %s",
              *num_virtual_functions, sysfs_path);
2308
    ret = 0;
2309
 cleanup:
2310
    VIR_FREE(config_addr);
2311
    return ret;
2312

2313
 error:
2314 2315 2316
    for (i = 0; i < *num_virtual_functions; i++)
        VIR_FREE((*virtual_functions)[i]);
    VIR_FREE(*virtual_functions);
2317
    *num_virtual_functions = 0;
2318
    goto cleanup;
2319
}
2320

2321

2322 2323 2324 2325
/*
 * Returns 1 if vf device is a virtual function, 0 if not, -1 on error
 */
int
2326
virPCIIsVirtualFunction(const char *vf_sysfs_device_link)
2327
{
2328
    g_autofree char *vf_sysfs_physfn_link = NULL;
2329

2330
    vf_sysfs_physfn_link = g_strdup_printf("%s/physfn", vf_sysfs_device_link);
2331

2332
    return virFileExists(vf_sysfs_physfn_link);
2333 2334 2335 2336 2337 2338
}

/*
 * Returns the sriov virtual function index of vf given its pf
 */
int
2339 2340 2341
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
                              const char *vf_sysfs_device_link,
                              int *vf_index)
2342
{
2343 2344
    int ret = -1;
    size_t i;
2345
    size_t num_virt_fns = 0;
2346
    unsigned int max_virt_fns = 0;
2347 2348
    virPCIDeviceAddressPtr vf_bdf = NULL;
    virPCIDeviceAddressPtr *virt_fns = NULL;
2349

2350
    if (!(vf_bdf = virPCIGetDeviceAddressFromSysfsLink(vf_sysfs_device_link)))
2351 2352
        return ret;

2353
    if (virPCIGetVirtualFunctions(pf_sysfs_device_link, &virt_fns,
2354
                                  &num_virt_fns, &max_virt_fns) < 0) {
2355
        virReportError(VIR_ERR_INTERNAL_ERROR,
2356
                       _("Error getting physical function's '%s' "
2357
                         "virtual_functions"), pf_sysfs_device_link);
2358 2359 2360 2361
        goto out;
    }

    for (i = 0; i < num_virt_fns; i++) {
2362
        if (virPCIDeviceAddressEqual(vf_bdf, virt_fns[i])) {
2363 2364 2365 2366
            *vf_index = i;
            ret = 0;
            break;
        }
2367 2368
    }

2369
 out:
2370 2371 2372

    /* free virtual functions */
    for (i = 0; i < num_virt_fns; i++)
2373
        VIR_FREE(virt_fns[i]);
2374

A
ajia@redhat.com 已提交
2375
    VIR_FREE(virt_fns);
2376 2377 2378 2379 2380
    VIR_FREE(vf_bdf);

    return ret;
}

2381 2382 2383 2384 2385
/*
 * Returns a path to the PCI sysfs file given the BDF of the PCI function
 */

int
2386
virPCIGetSysfsFile(char *virPCIDeviceName, char **pci_sysfs_device_link)
2387
{
2388 2389
    *pci_sysfs_device_link = g_strdup_printf(PCI_SYSFS "devices/%s",
                                             virPCIDeviceName);
2390
    return 0;
2391 2392
}

R
Roopa Prabhu 已提交
2393
int
2394
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
2395
                                char **pci_sysfs_device_link)
R
Roopa Prabhu 已提交
2396
{
2397 2398
    *pci_sysfs_device_link = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT, addr->domain,
                                             addr->bus, addr->slot, addr->function);
2399
    return 0;
R
Roopa Prabhu 已提交
2400 2401
}

2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412
/**
 * virPCIGetNetName:
 * @device_link_sysfs_path: sysfs path to the PCI device
 * @idx: used to choose which netdev when there are several
 *       (ignored if physPortID is set)
 * @physPortID: match this string in the netdev's phys_port_id
 *       (or NULL to ignore and use idx instead)
 * @netname: used to return the name of the netdev
 *       (set to NULL (but returns success) if there is no netdev)
 *
 * Returns 0 on success, -1 on error (error has been logged)
2413 2414
 */
int
2415 2416 2417 2418
virPCIGetNetName(const char *device_link_sysfs_path,
                 size_t idx,
                 char *physPortID,
                 char **netname)
2419
{
2420 2421 2422
    g_autofree char *pcidev_sysfs_net_path = NULL;
    g_autofree char *firstEntryName = NULL;
    g_autofree char *thisPhysPortID = NULL;
2423 2424 2425
    int ret = -1;
    DIR *dir = NULL;
    struct dirent *entry = NULL;
2426
    size_t i = 0;
2427

2428 2429
    *netname = NULL;

2430 2431 2432 2433 2434 2435
    if (virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path,
                     "net") == -1) {
        virReportOOMError();
        return -1;
    }

2436 2437 2438
    if (virDirOpenQuiet(&dir, pcidev_sysfs_net_path) < 0) {
        /* this *isn't* an error - caller needs to check for netname == NULL */
        ret = 0;
2439
        goto cleanup;
2440
    }
2441

E
Eric Blake 已提交
2442
    while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) {
2443 2444 2445 2446 2447 2448 2449 2450 2451 2452
        /* if the caller sent a physPortID, compare it to the
         * physportID of this netdev. If not, look for entry[idx].
         */
        if (physPortID) {
            if (virNetDevGetPhysPortID(entry->d_name, &thisPhysPortID) < 0)
                goto cleanup;

            /* if this one doesn't match, keep looking */
            if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) {
                VIR_FREE(thisPhysPortID);
2453 2454 2455 2456 2457
                /* save the first entry we find to use as a failsafe
                 * in case we don't match the phys_port_id. This is
                 * needed because some NIC drivers (e.g. i40e)
                 * implement phys_port_id for PFs, but not for VFs
                 */
2458 2459
                if (!firstEntryName)
                    firstEntryName = g_strdup(entry->d_name);
2460

2461 2462 2463 2464 2465 2466 2467
                continue;
            }
        } else {
            if (i++ < idx)
                continue;
        }

2468
        *netname = g_strdup(entry->d_name);
2469 2470

        ret = 0;
2471 2472 2473
        break;
    }

2474 2475
    if (ret < 0) {
        if (physPortID) {
2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490
            if (firstEntryName) {
                /* we didn't match the provided phys_port_id, but this
                 * is probably because phys_port_id isn't implemented
                 * for this NIC driver, so just return the first
                 * (probably only) netname we found.
                 */
                *netname = firstEntryName;
                firstEntryName = NULL;
                ret = 0;
            } else {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Could not find network device with "
                                 "phys_port_id '%s' under PCI device at %s"),
                               physPortID, device_link_sysfs_path);
            }
2491 2492 2493 2494 2495
        } else {
            ret = 0; /* no netdev at the given index is *not* an error */
        }
    }
 cleanup:
J
Ján Tomko 已提交
2496
    VIR_DIR_CLOSE(dir);
2497
    return ret;
2498
}
R
Roopa Prabhu 已提交
2499 2500

int
2501
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
2502 2503 2504
                             int pfNetDevIdx,
                             char **pfname,
                             int *vf_index)
R
Roopa Prabhu 已提交
2505
{
2506
    virPCIDeviceAddressPtr pf_config_address = NULL;
2507 2508 2509
    g_autofree char *pf_sysfs_device_path = NULL;
    g_autofree char *vfname = NULL;
    g_autofree char *vfPhysPortID = NULL;
R
Roopa Prabhu 已提交
2510 2511
    int ret = -1;

2512
    if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
2513
        goto cleanup;
R
Roopa Prabhu 已提交
2514

2515
    if (!pf_config_address)
2516
        goto cleanup;
2517

2518 2519
    if (virPCIDeviceAddressGetSysfsFile(pf_config_address,
                                        &pf_sysfs_device_path) < 0) {
2520 2521
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2522

2523 2524 2525
    if (virPCIGetVirtualFunctionIndex(pf_sysfs_device_path,
                                      vf_sysfs_device_path, vf_index) < 0) {
        goto cleanup;
R
Roopa Prabhu 已提交
2526 2527
    }

2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547
    /* If the caller hasn't asked for a specific pfNetDevIdx, and VF
     * is bound to a netdev, learn that netdev's phys_port_id (if
     * available). This can be used to disambiguate when the PF has
     * multiple netdevs. If the VF isn't bound to a netdev, then we
     * return netdev[pfNetDevIdx] on the PF, which may or may not be
     * correct.
     */
    if (pfNetDevIdx == -1) {
        if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
            goto cleanup;

        if (vfname) {
            if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0)
                goto cleanup;
        }
        pfNetDevIdx = 0;
    }

    if (virPCIGetNetName(pf_sysfs_device_path,
                         pfNetDevIdx, vfPhysPortID, pfname) < 0) {
R
Roopa Prabhu 已提交
2548
        goto cleanup;
2549
    }
R
Roopa Prabhu 已提交
2550

2551 2552 2553 2554 2555 2556 2557 2558 2559
    if (!*pfname) {
        /* this shouldn't be possible. A VF can't exist unless its
         * PF device is bound to a network driver
         */
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("The PF device for VF %s has no network device name"),
                       vf_sysfs_device_path);
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2560

2561
    ret = 0;
2562
 cleanup:
R
Roopa Prabhu 已提交
2563 2564 2565 2566 2567
    VIR_FREE(pf_config_address);

    return ret;
}

2568 2569 2570 2571 2572 2573 2574 2575 2576

/* virPCIGetMdevTypes - read the mediated device types listed under
 * "<sysfspath>/mdev_supported_types".
 *
 * @types: set to a newly allocated array of types on success with at
 *         least one directory pass completed; left untouched when the
 *         directory does not exist or on failure
 *
 * Returns the number of types stored in *@types, 0 if the directory
 * does not exist, -1 on failure.
 */
ssize_t
virPCIGetMdevTypes(const char *sysfspath,
                   virMediatedDeviceTypePtr **types)
{
    g_autofree char *types_dir = NULL;
    g_autoptr(virMediatedDeviceType) current = NULL;
    virMediatedDeviceTypePtr *found = NULL;
    struct dirent *entry;
    DIR *dir = NULL;
    size_t nfound = 0;
    size_t i;
    ssize_t ret = -1;
    int rc = -1;

    types_dir = g_strdup_printf("%s/mdev_supported_types", sysfspath);

    rc = virDirOpenIfExists(&dir, types_dir);
    if (rc < 0)
        goto cleanup;

    if (rc == 0) {
        ret = 0;
        goto cleanup;
    }

    for (;;) {
        g_autofree char *type_path = NULL;

        rc = virDirRead(dir, &entry, types_dir);
        if (rc <= 0)
            break;

        /* append the type id to the path and read the attributes from there */
        type_path = g_strdup_printf("%s/%s", types_dir, entry->d_name);

        if (virMediatedDeviceTypeReadAttrs(type_path, &current) < 0)
            goto cleanup;

        if (VIR_APPEND_ELEMENT(found, nfound, current) < 0)
            goto cleanup;
    }

    if (rc < 0)
        goto cleanup;

    *types = g_steal_pointer(&found);
    ret = nfound;
    /* nothing is left for the cleanup loop to free */
    nfound = 0;

 cleanup:
    for (i = 0; i < nfound; i++)
        virMediatedDeviceTypeFree(found[i]);
    VIR_FREE(found);
    VIR_DIR_CLOSE(dir);
    return ret;
}

2619
#else
2620 2621
/* message reported by each of the non-Linux stub functions below */
static const char *unsupported = N_("not supported on non-linux platforms");

2622
virPCIDeviceAddressPtr
J
Ján Tomko 已提交
2623
virPCIGetDeviceAddressFromSysfsLink(const char *device_link G_GNUC_UNUSED)
2624 2625
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2626
    return NULL;
2627 2628 2629
}


2630
int
J
Ján Tomko 已提交
2631 2632
virPCIGetPhysicalFunction(const char *vf_sysfs_path G_GNUC_UNUSED,
                          virPCIDeviceAddressPtr *pf G_GNUC_UNUSED)
2633
{
2634
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2635 2636 2637 2638
    return -1;
}

int
J
Ján Tomko 已提交
2639 2640 2641 2642
virPCIGetVirtualFunctions(const char *sysfs_path G_GNUC_UNUSED,
                          virPCIDeviceAddressPtr **virtual_functions G_GNUC_UNUSED,
                          size_t *num_virtual_functions G_GNUC_UNUSED,
                          unsigned int *max_virtual_functions G_GNUC_UNUSED)
2643
{
2644
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2645 2646
    return -1;
}
2647 2648

int
J
Ján Tomko 已提交
2649
virPCIIsVirtualFunction(const char *vf_sysfs_device_link G_GNUC_UNUSED)
2650
{
2651
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2652 2653 2654 2655
    return -1;
}

int
J
Ján Tomko 已提交
2656 2657 2658
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link G_GNUC_UNUSED,
                              const char *vf_sysfs_device_link G_GNUC_UNUSED,
                              int *vf_index G_GNUC_UNUSED)
2659
{
2660
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2661 2662 2663 2664
    return -1;

}

2665
int
J
Ján Tomko 已提交
2666 2667
virPCIGetSysfsFile(char *virPCIDeviceName G_GNUC_UNUSED,
                   char **pci_sysfs_device_link G_GNUC_UNUSED)
2668 2669 2670 2671 2672
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}

2673
int
J
Ján Tomko 已提交
2674 2675
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr dev G_GNUC_UNUSED,
                                char **pci_sysfs_device_link G_GNUC_UNUSED)
2676
{
2677
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2678 2679 2680
    return -1;
}

2681
int
J
Ján Tomko 已提交
2682 2683 2684 2685
virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED,
                 size_t idx G_GNUC_UNUSED,
                 char *physPortID G_GNUC_UNUSED,
                 char **netname G_GNUC_UNUSED)
2686
{
2687
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2688 2689
    return -1;
}
R
Roopa Prabhu 已提交
2690 2691

int
J
Ján Tomko 已提交
2692 2693 2694 2695
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path G_GNUC_UNUSED,
                             int pfNetDevIdx G_GNUC_UNUSED,
                             char **pfname G_GNUC_UNUSED,
                             int *vf_index G_GNUC_UNUSED)
R
Roopa Prabhu 已提交
2696
{
2697
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
R
Roopa Prabhu 已提交
2698 2699
    return -1;
}
2700 2701 2702


ssize_t
J
Ján Tomko 已提交
2703 2704
virPCIGetMdevTypes(const char *sysfspath G_GNUC_UNUSED,
                   virMediatedDeviceTypePtr **types G_GNUC_UNUSED)
2705 2706 2707 2708
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}
2709
#endif /* __linux__ */
2710 2711 2712 2713 2714 2715 2716

/* virPCIDeviceIsPCIExpress - report whether @dev has a PCI Express
 * capability.
 *
 * Returns 1 if dev->pcie_cap_pos is non-zero after initialization,
 * 0 if it is zero, -1 if the config file could not be opened or the
 * device could not be initialized.
 */
int
virPCIDeviceIsPCIExpress(virPCIDevicePtr dev)
{
    int result = -1;
    int cfg = virPCIDeviceConfigOpen(dev);

    if (cfg < 0)
        return -1;

    if (virPCIDeviceInit(dev, cfg) >= 0)
        result = dev->pcie_cap_pos != 0;

    virPCIDeviceConfigClose(dev, cfg);
    return result;
}

/* virPCIDeviceHasPCIExpressLink - report whether @dev has a PCIe link,
 * judged by the port type in its PCIe capability flags.
 *
 * Returns 0 if the type is PCI_EXP_TYPE_ROOT_INT_EP or
 * PCI_EXP_TYPE_ROOT_EC, 1 for any other type, -1 if the config file
 * could not be opened or the device could not be initialized.
 *
 * NOTE(review): dev->pcie_cap_pos is used without checking that it is
 * non-zero; callers presumably check virPCIDeviceIsPCIExpress() first.
 */
int
virPCIDeviceHasPCIExpressLink(virPCIDevicePtr dev)
{
    int has_link = -1;
    int cfg = virPCIDeviceConfigOpen(dev);

    if (cfg < 0)
        return -1;

    if (virPCIDeviceInit(dev, cfg) >= 0) {
        uint16_t flags;
        uint16_t port_type;

        flags = virPCIDeviceRead16(dev, cfg, dev->pcie_cap_pos + PCI_CAP_FLAGS);
        port_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;

        has_link = port_type != PCI_EXP_TYPE_ROOT_INT_EP &&
                   port_type != PCI_EXP_TYPE_ROOT_EC;
    }

    virPCIDeviceConfigClose(dev, cfg);
    return has_link;
}

/* virPCIDeviceGetLinkCapSta - read the PCIe link capability and link
 * status registers of @dev.
 *
 * @cap_port: set to bits 24 and up of the link capabilities register
 * @cap_speed: set to the PCI_EXP_LNKCAP_SPEED field
 * @cap_width: set to the PCI_EXP_LNKCAP_WIDTH field
 * @sta_speed: set to the PCI_EXP_LNKSTA_SPEED field
 * @sta_width: set to the PCI_EXP_LNKSTA_WIDTH field
 *
 * Returns 0 on success, -1 if the config file could not be opened, the
 * device could not be initialized, or it is not a PCI-Express device.
 */
int
virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
                          int *cap_port,
                          unsigned int *cap_speed,
                          unsigned int *cap_width,
                          unsigned int *sta_speed,
                          unsigned int *sta_width)
{
    uint32_t reg;
    int rc = -1;
    int cfg = virPCIDeviceConfigOpen(dev);

    if (cfg < 0)
        return -1;

    if (virPCIDeviceInit(dev, cfg) < 0)
        goto cleanup;

    if (dev->pcie_cap_pos == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("pci device %s is not a PCI-Express device"),
                       dev->name);
        goto cleanup;
    }

    /* link capabilities (32-bit register) */
    reg = virPCIDeviceRead32(dev, cfg, dev->pcie_cap_pos + PCI_EXP_LNKCAP);
    *cap_port = reg >> 24;
    *cap_speed = reg & PCI_EXP_LNKCAP_SPEED;
    *cap_width = (reg & PCI_EXP_LNKCAP_WIDTH) >> 4;

    /* link status (16-bit register) */
    reg = virPCIDeviceRead16(dev, cfg, dev->pcie_cap_pos + PCI_EXP_LNKSTA);
    *sta_speed = reg & PCI_EXP_LNKSTA_SPEED;
    *sta_width = (reg & PCI_EXP_LNKSTA_WIDTH) >> 4;

    rc = 0;

 cleanup:
    virPCIDeviceConfigClose(dev, cfg);
    return rc;
}
2794 2795


2796 2797 2798 2799 2800 2801 2802
/* virPCIGetHeaderType - read the header type of @dev from its config
 * space.
 *
 * @hdrType: set to the header type masked with PCI_HEADER_TYPE_MASK on
 *           success, -1 otherwise
 *
 * Returns 0 on success, -1 if the config file could not be opened or
 * the masked value is not below VIR_PCI_HEADER_LAST.
 */
int virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType)
{
    uint8_t raw;
    int cfg;

    *hdrType = -1;

    cfg = virPCIDeviceConfigOpen(dev);
    if (cfg < 0)
        return -1;

    raw = virPCIDeviceRead8(dev, cfg, PCI_HEADER_TYPE);

    virPCIDeviceConfigClose(dev, cfg);

    raw &= PCI_HEADER_TYPE_MASK;
    if (raw < VIR_PCI_HEADER_LAST) {
        *hdrType = raw;
        return 0;
    }

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("Unknown PCI header type '%d' for device '%s'"),
                   raw, dev->name);
    return -1;
}


2824 2825 2826 2827 2828 2829 2830 2831 2832 2833
/* Frees @dev together with its link_cap and link_sta members.
 * Does nothing when @dev is NULL.
 */
void
virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev)
{
    if (dev) {
        VIR_FREE(dev->link_cap);
        VIR_FREE(dev->link_sta);
        VIR_FREE(dev);
    }
}
2834 2835 2836 2837 2838 2839

/* Frees the virPCIDeviceAddress pointed to by @address via VIR_FREE(). */
void
virPCIDeviceAddressFree(virPCIDeviceAddressPtr address)
{
    VIR_FREE(address);
}