/*
 * virpci.c: helper APIs for managing host PCI devices
 *
 * Copyright (C) 2009-2015 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>

23
#include "virpci.h"
24
#include "virnetdev.h"
25 26 27 28 29 30 31 32

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

33
#include "dirname.h"
34
#include "virlog.h"
35
#include "vircommand.h"
36
#include "virerror.h"
E
Eric Blake 已提交
37
#include "virfile.h"
38
#include "virkmod.h"
39 40
#include "virstring.h"
#include "virutil.h"
41
#include "viralloc.h"
42

43 44
VIR_LOG_INIT("util.pci");

45 46 47
/* Root of the PCI bus hierarchy in sysfs; always ends with a slash so
 * that it can be concatenated directly with a relative component. */
#define PCI_SYSFS   "/sys/bus/pci/"

/* Size of the id buffer: room for "XXXX XXXX" plus the terminating NUL */
#define PCI_ID_LEN  10

48 49
VIR_ENUM_IMPL(virPCIELinkSpeed,
              VIR_PCIE_LINK_SPEED_LAST,
50 51
              "", "2.5", "5", "8", "16",
);
52

53 54
VIR_ENUM_IMPL(virPCIStubDriver,
              VIR_PCI_STUB_DRIVER_LAST,
55 56 57
              "none",
              "pciback", /* XEN */
              "vfio-pci", /* VFIO */
58
);
59

60 61
VIR_ENUM_IMPL(virPCIHeader,
              VIR_PCI_HEADER_LAST,
62 63 64
              "endpoint",
              "pci-bridge",
              "cardbus-bridge",
65
);
66

67
struct _virPCIDevice {
68
    virPCIDeviceAddress address;
69

70
    char          *name;              /* domain:bus:slot.function */
71
    char          id[PCI_ID_LEN];     /* product vendor */
E
Eric Blake 已提交
72
    char          *path;
C
Chunyan Liu 已提交
73 74 75 76

    /* The driver:domain which uses the device */
    char          *used_by_drvname;
    char          *used_by_domname;
77

78 79
    unsigned int  pcie_cap_pos;
    unsigned int  pci_pm_cap_pos;
80 81
    bool          has_flr;
    bool          has_pm_reset;
82
    bool          managed;
83 84

    virPCIStubDriver stubDriver;
85 86

    /* used by reattach function */
87 88 89
    bool          unbind_from_stub;
    bool          remove_slot;
    bool          reprobe;
90 91
};

92
struct _virPCIDeviceList {
93 94
    virObjectLockable parent;

95
    size_t count;
96
    virPCIDevicePtr *devs;
97 98 99
};


100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
/* For virReportOOMError()  and virReportSystemError() */
#define VIR_FROM_THIS VIR_FROM_NONE

/* Specifications referenced in comments:
 *  PCI30  - PCI Local Bus Specification 3.0
 *  PCIe20 - PCI Express Base Specification 2.0
 *  BR12   - PCI-to-PCI Bridge Architecture Specification 1.2
 *  PM12   - PCI Bus Power Management Interface Specification 1.2
 *  ECN_AF - Advanced Capabilities for Conventional PCI ECN
 */

/* PCI30 Section 6.1 Configuration Space Organization:
 * size of the conventional config space and of its predefined header */
#define PCI_CONF_LEN                0x100
#define PCI_CONF_HEADER_LEN         0x40

/* PCI30 6.2.1: header type register and its fields */
#define PCI_HEADER_TYPE             0x0e    /* Header type */
#define PCI_HEADER_TYPE_BRIDGE      0x1
#define PCI_HEADER_TYPE_MASK        0x7f
#define PCI_HEADER_TYPE_MULTI       0x80

/* PCI30 6.2.1  Device Identification */
#define PCI_CLASS_DEVICE            0x0a    /* Device class */

/* Class Code for bridge; PCI30 D.7  Base Class 06h */
#define PCI_CLASS_BRIDGE_PCI        0x0604

/* PCI30 6.2.3  Device Status */
#define PCI_STATUS                  0x06    /* 16 bits */
#define PCI_STATUS_CAP_LIST         0x10    /* Support Capability List */

/* PCI30 6.7  Capabilities List */
#define PCI_CAPABILITY_LIST         0x34    /* Offset of first capability list entry */
#define PCI_CAP_FLAGS               2       /* Capability defined flags (16 bits) */

/* PM12 3.2.1  Capability Identifier */
#define PCI_CAP_ID_PM               0x01    /* Power Management */
/* PCI30 H Capability IDs */
#define PCI_CAP_ID_EXP              0x10    /* PCI Express */
/* ECN_AF 6.x.1.1  Capability ID for AF */
#define PCI_CAP_ID_AF               0x13    /* Advanced Features */

/* PCIe20 7.8.3  Device Capabilities Register (Offset 04h) */
#define PCI_EXP_DEVCAP              0x4     /* Device capabilities */
#define PCI_EXP_DEVCAP_FLR          (1 << 28) /* Function Level Reset */
#define PCI_EXP_LNKCAP              0xc     /* Link Capabilities */
#define PCI_EXP_LNKCAP_SPEED        0x0000f /* Maximum Link Speed */
#define PCI_EXP_LNKCAP_WIDTH        0x003f0 /* Maximum Link Width */
#define PCI_EXP_LNKSTA              0x12    /* Link Status */
#define PCI_EXP_LNKSTA_SPEED        0x000f  /* Negotiated Link Speed */
#define PCI_EXP_LNKSTA_WIDTH        0x03f0  /* Negotiated Link Width */

/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */
#define PCI_PRIMARY_BUS             0x18    /* BR12 3.2.5.2 Primary bus number */
#define PCI_SECONDARY_BUS           0x19    /* BR12 3.2.5.3 Secondary bus number */
#define PCI_SUBORDINATE_BUS         0x1a    /* BR12 3.2.5.4 Highest bus number behind the bridge */
/* BR12 3.2.5.18  Bridge Control Register */
#define PCI_BRIDGE_CONTROL          0x3e
#define PCI_BRIDGE_CTL_RESET        0x40    /* Secondary bus reset */

/* PM12 3.2.4  Power Management Control/Status (Offset = 4) */
#define PCI_PM_CTRL                 4       /* PM control and status register */
#define PCI_PM_CTRL_STATE_MASK      0x3     /* Current power state (D0 to D3) */
#define PCI_PM_CTRL_STATE_D0        0x0     /* D0 state */
#define PCI_PM_CTRL_STATE_D3hot     0x3     /* D3 state */
#define PCI_PM_CTRL_NO_SOFT_RESET   0x8     /* No reset for D3hot->D0 */

/* ECN_AF 6.x.1  Advanced Features Capability Structure */
#define PCI_AF_CAP                  0x3     /* Advanced features capabilities */
#define PCI_AF_CAP_FLR              0x2     /* Function Level Reset */

/* PCI Express capability flags register and its device/port type field */
#define PCI_EXP_FLAGS               0x2
#define PCI_EXP_FLAGS_TYPE          0x00f0
#define PCI_EXP_TYPE_DOWNSTREAM     0x6

/* Extended capability list: offset range and header layout */
#define PCI_EXT_CAP_BASE            0x100
#define PCI_EXT_CAP_LIMIT           0x1000
#define PCI_EXT_CAP_ID_MASK         0x0000ffff
#define PCI_EXT_CAP_OFFSET_SHIFT    20
#define PCI_EXT_CAP_OFFSET_MASK     0x00000ffc

/* ACS extended capability: id, control register offset and control bits */
#define PCI_EXT_CAP_ID_ACS          0x000d
#define PCI_EXT_ACS_CTRL            0x06

#define PCI_EXT_CAP_ACS_SV          0x01
#define PCI_EXT_CAP_ACS_RR          0x04
#define PCI_EXT_CAP_ACS_CR          0x08
#define PCI_EXT_CAP_ACS_UF          0x10
#define PCI_EXT_CAP_ACS_ENABLED     (PCI_EXT_CAP_ACS_SV | \
                                     PCI_EXT_CAP_ACS_RR | \
                                     PCI_EXT_CAP_ACS_CR | \
                                     PCI_EXT_CAP_ACS_UF)

#define PCI_EXP_TYPE_ROOT_INT_EP    0x9     /* Root Complex Integrated Endpoint */
#define PCI_EXP_TYPE_ROOT_EC        0xa     /* Root Complex Event Collector */

196 197 198 199 200 201
static virClassPtr virPCIDeviceListClass;

static void virPCIDeviceListDispose(void *obj);

static int virPCIOnceInit(void)
{
202
    if (!VIR_CLASS_NEW(virPCIDeviceList, virClassForObjectLockable()))
203 204 205 206 207
        return -1;

    return 0;
}

208
VIR_ONCE_GLOBAL_INIT(virPCI);
209

L
Laine Stump 已提交
210

211 212
static char *
virPCIDriverDir(const char *driver)
L
Laine Stump 已提交
213
{
214
    char *buffer;
L
Laine Stump 已提交
215

216
    buffer = g_strdup_printf(PCI_SYSFS "drivers/%s", driver);
217
    return buffer;
L
Laine Stump 已提交
218 219 220
}


221 222
static char *
virPCIFile(const char *device, const char *file)
L
Laine Stump 已提交
223
{
224
    char *buffer;
L
Laine Stump 已提交
225

226
    buffer = g_strdup_printf(PCI_SYSFS "devices/%s/%s", device, file);
227
    return buffer;
L
Laine Stump 已提交
228 229 230 231 232 233 234 235 236 237
}


/* virPCIDeviceGetDriverPathAndName - put the path to the driver
 * directory of the driver in use for this device in @path and the
 * name of the driver in @name. Both could be NULL if it's not bound
 * to any driver.
 *
 * Return 0 for success, -1 for error.
 */
238
int
L
Laine Stump 已提交
239 240 241
virPCIDeviceGetDriverPathAndName(virPCIDevicePtr dev, char **path, char **name)
{
    int ret = -1;
242
    g_autofree char *drvlink = NULL;
L
Laine Stump 已提交
243 244 245

    *path = *name = NULL;
    /* drvlink = "/sys/bus/pci/dddd:bb:ss.ff/driver" */
246
    if (!(drvlink = virPCIFile(dev->name, "driver")))
L
Laine Stump 已提交
247 248
        goto cleanup;

249 250 251 252 253
    if (!virFileExists(drvlink)) {
        ret = 0;
        goto cleanup;
    }

L
Laine Stump 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266 267
    if (virFileIsLink(drvlink) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s driver file %s is not a symlink"),
                       dev->name, drvlink);
        goto cleanup;
    }
    if (virFileResolveLink(drvlink, path) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s driver symlink %s"),
                       dev->name, drvlink);
        goto cleanup;
    }
    /* path = "/sys/bus/pci/drivers/${drivername}" */

268
    *name = g_strdup(last_component(*path));
L
Laine Stump 已提交
269 270 271
    /* name = "${drivername}" */

    ret = 0;
272
 cleanup:
L
Laine Stump 已提交
273 274 275 276 277 278 279 280
    if (ret < 0) {
        VIR_FREE(*path);
        VIR_FREE(*name);
    }
    return ret;
}


281
static int
282
virPCIDeviceConfigOpenInternal(virPCIDevicePtr dev, bool readonly, bool fatal)
283 284 285
{
    int fd;

286
    fd = open(dev->path, readonly ? O_RDONLY : O_RDWR);
287

288
    if (fd < 0) {
289 290 291 292 293 294 295 296 297
        if (fatal) {
            virReportSystemError(errno,
                                 _("Failed to open config space file '%s'"),
                                 dev->path);
        } else {
            char ebuf[1024];
            VIR_WARN("Failed to open config space file '%s': %s",
                     dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
        }
298 299
        return -1;
    }
300

301
    VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path);
302
    return fd;
303 304
}

305
static int
306
virPCIDeviceConfigOpen(virPCIDevicePtr dev)
307
{
308
    return virPCIDeviceConfigOpenInternal(dev, true, true);
309 310
}

311 312 313 314 315 316
/* Open the config space of @dev read-only; a failure only produces a
 * warning.  Returns the file descriptor or -1. */
static int
virPCIDeviceConfigOpenTry(virPCIDevicePtr dev)
{
    const bool readonly = true;
    const bool fatal = false;

    return virPCIDeviceConfigOpenInternal(dev, readonly, fatal);
}

317 318 319
/* Open the config space of @dev for reading and writing; a failure is
 * reported as an error.  Returns the file descriptor or -1. */
static int
virPCIDeviceConfigOpenWrite(virPCIDevicePtr dev)
{
    const bool readonly = false;
    const bool fatal = true;

    return virPCIDeviceConfigOpenInternal(dev, readonly, fatal);
}

323
static void
324
virPCIDeviceConfigClose(virPCIDevicePtr dev, int cfgfd)
325
{
326 327 328 329 330
    if (VIR_CLOSE(cfgfd) < 0) {
        char ebuf[1024];
        VIR_WARN("Failed to close config space file '%s': %s",
                 dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
    }
331 332
}

333

334
static int
335 336
virPCIDeviceRead(virPCIDevicePtr dev,
                 int cfgfd,
337
                 unsigned int pos,
338
                 uint8_t *buf,
339
                 unsigned int buflen)
340 341 342
{
    memset(buf, 0, buflen);

343 344
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        saferead(cfgfd, buf, buflen) != buflen) {
345
        char ebuf[1024];
346
        VIR_WARN("Failed to read from '%s' : %s", dev->path,
347 348 349 350 351 352 353
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static uint8_t
354
virPCIDeviceRead8(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
355 356
{
    uint8_t buf;
357
    virPCIDeviceRead(dev, cfgfd, pos, &buf, sizeof(buf));
358 359 360 361
    return buf;
}

static uint16_t
362
virPCIDeviceRead16(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
363 364
{
    uint8_t buf[2];
365
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
366 367 368 369
    return (buf[0] << 0) | (buf[1] << 8);
}

static uint32_t
370
virPCIDeviceRead32(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
371 372
{
    uint8_t buf[4];
373
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
374 375 376
    return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
}

377 378 379
/* Read the "class" attribute of @dev from sysfs and store bits 8..23
 * of the parsed value (the value shifted right by 8, masked to 16 bits)
 * in @device_class.
 *
 * Returns 0 on success, -1 if the file cannot be read or its content
 * does not parse as a hexadecimal number (an error is reported for the
 * latter).
 */
static int
virPCIDeviceReadClass(virPCIDevicePtr dev, uint16_t *device_class)
{
    g_autofree char *classfile = virPCIFile(dev->name, "class");
    g_autofree char *text = NULL;
    unsigned int raw;

    if (!classfile)
        return -1;

    /* class string is '0xNNNNNN\n' ... i.e. 9 bytes */
    if (virFileReadAll(classfile, 9, &text) < 0)
        return -1;

    /* cut the string after the eight characters of '0xNNNNNN' */
    text[8] = '\0';

    if (virStrToLong_ui(text, NULL, 16, &raw) >= 0) {
        *device_class = (raw >> 8) & 0xFFFF;
        return 0;
    }

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("Unusual value in %s/devices/%s/class: %s"),
                   PCI_SYSFS, dev->name, text);
    return -1;
}

403
static int
404 405
virPCIDeviceWrite(virPCIDevicePtr dev,
                  int cfgfd,
406
                  unsigned int pos,
407
                  uint8_t *buf,
408
                  unsigned int buflen)
409
{
410 411
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        safewrite(cfgfd, buf, buflen) != buflen) {
412
        char ebuf[1024];
413
        VIR_WARN("Failed to write to '%s' : %s", dev->path,
414 415 416 417 418 419 420
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static void
421
virPCIDeviceWrite16(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint16_t val)
422 423
{
    uint8_t buf[2] = { (val >> 0), (val >> 8) };
424
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
425 426 427
}

static void
428
virPCIDeviceWrite32(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint32_t val)
429
{
430
    uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val >> 24) };
431
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
432 433
}

E
Eric Blake 已提交
434 435
typedef int (*virPCIDeviceIterPredicate)(virPCIDevicePtr, virPCIDevicePtr,
                                         void *);
436 437 438 439 440 441 442

/* Iterate over available PCI devices calling @predicate
 * to compare each one to @dev.
 * Return -1 on error since we don't want to assume it is
 * safe to reset if there is an error.
 */
static int
443 444 445 446
virPCIDeviceIterDevices(virPCIDeviceIterPredicate predicate,
                        virPCIDevicePtr dev,
                        virPCIDevicePtr *matched,
                        void *data)
447 448 449
{
    DIR *dir;
    struct dirent *entry;
450
    int ret = 0;
451
    int rc;
452 453 454 455 456

    *matched = NULL;

    VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices", dev->id, dev->name);

J
Ján Tomko 已提交
457
    if (virDirOpen(&dir, PCI_SYSFS "devices") < 0)
458 459
        return -1;

E
Eric Blake 已提交
460
    while ((ret = virDirRead(dir, &entry, PCI_SYSFS "devices")) > 0) {
461
        unsigned int domain, bus, slot, function;
J
Ján Tomko 已提交
462
        g_autoptr(virPCIDevice) check = NULL;
463
        char *tmp;
464

465 466 467 468 469 470 471 472 473
        /* expected format: <domain>:<bus>:<slot>.<function> */
        if (/* domain */
            virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp != ':' ||
            /* bus */
            virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' ||
            /* slot */
            virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' ||
            /* function */
            virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) {
474 475 476 477
            VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s", entry->d_name);
            continue;
        }

478
        check = virPCIDeviceNew(domain, bus, slot, function);
479
        if (!check) {
480 481 482
            ret = -1;
            break;
        }
483

484 485 486 487 488
        rc = predicate(dev, check, data);
        if (rc < 0) {
            /* the predicate returned an error, bail */
            ret = -1;
            break;
489
        } else if (rc == 1) {
490
            VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name, check->name);
491
            *matched = g_steal_pointer(&check);
492
            ret = 1;
493 494 495
            break;
        }
    }
J
Ján Tomko 已提交
496
    VIR_DIR_CLOSE(dir);
497
    return ret;
498 499 500
}

static uint8_t
501 502 503
virPCIDeviceFindCapabilityOffset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 unsigned int capability)
504 505 506 507
{
    uint16_t status;
    uint8_t pos;

508
    status = virPCIDeviceRead16(dev, cfgfd, PCI_STATUS);
509 510 511
    if (!(status & PCI_STATUS_CAP_LIST))
        return 0;

512
    pos = virPCIDeviceRead8(dev, cfgfd, PCI_CAPABILITY_LIST);
513 514 515 516 517 518 519 520 521

    /* Zero indicates last capability, capabilities can't
     * be in the config space header and 0xff is returned
     * by the kernel if we don't have access to this region
     *
     * Note: we're not handling loops or extended
     * capabilities here.
     */
    while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) {
522
        uint8_t capid = virPCIDeviceRead8(dev, cfgfd, pos);
523 524 525 526 527 528
        if (capid == capability) {
            VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x",
                      dev->id, dev->name, capability, pos);
            return pos;
        }

529
        pos = virPCIDeviceRead8(dev, cfgfd, pos + 1);
530 531 532 533 534 535 536
    }

    VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name, capability);

    return 0;
}

J
Jiri Denemark 已提交
537
static unsigned int
538 539
virPCIDeviceFindExtendedCapabilityOffset(virPCIDevicePtr dev,
                                         int cfgfd,
540
                                         unsigned int capability)
J
Jiri Denemark 已提交
541 542 543 544 545 546 547 548 549 550
{
    int ttl;
    unsigned int pos;
    uint32_t header;

    /* minimum 8 bytes per capability */
    ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8;
    pos = PCI_EXT_CAP_BASE;

    while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) {
551
        header = virPCIDeviceRead32(dev, cfgfd, pos);
J
Jiri Denemark 已提交
552 553 554 555 556 557 558 559 560 561 562

        if ((header & PCI_EXT_CAP_ID_MASK) == capability)
            return pos;

        pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK;
        ttl--;
    }

    return 0;
}

563 564 565 566
/* detects whether this device has FLR.  Returns 0 if the device does
 * not have FLR, 1 if it does, and -1 on error
 */
static int
567
virPCIDeviceDetectFunctionLevelReset(virPCIDevicePtr dev, int cfgfd)
568
{
M
Mark McLoughlin 已提交
569
    uint32_t caps;
570
    uint8_t pos;
571
    g_autofree char *path = NULL;
572
    int found;
573 574 575 576 577 578 579 580

    /* The PCIe Function Level Reset capability allows
     * individual device functions to be reset without
     * affecting any other functions on the device or
     * any other devices on the bus. This is only common
     * on SR-IOV NICs at the moment.
     */
    if (dev->pcie_cap_pos) {
581
        caps = virPCIDeviceRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP);
582 583 584 585 586 587 588 589 590 591
        if (caps & PCI_EXP_DEVCAP_FLR) {
            VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id, dev->name);
            return 1;
        }
    }

    /* The PCI AF Function Level Reset capability is
     * the same thing, except for conventional PCI
     * devices. This is not common yet.
     */
592
    pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF);
593
    if (pos) {
594
        caps = virPCIDeviceRead16(dev, cfgfd, pos + PCI_AF_CAP);
595 596 597 598 599 600
        if (caps & PCI_AF_CAP_FLR) {
            VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id, dev->name);
            return 1;
        }
    }

601 602 603 604 605 606
    /* there are some buggy devices that do support FLR, but forget to
     * advertise that fact in their capabilities.  However, FLR is *required*
     * to be present for virtual functions (VFs), so if we see that this
     * device is a VF, we just assume FLR works
     */

607
    path = g_strdup_printf(PCI_SYSFS "devices/%s/physfn", dev->name);
608 609 610 611 612 613 614 615

    found = virFileExists(path);
    if (found) {
        VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF; forcing flr on",
                  dev->id, dev->name);
        return 1;
    }

616 617 618 619 620 621 622 623 624
    VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name);

    return 0;
}

/* Require the device has the PCI Power Management capability
 * and that a D3hot->D0 transition will results in a full
 * internal reset, not just a soft reset.
 */
625
static unsigned int
626
virPCIDeviceDetectPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
627 628 629 630 631
{
    if (dev->pci_pm_cap_pos) {
        uint32_t ctl;

        /* require the NO_SOFT_RESET bit is clear */
632
        ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
633 634 635 636 637 638 639 640 641 642 643
        if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) {
            VIR_DEBUG("%s %s: detected PM reset capability", dev->id, dev->name);
            return 1;
        }
    }

    VIR_DEBUG("%s %s: no PM reset capability found", dev->id, dev->name);

    return 0;
}

644
/* Any active devices on the same domain/bus ? */
645
static int
646
virPCIDeviceSharesBusWithActive(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
647
{
648
    virPCIDeviceList *inactiveDevs = data;
649

650
    /* Different domain, different bus, or simply identical device */
651 652 653 654
    if (dev->address.domain != check->address.domain ||
        dev->address.bus != check->address.bus ||
        (dev->address.slot == check->address.slot &&
         dev->address.function == check->address.function))
655 656
        return 0;

657
    /* same bus, but inactive, i.e. about to be assigned to guest */
658
    if (inactiveDevs && virPCIDeviceListFind(inactiveDevs, check))
659
        return 0;
660

661
    return 1;
662 663
}

664 665 666
/* Return the first device found that shares the bus of @dev and is not
 * listed in @inactiveDevs, or NULL if there is none or the iteration
 * failed. */
static virPCIDevicePtr
virPCIDeviceBusContainsActiveDevices(virPCIDevicePtr dev,
                                     virPCIDeviceList *inactiveDevs)
{
    virPCIDevicePtr active = NULL;
    int rc;

    rc = virPCIDeviceIterDevices(virPCIDeviceSharesBusWithActive,
                                 dev, &active, inactiveDevs);

    return rc < 0 ? NULL : active;
}

/* Is @check the parent of @dev ? */
676
static int
677
virPCIDeviceIsParent(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
678 679 680
{
    uint16_t device_class;
    uint8_t header_type, secondary, subordinate;
681
    virPCIDevicePtr *best = data;
682 683
    int ret = 0;
    int fd;
684

685
    if (dev->address.domain != check->address.domain)
686 687
        return 0;

688
    if ((fd = virPCIDeviceConfigOpenTry(check)) < 0)
689 690
        return 0;

691
    /* Is it a bridge? */
692 693
    ret = virPCIDeviceReadClass(check, &device_class);
    if (ret < 0 || device_class != PCI_CLASS_BRIDGE_PCI)
694
        goto cleanup;
695 696

    /* Is it a plane? */
697
    header_type = virPCIDeviceRead8(check, fd, PCI_HEADER_TYPE);
698
    if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
699
        goto cleanup;
700

701 702
    secondary   = virPCIDeviceRead8(check, fd, PCI_SECONDARY_BUS);
    subordinate = virPCIDeviceRead8(check, fd, PCI_SUBORDINATE_BUS);
703

704
    VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name, check->name);
705

706 707 708
    /* if the secondary bus exactly equals the device's bus, then we found
     * the direct parent.  No further work is necessary
     */
709
    if (dev->address.bus == secondary) {
710 711 712
        ret = 1;
        goto cleanup;
    }
713

714
    /* otherwise, SRIOV allows VFs to be on different buses than their PFs.
715 716 717
     * In this case, what we need to do is look for the "best" match; i.e.
     * the most restrictive match that still satisfies all of the conditions.
     */
718
    if (dev->address.bus > secondary && dev->address.bus <= subordinate) {
719
        if (*best == NULL) {
720 721 722 723
            *best = virPCIDeviceNew(check->address.domain,
                                    check->address.bus,
                                    check->address.slot,
                                    check->address.function);
724 725 726 727 728
            if (*best == NULL) {
                ret = -1;
                goto cleanup;
            }
        } else {
729 730 731 732
            /* OK, we had already recorded a previous "best" match for the
             * parent.  See if the current device is more restrictive than the
             * best, and if so, make it the new best
             */
733 734 735
            int bestfd;
            uint8_t best_secondary;

736
            if ((bestfd = virPCIDeviceConfigOpenTry(*best)) < 0)
737
                goto cleanup;
738 739
            best_secondary = virPCIDeviceRead8(*best, bestfd, PCI_SECONDARY_BUS);
            virPCIDeviceConfigClose(*best, bestfd);
740 741

            if (secondary > best_secondary) {
742
                virPCIDeviceFree(*best);
743 744 745 746
                *best = virPCIDeviceNew(check->address.domain,
                                        check->address.bus,
                                        check->address.slot,
                                        check->address.function);
747 748 749 750
                if (*best == NULL) {
                    ret = -1;
                    goto cleanup;
                }
751 752 753 754
            }
        }
    }

755
 cleanup:
756
    virPCIDeviceConfigClose(check, fd);
757
    return ret;
758 759
}

760
static int
761
virPCIDeviceGetParent(virPCIDevicePtr dev, virPCIDevicePtr *parent)
762
{
763
    virPCIDevicePtr best = NULL;
764 765 766
    int ret;

    *parent = NULL;
767
    ret = virPCIDeviceIterDevices(virPCIDeviceIsParent, dev, parent, &best);
768
    if (ret == 1)
769
        virPCIDeviceFree(best);
770 771 772
    else if (ret == 0)
        *parent = best;
    return ret;
773 774 775 776 777 778
}

/* Secondary Bus Reset is our sledgehammer - it resets all
 * devices behind a bus.
 */
static int
779 780 781
virPCIDeviceTrySecondaryBusReset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 virPCIDeviceList *inactiveDevs)
782
{
J
Ján Tomko 已提交
783 784
    g_autoptr(virPCIDevice) parent = NULL;
    g_autoptr(virPCIDevice) conflict = NULL;
785 786 787
    uint8_t config_space[PCI_CONF_LEN];
    uint16_t ctl;
    int ret = -1;
788
    int parentfd;
789

790 791 792
    /* Refuse to do a secondary bus reset if there are other
     * devices/functions behind the bus are used by the host
     * or other guests.
793
     */
794
    if ((conflict = virPCIDeviceBusContainsActiveDevices(dev, inactiveDevs))) {
795
        virReportError(VIR_ERR_INTERNAL_ERROR,
796 797
                       _("Active %s devices on bus with %s, not doing bus reset"),
                       conflict->name, dev->name);
798 799 800 801
        return -1;
    }

    /* Find the parent bus */
802
    if (virPCIDeviceGetParent(dev, &parent) < 0)
803
        return -1;
804
    if (!parent) {
805
        virReportError(VIR_ERR_INTERNAL_ERROR,
806 807
                       _("Failed to find parent device for %s"),
                       dev->name);
808 809
        return -1;
    }
810
    if ((parentfd = virPCIDeviceConfigOpenWrite(parent)) < 0)
811
        goto out;
812 813 814 815 816 817 818

    VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name);

    /* Save and restore the device's config space; we only do this
     * for the supplied device since we refuse to do a reset if there
     * are multiple devices/functions
     */
819
    if (virPCIDeviceRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
820
        virReportError(VIR_ERR_INTERNAL_ERROR,
821
                       _("Failed to read PCI config space for %s"),
822
                       dev->name);
823 824 825 826 827 828
        goto out;
    }

    /* Read the control register, set the reset flag, wait 200ms,
     * unset the reset flag and wait 200ms.
     */
H
hexin 已提交
829
    ctl = virPCIDeviceRead16(dev, parentfd, PCI_BRIDGE_CONTROL);
830

831 832
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL,
                        ctl | PCI_BRIDGE_CTL_RESET);
833

834
    g_usleep(200 * 1000); /* sleep 200ms */
835

836
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl);
837

838
    g_usleep(200 * 1000); /* sleep 200ms */
839

840
    if (virPCIDeviceWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
841
        virReportError(VIR_ERR_INTERNAL_ERROR,
842 843 844 845
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        goto out;
    }
846
    ret = 0;
847

848
 out:
849
    virPCIDeviceConfigClose(parent, parentfd);
850 851 852 853 854 855 856 857
    return ret;
}

/* Power management reset attempts to reset a device using a
 * D-state transition from D3hot to D0. Note, in detect_pm_reset()
 * above we require the device supports a full internal reset.
 */
static int
858
virPCIDeviceTryPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
859 860 861 862 863 864 865 866
{
    uint8_t config_space[PCI_CONF_LEN];
    uint32_t ctl;

    if (!dev->pci_pm_cap_pos)
        return -1;

    /* Save and restore the device's config space. */
867
    if (virPCIDeviceRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
868
        virReportError(VIR_ERR_INTERNAL_ERROR,
869
                       _("Failed to read PCI config space for %s"),
870
                       dev->name);
871 872 873 874 875
        return -1;
    }

    VIR_DEBUG("%s %s: doing a power management reset", dev->id, dev->name);

876
    ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
877 878
    ctl &= ~PCI_PM_CTRL_STATE_MASK;

879 880
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D3hot);
881

882
    g_usleep(10 * 1000); /* sleep 10ms */
883

884 885
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D0);
886

887
    g_usleep(10 * 1000); /* sleep 10ms */
888

889
    if (virPCIDeviceWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
890
        virReportError(VIR_ERR_INTERNAL_ERROR,
891 892 893 894
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        return -1;
    }
895 896 897 898 899

    return 0;
}

static int
900
virPCIDeviceInit(virPCIDevicePtr dev, int cfgfd)
901
{
902 903
    int flr;

904 905 906
    dev->pcie_cap_pos   = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP);
    dev->pci_pm_cap_pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM);
    flr = virPCIDeviceDetectFunctionLevelReset(dev, cfgfd);
907
    if (flr < 0)
908
        return flr;
909 910
    dev->has_flr        = !!flr;
    dev->has_pm_reset   = !!virPCIDeviceDetectPowerManagementReset(dev, cfgfd);
911

912 913 914 915
    return 0;
}

int
916 917 918
virPCIDeviceReset(virPCIDevicePtr dev,
                  virPCIDeviceList *activeDevs,
                  virPCIDeviceList *inactiveDevs)
919
{
920 921
    g_autofree char *drvPath = NULL;
    g_autofree char *drvName = NULL;
922
    int ret = -1;
923
    int fd = -1;
924 925 926 927 928 929 930 931 932 933 934 935
    int hdrType = -1;

    if (virPCIGetHeaderType(dev, &hdrType) < 0)
        return -1;

    if (hdrType != VIR_PCI_HEADER_ENDPOINT) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid attempt to reset PCI device %s. "
                         "Only PCI endpoint devices can be reset"),
                       dev->name);
        return -1;
    }
936

937
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
938
        virReportError(VIR_ERR_INTERNAL_ERROR,
939 940 941 942
                       _("Not resetting active device %s"), dev->name);
        return -1;
    }

943 944 945 946 947 948 949 950
    /* If the device is currently bound to vfio-pci, ignore all
     * requests to reset it, since the vfio-pci driver will always
     * reset it whenever appropriate, so doing it ourselves would just
     * be redundant.
     */
    if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, &drvName) < 0)
        goto cleanup;

951
    if (virPCIStubDriverTypeFromString(drvName) == VIR_PCI_STUB_DRIVER_VFIO) {
952 953 954 955 956 957 958
        VIR_DEBUG("Device %s is bound to vfio-pci - skip reset",
                  dev->name);
        ret = 0;
        goto cleanup;
    }
    VIR_DEBUG("Resetting device %s", dev->name);

959
    if ((fd = virPCIDeviceConfigOpenWrite(dev)) < 0)
960
        goto cleanup;
961

962
    if (virPCIDeviceInit(dev, fd) < 0)
963 964
        goto cleanup;

965 966 967
    /* KVM will perform FLR when starting and stopping
     * a guest, so there is no need for us to do it here.
     */
968 969 970 971
    if (dev->has_flr) {
        ret = 0;
        goto cleanup;
    }
972

973 974 975 976 977
    /* If the device supports PCI power management reset,
     * that's the next best thing because it only resets
     * the function, not the whole device.
     */
    if (dev->has_pm_reset)
978
        ret = virPCIDeviceTryPowerManagementReset(dev, fd);
979

980
    /* Bus reset is not an option with the root bus */
981
    if (ret < 0 && dev->address.bus != 0)
982
        ret = virPCIDeviceTrySecondaryBusReset(dev, fd, inactiveDevs);
983

984 985
    if (ret < 0) {
        virErrorPtr err = virGetLastError();
986
        virReportError(VIR_ERR_INTERNAL_ERROR,
987 988
                       _("Unable to reset PCI device %s: %s"),
                       dev->name,
989 990
                       err ? err->message :
                       _("no FLR, PM reset or bus reset available"));
991 992
    }

993
 cleanup:
994
    virPCIDeviceConfigClose(dev, fd);
995 996 997
    return ret;
}

998

999
static int
1000
virPCIProbeStubDriver(virPCIStubDriver driver)
1001
{
1002
    const char *drvname = NULL;
1003
    g_autofree char *drvpath = NULL;
1004
    bool probed = false;
1005

1006 1007 1008 1009 1010 1011 1012 1013
    if (driver == VIR_PCI_STUB_DRIVER_NONE ||
        !(drvname = virPCIStubDriverTypeToString(driver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s",
                       _("Attempting to use unknown stub driver"));
        return -1;
    }

1014
 recheck:
1015
    if ((drvpath = virPCIDriverDir(drvname)) && virFileExists(drvpath))
1016 1017
        /* driver already loaded, return */
        return 0;
1018 1019

    if (!probed) {
1020
        g_autofree char *errbuf = NULL;
1021
        probed = true;
1022 1023
        if ((errbuf = virKModLoad(drvname, true))) {
            VIR_WARN("failed to load driver %s: %s", drvname, errbuf);
1024
            goto cleanup;
1025
        }
1026 1027

        goto recheck;
1028 1029
    }

1030
 cleanup:
1031 1032 1033
    /* If we know failure was because of blacklist, let's report that;
     * otherwise, report a more generic failure message
     */
1034
    if (virKModIsBlacklisted(drvname)) {
1035 1036 1037
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s: "
                         "administratively prohibited"),
1038
                       drvname);
1039 1040 1041
    } else {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s"),
1042
                       drvname);
1043 1044
    }

1045
    return -1;
1046 1047
}

1048
int
1049
virPCIDeviceUnbind(virPCIDevicePtr dev)
1050
{
1051 1052 1053
    g_autofree char *path = NULL;
    g_autofree char *drvpath = NULL;
    g_autofree char *driver = NULL;
1054 1055

    if (virPCIDeviceGetDriverPathAndName(dev, &drvpath, &driver) < 0)
1056
        return -1;
1057

1058
    if (!driver)
1059
        /* The device is not bound to any driver */
1060
        return 0;
1061

1062
    if (!(path = virPCIFile(dev->name, "driver/unbind")))
1063
        return -1;
1064 1065 1066 1067 1068 1069

    if (virFileExists(path)) {
        if (virFileWriteStr(path, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to unbind PCI device '%s' from %s"),
                                 dev->name, driver);
1070
            return -1;
1071 1072 1073
        }
    }

1074
    return 0;
1075 1076
}

1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101

/**
 * virPCIDeviceRebind:
 *  @dev: virPCIDevice object describing the device to rebind
 *
 * Unbind the device from its current driver and then ask the kernel
 * to probe it again by writing its name to the PCI "drivers_probe"
 * file.
 *
 * Returns 0 on success, -1 on failure
 */
int
virPCIDeviceRebind(virPCIDevicePtr dev)
{
    const char *probeFile = PCI_SYSFS "drivers_probe";

    if (virPCIDeviceUnbind(dev) < 0)
        return -1;

    if (virFileWriteStr(probeFile, dev->name, 0) >= 0)
        return 0;

    virReportSystemError(errno,
                         _("Failed to trigger a probe for PCI device '%s'"),
                         dev->name);
    return -1;
}


1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112
/*
 * Bind a PCI device to a driver using driver_override sysfs interface.
 * E.g.
 *
 *  echo driver-name > /sys/bus/pci/devices/0000:03:00.0/driver_override
 *  echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
 *  echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
 *
 * An empty driverName will cause the device to be bound to its
 * preferred driver.
 */
1113
static int
1114 1115 1116
virPCIDeviceBindWithDriverOverride(virPCIDevicePtr dev,
                                   const char *driverName)
{
1117
    g_autofree char *path = NULL;
1118 1119 1120 1121 1122 1123 1124 1125 1126

    if (!(path = virPCIFile(dev->name, "driver_override")))
        return -1;

    if (virFileWriteStr(path, driverName, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to add driver '%s' to driver_override "
                               " interface of PCI device '%s'"),
                             driverName, dev->name);
1127
        return -1;
1128 1129
    }

1130
    if (virPCIDeviceRebind(dev) < 0)
1131
        return -1;
1132

1133
    return 0;
1134 1135 1136
}

static int
1137
virPCIDeviceUnbindFromStub(virPCIDevicePtr dev)
1138 1139 1140 1141 1142 1143 1144 1145
{
    if (!dev->unbind_from_stub) {
        VIR_DEBUG("Unbind from stub skipped for PCI device %s", dev->name);
        return 0;
    }

    return virPCIDeviceBindWithDriverOverride(dev, "\n");
}
1146 1147

static int
1148
virPCIDeviceBindToStub(virPCIDevicePtr dev)
1149 1150
{
    const char *stubDriverName;
1151 1152
    g_autofree char *stubDriverPath = NULL;
    g_autofree char *driverLink = NULL;
1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168

    /* Check the device is configured to use one of the known stub drivers */
    if (dev->stubDriver == VIR_PCI_STUB_DRIVER_NONE) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    } else if (!(stubDriverName = virPCIStubDriverTypeToString(dev->stubDriver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unknown stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    if (!(stubDriverPath = virPCIDriverDir(stubDriverName))  ||
        !(driverLink = virPCIFile(dev->name, "driver")))
1169
        return -1;
1170 1171 1172 1173 1174 1175

    if (virFileExists(driverLink)) {
        if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
            /* The device is already bound to the correct driver */
            VIR_DEBUG("Device %s is already bound to %s",
                      dev->name, stubDriverName);
1176
            return 0;
1177 1178 1179 1180
        }
    }

    if (virPCIDeviceBindWithDriverOverride(dev, stubDriverName) < 0)
1181
        return -1;
1182 1183

    dev->unbind_from_stub = true;
1184
    return 0;
1185 1186
}

1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
/* virPCIDeviceDetach:
 *
 * Detach this device from the host driver, attach it to the stub
 * driver (previously set with virPCIDeviceSetStubDriver(), and add *a
 * copy* of the object to the inactiveDevs list (if provided). This
 * function will *never* consume dev, so the caller should free it.
 *
 * Returns 0 on success, -1 on failure (will fail if the device is
 * already in the activeDevs list, but will be a NOP if the device is
 * already bound to the stub).
 *
 * GENERAL NOTE: activeDevs should be a list of all PCI devices
 * currently in use by a domain. inactiveDevs is a list of all PCI
 * devices that libvirt has detached from the host driver + attached
 * to the stub driver, but hasn't yet assigned to a domain. Any device
 * that is still attached to its host driver should not be on either
 * list.
 */
1205
int
1206 1207
virPCIDeviceDetach(virPCIDevicePtr dev,
                   virPCIDeviceList *activeDevs,
1208
                   virPCIDeviceList *inactiveDevs)
1209
{
1210
    if (virPCIProbeStubDriver(dev->stubDriver) < 0)
1211 1212
        return -1;

1213
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1214
        virReportError(VIR_ERR_INTERNAL_ERROR,
1215 1216 1217 1218
                       _("Not detaching active device %s"), dev->name);
        return -1;
    }

1219
    if (virPCIDeviceBindToStub(dev) < 0)
1220 1221
        return -1;

1222 1223 1224
    /* Add *a copy of* the dev into list inactiveDevs, if
     * it's not already there.
     */
1225 1226 1227 1228
    if (inactiveDevs && !virPCIDeviceListFind(inactiveDevs, dev)) {
        VIR_DEBUG("Adding PCI device %s to inactive list", dev->name);
        if (virPCIDeviceListAddCopy(inactiveDevs, dev) < 0)
            return -1;
1229 1230 1231
    }

    return 0;
1232 1233
}

1234 1235 1236 1237
/*
 * Pre-condition: inactivePCIHostdevs & activePCIHostdevs
 * are locked
 */
1238
int
1239 1240
virPCIDeviceReattach(virPCIDevicePtr dev,
                     virPCIDeviceListPtr activeDevs,
1241
                     virPCIDeviceListPtr inactiveDevs)
1242
{
1243
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1244
        virReportError(VIR_ERR_INTERNAL_ERROR,
1245 1246 1247 1248
                       _("Not reattaching active device %s"), dev->name);
        return -1;
    }

1249
    if (virPCIDeviceUnbindFromStub(dev) < 0)
1250 1251 1252
        return -1;

    /* Steal the dev from list inactiveDevs */
1253 1254
    if (inactiveDevs) {
        VIR_DEBUG("Removing PCI device %s from inactive list", dev->name);
1255
        virPCIDeviceListDel(inactiveDevs, dev);
1256
    }
1257 1258

    return 0;
1259 1260 1261
}

static char *
1262
virPCIDeviceReadID(virPCIDevicePtr dev, const char *id_name)
1263
{
1264
    g_autofree char *path = NULL;
1265 1266
    char *id_str;

1267
    if (!(path = virPCIFile(dev->name, id_name)))
1268
        return NULL;
1269 1270

    /* ID string is '0xNNNN\n' ... i.e. 7 bytes */
1271
    if (virFileReadAll(path, 7, &id_str) < 0)
1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285
        return NULL;

    /* Check for 0x suffix */
    if (id_str[0] != '0' || id_str[1] != 'x') {
        VIR_FREE(id_str);
        return NULL;
    }

    /* Chop off the newline; we know the string is 7 bytes */
    id_str[6] = '\0';

    return id_str;
}

1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330
/* virPCIDeviceAddressIsValid:
 * @addr: address to check
 * @report: whether to report an error for an invalid address
 *
 * Check that bus, slot and function are within their allowed ranges
 * and that the address is not empty.
 *
 * Returns true if @addr is valid, false otherwise.
 */
bool
virPCIDeviceAddressIsValid(virPCIDeviceAddressPtr addr,
                           bool report)
{
    bool valid = false;

    if (addr->bus > 0xFF) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address bus='0x%x', "
                             "must be <= 0xFF"),
                           addr->bus);
        }
    } else if (addr->slot > 0x1F) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address slot='0x%x', "
                             "must be <= 0x1F"),
                           addr->slot);
        }
    } else if (addr->function > 7) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address function=0x%x, "
                             "must be <= 7"),
                           addr->function);
        }
    } else if (virPCIDeviceAddressIsEmpty(addr)) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Invalid PCI address 0000:00:00, at least "
                             "one of domain, bus, or slot must be > 0"));
        }
    } else {
        valid = true;
    }

    return valid;
}

/* virPCIDeviceAddressIsEmpty:
 * @addr: address to check
 *
 * Returns true if domain, bus and slot of @addr are all zero (the
 * function number is not looked at), false otherwise.
 */
bool
virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress *addr)
{
    return addr->domain == 0 && addr->bus == 0 && addr->slot == 0;
}

bool
1331 1332
virPCIDeviceAddressEqual(const virPCIDeviceAddress *addr1,
                         const virPCIDeviceAddress *addr2)
1333 1334 1335 1336 1337 1338 1339 1340 1341 1342
{
    if (addr1->domain == addr2->domain &&
        addr1->bus == addr2->bus &&
        addr1->slot == addr2->slot &&
        addr1->function == addr2->function) {
        return true;
    }
    return false;
}

1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356
/**
 * virPCIDeviceAddressCopy:
 * @dst: where to store address
 * @src: source address to copy
 *
 * Copies the whole @src address structure into @dst, which has to be
 * pre-allocated by the caller.
 */
void
virPCIDeviceAddressCopy(virPCIDeviceAddressPtr dst,
                        const virPCIDeviceAddress *src)
{
    memcpy(dst, src, sizeof(*dst));
}

1357
char *
1358
virPCIDeviceAddressAsString(const virPCIDeviceAddress *addr)
1359 1360 1361
{
    char *str;

1362 1363 1364 1365 1366
    str = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT,
                          addr->domain,
                          addr->bus,
                          addr->slot,
                          addr->function);
1367 1368 1369
    return str;
}

1370
virPCIDevicePtr
1371 1372 1373 1374
virPCIDeviceNew(unsigned int domain,
                unsigned int bus,
                unsigned int slot,
                unsigned int function)
1375
{
J
Ján Tomko 已提交
1376
    g_autoptr(virPCIDevice) dev = NULL;
1377 1378
    g_autofree char *vendor = NULL;
    g_autofree char *product = NULL;
1379

1380
    if (VIR_ALLOC(dev) < 0)
1381 1382
        return NULL;

1383 1384 1385 1386
    dev->address.domain = domain;
    dev->address.bus = bus;
    dev->address.slot = slot;
    dev->address.function = function;
1387

1388 1389
    dev->name = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT, domain, bus, slot,
                                function);
1390

1391
    dev->path = g_strdup_printf(PCI_SYSFS "devices/%s/config", dev->name);
1392

1393
    if (!virFileExists(dev->path)) {
1394 1395 1396
        virReportSystemError(errno,
                             _("Device %s not found: could not access %s"),
                             dev->name, dev->path);
1397
        return NULL;
1398 1399
    }

1400 1401
    vendor  = virPCIDeviceReadID(dev, "vendor");
    product = virPCIDeviceReadID(dev, "device");
1402 1403

    if (!vendor || !product) {
1404
        virReportError(VIR_ERR_INTERNAL_ERROR,
1405 1406
                       _("Failed to read product/vendor ID for %s"),
                       dev->name);
1407
        return NULL;
1408 1409 1410
    }

    /* strings contain '0x' prefix */
1411 1412
    if (g_snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2],
                   &product[2]) >= sizeof(dev->id)) {
1413
        virReportError(VIR_ERR_INTERNAL_ERROR,
E
Eric Blake 已提交
1414 1415
                       _("dev->id buffer overflow: %s %s"),
                       &vendor[2], &product[2]);
1416
        return NULL;
E
Eric Blake 已提交
1417
    }
1418 1419 1420

    VIR_DEBUG("%s %s: initialized", dev->id, dev->name);

J
Ján Tomko 已提交
1421
    return g_steal_pointer(&dev);
1422 1423
}

L
Laine Stump 已提交
1424 1425 1426 1427 1428 1429

/* virPCIDeviceCopy:
 * @dev: device to duplicate
 *
 * Returns a newly allocated copy of @dev whose name, path and used_by
 * strings are duplicated as well, or NULL on allocation failure.
 */
virPCIDevicePtr
virPCIDeviceCopy(virPCIDevicePtr dev)
{
    virPCIDevicePtr copy;

    if (VIR_ALLOC(copy) < 0)
        return NULL;

    /* shallow copy to take care of most attributes */
    *copy = *dev;

    /* the strings must not be shared with the original */
    copy->name = g_strdup(dev->name);
    copy->path = g_strdup(dev->path);
    copy->used_by_drvname = g_strdup(dev->used_by_drvname);
    copy->used_by_domname = g_strdup(dev->used_by_domname);

    return copy;
}


1445
void
1446
virPCIDeviceFree(virPCIDevicePtr dev)
1447
{
1448 1449
    if (!dev)
        return;
1450
    VIR_DEBUG("%s %s: freeing", dev->id, dev->name);
1451
    VIR_FREE(dev->name);
E
Eric Blake 已提交
1452
    VIR_FREE(dev->path);
C
Chunyan Liu 已提交
1453 1454
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);
1455 1456
    VIR_FREE(dev);
}
1457

1458 1459 1460 1461 1462
/**
 * virPCIDeviceGetAddress:
 * @dev: device to get address from
 *
 * Hand out the PCI address stored inside @dev. The returned pointer
 * refers to memory owned by the device and must not be freed.
 *
 * Returns: a pointer to the address, which can never be NULL.
 */
virPCIDeviceAddressPtr
virPCIDeviceGetAddress(virPCIDevicePtr dev)
{
    return &dev->address;
}

1473
const char *
1474
virPCIDeviceGetName(virPCIDevicePtr dev)
1475 1476 1477 1478
{
    return dev->name;
}

1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490
/**
 * virPCIDeviceGetConfigPath:
 * @dev: device to query
 *
 * Returns the path of @dev's PCI config file as stored in the device;
 * the string is not copied.
 */
const char *
virPCIDeviceGetConfigPath(virPCIDevicePtr dev)
{
    const char *configPath = dev->path;

    return configPath;
}

1491
/* virPCIDeviceSetManaged:
 * @dev: device to update
 * @managed: new value of the managed flag
 */
void
virPCIDeviceSetManaged(virPCIDevicePtr dev,
                       bool managed)
{
    dev->managed = managed;
}

1496
bool
1497
virPCIDeviceGetManaged(virPCIDevicePtr dev)
1498 1499 1500 1501
{
    return dev->managed;
}

1502 1503
/* virPCIDeviceSetStubDriver:
 * @dev: device to update
 * @driver: stub driver to record for the device
 */
void
virPCIDeviceSetStubDriver(virPCIDevicePtr dev,
                          virPCIStubDriver driver)
{
    dev->stubDriver = driver;
}

1508
virPCIStubDriver
1509 1510 1511 1512 1513
virPCIDeviceGetStubDriver(virPCIDevicePtr dev)
{
    return dev->stubDriver;
}

1514
bool
1515
virPCIDeviceGetUnbindFromStub(virPCIDevicePtr dev)
1516 1517 1518 1519 1520
{
    return dev->unbind_from_stub;
}

void
1521
virPCIDeviceSetUnbindFromStub(virPCIDevicePtr dev, bool unbind)
1522
{
1523
    dev->unbind_from_stub = unbind;
1524 1525
}

1526
bool
1527
virPCIDeviceGetRemoveSlot(virPCIDevicePtr dev)
1528 1529 1530 1531 1532
{
    return dev->remove_slot;
}

void
1533
virPCIDeviceSetRemoveSlot(virPCIDevicePtr dev, bool remove_slot)
1534
{
1535
    dev->remove_slot = remove_slot;
1536 1537
}

1538
bool
1539
virPCIDeviceGetReprobe(virPCIDevicePtr dev)
1540 1541 1542 1543 1544
{
    return dev->reprobe;
}

void
1545
virPCIDeviceSetReprobe(virPCIDevicePtr dev, bool reprobe)
1546
{
1547
    dev->reprobe = reprobe;
1548 1549
}

C
Chunyan Liu 已提交
1550 1551 1552 1553
/* virPCIDeviceSetUsedBy:
 * @dev: device to update
 * @drv_name: name of the driver using the device
 * @dom_name: name of the domain using the device
 *
 * Replace the used-by information of @dev with copies of the given
 * strings; the previous strings are freed.
 *
 * Returns 0 (always).
 */
int
virPCIDeviceSetUsedBy(virPCIDevicePtr dev,
                      const char *drv_name,
                      const char *dom_name)
{
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);

    dev->used_by_drvname = g_strdup(drv_name);
    dev->used_by_domname = g_strdup(dom_name);

    return 0;
}

C
Chunyan Liu 已提交
1563 1564 1565 1566
/* virPCIDeviceGetUsedBy:
 * @dev: device to query
 * @drv_name: filled with the driver name stored in @dev
 * @dom_name: filled with the domain name stored in @dev
 *
 * The strings are not copied; both pointers refer to memory owned by
 * the device.
 */
void
virPCIDeviceGetUsedBy(virPCIDevicePtr dev,
                      const char **drv_name,
                      const char **dom_name)
{
    *drv_name = dev->used_by_drvname;
    *dom_name = dev->used_by_domname;
}

1572 1573
virPCIDeviceListPtr
virPCIDeviceListNew(void)
1574
{
1575
    virPCIDeviceListPtr list;
1576

1577 1578 1579 1580
    if (virPCIInitialize() < 0)
        return NULL;

    if (!(list = virObjectLockableNew(virPCIDeviceListClass)))
1581 1582 1583 1584 1585
        return NULL;

    return list;
}

1586 1587
static void
virPCIDeviceListDispose(void *obj)
1588
{
1589
    virPCIDeviceListPtr list = obj;
1590
    size_t i;
1591 1592

    for (i = 0; i < list->count; i++) {
1593
        virPCIDeviceFree(list->devs[i]);
1594 1595 1596 1597 1598 1599 1600 1601
        list->devs[i] = NULL;
    }

    list->count = 0;
    VIR_FREE(list->devs);
}

int
1602 1603
virPCIDeviceListAdd(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
1604
{
1605
    if (virPCIDeviceListFind(list, dev)) {
1606
        virReportError(VIR_ERR_INTERNAL_ERROR,
1607 1608 1609
                       _("Device %s is already in use"), dev->name);
        return -1;
    }
1610
    return VIR_APPEND_ELEMENT(list->devs, list->count, dev);
1611 1612
}

L
Laine Stump 已提交
1613 1614 1615 1616 1617

/* virPCIDeviceListAddCopy:
 * @list: list to add to
 * @dev: device to duplicate
 *
 * Add a *copy* of the device to this list; @dev itself is left to the
 * caller.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virPCIDeviceListAddCopy(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    g_autoptr(virPCIDevice) copy = NULL;

    if (!(copy = virPCIDeviceCopy(dev)) ||
        virPCIDeviceListAdd(list, copy) < 0)
        return -1;

    /* the list holds the copy now, keep the auto-cleanup away from it */
    copy = NULL;
    return 0;
}


1630 1631 1632
/* virPCIDeviceListGet:
 * @list: list to look into
 * @idx: position of the wanted device
 *
 * Returns the device at position @idx (still owned by the list), or
 * NULL if @idx is out of range.
 */
virPCIDevicePtr
virPCIDeviceListGet(virPCIDeviceListPtr list,
                    int idx)
{
    if (idx < 0 || idx >= list->count)
        return NULL;

    return list->devs[idx];
}

1642
size_t
1643
virPCIDeviceListCount(virPCIDeviceListPtr list)
1644
{
1645 1646 1647
    return list->count;
}

1648 1649 1650
/* virPCIDeviceListStealIndex:
 * @list: list to take the device from
 * @idx: position of the device
 *
 * Remove the device at position @idx from @list without freeing it.
 *
 * Returns the removed device, or NULL if @idx is out of range.
 */
virPCIDevicePtr
virPCIDeviceListStealIndex(virPCIDeviceListPtr list,
                           int idx)
{
    virPCIDevicePtr stolen = NULL;

    if (idx >= 0 && idx < list->count) {
        stolen = list->devs[idx];
        VIR_DELETE_ELEMENT(list->devs, idx, list->count);
    }

    return stolen;
}

1662 1663 1664
/* virPCIDeviceListSteal:
 * @list: list to take the device from
 * @dev: device whose address is looked up
 *
 * Remove the list entry with the same address as @dev without freeing
 * it.
 *
 * Returns the removed entry, or NULL if there is none.
 */
virPCIDevicePtr
virPCIDeviceListSteal(virPCIDeviceListPtr list,
                      virPCIDevicePtr dev)
{
    int idx = virPCIDeviceListFindIndex(list, dev);

    return virPCIDeviceListStealIndex(list, idx);
}

1669
void
1670 1671
virPCIDeviceListDel(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
1672
{
1673
    virPCIDeviceFree(virPCIDeviceListSteal(list, dev));
1674 1675
}

1676
int
1677
virPCIDeviceListFindIndex(virPCIDeviceListPtr list, virPCIDevicePtr dev)
1678
{
1679
    size_t i;
1680

1681 1682 1683 1684 1685 1686
    for (i = 0; i < list->count; i++) {
        virPCIDevicePtr other = list->devs[i];
        if (other->address.domain   == dev->address.domain &&
            other->address.bus      == dev->address.bus    &&
            other->address.slot     == dev->address.slot   &&
            other->address.function == dev->address.function)
1687
            return i;
1688
    }
1689 1690 1691
    return -1;
}

L
Laine Stump 已提交
1692 1693 1694 1695 1696 1697 1698 1699

/* virPCIDeviceListFindByIDs:
 * @list: list to search
 * @domain: PCI domain to match
 * @bus: PCI bus to match
 * @slot: PCI slot to match
 * @function: PCI function to match
 *
 * Returns the first list entry with the given address (still owned by
 * the list), or NULL if there is none.
 */
virPCIDevicePtr
virPCIDeviceListFindByIDs(virPCIDeviceListPtr list,
                          unsigned int domain,
                          unsigned int bus,
                          unsigned int slot,
                          unsigned int function)
{
    size_t idx;

    for (idx = 0; idx < list->count; idx++) {
        virPCIDevicePtr candidate = list->devs[idx];

        if (candidate->address.domain != domain ||
            candidate->address.bus != bus ||
            candidate->address.slot != slot ||
            candidate->address.function != function)
            continue;

        return candidate;
    }

    return NULL;
}


1714 1715
/* virPCIDeviceListFind:
 * @list: list to search
 * @dev: device whose address is looked up
 *
 * Returns the list entry with the same address as @dev (still owned
 * by the list), or NULL if there is none.
 */
virPCIDevicePtr
virPCIDeviceListFind(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    int idx = virPCIDeviceListFindIndex(list, dev);

    return idx < 0 ? NULL : list->devs[idx];
}
1724 1725


1726 1727 1728
/* virPCIDeviceFileIterate:
 * @dev: device whose sysfs files are visited
 * @actor: callback invoked for each file of interest
 * @opaque: data passed through to @actor
 *
 * Call @actor for the files in the device's sysfs directory which are
 * required for device assignment. Iteration stops at the first
 * failing callback.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virPCIDeviceFileIterate(virPCIDevicePtr dev,
                        virPCIDeviceFileActor actor,
                        void *opaque)
{
    g_autofree char *pcidir = NULL;
    struct dirent *entry;
    DIR *dirp = NULL;
    int rc;
    int ret = -1;

    pcidir = g_strdup_printf("/sys/bus/pci/devices/" VIR_PCI_DEVICE_ADDRESS_FMT,
                             dev->address.domain, dev->address.bus,
                             dev->address.slot, dev->address.function);

    if (virDirOpen(&dirp, pcidir) < 0)
        goto cleanup;

    while ((rc = virDirRead(dirp, &entry, pcidir)) > 0) {
        g_autofree char *file = NULL;
        const char *name = entry->d_name;

        /* Device assignment requires:
         *   $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN,
         *   $PCIDIR/rom, $PCIDIR/reset, $PCIDIR/vendor, $PCIDIR/device
         * Everything else is skipped.
         */
        if (STRNEQ(name, "config") &&
            !STRPREFIX(name, "resource") &&
            STRNEQ(name, "rom") &&
            STRNEQ(name, "vendor") &&
            STRNEQ(name, "device") &&
            STRNEQ(name, "reset"))
            continue;

        file = g_strdup_printf("%s/%s", pcidir, name);
        if ((actor)(dev, file, opaque) < 0)
            goto cleanup;
    }
    if (rc < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    VIR_DIR_CLOSE(dirp);
    return ret;
}
J
Jiri Denemark 已提交
1769

L
Laine Stump 已提交
1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780

/* virPCIDeviceAddressIOMMUGroupIterate:
 * @orig: address of the device whose iommu_group is walked
 * @actor: callback invoked for each device address
 * @opaque: data passed through to @actor
 *
 *   Call @actor for all devices in the same iommu_group as orig
 *   (including orig itself) Even if there is no iommu_group for the
 *   device, call @actor once for orig.
 *
 * Returns 0 on success, -1 on failure; when the group directory
 * cannot be opened the result of the single @actor call is returned.
 */
int
virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddressPtr orig,
                                     virPCIDeviceAddressActor actor,
                                     void *opaque)
{
    g_autofree char *groupPath = NULL;
    struct dirent *entry;
    DIR *groupDir = NULL;
    int rc;
    int ret = -1;

    groupPath = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT "/iommu_group/devices",
                                orig->domain, orig->bus,
                                orig->slot, orig->function);

    if (virDirOpenQuiet(&groupDir, groupPath) < 0) {
        /* just process the original device, nothing more */
        ret = (actor)(orig, opaque);
        goto cleanup;
    }

    while ((rc = virDirRead(groupDir, &entry, groupPath)) > 0) {
        virPCIDeviceAddress member;

        /* every entry is expected to be named after a PCI address */
        if (virPCIDeviceAddressParse(entry->d_name, &member) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Found invalid device link '%s' in '%s'"),
                           entry->d_name, groupPath);
            goto cleanup;
        }

        if ((actor)(&member, opaque) < 0)
            goto cleanup;
    }
    if (rc < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    VIR_DIR_CLOSE(groupDir);
    return ret;
}


/* virPCIDeviceGetIOMMUGroupAddOne:
 * @newDevAddr: address of the device to add
 * @opaque: the virPCIDeviceList the device is added to
 *
 * Iteration callback creating a device object for @newDevAddr and
 * putting it on the list.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virPCIDeviceGetIOMMUGroupAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceListPtr groupList = opaque;
    g_autoptr(virPCIDevice) newDev = NULL;

    newDev = virPCIDeviceNew(newDevAddr->domain, newDevAddr->bus,
                             newDevAddr->slot, newDevAddr->function);
    if (newDev == NULL)
        return -1;

    if (virPCIDeviceListAdd(groupList, newDev) < 0)
        return -1;

    /* it's now on the list, so it must not be auto-freed */
    newDev = NULL;
    return 0;
}


/*
 * virPCIDeviceGetIOMMUGroupList:
 * @dev: device whose iommu_group is listed
 *
 * Build a virPCIDeviceList containing all of the devices in the same
 * iommu_group as @dev.
 *
 * Return the new list, or NULL on failure
 */
virPCIDeviceListPtr
virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev)
{
    virPCIDeviceListPtr groupList = virPCIDeviceListNew();

    if (groupList &&
        virPCIDeviceAddressIOMMUGroupIterate(&(dev->address),
                                             virPCIDeviceGetIOMMUGroupAddOne,
                                             groupList) >= 0)
        return groupList;

    /* either there is no list or filling it failed */
    virObjectUnref(groupList);
    return NULL;
}


/* Pair of caller-provided out-parameters which is handed to
 * virPCIGetIOMMUGroupAddressesAddOne() through the opaque pointer of
 * the iommu_group iteration.
 */
typedef struct {
    virPCIDeviceAddressPtr **iommuGroupDevices; /* array the address copies are appended to */
    size_t *nIommuGroupDevices; /* element count of *iommuGroupDevices */
} virPCIDeviceAddressList;
typedef virPCIDeviceAddressList *virPCIDeviceAddressListPtr;

/* Callback for virPCIDeviceAddressIOMMUGroupIterate(): appends a
 * heap-allocated copy of @newDevAddr to the address array described
 * by the virPCIDeviceAddressList passed in via @opaque.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virPCIGetIOMMUGroupAddressesAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceAddressListPtr out = opaque;
    virPCIDeviceAddressPtr dup;
    int rc = -1;

    /* the array stores its own copy of each address */
    if (VIR_ALLOC(dup) >= 0) {
        *dup = *newDevAddr;

        if (VIR_APPEND_ELEMENT(*out->iommuGroupDevices,
                               *out->nIommuGroupDevices, dup) >= 0)
            rc = 0;
    }

    VIR_FREE(dup);
    return rc;
}


/*
 * virPCIDeviceAddressGetIOMMUGroupAddresses:
 * @devAddr: address of a PCI device
 * @iommuGroupDevices: array that receives one newly allocated
 *                     virPCIDeviceAddress per device in the group
 * @nIommuGroupDevices: number of entries in @iommuGroupDevices
 *
 * Iterates over the iommu_group of @devAddr and appends a copy of
 * the address of every device visited to @iommuGroupDevices,
 * updating @nIommuGroupDevices accordingly. Neither out-parameter is
 * reset here, so entries are appended to whatever the caller passes
 * in, and entries appended before a failure are left in place.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddressPtr devAddr,
                                          virPCIDeviceAddressPtr **iommuGroupDevices,
                                          size_t *nIommuGroupDevices)
{
    virPCIDeviceAddressList addrList = { iommuGroupDevices,
                                         nIommuGroupDevices };

    if (virPCIDeviceAddressIOMMUGroupIterate(devAddr,
                                             virPCIGetIOMMUGroupAddressesAddOne,
                                             &addrList) < 0)
        return -1;

    return 0;
}


/* virPCIDeviceAddressGetIOMMUGroupNum:
 * @addr: address of a PCI device
 *
 * Resolves the device's "iommu_group" symlink and parses the last
 * path component of its target as a decimal group number.
 *
 * Returns the group number, -2 if the "iommu_group" entry is not a
 * symlink (the device has no iommu_group), or -1 on any other error.
 */
int
virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddressPtr addr)
{
    g_autofree char *name = NULL;
    g_autofree char *linkPath = NULL;
    g_autofree char *linkTarget = NULL;
    const char *numStr;
    unsigned int num;

    name = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT,
                           addr->domain, addr->bus,
                           addr->slot, addr->function);

    linkPath = virPCIFile(name, "iommu_group");
    if (!linkPath)
        return -1;

    if (virFileIsLink(linkPath) != 1)
        return -2;

    if (virFileResolveLink(linkPath, &linkTarget) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       name, linkPath);
        return -1;
    }

    /* the group number is the final component of the link target */
    numStr = last_component(linkTarget);
    if (virStrToLong_ui(numStr, NULL, 10, &num) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("device %s iommu_group symlink %s has "
                         "invalid group number %s"),
                       name, linkTarget, numStr);
        return -1;
    }

    return num;
}


1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973
char *
virPCIDeviceAddressGetIOMMUGroupDev(const virPCIDeviceAddress *devAddr)
{
    g_autoptr(virPCIDevice) pci = NULL;

    if (!(pci = virPCIDeviceNew(devAddr->domain,
                                devAddr->bus,
                                devAddr->slot,
                                devAddr->function)))
        return NULL;

    return virPCIDeviceGetIOMMUGroupDev(pci);
}


1974 1975
/* virPCIDeviceGetIOMMUGroupDev - return the name of the device used
 * to control this PCI device's group (e.g. "/dev/vfio/15")
1976 1977
 */
char *
1978
virPCIDeviceGetIOMMUGroupDev(virPCIDevicePtr dev)
1979
{
1980 1981
    g_autofree char *devPath = NULL;
    g_autofree char *groupPath = NULL;
1982 1983
    char *groupDev = NULL;

1984
    if (!(devPath = virPCIFile(dev->name, "iommu_group")))
1985
        return NULL;
1986 1987 1988 1989
    if (virFileIsLink(devPath) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s iommu_group file %s is not a symlink"),
                       dev->name, devPath);
1990
        return NULL;
1991 1992 1993 1994 1995
    }
    if (virFileResolveLink(devPath, &groupPath) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       dev->name, devPath);
1996
        return NULL;
1997
    }
1998
    groupDev = g_strdup_printf("/dev/vfio/%s", last_component(groupPath));
1999

2000 2001 2002
    return groupDev;
}

J
Jiri Denemark 已提交
2003
static int
2004
virPCIDeviceDownstreamLacksACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2005 2006 2007 2008
{
    uint16_t flags;
    uint16_t ctrl;
    unsigned int pos;
2009 2010
    int fd;
    int ret = 0;
2011
    uint16_t device_class;
J
Jiri Denemark 已提交
2012

2013
    if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
J
Jiri Denemark 已提交
2014 2015
        return -1;

2016
    if (virPCIDeviceInit(dev, fd) < 0) {
2017 2018 2019 2020
        ret = -1;
        goto cleanup;
    }

2021 2022 2023
    if (virPCIDeviceReadClass(dev, &device_class) < 0)
        goto cleanup;

J
Jiri Denemark 已提交
2024
    pos = dev->pcie_cap_pos;
2025
    if (!pos || device_class != PCI_CLASS_BRIDGE_PCI)
2026
        goto cleanup;
J
Jiri Denemark 已提交
2027

2028
    flags = virPCIDeviceRead16(dev, fd, pos + PCI_EXP_FLAGS);
J
Jiri Denemark 已提交
2029
    if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM)
2030
        goto cleanup;
J
Jiri Denemark 已提交
2031

2032
    pos = virPCIDeviceFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS);
J
Jiri Denemark 已提交
2033 2034
    if (!pos) {
        VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name);
2035 2036
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2037 2038
    }

2039
    ctrl = virPCIDeviceRead16(dev, fd, pos + PCI_EXT_ACS_CTRL);
J
Jiri Denemark 已提交
2040 2041 2042
    if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) {
        VIR_DEBUG("%s %s: downstream port has ACS disabled",
                  dev->id, dev->name);
2043 2044
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2045 2046
    }

2047
 cleanup:
2048
    virPCIDeviceConfigClose(dev, fd);
2049
    return ret;
J
Jiri Denemark 已提交
2050 2051 2052
}

static int
2053
virPCIDeviceIsBehindSwitchLackingACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2054
{
J
Ján Tomko 已提交
2055
    g_autoptr(virPCIDevice) parent = NULL;
J
Jiri Denemark 已提交
2056

2057
    if (virPCIDeviceGetParent(dev, &parent) < 0)
2058
        return -1;
2059 2060 2061 2062 2063
    if (!parent) {
        /* if we have no parent, and this is the root bus, ACS doesn't come
         * into play since devices on the root bus can't P2P without going
         * through the root IOMMU.
         */
2064
        if (dev->address.bus == 0) {
2065
            return 0;
2066
        } else {
2067
            virReportError(VIR_ERR_INTERNAL_ERROR,
2068 2069 2070 2071
                           _("Failed to find parent device for %s"),
                           dev->name);
            return -1;
        }
J
Jiri Denemark 已提交
2072 2073 2074 2075 2076 2077 2078
    }

    /* XXX we should rather fail when we can't find device's parent and
     * stop the loop when we get to root instead of just stopping when no
     * parent can be found
     */
    do {
J
Ján Tomko 已提交
2079
        g_autoptr(virPCIDevice) tmp = NULL;
J
Jiri Denemark 已提交
2080
        int acs;
2081
        int ret;
J
Jiri Denemark 已提交
2082

2083
        acs = virPCIDeviceDownstreamLacksACS(parent);
J
Jiri Denemark 已提交
2084 2085 2086 2087 2088 2089 2090 2091 2092

        if (acs) {
            if (acs < 0)
                return -1;
            else
                return 1;
        }

        tmp = parent;
2093
        ret = virPCIDeviceGetParent(parent, &parent);
2094 2095
        if (ret < 0)
            return -1;
J
Jiri Denemark 已提交
2096 2097 2098 2099 2100
    } while (parent);

    return 0;
}

2101 2102
/* virPCIDeviceIsAssignable:
 * @dev: PCI device to check
 * @strict_acs_check: if non-zero, refuse devices behind a switch
 *                    lacking ACS
 *
 * Returns 1 if the device may be assigned, 0 if not (including when
 * the ACS check itself failed).
 */
int virPCIDeviceIsAssignable(virPCIDevicePtr dev,
                             int strict_acs_check)
{
    int lacksACS;

    /* XXX This could be a great place to actually check that a non-managed
     * device isn't in use, e.g. by checking that device is either un-bound
     * or bound to a stub driver.
     */

    lacksACS = virPCIDeviceIsBehindSwitchLackingACS(dev);

    if (lacksACS < 0)
        return 0;

    if (lacksACS == 0)
        return 1;

    if (strict_acs_check) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Device %s is behind a switch lacking ACS and "
                         "cannot be assigned"),
                       dev->name);
        return 0;
    }

    VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed",
              dev->id, dev->name);
    return 1;
}
2130 2131 2132 2133 2134 2135 2136 2137 2138 2139

/* Wrapper around virStrToLong_ui() that additionally logs an error
 * naming the offending string when the conversion fails.
 * Returns whatever virStrToLong_ui() returned.
 */
static int
logStrToLong_ui(char const *s,
                char **end_ptr,
                int base,
                unsigned int *result)
{
    int rc = virStrToLong_ui(s, end_ptr, base, result);

    if (rc != 0)
        VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s);

    return rc;
}

2145 2146
int
virPCIDeviceAddressParse(char *address,
2147
                         virPCIDeviceAddressPtr bdf)
2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173
{
    char *p = NULL;
    int ret = -1;

    if ((address == NULL) || (logStrToLong_ui(address, &p, 16,
                                              &bdf->domain) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->bus) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->slot) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->function) == -1)) {
        goto out;
    }

    ret = 0;

2174
 out:
2175 2176 2177
    return ret;
}

2178

2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203
/* virZPCIDeviceAddressIsValid:
 * @zpci: zPCI address to validate
 *
 * Only the uid is range-checked: it must be non-zero and no larger
 * than VIR_DOMAIN_DEVICE_ZPCI_MAX_UID.
 *
 * Returns true if valid, false (with an error reported) otherwise.
 */
bool
virZPCIDeviceAddressIsValid(virZPCIDeviceAddressPtr zpci)
{
    /* We don't need to check fid because fid covers
     * all range of uint32 type.
     */
    if (zpci->uid != 0 &&
        zpci->uid <= VIR_DOMAIN_DEVICE_ZPCI_MAX_UID)
        return true;

    virReportError(VIR_ERR_XML_ERROR,
                   _("Invalid PCI address uid='0x%.4x', "
                     "must be > 0x0000 and <= 0x%.4x"),
                   zpci->uid,
                   VIR_DOMAIN_DEVICE_ZPCI_MAX_UID);
    return false;
}

/* Returns true when both the uid and the fid of @addr are zero. */
bool
virZPCIDeviceAddressIsEmpty(const virZPCIDeviceAddress *addr)
{
    return !addr->uid && !addr->fid;
}

2204
#ifdef __linux__
2205

2206
virPCIDeviceAddressPtr
2207
virPCIGetDeviceAddressFromSysfsLink(const char *device_link)
2208
{
2209
    virPCIDeviceAddressPtr bdf = NULL;
2210
    char *config_address = NULL;
2211
    g_autofree char *device_path = NULL;
2212 2213

    if (!virFileExists(device_link)) {
2214
        VIR_DEBUG("'%s' does not exist", device_link);
2215
        return NULL;
2216 2217
    }

2218
    device_path = virFileCanonicalizePath(device_link);
2219
    if (device_path == NULL) {
2220 2221 2222
        virReportSystemError(errno,
                             _("Failed to resolve device link '%s'"),
                             device_link);
2223
        return NULL;
2224 2225
    }

2226
    config_address = last_component(device_path);
2227
    if (VIR_ALLOC(bdf) < 0)
2228
        return NULL;
2229

2230
    if (virPCIDeviceAddressParse(config_address, bdf) < 0) {
2231
        virReportError(VIR_ERR_INTERNAL_ERROR,
2232 2233
                       _("Failed to parse PCI config address '%s'"),
                       config_address);
2234
        VIR_FREE(bdf);
2235
        return NULL;
2236 2237
    }

2238
    return bdf;
2239 2240
}

2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253
/**
 * virPCIGetPhysicalFunction:
 * @vf_sysfs_path: sysfs path for the virtual function
 * @pf: where to store the physical function's address
 *
 * Given @vf_sysfs_path, this function will store the pointer
 * to a newly-allocated virPCIDeviceAddress in @pf.
 *
 * @pf might be NULL if @vf_sysfs_path does not point to a
 * virtual function. If it's not NULL, then it should be
 * freed by the caller when no longer needed.
 *
 * Returns: >=0 on success, <0 on failure
2254 2255
 */
int
2256
virPCIGetPhysicalFunction(const char *vf_sysfs_path,
2257
                          virPCIDeviceAddressPtr *pf)
2258
{
2259
    g_autofree char *device_link = NULL;
2260

2261 2262
    *pf = NULL;

2263 2264
    if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) {
        virReportOOMError();
2265
        return -1;
2266 2267
    }

2268
    if ((*pf = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2269 2270
        VIR_DEBUG("PF for VF device '%s': " VIR_PCI_DEVICE_ADDRESS_FMT,
                  vf_sysfs_path,
2271 2272
                  (*pf)->domain, (*pf)->bus, (*pf)->slot, (*pf)->function);
    }
2273

2274
    return 0;
2275 2276
}

2277

2278 2279 2280 2281
/*
 * Returns virtual functions of a physical function
 */
int
2282 2283
virPCIGetVirtualFunctions(const char *sysfs_path,
                          virPCIDeviceAddressPtr **virtual_functions,
2284 2285
                          size_t *num_virtual_functions,
                          unsigned int *max_virtual_functions)
2286 2287
{
    int ret = -1;
2288
    size_t i;
2289 2290
    g_autofree char *totalvfs_file = NULL;
    g_autofree char *totalvfs_str = NULL;
2291
    virPCIDeviceAddressPtr config_addr = NULL;
2292

2293 2294
    *virtual_functions = NULL;
    *num_virtual_functions = 0;
2295 2296
    *max_virtual_functions = 0;

2297
    totalvfs_file = g_strdup_printf("%s/sriov_totalvfs", sysfs_path);
2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309
    if (virFileExists(totalvfs_file)) {
        char *end = NULL; /* so that terminating \n doesn't create error */

        if (virFileReadAll(totalvfs_file, 16, &totalvfs_str) < 0)
            goto error;
        if (virStrToLong_ui(totalvfs_str, &end, 10, max_virtual_functions) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Unrecognized value in %s: %s"),
                           totalvfs_file, totalvfs_str);
            goto error;
        }
    }
2310

2311
    do {
2312
        g_autofree char *device_link = NULL;
2313
        /* look for virtfn%d links until one isn't found */
2314 2315
        device_link = g_strdup_printf("%s/virtfn%zu", sysfs_path,
                                      *num_virtual_functions);
2316

2317 2318
        if (!virFileExists(device_link))
            break;
2319

2320
        if (!(config_addr = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2321 2322 2323 2324 2325
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Failed to get SRIOV function from device link '%s'"),
                           device_link);
            goto error;
        }
2326

2327 2328
        if (VIR_APPEND_ELEMENT(*virtual_functions, *num_virtual_functions,
                               config_addr) < 0)
2329 2330
            goto error;
    } while (1);
2331

2332 2333
    VIR_DEBUG("Found %zu virtual functions for %s",
              *num_virtual_functions, sysfs_path);
2334
    ret = 0;
2335
 cleanup:
2336
    VIR_FREE(config_addr);
2337
    return ret;
2338

2339
 error:
2340 2341 2342
    for (i = 0; i < *num_virtual_functions; i++)
        VIR_FREE((*virtual_functions)[i]);
    VIR_FREE(*virtual_functions);
2343
    *num_virtual_functions = 0;
2344
    goto cleanup;
2345
}
2346

2347

2348 2349 2350 2351
/*
 * Returns 1 if vf device is a virtual function, 0 if not, -1 on error
 */
int
2352
virPCIIsVirtualFunction(const char *vf_sysfs_device_link)
2353
{
2354
    g_autofree char *vf_sysfs_physfn_link = NULL;
2355

2356
    vf_sysfs_physfn_link = g_strdup_printf("%s/physfn", vf_sysfs_device_link);
2357

2358
    return virFileExists(vf_sysfs_physfn_link);
2359 2360 2361 2362 2363 2364
}

/*
 * Returns the sriov virtual function index of vf given its pf
 */
int
2365 2366 2367
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
                              const char *vf_sysfs_device_link,
                              int *vf_index)
2368
{
2369 2370
    int ret = -1;
    size_t i;
2371
    size_t num_virt_fns = 0;
2372
    unsigned int max_virt_fns = 0;
2373 2374
    virPCIDeviceAddressPtr vf_bdf = NULL;
    virPCIDeviceAddressPtr *virt_fns = NULL;
2375

2376
    if (!(vf_bdf = virPCIGetDeviceAddressFromSysfsLink(vf_sysfs_device_link)))
2377 2378
        return ret;

2379
    if (virPCIGetVirtualFunctions(pf_sysfs_device_link, &virt_fns,
2380
                                  &num_virt_fns, &max_virt_fns) < 0) {
2381
        virReportError(VIR_ERR_INTERNAL_ERROR,
2382
                       _("Error getting physical function's '%s' "
2383
                         "virtual_functions"), pf_sysfs_device_link);
2384 2385 2386 2387
        goto out;
    }

    for (i = 0; i < num_virt_fns; i++) {
2388
        if (virPCIDeviceAddressEqual(vf_bdf, virt_fns[i])) {
2389 2390 2391 2392
            *vf_index = i;
            ret = 0;
            break;
        }
2393 2394
    }

2395
 out:
2396 2397 2398

    /* free virtual functions */
    for (i = 0; i < num_virt_fns; i++)
2399
        VIR_FREE(virt_fns[i]);
2400

A
ajia@redhat.com 已提交
2401
    VIR_FREE(virt_fns);
2402 2403 2404 2405 2406
    VIR_FREE(vf_bdf);

    return ret;
}

2407 2408 2409 2410 2411
/*
 * Returns a path to the PCI sysfs file given the BDF of the PCI function
 */

int
2412
virPCIGetSysfsFile(char *virPCIDeviceName, char **pci_sysfs_device_link)
2413
{
2414 2415
    *pci_sysfs_device_link = g_strdup_printf(PCI_SYSFS "devices/%s",
                                             virPCIDeviceName);
2416
    return 0;
2417 2418
}

R
Roopa Prabhu 已提交
2419
int
2420
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
2421
                                char **pci_sysfs_device_link)
R
Roopa Prabhu 已提交
2422
{
2423 2424
    *pci_sysfs_device_link = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT, addr->domain,
                                             addr->bus, addr->slot, addr->function);
2425
    return 0;
R
Roopa Prabhu 已提交
2426 2427
}

2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438
/**
 * virPCIGetNetName:
 * @device_link_sysfs_path: sysfs path to the PCI device
 * @idx: used to choose which netdev when there are several
 *       (ignored if physPortID is set)
 * @physPortID: match this string in the netdev's phys_port_id
 *       (or NULL to ignore and use idx instead)
 * @netname: used to return the name of the netdev
 *       (set to NULL (but returns success) if there is no netdev)
 *
 * Returns 0 on success, -1 on error (error has been logged)
2439 2440
 */
int
2441 2442 2443 2444
virPCIGetNetName(const char *device_link_sysfs_path,
                 size_t idx,
                 char *physPortID,
                 char **netname)
2445
{
2446 2447 2448
    g_autofree char *pcidev_sysfs_net_path = NULL;
    g_autofree char *firstEntryName = NULL;
    g_autofree char *thisPhysPortID = NULL;
2449 2450 2451
    int ret = -1;
    DIR *dir = NULL;
    struct dirent *entry = NULL;
2452
    size_t i = 0;
2453

2454 2455
    *netname = NULL;

2456 2457 2458 2459 2460 2461
    if (virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path,
                     "net") == -1) {
        virReportOOMError();
        return -1;
    }

2462 2463 2464
    if (virDirOpenQuiet(&dir, pcidev_sysfs_net_path) < 0) {
        /* this *isn't* an error - caller needs to check for netname == NULL */
        ret = 0;
2465
        goto cleanup;
2466
    }
2467

E
Eric Blake 已提交
2468
    while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) {
2469 2470 2471 2472 2473 2474 2475 2476 2477 2478
        /* if the caller sent a physPortID, compare it to the
         * physportID of this netdev. If not, look for entry[idx].
         */
        if (physPortID) {
            if (virNetDevGetPhysPortID(entry->d_name, &thisPhysPortID) < 0)
                goto cleanup;

            /* if this one doesn't match, keep looking */
            if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) {
                VIR_FREE(thisPhysPortID);
2479 2480 2481 2482 2483
                /* save the first entry we find to use as a failsafe
                 * in case we don't match the phys_port_id. This is
                 * needed because some NIC drivers (e.g. i40e)
                 * implement phys_port_id for PFs, but not for VFs
                 */
2484 2485
                if (!firstEntryName)
                    firstEntryName = g_strdup(entry->d_name);
2486

2487 2488 2489 2490 2491 2492 2493
                continue;
            }
        } else {
            if (i++ < idx)
                continue;
        }

2494
        *netname = g_strdup(entry->d_name);
2495 2496

        ret = 0;
2497 2498 2499
        break;
    }

2500 2501
    if (ret < 0) {
        if (physPortID) {
2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516
            if (firstEntryName) {
                /* we didn't match the provided phys_port_id, but this
                 * is probably because phys_port_id isn't implemented
                 * for this NIC driver, so just return the first
                 * (probably only) netname we found.
                 */
                *netname = firstEntryName;
                firstEntryName = NULL;
                ret = 0;
            } else {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Could not find network device with "
                                 "phys_port_id '%s' under PCI device at %s"),
                               physPortID, device_link_sysfs_path);
            }
2517 2518 2519 2520 2521
        } else {
            ret = 0; /* no netdev at the given index is *not* an error */
        }
    }
 cleanup:
J
Ján Tomko 已提交
2522
    VIR_DIR_CLOSE(dir);
2523
    return ret;
2524
}
R
Roopa Prabhu 已提交
2525 2526

int
2527
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
2528 2529 2530
                             int pfNetDevIdx,
                             char **pfname,
                             int *vf_index)
R
Roopa Prabhu 已提交
2531
{
2532
    virPCIDeviceAddressPtr pf_config_address = NULL;
2533 2534 2535
    g_autofree char *pf_sysfs_device_path = NULL;
    g_autofree char *vfname = NULL;
    g_autofree char *vfPhysPortID = NULL;
R
Roopa Prabhu 已提交
2536 2537
    int ret = -1;

2538
    if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
2539
        goto cleanup;
R
Roopa Prabhu 已提交
2540

2541
    if (!pf_config_address)
2542
        goto cleanup;
2543

2544 2545
    if (virPCIDeviceAddressGetSysfsFile(pf_config_address,
                                        &pf_sysfs_device_path) < 0) {
2546 2547
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2548

2549 2550 2551
    if (virPCIGetVirtualFunctionIndex(pf_sysfs_device_path,
                                      vf_sysfs_device_path, vf_index) < 0) {
        goto cleanup;
R
Roopa Prabhu 已提交
2552 2553
    }

2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573
    /* If the caller hasn't asked for a specific pfNetDevIdx, and VF
     * is bound to a netdev, learn that netdev's phys_port_id (if
     * available). This can be used to disambiguate when the PF has
     * multiple netdevs. If the VF isn't bound to a netdev, then we
     * return netdev[pfNetDevIdx] on the PF, which may or may not be
     * correct.
     */
    if (pfNetDevIdx == -1) {
        if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
            goto cleanup;

        if (vfname) {
            if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0)
                goto cleanup;
        }
        pfNetDevIdx = 0;
    }

    if (virPCIGetNetName(pf_sysfs_device_path,
                         pfNetDevIdx, vfPhysPortID, pfname) < 0) {
R
Roopa Prabhu 已提交
2574
        goto cleanup;
2575
    }
R
Roopa Prabhu 已提交
2576

2577 2578 2579 2580 2581 2582 2583 2584 2585
    if (!*pfname) {
        /* this shouldn't be possible. A VF can't exist unless its
         * PF device is bound to a network driver
         */
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("The PF device for VF %s has no network device name"),
                       vf_sysfs_device_path);
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2586

2587
    ret = 0;
2588
 cleanup:
R
Roopa Prabhu 已提交
2589 2590 2591 2592 2593
    VIR_FREE(pf_config_address);

    return ret;
}

2594 2595 2596 2597 2598 2599 2600 2601 2602

/* virPCIGetMdevTypes:
 * @sysfspath: sysfs path of the PCI device
 * @types: receives a newly allocated array of mediated device types
 *
 * Reads the attributes of every entry found in the device's
 * "mdev_supported_types" directory.
 *
 * Returns the number of types stored in @types, 0 if the directory
 * does not exist (@types is then left untouched), or -1 on error.
 */
ssize_t
virPCIGetMdevTypes(const char *sysfspath,
                   virMediatedDeviceTypePtr **types)
{
    g_autofree char *typesDir = NULL;
    g_autoptr(virMediatedDeviceType) cur = NULL;
    virMediatedDeviceTypePtr *found = NULL;
    struct dirent *ent;
    DIR *dp = NULL;
    size_t nfound = 0;
    size_t n;
    ssize_t ret = -1;
    int rc = -1;

    typesDir = g_strdup_printf("%s/mdev_supported_types", sysfspath);

    rc = virDirOpenIfExists(&dp, typesDir);
    if (rc < 0)
        goto cleanup;

    if (rc == 0) {
        ret = 0;
        goto cleanup;
    }

    while ((rc = virDirRead(dp, &ent, typesDir)) > 0) {
        /* append the type id to the path and read the attributes from there */
        g_autofree char *attrDir = g_strdup_printf("%s/%s", typesDir,
                                                   ent->d_name);

        if (virMediatedDeviceTypeReadAttrs(attrDir, &cur) < 0 ||
            VIR_APPEND_ELEMENT(found, nfound, cur) < 0)
            goto cleanup;
    }

    if (rc < 0)
        goto cleanup;

    /* hand the array over to the caller; zeroing the count makes the
     * cleanup loop below a no-op */
    *types = g_steal_pointer(&found);
    ret = nfound;
    nfound = 0;

 cleanup:
    for (n = 0; n < nfound; n++)
        virMediatedDeviceTypeFree(found[n]);
    VIR_FREE(found);
    VIR_DIR_CLOSE(dp);
    return ret;
}

2645
#else
2646 2647
/* Message reported by the non-Linux stub implementations below; marked
 * with N_() here and passed through _() where it is reported. */
static const char *unsupported = N_("not supported on non-linux platforms");

2648
virPCIDeviceAddressPtr
J
Ján Tomko 已提交
2649
virPCIGetDeviceAddressFromSysfsLink(const char *device_link G_GNUC_UNUSED)
2650 2651
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2652
    return NULL;
2653 2654 2655
}


2656
int
J
Ján Tomko 已提交
2657 2658
virPCIGetPhysicalFunction(const char *vf_sysfs_path G_GNUC_UNUSED,
                          virPCIDeviceAddressPtr *pf G_GNUC_UNUSED)
2659
{
2660
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2661 2662 2663 2664
    return -1;
}

int
J
Ján Tomko 已提交
2665 2666 2667 2668
virPCIGetVirtualFunctions(const char *sysfs_path G_GNUC_UNUSED,
                          virPCIDeviceAddressPtr **virtual_functions G_GNUC_UNUSED,
                          size_t *num_virtual_functions G_GNUC_UNUSED,
                          unsigned int *max_virtual_functions G_GNUC_UNUSED)
2669
{
2670
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2671 2672
    return -1;
}
2673 2674

int
J
Ján Tomko 已提交
2675
virPCIIsVirtualFunction(const char *vf_sysfs_device_link G_GNUC_UNUSED)
2676
{
2677
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2678 2679 2680 2681
    return -1;
}

int
J
Ján Tomko 已提交
2682 2683 2684
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link G_GNUC_UNUSED,
                              const char *vf_sysfs_device_link G_GNUC_UNUSED,
                              int *vf_index G_GNUC_UNUSED)
2685
{
2686
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2687 2688 2689 2690
    return -1;

}

2691
int
J
Ján Tomko 已提交
2692 2693
virPCIGetSysfsFile(char *virPCIDeviceName G_GNUC_UNUSED,
                   char **pci_sysfs_device_link G_GNUC_UNUSED)
2694 2695 2696 2697 2698
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}

2699
int
J
Ján Tomko 已提交
2700 2701
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr dev G_GNUC_UNUSED,
                                char **pci_sysfs_device_link G_GNUC_UNUSED)
2702
{
2703
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2704 2705 2706
    return -1;
}

2707
int
J
Ján Tomko 已提交
2708 2709 2710 2711
virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED,
                 size_t idx G_GNUC_UNUSED,
                 char *physPortID G_GNUC_UNUSED,
                 char **netname G_GNUC_UNUSED)
2712
{
2713
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2714 2715
    return -1;
}
R
Roopa Prabhu 已提交
2716 2717

int
J
Ján Tomko 已提交
2718 2719 2720 2721
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path G_GNUC_UNUSED,
                             int pfNetDevIdx G_GNUC_UNUSED,
                             char **pfname G_GNUC_UNUSED,
                             int *vf_index G_GNUC_UNUSED)
R
Roopa Prabhu 已提交
2722
{
2723
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
R
Roopa Prabhu 已提交
2724 2725
    return -1;
}
2726 2727 2728


ssize_t
J
Ján Tomko 已提交
2729 2730
virPCIGetMdevTypes(const char *sysfspath G_GNUC_UNUSED,
                   virMediatedDeviceTypePtr **types G_GNUC_UNUSED)
2731 2732 2733 2734
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}
2735
#endif /* __linux__ */
2736 2737 2738 2739 2740 2741 2742

/* virPCIDeviceIsPCIExpress:
 * @dev: PCI device
 *
 * Returns 1 if @dev has a PCI-Express capability (non-zero
 * pcie_cap_pos after initialization), 0 if not, -1 on error.
 */
int
virPCIDeviceIsPCIExpress(virPCIDevicePtr dev)
{
    int cfgfd = virPCIDeviceConfigOpen(dev);
    int isPCIe = -1;

    if (cfgfd < 0)
        return -1;

    if (virPCIDeviceInit(dev, cfgfd) >= 0)
        isPCIe = dev->pcie_cap_pos != 0;

    virPCIDeviceConfigClose(dev, cfgfd);
    return isPCIe;
}

/* virPCIDeviceHasPCIExpressLink:
 * @dev: PCI device
 *
 * Reads the port type from the flags register of the PCI-Express
 * capability.
 *
 * Returns 0 if the type is PCI_EXP_TYPE_ROOT_INT_EP or
 * PCI_EXP_TYPE_ROOT_EC, 1 for any other type, -1 on error.
 */
int
virPCIDeviceHasPCIExpressLink(virPCIDevicePtr dev)
{
    uint16_t flags;
    uint16_t portType;
    int hasLink = -1;
    int cfgfd = virPCIDeviceConfigOpen(dev);

    if (cfgfd < 0)
        return -1;

    if (virPCIDeviceInit(dev, cfgfd) < 0)
        goto cleanup;

    flags = virPCIDeviceRead16(dev, cfgfd, dev->pcie_cap_pos + PCI_CAP_FLAGS);
    portType = (flags & PCI_EXP_FLAGS_TYPE) >> 4;

    hasLink = !(portType == PCI_EXP_TYPE_ROOT_INT_EP ||
                portType == PCI_EXP_TYPE_ROOT_EC);

 cleanup:
    virPCIDeviceConfigClose(dev, cfgfd);
    return hasLink;
}

/* virPCIDeviceGetLinkCapSta:
 * @dev: PCI device
 * @cap_port: receives the port number from the link capabilities
 * @cap_speed: receives the speed field of the link capabilities
 * @cap_width: receives the width field of the link capabilities
 * @sta_speed: receives the speed field of the link status
 * @sta_width: receives the width field of the link status
 *
 * Returns 0 on success, -1 on error, including when @dev is not a
 * PCI-Express device.
 */
int
virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
                          int *cap_port,
                          unsigned int *cap_speed,
                          unsigned int *cap_width,
                          unsigned int *sta_speed,
                          unsigned int *sta_width)
{
    uint32_t lnkcap;
    uint32_t lnksta;
    int ret = -1;
    int cfgfd = virPCIDeviceConfigOpen(dev);

    if (cfgfd < 0)
        return -1;

    if (virPCIDeviceInit(dev, cfgfd) < 0)
        goto cleanup;

    if (!dev->pcie_cap_pos) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("pci device %s is not a PCI-Express device"),
                       dev->name);
        goto cleanup;
    }

    lnkcap = virPCIDeviceRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_LNKCAP);
    *cap_port = lnkcap >> 24;
    *cap_speed = lnkcap & PCI_EXP_LNKCAP_SPEED;
    *cap_width = (lnkcap & PCI_EXP_LNKCAP_WIDTH) >> 4;

    lnksta = virPCIDeviceRead16(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_LNKSTA);
    *sta_speed = lnksta & PCI_EXP_LNKSTA_SPEED;
    *sta_width = (lnksta & PCI_EXP_LNKSTA_WIDTH) >> 4;

    ret = 0;

 cleanup:
    virPCIDeviceConfigClose(dev, cfgfd);
    return ret;
}
2820 2821


2822 2823 2824 2825 2826 2827 2828
/* virPCIGetHeaderType:
 * @dev: PCI device
 * @hdrType: receives the masked header type, or -1 on failure
 *
 * Reads the header type from the device's configuration and rejects
 * values that are not below VIR_PCI_HEADER_LAST.
 *
 * Returns 0 on success, -1 on error.
 */
int virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType)
{
    uint8_t raw;
    int cfgfd;

    *hdrType = -1;

    cfgfd = virPCIDeviceConfigOpen(dev);
    if (cfgfd < 0)
        return -1;

    raw = virPCIDeviceRead8(dev, cfgfd, PCI_HEADER_TYPE) & PCI_HEADER_TYPE_MASK;

    virPCIDeviceConfigClose(dev, cfgfd);

    if (raw >= VIR_PCI_HEADER_LAST) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unknown PCI header type '%d' for device '%s'"),
                       raw, dev->name);
        return -1;
    }

    *hdrType = raw;
    return 0;
}


2850 2851 2852 2853 2854 2855 2856 2857 2858 2859
/* Frees @dev together with its link_cap and link_sta members.
 * Does nothing if @dev is NULL. */
void
virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev)
{
    if (dev) {
        VIR_FREE(dev->link_cap);
        VIR_FREE(dev->link_sta);
        VIR_FREE(dev);
    }
}
2860 2861 2862 2863 2864 2865

/* Releases the memory of a virPCIDeviceAddress via VIR_FREE(). */
void
virPCIDeviceAddressFree(virPCIDeviceAddressPtr address)
{
    VIR_FREE(address);
}