/*
 * virpci.c: helper APIs for managing host PCI devices
 *
 * Copyright (C) 2009-2015 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>

23
#include "virpci.h"
24
#include "virnetdev.h"
25 26 27 28 29 30 31 32

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

33
#include "dirname.h"
34
#include "virlog.h"
35
#include "vircommand.h"
36
#include "virerror.h"
E
Eric Blake 已提交
37
#include "virfile.h"
38
#include "virkmod.h"
39 40
#include "virstring.h"
#include "virutil.h"
41
#include "viralloc.h"
42

43 44
VIR_LOG_INIT("util.pci");

45 46 47
#define PCI_SYSFS "/sys/bus/pci/"
#define PCI_ID_LEN 10   /* "XXXX XXXX" */

48 49
VIR_ENUM_IMPL(virPCIELinkSpeed,
              VIR_PCIE_LINK_SPEED_LAST,
50 51
              "", "2.5", "5", "8", "16",
);
52

53 54
VIR_ENUM_IMPL(virPCIStubDriver,
              VIR_PCI_STUB_DRIVER_LAST,
55 56 57 58
              "none",
              "pciback", /* XEN */
              "pci-stub", /* KVM */
              "vfio-pci", /* VFIO */
59
);
60

61 62
VIR_ENUM_IMPL(virPCIHeader,
              VIR_PCI_HEADER_LAST,
63 64 65
              "endpoint",
              "pci-bridge",
              "cardbus-bridge",
66
);
67

68
struct _virPCIDevice {
69
    virPCIDeviceAddress address;
70

71
    char          *name;              /* domain:bus:slot.function */
72
    char          id[PCI_ID_LEN];     /* product vendor */
E
Eric Blake 已提交
73
    char          *path;
C
Chunyan Liu 已提交
74 75 76 77

    /* The driver:domain which uses the device */
    char          *used_by_drvname;
    char          *used_by_domname;
78

79 80
    unsigned int  pcie_cap_pos;
    unsigned int  pci_pm_cap_pos;
81 82
    bool          has_flr;
    bool          has_pm_reset;
83
    bool          managed;
84 85

    virPCIStubDriver stubDriver;
86 87

    /* used by reattach function */
88 89 90
    bool          unbind_from_stub;
    bool          remove_slot;
    bool          reprobe;
91 92
};

93
struct _virPCIDeviceList {
94 95
    virObjectLockable parent;

96
    size_t count;
97
    virPCIDevicePtr *devs;
98 99 100
};


101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
/* For virReportOOMError()  and virReportSystemError() */
#define VIR_FROM_THIS VIR_FROM_NONE

/* Specifications referenced in comments:
 *  PCI30  - PCI Local Bus Specification 3.0
 *  PCIe20 - PCI Express Base Specification 2.0
 *  BR12   - PCI-to-PCI Bridge Architecture Specification 1.2
 *  PM12   - PCI Bus Power Management Interface Specification 1.2
 *  ECN_AF - Advanced Capabilities for Conventional PCI ECN
 */

/* Type 0 config space header length; PCI30 Section 6.1 Configuration Space Organization */
#define PCI_CONF_LEN            0x100
#define PCI_CONF_HEADER_LEN     0x40

/* PCI30 6.2.1 */
#define PCI_HEADER_TYPE         0x0e    /* Header type */
118 119 120
#define PCI_HEADER_TYPE_BRIDGE 0x1
#define PCI_HEADER_TYPE_MASK   0x7f
#define PCI_HEADER_TYPE_MULTI  0x80
121 122 123 124 125 126 127 128 129

/* PCI30 6.2.1  Device Identification */
#define PCI_CLASS_DEVICE        0x0a    /* Device class */

/* Class Code for bridge; PCI30 D.7  Base Class 06h */
#define PCI_CLASS_BRIDGE_PCI    0x0604

/* PCI30 6.2.3  Device Status */
#define PCI_STATUS              0x06    /* 16 bits */
130
#define PCI_STATUS_CAP_LIST    0x10    /* Support Capability List */
131 132 133

/* PCI30 6.7  Capabilities List */
#define PCI_CAPABILITY_LIST     0x34    /* Offset of first capability list entry */
134
#define PCI_CAP_FLAGS           2       /* Capability defined flags (16 bits) */
135 136 137 138 139 140 141 142 143 144

/* PM12 3.2.1  Capability Identifier */
#define PCI_CAP_ID_PM           0x01    /* Power Management */
/* PCI30 H Capability IDs */
#define PCI_CAP_ID_EXP          0x10    /* PCI Express */
/* ECN_AF 6.x.1.1  Capability ID for AF */
#define PCI_CAP_ID_AF           0x13    /* Advanced Features */

/* PCIe20 7.8.3  Device Capabilities Register (Offset 04h) */
#define PCI_EXP_DEVCAP          0x4     /* Device capabilities */
145 146
#define PCI_EXP_DEVCAP_FLR     (1<<28)  /* Function Level Reset */
#define PCI_EXP_LNKCAP          0xc     /* Link Capabilities */
147
#define PCI_EXP_LNKCAP_SPEED    0x0000f /* Maximum Link Speed */
148 149 150 151
#define PCI_EXP_LNKCAP_WIDTH    0x003f0 /* Maximum Link Width */
#define PCI_EXP_LNKSTA          0x12    /* Link Status */
#define PCI_EXP_LNKSTA_SPEED    0x000f  /* Negotiated Link Speed */
#define PCI_EXP_LNKSTA_WIDTH    0x03f0  /* Negotiated Link Width */
152 153 154 155 156 157 158

/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */
#define PCI_PRIMARY_BUS         0x18    /* BR12 3.2.5.2 Primary bus number */
#define PCI_SECONDARY_BUS       0x19    /* BR12 3.2.5.3 Secondary bus number */
#define PCI_SUBORDINATE_BUS     0x1a    /* BR12 3.2.5.4 Highest bus number behind the bridge */
#define PCI_BRIDGE_CONTROL      0x3e
/* BR12 3.2.5.18  Bridge Control Register */
159
#define PCI_BRIDGE_CTL_RESET   0x40    /* Secondary bus reset */
160 161 162

/* PM12 3.2.4  Power Management Control/Status (Offset = 4) */
#define PCI_PM_CTRL                4    /* PM control and status register */
163 164 165 166
#define PCI_PM_CTRL_STATE_MASK    0x3  /* Current power state (D0 to D3) */
#define PCI_PM_CTRL_STATE_D0      0x0  /* D0 state */
#define PCI_PM_CTRL_STATE_D3hot   0x3  /* D3 state */
#define PCI_PM_CTRL_NO_SOFT_RESET 0x8  /* No reset for D3hot->D0 */
167 168 169

/* ECN_AF 6.x.1  Advanced Features Capability Structure */
#define PCI_AF_CAP              0x3     /* Advanced features capabilities */
170
#define PCI_AF_CAP_FLR         0x2     /* Function Level Reset */
171

J
Jiri Denemark 已提交
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
#define PCI_EXP_FLAGS           0x2
#define PCI_EXP_FLAGS_TYPE      0x00f0
#define PCI_EXP_TYPE_DOWNSTREAM 0x6

#define PCI_EXT_CAP_BASE          0x100
#define PCI_EXT_CAP_LIMIT         0x1000
#define PCI_EXT_CAP_ID_MASK       0x0000ffff
#define PCI_EXT_CAP_OFFSET_SHIFT  20
#define PCI_EXT_CAP_OFFSET_MASK   0x00000ffc

#define PCI_EXT_CAP_ID_ACS      0x000d
#define PCI_EXT_ACS_CTRL        0x06

#define PCI_EXT_CAP_ACS_SV      0x01
#define PCI_EXT_CAP_ACS_RR      0x04
#define PCI_EXT_CAP_ACS_CR      0x08
#define PCI_EXT_CAP_ACS_UF      0x10
189 190 191
#define PCI_EXT_CAP_ACS_ENABLED (PCI_EXT_CAP_ACS_SV | \
                                 PCI_EXT_CAP_ACS_RR | \
                                 PCI_EXT_CAP_ACS_CR | \
J
Jiri Denemark 已提交
192 193
                                 PCI_EXT_CAP_ACS_UF)

194 195 196
#define PCI_EXP_TYPE_ROOT_INT_EP 0x9    /* Root Complex Integrated Endpoint */
#define PCI_EXP_TYPE_ROOT_EC 0xa        /* Root Complex Event Collector */

197 198 199 200 201 202
static virClassPtr virPCIDeviceListClass;

static void virPCIDeviceListDispose(void *obj);

static int virPCIOnceInit(void)
{
203
    if (!VIR_CLASS_NEW(virPCIDeviceList, virClassForObjectLockable()))
204 205 206 207 208
        return -1;

    return 0;
}

209
VIR_ONCE_GLOBAL_INIT(virPCI);
210

L
Laine Stump 已提交
211

212 213
static char *
virPCIDriverDir(const char *driver)
L
Laine Stump 已提交
214
{
215
    char *buffer;
L
Laine Stump 已提交
216

217 218
    ignore_value(virAsprintf(&buffer, PCI_SYSFS "drivers/%s", driver));
    return buffer;
L
Laine Stump 已提交
219 220 221
}


222 223
static char *
virPCIDriverFile(const char *driver, const char *file)
L
Laine Stump 已提交
224
{
225
    char *buffer;
L
Laine Stump 已提交
226

227 228
    ignore_value(virAsprintf(&buffer, PCI_SYSFS "drivers/%s/%s", driver, file));
    return buffer;
L
Laine Stump 已提交
229 230 231
}


232 233
static char *
virPCIFile(const char *device, const char *file)
L
Laine Stump 已提交
234
{
235
    char *buffer;
L
Laine Stump 已提交
236

237 238
    ignore_value(virAsprintf(&buffer, PCI_SYSFS "devices/%s/%s", device, file));
    return buffer;
L
Laine Stump 已提交
239 240 241 242 243 244 245 246 247 248
}


/* virPCIDeviceGetDriverPathAndName - put the path to the driver
 * directory of the driver in use for this device in @path and the
 * name of the driver in @name. Both could be NULL if it's not bound
 * to any driver.
 *
 * Return 0 for success, -1 for error.
 */
249
int
L
Laine Stump 已提交
250 251 252
virPCIDeviceGetDriverPathAndName(virPCIDevicePtr dev, char **path, char **name)
{
    int ret = -1;
253
    VIR_AUTOFREE(char *) drvlink = NULL;
L
Laine Stump 已提交
254 255 256

    *path = *name = NULL;
    /* drvlink = "/sys/bus/pci/dddd:bb:ss.ff/driver" */
257
    if (!(drvlink = virPCIFile(dev->name, "driver")))
L
Laine Stump 已提交
258 259
        goto cleanup;

260 261 262 263 264
    if (!virFileExists(drvlink)) {
        ret = 0;
        goto cleanup;
    }

L
Laine Stump 已提交
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
    if (virFileIsLink(drvlink) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s driver file %s is not a symlink"),
                       dev->name, drvlink);
        goto cleanup;
    }
    if (virFileResolveLink(drvlink, path) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s driver symlink %s"),
                       dev->name, drvlink);
        goto cleanup;
    }
    /* path = "/sys/bus/pci/drivers/${drivername}" */

    if (VIR_STRDUP(*name, last_component(*path)) < 0)
        goto cleanup;
    /* name = "${drivername}" */

    ret = 0;
284
 cleanup:
L
Laine Stump 已提交
285 286 287 288 289 290 291 292
    if (ret < 0) {
        VIR_FREE(*path);
        VIR_FREE(*name);
    }
    return ret;
}


293
static int
294
virPCIDeviceConfigOpenInternal(virPCIDevicePtr dev, bool fatal)
295 296 297 298
{
    int fd;

    fd = open(dev->path, O_RDWR);
299

300
    if (fd < 0) {
301 302 303 304 305 306 307 308 309
        if (fatal) {
            virReportSystemError(errno,
                                 _("Failed to open config space file '%s'"),
                                 dev->path);
        } else {
            char ebuf[1024];
            VIR_WARN("Failed to open config space file '%s': %s",
                     dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
        }
310 311
        return -1;
    }
312

313
    VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path);
314
    return fd;
315 316
}

317 318 319 320 321 322
/* Open @dev's config space file.  @fatal is passed straight through to
 * virPCIDeviceConfigOpenInternal(): true reports an open failure as an
 * error, false only logs a warning.
 *
 * Returns the file descriptor, or -1 on failure.
 */
static int
virPCIDeviceConfigOpen(virPCIDevicePtr dev, bool fatal)
{
    return virPCIDeviceConfigOpenInternal(dev, fatal);
}

323 324 325 326 327 328
/* Open @dev's config space file, always treating a failure as fatal
 * (an error is reported rather than a warning).
 *
 * NOTE(review): apart from forcing fatal=true this is identical to
 * virPCIDeviceConfigOpen(); both end up opening the file O_RDWR.
 *
 * Returns the file descriptor, or -1 on failure.
 */
static int
virPCIDeviceConfigOpenWrite(virPCIDevicePtr dev)
{
    return virPCIDeviceConfigOpenInternal(dev, true);
}

329
static void
330
virPCIDeviceConfigClose(virPCIDevicePtr dev, int cfgfd)
331
{
332 333 334 335 336
    if (VIR_CLOSE(cfgfd) < 0) {
        char ebuf[1024];
        VIR_WARN("Failed to close config space file '%s': %s",
                 dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
    }
337 338
}

339

340
static int
341 342
virPCIDeviceRead(virPCIDevicePtr dev,
                 int cfgfd,
343
                 unsigned int pos,
344
                 uint8_t *buf,
345
                 unsigned int buflen)
346 347 348
{
    memset(buf, 0, buflen);

349 350
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        saferead(cfgfd, buf, buflen) != buflen) {
351
        char ebuf[1024];
352
        VIR_WARN("Failed to read from '%s' : %s", dev->path,
353 354 355 356 357 358 359
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static uint8_t
360
virPCIDeviceRead8(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
361 362
{
    uint8_t buf;
363
    virPCIDeviceRead(dev, cfgfd, pos, &buf, sizeof(buf));
364 365 366 367
    return buf;
}

static uint16_t
368
virPCIDeviceRead16(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
369 370
{
    uint8_t buf[2];
371
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
372 373 374 375
    return (buf[0] << 0) | (buf[1] << 8);
}

static uint32_t
376
virPCIDeviceRead32(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
377 378
{
    uint8_t buf[4];
379
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
380 381 382
    return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
}

383 384 385
/* Read the sysfs "class" file of @dev and store bits 8..23 of the
 * parsed value in @device_class.
 *
 * Returns 0 on success, -1 on failure (an error is reported when the
 * file content cannot be parsed as a hexadecimal number).
 */
static int
virPCIDeviceReadClass(virPCIDevicePtr dev, uint16_t *device_class)
{
    VIR_AUTOFREE(char *) classfile = NULL;
    VIR_AUTOFREE(char *) contents = NULL;
    unsigned int classcode;

    if (!(classfile = virPCIFile(dev->name, "class")))
        return -1;

    /* class string is '0xNNNNNN\n' ... i.e. 9 bytes */
    if (virFileReadAll(classfile, 9, &contents) < 0)
        return -1;

    /* cut the string off at the newline position */
    contents[8] = '\0';

    if (virStrToLong_ui(contents, NULL, 16, &classcode) >= 0) {
        *device_class = (classcode >> 8) & 0xFFFF;
        return 0;
    }

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("Unusual value in %s/devices/%s/class: %s"),
                   PCI_SYSFS, dev->name, contents);
    return -1;
}

409
static int
410 411
virPCIDeviceWrite(virPCIDevicePtr dev,
                  int cfgfd,
412
                  unsigned int pos,
413
                  uint8_t *buf,
414
                  unsigned int buflen)
415
{
416 417
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        safewrite(cfgfd, buf, buflen) != buflen) {
418
        char ebuf[1024];
419
        VIR_WARN("Failed to write to '%s' : %s", dev->path,
420 421 422 423 424 425 426
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static void
427
virPCIDeviceWrite16(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint16_t val)
428 429
{
    uint8_t buf[2] = { (val >> 0), (val >> 8) };
430
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
431 432 433
}

static void
434
virPCIDeviceWrite32(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint32_t val)
435
{
436
    uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val >> 24) };
437
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
438 439
}

E
Eric Blake 已提交
440 441
/* Callback type used by virPCIDeviceIterDevices(), which invokes it as
 * predicate(dev, candidate, data).  A negative return aborts the
 * iteration with an error, 1 reports the candidate as a match, and any
 * other value moves on to the next device.
 */
typedef int (*virPCIDeviceIterPredicate)(virPCIDevicePtr, virPCIDevicePtr,
                                         void *);
442 443 444 445 446 447 448

/* Iterate over available PCI devices calling @predicate
 * to compare each one to @dev.
 * Return -1 on error since we don't want to assume it is
 * safe to reset if there is an error.
 */
static int
449 450 451 452
virPCIDeviceIterDevices(virPCIDeviceIterPredicate predicate,
                        virPCIDevicePtr dev,
                        virPCIDevicePtr *matched,
                        void *data)
453 454 455
{
    DIR *dir;
    struct dirent *entry;
456
    int ret = 0;
457
    int rc;
458 459 460 461 462

    *matched = NULL;

    VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices", dev->id, dev->name);

J
Ján Tomko 已提交
463
    if (virDirOpen(&dir, PCI_SYSFS "devices") < 0)
464 465
        return -1;

E
Eric Blake 已提交
466
    while ((ret = virDirRead(dir, &entry, PCI_SYSFS "devices")) > 0) {
467
        unsigned int domain, bus, slot, function;
468
        VIR_AUTOPTR(virPCIDevice) check = NULL;
469
        char *tmp;
470

471 472 473 474 475 476 477 478 479
        /* expected format: <domain>:<bus>:<slot>.<function> */
        if (/* domain */
            virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp != ':' ||
            /* bus */
            virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' ||
            /* slot */
            virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' ||
            /* function */
            virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) {
480 481 482 483
            VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s", entry->d_name);
            continue;
        }

484
        check = virPCIDeviceNew(domain, bus, slot, function);
485
        if (!check) {
486 487 488
            ret = -1;
            break;
        }
489

490 491 492 493 494
        rc = predicate(dev, check, data);
        if (rc < 0) {
            /* the predicate returned an error, bail */
            ret = -1;
            break;
495
        } else if (rc == 1) {
496
            VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name, check->name);
497
            VIR_STEAL_PTR(*matched, check);
498
            ret = 1;
499 500 501
            break;
        }
    }
J
Ján Tomko 已提交
502
    VIR_DIR_CLOSE(dir);
503
    return ret;
504 505 506
}

static uint8_t
507 508 509
virPCIDeviceFindCapabilityOffset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 unsigned int capability)
510 511 512 513
{
    uint16_t status;
    uint8_t pos;

514
    status = virPCIDeviceRead16(dev, cfgfd, PCI_STATUS);
515 516 517
    if (!(status & PCI_STATUS_CAP_LIST))
        return 0;

518
    pos = virPCIDeviceRead8(dev, cfgfd, PCI_CAPABILITY_LIST);
519 520 521 522 523 524 525 526 527

    /* Zero indicates last capability, capabilities can't
     * be in the config space header and 0xff is returned
     * by the kernel if we don't have access to this region
     *
     * Note: we're not handling loops or extended
     * capabilities here.
     */
    while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) {
528
        uint8_t capid = virPCIDeviceRead8(dev, cfgfd, pos);
529 530 531 532 533 534
        if (capid == capability) {
            VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x",
                      dev->id, dev->name, capability, pos);
            return pos;
        }

535
        pos = virPCIDeviceRead8(dev, cfgfd, pos + 1);
536 537 538 539 540 541 542
    }

    VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name, capability);

    return 0;
}

J
Jiri Denemark 已提交
543
static unsigned int
544 545
virPCIDeviceFindExtendedCapabilityOffset(virPCIDevicePtr dev,
                                         int cfgfd,
546
                                         unsigned int capability)
J
Jiri Denemark 已提交
547 548 549 550 551 552 553 554 555 556
{
    int ttl;
    unsigned int pos;
    uint32_t header;

    /* minimum 8 bytes per capability */
    ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8;
    pos = PCI_EXT_CAP_BASE;

    while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) {
557
        header = virPCIDeviceRead32(dev, cfgfd, pos);
J
Jiri Denemark 已提交
558 559 560 561 562 563 564 565 566 567 568

        if ((header & PCI_EXT_CAP_ID_MASK) == capability)
            return pos;

        pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK;
        ttl--;
    }

    return 0;
}

569 570 571 572
/* detects whether this device has FLR.  Returns 0 if the device does
 * not have FLR, 1 if it does, and -1 on error
 */
static int
573
virPCIDeviceDetectFunctionLevelReset(virPCIDevicePtr dev, int cfgfd)
574
{
M
Mark McLoughlin 已提交
575
    uint32_t caps;
576
    uint8_t pos;
577
    VIR_AUTOFREE(char *) path = NULL;
578
    int found;
579 580 581 582 583 584 585 586

    /* The PCIe Function Level Reset capability allows
     * individual device functions to be reset without
     * affecting any other functions on the device or
     * any other devices on the bus. This is only common
     * on SR-IOV NICs at the moment.
     */
    if (dev->pcie_cap_pos) {
587
        caps = virPCIDeviceRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP);
588 589 590 591 592 593 594 595 596 597
        if (caps & PCI_EXP_DEVCAP_FLR) {
            VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id, dev->name);
            return 1;
        }
    }

    /* The PCI AF Function Level Reset capability is
     * the same thing, except for conventional PCI
     * devices. This is not common yet.
     */
598
    pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF);
599
    if (pos) {
600
        caps = virPCIDeviceRead16(dev, cfgfd, pos + PCI_AF_CAP);
601 602 603 604 605 606
        if (caps & PCI_AF_CAP_FLR) {
            VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id, dev->name);
            return 1;
        }
    }

607 608 609 610 611 612
    /* there are some buggy devices that do support FLR, but forget to
     * advertise that fact in their capabilities.  However, FLR is *required*
     * to be present for virtual functions (VFs), so if we see that this
     * device is a VF, we just assume FLR works
     */

613
    if (virAsprintf(&path, PCI_SYSFS "devices/%s/physfn", dev->name) < 0)
614 615 616 617 618 619 620 621 622
        return -1;

    found = virFileExists(path);
    if (found) {
        VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF; forcing flr on",
                  dev->id, dev->name);
        return 1;
    }

623 624 625 626 627 628 629 630 631
    VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name);

    return 0;
}

/* Require the device has the PCI Power Management capability
 * and that a D3hot->D0 transition will results in a full
 * internal reset, not just a soft reset.
 */
632
static unsigned int
633
virPCIDeviceDetectPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
634 635 636 637 638
{
    if (dev->pci_pm_cap_pos) {
        uint32_t ctl;

        /* require the NO_SOFT_RESET bit is clear */
639
        ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
640 641 642 643 644 645 646 647 648 649 650
        if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) {
            VIR_DEBUG("%s %s: detected PM reset capability", dev->id, dev->name);
            return 1;
        }
    }

    VIR_DEBUG("%s %s: no PM reset capability found", dev->id, dev->name);

    return 0;
}

651
/* Any active devices on the same domain/bus ? */
652
static int
653
virPCIDeviceSharesBusWithActive(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
654
{
655
    virPCIDeviceList *inactiveDevs = data;
656

657
    /* Different domain, different bus, or simply identical device */
658 659 660 661
    if (dev->address.domain != check->address.domain ||
        dev->address.bus != check->address.bus ||
        (dev->address.slot == check->address.slot &&
         dev->address.function == check->address.function))
662 663
        return 0;

664
    /* same bus, but inactive, i.e. about to be assigned to guest */
665
    if (inactiveDevs && virPCIDeviceListFind(inactiveDevs, check))
666
        return 0;
667

668
    return 1;
669 670
}

671 672 673
/* Look for a device sharing @dev's bus that is not in @inactiveDevs.
 *
 * Returns the first such device found by the iteration, or NULL when
 * there is none.  NULL is also returned when the iteration itself
 * fails, so callers cannot tell the two cases apart.
 */
static virPCIDevicePtr
virPCIDeviceBusContainsActiveDevices(virPCIDevicePtr dev,
                                     virPCIDeviceList *inactiveDevs)
{
    virPCIDevicePtr found = NULL;
    int rc = virPCIDeviceIterDevices(virPCIDeviceSharesBusWithActive,
                                     dev, &found, inactiveDevs);

    return rc < 0 ? NULL : found;
}

/* Is @check the parent of @dev ? */
683
static int
684
virPCIDeviceIsParent(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
685 686 687
{
    uint16_t device_class;
    uint8_t header_type, secondary, subordinate;
688
    virPCIDevicePtr *best = data;
689 690
    int ret = 0;
    int fd;
691

692
    if (dev->address.domain != check->address.domain)
693 694
        return 0;

695
    if ((fd = virPCIDeviceConfigOpen(check, false)) < 0)
696 697
        return 0;

698
    /* Is it a bridge? */
699 700
    ret = virPCIDeviceReadClass(check, &device_class);
    if (ret < 0 || device_class != PCI_CLASS_BRIDGE_PCI)
701
        goto cleanup;
702 703

    /* Is it a plane? */
704
    header_type = virPCIDeviceRead8(check, fd, PCI_HEADER_TYPE);
705
    if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
706
        goto cleanup;
707

708 709
    secondary   = virPCIDeviceRead8(check, fd, PCI_SECONDARY_BUS);
    subordinate = virPCIDeviceRead8(check, fd, PCI_SUBORDINATE_BUS);
710

711
    VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name, check->name);
712

713 714 715
    /* if the secondary bus exactly equals the device's bus, then we found
     * the direct parent.  No further work is necessary
     */
716
    if (dev->address.bus == secondary) {
717 718 719
        ret = 1;
        goto cleanup;
    }
720

721
    /* otherwise, SRIOV allows VFs to be on different buses than their PFs.
722 723 724
     * In this case, what we need to do is look for the "best" match; i.e.
     * the most restrictive match that still satisfies all of the conditions.
     */
725
    if (dev->address.bus > secondary && dev->address.bus <= subordinate) {
726
        if (*best == NULL) {
727 728 729 730
            *best = virPCIDeviceNew(check->address.domain,
                                    check->address.bus,
                                    check->address.slot,
                                    check->address.function);
731 732 733 734 735
            if (*best == NULL) {
                ret = -1;
                goto cleanup;
            }
        } else {
736 737 738 739
            /* OK, we had already recorded a previous "best" match for the
             * parent.  See if the current device is more restrictive than the
             * best, and if so, make it the new best
             */
740 741 742
            int bestfd;
            uint8_t best_secondary;

743
            if ((bestfd = virPCIDeviceConfigOpen(*best, false)) < 0)
744
                goto cleanup;
745 746
            best_secondary = virPCIDeviceRead8(*best, bestfd, PCI_SECONDARY_BUS);
            virPCIDeviceConfigClose(*best, bestfd);
747 748

            if (secondary > best_secondary) {
749
                virPCIDeviceFree(*best);
750 751 752 753
                *best = virPCIDeviceNew(check->address.domain,
                                        check->address.bus,
                                        check->address.slot,
                                        check->address.function);
754 755 756 757
                if (*best == NULL) {
                    ret = -1;
                    goto cleanup;
                }
758 759 760 761
            }
        }
    }

762
 cleanup:
763
    virPCIDeviceConfigClose(check, fd);
764
    return ret;
765 766
}

767
static int
768
virPCIDeviceGetParent(virPCIDevicePtr dev, virPCIDevicePtr *parent)
769
{
770
    virPCIDevicePtr best = NULL;
771 772 773
    int ret;

    *parent = NULL;
774
    ret = virPCIDeviceIterDevices(virPCIDeviceIsParent, dev, parent, &best);
775
    if (ret == 1)
776
        virPCIDeviceFree(best);
777 778 779
    else if (ret == 0)
        *parent = best;
    return ret;
780 781 782 783 784 785
}

/* Secondary Bus Reset is our sledgehammer - it resets all
 * devices behind a bus.
 */
static int
786 787 788
virPCIDeviceTrySecondaryBusReset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 virPCIDeviceList *inactiveDevs)
789
{
790 791
    VIR_AUTOPTR(virPCIDevice) parent = NULL;
    VIR_AUTOPTR(virPCIDevice) conflict = NULL;
792 793 794
    uint8_t config_space[PCI_CONF_LEN];
    uint16_t ctl;
    int ret = -1;
795
    int parentfd;
796

797 798 799
    /* Refuse to do a secondary bus reset if there are other
     * devices/functions behind the bus are used by the host
     * or other guests.
800
     */
801
    if ((conflict = virPCIDeviceBusContainsActiveDevices(dev, inactiveDevs))) {
802
        virReportError(VIR_ERR_INTERNAL_ERROR,
803 804
                       _("Active %s devices on bus with %s, not doing bus reset"),
                       conflict->name, dev->name);
805 806 807 808
        return -1;
    }

    /* Find the parent bus */
809
    if (virPCIDeviceGetParent(dev, &parent) < 0)
810
        return -1;
811
    if (!parent) {
812
        virReportError(VIR_ERR_INTERNAL_ERROR,
813 814
                       _("Failed to find parent device for %s"),
                       dev->name);
815 816
        return -1;
    }
817
    if ((parentfd = virPCIDeviceConfigOpenWrite(parent)) < 0)
818
        goto out;
819 820 821 822 823 824 825

    VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name);

    /* Save and restore the device's config space; we only do this
     * for the supplied device since we refuse to do a reset if there
     * are multiple devices/functions
     */
826
    if (virPCIDeviceRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
827
        virReportError(VIR_ERR_INTERNAL_ERROR,
828
                       _("Failed to read PCI config space for %s"),
829
                       dev->name);
830 831 832 833 834 835
        goto out;
    }

    /* Read the control register, set the reset flag, wait 200ms,
     * unset the reset flag and wait 200ms.
     */
836
    ctl = virPCIDeviceRead16(dev, cfgfd, PCI_BRIDGE_CONTROL);
837

838 839
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL,
                        ctl | PCI_BRIDGE_CTL_RESET);
840 841 842

    usleep(200 * 1000); /* sleep 200ms */

843
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl);
844 845 846

    usleep(200 * 1000); /* sleep 200ms */

847
    if (virPCIDeviceWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
848
        virReportError(VIR_ERR_INTERNAL_ERROR,
849 850 851 852
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        goto out;
    }
853
    ret = 0;
854

855
 out:
856
    virPCIDeviceConfigClose(parent, parentfd);
857 858 859 860 861 862 863 864
    return ret;
}

/* Power management reset attempts to reset a device using a
 * D-state transition from D3hot to D0. Note, in detect_pm_reset()
 * above we require the device supports a full internal reset.
 */
static int
865
virPCIDeviceTryPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
866 867 868 869 870 871 872 873
{
    uint8_t config_space[PCI_CONF_LEN];
    uint32_t ctl;

    if (!dev->pci_pm_cap_pos)
        return -1;

    /* Save and restore the device's config space. */
874
    if (virPCIDeviceRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
875
        virReportError(VIR_ERR_INTERNAL_ERROR,
876
                       _("Failed to read PCI config space for %s"),
877
                       dev->name);
878 879 880 881 882
        return -1;
    }

    VIR_DEBUG("%s %s: doing a power management reset", dev->id, dev->name);

883
    ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
884 885
    ctl &= ~PCI_PM_CTRL_STATE_MASK;

886 887
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D3hot);
888 889 890

    usleep(10 * 1000); /* sleep 10ms */

891 892
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D0);
893 894 895

    usleep(10 * 1000); /* sleep 10ms */

896
    if (virPCIDeviceWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
897
        virReportError(VIR_ERR_INTERNAL_ERROR,
898 899 900 901
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        return -1;
    }
902 903 904 905 906

    return 0;
}

static int
907
virPCIDeviceInit(virPCIDevicePtr dev, int cfgfd)
908
{
909 910
    int flr;

911 912 913
    dev->pcie_cap_pos   = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP);
    dev->pci_pm_cap_pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM);
    flr = virPCIDeviceDetectFunctionLevelReset(dev, cfgfd);
914
    if (flr < 0)
915
        return flr;
916 917
    dev->has_flr        = !!flr;
    dev->has_pm_reset   = !!virPCIDeviceDetectPowerManagementReset(dev, cfgfd);
918

919 920 921 922
    return 0;
}

int
923 924 925
virPCIDeviceReset(virPCIDevicePtr dev,
                  virPCIDeviceList *activeDevs,
                  virPCIDeviceList *inactiveDevs)
926
{
927 928
    VIR_AUTOFREE(char *) drvPath = NULL;
    VIR_AUTOFREE(char *) drvName = NULL;
929
    int ret = -1;
930
    int fd = -1;
931 932 933 934 935 936 937 938 939 940 941 942
    int hdrType = -1;

    if (virPCIGetHeaderType(dev, &hdrType) < 0)
        return -1;

    if (hdrType != VIR_PCI_HEADER_ENDPOINT) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid attempt to reset PCI device %s. "
                         "Only PCI endpoint devices can be reset"),
                       dev->name);
        return -1;
    }
943

944
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
945
        virReportError(VIR_ERR_INTERNAL_ERROR,
946 947 948 949
                       _("Not resetting active device %s"), dev->name);
        return -1;
    }

950 951 952 953 954 955 956 957
    /* If the device is currently bound to vfio-pci, ignore all
     * requests to reset it, since the vfio-pci driver will always
     * reset it whenever appropriate, so doing it ourselves would just
     * be redundant.
     */
    if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, &drvName) < 0)
        goto cleanup;

958
    if (virPCIStubDriverTypeFromString(drvName) == VIR_PCI_STUB_DRIVER_VFIO) {
959 960 961 962 963 964 965
        VIR_DEBUG("Device %s is bound to vfio-pci - skip reset",
                  dev->name);
        ret = 0;
        goto cleanup;
    }
    VIR_DEBUG("Resetting device %s", dev->name);

966
    if ((fd = virPCIDeviceConfigOpenWrite(dev)) < 0)
967
        goto cleanup;
968

969
    if (virPCIDeviceInit(dev, fd) < 0)
970 971
        goto cleanup;

972 973 974
    /* KVM will perform FLR when starting and stopping
     * a guest, so there is no need for us to do it here.
     */
975 976 977 978
    if (dev->has_flr) {
        ret = 0;
        goto cleanup;
    }
979

980 981 982 983 984
    /* If the device supports PCI power management reset,
     * that's the next best thing because it only resets
     * the function, not the whole device.
     */
    if (dev->has_pm_reset)
985
        ret = virPCIDeviceTryPowerManagementReset(dev, fd);
986

987
    /* Bus reset is not an option with the root bus */
988
    if (ret < 0 && dev->address.bus != 0)
989
        ret = virPCIDeviceTrySecondaryBusReset(dev, fd, inactiveDevs);
990

991 992
    if (ret < 0) {
        virErrorPtr err = virGetLastError();
993
        virReportError(VIR_ERR_INTERNAL_ERROR,
994 995
                       _("Unable to reset PCI device %s: %s"),
                       dev->name,
996 997
                       err ? err->message :
                       _("no FLR, PM reset or bus reset available"));
998 999
    }

1000
 cleanup:
1001
    virPCIDeviceConfigClose(dev, fd);
1002 1003 1004
    return ret;
}

1005

1006
static int
1007
virPCIProbeStubDriver(virPCIStubDriver driver)
1008
{
1009
    const char *drvname = NULL;
1010
    VIR_AUTOFREE(char *) drvpath = NULL;
1011
    bool probed = false;
1012

1013 1014 1015 1016 1017 1018 1019 1020
    if (driver == VIR_PCI_STUB_DRIVER_NONE ||
        !(drvname = virPCIStubDriverTypeToString(driver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s",
                       _("Attempting to use unknown stub driver"));
        return -1;
    }

1021
 recheck:
1022
    if ((drvpath = virPCIDriverDir(drvname)) && virFileExists(drvpath))
1023 1024
        /* driver already loaded, return */
        return 0;
1025 1026

    if (!probed) {
1027
        VIR_AUTOFREE(char *) errbuf = NULL;
1028
        probed = true;
1029 1030
        if ((errbuf = virKModLoad(drvname, true))) {
            VIR_WARN("failed to load driver %s: %s", drvname, errbuf);
1031
            goto cleanup;
1032
        }
1033 1034

        goto recheck;
1035 1036
    }

1037
 cleanup:
1038 1039 1040
    /* If we know failure was because of blacklist, let's report that;
     * otherwise, report a more generic failure message
     */
1041
    if (virKModIsBlacklisted(drvname)) {
1042 1043 1044
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s: "
                         "administratively prohibited"),
1045
                       drvname);
1046 1047 1048
    } else {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s"),
1049
                       drvname);
1050 1051
    }

1052
    return -1;
1053 1054
}

1055
int
1056
virPCIDeviceUnbind(virPCIDevicePtr dev)
1057
{
1058 1059 1060
    VIR_AUTOFREE(char *) path = NULL;
    VIR_AUTOFREE(char *) drvpath = NULL;
    VIR_AUTOFREE(char *) driver = NULL;
1061 1062

    if (virPCIDeviceGetDriverPathAndName(dev, &drvpath, &driver) < 0)
1063
        return -1;
1064

1065
    if (!driver)
1066
        /* The device is not bound to any driver */
1067
        return 0;
1068

1069
    if (!(path = virPCIFile(dev->name, "driver/unbind")))
1070
        return -1;
1071 1072 1073 1074 1075 1076

    if (virFileExists(path)) {
        if (virFileWriteStr(path, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to unbind PCI device '%s' from %s"),
                                 dev->name, driver);
1077
            return -1;
1078 1079 1080
        }
    }

1081
    return 0;
1082 1083
}

1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108

/**
 * virPCIDeviceRebind:
 *  @dev: virPCIDevice object describing the device to rebind
 *
 * Unbind a device from its current driver, then ask the kernel to
 * probe it again through the PCI bus "drivers_probe" file.
 *
 * Returns 0 on success, -1 on failure
 */
int
virPCIDeviceRebind(virPCIDevicePtr dev)
{
    if (virPCIDeviceUnbind(dev) < 0)
        return -1;

    if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) >= 0)
        return 0;

    virReportSystemError(errno,
                         _("Failed to trigger a probe for PCI device '%s'"),
                         dev->name);
    return -1;
}


1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119
/*
 * Bind a PCI device to a driver using driver_override sysfs interface.
 * E.g.
 *
 *  echo driver-name > /sys/bus/pci/devices/0000:03:00.0/driver_override
 *  echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
 *  echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
 *
 * An empty driverName will cause the device to be bound to its
 * preferred driver.
 */
1120
static int
1121 1122 1123
virPCIDeviceBindWithDriverOverride(virPCIDevicePtr dev,
                                   const char *driverName)
{
1124
    VIR_AUTOFREE(char *) path = NULL;
1125 1126 1127 1128 1129 1130 1131 1132 1133

    if (!(path = virPCIFile(dev->name, "driver_override")))
        return -1;

    if (virFileWriteStr(path, driverName, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to add driver '%s' to driver_override "
                               " interface of PCI device '%s'"),
                             driverName, dev->name);
1134
        return -1;
1135 1136
    }

1137
    if (virPCIDeviceRebind(dev) < 0)
1138
        return -1;
1139

1140
    return 0;
1141 1142 1143 1144
}

/* Undo a new_id based stub binding in up to three steps, each one
 * guarded by the matching flag in @dev: unbind from the stub driver,
 * remove the slot, and trigger a re-probe. All three flags are
 * cleared on exit regardless of the outcome.
 *
 * Returns 0 on success, -1 on failure
 */
static int
virPCIDeviceUnbindFromStubWithNewid(virPCIDevicePtr dev)
{
    VIR_AUTOFREE(char *) drvdir = NULL;
    VIR_AUTOFREE(char *) sysfsFile = NULL;
    VIR_AUTOFREE(char *) drvName = NULL;
    int stubType;
    int rc = -1;

    /* If the device is currently bound to one of the "well known"
     * stub drivers, then unbind it, otherwise ignore it.
     */
    if (virPCIDeviceGetDriverPathAndName(dev, &drvdir, &drvName) < 0)
        goto cleanup;

    if (drvName == NULL) {
        /* The device is not bound to any driver and we are almost done. */
        VIR_DEBUG("PCI device %s is not bound to any driver", dev->name);
        goto reprobe;
    }

    if (!dev->unbind_from_stub) {
        VIR_DEBUG("Unbind from stub skipped for PCI device %s", dev->name);
        goto remove_slot;
    }

    /* If the device isn't bound to a known stub, skip the unbind. */
    stubType = virPCIStubDriverTypeFromString(drvName);
    if (stubType < 0 || stubType == VIR_PCI_STUB_DRIVER_NONE) {
        VIR_DEBUG("Unbind from stub skipped for PCI device %s because of "
                  "unknown stub driver", dev->name);
        goto remove_slot;
    }

    VIR_DEBUG("Unbinding PCI device %s from stub driver %s",
              dev->name, drvName);

    if (virPCIDeviceUnbind(dev) < 0)
        goto cleanup;
    dev->unbind_from_stub = false;

 remove_slot:
    if (!dev->remove_slot) {
        VIR_DEBUG("Slot removal skipped for PCI device %s", dev->name);
        goto reprobe;
    }

    VIR_DEBUG("Removing slot for PCI device %s", dev->name);

    /* Xen's pciback.ko wants you to use remove_slot on the specific device */
    sysfsFile = virPCIDriverFile(drvName, "remove_slot");
    if (sysfsFile == NULL)
        goto cleanup;

    if (virFileExists(sysfsFile) &&
        virFileWriteStr(sysfsFile, dev->name, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to remove slot for PCI device '%s' from %s"),
                             dev->name, drvName);
        goto cleanup;
    }
    dev->remove_slot = false;

 reprobe:
    if (!dev->reprobe) {
        VIR_DEBUG("Reprobe skipped for PCI device %s", dev->name);
        rc = 0;
        goto cleanup;
    }

    VIR_DEBUG("Reprobing for PCI device %s", dev->name);

    /* Trigger a re-probe if the device is not in the stub's dynamic
     * ID table. If the stub is available, but 'remove_id' isn't
     * available, then re-probing would just cause the device to be
     * re-bound to the stub.
     */
    VIR_FREE(sysfsFile);
    if (drvName && !(sysfsFile = virPCIDriverFile(drvName, "remove_id")))
        goto cleanup;

    if (!drvName || !virFileExists(drvdir) || virFileExists(sysfsFile)) {
        if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to trigger a re-probe for PCI device '%s'"),
                                 dev->name);
            goto cleanup;
        }
    }

    rc = 0;

 cleanup:
    /* do not do it again */
    dev->unbind_from_stub = false;
    dev->remove_slot = false;
    dev->reprobe = false;

    return rc;
}

1241 1242 1243 1244 1245 1246 1247 1248 1249 1250
/* Undo a driver_override based stub binding by writing "\n" to the
 * device's driver_override file and rebinding; a no-op when the
 * device is not flagged as needing an unbind.
 */
static int
virPCIDeviceUnbindFromStubWithOverride(virPCIDevicePtr dev)
{
    if (dev->unbind_from_stub)
        return virPCIDeviceBindWithDriverOverride(dev, "\n");

    VIR_DEBUG("Unbind from stub skipped for PCI device %s", dev->name);
    return 0;
}
1251 1252

static int
1253 1254
virPCIDeviceUnbindFromStub(virPCIDevicePtr dev)
{
1255
    VIR_AUTOFREE(char *) path = NULL;
1256 1257 1258 1259 1260 1261 1262 1263 1264

    /*
     * Prefer using the device's driver_override interface, falling back
     * to the unpleasant new_id interface.
     */
    if (!(path = virPCIFile(dev->name, "driver_override")))
        return -1;

    if (virFileExists(path))
1265
        return virPCIDeviceUnbindFromStubWithOverride(dev);
1266

1267
    return virPCIDeviceUnbindFromStubWithNewid(dev);
1268 1269 1270 1271
}

/* Bind @dev to its configured stub driver through the stub's new_id /
 * new_slot / bind sysfs files, recording in @dev which steps have to
 * be undone later (unbind_from_stub, remove_slot, reprobe). On
 * failure the partial binding is rolled back via
 * virPCIDeviceUnbindFromStub() and the error saved before the
 * remove_id step (if any) is restored.
 *
 * Returns 0 on success, -1 on failure
 */
static int
virPCIDeviceBindToStubWithNewid(virPCIDevicePtr dev)
{
    VIR_AUTOFREE(char *) stubDriverPath = NULL;
    VIR_AUTOFREE(char *) driverLink = NULL;
    VIR_AUTOFREE(char *) sysfsFile = NULL; /* reused for different purposes */
    VIR_AUTOPTR(virError) savedErr = NULL;
    const char *stubDriverName = NULL;
    bool wasBound = false;
    int rc = -1;

    /* Check the device is configured to use one of the known stub drivers */
    if (dev->stubDriver == VIR_PCI_STUB_DRIVER_NONE) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    stubDriverName = virPCIStubDriverTypeToString(dev->stubDriver);
    if (stubDriverName == NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unknown stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    stubDriverPath = virPCIDriverDir(stubDriverName);
    if (stubDriverPath == NULL)
        goto cleanup;

    driverLink = virPCIFile(dev->name, "driver");
    if (driverLink == NULL)
        goto cleanup;

    if (virFileExists(driverLink)) {
        if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
            /* The device is already bound to the correct driver */
            VIR_DEBUG("Device %s is already bound to %s",
                      dev->name, stubDriverName);
            rc = 0;
            goto cleanup;
        }
        /* Bound to some other driver */
        wasBound = true;
    }

    /* Add the PCI device ID to the stub's dynamic ID table;
     * this is needed to allow us to bind the device to the stub.
     * Note: if the device is not currently bound to any driver,
     * stub will immediately be bound to the device. Also, note
     * that if a new device with this ID is hotplugged, or if a probe
     * is triggered for such a device, it will also be immediately
     * bound by the stub.
     */
    sysfsFile = virPCIDriverFile(stubDriverName, "new_id");
    if (sysfsFile == NULL)
        goto cleanup;

    if (virFileWriteStr(sysfsFile, dev->id, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to add PCI device ID '%s' to %s"),
                             dev->id, stubDriverName);
        goto cleanup;
    }

    /* Writing dev->id to ${stubDriver}/new_id may already have bound
     * the device to the stub; check for that first.
     */
    if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
        dev->unbind_from_stub = true;
        dev->remove_slot = true;
        rc = 0;
        goto remove_id;
    }

    if (virPCIDeviceUnbind(dev) < 0)
        goto remove_id;

    /* If the device was bound to a driver we'll need to reprobe later */
    dev->reprobe = wasBound;

    /* If the device isn't already bound to pci-stub, try binding it now.
     */
    if (!virFileLinkPointsTo(driverLink, stubDriverPath)) {
        /* Xen's pciback.ko wants you to use new_slot first */
        VIR_FREE(sysfsFile);
        sysfsFile = virPCIDriverFile(stubDriverName, "new_slot");
        if (sysfsFile == NULL)
            goto remove_id;

        if (virFileExists(sysfsFile) &&
            virFileWriteStr(sysfsFile, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to add slot for "
                                   "PCI device '%s' to %s"),
                                 dev->name, stubDriverName);
            goto remove_id;
        }
        dev->remove_slot = true;

        VIR_FREE(sysfsFile);
        sysfsFile = virPCIDriverFile(stubDriverName, "bind");
        if (sysfsFile == NULL)
            goto remove_id;

        if (virFileWriteStr(sysfsFile, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to bind PCI device '%s' to %s"),
                                 dev->name, stubDriverName);
            goto remove_id;
        }
        dev->unbind_from_stub = true;
    }

    rc = 0;

 remove_id:
    /* Remember the current error before the remove_id step below */
    savedErr = virSaveLastError();

    /* If 'remove_id' exists, remove the device id from pci-stub's dynamic
     * ID table so that 'drivers_probe' works below.
     */
    VIR_FREE(sysfsFile);
    sysfsFile = virPCIDriverFile(stubDriverName, "remove_id");
    if (sysfsFile == NULL) {
        /* We do not remove PCI ID from pci-stub, and we cannot reprobe it */
        if (dev->reprobe) {
            VIR_WARN("Could not remove PCI ID '%s' from %s, and the device "
                     "cannot be probed again.", dev->id, stubDriverName);
        }
        dev->reprobe = false;
        rc = -1;
        goto cleanup;
    }

    if (virFileExists(sysfsFile) &&
        virFileWriteStr(sysfsFile, dev->id, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to remove PCI ID '%s' from %s"),
                             dev->id, stubDriverName);

        /* remove PCI ID from pci-stub failed, and we cannot reprobe it */
        if (dev->reprobe) {
            VIR_WARN("Failed to remove PCI ID '%s' from %s, and the device "
                     "cannot be probed again.", dev->id, stubDriverName);
        }
        dev->reprobe = false;
        rc = -1;
        goto cleanup;
    }

 cleanup:
    /* Roll back whatever part of the binding was done */
    if (rc < 0)
        virPCIDeviceUnbindFromStub(dev);

    if (savedErr)
        virSetError(savedErr);

    return rc;
}

1418 1419 1420 1421
/* Bind @dev to its configured stub driver through the driver_override
 * interface. Nothing is done if the device's driver link already
 * points at the stub driver; otherwise the device is flagged with
 * unbind_from_stub once the bind succeeded.
 *
 * Returns 0 on success, -1 on failure
 */
static int
virPCIDeviceBindToStubWithOverride(virPCIDevicePtr dev)
{
    const char *stubDriverName;
    VIR_AUTOFREE(char *) stubDriverPath = NULL;
    VIR_AUTOFREE(char *) driverLink = NULL;

    /* Check the device is configured to use one of the known stub drivers */
    if (dev->stubDriver == VIR_PCI_STUB_DRIVER_NONE) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    stubDriverName = virPCIStubDriverTypeToString(dev->stubDriver);
    if (stubDriverName == NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unknown stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    stubDriverPath = virPCIDriverDir(stubDriverName);
    if (stubDriverPath == NULL)
        return -1;

    driverLink = virPCIFile(dev->name, "driver");
    if (driverLink == NULL)
        return -1;

    if (virFileExists(driverLink) &&
        virFileLinkPointsTo(driverLink, stubDriverPath)) {
        /* The device is already bound to the correct driver */
        VIR_DEBUG("Device %s is already bound to %s",
                  dev->name, stubDriverName);
        return 0;
    }

    if (virPCIDeviceBindWithDriverOverride(dev, stubDriverName) < 0)
        return -1;

    dev->unbind_from_stub = true;
    return 0;
}

/* Bind @dev to its stub driver, picking the mechanism by whether the
 * device exposes a driver_override file in sysfs.
 *
 * Returns 0 on success, -1 on failure
 */
static int
virPCIDeviceBindToStub(virPCIDevicePtr dev)
{
    VIR_AUTOFREE(char *) overrideFile = NULL;

    /*
     * Prefer using the device's driver_override interface, falling back
     * to the unpleasant new_id interface.
     */
    overrideFile = virPCIFile(dev->name, "driver_override");
    if (overrideFile == NULL)
        return -1;

    if (!virFileExists(overrideFile))
        return virPCIDeviceBindToStubWithNewid(dev);

    return virPCIDeviceBindToStubWithOverride(dev);
}

1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493
/* virPCIDeviceDetach:
 *
 * Detach this device from the host driver, attach it to the stub
 * driver (previously set with virPCIDeviceSetStubDriver(), and add *a
 * copy* of the object to the inactiveDevs list (if provided). This
 * function will *never* consume dev, so the caller should free it.
 *
 * Returns 0 on success, -1 on failure (will fail if the device is
 * already in the activeDevs list, but will be a NOP if the device is
 * already bound to the stub).
 *
 * GENERAL NOTE: activeDevs should be a list of all PCI devices
 * currently in use by a domain. inactiveDevs is a list of all PCI
 * devices that libvirt has detached from the host driver + attached
 * to the stub driver, but hasn't yet assigned to a domain. Any device
 * that is still attached to its host driver should not be on either
 * list.
 */
1494
int
1495 1496
virPCIDeviceDetach(virPCIDevicePtr dev,
                   virPCIDeviceList *activeDevs,
1497
                   virPCIDeviceList *inactiveDevs)
1498
{
1499
    if (virPCIProbeStubDriver(dev->stubDriver) < 0)
1500 1501
        return -1;

1502
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1503
        virReportError(VIR_ERR_INTERNAL_ERROR,
1504 1505 1506 1507
                       _("Not detaching active device %s"), dev->name);
        return -1;
    }

1508
    if (virPCIDeviceBindToStub(dev) < 0)
1509 1510
        return -1;

1511 1512 1513
    /* Add *a copy of* the dev into list inactiveDevs, if
     * it's not already there.
     */
1514 1515 1516 1517
    if (inactiveDevs && !virPCIDeviceListFind(inactiveDevs, dev)) {
        VIR_DEBUG("Adding PCI device %s to inactive list", dev->name);
        if (virPCIDeviceListAddCopy(inactiveDevs, dev) < 0)
            return -1;
1518 1519 1520
    }

    return 0;
1521 1522
}

1523 1524 1525 1526
/*
 * Pre-condition: inactivePCIHostdevs & activePCIHostdevs
 * are locked
 */
1527
int
1528 1529
virPCIDeviceReattach(virPCIDevicePtr dev,
                     virPCIDeviceListPtr activeDevs,
1530
                     virPCIDeviceListPtr inactiveDevs)
1531
{
1532
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1533
        virReportError(VIR_ERR_INTERNAL_ERROR,
1534 1535 1536 1537
                       _("Not reattaching active device %s"), dev->name);
        return -1;
    }

1538 1539 1540 1541 1542 1543 1544 1545 1546 1547
    /* Wait for device cleanup if it is qemu/kvm */
    if (virPCIDeviceGetStubDriver(dev) == VIR_PCI_STUB_DRIVER_KVM) {
        int retries = 100;
        while (virPCIDeviceWaitForCleanup(dev, "kvm_assigned_device")
               && retries) {
            usleep(100*1000);
            retries--;
        }
    }

1548
    if (virPCIDeviceUnbindFromStub(dev) < 0)
1549 1550 1551
        return -1;

    /* Steal the dev from list inactiveDevs */
1552 1553
    if (inactiveDevs) {
        VIR_DEBUG("Removing PCI device %s from inactive list", dev->name);
1554
        virPCIDeviceListDel(inactiveDevs, dev);
1555
    }
1556 1557

    return 0;
1558 1559
}

1560 1561 1562 1563 1564
/* Certain hypervisors (like qemu/kvm) map the PCI bar(s) on
 * the host when doing device passthrough.  This can lead to a race
 * condition where the hypervisor is still cleaning up the device while
 * libvirt is trying to re-attach it to the host device driver.  To avoid
 * this situation, we look through /proc/iomem, and if the hypervisor is
E
Eric Blake 已提交
1565 1566
 * still holding on to the bar (denoted by the string in the matcher
 * variable), then we can wait around a bit for that to clear up.
1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586
 *
 * A typical /proc/iomem looks like this (snipped for brevity):
 * 00010000-0008efff : System RAM
 * 0008f000-0008ffff : reserved
 * ...
 * 00100000-cc9fcfff : System RAM
 *   00200000-00483d3b : Kernel code
 *   00483d3c-005c88df : Kernel data
 * cc9fd000-ccc71fff : ACPI Non-volatile Storage
 * ...
 * d0200000-d02fffff : PCI Bus #05
 *   d0200000-d021ffff : 0000:05:00.0
 *     d0200000-d021ffff : e1000e
 *   d0220000-d023ffff : 0000:05:00.0
 *     d0220000-d023ffff : e1000e
 * ...
 * f0000000-f0003fff : 0000:00:1b.0
 *   f0000000-f0003fff : kvm_assigned_device
 *
 * Returns 0 if we are clear to continue, and 1 if the hypervisor is still
E
Eric Blake 已提交
1587
 * holding on to the resource.
1588 1589
 */
int
1590
virPCIDeviceWaitForCleanup(virPCIDevicePtr dev, const char *matcher)
1591 1592 1593
{
    FILE *fp;
    char line[160];
1594
    char *tmp;
1595
    unsigned long long start, end;
1596
    unsigned int domain, bus, slot, function;
1597
    bool in_matching_device;
1598 1599 1600 1601 1602 1603 1604 1605 1606
    int ret;
    size_t match_depth;

    fp = fopen("/proc/iomem", "r");
    if (!fp) {
        /* If we failed to open iomem, we just basically ignore the error.  The
         * unbind might succeed anyway, and besides, it's very likely we have
         * no way to report the error
         */
1607
        VIR_DEBUG("Failed to open /proc/iomem, trying to continue anyway");
1608 1609 1610 1611
        return 0;
    }

    ret = 0;
1612
    in_matching_device = false;
1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623
    match_depth = 0;
    while (fgets(line, sizeof(line), fp) != 0) {
        /* the logic here is a bit confusing.  For each line, we look to
         * see if it matches the domain:bus:slot.function we were given.
         * If this line matches the DBSF, then any subsequent lines indented
         * by 2 spaces are the PCI regions for this device.  It's also
         * possible that none of the PCI regions are currently mapped, in
         * which case we have no indented regions.  This code handles all
         * of these situations
         */
        if (in_matching_device && (strspn(line, " ") == (match_depth + 2))) {
1624 1625 1626 1627 1628 1629
            /* expected format: <start>-<end> : <suffix> */
            if (/* start */
                virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp != '-' ||
                /* end */
                virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 ||
                (tmp = STRSKIP(tmp, " : ")) == NULL)
1630 1631
                continue;

1632
            if (STRPREFIX(tmp, matcher)) {
1633 1634 1635
                ret = 1;
                break;
            }
1636
        } else {
1637
            in_matching_device = false;
1638

1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652
            /* expected format: <start>-<end> : <domain>:<bus>:<slot>.<function> */
            if (/* start */
                virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp != '-' ||
                /* end */
                virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 ||
                (tmp = STRSKIP(tmp, " : ")) == NULL ||
                /* domain */
                virStrToLong_ui(tmp, &tmp, 16, &domain) < 0 || *tmp != ':' ||
                /* bus */
                virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' ||
                /* slot */
                virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' ||
                /* function */
                virStrToLong_ui(tmp + 1, &tmp, 16, &function) < 0 || *tmp != '\n')
1653 1654
                continue;

1655 1656
            if (domain != dev->address.domain || bus != dev->address.bus ||
                slot != dev->address.slot || function != dev->address.function)
1657
                continue;
1658
            in_matching_device = true;
1659 1660 1661 1662
            match_depth = strspn(line, " ");
        }
    }

E
Eric Blake 已提交
1663
    VIR_FORCE_FCLOSE(fp);
1664 1665 1666 1667

    return ret;
}

1668
static char *
1669
virPCIDeviceReadID(virPCIDevicePtr dev, const char *id_name)
1670
{
1671
    VIR_AUTOFREE(char *) path = NULL;
1672 1673
    char *id_str;

1674
    if (!(path = virPCIFile(dev->name, id_name)))
1675
        return NULL;
1676 1677

    /* ID string is '0xNNNN\n' ... i.e. 7 bytes */
1678
    if (virFileReadAll(path, 7, &id_str) < 0)
1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692
        return NULL;

    /* Check for 0x suffix */
    if (id_str[0] != '0' || id_str[1] != 'x') {
        VIR_FREE(id_str);
        return NULL;
    }

    /* Chop off the newline; we know the string is 7 bytes */
    id_str[6] = '\0';

    return id_str;
}

1693 1694 1695 1696
/**
 * virPCIDeviceAddressIsValid:
 * @addr: PCI address to check
 * @report: whether to report an error for an invalid address
 *
 * Check each field of @addr against its upper bound
 * (domain <= 0xFFFFFFFF, bus <= 0xFF, slot <= 0x1F, function <= 7)
 * and reject the address that virPCIDeviceAddressIsEmpty() considers
 * empty.
 *
 * Returns true if the address is valid, false otherwise
 */
bool
virPCIDeviceAddressIsValid(virPCIDeviceAddressPtr addr,
                           bool report)
{
    if (addr->domain > 0xFFFFFFFF) {
        /* The limit quoted in the message has to match the check above */
        if (report)
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address domain='0x%x', "
                             "must be <= 0xFFFFFFFF"),
                           addr->domain);
        return false;
    }
    if (addr->bus > 0xFF) {
        if (report)
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address bus='0x%x', "
                             "must be <= 0xFF"),
                           addr->bus);
        return false;
    }
    if (addr->slot > 0x1F) {
        if (report)
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address slot='0x%x', "
                             "must be <= 0x1F"),
                           addr->slot);
        return false;
    }
    if (addr->function > 7) {
        if (report)
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address function=0x%x, "
                             "must be <= 7"),
                           addr->function);
        return false;
    }
    if (virPCIDeviceAddressIsEmpty(addr)) {
        if (report)
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Invalid PCI address 0000:00:00, at least "
                             "one of domain, bus, or slot must be > 0"));
        return false;
    }
    return true;
}

/* An address counts as empty when domain, bus and slot are all zero;
 * the function number is not looked at.
 */
bool
virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress *addr)
{
    return addr->domain == 0 && addr->bus == 0 && addr->slot == 0;
}

bool
1746 1747
virPCIDeviceAddressEqual(const virPCIDeviceAddress *addr1,
                         const virPCIDeviceAddress *addr2)
1748 1749 1750 1751 1752 1753 1754 1755 1756 1757
{
    if (addr1->domain == addr2->domain &&
        addr1->bus == addr2->bus &&
        addr1->slot == addr2->slot &&
        addr1->function == addr2->function) {
        return true;
    }
    return false;
}

1758
char *
1759
virPCIDeviceAddressAsString(const virPCIDeviceAddress *addr)
1760 1761 1762
{
    char *str;

1763 1764
    ignore_value(virAsprintf(&str,
                             VIR_PCI_DEVICE_ADDRESS_FMT,
1765 1766 1767 1768 1769 1770 1771
                             addr->domain,
                             addr->bus,
                             addr->slot,
                             addr->function));
    return str;
}

1772
virPCIDevicePtr
1773 1774 1775 1776
virPCIDeviceNew(unsigned int domain,
                unsigned int bus,
                unsigned int slot,
                unsigned int function)
1777
{
1778
    VIR_AUTOPTR(virPCIDevice) dev = NULL;
1779 1780
    VIR_AUTOFREE(char *) vendor = NULL;
    VIR_AUTOFREE(char *) product = NULL;
1781

1782
    if (VIR_ALLOC(dev) < 0)
1783 1784
        return NULL;

1785 1786 1787 1788
    dev->address.domain = domain;
    dev->address.bus = bus;
    dev->address.slot = slot;
    dev->address.function = function;
1789

1790
    if (virAsprintf(&dev->name,
1791
                    VIR_PCI_DEVICE_ADDRESS_FMT,
1792
                    domain, bus, slot, function) < 0)
1793
        return NULL;
1794

E
Eric Blake 已提交
1795
    if (virAsprintf(&dev->path, PCI_SYSFS "devices/%s/config",
1796
                    dev->name) < 0)
1797
        return NULL;
1798

1799
    if (!virFileExists(dev->path)) {
1800 1801 1802
        virReportSystemError(errno,
                             _("Device %s not found: could not access %s"),
                             dev->name, dev->path);
1803
        return NULL;
1804 1805
    }

1806 1807
    vendor  = virPCIDeviceReadID(dev, "vendor");
    product = virPCIDeviceReadID(dev, "device");
1808 1809

    if (!vendor || !product) {
1810
        virReportError(VIR_ERR_INTERNAL_ERROR,
1811 1812
                       _("Failed to read product/vendor ID for %s"),
                       dev->name);
1813
        return NULL;
1814 1815 1816
    }

    /* strings contain '0x' prefix */
E
Eric Blake 已提交
1817 1818
    if (snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2],
                 &product[2]) >= sizeof(dev->id)) {
1819
        virReportError(VIR_ERR_INTERNAL_ERROR,
E
Eric Blake 已提交
1820 1821
                       _("dev->id buffer overflow: %s %s"),
                       &vendor[2], &product[2]);
1822
        return NULL;
E
Eric Blake 已提交
1823
    }
1824 1825 1826

    VIR_DEBUG("%s %s: initialized", dev->id, dev->name);

1827
    VIR_RETURN_PTR(dev);
1828 1829
}

L
Laine Stump 已提交
1830 1831 1832 1833 1834 1835

/**
 * virPCIDeviceCopy:
 * @dev: device to duplicate
 *
 * Create a new virPCIDevice holding the same attributes as @dev, with
 * its own copies of the name, path, used_by_drvname and used_by_domname
 * strings.
 *
 * Returns: the copy, or NULL on failure.
 */
virPCIDevicePtr
virPCIDeviceCopy(virPCIDevicePtr dev)
{
    virPCIDevicePtr copy;

    if (VIR_ALLOC(copy) < 0)
        return NULL;

    /* shallow copy to take care of most attributes */
    *copy = *dev;

    /* Drop the string pointers inherited from @dev so that freeing the
     * copy on the failure path below cannot release strings @dev still
     * owns. name is not reset here; presumably VIR_STRDUP clears its
     * destination before duplicating - confirm.
     */
    copy->path = NULL;
    copy->used_by_drvname = NULL;
    copy->used_by_domname = NULL;

    if (VIR_STRDUP(copy->name, dev->name) < 0 ||
        VIR_STRDUP(copy->path, dev->path) < 0 ||
        VIR_STRDUP(copy->used_by_drvname, dev->used_by_drvname) < 0 ||
        VIR_STRDUP(copy->used_by_domname, dev->used_by_domname) < 0) {
        virPCIDeviceFree(copy);
        return NULL;
    }

    return copy;
}


1857
void
1858
virPCIDeviceFree(virPCIDevicePtr dev)
1859
{
1860 1861
    if (!dev)
        return;
1862
    VIR_DEBUG("%s %s: freeing", dev->id, dev->name);
1863
    VIR_FREE(dev->name);
E
Eric Blake 已提交
1864
    VIR_FREE(dev->path);
C
Chunyan Liu 已提交
1865 1866
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);
1867 1868
    VIR_FREE(dev);
}
1869

1870 1871 1872 1873 1874
/**
 * virPCIDeviceGetAddress:
 * @dev: device to get address from
 *
 * Look up the PCI address of @dev. The address lives inside @dev, so
 * the returned object is owned by the device and must not be freed.
 *
 * Returns: a pointer to the address, which can never be NULL.
 */
virPCIDeviceAddressPtr
virPCIDeviceGetAddress(virPCIDevicePtr dev)
{
    return &dev->address;
}

1885
const char *
1886
virPCIDeviceGetName(virPCIDevicePtr dev)
1887 1888 1889 1890
{
    return dev->name;
}

1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902
/**
 * virPCIDeviceGetConfigPath:
 * @dev: device to query
 *
 * Returns: the path of @dev's PCI config file, as stored in @dev (not a
 * copy).
 */
const char *
virPCIDeviceGetConfigPath(virPCIDevicePtr dev)
{
    const char *configPath = dev->path;

    return configPath;
}

1903
/**
 * virPCIDeviceSetManaged:
 * @dev: device to update
 * @managed: new value of the device's managed flag
 */
void
virPCIDeviceSetManaged(virPCIDevicePtr dev,
                       bool managed)
{
    dev->managed = managed;
}

1908
bool
1909
virPCIDeviceGetManaged(virPCIDevicePtr dev)
1910 1911 1912 1913
{
    return dev->managed;
}

1914 1915
/**
 * virPCIDeviceSetStubDriver:
 * @dev: device to update
 * @driver: stub driver to record for the device
 */
void
virPCIDeviceSetStubDriver(virPCIDevicePtr dev,
                          virPCIStubDriver driver)
{
    dev->stubDriver = driver;
}

1920
virPCIStubDriver
1921 1922 1923 1924 1925
virPCIDeviceGetStubDriver(virPCIDevicePtr dev)
{
    return dev->stubDriver;
}

1926
bool
1927
virPCIDeviceGetUnbindFromStub(virPCIDevicePtr dev)
1928 1929 1930 1931 1932
{
    return dev->unbind_from_stub;
}

void
1933
virPCIDeviceSetUnbindFromStub(virPCIDevicePtr dev, bool unbind)
1934
{
1935
    dev->unbind_from_stub = unbind;
1936 1937
}

1938
bool
1939
virPCIDeviceGetRemoveSlot(virPCIDevicePtr dev)
1940 1941 1942 1943 1944
{
    return dev->remove_slot;
}

void
1945
virPCIDeviceSetRemoveSlot(virPCIDevicePtr dev, bool remove_slot)
1946
{
1947
    dev->remove_slot = remove_slot;
1948 1949
}

1950
bool
1951
virPCIDeviceGetReprobe(virPCIDevicePtr dev)
1952 1953 1954 1955 1956
{
    return dev->reprobe;
}

void
1957
virPCIDeviceSetReprobe(virPCIDevicePtr dev, bool reprobe)
1958
{
1959
    dev->reprobe = reprobe;
1960 1961
}

C
Chunyan Liu 已提交
1962 1963 1964 1965
/**
 * virPCIDeviceSetUsedBy:
 * @dev: device to update
 * @drv_name: driver name to record
 * @dom_name: domain name to record
 *
 * Replace the used_by_drvname / used_by_domname strings of @dev with
 * copies of @drv_name and @dom_name. The previous strings are freed
 * before the new ones are duplicated, so they are gone even when this
 * function fails.
 *
 * Returns: 0 on success, -1 if duplicating either string fails.
 */
int
virPCIDeviceSetUsedBy(virPCIDevicePtr dev,
                      const char *drv_name,
                      const char *dom_name)
{
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);

    if (VIR_STRDUP(dev->used_by_drvname, drv_name) < 0 ||
        VIR_STRDUP(dev->used_by_domname, dom_name) < 0)
        return -1;

    return 0;
}

C
Chunyan Liu 已提交
1977 1978 1979 1980
/**
 * virPCIDeviceGetUsedBy:
 * @dev: device to query
 * @drv_name: filled with the device's used_by_drvname string
 * @dom_name: filled with the device's used_by_domname string
 *
 * The output pointers refer to the strings stored in @dev; no copies
 * are made.
 */
void
virPCIDeviceGetUsedBy(virPCIDevicePtr dev,
                      const char **drv_name,
                      const char **dom_name)
{
    const char *driver = dev->used_by_drvname;
    const char *domain = dev->used_by_domname;

    *drv_name = driver;
    *dom_name = domain;
}

1986 1987
virPCIDeviceListPtr
virPCIDeviceListNew(void)
1988
{
1989
    virPCIDeviceListPtr list;
1990

1991 1992 1993 1994
    if (virPCIInitialize() < 0)
        return NULL;

    if (!(list = virObjectLockableNew(virPCIDeviceListClass)))
1995 1996 1997 1998 1999
        return NULL;

    return list;
}

2000 2001
static void
virPCIDeviceListDispose(void *obj)
2002
{
2003
    virPCIDeviceListPtr list = obj;
2004
    size_t i;
2005 2006

    for (i = 0; i < list->count; i++) {
2007
        virPCIDeviceFree(list->devs[i]);
2008 2009 2010 2011 2012 2013 2014 2015
        list->devs[i] = NULL;
    }

    list->count = 0;
    VIR_FREE(list->devs);
}

int
2016 2017
virPCIDeviceListAdd(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
2018
{
2019
    if (virPCIDeviceListFind(list, dev)) {
2020
        virReportError(VIR_ERR_INTERNAL_ERROR,
2021 2022 2023
                       _("Device %s is already in use"), dev->name);
        return -1;
    }
2024
    return VIR_APPEND_ELEMENT(list->devs, list->count, dev);
2025 2026
}

L
Laine Stump 已提交
2027 2028 2029 2030 2031

/**
 * virPCIDeviceListAddCopy:
 * @list: list to extend
 * @dev: device whose *copy* is added to @list
 *
 * @dev itself is left untouched and is not put on the list.
 *
 * Returns: 0 on success, -1 if copying or adding fails.
 */
int
virPCIDeviceListAddCopy(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    VIR_AUTOPTR(virPCIDevice) copy = virPCIDeviceCopy(dev);

    if (!copy || virPCIDeviceListAdd(list, copy) < 0)
        return -1;

    /* the list refers to the copy now; keep it from being auto-freed */
    copy = NULL;
    return 0;
}


2044 2045 2046
/**
 * virPCIDeviceListGet:
 * @list: list to read from
 * @idx: position of the wanted device
 *
 * Returns: the device stored at @idx (it stays on the list), or NULL
 * when @idx is negative or not below the number of devices.
 */
virPCIDevicePtr
virPCIDeviceListGet(virPCIDeviceListPtr list,
                    int idx)
{
    if (idx < 0 || idx >= list->count)
        return NULL;

    return list->devs[idx];
}

2056
size_t
2057
virPCIDeviceListCount(virPCIDeviceListPtr list)
2058
{
2059 2060 2061
    return list->count;
}

2062 2063 2064
/**
 * virPCIDeviceListStealIndex:
 * @list: list to take the device from
 * @idx: position of the device
 *
 * Take the device at @idx out of @list via VIR_DELETE_ELEMENT and hand
 * it back to the caller; the device itself is not freed here.
 *
 * Returns: the device, or NULL when @idx is negative or not below the
 * number of devices.
 */
virPCIDevicePtr
virPCIDeviceListStealIndex(virPCIDeviceListPtr list,
                           int idx)
{
    virPCIDevicePtr stolen;

    if (idx < 0)
        return NULL;
    if (idx >= list->count)
        return NULL;

    stolen = list->devs[idx];
    VIR_DELETE_ELEMENT(list->devs, idx, list->count);
    return stolen;
}

2076 2077 2078
/**
 * virPCIDeviceListSteal:
 * @list: list to take the device from
 * @dev: device whose address identifies the list entry
 *
 * Returns: the list entry matching @dev, taken out of @list, or NULL if
 * no entry matches (the -1 index reported by
 * virPCIDeviceListFindIndex() is rejected by
 * virPCIDeviceListStealIndex()).
 */
virPCIDevicePtr
virPCIDeviceListSteal(virPCIDeviceListPtr list,
                      virPCIDevicePtr dev)
{
    int idx = virPCIDeviceListFindIndex(list, dev);

    return virPCIDeviceListStealIndex(list, idx);
}

2083
void
2084 2085
virPCIDeviceListDel(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
2086
{
2087
    virPCIDeviceFree(virPCIDeviceListSteal(list, dev));
2088 2089
}

2090
int
2091
virPCIDeviceListFindIndex(virPCIDeviceListPtr list, virPCIDevicePtr dev)
2092
{
2093
    size_t i;
2094

2095 2096 2097 2098 2099 2100
    for (i = 0; i < list->count; i++) {
        virPCIDevicePtr other = list->devs[i];
        if (other->address.domain   == dev->address.domain &&
            other->address.bus      == dev->address.bus    &&
            other->address.slot     == dev->address.slot   &&
            other->address.function == dev->address.function)
2101
            return i;
2102
    }
2103 2104 2105
    return -1;
}

L
Laine Stump 已提交
2106 2107 2108 2109 2110 2111 2112 2113

/**
 * virPCIDeviceListFindByIDs:
 * @list: list to search
 * @domain: PCI domain to match
 * @bus: PCI bus to match
 * @slot: PCI slot to match
 * @function: PCI function to match
 *
 * Returns: the first entry in @list with the given address (it stays on
 * the list), or NULL if there is none.
 */
virPCIDevicePtr
virPCIDeviceListFindByIDs(virPCIDeviceListPtr list,
                          unsigned int domain,
                          unsigned int bus,
                          unsigned int slot,
                          unsigned int function)
{
    size_t i;

    for (i = 0; i < list->count; i++) {
        virPCIDevicePtr candidate = list->devs[i];

        if (candidate->address.domain != domain ||
            candidate->address.bus != bus ||
            candidate->address.slot != slot ||
            candidate->address.function != function)
            continue;

        return candidate;
    }

    return NULL;
}


2128 2129
/**
 * virPCIDeviceListFind:
 * @list: list to search
 * @dev: device whose address is looked for
 *
 * Returns: the entry of @list found by virPCIDeviceListFindIndex() (it
 * stays on the list), or NULL if there is none.
 */
virPCIDevicePtr
virPCIDeviceListFind(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    int idx = virPCIDeviceListFindIndex(list, dev);

    return idx >= 0 ? list->devs[idx] : NULL;
}
2138 2139


2140 2141 2142
/**
 * virPCIDeviceFileIterate:
 * @dev: device whose sysfs directory is scanned
 * @actor: callback invoked for every relevant file
 * @opaque: data handed through to @actor
 *
 * Walk the device's directory below /sys/bus/pci/devices/ and call
 * @actor with the full path of each entry named "config", "rom",
 * "vendor", "device" or "reset", or whose name starts with "resource".
 * The walk stops at the first @actor call that returns a negative value.
 *
 * Returns: 0 on success, -1 on failure.
 */
int
virPCIDeviceFileIterate(virPCIDevicePtr dev,
                        virPCIDeviceFileActor actor,
                        void *opaque)
{
    VIR_AUTOFREE(char *) pcidir = NULL;
    DIR *dir = NULL;
    struct dirent *ent;
    int direrr;
    int ret = -1;

    if (virAsprintf(&pcidir, "/sys/bus/pci/devices/" VIR_PCI_DEVICE_ADDRESS_FMT,
                    dev->address.domain, dev->address.bus,
                    dev->address.slot, dev->address.function) < 0)
        goto cleanup;

    if (virDirOpen(&dir, pcidir) < 0)
        goto cleanup;

    while ((direrr = virDirRead(dir, &ent, pcidir)) > 0) {
        VIR_AUTOFREE(char *) file = NULL;
        const char *entry = ent->d_name;

        /* Device assignment requires:
         *   $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN,
         *   $PCIDIR/rom, $PCIDIR/reset, $PCIDIR/vendor, $PCIDIR/device
         * everything else is skipped.
         */
        if (!STREQ(entry, "config") &&
            !STRPREFIX(entry, "resource") &&
            !STREQ(entry, "rom") &&
            !STREQ(entry, "vendor") &&
            !STREQ(entry, "device") &&
            !STREQ(entry, "reset"))
            continue;

        if (virAsprintf(&file, "%s/%s", pcidir, entry) < 0 ||
            (actor)(dev, file, opaque) < 0)
            goto cleanup;
    }

    /* the loop ends with 0 at the end of the directory, < 0 on error */
    if (direrr >= 0)
        ret = 0;

 cleanup:
    VIR_DIR_CLOSE(dir);
    return ret;
}
J
Jiri Denemark 已提交
2185

L
Laine Stump 已提交
2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196

/* virPCIDeviceAddressIOMMUGroupIterate:
 *   Call @actor for all devices in the same iommu_group as @orig
 *   (including @orig itself). Even if there is no iommu_group for the
 *   device (its iommu_group/devices directory cannot be opened), call
 *   @actor once for @orig.
 *
 *   Returns 0 on success, -1 on failure; when only @orig is processed
 *   the value returned by @actor is passed through.
 */
int
virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddressPtr orig,
                                     virPCIDeviceAddressActor actor,
                                     void *opaque)
{
    VIR_AUTOFREE(char *) groupPath = NULL;
    DIR *groupDir = NULL;
    struct dirent *ent;
    int direrr;
    int ret = -1;

    if (virAsprintf(&groupPath,
                    PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT "/iommu_group/devices",
                    orig->domain, orig->bus, orig->slot, orig->function) < 0)
        goto cleanup;

    if (virDirOpenQuiet(&groupDir, groupPath) < 0) {
        /* just process the original device, nothing more */
        ret = (actor)(orig, opaque);
        goto cleanup;
    }

    while ((direrr = virDirRead(groupDir, &ent, groupPath)) > 0) {
        virPCIDeviceAddress member;

        /* every entry name is expected to be a PCI address */
        if (virPCIDeviceAddressParse(ent->d_name, &member) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Found invalid device link '%s' in '%s'"),
                           ent->d_name, groupPath);
            goto cleanup;
        }

        if ((actor)(&member, opaque) < 0)
            goto cleanup;
    }

    /* the loop ends with 0 at the end of the directory, < 0 on error */
    if (direrr >= 0)
        ret = 0;

 cleanup:
    VIR_DIR_CLOSE(groupDir);
    return ret;
}


/**
 * virPCIDeviceGetIOMMUGroupAddOne:
 * @newDevAddr: address of the device to add
 * @opaque: the virPCIDeviceListPtr to add the device to
 *
 * Callback used with virPCIDeviceAddressIOMMUGroupIterate(): create a
 * virPCIDevice for @newDevAddr and add it to the list.
 *
 * Returns: 0 on success, -1 if the device cannot be created or added.
 */
static int
virPCIDeviceGetIOMMUGroupAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceListPtr groupList = opaque;
    VIR_AUTOPTR(virPCIDevice) newDev = virPCIDeviceNew(newDevAddr->domain,
                                                       newDevAddr->bus,
                                                       newDevAddr->slot,
                                                       newDevAddr->function);

    if (!newDev || virPCIDeviceListAdd(groupList, newDev) < 0)
        return -1;

    newDev = NULL; /* it's now on the list */
    return 0;
}


/*
 * virPCIDeviceGetIOMMUGroupList - build a virPCIDeviceList holding a
 * virPCIDevice for every device in the same iommu_group as @dev.
 *
 * Return the new list, or NULL on failure
 */
virPCIDeviceListPtr
virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev)
{
    virPCIDeviceListPtr groupList = virPCIDeviceListNew();

    if (groupList &&
        virPCIDeviceAddressIOMMUGroupIterate(&dev->address,
                                             virPCIDeviceGetIOMMUGroupAddOne,
                                             groupList) >= 0)
        return groupList;

    /* also reached with a NULL list when its creation failed */
    virObjectUnref(groupList);
    return NULL;
}


/* Bundle of the two output locations that
 * virPCIDeviceAddressGetIOMMUGroupAddresses() passes as opaque data to
 * virPCIGetIOMMUGroupAddressesAddOne(). Both members point at variables
 * supplied by the caller.
 */
typedef struct {
    virPCIDeviceAddressPtr **iommuGroupDevices; /* array the addresses are appended to */
    size_t *nIommuGroupDevices; /* element count of that array */
} virPCIDeviceAddressList;
typedef virPCIDeviceAddressList *virPCIDeviceAddressListPtr;

/**
 * virPCIGetIOMMUGroupAddressesAddOne:
 * @newDevAddr: address to record
 * @opaque: virPCIDeviceAddressListPtr describing the destination array
 *
 * Callback used with virPCIDeviceAddressIOMMUGroupIterate(): append a
 * newly allocated copy of @newDevAddr to the array referenced by
 * @opaque.
 *
 * Returns: 0 on success, -1 on failure.
 */
static int
virPCIGetIOMMUGroupAddressesAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceAddressListPtr addrList = opaque;
    virPCIDeviceAddressPtr copyAddr;
    int ret = -1;

    /* make a copy to insert onto the list */
    if (VIR_ALLOC(copyAddr) >= 0) {
        *copyAddr = *newDevAddr;

        if (VIR_APPEND_ELEMENT(*addrList->iommuGroupDevices,
                               *addrList->nIommuGroupDevices, copyAddr) >= 0)
            ret = 0;
    }

    /* Runs on every path; presumably VIR_APPEND_ELEMENT clears copyAddr
     * once the array has taken it over, making this a no-op on success -
     * confirm against the macro.
     */
    VIR_FREE(copyAddr);
    return ret;
}


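/*
 * virPCIDeviceAddressGetIOMMUGroupAddresses - collect the addresses of
 * all of the devices in the same iommu_group as @devAddr. A newly
 * allocated copy of each address is appended to @iommuGroupDevices and
 * @nIommuGroupDevices is updated accordingly.
 *
 * Return 0 on success, -1 on failure
 */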
int
virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddressPtr devAddr,
                                          virPCIDeviceAddressPtr **iommuGroupDevices,
                                          size_t *nIommuGroupDevices)
{
    /* hand both output locations to the per-device callback */
    virPCIDeviceAddressList addrList = {
        .iommuGroupDevices = iommuGroupDevices,
        .nIommuGroupDevices = nIommuGroupDevices,
    };

    if (virPCIDeviceAddressIOMMUGroupIterate(devAddr,
                                             virPCIGetIOMMUGroupAddressesAddOne,
                                             &addrList) < 0)
        return -1;

    return 0;
}


/* virPCIDeviceAddressGetIOMMUGroupNum - return the group number of
 * this PCI device's iommu_group, or -2 if there is no iommu_group for
 * the device, i.e. its "iommu_group" entry is not a symlink (or -1 if
 * there was any other error). The number is the last component of the
 * path the symlink resolves to, parsed as a decimal unsigned integer.
 */
int
virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddressPtr addr)
{
    VIR_AUTOFREE(char *) devName = NULL;
    VIR_AUTOFREE(char *) devPath = NULL;
    VIR_AUTOFREE(char *) groupPath = NULL;
    const char *groupNumStr;
    unsigned int groupNum;

    if (virAsprintf(&devName, VIR_PCI_DEVICE_ADDRESS_FMT,
                    addr->domain, addr->bus,
                    addr->slot, addr->function) < 0)
        return -1;

    devPath = virPCIFile(devName, "iommu_group");
    if (!devPath)
        return -1;

    /* no symlink means the device is not in any iommu_group */
    if (virFileIsLink(devPath) != 1)
        return -2;

    if (virFileResolveLink(devPath, &groupPath) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       devName, devPath);
        return -1;
    }

    groupNumStr = last_component(groupPath);
    if (virStrToLong_ui(groupNumStr, NULL, 10, &groupNum) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("device %s iommu_group symlink %s has "
                         "invalid group number %s"),
                       devName, groupPath, groupNumStr);
        return -1;
    }

    return groupNum;
}


2382 2383
/* virPCIDeviceGetIOMMUGroupDev - return the name of the device used
 * to control this PCI device's group (e.g. "/dev/vfio/15")
2384 2385
 */
char *
2386
virPCIDeviceGetIOMMUGroupDev(virPCIDevicePtr dev)
2387
{
2388 2389
    VIR_AUTOFREE(char *) devPath = NULL;
    VIR_AUTOFREE(char *) groupPath = NULL;
2390 2391
    char *groupDev = NULL;

2392
    if (!(devPath = virPCIFile(dev->name, "iommu_group")))
2393
        return NULL;
2394 2395 2396 2397
    if (virFileIsLink(devPath) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s iommu_group file %s is not a symlink"),
                       dev->name, devPath);
2398
        return NULL;
2399 2400 2401 2402 2403
    }
    if (virFileResolveLink(devPath, &groupPath) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       dev->name, devPath);
2404
        return NULL;
2405 2406
    }
    if (virAsprintf(&groupDev, "/dev/vfio/%s",
2407
                    last_component(groupPath)) < 0)
2408 2409
        return NULL;

2410 2411 2412
    return groupDev;
}

J
Jiri Denemark 已提交
2413
static int
2414
virPCIDeviceDownstreamLacksACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2415 2416 2417 2418
{
    uint16_t flags;
    uint16_t ctrl;
    unsigned int pos;
2419 2420
    int fd;
    int ret = 0;
2421
    uint16_t device_class;
J
Jiri Denemark 已提交
2422

2423
    if ((fd = virPCIDeviceConfigOpen(dev, true)) < 0)
J
Jiri Denemark 已提交
2424 2425
        return -1;

2426
    if (virPCIDeviceInit(dev, fd) < 0) {
2427 2428 2429 2430
        ret = -1;
        goto cleanup;
    }

2431 2432 2433
    if (virPCIDeviceReadClass(dev, &device_class) < 0)
        goto cleanup;

J
Jiri Denemark 已提交
2434
    pos = dev->pcie_cap_pos;
2435
    if (!pos || device_class != PCI_CLASS_BRIDGE_PCI)
2436
        goto cleanup;
J
Jiri Denemark 已提交
2437

2438
    flags = virPCIDeviceRead16(dev, fd, pos + PCI_EXP_FLAGS);
J
Jiri Denemark 已提交
2439
    if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM)
2440
        goto cleanup;
J
Jiri Denemark 已提交
2441

2442
    pos = virPCIDeviceFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS);
J
Jiri Denemark 已提交
2443 2444
    if (!pos) {
        VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name);
2445 2446
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2447 2448
    }

2449
    ctrl = virPCIDeviceRead16(dev, fd, pos + PCI_EXT_ACS_CTRL);
J
Jiri Denemark 已提交
2450 2451 2452
    if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) {
        VIR_DEBUG("%s %s: downstream port has ACS disabled",
                  dev->id, dev->name);
2453 2454
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2455 2456
    }

2457
 cleanup:
2458
    virPCIDeviceConfigClose(dev, fd);
2459
    return ret;
J
Jiri Denemark 已提交
2460 2461 2462
}

static int
2463
virPCIDeviceIsBehindSwitchLackingACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2464
{
2465
    VIR_AUTOPTR(virPCIDevice) parent = NULL;
J
Jiri Denemark 已提交
2466

2467
    if (virPCIDeviceGetParent(dev, &parent) < 0)
2468
        return -1;
2469 2470 2471 2472 2473
    if (!parent) {
        /* if we have no parent, and this is the root bus, ACS doesn't come
         * into play since devices on the root bus can't P2P without going
         * through the root IOMMU.
         */
2474
        if (dev->address.bus == 0) {
2475
            return 0;
2476
        } else {
2477
            virReportError(VIR_ERR_INTERNAL_ERROR,
2478 2479 2480 2481
                           _("Failed to find parent device for %s"),
                           dev->name);
            return -1;
        }
J
Jiri Denemark 已提交
2482 2483 2484 2485 2486 2487 2488
    }

    /* XXX we should rather fail when we can't find device's parent and
     * stop the loop when we get to root instead of just stopping when no
     * parent can be found
     */
    do {
2489
        VIR_AUTOPTR(virPCIDevice) tmp = NULL;
J
Jiri Denemark 已提交
2490
        int acs;
2491
        int ret;
J
Jiri Denemark 已提交
2492

2493
        acs = virPCIDeviceDownstreamLacksACS(parent);
J
Jiri Denemark 已提交
2494 2495 2496 2497 2498 2499 2500 2501 2502

        if (acs) {
            if (acs < 0)
                return -1;
            else
                return 1;
        }

        tmp = parent;
2503
        ret = virPCIDeviceGetParent(parent, &parent);
2504 2505
        if (ret < 0)
            return -1;
J
Jiri Denemark 已提交
2506 2507 2508 2509 2510
    } while (parent);

    return 0;
}

2511 2512
/**
 * virPCIDeviceIsAssignable:
 * @dev: device to check
 * @strict_acs_check: when non-zero, refuse devices located behind a
 *                    switch lacking ACS
 *
 * Returns: 1 if the device may be assigned, 0 if it may not or if the
 * ACS check itself failed.
 */
int
virPCIDeviceIsAssignable(virPCIDevicePtr dev,
                         int strict_acs_check)
{
    int lacksACS;

    /* XXX This could be a great place to actually check that a non-managed
     * device isn't in use, e.g. by checking that device is either un-bound
     * or bound to a stub driver.
     */

    lacksACS = virPCIDeviceIsBehindSwitchLackingACS(dev);
    if (lacksACS < 0)
        return 0;

    if (lacksACS == 0)
        return 1;

    if (strict_acs_check) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Device %s is behind a switch lacking ACS and "
                         "cannot be assigned"),
                       dev->name);
        return 0;
    }

    VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed",
              dev->id, dev->name);
    return 1;
}
2540 2541 2542 2543 2544 2545 2546 2547 2548 2549

/**
 * logStrToLong_ui:
 * @s: string to convert
 * @end_ptr: passed through to virStrToLong_ui()
 * @base: numeric base, passed through to virStrToLong_ui()
 * @result: where the converted value is stored
 *
 * Wrapper around virStrToLong_ui() that logs an error message when the
 * conversion does not return 0.
 *
 * Returns: the value returned by virStrToLong_ui().
 */
static int
logStrToLong_ui(char const *s,
                char **end_ptr,
                int base,
                unsigned int *result)
{
    int rc = virStrToLong_ui(s, end_ptr, base, result);

    if (rc != 0)
        VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s);

    return rc;
}

2555 2556
int
virPCIDeviceAddressParse(char *address,
2557
                         virPCIDeviceAddressPtr bdf)
2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583
{
    char *p = NULL;
    int ret = -1;

    if ((address == NULL) || (logStrToLong_ui(address, &p, 16,
                                              &bdf->domain) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->bus) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->slot) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->function) == -1)) {
        goto out;
    }

    ret = 0;

2584
 out:
2585 2586 2587
    return ret;
}

2588

2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613
/**
 * virZPCIDeviceAddressIsValid:
 * @zpci: zPCI address to validate
 *
 * Only the uid is checked: it has to be non-zero and must not exceed
 * VIR_DOMAIN_DEVICE_ZPCI_MAX_UID. An error is reported otherwise.
 *
 * Returns: true if the uid is within range, false otherwise.
 */
bool
virZPCIDeviceAddressIsValid(virZPCIDeviceAddressPtr zpci)
{
    /* We don't need to check fid because fid covers
     * all range of uint32 type.
     */
    if (zpci->uid != 0 &&
        zpci->uid <= VIR_DOMAIN_DEVICE_ZPCI_MAX_UID)
        return true;

    virReportError(VIR_ERR_XML_ERROR,
                   _("Invalid PCI address uid='0x%.4x', "
                     "must be > 0x0000 and <= 0x%.4x"),
                   zpci->uid,
                   VIR_DOMAIN_DEVICE_ZPCI_MAX_UID);
    return false;
}

/**
 * virZPCIDeviceAddressIsEmpty:
 * @addr: zPCI address to examine
 *
 * Returns: true when both uid and fid are zero, false otherwise.
 */
bool
virZPCIDeviceAddressIsEmpty(const virZPCIDeviceAddress *addr)
{
    return addr->uid == 0 && addr->fid == 0;
}

2614
#ifdef __linux__
2615

2616
virPCIDeviceAddressPtr
2617
virPCIGetDeviceAddressFromSysfsLink(const char *device_link)
2618
{
2619
    virPCIDeviceAddressPtr bdf = NULL;
2620
    char *config_address = NULL;
2621
    VIR_AUTOFREE(char *) device_path = NULL;
2622 2623

    if (!virFileExists(device_link)) {
2624
        VIR_DEBUG("'%s' does not exist", device_link);
2625
        return NULL;
2626 2627
    }

2628
    device_path = virFileCanonicalizePath(device_link);
2629
    if (device_path == NULL) {
2630 2631 2632
        virReportSystemError(errno,
                             _("Failed to resolve device link '%s'"),
                             device_link);
2633
        return NULL;
2634 2635
    }

2636
    config_address = last_component(device_path);
2637
    if (VIR_ALLOC(bdf) < 0)
2638
        return NULL;
2639

2640
    if (virPCIDeviceAddressParse(config_address, bdf) < 0) {
2641
        virReportError(VIR_ERR_INTERNAL_ERROR,
2642 2643
                       _("Failed to parse PCI config address '%s'"),
                       config_address);
2644
        VIR_FREE(bdf);
2645
        return NULL;
2646 2647
    }

2648
    return bdf;
2649 2650
}

2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663
/**
 * virPCIGetPhysicalFunction:
 * @vf_sysfs_path: sysfs path for the virtual function
 * @pf: where to store the physical function's address
 *
 * Given @vf_sysfs_path, this function will store the pointer
 * to a newly-allocated virPCIDeviceAddress in @pf.
 *
 * @pf might be NULL if @vf_sysfs_path does not point to a
 * virtual function. If it's not NULL, then it should be
 * freed by the caller when no longer needed.
 *
 * Returns: >=0 on success, <0 on failure
2664 2665
 */
int
2666
virPCIGetPhysicalFunction(const char *vf_sysfs_path,
2667
                          virPCIDeviceAddressPtr *pf)
2668
{
2669
    VIR_AUTOFREE(char *) device_link = NULL;
2670

2671 2672
    *pf = NULL;

2673 2674
    if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) {
        virReportOOMError();
2675
        return -1;
2676 2677
    }

2678
    if ((*pf = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2679 2680
        VIR_DEBUG("PF for VF device '%s': " VIR_PCI_DEVICE_ADDRESS_FMT,
                  vf_sysfs_path,
2681 2682
                  (*pf)->domain, (*pf)->bus, (*pf)->slot, (*pf)->function);
    }
2683

2684
    return 0;
2685 2686
}

2687

2688 2689 2690 2691
/*
 * Returns virtual functions of a physical function
 */
int
2692 2693
virPCIGetVirtualFunctions(const char *sysfs_path,
                          virPCIDeviceAddressPtr **virtual_functions,
2694 2695
                          size_t *num_virtual_functions,
                          unsigned int *max_virtual_functions)
2696 2697
{
    int ret = -1;
2698
    size_t i;
2699 2700
    VIR_AUTOFREE(char *) totalvfs_file = NULL;
    VIR_AUTOFREE(char *) totalvfs_str = NULL;
2701
    virPCIDeviceAddressPtr config_addr = NULL;
2702

2703 2704
    *virtual_functions = NULL;
    *num_virtual_functions = 0;
2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720
    *max_virtual_functions = 0;

    if (virAsprintf(&totalvfs_file, "%s/sriov_totalvfs", sysfs_path) < 0)
       goto error;
    if (virFileExists(totalvfs_file)) {
        char *end = NULL; /* so that terminating \n doesn't create error */

        if (virFileReadAll(totalvfs_file, 16, &totalvfs_str) < 0)
            goto error;
        if (virStrToLong_ui(totalvfs_str, &end, 10, max_virtual_functions) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Unrecognized value in %s: %s"),
                           totalvfs_file, totalvfs_str);
            goto error;
        }
    }
2721

2722
    do {
2723
        VIR_AUTOFREE(char *) device_link = NULL;
2724 2725 2726
        /* look for virtfn%d links until one isn't found */
        if (virAsprintf(&device_link, "%s/virtfn%zu", sysfs_path, *num_virtual_functions) < 0)
            goto error;
2727

2728 2729
        if (!virFileExists(device_link))
            break;
2730

2731
        if (!(config_addr = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2732 2733 2734 2735 2736
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Failed to get SRIOV function from device link '%s'"),
                           device_link);
            goto error;
        }
2737

2738 2739
        if (VIR_APPEND_ELEMENT(*virtual_functions, *num_virtual_functions,
                               config_addr) < 0)
2740 2741
            goto error;
    } while (1);
2742

2743 2744
    VIR_DEBUG("Found %zu virtual functions for %s",
              *num_virtual_functions, sysfs_path);
2745
    ret = 0;
2746
 cleanup:
2747
    VIR_FREE(config_addr);
2748
    return ret;
2749

2750
 error:
2751 2752 2753
    for (i = 0; i < *num_virtual_functions; i++)
        VIR_FREE((*virtual_functions)[i]);
    VIR_FREE(*virtual_functions);
2754
    *num_virtual_functions = 0;
2755
    goto cleanup;
2756
}
2757

2758

2759 2760 2761 2762
/*
 * Returns 1 if vf device is a virtual function, 0 if not, -1 on error
 */
int
2763
virPCIIsVirtualFunction(const char *vf_sysfs_device_link)
2764
{
2765
    VIR_AUTOFREE(char *) vf_sysfs_physfn_link = NULL;
2766 2767

    if (virAsprintf(&vf_sysfs_physfn_link, "%s/physfn",
2768
                    vf_sysfs_device_link) < 0)
2769
        return -1;
2770

2771
    return virFileExists(vf_sysfs_physfn_link);
2772 2773 2774 2775 2776 2777
}

/*
 * Returns the sriov virtual function index of vf given its pf
 */
int
2778 2779 2780
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
                              const char *vf_sysfs_device_link,
                              int *vf_index)
2781
{
2782 2783
    int ret = -1;
    size_t i;
2784
    size_t num_virt_fns = 0;
2785
    unsigned int max_virt_fns = 0;
2786 2787
    virPCIDeviceAddressPtr vf_bdf = NULL;
    virPCIDeviceAddressPtr *virt_fns = NULL;
2788

2789
    if (!(vf_bdf = virPCIGetDeviceAddressFromSysfsLink(vf_sysfs_device_link)))
2790 2791
        return ret;

2792
    if (virPCIGetVirtualFunctions(pf_sysfs_device_link, &virt_fns,
2793
                                  &num_virt_fns, &max_virt_fns) < 0) {
2794
        virReportError(VIR_ERR_INTERNAL_ERROR,
2795
                       _("Error getting physical function's '%s' "
2796
                         "virtual_functions"), pf_sysfs_device_link);
2797 2798 2799 2800
        goto out;
    }

    for (i = 0; i < num_virt_fns; i++) {
2801
        if (virPCIDeviceAddressEqual(vf_bdf, virt_fns[i])) {
2802 2803 2804 2805
            *vf_index = i;
            ret = 0;
            break;
        }
2806 2807
    }

2808
 out:
2809 2810 2811

    /* free virtual functions */
    for (i = 0; i < num_virt_fns; i++)
2812
        VIR_FREE(virt_fns[i]);
2813

A
ajia@redhat.com 已提交
2814
    VIR_FREE(virt_fns);
2815 2816 2817 2818 2819
    VIR_FREE(vf_bdf);

    return ret;
}

2820 2821 2822 2823 2824
/*
 * Returns a path to the PCI sysfs file given the BDF of the PCI function
 */

int
2825
virPCIGetSysfsFile(char *virPCIDeviceName, char **pci_sysfs_device_link)
2826
{
2827 2828 2829 2830
    if (virAsprintf(pci_sysfs_device_link, PCI_SYSFS "devices/%s",
                    virPCIDeviceName) < 0)
        return -1;
    return 0;
2831 2832
}

R
Roopa Prabhu 已提交
2833
int
2834
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
2835
                                char **pci_sysfs_device_link)
R
Roopa Prabhu 已提交
2836
{
2837
    if (virAsprintf(pci_sysfs_device_link,
2838
                    PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT,
2839 2840
                    addr->domain, addr->bus,
                    addr->slot, addr->function) < 0)
2841 2842
        return -1;
    return 0;
R
Roopa Prabhu 已提交
2843 2844
}

2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855
/**
 * virPCIGetNetName:
 * @device_link_sysfs_path: sysfs path to the PCI device
 * @idx: used to choose which netdev when there are several
 *       (ignored if physPortID is set)
 * @physPortID: match this string in the netdev's phys_port_id
 *       (or NULL to ignore and use idx instead)
 * @netname: used to return the name of the netdev
 *       (set to NULL (but returns success) if there is no netdev)
 *
 * Returns 0 on success, -1 on error (error has been logged)
2856 2857
 */
int
2858 2859 2860 2861
virPCIGetNetName(const char *device_link_sysfs_path,
                 size_t idx,
                 char *physPortID,
                 char **netname)
2862
{
2863 2864 2865
    VIR_AUTOFREE(char *) pcidev_sysfs_net_path = NULL;
    VIR_AUTOFREE(char *) firstEntryName = NULL;
    VIR_AUTOFREE(char *) thisPhysPortID = NULL;
2866 2867 2868
    int ret = -1;
    DIR *dir = NULL;
    struct dirent *entry = NULL;
2869
    size_t i = 0;
2870

2871 2872
    *netname = NULL;

2873 2874 2875 2876 2877 2878
    if (virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path,
                     "net") == -1) {
        virReportOOMError();
        return -1;
    }

2879 2880 2881
    if (virDirOpenQuiet(&dir, pcidev_sysfs_net_path) < 0) {
        /* this *isn't* an error - caller needs to check for netname == NULL */
        ret = 0;
2882
        goto cleanup;
2883
    }
2884

E
Eric Blake 已提交
2885
    while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) {
2886 2887 2888 2889 2890 2891 2892 2893 2894 2895
        /* if the caller sent a physPortID, compare it to the
         * physportID of this netdev. If not, look for entry[idx].
         */
        if (physPortID) {
            if (virNetDevGetPhysPortID(entry->d_name, &thisPhysPortID) < 0)
                goto cleanup;

            /* if this one doesn't match, keep looking */
            if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) {
                VIR_FREE(thisPhysPortID);
2896 2897 2898 2899 2900 2901 2902 2903 2904 2905
                /* save the first entry we find to use as a failsafe
                 * in case we don't match the phys_port_id. This is
                 * needed because some NIC drivers (e.g. i40e)
                 * implement phys_port_id for PFs, but not for VFs
                 */
                if (!firstEntryName &&
                    VIR_STRDUP(firstEntryName, entry->d_name) < 0) {
                    goto cleanup;
                }

2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916
                continue;
            }
        } else {
            if (i++ < idx)
                continue;
        }

        if (VIR_STRDUP(*netname, entry->d_name) < 0)
            goto cleanup;

        ret = 0;
2917 2918 2919
        break;
    }

2920 2921
    if (ret < 0) {
        if (physPortID) {
2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936
            if (firstEntryName) {
                /* we didn't match the provided phys_port_id, but this
                 * is probably because phys_port_id isn't implemented
                 * for this NIC driver, so just return the first
                 * (probably only) netname we found.
                 */
                *netname = firstEntryName;
                firstEntryName = NULL;
                ret = 0;
            } else {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Could not find network device with "
                                 "phys_port_id '%s' under PCI device at %s"),
                               physPortID, device_link_sysfs_path);
            }
2937 2938 2939 2940 2941
        } else {
            ret = 0; /* no netdev at the given index is *not* an error */
        }
    }
 cleanup:
J
Ján Tomko 已提交
2942
    VIR_DIR_CLOSE(dir);
2943
    return ret;
2944
}
R
Roopa Prabhu 已提交
2945 2946

int
2947
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
2948 2949 2950
                             int pfNetDevIdx,
                             char **pfname,
                             int *vf_index)
R
Roopa Prabhu 已提交
2951
{
2952
    virPCIDeviceAddressPtr pf_config_address = NULL;
2953 2954 2955
    VIR_AUTOFREE(char *) pf_sysfs_device_path = NULL;
    VIR_AUTOFREE(char *) vfname = NULL;
    VIR_AUTOFREE(char *) vfPhysPortID = NULL;
R
Roopa Prabhu 已提交
2956 2957
    int ret = -1;

2958
    if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
2959
        goto cleanup;
R
Roopa Prabhu 已提交
2960

2961
    if (!pf_config_address)
2962
        goto cleanup;
2963

2964 2965
    if (virPCIDeviceAddressGetSysfsFile(pf_config_address,
                                        &pf_sysfs_device_path) < 0) {
2966 2967
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2968

2969 2970 2971
    if (virPCIGetVirtualFunctionIndex(pf_sysfs_device_path,
                                      vf_sysfs_device_path, vf_index) < 0) {
        goto cleanup;
R
Roopa Prabhu 已提交
2972 2973
    }

2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993
    /* If the caller hasn't asked for a specific pfNetDevIdx, and VF
     * is bound to a netdev, learn that netdev's phys_port_id (if
     * available). This can be used to disambiguate when the PF has
     * multiple netdevs. If the VF isn't bound to a netdev, then we
     * return netdev[pfNetDevIdx] on the PF, which may or may not be
     * correct.
     */
    if (pfNetDevIdx == -1) {
        if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
            goto cleanup;

        if (vfname) {
            if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0)
                goto cleanup;
        }
        pfNetDevIdx = 0;
    }

    if (virPCIGetNetName(pf_sysfs_device_path,
                         pfNetDevIdx, vfPhysPortID, pfname) < 0) {
R
Roopa Prabhu 已提交
2994
        goto cleanup;
2995
    }
R
Roopa Prabhu 已提交
2996

2997 2998 2999 3000 3001 3002 3003 3004 3005
    if (!*pfname) {
        /* this shouldn't be possible. A VF can't exist unless its
         * PF device is bound to a network driver
         */
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("The PF device for VF %s has no network device name"),
                       vf_sysfs_device_path);
        goto cleanup;
    }
R
Roopa Prabhu 已提交
3006

3007
    ret = 0;
3008
 cleanup:
R
Roopa Prabhu 已提交
3009 3010 3011 3012 3013
    VIR_FREE(pf_config_address);

    return ret;
}

3014 3015 3016 3017 3018 3019 3020 3021 3022

/*
 * Reads the mediated device types listed under
 * @sysfspath/mdev_supported_types.
 *
 * Each directory entry is handed to virMediatedDeviceTypeReadAttrs()
 * and the result appended to a list that is stored in *@types on
 * success.
 *
 * Returns the number of types found (0 if the directory does not
 * exist, in which case *@types is not written), or -1 on failure.
 */
ssize_t
virPCIGetMdevTypes(const char *sysfspath,
                   virMediatedDeviceTypePtr **types)
{
    VIR_AUTOFREE(char *) types_path = NULL;
    VIR_AUTOPTR(virMediatedDeviceType) mdev_type = NULL;
    virMediatedDeviceTypePtr *list = NULL;
    struct dirent *entry;
    DIR *dir = NULL;
    size_t nlist = 0;
    size_t n;
    ssize_t ret = -1;
    int rc;

    if (virAsprintf(&types_path, "%s/mdev_supported_types", sysfspath) < 0)
        return -1;

    rc = virDirOpenIfExists(&dir, types_path);
    if (rc < 0)
        goto cleanup;

    if (rc == 0) {
        ret = 0;
        goto cleanup;
    }

    while ((rc = virDirRead(dir, &entry, types_path)) > 0) {
        VIR_AUTOFREE(char *) tmppath = NULL;

        /* append the type id to the path and read the attributes from there */
        if (virAsprintf(&tmppath, "%s/%s", types_path, entry->d_name) < 0 ||
            virMediatedDeviceTypeReadAttrs(tmppath, &mdev_type) < 0)
            goto cleanup;

        if (VIR_APPEND_ELEMENT(list, nlist, mdev_type) < 0)
            goto cleanup;
    }

    if (rc < 0)
        goto cleanup;

    VIR_STEAL_PTR(*types, list);
    ret = nlist;
    /* ownership moved to the caller; nothing left for cleanup to free */
    nlist = 0;

 cleanup:
    for (n = 0; n < nlist; n++)
        virMediatedDeviceTypeFree(list[n]);
    VIR_FREE(list);
    VIR_DIR_CLOSE(dir);
    return ret;
}

3067
#else
3068 3069
/* Error text used when this file is built without __linux__ defined;
 * marked with N_() here and passed through _() where it is reported. */
static const char *unsupported = N_("not supported on non-linux platforms");

3070 3071 3072 3073
/* Non-Linux stub: reports the "unsupported" error and returns NULL. */
virPCIDeviceAddressPtr
virPCIGetDeviceAddressFromSysfsLink(const char *device_link ATTRIBUTE_UNUSED)
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return NULL;
}


3078
int
3079
virPCIGetPhysicalFunction(const char *vf_sysfs_path ATTRIBUTE_UNUSED,
3080
                          virPCIDeviceAddressPtr *pf ATTRIBUTE_UNUSED)
3081
{
3082
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3083 3084 3085 3086
    return -1;
}

int
3087 3088
virPCIGetVirtualFunctions(const char *sysfs_path ATTRIBUTE_UNUSED,
                          virPCIDeviceAddressPtr **virtual_functions ATTRIBUTE_UNUSED,
3089 3090
                          size_t *num_virtual_functions ATTRIBUTE_UNUSED,
                          unsigned int *max_virtual_functions ATTRIBUTE_UNUSED)
3091
{
3092
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3093 3094
    return -1;
}
3095 3096

int
E
Eric Blake 已提交
3097
virPCIIsVirtualFunction(const char *vf_sysfs_device_link ATTRIBUTE_UNUSED)
3098
{
3099
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3100 3101 3102 3103
    return -1;
}

int
3104 3105 3106
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link ATTRIBUTE_UNUSED,
                              const char *vf_sysfs_device_link ATTRIBUTE_UNUSED,
                              int *vf_index ATTRIBUTE_UNUSED)
3107
{
3108
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3109 3110 3111 3112
    return -1;

}

3113 3114 3115 3116 3117 3118 3119 3120
/* Non-Linux stub: reports the "unsupported" error and returns -1;
 * *@pci_sysfs_device_link is not written. */
int
virPCIGetSysfsFile(char *virPCIDeviceName ATTRIBUTE_UNUSED,
                   char **pci_sysfs_device_link ATTRIBUTE_UNUSED)
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}

3121
int
3122 3123
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr dev ATTRIBUTE_UNUSED,
                                char **pci_sysfs_device_link ATTRIBUTE_UNUSED)
3124
{
3125
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3126 3127 3128
    return -1;
}

3129
int
3130
virPCIGetNetName(const char *device_link_sysfs_path ATTRIBUTE_UNUSED,
3131 3132
                 size_t idx ATTRIBUTE_UNUSED,
                 char *physPortID ATTRIBUTE_UNUSED,
3133
                 char **netname ATTRIBUTE_UNUSED)
3134
{
3135
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3136 3137
    return -1;
}
R
Roopa Prabhu 已提交
3138 3139

int
3140
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED,
3141
                             int pfNetDevIdx ATTRIBUTE_UNUSED,
3142 3143
                             char **pfname ATTRIBUTE_UNUSED,
                             int *vf_index ATTRIBUTE_UNUSED)
R
Roopa Prabhu 已提交
3144
{
3145
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
R
Roopa Prabhu 已提交
3146 3147
    return -1;
}
3148 3149 3150 3151 3152 3153 3154 3155 3156


/* Non-Linux stub: reports the "unsupported" error and returns -1;
 * *@types is not written. */
ssize_t
virPCIGetMdevTypes(const char *sysfspath ATTRIBUTE_UNUSED,
                   virMediatedDeviceTypePtr **types ATTRIBUTE_UNUSED)
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}
3157
#endif /* __linux__ */
3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241

/*
 * Returns 1 if @dev has a non-zero pcie_cap_pos after initialization,
 * 0 if it is zero, -1 if the config space could not be opened or the
 * device could not be initialized.
 */
int
virPCIDeviceIsPCIExpress(virPCIDevicePtr dev)
{
    int ret = -1;
    int fd = virPCIDeviceConfigOpen(dev, true);

    if (fd < 0)
        return -1;

    if (virPCIDeviceInit(dev, fd) >= 0)
        ret = dev->pcie_cap_pos ? 1 : 0;

    virPCIDeviceConfigClose(dev, fd);
    return ret;
}

/*
 * Reads the 16-bit flags word at pcie_cap_pos + PCI_CAP_FLAGS and
 * extracts the type field (PCI_EXP_FLAGS_TYPE).
 *
 * Returns 0 when that type is PCI_EXP_TYPE_ROOT_INT_EP or
 * PCI_EXP_TYPE_ROOT_EC, 1 for any other type, -1 if the config space
 * could not be opened or the device could not be initialized.
 *
 * NOTE(review): pcie_cap_pos is not checked for zero here; presumably
 * callers only use this on devices already known to be PCI Express.
 */
int
virPCIDeviceHasPCIExpressLink(virPCIDevicePtr dev)
{
    int ret = -1;
    int fd = virPCIDeviceConfigOpen(dev, true);

    if (fd < 0)
        return -1;

    if (virPCIDeviceInit(dev, fd) >= 0) {
        uint16_t flags = virPCIDeviceRead16(dev, fd,
                                            dev->pcie_cap_pos + PCI_CAP_FLAGS);
        uint16_t port_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;

        ret = !(port_type == PCI_EXP_TYPE_ROOT_INT_EP ||
                port_type == PCI_EXP_TYPE_ROOT_EC);
    }

    virPCIDeviceConfigClose(dev, fd);
    return ret;
}

/*
 * Reads the link capability and link status registers of the PCI
 * Express capability of @dev.
 *
 * From the 32-bit value at PCI_EXP_LNKCAP: *@cap_port receives the top
 * byte, *@cap_speed the PCI_EXP_LNKCAP_SPEED bits and *@cap_width the
 * PCI_EXP_LNKCAP_WIDTH bits shifted down by 4. From the 16-bit value at
 * PCI_EXP_LNKSTA: *@sta_speed and *@sta_width are extracted the same
 * way with the PCI_EXP_LNKSTA_* masks.
 *
 * Returns 0 on success, -1 on failure (including when @dev has no PCI
 * Express capability, which is reported as an error).
 */
int
virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
                          int *cap_port,
                          unsigned int *cap_speed,
                          unsigned int *cap_width,
                          unsigned int *sta_speed,
                          unsigned int *sta_width)
{
    uint32_t lnkcap;
    uint32_t lnksta;
    int ret = -1;
    int fd = virPCIDeviceConfigOpen(dev, true);

    if (fd < 0)
        return -1;

    if (virPCIDeviceInit(dev, fd) < 0)
        goto cleanup;

    if (dev->pcie_cap_pos == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("pci device %s is not a PCI-Express device"),
                       dev->name);
        goto cleanup;
    }

    lnkcap = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKCAP);
    *cap_port = lnkcap >> 24;
    *cap_speed = lnkcap & PCI_EXP_LNKCAP_SPEED;
    *cap_width = (lnkcap & PCI_EXP_LNKCAP_WIDTH) >> 4;

    lnksta = virPCIDeviceRead16(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKSTA);
    *sta_speed = lnksta & PCI_EXP_LNKSTA_SPEED;
    *sta_width = (lnksta & PCI_EXP_LNKSTA_WIDTH) >> 4;

    ret = 0;

 cleanup:
    virPCIDeviceConfigClose(dev, fd);
    return ret;
}
3242 3243


3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260
/*
 * Reads the PCI_HEADER_TYPE byte of @dev's config space, masks it with
 * PCI_HEADER_TYPE_MASK and stores the result in *@hdrType.
 *
 * Returns 0 on success. Returns -1 with *@hdrType set to -1 if the
 * config space cannot be opened or the masked value is not below
 * VIR_PCI_HEADER_LAST (the latter is reported as an error).
 */
int
virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType)
{
    uint8_t type;
    int fd;

    *hdrType = -1;

    fd = virPCIDeviceConfigOpen(dev, true);
    if (fd < 0)
        return -1;

    type = virPCIDeviceRead8(dev, fd, PCI_HEADER_TYPE) & PCI_HEADER_TYPE_MASK;
    virPCIDeviceConfigClose(dev, fd);

    if (type < VIR_PCI_HEADER_LAST) {
        *hdrType = type;
        return 0;
    }

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("Unknown PCI header type '%d' for device '%s'"),
                   type, dev->name);
    return -1;
}


3272 3273 3274 3275 3276 3277 3278 3279 3280 3281
/* Frees @dev together with its link_cap and link_sta members;
 * a NULL @dev is accepted and ignored. */
void
virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev)
{
    if (dev) {
        VIR_FREE(dev->link_cap);
        VIR_FREE(dev->link_sta);
        VIR_FREE(dev);
    }
}
3282 3283 3284 3285 3286 3287

/* Releases @address via VIR_FREE(). */
void
virPCIDeviceAddressFree(virPCIDeviceAddressPtr address)
{
    VIR_FREE(address);
}