virpci.c 92.9 KB
Newer Older
1
/*
2 3
 * virpci.c: helper APIs for managing host PCI devices
 *
4
 * Copyright (C) 2009-2015 Red Hat, Inc.
5 6 7 8 9 10 11 12 13 14 15 16
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library.  If not, see
O
Osier Yang 已提交
18
 * <http://www.gnu.org/licenses/>.
19 20 21 22
 */

#include <config.h>

23
#include "virpci.h"
24
#include "virnetdev.h"
25 26 27 28 29 30 31 32

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

33
#include "dirname.h"
34
#include "virlog.h"
35
#include "vircommand.h"
36
#include "virerror.h"
E
Eric Blake 已提交
37
#include "virfile.h"
38
#include "virkmod.h"
39 40
#include "virstring.h"
#include "virutil.h"
41
#include "viralloc.h"
42

43 44
VIR_LOG_INIT("util.pci");

45 46 47 48
#define PCI_SYSFS "/sys/bus/pci/"
#define PCI_ID_LEN 10   /* "XXXX XXXX" */
#define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */

49
VIR_ENUM_IMPL(virPCIELinkSpeed, VIR_PCIE_LINK_SPEED_LAST,
50 51
              "", "2.5", "5", "8", "16",
);
52

53 54 55 56 57
VIR_ENUM_IMPL(virPCIStubDriver, VIR_PCI_STUB_DRIVER_LAST,
              "none",
              "pciback", /* XEN */
              "pci-stub", /* KVM */
              "vfio-pci", /* VFIO */
58
);
59

60 61 62 63
VIR_ENUM_IMPL(virPCIHeader, VIR_PCI_HEADER_LAST,
              "endpoint",
              "pci-bridge",
              "cardbus-bridge",
64
);
65

66
struct _virPCIDevice {
67
    virPCIDeviceAddress address;
68 69 70

    char          name[PCI_ADDR_LEN]; /* domain:bus:slot.function */
    char          id[PCI_ID_LEN];     /* product vendor */
E
Eric Blake 已提交
71
    char          *path;
C
Chunyan Liu 已提交
72 73 74 75

    /* The driver:domain which uses the device */
    char          *used_by_drvname;
    char          *used_by_domname;
76

77 78
    unsigned int  pcie_cap_pos;
    unsigned int  pci_pm_cap_pos;
79 80
    bool          has_flr;
    bool          has_pm_reset;
81
    bool          managed;
82 83

    virPCIStubDriver stubDriver;
84 85

    /* used by reattach function */
86 87 88
    bool          unbind_from_stub;
    bool          remove_slot;
    bool          reprobe;
89 90
};

91
struct _virPCIDeviceList {
92 93
    virObjectLockable parent;

94
    size_t count;
95
    virPCIDevicePtr *devs;
96 97 98
};


99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
/* For virReportOOMError()  and virReportSystemError() */
#define VIR_FROM_THIS VIR_FROM_NONE

/* Specifications referenced in comments:
 *  PCI30  - PCI Local Bus Specification 3.0
 *  PCIe20 - PCI Express Base Specification 2.0
 *  BR12   - PCI-to-PCI Bridge Architecture Specification 1.2
 *  PM12   - PCI Bus Power Management Interface Specification 1.2
 *  ECN_AF - Advanced Capabilities for Conventional PCI ECN
 */

/* Type 0 config space header length; PCI30 Section 6.1 Configuration Space Organization */
#define PCI_CONF_LEN            0x100
#define PCI_CONF_HEADER_LEN     0x40

/* PCI30 6.2.1 */
#define PCI_HEADER_TYPE         0x0e    /* Header type */
116 117 118
#define PCI_HEADER_TYPE_BRIDGE 0x1
#define PCI_HEADER_TYPE_MASK   0x7f
#define PCI_HEADER_TYPE_MULTI  0x80
119 120 121 122 123 124 125 126 127

/* PCI30 6.2.1  Device Identification */
#define PCI_CLASS_DEVICE        0x0a    /* Device class */

/* Class Code for bridge; PCI30 D.7  Base Class 06h */
#define PCI_CLASS_BRIDGE_PCI    0x0604

/* PCI30 6.2.3  Device Status */
#define PCI_STATUS              0x06    /* 16 bits */
128
#define PCI_STATUS_CAP_LIST    0x10    /* Support Capability List */
129 130 131

/* PCI30 6.7  Capabilities List */
#define PCI_CAPABILITY_LIST     0x34    /* Offset of first capability list entry */
132
#define PCI_CAP_FLAGS           2       /* Capability defined flags (16 bits) */
133 134 135 136 137 138 139 140 141 142

/* PM12 3.2.1  Capability Identifier */
#define PCI_CAP_ID_PM           0x01    /* Power Management */
/* PCI30 H Capability IDs */
#define PCI_CAP_ID_EXP          0x10    /* PCI Express */
/* ECN_AF 6.x.1.1  Capability ID for AF */
#define PCI_CAP_ID_AF           0x13    /* Advanced Features */

/* PCIe20 7.8.3  Device Capabilities Register (Offset 04h) */
#define PCI_EXP_DEVCAP          0x4     /* Device capabilities */
143 144
#define PCI_EXP_DEVCAP_FLR     (1<<28)  /* Function Level Reset */
#define PCI_EXP_LNKCAP          0xc     /* Link Capabilities */
145
#define PCI_EXP_LNKCAP_SPEED    0x0000f /* Maximum Link Speed */
146 147 148 149
#define PCI_EXP_LNKCAP_WIDTH    0x003f0 /* Maximum Link Width */
#define PCI_EXP_LNKSTA          0x12    /* Link Status */
#define PCI_EXP_LNKSTA_SPEED    0x000f  /* Negotiated Link Speed */
#define PCI_EXP_LNKSTA_WIDTH    0x03f0  /* Negotiated Link Width */
150 151 152 153 154 155 156

/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */
#define PCI_PRIMARY_BUS         0x18    /* BR12 3.2.5.2 Primary bus number */
#define PCI_SECONDARY_BUS       0x19    /* BR12 3.2.5.3 Secondary bus number */
#define PCI_SUBORDINATE_BUS     0x1a    /* BR12 3.2.5.4 Highest bus number behind the bridge */
#define PCI_BRIDGE_CONTROL      0x3e
/* BR12 3.2.5.18  Bridge Control Register */
157
#define PCI_BRIDGE_CTL_RESET   0x40    /* Secondary bus reset */
158 159 160

/* PM12 3.2.4  Power Management Control/Status (Offset = 4) */
#define PCI_PM_CTRL                4    /* PM control and status register */
161 162 163 164
#define PCI_PM_CTRL_STATE_MASK    0x3  /* Current power state (D0 to D3) */
#define PCI_PM_CTRL_STATE_D0      0x0  /* D0 state */
#define PCI_PM_CTRL_STATE_D3hot   0x3  /* D3 state */
#define PCI_PM_CTRL_NO_SOFT_RESET 0x8  /* No reset for D3hot->D0 */
165 166 167

/* ECN_AF 6.x.1  Advanced Features Capability Structure */
#define PCI_AF_CAP              0x3     /* Advanced features capabilities */
168
#define PCI_AF_CAP_FLR         0x2     /* Function Level Reset */
169

J
Jiri Denemark 已提交
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
#define PCI_EXP_FLAGS           0x2
#define PCI_EXP_FLAGS_TYPE      0x00f0
#define PCI_EXP_TYPE_DOWNSTREAM 0x6

#define PCI_EXT_CAP_BASE          0x100
#define PCI_EXT_CAP_LIMIT         0x1000
#define PCI_EXT_CAP_ID_MASK       0x0000ffff
#define PCI_EXT_CAP_OFFSET_SHIFT  20
#define PCI_EXT_CAP_OFFSET_MASK   0x00000ffc

#define PCI_EXT_CAP_ID_ACS      0x000d
#define PCI_EXT_ACS_CTRL        0x06

#define PCI_EXT_CAP_ACS_SV      0x01
#define PCI_EXT_CAP_ACS_RR      0x04
#define PCI_EXT_CAP_ACS_CR      0x08
#define PCI_EXT_CAP_ACS_UF      0x10
187 188 189
#define PCI_EXT_CAP_ACS_ENABLED (PCI_EXT_CAP_ACS_SV | \
                                 PCI_EXT_CAP_ACS_RR | \
                                 PCI_EXT_CAP_ACS_CR | \
J
Jiri Denemark 已提交
190 191
                                 PCI_EXT_CAP_ACS_UF)

192 193 194
#define PCI_EXP_TYPE_ROOT_INT_EP 0x9    /* Root Complex Integrated Endpoint */
#define PCI_EXP_TYPE_ROOT_EC 0xa        /* Root Complex Event Collector */

195 196 197 198 199 200
static virClassPtr virPCIDeviceListClass;

static void virPCIDeviceListDispose(void *obj);

static int virPCIOnceInit(void)
{
201
    if (!VIR_CLASS_NEW(virPCIDeviceList, virClassForObjectLockable()))
202 203 204 205 206
        return -1;

    return 0;
}

207
VIR_ONCE_GLOBAL_INIT(virPCI);
208

L
Laine Stump 已提交
209

210 211
static char *
virPCIDriverDir(const char *driver)
L
Laine Stump 已提交
212
{
213
    char *buffer;
L
Laine Stump 已提交
214

215 216
    ignore_value(virAsprintf(&buffer, PCI_SYSFS "drivers/%s", driver));
    return buffer;
L
Laine Stump 已提交
217 218 219
}


220 221
static char *
virPCIDriverFile(const char *driver, const char *file)
L
Laine Stump 已提交
222
{
223
    char *buffer;
L
Laine Stump 已提交
224

225 226
    ignore_value(virAsprintf(&buffer, PCI_SYSFS "drivers/%s/%s", driver, file));
    return buffer;
L
Laine Stump 已提交
227 228 229
}


230 231
static char *
virPCIFile(const char *device, const char *file)
L
Laine Stump 已提交
232
{
233
    char *buffer;
L
Laine Stump 已提交
234

235 236
    ignore_value(virAsprintf(&buffer, PCI_SYSFS "devices/%s/%s", device, file));
    return buffer;
L
Laine Stump 已提交
237 238 239 240 241 242 243 244 245 246
}


/* virPCIDeviceGetDriverPathAndName - put the path to the driver
 * directory of the driver in use for this device in @path and the
 * name of the driver in @name. Both could be NULL if it's not bound
 * to any driver.
 *
 * Return 0 for success, -1 for error.
 */
247
int
L
Laine Stump 已提交
248 249 250
virPCIDeviceGetDriverPathAndName(virPCIDevicePtr dev, char **path, char **name)
{
    int ret = -1;
251
    VIR_AUTOFREE(char *) drvlink = NULL;
L
Laine Stump 已提交
252 253 254

    *path = *name = NULL;
    /* drvlink = "/sys/bus/pci/dddd:bb:ss.ff/driver" */
255
    if (!(drvlink = virPCIFile(dev->name, "driver")))
L
Laine Stump 已提交
256 257
        goto cleanup;

258 259 260 261 262
    if (!virFileExists(drvlink)) {
        ret = 0;
        goto cleanup;
    }

L
Laine Stump 已提交
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
    if (virFileIsLink(drvlink) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s driver file %s is not a symlink"),
                       dev->name, drvlink);
        goto cleanup;
    }
    if (virFileResolveLink(drvlink, path) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s driver symlink %s"),
                       dev->name, drvlink);
        goto cleanup;
    }
    /* path = "/sys/bus/pci/drivers/${drivername}" */

    if (VIR_STRDUP(*name, last_component(*path)) < 0)
        goto cleanup;
    /* name = "${drivername}" */

    ret = 0;
282
 cleanup:
L
Laine Stump 已提交
283 284 285 286 287 288 289 290
    if (ret < 0) {
        VIR_FREE(*path);
        VIR_FREE(*name);
    }
    return ret;
}


291
static int
292
virPCIDeviceConfigOpen(virPCIDevicePtr dev, bool fatal)
293 294 295 296
{
    int fd;

    fd = open(dev->path, O_RDWR);
297

298
    if (fd < 0) {
299 300 301 302 303 304 305 306 307
        if (fatal) {
            virReportSystemError(errno,
                                 _("Failed to open config space file '%s'"),
                                 dev->path);
        } else {
            char ebuf[1024];
            VIR_WARN("Failed to open config space file '%s': %s",
                     dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
        }
308 309
        return -1;
    }
310

311
    VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path);
312
    return fd;
313 314
}

315
static void
316
virPCIDeviceConfigClose(virPCIDevicePtr dev, int cfgfd)
317
{
318 319 320 321 322
    if (VIR_CLOSE(cfgfd) < 0) {
        char ebuf[1024];
        VIR_WARN("Failed to close config space file '%s': %s",
                 dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
    }
323 324
}

325

326
static int
327 328
virPCIDeviceRead(virPCIDevicePtr dev,
                 int cfgfd,
329
                 unsigned int pos,
330
                 uint8_t *buf,
331
                 unsigned int buflen)
332 333 334
{
    memset(buf, 0, buflen);

335 336
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        saferead(cfgfd, buf, buflen) != buflen) {
337
        char ebuf[1024];
338
        VIR_WARN("Failed to read from '%s' : %s", dev->path,
339 340 341 342 343 344 345
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static uint8_t
346
virPCIDeviceRead8(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
347 348
{
    uint8_t buf;
349
    virPCIDeviceRead(dev, cfgfd, pos, &buf, sizeof(buf));
350 351 352 353
    return buf;
}

static uint16_t
354
virPCIDeviceRead16(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
355 356
{
    uint8_t buf[2];
357
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
358 359 360 361
    return (buf[0] << 0) | (buf[1] << 8);
}

static uint32_t
362
virPCIDeviceRead32(virPCIDevicePtr dev, int cfgfd, unsigned int pos)
363 364
{
    uint8_t buf[4];
365
    virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
366 367 368
    return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
}

369 370 371
/* Read the sysfs "class" attribute of @dev and store its upper 16 bits
 * (the 24-bit value shifted right by 8) in @device_class.
 *
 * Returns 0 on success, -1 if the file cannot be read or parsed.
 */
static int
virPCIDeviceReadClass(virPCIDevicePtr dev, uint16_t *device_class)
{
    VIR_AUTOFREE(char *) classPath = NULL;
    VIR_AUTOFREE(char *) classStr = NULL;
    unsigned int classCode;

    classPath = virPCIFile(dev->name, "class");
    if (!classPath)
        return -1;

    /* class string is '0xNNNNNN\n' ... i.e. 9 bytes */
    if (virFileReadAll(classPath, 9, &classStr) < 0)
        return -1;

    /* Cut the string after the 8 characters of "0xNNNNNN" */
    classStr[8] = '\0';

    if (virStrToLong_ui(classStr, NULL, 16, &classCode) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unusual value in %s/devices/%s/class: %s"),
                       PCI_SYSFS, dev->name, classStr);
        return -1;
    }

    *device_class = (classCode >> 8) & 0xFFFF;

    return 0;
}

395
static int
396 397
virPCIDeviceWrite(virPCIDevicePtr dev,
                  int cfgfd,
398
                  unsigned int pos,
399
                  uint8_t *buf,
400
                  unsigned int buflen)
401
{
402 403
    if (lseek(cfgfd, pos, SEEK_SET) != pos ||
        safewrite(cfgfd, buf, buflen) != buflen) {
404
        char ebuf[1024];
405
        VIR_WARN("Failed to write to '%s' : %s", dev->path,
406 407 408 409 410 411 412
                 virStrerror(errno, ebuf, sizeof(ebuf)));
        return -1;
    }
    return 0;
}

static void
413
virPCIDeviceWrite16(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint16_t val)
414 415
{
    uint8_t buf[2] = { (val >> 0), (val >> 8) };
416
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
417 418 419
}

static void
420
virPCIDeviceWrite32(virPCIDevicePtr dev, int cfgfd, unsigned int pos, uint32_t val)
421
{
422
    uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val >> 24) };
423
    virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
424 425
}

E
Eric Blake 已提交
426 427
typedef int (*virPCIDeviceIterPredicate)(virPCIDevicePtr, virPCIDevicePtr,
                                         void *);
428 429 430 431 432 433 434

/* Iterate over available PCI devices calling @predicate
 * to compare each one to @dev.
 * Return -1 on error since we don't want to assume it is
 * safe to reset if there is an error.
 */
static int
435 436 437 438
virPCIDeviceIterDevices(virPCIDeviceIterPredicate predicate,
                        virPCIDevicePtr dev,
                        virPCIDevicePtr *matched,
                        void *data)
439 440 441
{
    DIR *dir;
    struct dirent *entry;
442
    int ret = 0;
443
    int rc;
444 445 446 447 448

    *matched = NULL;

    VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices", dev->id, dev->name);

J
Ján Tomko 已提交
449
    if (virDirOpen(&dir, PCI_SYSFS "devices") < 0)
450 451
        return -1;

E
Eric Blake 已提交
452
    while ((ret = virDirRead(dir, &entry, PCI_SYSFS "devices")) > 0) {
453
        unsigned int domain, bus, slot, function;
454
        VIR_AUTOPTR(virPCIDevice) check = NULL;
455
        char *tmp;
456

457 458 459 460 461 462 463 464 465
        /* expected format: <domain>:<bus>:<slot>.<function> */
        if (/* domain */
            virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp != ':' ||
            /* bus */
            virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' ||
            /* slot */
            virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' ||
            /* function */
            virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) {
466 467 468 469
            VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s", entry->d_name);
            continue;
        }

470
        check = virPCIDeviceNew(domain, bus, slot, function);
471
        if (!check) {
472 473 474
            ret = -1;
            break;
        }
475

476 477 478 479 480
        rc = predicate(dev, check, data);
        if (rc < 0) {
            /* the predicate returned an error, bail */
            ret = -1;
            break;
481
        } else if (rc == 1) {
482
            VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name, check->name);
483
            VIR_STEAL_PTR(*matched, check);
484
            ret = 1;
485 486 487
            break;
        }
    }
J
Ján Tomko 已提交
488
    VIR_DIR_CLOSE(dir);
489
    return ret;
490 491 492
}

static uint8_t
493 494 495
virPCIDeviceFindCapabilityOffset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 unsigned int capability)
496 497 498 499
{
    uint16_t status;
    uint8_t pos;

500
    status = virPCIDeviceRead16(dev, cfgfd, PCI_STATUS);
501 502 503
    if (!(status & PCI_STATUS_CAP_LIST))
        return 0;

504
    pos = virPCIDeviceRead8(dev, cfgfd, PCI_CAPABILITY_LIST);
505 506 507 508 509 510 511 512 513

    /* Zero indicates last capability, capabilities can't
     * be in the config space header and 0xff is returned
     * by the kernel if we don't have access to this region
     *
     * Note: we're not handling loops or extended
     * capabilities here.
     */
    while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) {
514
        uint8_t capid = virPCIDeviceRead8(dev, cfgfd, pos);
515 516 517 518 519 520
        if (capid == capability) {
            VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x",
                      dev->id, dev->name, capability, pos);
            return pos;
        }

521
        pos = virPCIDeviceRead8(dev, cfgfd, pos + 1);
522 523 524 525 526 527 528
    }

    VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name, capability);

    return 0;
}

J
Jiri Denemark 已提交
529
static unsigned int
530 531
virPCIDeviceFindExtendedCapabilityOffset(virPCIDevicePtr dev,
                                         int cfgfd,
532
                                         unsigned int capability)
J
Jiri Denemark 已提交
533 534 535 536 537 538 539 540 541 542
{
    int ttl;
    unsigned int pos;
    uint32_t header;

    /* minimum 8 bytes per capability */
    ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8;
    pos = PCI_EXT_CAP_BASE;

    while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) {
543
        header = virPCIDeviceRead32(dev, cfgfd, pos);
J
Jiri Denemark 已提交
544 545 546 547 548 549 550 551 552 553 554

        if ((header & PCI_EXT_CAP_ID_MASK) == capability)
            return pos;

        pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK;
        ttl--;
    }

    return 0;
}

555 556 557 558
/* detects whether this device has FLR.  Returns 0 if the device does
 * not have FLR, 1 if it does, and -1 on error
 */
static int
559
virPCIDeviceDetectFunctionLevelReset(virPCIDevicePtr dev, int cfgfd)
560
{
M
Mark McLoughlin 已提交
561
    uint32_t caps;
562
    uint8_t pos;
563
    VIR_AUTOFREE(char *) path = NULL;
564
    int found;
565 566 567 568 569 570 571 572

    /* The PCIe Function Level Reset capability allows
     * individual device functions to be reset without
     * affecting any other functions on the device or
     * any other devices on the bus. This is only common
     * on SR-IOV NICs at the moment.
     */
    if (dev->pcie_cap_pos) {
573
        caps = virPCIDeviceRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP);
574 575 576 577 578 579 580 581 582 583
        if (caps & PCI_EXP_DEVCAP_FLR) {
            VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id, dev->name);
            return 1;
        }
    }

    /* The PCI AF Function Level Reset capability is
     * the same thing, except for conventional PCI
     * devices. This is not common yet.
     */
584
    pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF);
585
    if (pos) {
586
        caps = virPCIDeviceRead16(dev, cfgfd, pos + PCI_AF_CAP);
587 588 589 590 591 592
        if (caps & PCI_AF_CAP_FLR) {
            VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id, dev->name);
            return 1;
        }
    }

593 594 595 596 597 598
    /* there are some buggy devices that do support FLR, but forget to
     * advertise that fact in their capabilities.  However, FLR is *required*
     * to be present for virtual functions (VFs), so if we see that this
     * device is a VF, we just assume FLR works
     */

599
    if (virAsprintf(&path, PCI_SYSFS "devices/%s/physfn", dev->name) < 0)
600 601 602 603 604 605 606 607 608
        return -1;

    found = virFileExists(path);
    if (found) {
        VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF; forcing flr on",
                  dev->id, dev->name);
        return 1;
    }

609 610 611 612 613 614 615 616 617
    VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name);

    return 0;
}

/* Require the device has the PCI Power Management capability
 * and that a D3hot->D0 transition will results in a full
 * internal reset, not just a soft reset.
 */
618
static unsigned int
619
virPCIDeviceDetectPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
620 621 622 623 624
{
    if (dev->pci_pm_cap_pos) {
        uint32_t ctl;

        /* require the NO_SOFT_RESET bit is clear */
625
        ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
626 627 628 629 630 631 632 633 634 635 636
        if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) {
            VIR_DEBUG("%s %s: detected PM reset capability", dev->id, dev->name);
            return 1;
        }
    }

    VIR_DEBUG("%s %s: no PM reset capability found", dev->id, dev->name);

    return 0;
}

637
/* Any active devices on the same domain/bus ? */
638
static int
639
virPCIDeviceSharesBusWithActive(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
640
{
641
    virPCIDeviceList *inactiveDevs = data;
642

643
    /* Different domain, different bus, or simply identical device */
644 645 646 647
    if (dev->address.domain != check->address.domain ||
        dev->address.bus != check->address.bus ||
        (dev->address.slot == check->address.slot &&
         dev->address.function == check->address.function))
648 649
        return 0;

650
    /* same bus, but inactive, i.e. about to be assigned to guest */
651
    if (inactiveDevs && virPCIDeviceListFind(inactiveDevs, check))
652
        return 0;
653

654
    return 1;
655 656
}

657 658 659
/* Return the first active device sharing a bus with @dev (the caller owns
 * it), or NULL if there is none or the device scan failed. Devices listed
 * in @inactiveDevs are not considered active. */
static virPCIDevicePtr
virPCIDeviceBusContainsActiveDevices(virPCIDevicePtr dev,
                                     virPCIDeviceList *inactiveDevs)
{
    virPCIDevicePtr conflict = NULL;
    int rc = virPCIDeviceIterDevices(virPCIDeviceSharesBusWithActive,
                                     dev, &conflict, inactiveDevs);

    return rc < 0 ? NULL : conflict;
}

/* Is @check the parent of @dev ? */
669
static int
670
virPCIDeviceIsParent(virPCIDevicePtr dev, virPCIDevicePtr check, void *data)
671 672 673
{
    uint16_t device_class;
    uint8_t header_type, secondary, subordinate;
674
    virPCIDevicePtr *best = data;
675 676
    int ret = 0;
    int fd;
677

678
    if (dev->address.domain != check->address.domain)
679 680
        return 0;

681
    if ((fd = virPCIDeviceConfigOpen(check, false)) < 0)
682 683
        return 0;

684
    /* Is it a bridge? */
685 686
    ret = virPCIDeviceReadClass(check, &device_class);
    if (ret < 0 || device_class != PCI_CLASS_BRIDGE_PCI)
687
        goto cleanup;
688 689

    /* Is it a plane? */
690
    header_type = virPCIDeviceRead8(check, fd, PCI_HEADER_TYPE);
691
    if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
692
        goto cleanup;
693

694 695
    secondary   = virPCIDeviceRead8(check, fd, PCI_SECONDARY_BUS);
    subordinate = virPCIDeviceRead8(check, fd, PCI_SUBORDINATE_BUS);
696

697
    VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name, check->name);
698

699 700 701
    /* if the secondary bus exactly equals the device's bus, then we found
     * the direct parent.  No further work is necessary
     */
702
    if (dev->address.bus == secondary) {
703 704 705
        ret = 1;
        goto cleanup;
    }
706

707
    /* otherwise, SRIOV allows VFs to be on different buses than their PFs.
708 709 710
     * In this case, what we need to do is look for the "best" match; i.e.
     * the most restrictive match that still satisfies all of the conditions.
     */
711
    if (dev->address.bus > secondary && dev->address.bus <= subordinate) {
712
        if (*best == NULL) {
713 714 715 716
            *best = virPCIDeviceNew(check->address.domain,
                                    check->address.bus,
                                    check->address.slot,
                                    check->address.function);
717 718 719 720 721
            if (*best == NULL) {
                ret = -1;
                goto cleanup;
            }
        } else {
722 723 724 725
            /* OK, we had already recorded a previous "best" match for the
             * parent.  See if the current device is more restrictive than the
             * best, and if so, make it the new best
             */
726 727 728
            int bestfd;
            uint8_t best_secondary;

729
            if ((bestfd = virPCIDeviceConfigOpen(*best, false)) < 0)
730
                goto cleanup;
731 732
            best_secondary = virPCIDeviceRead8(*best, bestfd, PCI_SECONDARY_BUS);
            virPCIDeviceConfigClose(*best, bestfd);
733 734

            if (secondary > best_secondary) {
735
                virPCIDeviceFree(*best);
736 737 738 739
                *best = virPCIDeviceNew(check->address.domain,
                                        check->address.bus,
                                        check->address.slot,
                                        check->address.function);
740 741 742 743
                if (*best == NULL) {
                    ret = -1;
                    goto cleanup;
                }
744 745 746 747
            }
        }
    }

748
 cleanup:
749
    virPCIDeviceConfigClose(check, fd);
750
    return ret;
751 752
}

753
static int
754
virPCIDeviceGetParent(virPCIDevicePtr dev, virPCIDevicePtr *parent)
755
{
756
    virPCIDevicePtr best = NULL;
757 758 759
    int ret;

    *parent = NULL;
760
    ret = virPCIDeviceIterDevices(virPCIDeviceIsParent, dev, parent, &best);
761
    if (ret == 1)
762
        virPCIDeviceFree(best);
763 764 765
    else if (ret == 0)
        *parent = best;
    return ret;
766 767 768 769 770 771
}

/* Secondary Bus Reset is our sledgehammer - it resets all
 * devices behind a bus.
 */
static int
772 773 774
virPCIDeviceTrySecondaryBusReset(virPCIDevicePtr dev,
                                 int cfgfd,
                                 virPCIDeviceList *inactiveDevs)
775
{
776 777
    VIR_AUTOPTR(virPCIDevice) parent = NULL;
    VIR_AUTOPTR(virPCIDevice) conflict = NULL;
778 779 780
    uint8_t config_space[PCI_CONF_LEN];
    uint16_t ctl;
    int ret = -1;
781
    int parentfd;
782

783 784 785
    /* Refuse to do a secondary bus reset if there are other
     * devices/functions behind the bus are used by the host
     * or other guests.
786
     */
787
    if ((conflict = virPCIDeviceBusContainsActiveDevices(dev, inactiveDevs))) {
788
        virReportError(VIR_ERR_INTERNAL_ERROR,
789 790
                       _("Active %s devices on bus with %s, not doing bus reset"),
                       conflict->name, dev->name);
791 792 793 794
        return -1;
    }

    /* Find the parent bus */
795
    if (virPCIDeviceGetParent(dev, &parent) < 0)
796
        return -1;
797
    if (!parent) {
798
        virReportError(VIR_ERR_INTERNAL_ERROR,
799 800
                       _("Failed to find parent device for %s"),
                       dev->name);
801 802
        return -1;
    }
803
    if ((parentfd = virPCIDeviceConfigOpen(parent, true)) < 0)
804
        goto out;
805 806 807 808 809 810 811

    VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name);

    /* Save and restore the device's config space; we only do this
     * for the supplied device since we refuse to do a reset if there
     * are multiple devices/functions
     */
812
    if (virPCIDeviceRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
813
        virReportError(VIR_ERR_INTERNAL_ERROR,
814
                       _("Failed to read PCI config space for %s"),
815
                       dev->name);
816 817 818 819 820 821
        goto out;
    }

    /* Read the control register, set the reset flag, wait 200ms,
     * unset the reset flag and wait 200ms.
     */
822
    ctl = virPCIDeviceRead16(dev, cfgfd, PCI_BRIDGE_CONTROL);
823

824 825
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL,
                        ctl | PCI_BRIDGE_CTL_RESET);
826 827 828

    usleep(200 * 1000); /* sleep 200ms */

829
    virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl);
830 831 832

    usleep(200 * 1000); /* sleep 200ms */

833
    if (virPCIDeviceWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
834
        virReportError(VIR_ERR_INTERNAL_ERROR,
835 836 837 838
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        goto out;
    }
839
    ret = 0;
840

841
 out:
842
    virPCIDeviceConfigClose(parent, parentfd);
843 844 845 846 847 848 849 850
    return ret;
}

/* Power management reset attempts to reset a device using a
 * D-state transition from D3hot to D0. Note, in detect_pm_reset()
 * above we require the device supports a full internal reset.
 */
static int
851
virPCIDeviceTryPowerManagementReset(virPCIDevicePtr dev, int cfgfd)
852 853 854 855 856 857 858 859
{
    uint8_t config_space[PCI_CONF_LEN];
    uint32_t ctl;

    if (!dev->pci_pm_cap_pos)
        return -1;

    /* Save and restore the device's config space. */
860
    if (virPCIDeviceRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
861
        virReportError(VIR_ERR_INTERNAL_ERROR,
862
                       _("Failed to read PCI config space for %s"),
863
                       dev->name);
864 865 866 867 868
        return -1;
    }

    VIR_DEBUG("%s %s: doing a power management reset", dev->id, dev->name);

869
    ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
870 871
    ctl &= ~PCI_PM_CTRL_STATE_MASK;

872 873
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D3hot);
874 875 876

    usleep(10 * 1000); /* sleep 10ms */

877 878
    virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
                        ctl | PCI_PM_CTRL_STATE_D0);
879 880 881

    usleep(10 * 1000); /* sleep 10ms */

882
    if (virPCIDeviceWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
883
        virReportError(VIR_ERR_INTERNAL_ERROR,
884 885 886 887
                       _("Failed to restore PCI config space for %s"),
                       dev->name);
        return -1;
    }
888 889 890 891 892

    return 0;
}

static int
893
virPCIDeviceInit(virPCIDevicePtr dev, int cfgfd)
894
{
895 896
    int flr;

897 898 899
    dev->pcie_cap_pos   = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP);
    dev->pci_pm_cap_pos = virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM);
    flr = virPCIDeviceDetectFunctionLevelReset(dev, cfgfd);
900
    if (flr < 0)
901
        return flr;
902 903
    dev->has_flr        = !!flr;
    dev->has_pm_reset   = !!virPCIDeviceDetectPowerManagementReset(dev, cfgfd);
904

905 906 907 908
    return 0;
}

int
909 910 911
virPCIDeviceReset(virPCIDevicePtr dev,
                  virPCIDeviceList *activeDevs,
                  virPCIDeviceList *inactiveDevs)
912
{
913 914
    VIR_AUTOFREE(char *) drvPath = NULL;
    VIR_AUTOFREE(char *) drvName = NULL;
915
    int ret = -1;
916
    int fd = -1;
917 918 919 920 921 922 923 924 925 926 927 928
    int hdrType = -1;

    if (virPCIGetHeaderType(dev, &hdrType) < 0)
        return -1;

    if (hdrType != VIR_PCI_HEADER_ENDPOINT) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid attempt to reset PCI device %s. "
                         "Only PCI endpoint devices can be reset"),
                       dev->name);
        return -1;
    }
929

930
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
931
        virReportError(VIR_ERR_INTERNAL_ERROR,
932 933 934 935
                       _("Not resetting active device %s"), dev->name);
        return -1;
    }

936 937 938 939 940 941 942 943
    /* If the device is currently bound to vfio-pci, ignore all
     * requests to reset it, since the vfio-pci driver will always
     * reset it whenever appropriate, so doing it ourselves would just
     * be redundant.
     */
    if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, &drvName) < 0)
        goto cleanup;

944
    if (virPCIStubDriverTypeFromString(drvName) == VIR_PCI_STUB_DRIVER_VFIO) {
945 946 947 948 949 950 951
        VIR_DEBUG("Device %s is bound to vfio-pci - skip reset",
                  dev->name);
        ret = 0;
        goto cleanup;
    }
    VIR_DEBUG("Resetting device %s", dev->name);

952
    if ((fd = virPCIDeviceConfigOpen(dev, true)) < 0)
953
        goto cleanup;
954

955
    if (virPCIDeviceInit(dev, fd) < 0)
956 957
        goto cleanup;

958 959 960
    /* KVM will perform FLR when starting and stopping
     * a guest, so there is no need for us to do it here.
     */
961 962 963 964
    if (dev->has_flr) {
        ret = 0;
        goto cleanup;
    }
965

966 967 968 969 970
    /* If the device supports PCI power management reset,
     * that's the next best thing because it only resets
     * the function, not the whole device.
     */
    if (dev->has_pm_reset)
971
        ret = virPCIDeviceTryPowerManagementReset(dev, fd);
972

973
    /* Bus reset is not an option with the root bus */
974
    if (ret < 0 && dev->address.bus != 0)
975
        ret = virPCIDeviceTrySecondaryBusReset(dev, fd, inactiveDevs);
976

977 978
    if (ret < 0) {
        virErrorPtr err = virGetLastError();
979
        virReportError(VIR_ERR_INTERNAL_ERROR,
980 981
                       _("Unable to reset PCI device %s: %s"),
                       dev->name,
982 983
                       err ? err->message :
                       _("no FLR, PM reset or bus reset available"));
984 985
    }

986
 cleanup:
987
    virPCIDeviceConfigClose(dev, fd);
988 989 990
    return ret;
}

991

992
static int
993
virPCIProbeStubDriver(virPCIStubDriver driver)
994
{
995
    const char *drvname = NULL;
996
    VIR_AUTOFREE(char *) drvpath = NULL;
997
    bool probed = false;
998

999 1000 1001 1002 1003 1004 1005 1006
    if (driver == VIR_PCI_STUB_DRIVER_NONE ||
        !(drvname = virPCIStubDriverTypeToString(driver))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s",
                       _("Attempting to use unknown stub driver"));
        return -1;
    }

1007
 recheck:
1008
    if ((drvpath = virPCIDriverDir(drvname)) && virFileExists(drvpath))
1009 1010
        /* driver already loaded, return */
        return 0;
1011 1012

    if (!probed) {
1013
        VIR_AUTOFREE(char *) errbuf = NULL;
1014
        probed = true;
1015 1016
        if ((errbuf = virKModLoad(drvname, true))) {
            VIR_WARN("failed to load driver %s: %s", drvname, errbuf);
1017
            goto cleanup;
1018
        }
1019 1020

        goto recheck;
1021 1022
    }

1023
 cleanup:
1024 1025 1026
    /* If we know failure was because of blacklist, let's report that;
     * otherwise, report a more generic failure message
     */
1027
    if (virKModIsBlacklisted(drvname)) {
1028 1029 1030
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s: "
                         "administratively prohibited"),
1031
                       drvname);
1032 1033 1034
    } else {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Failed to load PCI stub module %s"),
1035
                       drvname);
1036 1037
    }

1038
    return -1;
1039 1040
}

1041
int
1042
virPCIDeviceUnbind(virPCIDevicePtr dev)
1043
{
1044 1045 1046
    VIR_AUTOFREE(char *) path = NULL;
    VIR_AUTOFREE(char *) drvpath = NULL;
    VIR_AUTOFREE(char *) driver = NULL;
1047 1048

    if (virPCIDeviceGetDriverPathAndName(dev, &drvpath, &driver) < 0)
1049
        return -1;
1050

1051
    if (!driver)
1052
        /* The device is not bound to any driver */
1053
        return 0;
1054

1055
    if (!(path = virPCIFile(dev->name, "driver/unbind")))
1056
        return -1;
1057 1058 1059 1060 1061 1062

    if (virFileExists(path)) {
        if (virFileWriteStr(path, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to unbind PCI device '%s' from %s"),
                                 dev->name, driver);
1063
            return -1;
1064 1065 1066
        }
    }

1067
    return 0;
1068 1069
}

1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094

/**
 * virPCIDeviceRebind:
 *  @dev: virPCIDevice object describing the device to rebind
 *
 * Unbind the device from its current driver and then ask the kernel
 * to probe it again by writing the device name to the PCI bus
 * "drivers_probe" file.
 *
 * Returns 0 on success, -1 on failure
 */
int
virPCIDeviceRebind(virPCIDevicePtr dev)
{
    int rc = virPCIDeviceUnbind(dev);

    if (rc < 0)
        return -1;

    rc = virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0);
    if (rc < 0) {
        virReportSystemError(errno,
                             _("Failed to trigger a probe for PCI device '%s'"),
                             dev->name);
        return -1;
    }

    return 0;
}


1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105
/*
 * Bind a PCI device to a driver using driver_override sysfs interface.
 * E.g.
 *
 *  echo driver-name > /sys/bus/pci/devices/0000:03:00.0/driver_override
 *  echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
 *  echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
 *
 * An empty driverName will cause the device to be bound to its
 * preferred driver.
 */
1106
static int
1107 1108 1109
virPCIDeviceBindWithDriverOverride(virPCIDevicePtr dev,
                                   const char *driverName)
{
1110
    VIR_AUTOFREE(char *) path = NULL;
1111 1112 1113 1114 1115 1116 1117 1118 1119

    if (!(path = virPCIFile(dev->name, "driver_override")))
        return -1;

    if (virFileWriteStr(path, driverName, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to add driver '%s' to driver_override "
                               " interface of PCI device '%s'"),
                             driverName, dev->name);
1120
        return -1;
1121 1122
    }

1123
    if (virPCIDeviceRebind(dev) < 0)
1124
        return -1;
1125

1126
    return 0;
1127 1128 1129 1130
}

/* virPCIDeviceUnbindFromStubWithNewid:
 * @dev: device to give back
 *
 * Perform whichever of the following steps are still flagged on @dev:
 * unbind it from a known stub driver (dev->unbind_from_stub), write
 * it to the driver's "remove_slot" file (dev->remove_slot) and
 * trigger a re-probe through "drivers_probe" (dev->reprobe). All
 * three flags are cleared before returning, on success and failure
 * alike.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virPCIDeviceUnbindFromStubWithNewid(virPCIDevicePtr dev)
{
    VIR_AUTOFREE(char *) driverDir = NULL;
    VIR_AUTOFREE(char *) driverName = NULL;
    VIR_AUTOFREE(char *) removeSlotFile = NULL;
    VIR_AUTOFREE(char *) removeIdFile = NULL;
    int stubType;
    int ret = -1;

    /* If the device is currently bound to one of the "well known"
     * stub drivers, then unbind it, otherwise ignore it.
     */
    if (virPCIDeviceGetDriverPathAndName(dev, &driverDir, &driverName) < 0)
        goto cleanup;

    if (!driverName) {
        /* The device is not bound to any driver and we are almost done. */
        VIR_DEBUG("PCI device %s is not bound to any driver", dev->name);
        goto reprobe;
    }

    if (!dev->unbind_from_stub) {
        VIR_DEBUG("Unbind from stub skipped for PCI device %s", dev->name);
        goto remove_slot;
    }

    /* If the device isn't bound to a known stub, skip the unbind. */
    stubType = virPCIStubDriverTypeFromString(driverName);
    if (stubType < 0 || stubType == VIR_PCI_STUB_DRIVER_NONE) {
        VIR_DEBUG("Unbind from stub skipped for PCI device %s because of "
                  "unknown stub driver", dev->name);
        goto remove_slot;
    }

    VIR_DEBUG("Unbinding PCI device %s from stub driver %s",
              dev->name, driverName);

    if (virPCIDeviceUnbind(dev) < 0)
        goto cleanup;
    dev->unbind_from_stub = false;

 remove_slot:
    if (!dev->remove_slot) {
        VIR_DEBUG("Slot removal skipped for PCI device %s", dev->name);
        goto reprobe;
    }

    VIR_DEBUG("Removing slot for PCI device %s", dev->name);

    /* Xen's pciback.ko wants you to use remove_slot on the specific device */
    removeSlotFile = virPCIDriverFile(driverName, "remove_slot");
    if (!removeSlotFile)
        goto cleanup;

    if (virFileExists(removeSlotFile) &&
        virFileWriteStr(removeSlotFile, dev->name, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to remove slot for PCI device '%s' from %s"),
                             dev->name, driverName);
        goto cleanup;
    }
    dev->remove_slot = false;

 reprobe:
    if (!dev->reprobe) {
        VIR_DEBUG("Reprobe skipped for PCI device %s", dev->name);
        ret = 0;
        goto cleanup;
    }

    VIR_DEBUG("Reprobing for PCI device %s", dev->name);

    /* Trigger a re-probe if the device is not in the stub's dynamic
     * ID table. If the stub is available, but 'remove_id' isn't
     * available, then re-probing would just cause the device to be
     * re-bound to the stub.
     */
    if (driverName) {
        removeIdFile = virPCIDriverFile(driverName, "remove_id");
        if (!removeIdFile)
            goto cleanup;
    }

    if (!driverName || !virFileExists(driverDir) || virFileExists(removeIdFile)) {
        if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to trigger a re-probe for PCI device '%s'"),
                                 dev->name);
            goto cleanup;
        }
    }

    ret = 0;

 cleanup:
    /* do not do it again */
    dev->unbind_from_stub = false;
    dev->remove_slot = false;
    dev->reprobe = false;

    return ret;
}

1227 1228 1229 1230 1231 1232 1233 1234 1235 1236
/* virPCIDeviceUnbindFromStubWithOverride:
 * @dev: device to give back
 *
 * If @dev is flagged as needing an unbind from the stub driver, write
 * "\n" to its driver_override file and rebind it; otherwise do
 * nothing.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure.
 */
static int
virPCIDeviceUnbindFromStubWithOverride(virPCIDevicePtr dev)
{
    if (dev->unbind_from_stub)
        return virPCIDeviceBindWithDriverOverride(dev, "\n");

    VIR_DEBUG("Unbind from stub skipped for PCI device %s", dev->name);
    return 0;
}
1237 1238

static int
1239 1240
virPCIDeviceUnbindFromStub(virPCIDevicePtr dev)
{
1241
    VIR_AUTOFREE(char *) path = NULL;
1242 1243 1244 1245 1246 1247 1248 1249 1250

    /*
     * Prefer using the device's driver_override interface, falling back
     * to the unpleasant new_id interface.
     */
    if (!(path = virPCIFile(dev->name, "driver_override")))
        return -1;

    if (virFileExists(path))
1251
        return virPCIDeviceUnbindFromStubWithOverride(dev);
1252

1253
    return virPCIDeviceUnbindFromStubWithNewid(dev);
1254 1255 1256 1257
}

/* virPCIDeviceBindToStubWithNewid:
 * @dev: device to bind to its configured stub driver
 *
 * Bind @dev to its stub driver through the driver's new_id / new_slot
 * / bind sysfs files, recording in dev->unbind_from_stub,
 * dev->remove_slot and dev->reprobe which steps have to be undone
 * later. The device ID is removed from the stub's dynamic ID table
 * again before returning. On failure an unbind from the stub is
 * attempted, and the error saved at the "remove_id" step (if any) is
 * set again as the last error.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virPCIDeviceBindToStubWithNewid(virPCIDevicePtr dev)
{
    VIR_AUTOFREE(char *) stubDriverPath = NULL;
    VIR_AUTOFREE(char *) driverLink = NULL;
    VIR_AUTOFREE(char *) newIdFile = NULL;
    VIR_AUTOFREE(char *) newSlotFile = NULL;
    VIR_AUTOFREE(char *) bindFile = NULL;
    VIR_AUTOFREE(char *) removeIdFile = NULL;
    VIR_AUTOPTR(virError) savedErr = NULL;
    const char *stubDriverName = NULL;
    bool hadDriver = false;
    int ret = -1;

    /* Check the device is configured to use one of the known stub drivers */
    if (dev->stubDriver == VIR_PCI_STUB_DRIVER_NONE) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    stubDriverName = virPCIStubDriverTypeToString(dev->stubDriver);
    if (!stubDriverName) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unknown stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    if (!(stubDriverPath = virPCIDriverDir(stubDriverName)))
        goto cleanup;

    if (!(driverLink = virPCIFile(dev->name, "driver")))
        goto cleanup;

    if (virFileExists(driverLink)) {
        if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
            /* The device is already bound to the correct driver */
            VIR_DEBUG("Device %s is already bound to %s",
                      dev->name, stubDriverName);
            ret = 0;
            goto cleanup;
        }
        /* Bound to some other driver: remember to reprobe later. */
        hadDriver = true;
    }

    /* Add the PCI device ID to the stub's dynamic ID table;
     * this is needed to allow us to bind the device to the stub.
     * Note: if the device is not currently bound to any driver,
     * stub will immediately be bound to the device. Also, note
     * that if a new device with this ID is hotplugged, or if a probe
     * is triggered for such a device, it will also be immediately
     * bound by the stub.
     */
    if (!(newIdFile = virPCIDriverFile(stubDriverName, "new_id")))
        goto cleanup;

    if (virFileWriteStr(newIdFile, dev->id, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to add PCI device ID '%s' to %s"),
                             dev->id, stubDriverName);
        goto cleanup;
    }

    /* check whether the device is bound to pci-stub when we write dev->id to
     * ${stubDriver}/new_id.
     */
    if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
        dev->unbind_from_stub = true;
        dev->remove_slot = true;
        ret = 0;
        goto remove_id;
    }

    if (virPCIDeviceUnbind(dev) < 0)
        goto remove_id;

    /* If the device was bound to a driver we'll need to reprobe later */
    dev->reprobe = hadDriver;

    /* If the device isn't already bound to pci-stub, try binding it now.
     */
    if (!virFileLinkPointsTo(driverLink, stubDriverPath)) {
        /* Xen's pciback.ko wants you to use new_slot first */
        if (!(newSlotFile = virPCIDriverFile(stubDriverName, "new_slot")))
            goto remove_id;

        if (virFileExists(newSlotFile) &&
            virFileWriteStr(newSlotFile, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to add slot for "
                                   "PCI device '%s' to %s"),
                                 dev->name, stubDriverName);
            goto remove_id;
        }
        dev->remove_slot = true;

        if (!(bindFile = virPCIDriverFile(stubDriverName, "bind")))
            goto remove_id;

        if (virFileWriteStr(bindFile, dev->name, 0) < 0) {
            virReportSystemError(errno,
                                 _("Failed to bind PCI device '%s' to %s"),
                                 dev->name, stubDriverName);
            goto remove_id;
        }
        dev->unbind_from_stub = true;
    }

    ret = 0;

 remove_id:
    /* Keep the current error (if any) so it can be set again at the end. */
    savedErr = virSaveLastError();

    /* If 'remove_id' exists, remove the device id from pci-stub's dynamic
     * ID table so that 'drivers_probe' works below.
     */
    if (!(removeIdFile = virPCIDriverFile(stubDriverName, "remove_id"))) {
        /* We do not remove PCI ID from pci-stub, and we cannot reprobe it */
        if (dev->reprobe) {
            VIR_WARN("Could not remove PCI ID '%s' from %s, and the device "
                     "cannot be probed again.", dev->id, stubDriverName);
        }
        dev->reprobe = false;
        ret = -1;
        goto cleanup;
    }

    if (virFileExists(removeIdFile) &&
        virFileWriteStr(removeIdFile, dev->id, 0) < 0) {
        virReportSystemError(errno,
                             _("Failed to remove PCI ID '%s' from %s"),
                             dev->id, stubDriverName);

        /* remove PCI ID from pci-stub failed, and we cannot reprobe it */
        if (dev->reprobe) {
            VIR_WARN("Failed to remove PCI ID '%s' from %s, and the device "
                     "cannot be probed again.", dev->id, stubDriverName);
        }
        dev->reprobe = false;
        ret = -1;
        goto cleanup;
    }

 cleanup:
    if (ret < 0)
        virPCIDeviceUnbindFromStub(dev);

    if (savedErr)
        virSetError(savedErr);

    return ret;
}

1404 1405 1406 1407
/* virPCIDeviceBindToStubWithOverride:
 * @dev: device to bind to its configured stub driver
 *
 * Bind @dev to its stub driver using the driver_override interface.
 * Nothing is done if the device's "driver" link already points at
 * the stub driver directory. After a successful bind the device is
 * flagged as needing an unbind from the stub.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virPCIDeviceBindToStubWithOverride(virPCIDevicePtr dev)
{
    const char *stubDriverName;
    VIR_AUTOFREE(char *) stubDriverPath = NULL;
    VIR_AUTOFREE(char *) driverLink = NULL;

    /* Check the device is configured to use one of the known stub drivers */
    if (dev->stubDriver == VIR_PCI_STUB_DRIVER_NONE) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    stubDriverName = virPCIStubDriverTypeToString(dev->stubDriver);
    if (!stubDriverName) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unknown stub driver configured for PCI device %s"),
                       dev->name);
        return -1;
    }

    if (!(stubDriverPath = virPCIDriverDir(stubDriverName)))
        return -1;

    if (!(driverLink = virPCIFile(dev->name, "driver")))
        return -1;

    if (virFileExists(driverLink) &&
        virFileLinkPointsTo(driverLink, stubDriverPath)) {
        /* The device is already bound to the correct driver */
        VIR_DEBUG("Device %s is already bound to %s",
                  dev->name, stubDriverName);
        return 0;
    }

    if (virPCIDeviceBindWithDriverOverride(dev, stubDriverName) < 0)
        return -1;

    dev->unbind_from_stub = true;
    return 0;
}

/* virPCIDeviceBindToStub:
 * @dev: device to bind to its configured stub driver
 *
 * Pick the bind strategy: the driver_override interface is used when
 * the device exposes that sysfs file, the new_id based interface
 * otherwise.
 *
 * Returns the result of the selected strategy, or -1 if the sysfs
 * path could not be built.
 */
static int
virPCIDeviceBindToStub(virPCIDevicePtr dev)
{
    VIR_AUTOFREE(char *) overrideFile = virPCIFile(dev->name, "driver_override");

    if (!overrideFile)
        return -1;

    /*
     * Prefer using the device's driver_override interface, falling back
     * to the unpleasant new_id interface.
     */
    return virFileExists(overrideFile)
        ? virPCIDeviceBindToStubWithOverride(dev)
        : virPCIDeviceBindToStubWithNewid(dev);
}

1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479
/* virPCIDeviceDetach:
 *
 * Detach this device from the host driver, attach it to the stub
 * driver (previously set with virPCIDeviceSetStubDriver(), and add *a
 * copy* of the object to the inactiveDevs list (if provided). This
 * function will *never* consume dev, so the caller should free it.
 *
 * Returns 0 on success, -1 on failure (will fail if the device is
 * already in the activeDevs list, but will be a NOP if the device is
 * already bound to the stub).
 *
 * GENERAL NOTE: activeDevs should be a list of all PCI devices
 * currently in use by a domain. inactiveDevs is a list of all PCI
 * devices that libvirt has detached from the host driver + attached
 * to the stub driver, but hasn't yet assigned to a domain. Any device
 * that is still attached to its host driver should not be on either
 * list.
 */
1480
int
1481 1482
virPCIDeviceDetach(virPCIDevicePtr dev,
                   virPCIDeviceList *activeDevs,
1483
                   virPCIDeviceList *inactiveDevs)
1484
{
1485
    if (virPCIProbeStubDriver(dev->stubDriver) < 0)
1486 1487
        return -1;

1488
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1489
        virReportError(VIR_ERR_INTERNAL_ERROR,
1490 1491 1492 1493
                       _("Not detaching active device %s"), dev->name);
        return -1;
    }

1494
    if (virPCIDeviceBindToStub(dev) < 0)
1495 1496
        return -1;

1497 1498 1499
    /* Add *a copy of* the dev into list inactiveDevs, if
     * it's not already there.
     */
1500 1501 1502 1503
    if (inactiveDevs && !virPCIDeviceListFind(inactiveDevs, dev)) {
        VIR_DEBUG("Adding PCI device %s to inactive list", dev->name);
        if (virPCIDeviceListAddCopy(inactiveDevs, dev) < 0)
            return -1;
1504 1505 1506
    }

    return 0;
1507 1508 1509
}

int
1510 1511
virPCIDeviceReattach(virPCIDevicePtr dev,
                     virPCIDeviceListPtr activeDevs,
1512
                     virPCIDeviceListPtr inactiveDevs)
1513
{
1514
    if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
1515
        virReportError(VIR_ERR_INTERNAL_ERROR,
1516 1517 1518 1519
                       _("Not reattaching active device %s"), dev->name);
        return -1;
    }

1520
    if (virPCIDeviceUnbindFromStub(dev) < 0)
1521 1522 1523
        return -1;

    /* Steal the dev from list inactiveDevs */
1524 1525
    if (inactiveDevs) {
        VIR_DEBUG("Removing PCI device %s from inactive list", dev->name);
1526
        virPCIDeviceListDel(inactiveDevs, dev);
1527
    }
1528 1529

    return 0;
1530 1531
}

1532 1533 1534 1535 1536
/* Certain hypervisors (like qemu/kvm) map the PCI bar(s) on
 * the host when doing device passthrough.  This can lead to a race
 * condition where the hypervisor is still cleaning up the device while
 * libvirt is trying to re-attach it to the host device driver.  To avoid
 * this situation, we look through /proc/iomem, and if the hypervisor is
E
Eric Blake 已提交
1537 1538
 * still holding on to the bar (denoted by the string in the matcher
 * variable), then we can wait around a bit for that to clear up.
1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558
 *
 * A typical /proc/iomem looks like this (snipped for brevity):
 * 00010000-0008efff : System RAM
 * 0008f000-0008ffff : reserved
 * ...
 * 00100000-cc9fcfff : System RAM
 *   00200000-00483d3b : Kernel code
 *   00483d3c-005c88df : Kernel data
 * cc9fd000-ccc71fff : ACPI Non-volatile Storage
 * ...
 * d0200000-d02fffff : PCI Bus #05
 *   d0200000-d021ffff : 0000:05:00.0
 *     d0200000-d021ffff : e1000e
 *   d0220000-d023ffff : 0000:05:00.0
 *     d0220000-d023ffff : e1000e
 * ...
 * f0000000-f0003fff : 0000:00:1b.0
 *   f0000000-f0003fff : kvm_assigned_device
 *
 * Returns 0 if we are clear to continue, and 1 if the hypervisor is still
E
Eric Blake 已提交
1559
 * holding on to the resource.
1560 1561
 */
int
1562
virPCIDeviceWaitForCleanup(virPCIDevicePtr dev, const char *matcher)
1563 1564 1565
{
    FILE *fp;
    char line[160];
1566
    char *tmp;
1567
    unsigned long long start, end;
1568
    unsigned int domain, bus, slot, function;
1569
    bool in_matching_device;
1570 1571 1572 1573 1574 1575 1576 1577 1578
    int ret;
    size_t match_depth;

    fp = fopen("/proc/iomem", "r");
    if (!fp) {
        /* If we failed to open iomem, we just basically ignore the error.  The
         * unbind might succeed anyway, and besides, it's very likely we have
         * no way to report the error
         */
1579
        VIR_DEBUG("Failed to open /proc/iomem, trying to continue anyway");
1580 1581 1582 1583
        return 0;
    }

    ret = 0;
1584
    in_matching_device = false;
1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595
    match_depth = 0;
    while (fgets(line, sizeof(line), fp) != 0) {
        /* the logic here is a bit confusing.  For each line, we look to
         * see if it matches the domain:bus:slot.function we were given.
         * If this line matches the DBSF, then any subsequent lines indented
         * by 2 spaces are the PCI regions for this device.  It's also
         * possible that none of the PCI regions are currently mapped, in
         * which case we have no indented regions.  This code handles all
         * of these situations
         */
        if (in_matching_device && (strspn(line, " ") == (match_depth + 2))) {
1596 1597 1598 1599 1600 1601
            /* expected format: <start>-<end> : <suffix> */
            if (/* start */
                virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp != '-' ||
                /* end */
                virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 ||
                (tmp = STRSKIP(tmp, " : ")) == NULL)
1602 1603
                continue;

1604
            if (STRPREFIX(tmp, matcher)) {
1605 1606 1607
                ret = 1;
                break;
            }
1608
        } else {
1609
            in_matching_device = false;
1610

1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624
            /* expected format: <start>-<end> : <domain>:<bus>:<slot>.<function> */
            if (/* start */
                virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp != '-' ||
                /* end */
                virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 ||
                (tmp = STRSKIP(tmp, " : ")) == NULL ||
                /* domain */
                virStrToLong_ui(tmp, &tmp, 16, &domain) < 0 || *tmp != ':' ||
                /* bus */
                virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp != ':' ||
                /* slot */
                virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp != '.' ||
                /* function */
                virStrToLong_ui(tmp + 1, &tmp, 16, &function) < 0 || *tmp != '\n')
1625 1626
                continue;

1627 1628
            if (domain != dev->address.domain || bus != dev->address.bus ||
                slot != dev->address.slot || function != dev->address.function)
1629
                continue;
1630
            in_matching_device = true;
1631 1632 1633 1634
            match_depth = strspn(line, " ");
        }
    }

E
Eric Blake 已提交
1635
    VIR_FORCE_FCLOSE(fp);
1636 1637 1638 1639

    return ret;
}

1640
static char *
1641
virPCIDeviceReadID(virPCIDevicePtr dev, const char *id_name)
1642
{
1643
    VIR_AUTOFREE(char *) path = NULL;
1644 1645
    char *id_str;

1646
    if (!(path = virPCIFile(dev->name, id_name)))
1647
        return NULL;
1648 1649

    /* ID string is '0xNNNN\n' ... i.e. 7 bytes */
1650
    if (virFileReadAll(path, 7, &id_str) < 0)
1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664
        return NULL;

    /* Check for 0x suffix */
    if (id_str[0] != '0' || id_str[1] != 'x') {
        VIR_FREE(id_str);
        return NULL;
    }

    /* Chop off the newline; we know the string is 7 bytes */
    id_str[6] = '\0';

    return id_str;
}

1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729
/* virPCIDeviceAddressIsValid:
 * @addr: PCI address to check
 * @report: whether to report an error describing the first problem found
 *
 * Check that domain, bus, slot and function are within their
 * respective limits (0xFFFF, 0xFF, 0x1F, 7) and that the address is
 * not empty. The checks are made in that order and stop at the first
 * failure.
 *
 * Returns true if the address is valid, false otherwise.
 */
bool
virPCIDeviceAddressIsValid(virPCIDeviceAddressPtr addr,
                           bool report)
{
    bool valid = false;

    if (addr->domain > 0xFFFF) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address domain='0x%x', "
                             "must be <= 0xFFFF"),
                           addr->domain);
        }
    } else if (addr->bus > 0xFF) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address bus='0x%x', "
                             "must be <= 0xFF"),
                           addr->bus);
        }
    } else if (addr->slot > 0x1F) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address slot='0x%x', "
                             "must be <= 0x1F"),
                           addr->slot);
        }
    } else if (addr->function > 7) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Invalid PCI address function=0x%x, "
                             "must be <= 7"),
                           addr->function);
        }
    } else if (virPCIDeviceAddressIsEmpty(addr)) {
        if (report) {
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Invalid PCI address 0000:00:00, at least "
                             "one of domain, bus, or slot must be > 0"));
        }
    } else {
        valid = true;
    }

    return valid;
}

/* virPCIDeviceAddressIsEmpty:
 * @addr: PCI address to check
 *
 * Returns true when domain, bus and slot are all zero; the function
 * number is not taken into account.
 */
bool
virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress *addr)
{
    return addr->domain == 0 && addr->bus == 0 && addr->slot == 0;
}

/* virPCIDeviceAddressEqual:
 * @addr1: first PCI address
 * @addr2: second PCI address
 *
 * Returns true when domain, bus, slot and function of both addresses
 * are equal, false otherwise.
 */
bool
virPCIDeviceAddressEqual(virPCIDeviceAddress *addr1,
                         virPCIDeviceAddress *addr2)
{
    return addr1->domain == addr2->domain &&
           addr1->bus == addr2->bus &&
           addr1->slot == addr2->slot &&
           addr1->function == addr2->function;
}

1730
char *
1731
virPCIDeviceAddressAsString(virPCIDeviceAddressPtr addr)
1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742
{
    char *str;

    ignore_value(virAsprintf(&str, "%.4x:%.2x:%.2x.%.1x",
                             addr->domain,
                             addr->bus,
                             addr->slot,
                             addr->function));
    return str;
}

1743
virPCIDevicePtr
1744 1745 1746 1747
virPCIDeviceNew(unsigned int domain,
                unsigned int bus,
                unsigned int slot,
                unsigned int function)
1748
{
1749 1750
    virPCIDevicePtr ret = NULL;
    VIR_AUTOPTR(virPCIDevice) dev = NULL;
1751 1752
    VIR_AUTOFREE(char *) vendor = NULL;
    VIR_AUTOFREE(char *) product = NULL;
1753

1754
    if (VIR_ALLOC(dev) < 0)
1755 1756
        return NULL;

1757 1758 1759 1760
    dev->address.domain = domain;
    dev->address.bus = bus;
    dev->address.slot = slot;
    dev->address.function = function;
1761

E
Eric Blake 已提交
1762
    if (snprintf(dev->name, sizeof(dev->name), "%.4x:%.2x:%.2x.%.1x",
1763
                 domain, bus, slot, function) >= sizeof(dev->name)) {
1764
        virReportError(VIR_ERR_INTERNAL_ERROR,
E
Eric Blake 已提交
1765
                       _("dev->name buffer overflow: %.4x:%.2x:%.2x.%.1x"),
1766
                       domain, bus, slot, function);
1767
        goto cleanup;
E
Eric Blake 已提交
1768 1769
    }
    if (virAsprintf(&dev->path, PCI_SYSFS "devices/%s/config",
1770
                    dev->name) < 0)
1771
        goto cleanup;
1772

1773
    if (!virFileExists(dev->path)) {
1774 1775 1776
        virReportSystemError(errno,
                             _("Device %s not found: could not access %s"),
                             dev->name, dev->path);
1777
        goto cleanup;
1778 1779
    }

1780 1781
    vendor  = virPCIDeviceReadID(dev, "vendor");
    product = virPCIDeviceReadID(dev, "device");
1782 1783

    if (!vendor || !product) {
1784
        virReportError(VIR_ERR_INTERNAL_ERROR,
1785 1786
                       _("Failed to read product/vendor ID for %s"),
                       dev->name);
1787
        goto cleanup;
1788 1789 1790
    }

    /* strings contain '0x' prefix */
E
Eric Blake 已提交
1791 1792
    if (snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2],
                 &product[2]) >= sizeof(dev->id)) {
1793
        virReportError(VIR_ERR_INTERNAL_ERROR,
E
Eric Blake 已提交
1794 1795
                       _("dev->id buffer overflow: %s %s"),
                       &vendor[2], &product[2]);
1796
        goto cleanup;
E
Eric Blake 已提交
1797
    }
1798 1799 1800

    VIR_DEBUG("%s %s: initialized", dev->id, dev->name);

1801
    VIR_STEAL_PTR(ret, dev);
E
Eric Blake 已提交
1802

1803 1804
 cleanup:
    return ret;
1805 1806
}

L
Laine Stump 已提交
1807 1808 1809 1810 1811 1812

/**
 * virPCIDeviceCopy:
 * @dev: device to duplicate
 *
 * Creates a copy of @dev. Plain members are copied by struct
 * assignment; the path and used_by_* strings are duplicated so the
 * copy does not share them with @dev.
 *
 * Returns: the copy, or NULL on failure.
 */
virPCIDevicePtr
virPCIDeviceCopy(virPCIDevicePtr dev)
{
    virPCIDevicePtr copy;

    if (VIR_ALLOC(copy) < 0)
        return NULL;

    /* shallow copy to take care of most attributes */
    *copy = *dev;

    /* drop the borrowed string pointers before duplicating them */
    copy->path = NULL;
    copy->used_by_drvname = NULL;
    copy->used_by_domname = NULL;

    if (VIR_STRDUP(copy->path, dev->path) < 0 ||
        VIR_STRDUP(copy->used_by_drvname, dev->used_by_drvname) < 0 ||
        VIR_STRDUP(copy->used_by_domname, dev->used_by_domname) < 0) {
        virPCIDeviceFree(copy);
        return NULL;
    }

    return copy;
}


1833
void
1834
virPCIDeviceFree(virPCIDevicePtr dev)
1835
{
1836 1837
    if (!dev)
        return;
1838
    VIR_DEBUG("%s %s: freeing", dev->id, dev->name);
E
Eric Blake 已提交
1839
    VIR_FREE(dev->path);
C
Chunyan Liu 已提交
1840 1841
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);
1842 1843
    VIR_FREE(dev);
}
1844

1845 1846 1847 1848 1849
/**
 * virPCIDeviceGetAddress:
 * @dev: device to get address from
 *
 * Gives access to the PCI address stored inside @dev. The returned
 * pointer refers to a member of @dev, so it must not be freed.
 *
 * Returns: a pointer to the address, which can never be NULL.
 */
virPCIDeviceAddressPtr
virPCIDeviceGetAddress(virPCIDevicePtr dev)
{
    return &dev->address;
}

1860
const char *
1861
virPCIDeviceGetName(virPCIDevicePtr dev)
1862 1863 1864 1865
{
    return dev->name;
}

1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877
/**
 * virPCIDeviceGetConfigPath:
 * @dev: device to query
 *
 * Returns: the path of @dev's PCI config file. The string belongs to
 * @dev; no copy is made.
 */
const char *
virPCIDeviceGetConfigPath(virPCIDevicePtr dev)
{
    return dev->path;
}

1878
/**
 * virPCIDeviceSetManaged:
 * @dev: device to update
 * @managed: new value of the managed flag
 */
void
virPCIDeviceSetManaged(virPCIDevicePtr dev,
                       bool managed)
{
    dev->managed = managed;
}

1883
bool
1884
virPCIDeviceGetManaged(virPCIDevicePtr dev)
1885 1886 1887 1888
{
    return dev->managed;
}

1889 1890
/**
 * virPCIDeviceSetStubDriver:
 * @dev: device to update
 * @driver: stub driver to record for @dev
 */
void
virPCIDeviceSetStubDriver(virPCIDevicePtr dev,
                          virPCIStubDriver driver)
{
    dev->stubDriver = driver;
}

1895
virPCIStubDriver
1896 1897 1898 1899 1900
virPCIDeviceGetStubDriver(virPCIDevicePtr dev)
{
    return dev->stubDriver;
}

1901
bool
1902
virPCIDeviceGetUnbindFromStub(virPCIDevicePtr dev)
1903 1904 1905 1906 1907
{
    return dev->unbind_from_stub;
}

void
1908
virPCIDeviceSetUnbindFromStub(virPCIDevicePtr dev, bool unbind)
1909
{
1910
    dev->unbind_from_stub = unbind;
1911 1912
}

1913
bool
1914
virPCIDeviceGetRemoveSlot(virPCIDevicePtr dev)
1915 1916 1917 1918 1919
{
    return dev->remove_slot;
}

void
1920
virPCIDeviceSetRemoveSlot(virPCIDevicePtr dev, bool remove_slot)
1921
{
1922
    dev->remove_slot = remove_slot;
1923 1924
}

1925
bool
1926
virPCIDeviceGetReprobe(virPCIDevicePtr dev)
1927 1928 1929 1930 1931
{
    return dev->reprobe;
}

void
1932
virPCIDeviceSetReprobe(virPCIDevicePtr dev, bool reprobe)
1933
{
1934
    dev->reprobe = reprobe;
1935 1936
}

C
Chunyan Liu 已提交
1937 1938 1939 1940
/**
 * virPCIDeviceSetUsedBy:
 * @dev: device to update
 * @drv_name: driver name to record
 * @dom_name: domain name to record
 *
 * Replaces the driver/domain names recorded in @dev with copies of
 * @drv_name and @dom_name. The previous values are freed before the
 * new ones are duplicated, so they are gone even when this fails.
 *
 * Returns: 0 on success, -1 if duplicating either string failed.
 */
int
virPCIDeviceSetUsedBy(virPCIDevicePtr dev,
                      const char *drv_name,
                      const char *dom_name)
{
    VIR_FREE(dev->used_by_drvname);
    VIR_FREE(dev->used_by_domname);

    if (VIR_STRDUP(dev->used_by_drvname, drv_name) < 0 ||
        VIR_STRDUP(dev->used_by_domname, dom_name) < 0)
        return -1;

    return 0;
}

C
Chunyan Liu 已提交
1952 1953 1954 1955
/**
 * virPCIDeviceGetUsedBy:
 * @dev: device to query
 * @drv_name: filled with the recorded driver name
 * @dom_name: filled with the recorded domain name
 *
 * Stores the pointers held by @dev into @drv_name and @dom_name; the
 * strings are not copied.
 */
void
virPCIDeviceGetUsedBy(virPCIDevicePtr dev,
                      const char **drv_name,
                      const char **dom_name)
{
    *drv_name = dev->used_by_drvname;
    *dom_name = dev->used_by_domname;
}

1961 1962
virPCIDeviceListPtr
virPCIDeviceListNew(void)
1963
{
1964
    virPCIDeviceListPtr list;
1965

1966 1967 1968 1969
    if (virPCIInitialize() < 0)
        return NULL;

    if (!(list = virObjectLockableNew(virPCIDeviceListClass)))
1970 1971 1972 1973 1974
        return NULL;

    return list;
}

1975 1976
static void
virPCIDeviceListDispose(void *obj)
1977
{
1978
    virPCIDeviceListPtr list = obj;
1979
    size_t i;
1980 1981

    for (i = 0; i < list->count; i++) {
1982
        virPCIDeviceFree(list->devs[i]);
1983 1984 1985 1986 1987 1988 1989 1990
        list->devs[i] = NULL;
    }

    list->count = 0;
    VIR_FREE(list->devs);
}

int
1991 1992
virPCIDeviceListAdd(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
1993
{
1994
    if (virPCIDeviceListFind(list, dev)) {
1995
        virReportError(VIR_ERR_INTERNAL_ERROR,
1996 1997 1998
                       _("Device %s is already in use"), dev->name);
        return -1;
    }
1999
    return VIR_APPEND_ELEMENT(list->devs, list->count, dev);
2000 2001
}

L
Laine Stump 已提交
2002 2003 2004 2005 2006

/**
 * virPCIDeviceListAddCopy:
 * @list: list to add to
 * @dev: device whose copy should be added
 *
 * Adds a *copy* of @dev to @list; @dev itself is left untouched.
 *
 * Returns: 0 on success, -1 if copying or adding failed.
 */
int
virPCIDeviceListAddCopy(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    VIR_AUTOPTR(virPCIDevice) copy = virPCIDeviceCopy(dev);

    if (!copy || virPCIDeviceListAdd(list, copy) < 0)
        return -1;

    /* the list holds the copy now; keep the autoptr from freeing it */
    copy = NULL;
    return 0;
}


2019 2020 2021
/**
 * virPCIDeviceListGet:
 * @list: list to read from
 * @idx: index of the wanted device
 *
 * Returns: the device at @idx, or NULL if @idx is negative or not
 * below the list's element count.
 */
virPCIDevicePtr
virPCIDeviceListGet(virPCIDeviceListPtr list,
                    int idx)
{
    if (idx < 0 || idx >= list->count)
        return NULL;

    return list->devs[idx];
}

2031
size_t
2032
virPCIDeviceListCount(virPCIDeviceListPtr list)
2033
{
2034 2035 2036
    return list->count;
}

2037 2038 2039
/**
 * virPCIDeviceListStealIndex:
 * @list: list to remove from
 * @idx: index of the device to remove
 *
 * Removes the entry at @idx from @list without freeing the device.
 *
 * Returns: the removed device, or NULL if @idx is out of range.
 */
virPCIDevicePtr
virPCIDeviceListStealIndex(virPCIDeviceListPtr list,
                           int idx)
{
    virPCIDevicePtr stolen;

    if (idx < 0 || idx >= list->count)
        return NULL;

    stolen = list->devs[idx];
    VIR_DELETE_ELEMENT(list->devs, idx, list->count);

    return stolen;
}

2051 2052 2053
/**
 * virPCIDeviceListSteal:
 * @list: list to remove from
 * @dev: device whose address identifies the entry to remove
 *
 * Returns: the entry removed from @list (not freed), or NULL if no
 * entry with @dev's address was found.
 */
virPCIDevicePtr
virPCIDeviceListSteal(virPCIDeviceListPtr list,
                      virPCIDevicePtr dev)
{
    int idx = virPCIDeviceListFindIndex(list, dev);

    return virPCIDeviceListStealIndex(list, idx);
}

2058
void
2059 2060
virPCIDeviceListDel(virPCIDeviceListPtr list,
                    virPCIDevicePtr dev)
2061
{
2062
    virPCIDeviceFree(virPCIDeviceListSteal(list, dev));
2063 2064
}

2065
int
2066
virPCIDeviceListFindIndex(virPCIDeviceListPtr list, virPCIDevicePtr dev)
2067
{
2068
    size_t i;
2069

2070 2071 2072 2073 2074 2075
    for (i = 0; i < list->count; i++) {
        virPCIDevicePtr other = list->devs[i];
        if (other->address.domain   == dev->address.domain &&
            other->address.bus      == dev->address.bus    &&
            other->address.slot     == dev->address.slot   &&
            other->address.function == dev->address.function)
2076
            return i;
2077
    }
2078 2079 2080
    return -1;
}

L
Laine Stump 已提交
2081 2082 2083 2084 2085 2086 2087 2088

/**
 * virPCIDeviceListFindByIDs:
 * @list: list to search
 * @domain: PCI domain to match
 * @bus: PCI bus to match
 * @slot: PCI slot to match
 * @function: PCI function to match
 *
 * Returns: the first entry of @list with the given address (still
 * held by the list), or NULL if there is none.
 */
virPCIDevicePtr
virPCIDeviceListFindByIDs(virPCIDeviceListPtr list,
                          unsigned int domain,
                          unsigned int bus,
                          unsigned int slot,
                          unsigned int function)
{
    size_t i;

    for (i = 0; i < list->count; i++) {
        virPCIDevicePtr entry = list->devs[i];

        if (entry->address.domain != domain ||
            entry->address.bus != bus ||
            entry->address.slot != slot ||
            entry->address.function != function)
            continue;

        return entry;
    }

    return NULL;
}


2103 2104
/**
 * virPCIDeviceListFind:
 * @list: list to search
 * @dev: device whose address is looked up
 *
 * Returns: the entry of @list with the same address as @dev (still
 * held by the list), or NULL if there is none.
 */
virPCIDevicePtr
virPCIDeviceListFind(virPCIDeviceListPtr list, virPCIDevicePtr dev)
{
    int idx = virPCIDeviceListFindIndex(list, dev);

    return idx >= 0 ? list->devs[idx] : NULL;
}
2113 2114


2115 2116 2117
/**
 * virPCIDeviceFileIterate:
 * @dev: device whose sysfs directory is scanned
 * @actor: callback invoked for each matching file
 * @opaque: passed through to @actor
 *
 * Walks the device's directory under /sys/bus/pci/devices and calls
 * @actor with the full path of every entry named "config", "rom",
 * "vendor", "device" or "reset", or whose name starts with "resource".
 * Iteration stops at the first @actor call that returns < 0.
 *
 * Returns: 0 on success, -1 on failure.
 */
int
virPCIDeviceFileIterate(virPCIDevicePtr dev,
                        virPCIDeviceFileActor actor,
                        void *opaque)
{
    VIR_AUTOFREE(char *) pcidir = NULL;
    DIR *dir = NULL;
    struct dirent *ent;
    int direrr;
    int ret = -1;

    if (virAsprintf(&pcidir, "/sys/bus/pci/devices/%04x:%02x:%02x.%x",
                    dev->address.domain, dev->address.bus,
                    dev->address.slot, dev->address.function) < 0)
        return -1;

    if (virDirOpen(&dir, pcidir) < 0)
        goto cleanup;

    while ((direrr = virDirRead(dir, &ent, pcidir)) > 0) {
        VIR_AUTOFREE(char *) file = NULL;

        /* Device assignment requires:
         *   $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN,
         *   $PCIDIR/rom, $PCIDIR/reset, $PCIDIR/vendor, $PCIDIR/device
         */
        if (!STREQ(ent->d_name, "config") &&
            !STRPREFIX(ent->d_name, "resource") &&
            !STREQ(ent->d_name, "rom") &&
            !STREQ(ent->d_name, "vendor") &&
            !STREQ(ent->d_name, "device") &&
            !STREQ(ent->d_name, "reset"))
            continue;

        if (virAsprintf(&file, "%s/%s", pcidir, ent->d_name) < 0)
            goto cleanup;
        if ((actor)(dev, file, opaque) < 0)
            goto cleanup;
    }

    if (direrr >= 0)
        ret = 0;

 cleanup:
    VIR_DIR_CLOSE(dir);
    return ret;
}
J
Jiri Denemark 已提交
2160

L
Laine Stump 已提交
2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171

/**
 * virPCIDeviceAddressIOMMUGroupIterate:
 * @orig: address of the device whose iommu_group is walked
 * @actor: callback invoked once per device address
 * @opaque: passed through to @actor
 *
 * Calls @actor for every device listed in @orig's
 * iommu_group/devices directory. When that directory cannot be
 * opened, @actor is called exactly once, for @orig itself, and its
 * return value becomes the result.
 *
 * Returns: 0 on success, -1 on failure (an unparsable directory
 * entry, a read error, or @actor returning < 0).
 */
int
virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddressPtr orig,
                                     virPCIDeviceAddressActor actor,
                                     void *opaque)
{
    VIR_AUTOFREE(char *) groupPath = NULL;
    DIR *groupDir = NULL;
    struct dirent *ent;
    int direrr;
    int ret = -1;

    if (virAsprintf(&groupPath,
                    PCI_SYSFS "devices/%04x:%02x:%02x.%x/iommu_group/devices",
                    orig->domain, orig->bus, orig->slot, orig->function) < 0)
        return -1;

    if (virDirOpenQuiet(&groupDir, groupPath) < 0) {
        /* just process the original device, nothing more */
        ret = (actor)(orig, opaque);
        goto cleanup;
    }

    while ((direrr = virDirRead(groupDir, &ent, groupPath)) > 0) {
        virPCIDeviceAddress member;

        if (virPCIDeviceAddressParse(ent->d_name, &member) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Found invalid device link '%s' in '%s'"),
                           ent->d_name, groupPath);
            goto cleanup;
        }

        if ((actor)(&member, opaque) < 0)
            goto cleanup;
    }

    if (direrr >= 0)
        ret = 0;

 cleanup:
    VIR_DIR_CLOSE(groupDir);
    return ret;
}


/**
 * virPCIDeviceGetIOMMUGroupAddOne:
 * @newDevAddr: address of the device to add
 * @opaque: the virPCIDeviceList to add to
 *
 * Creates a device object for @newDevAddr and adds it to the list
 * passed in @opaque.
 *
 * Returns: 0 on success, -1 if the device could not be created or
 * added.
 */
static int
virPCIDeviceGetIOMMUGroupAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceListPtr groupList = opaque;
    VIR_AUTOPTR(virPCIDevice) newDev = virPCIDeviceNew(newDevAddr->domain,
                                                       newDevAddr->bus,
                                                       newDevAddr->slot,
                                                       newDevAddr->function);

    if (!newDev || virPCIDeviceListAdd(groupList, newDev) < 0)
        return -1;

    newDev = NULL; /* it's now on the list */
    return 0;
}


/**
 * virPCIDeviceGetIOMMUGroupList:
 * @dev: device whose iommu_group is listed
 *
 * Builds a virPCIDeviceList containing all of the devices in the same
 * iommu_group as @dev.
 *
 * Returns: the new list, or NULL on failure.
 */
virPCIDeviceListPtr
virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev)
{
    virPCIDeviceListPtr groupList = virPCIDeviceListNew();

    if (groupList &&
        virPCIDeviceAddressIOMMUGroupIterate(&(dev->address),
                                             virPCIDeviceGetIOMMUGroupAddOne,
                                             groupList) >= 0)
        return groupList;

    virObjectUnref(groupList);
    return NULL;
}


/* Bundle of the two out-parameters of
 * virPCIDeviceAddressGetIOMMUGroupAddresses(), so that they can be
 * handed to virPCIGetIOMMUGroupAddressesAddOne() through its single
 * opaque pointer. */
typedef struct {
    virPCIDeviceAddressPtr **iommuGroupDevices; /* caller's array of address pointers */
    size_t *nIommuGroupDevices; /* caller's element count for that array */
} virPCIDeviceAddressList;
typedef virPCIDeviceAddressList *virPCIDeviceAddressListPtr;

/**
 * virPCIGetIOMMUGroupAddressesAddOne:
 * @newDevAddr: address to record
 * @opaque: a virPCIDeviceAddressList describing the destination array
 *
 * Appends a newly allocated copy of @newDevAddr to the array that
 * @opaque refers to.
 *
 * Returns: 0 on success, -1 on failure.
 */
static int
virPCIGetIOMMUGroupAddressesAddOne(virPCIDeviceAddressPtr newDevAddr, void *opaque)
{
    virPCIDeviceAddressListPtr addrList = opaque;
    virPCIDeviceAddressPtr copyAddr;
    int ret = -1;

    /* make a copy to insert onto the list */
    if (VIR_ALLOC(copyAddr) < 0)
        goto cleanup;

    *copyAddr = *newDevAddr;

    if (VIR_APPEND_ELEMENT(*addrList->iommuGroupDevices,
                           *addrList->nIommuGroupDevices, copyAddr) >= 0)
        ret = 0;

 cleanup:
    VIR_FREE(copyAddr);
    return ret;
}


/**
 * virPCIDeviceAddressGetIOMMUGroupAddresses:
 * @devAddr: address of the device whose iommu_group is examined
 * @iommuGroupDevices: array that the group's device addresses are
 *                     appended to
 * @nIommuGroupDevices: element count of @iommuGroupDevices
 *
 * Appends a newly allocated copy of the address of every device in the
 * same iommu_group as @devAddr to *@iommuGroupDevices (if the device
 * has no iommu_group, only @devAddr itself is appended).
 *
 * No cleanup is done here on failure: entries appended before the
 * error remain in *@iommuGroupDevices.
 *
 * Returns: 0 on success, -1 on failure.
 */
int
virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddressPtr devAddr,
                                          virPCIDeviceAddressPtr **iommuGroupDevices,
                                          size_t *nIommuGroupDevices)
{
    virPCIDeviceAddressList addrList = { iommuGroupDevices,
                                         nIommuGroupDevices };

    if (virPCIDeviceAddressIOMMUGroupIterate(devAddr,
                                             virPCIGetIOMMUGroupAddressesAddOne,
                                             &addrList) < 0)
        return -1;

    return 0;
}


/**
 * virPCIDeviceAddressGetIOMMUGroupNum:
 * @addr: address of the device to query
 *
 * Resolves the device's iommu_group symlink and parses the last path
 * component as the group number.
 *
 * Returns: the group number, -2 if the iommu_group entry is not a
 * symlink (i.e. there is no iommu_group for the device), or -1 on any
 * other error.
 */
int
virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddressPtr addr)
{
    VIR_AUTOFREE(char *) devName = NULL;
    VIR_AUTOFREE(char *) devPath = NULL;
    VIR_AUTOFREE(char *) groupPath = NULL;
    const char *groupNumStr;
    unsigned int groupNum;

    if (virAsprintf(&devName, "%.4x:%.2x:%.2x.%.1x", addr->domain,
                    addr->bus, addr->slot, addr->function) < 0)
        return -1;

    devPath = virPCIFile(devName, "iommu_group");
    if (!devPath)
        return -1;

    if (virFileIsLink(devPath) != 1)
        return -2;

    if (virFileResolveLink(devPath, &groupPath) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       devName, devPath);
        return -1;
    }

    groupNumStr = last_component(groupPath);
    if (virStrToLong_ui(groupNumStr, NULL, 10, &groupNum) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("device %s iommu_group symlink %s has "
                         "invalid group number %s"),
                       devName, groupPath, groupNumStr);
        return -1;
    }

    return groupNum;
}


2356 2357
/* virPCIDeviceGetIOMMUGroupDev - return the name of the device used
 * to control this PCI device's group (e.g. "/dev/vfio/15")
2358 2359
 */
char *
2360
virPCIDeviceGetIOMMUGroupDev(virPCIDevicePtr dev)
2361
{
2362 2363
    VIR_AUTOFREE(char *) devPath = NULL;
    VIR_AUTOFREE(char *) groupPath = NULL;
2364 2365
    char *groupDev = NULL;

2366
    if (!(devPath = virPCIFile(dev->name, "iommu_group")))
2367
        return NULL;
2368 2369 2370 2371
    if (virFileIsLink(devPath) != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid device %s iommu_group file %s is not a symlink"),
                       dev->name, devPath);
2372
        return NULL;
2373 2374 2375 2376 2377
    }
    if (virFileResolveLink(devPath, &groupPath) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to resolve device %s iommu_group symlink %s"),
                       dev->name, devPath);
2378
        return NULL;
2379 2380
    }
    if (virAsprintf(&groupDev, "/dev/vfio/%s",
2381
                    last_component(groupPath)) < 0)
2382 2383
        return NULL;

2384 2385 2386
    return groupDev;
}

J
Jiri Denemark 已提交
2387
static int
2388
virPCIDeviceDownstreamLacksACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2389 2390 2391 2392
{
    uint16_t flags;
    uint16_t ctrl;
    unsigned int pos;
2393 2394
    int fd;
    int ret = 0;
2395
    uint16_t device_class;
J
Jiri Denemark 已提交
2396

2397
    if ((fd = virPCIDeviceConfigOpen(dev, true)) < 0)
J
Jiri Denemark 已提交
2398 2399
        return -1;

2400
    if (virPCIDeviceInit(dev, fd) < 0) {
2401 2402 2403 2404
        ret = -1;
        goto cleanup;
    }

2405 2406 2407
    if (virPCIDeviceReadClass(dev, &device_class) < 0)
        goto cleanup;

J
Jiri Denemark 已提交
2408
    pos = dev->pcie_cap_pos;
2409
    if (!pos || device_class != PCI_CLASS_BRIDGE_PCI)
2410
        goto cleanup;
J
Jiri Denemark 已提交
2411

2412
    flags = virPCIDeviceRead16(dev, fd, pos + PCI_EXP_FLAGS);
J
Jiri Denemark 已提交
2413
    if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM)
2414
        goto cleanup;
J
Jiri Denemark 已提交
2415

2416
    pos = virPCIDeviceFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS);
J
Jiri Denemark 已提交
2417 2418
    if (!pos) {
        VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name);
2419 2420
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2421 2422
    }

2423
    ctrl = virPCIDeviceRead16(dev, fd, pos + PCI_EXT_ACS_CTRL);
J
Jiri Denemark 已提交
2424 2425 2426
    if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) {
        VIR_DEBUG("%s %s: downstream port has ACS disabled",
                  dev->id, dev->name);
2427 2428
        ret = 1;
        goto cleanup;
J
Jiri Denemark 已提交
2429 2430
    }

2431
 cleanup:
2432
    virPCIDeviceConfigClose(dev, fd);
2433
    return ret;
J
Jiri Denemark 已提交
2434 2435 2436
}

static int
2437
virPCIDeviceIsBehindSwitchLackingACS(virPCIDevicePtr dev)
J
Jiri Denemark 已提交
2438
{
2439
    VIR_AUTOPTR(virPCIDevice) parent = NULL;
J
Jiri Denemark 已提交
2440

2441
    if (virPCIDeviceGetParent(dev, &parent) < 0)
2442
        return -1;
2443 2444 2445 2446 2447
    if (!parent) {
        /* if we have no parent, and this is the root bus, ACS doesn't come
         * into play since devices on the root bus can't P2P without going
         * through the root IOMMU.
         */
2448
        if (dev->address.bus == 0) {
2449
            return 0;
2450
        } else {
2451
            virReportError(VIR_ERR_INTERNAL_ERROR,
2452 2453 2454 2455
                           _("Failed to find parent device for %s"),
                           dev->name);
            return -1;
        }
J
Jiri Denemark 已提交
2456 2457 2458 2459 2460 2461 2462
    }

    /* XXX we should rather fail when we can't find device's parent and
     * stop the loop when we get to root instead of just stopping when no
     * parent can be found
     */
    do {
2463
        VIR_AUTOPTR(virPCIDevice) tmp = NULL;
J
Jiri Denemark 已提交
2464
        int acs;
2465
        int ret;
J
Jiri Denemark 已提交
2466

2467
        acs = virPCIDeviceDownstreamLacksACS(parent);
J
Jiri Denemark 已提交
2468 2469 2470 2471 2472 2473 2474 2475 2476

        if (acs) {
            if (acs < 0)
                return -1;
            else
                return 1;
        }

        tmp = parent;
2477
        ret = virPCIDeviceGetParent(parent, &parent);
2478 2479
        if (ret < 0)
            return -1;
J
Jiri Denemark 已提交
2480 2481 2482 2483 2484
    } while (parent);

    return 0;
}

2485 2486
/**
 * virPCIDeviceIsAssignable:
 * @dev: device to check
 * @strict_acs_check: non-zero to refuse devices behind a switch
 *                    lacking ACS
 *
 * Returns: 1 if @dev may be assigned, 0 if it may not (including the
 * case where the ACS check itself failed).
 */
int
virPCIDeviceIsAssignable(virPCIDevicePtr dev,
                         int strict_acs_check)
{
    int lacksACS;

    /* XXX This could be a great place to actually check that a non-managed
     * device isn't in use, e.g. by checking that device is either un-bound
     * or bound to a stub driver.
     */

    lacksACS = virPCIDeviceIsBehindSwitchLackingACS(dev);
    if (lacksACS < 0)
        return 0;

    if (lacksACS == 0)
        return 1;

    if (strict_acs_check) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Device %s is behind a switch lacking ACS and "
                         "cannot be assigned"),
                       dev->name);
        return 0;
    }

    VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed",
              dev->id, dev->name);
    return 1;
}
2514 2515 2516 2517 2518 2519 2520 2521 2522 2523

/**
 * logStrToLong_ui:
 * @s: string to convert
 * @end_ptr: passed through to virStrToLong_ui()
 * @base: numeric base
 * @result: where the converted value is stored
 *
 * Wrapper around virStrToLong_ui() that logs an error when the
 * conversion does not return 0.
 *
 * Returns: whatever virStrToLong_ui() returned.
 */
static int
logStrToLong_ui(char const *s,
                char **end_ptr,
                int base,
                unsigned int *result)
{
    int rc = virStrToLong_ui(s, end_ptr, base, result);

    if (rc != 0)
        VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s);

    return rc;
}

2529 2530
int
virPCIDeviceAddressParse(char *address,
2531
                         virPCIDeviceAddressPtr bdf)
2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557
{
    char *p = NULL;
    int ret = -1;

    if ((address == NULL) || (logStrToLong_ui(address, &p, 16,
                                              &bdf->domain) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->bus) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->slot) == -1)) {
        goto out;
    }

    if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
                                        &bdf->function) == -1)) {
        goto out;
    }

    ret = 0;

2558
 out:
2559 2560 2561
    return ret;
}

2562

2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587
/**
 * virZPCIDeviceAddressIsValid:
 * @zpci: zPCI address to check
 *
 * Only the uid is checked: it must be non-zero and no larger than
 * VIR_DOMAIN_DEVICE_ZPCI_MAX_UID. An error is reported otherwise.
 *
 * Returns: true if the uid is in range, false otherwise.
 */
bool
virZPCIDeviceAddressIsValid(virZPCIDeviceAddressPtr zpci)
{
    /* We don't need to check fid because fid covers
     * all range of uint32 type.
     */
    if (zpci->uid != 0 &&
        zpci->uid <= VIR_DOMAIN_DEVICE_ZPCI_MAX_UID)
        return true;

    virReportError(VIR_ERR_XML_ERROR,
                   _("Invalid PCI address uid='0x%.4x', "
                     "must be > 0x0000 and <= 0x%.4x"),
                   zpci->uid,
                   VIR_DOMAIN_DEVICE_ZPCI_MAX_UID);
    return false;
}

/**
 * virZPCIDeviceAddressIsEmpty:
 * @addr: zPCI address to check
 *
 * Returns: true if both uid and fid are zero, false otherwise.
 */
bool
virZPCIDeviceAddressIsEmpty(const virZPCIDeviceAddress *addr)
{
    return addr->uid == 0 && addr->fid == 0;
}

2588
#ifdef __linux__
2589

2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602
/**
 * virPCIDeviceAddressIsEqual:
 * @bdf1: first address
 * @bdf2: second address
 *
 * Returns: true if domain, bus, slot and function all match.
 */
static bool
virPCIDeviceAddressIsEqual(virPCIDeviceAddressPtr bdf1,
                           virPCIDeviceAddressPtr bdf2)
{
    if (bdf1->domain != bdf2->domain)
        return false;
    if (bdf1->bus != bdf2->bus)
        return false;
    if (bdf1->slot != bdf2->slot)
        return false;

    return bdf1->function == bdf2->function;
}

2603
virPCIDeviceAddressPtr
2604
virPCIGetDeviceAddressFromSysfsLink(const char *device_link)
2605
{
2606
    virPCIDeviceAddressPtr bdf = NULL;
2607
    char *config_address = NULL;
2608
    VIR_AUTOFREE(char *) device_path = NULL;
2609 2610

    if (!virFileExists(device_link)) {
2611
        VIR_DEBUG("'%s' does not exist", device_link);
2612
        return NULL;
2613 2614
    }

2615
    device_path = virFileCanonicalizePath(device_link);
2616
    if (device_path == NULL) {
2617 2618 2619
        virReportSystemError(errno,
                             _("Failed to resolve device link '%s'"),
                             device_link);
2620
        return NULL;
2621 2622
    }

2623
    config_address = last_component(device_path);
2624
    if (VIR_ALLOC(bdf) < 0)
2625
        return NULL;
2626

2627
    if (virPCIDeviceAddressParse(config_address, bdf) < 0) {
2628
        virReportError(VIR_ERR_INTERNAL_ERROR,
2629 2630
                       _("Failed to parse PCI config address '%s'"),
                       config_address);
2631
        VIR_FREE(bdf);
2632
        return NULL;
2633 2634
    }

2635
    return bdf;
2636 2637
}

2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650
/**
 * virPCIGetPhysicalFunction:
 * @vf_sysfs_path: sysfs path for the virtual function
 * @pf: where to store the physical function's address
 *
 * Given @vf_sysfs_path, this function will store the pointer
 * to a newly-allocated virPCIDeviceAddress in @pf.
 *
 * @pf might be NULL if @vf_sysfs_path does not point to a
 * virtual function. If it's not NULL, then it should be
 * freed by the caller when no longer needed.
 *
 * Returns: >=0 on success, <0 on failure
2651 2652
 */
int
2653
virPCIGetPhysicalFunction(const char *vf_sysfs_path,
2654
                          virPCIDeviceAddressPtr *pf)
2655
{
2656
    VIR_AUTOFREE(char *) device_link = NULL;
2657

2658 2659
    *pf = NULL;

2660 2661
    if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) {
        virReportOOMError();
2662
        return -1;
2663 2664
    }

2665
    if ((*pf = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2666 2667 2668
        VIR_DEBUG("PF for VF device '%s': %.4x:%.2x:%.2x.%.1x", vf_sysfs_path,
                  (*pf)->domain, (*pf)->bus, (*pf)->slot, (*pf)->function);
    }
2669

2670
    return 0;
2671 2672
}

2673

2674 2675 2676 2677
/*
 * Returns virtual functions of a physical function
 */
int
2678 2679
virPCIGetVirtualFunctions(const char *sysfs_path,
                          virPCIDeviceAddressPtr **virtual_functions,
2680 2681
                          size_t *num_virtual_functions,
                          unsigned int *max_virtual_functions)
2682 2683
{
    int ret = -1;
2684
    size_t i;
2685 2686
    VIR_AUTOFREE(char *) totalvfs_file = NULL;
    VIR_AUTOFREE(char *) totalvfs_str = NULL;
2687
    virPCIDeviceAddressPtr config_addr = NULL;
2688

2689 2690
    *virtual_functions = NULL;
    *num_virtual_functions = 0;
2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706
    *max_virtual_functions = 0;

    if (virAsprintf(&totalvfs_file, "%s/sriov_totalvfs", sysfs_path) < 0)
       goto error;
    if (virFileExists(totalvfs_file)) {
        char *end = NULL; /* so that terminating \n doesn't create error */

        if (virFileReadAll(totalvfs_file, 16, &totalvfs_str) < 0)
            goto error;
        if (virStrToLong_ui(totalvfs_str, &end, 10, max_virtual_functions) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Unrecognized value in %s: %s"),
                           totalvfs_file, totalvfs_str);
            goto error;
        }
    }
2707

2708
    do {
2709
        VIR_AUTOFREE(char *) device_link = NULL;
2710 2711 2712
        /* look for virtfn%d links until one isn't found */
        if (virAsprintf(&device_link, "%s/virtfn%zu", sysfs_path, *num_virtual_functions) < 0)
            goto error;
2713

2714 2715
        if (!virFileExists(device_link))
            break;
2716

2717
        if (!(config_addr = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2718 2719 2720 2721 2722
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Failed to get SRIOV function from device link '%s'"),
                           device_link);
            goto error;
        }
2723

2724 2725
        if (VIR_APPEND_ELEMENT(*virtual_functions, *num_virtual_functions,
                               config_addr) < 0)
2726 2727
            goto error;
    } while (1);
2728

2729 2730
    VIR_DEBUG("Found %zu virtual functions for %s",
              *num_virtual_functions, sysfs_path);
2731
    ret = 0;
2732
 cleanup:
2733
    VIR_FREE(config_addr);
2734
    return ret;
2735

2736
 error:
2737 2738 2739
    for (i = 0; i < *num_virtual_functions; i++)
        VIR_FREE((*virtual_functions)[i]);
    VIR_FREE(*virtual_functions);
2740
    *num_virtual_functions = 0;
2741
    goto cleanup;
2742
}
2743

2744

2745 2746 2747 2748
/*
 * Returns 1 if vf device is a virtual function, 0 if not, -1 on error
 */
int
2749
virPCIIsVirtualFunction(const char *vf_sysfs_device_link)
2750
{
2751
    VIR_AUTOFREE(char *) vf_sysfs_physfn_link = NULL;
2752 2753

    if (virAsprintf(&vf_sysfs_physfn_link, "%s/physfn",
2754
                    vf_sysfs_device_link) < 0)
2755
        return -1;
2756

2757
    return virFileExists(vf_sysfs_physfn_link);
2758 2759 2760 2761 2762 2763
}

/*
 * Returns the sriov virtual function index of vf given its pf
 */
int
2764 2765 2766
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
                              const char *vf_sysfs_device_link,
                              int *vf_index)
2767
{
2768 2769
    int ret = -1;
    size_t i;
2770
    size_t num_virt_fns = 0;
2771
    unsigned int max_virt_fns = 0;
2772 2773
    virPCIDeviceAddressPtr vf_bdf = NULL;
    virPCIDeviceAddressPtr *virt_fns = NULL;
2774

2775
    if (!(vf_bdf = virPCIGetDeviceAddressFromSysfsLink(vf_sysfs_device_link)))
2776 2777
        return ret;

2778
    if (virPCIGetVirtualFunctions(pf_sysfs_device_link, &virt_fns,
2779
                                  &num_virt_fns, &max_virt_fns) < 0) {
2780
        virReportError(VIR_ERR_INTERNAL_ERROR,
2781
                       _("Error getting physical function's '%s' "
2782
                         "virtual_functions"), pf_sysfs_device_link);
2783 2784 2785 2786
        goto out;
    }

    for (i = 0; i < num_virt_fns; i++) {
2787 2788 2789 2790 2791
        if (virPCIDeviceAddressIsEqual(vf_bdf, virt_fns[i])) {
            *vf_index = i;
            ret = 0;
            break;
        }
2792 2793
    }

2794
 out:
2795 2796 2797

    /* free virtual functions */
    for (i = 0; i < num_virt_fns; i++)
2798
        VIR_FREE(virt_fns[i]);
2799

A
ajia@redhat.com 已提交
2800
    VIR_FREE(virt_fns);
2801 2802 2803 2804 2805
    VIR_FREE(vf_bdf);

    return ret;
}

2806 2807 2808 2809 2810
/*
 * Returns a path to the PCI sysfs file given the BDF of the PCI function
 */

int
2811
virPCIGetSysfsFile(char *virPCIDeviceName, char **pci_sysfs_device_link)
2812
{
2813 2814 2815 2816
    if (virAsprintf(pci_sysfs_device_link, PCI_SYSFS "devices/%s",
                    virPCIDeviceName) < 0)
        return -1;
    return 0;
2817 2818
}

R
Roopa Prabhu 已提交
2819
int
2820
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
2821
                                char **pci_sysfs_device_link)
R
Roopa Prabhu 已提交
2822
{
2823
    if (virAsprintf(pci_sysfs_device_link,
2824 2825 2826
                    PCI_SYSFS "devices/%04x:%02x:%02x.%x",
                    addr->domain, addr->bus,
                    addr->slot, addr->function) < 0)
2827 2828
        return -1;
    return 0;
R
Roopa Prabhu 已提交
2829 2830
}

2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841
/**
 * virPCIGetNetName:
 * @device_link_sysfs_path: sysfs path to the PCI device
 * @idx: used to choose which netdev when there are several
 *       (ignored if physPortID is set)
 * @physPortID: match this string in the netdev's phys_port_id
 *       (or NULL to ignore and use idx instead)
 * @netname: used to return the name of the netdev
 *       (set to NULL (but returns success) if there is no netdev)
 *
 * Returns 0 on success, -1 on error (error has been logged)
2842 2843
 */
int
2844 2845 2846 2847
virPCIGetNetName(const char *device_link_sysfs_path,
                 size_t idx,
                 char *physPortID,
                 char **netname)
2848
{
2849 2850 2851
    VIR_AUTOFREE(char *) pcidev_sysfs_net_path = NULL;
    VIR_AUTOFREE(char *) firstEntryName = NULL;
    VIR_AUTOFREE(char *) thisPhysPortID = NULL;
2852 2853 2854
    int ret = -1;
    DIR *dir = NULL;
    struct dirent *entry = NULL;
2855
    size_t i = 0;
2856

2857 2858
    *netname = NULL;

2859 2860 2861 2862 2863 2864
    if (virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path,
                     "net") == -1) {
        virReportOOMError();
        return -1;
    }

2865 2866 2867
    if (virDirOpenQuiet(&dir, pcidev_sysfs_net_path) < 0) {
        /* this *isn't* an error - caller needs to check for netname == NULL */
        ret = 0;
2868
        goto cleanup;
2869
    }
2870

E
Eric Blake 已提交
2871
    while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) {
2872 2873 2874 2875 2876 2877 2878 2879 2880 2881
        /* if the caller sent a physPortID, compare it to the
         * physportID of this netdev. If not, look for entry[idx].
         */
        if (physPortID) {
            if (virNetDevGetPhysPortID(entry->d_name, &thisPhysPortID) < 0)
                goto cleanup;

            /* if this one doesn't match, keep looking */
            if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) {
                VIR_FREE(thisPhysPortID);
2882 2883 2884 2885 2886 2887 2888 2889 2890 2891
                /* save the first entry we find to use as a failsafe
                 * in case we don't match the phys_port_id. This is
                 * needed because some NIC drivers (e.g. i40e)
                 * implement phys_port_id for PFs, but not for VFs
                 */
                if (!firstEntryName &&
                    VIR_STRDUP(firstEntryName, entry->d_name) < 0) {
                    goto cleanup;
                }

2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902
                continue;
            }
        } else {
            if (i++ < idx)
                continue;
        }

        if (VIR_STRDUP(*netname, entry->d_name) < 0)
            goto cleanup;

        ret = 0;
2903 2904 2905
        break;
    }

2906 2907
    if (ret < 0) {
        if (physPortID) {
2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922
            if (firstEntryName) {
                /* we didn't match the provided phys_port_id, but this
                 * is probably because phys_port_id isn't implemented
                 * for this NIC driver, so just return the first
                 * (probably only) netname we found.
                 */
                *netname = firstEntryName;
                firstEntryName = NULL;
                ret = 0;
            } else {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Could not find network device with "
                                 "phys_port_id '%s' under PCI device at %s"),
                               physPortID, device_link_sysfs_path);
            }
2923 2924 2925 2926 2927
        } else {
            ret = 0; /* no netdev at the given index is *not* an error */
        }
    }
 cleanup:
J
Ján Tomko 已提交
2928
    VIR_DIR_CLOSE(dir);
2929
    return ret;
2930
}
R
Roopa Prabhu 已提交
2931 2932

int
2933
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
2934 2935 2936
                             int pfNetDevIdx,
                             char **pfname,
                             int *vf_index)
R
Roopa Prabhu 已提交
2937
{
2938
    virPCIDeviceAddressPtr pf_config_address = NULL;
2939 2940 2941
    VIR_AUTOFREE(char *) pf_sysfs_device_path = NULL;
    VIR_AUTOFREE(char *) vfname = NULL;
    VIR_AUTOFREE(char *) vfPhysPortID = NULL;
R
Roopa Prabhu 已提交
2942 2943
    int ret = -1;

2944
    if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
2945
        goto cleanup;
R
Roopa Prabhu 已提交
2946

2947
    if (!pf_config_address)
2948
        goto cleanup;
2949

2950 2951
    if (virPCIDeviceAddressGetSysfsFile(pf_config_address,
                                        &pf_sysfs_device_path) < 0) {
2952 2953
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2954

2955 2956 2957
    if (virPCIGetVirtualFunctionIndex(pf_sysfs_device_path,
                                      vf_sysfs_device_path, vf_index) < 0) {
        goto cleanup;
R
Roopa Prabhu 已提交
2958 2959
    }

2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979
    /* If the caller hasn't asked for a specific pfNetDevIdx, and VF
     * is bound to a netdev, learn that netdev's phys_port_id (if
     * available). This can be used to disambiguate when the PF has
     * multiple netdevs. If the VF isn't bound to a netdev, then we
     * return netdev[pfNetDevIdx] on the PF, which may or may not be
     * correct.
     */
    if (pfNetDevIdx == -1) {
        if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
            goto cleanup;

        if (vfname) {
            if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0)
                goto cleanup;
        }
        pfNetDevIdx = 0;
    }

    if (virPCIGetNetName(pf_sysfs_device_path,
                         pfNetDevIdx, vfPhysPortID, pfname) < 0) {
R
Roopa Prabhu 已提交
2980
        goto cleanup;
2981
    }
R
Roopa Prabhu 已提交
2982

2983 2984 2985 2986 2987 2988 2989 2990 2991
    if (!*pfname) {
        /* this shouldn't be possible. A VF can't exist unless its
         * PF device is bound to a network driver
         */
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("The PF device for VF %s has no network device name"),
                       vf_sysfs_device_path);
        goto cleanup;
    }
R
Roopa Prabhu 已提交
2992

2993
    ret = 0;
2994
 cleanup:
R
Roopa Prabhu 已提交
2995 2996 2997 2998 2999
    VIR_FREE(pf_config_address);

    return ret;
}

3000 3001 3002 3003 3004 3005 3006 3007 3008

/**
 * virPCIGetMdevTypes:
 * @sysfspath: sysfs path of the PCI device
 * @types: returns a newly allocated array of mediated device types
 *
 * Reads one type from every entry of "@sysfspath/mdev_supported_types".
 *
 * Returns the number of elements stored in @types, or -1 on error.
 * 0 is returned, with @types left untouched, when virDirOpenIfExists()
 * returns 0 (presumably: the directory does not exist).
 */
ssize_t
virPCIGetMdevTypes(const char *sysfspath,
                   virMediatedDeviceTypePtr **types)
{
    ssize_t ret = -1;
    int rc = -1;
    DIR *dir = NULL;
    struct dirent *entry;
    VIR_AUTOFREE(char *) types_path = NULL;
    VIR_AUTOPTR(virMediatedDeviceType) cur_type = NULL;
    virMediatedDeviceTypePtr *list = NULL;
    size_t nlist = 0;
    size_t i;

    if (virAsprintf(&types_path, "%s/mdev_supported_types", sysfspath) < 0)
        return -1;

    rc = virDirOpenIfExists(&dir, types_path);
    if (rc < 0)
        goto cleanup;

    if (rc == 0) {
        ret = 0;
        goto cleanup;
    }

    while ((rc = virDirRead(dir, &entry, types_path)) > 0) {
        VIR_AUTOFREE(char *) type_dir = NULL;

        /* append the type id to the path and read the attributes from there */
        if (virAsprintf(&type_dir, "%s/%s", types_path, entry->d_name) < 0)
            goto cleanup;

        if (virMediatedDeviceTypeReadAttrs(type_dir, &cur_type) < 0)
            goto cleanup;

        if (VIR_APPEND_ELEMENT(list, nlist, cur_type) < 0)
            goto cleanup;
    }

    /* the loop also ends when reading the directory fails */
    if (rc < 0)
        goto cleanup;

    VIR_STEAL_PTR(*types, list);
    ret = nlist;
    /* the elements now belong to the caller: skip the loop below */
    nlist = 0;

 cleanup:
    for (i = 0; i < nlist; i++)
        virMediatedDeviceTypeFree(list[i]);
    VIR_FREE(list);
    VIR_DIR_CLOSE(dir);
    return ret;
}

3053
#else
3054 3055
/* Error text reported by the stub implementations that are compiled
 * when __linux__ is not defined; translated at the point of use. */
static const char *unsupported = N_("not supported on non-linux platforms");

3056 3057 3058 3059
/* Non-Linux stub: reports the "unsupported" error and returns NULL. */
virPCIDeviceAddressPtr
virPCIGetDeviceAddressFromSysfsLink(const char *device_link ATTRIBUTE_UNUSED)
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return NULL;
}


3064
int
3065
virPCIGetPhysicalFunction(const char *vf_sysfs_path ATTRIBUTE_UNUSED,
3066
                          virPCIDeviceAddressPtr *pf ATTRIBUTE_UNUSED)
3067
{
3068
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3069 3070 3071 3072
    return -1;
}

int
3073 3074
virPCIGetVirtualFunctions(const char *sysfs_path ATTRIBUTE_UNUSED,
                          virPCIDeviceAddressPtr **virtual_functions ATTRIBUTE_UNUSED,
3075 3076
                          size_t *num_virtual_functions ATTRIBUTE_UNUSED,
                          unsigned int *max_virtual_functions ATTRIBUTE_UNUSED)
3077
{
3078
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3079 3080
    return -1;
}
3081 3082

int
E
Eric Blake 已提交
3083
virPCIIsVirtualFunction(const char *vf_sysfs_device_link ATTRIBUTE_UNUSED)
3084
{
3085
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3086 3087 3088 3089
    return -1;
}

int
3090 3091 3092
virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link ATTRIBUTE_UNUSED,
                              const char *vf_sysfs_device_link ATTRIBUTE_UNUSED,
                              int *vf_index ATTRIBUTE_UNUSED)
3093
{
3094
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3095 3096 3097 3098
    return -1;

}

3099
int
3100 3101
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr dev ATTRIBUTE_UNUSED,
                                char **pci_sysfs_device_link ATTRIBUTE_UNUSED)
3102
{
3103
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3104 3105 3106
    return -1;
}

3107
int
3108
virPCIGetNetName(const char *device_link_sysfs_path ATTRIBUTE_UNUSED,
3109 3110
                 size_t idx ATTRIBUTE_UNUSED,
                 char *physPortID ATTRIBUTE_UNUSED,
3111
                 char **netname ATTRIBUTE_UNUSED)
3112
{
3113
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
3114 3115
    return -1;
}
R
Roopa Prabhu 已提交
3116 3117

int
3118
virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED,
3119
                             int pfNetDevIdx ATTRIBUTE_UNUSED,
3120 3121
                             char **pfname ATTRIBUTE_UNUSED,
                             int *vf_index ATTRIBUTE_UNUSED)
R
Roopa Prabhu 已提交
3122
{
3123
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
R
Roopa Prabhu 已提交
3124 3125
    return -1;
}
3126 3127 3128 3129 3130 3131 3132 3133 3134


/* Non-Linux stub: reports the "unsupported" error and returns -1;
 * @types is not written. */
ssize_t
virPCIGetMdevTypes(const char *sysfspath ATTRIBUTE_UNUSED,
                   virMediatedDeviceTypePtr **types ATTRIBUTE_UNUSED)
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
    return -1;
}
3135
#endif /* __linux__ */
3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219

/**
 * virPCIDeviceIsPCIExpress:
 * @dev: PCI device to examine
 *
 * Opens the device's config space and runs virPCIDeviceInit() on it.
 *
 * Returns 1 if dev->pcie_cap_pos is non-zero afterwards, 0 if it is zero,
 * -1 if the config space could not be opened or initialization failed.
 */
int
virPCIDeviceIsPCIExpress(virPCIDevicePtr dev)
{
    int ret = -1;
    int fd = virPCIDeviceConfigOpen(dev, true);

    if (fd < 0)
        return -1;

    if (virPCIDeviceInit(dev, fd) >= 0)
        ret = (dev->pcie_cap_pos != 0);

    virPCIDeviceConfigClose(dev, fd);
    return ret;
}

/**
 * virPCIDeviceHasPCIExpressLink:
 * @dev: PCI device to examine
 *
 * Reads the 16-bit flags register at dev->pcie_cap_pos + PCI_CAP_FLAGS
 * and extracts the device/port type from it.
 *
 * NOTE(review): dev->pcie_cap_pos is not checked for 0 here; presumably
 * callers only use this on devices already known to be PCI Express.
 *
 * Returns 0 if the type is PCI_EXP_TYPE_ROOT_INT_EP or
 * PCI_EXP_TYPE_ROOT_EC, 1 for any other type, -1 if the config space
 * could not be opened or initialization failed.
 */
int
virPCIDeviceHasPCIExpressLink(virPCIDevicePtr dev)
{
    int ret = -1;
    int fd = virPCIDeviceConfigOpen(dev, true);

    if (fd < 0)
        return -1;

    if (virPCIDeviceInit(dev, fd) >= 0) {
        uint16_t flags = virPCIDeviceRead16(dev, fd,
                                            dev->pcie_cap_pos + PCI_CAP_FLAGS);
        uint16_t devtype = (flags & PCI_EXP_FLAGS_TYPE) >> 4;

        ret = devtype != PCI_EXP_TYPE_ROOT_INT_EP &&
              devtype != PCI_EXP_TYPE_ROOT_EC;
    }

    virPCIDeviceConfigClose(dev, fd);
    return ret;
}

/**
 * virPCIDeviceGetLinkCapSta:
 * @dev: PCI device to examine
 * @cap_port: returns bits 24 and up of the link capabilities register
 * @cap_speed: returns the PCI_EXP_LNKCAP_SPEED field of that register
 * @cap_width: returns the PCI_EXP_LNKCAP_WIDTH field of that register
 * @sta_speed: returns the PCI_EXP_LNKSTA_SPEED field of the link status
 *             register
 * @sta_width: returns the PCI_EXP_LNKSTA_WIDTH field of the link status
 *             register
 *
 * Returns 0 on success, -1 on failure; an error is reported here when
 * @dev has no PCI Express capability. The output parameters are only
 * written on success.
 */
int
virPCIDeviceGetLinkCapSta(virPCIDevicePtr dev,
                          int *cap_port,
                          unsigned int *cap_speed,
                          unsigned int *cap_width,
                          unsigned int *sta_speed,
                          unsigned int *sta_width)
{
    uint32_t lnkcap;
    uint32_t lnksta;
    int ret = -1;
    int fd = virPCIDeviceConfigOpen(dev, true);

    if (fd < 0)
        return -1;

    if (virPCIDeviceInit(dev, fd) < 0)
        goto cleanup;

    if (dev->pcie_cap_pos == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("pci device %s is not a PCI-Express device"),
                       dev->name);
        goto cleanup;
    }

    /* link capabilities: 32-bit register */
    lnkcap = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKCAP);
    *cap_port = lnkcap >> 24;
    *cap_speed = lnkcap & PCI_EXP_LNKCAP_SPEED;
    *cap_width = (lnkcap & PCI_EXP_LNKCAP_WIDTH) >> 4;

    /* link status: 16-bit register */
    lnksta = virPCIDeviceRead16(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKSTA);
    *sta_speed = lnksta & PCI_EXP_LNKSTA_SPEED;
    *sta_width = (lnksta & PCI_EXP_LNKSTA_WIDTH) >> 4;

    ret = 0;

 cleanup:
    virPCIDeviceConfigClose(dev, fd);
    return ret;
}
3220 3221


3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248
/**
 * virPCIGetHeaderType:
 * @dev: PCI device to examine
 * @hdrType: returns the header type, or -1 on failure
 *
 * Reads the PCI_HEADER_TYPE byte from the device's config space and masks
 * it with PCI_HEADER_TYPE_MASK. Values of VIR_PCI_HEADER_LAST or above
 * are rejected with an error.
 *
 * Returns 0 on success, -1 on failure
 */
int virPCIGetHeaderType(virPCIDevicePtr dev, int *hdrType)
{
    uint8_t raw;
    int fd;

    *hdrType = -1;

    fd = virPCIDeviceConfigOpen(dev, true);
    if (fd < 0)
        return -1;

    raw = virPCIDeviceRead8(dev, fd, PCI_HEADER_TYPE);
    virPCIDeviceConfigClose(dev, fd);

    raw &= PCI_HEADER_TYPE_MASK;

    if (raw < VIR_PCI_HEADER_LAST) {
        *hdrType = raw;
        return 0;
    }

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("Unknown PCI header type '%d'"), raw);
    return -1;
}


3249 3250 3251 3252 3253 3254 3255 3256 3257 3258
/* Frees @dev together with its link_cap and link_sta members;
 * does nothing when @dev is NULL. */
void
virPCIEDeviceInfoFree(virPCIEDeviceInfoPtr dev)
{
    if (dev) {
        VIR_FREE(dev->link_cap);
        VIR_FREE(dev->link_sta);
        VIR_FREE(dev);
    }
}
3259 3260 3261 3262 3263 3264

/* Releases @address via VIR_FREE(). Only the function's local copy of
 * the pointer is affected by the macro; the caller's own pointer
 * variable is not changed. */
void
virPCIDeviceAddressFree(virPCIDeviceAddressPtr address)
{
    VIR_FREE(address);
}