Commit fa272574 authored by Peter Maydell

Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* icount fix (Clement)
* dumping fixes for non-volatile memory (Marc-André, myself)
* x86 emulation fix (Rudolf)
* recent Hyper-V CPUID flag (Vitaly)
* Q35 doc fix (Daniel)
* lsi fix (Prasad)
* SCSI block limits emulation fixes (myself)
* qemu_thread_atexit rework (Peter)
* ivshmem memory leak fix (Igor)

# gpg: Signature made Tue 06 Nov 2018 21:34:30 GMT
# gpg:                using RSA key BFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  util/qemu-thread-posix: Fix qemu_thread_atexit* for OSX
  include/qemu/thread.h: Document qemu_thread_atexit* API
  scsi-generic: do not do VPD emulation for sense other than ILLEGAL_REQUEST
  scsi-generic: avoid invalid access to struct when emulating block limits
  scsi-generic: avoid out-of-bounds access to VPD page list
  scsi-generic: keep VPD page list sorted
  lsi53c895a: check message length value is valid
  scripts/dump-guest-memory: Synchronize with guest_phys_blocks_region_add
  memory-mapping: skip non-volatile memory regions in GuestPhysBlockList
  nvdimm: set non-volatile on the memory region
  memory: learn about non-volatile memory region
  target/i386: Clear RF on SYSCALL instruction
  MAINTAINERS: remove or downgrade myself to reviewer from some subsystems
  ivshmem: fix memory backend leak
  i386: clarify that the Q35 machine type implements a P35 chipset
  x86: hv_evmcs CPU flag support
  icount: fix deadlock when all cpus are sleeping
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
@@ -105,9 +105,9 @@ Guest CPU cores (TCG):
----------------------
Overall
L: qemu-devel@nongnu.org
M: Paolo Bonzini <pbonzini@redhat.com>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Richard Henderson <rth@twiddle.net>
R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: cpus.c
F: exec.c
@@ -1141,7 +1141,8 @@ F: hw/pci-host/ppce500.c
F: hw/net/fsl_etsec/
Character devices
M: Paolo Bonzini <pbonzini@redhat.com>
M: Marc-André Lureau <marcandre.lureau@redhat.com>
R: Paolo Bonzini <pbonzini@redhat.com>
S: Odd Fixes
F: hw/char/
@@ -1528,8 +1529,8 @@ T: git git://github.com/famz/qemu.git bitmaps
T: git git://github.com/jnsnow/qemu.git bitmaps
Character device backends
M: Paolo Bonzini <pbonzini@redhat.com>
M: Marc-André Lureau <marcandre.lureau@redhat.com>
R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: chardev/
F: include/chardev/
@@ -1762,9 +1763,9 @@ F: tests/qmp-cmd-test.c
T: git git://repo.or.cz/qemu/armbru.git qapi-next
qtest
M: Paolo Bonzini <pbonzini@redhat.com>
M: Thomas Huth <thuth@redhat.com>
M: Laurent Vivier <lvivier@redhat.com>
R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: qtest.c
F: tests/libqtest.*
@@ -1871,7 +1872,6 @@ F: tests/test-io-*
Sockets
M: Daniel P. Berrange <berrange@redhat.com>
M: Gerd Hoffmann <kraxel@redhat.com>
M: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: include/qemu/sockets.h
F: util/qemu-sockets.c
@@ -2058,13 +2058,12 @@ M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
M: Paolo Bonzini <pbonzini@redhat.com>
M: Peter Lieven <pl@kamp.de>
L: qemu-block@nongnu.org
S: Supported
S: Odd Fixes
F: block/iscsi.c
F: block/iscsi-opts.c
Network Block Device (NBD)
M: Eric Blake <eblake@redhat.com>
M: Paolo Bonzini <pbonzini@redhat.com>
L: qemu-block@nongnu.org
S: Maintained
F: block/nbd*
......
@@ -1554,6 +1554,14 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
atomic_mb_set(&cpu->exit_request, 0);
}
if (use_icount && all_cpu_threads_idle()) {
/*
* When all cpus are sleeping (e.g. in WFI), to avoid a deadlock
* in the main_loop, wake it up in order to start the warp timer.
*/
qemu_notify_event();
}
qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu);
deal_with_unplugged_cpus();
}
......
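For context, the new wakeup only fires when every vCPU is idle, since that is the one case where nothing else will arm the icount warp timer. A minimal sketch of that guard, reconstructed from memory rather than copied from cpus.c (cpu_thread_is_idle() is an internal helper there):

```c
#include "qemu/osdep.h"
#include "qom/cpu.h"          /* CPUState, CPU_FOREACH */

/* Sketch: the round-robin TCG thread only needs to kick the main loop
 * when no vCPU has work pending. */
static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}
```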
@@ -435,6 +435,7 @@ Examples of such memory API functions are:
- memory_region_add_subregion()
- memory_region_del_subregion()
- memory_region_set_readonly()
- memory_region_set_nonvolatile()
- memory_region_set_enabled()
- memory_region_set_address()
- memory_region_set_alias_offset()
......
@@ -116,6 +116,7 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
"nvdimm-memory", mr, 0, pmem_size);
memory_region_set_nonvolatile(nvdimm->nvdimm_mr, true);
nvdimm->nvdimm_mr->align = align;
}
......
@@ -1279,6 +1279,7 @@ static void desugar_shm(IVShmemState *s)
object_property_set_bool(obj, true, "share", &error_abort);
object_property_add_child(OBJECT(s), "internal-shm-backend", obj,
&error_abort);
object_unref(obj);
user_creatable_complete(obj, &error_abort);
s->hostmem = MEMORY_BACKEND(obj);
}
......
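The added unref works because of QOM's ownership rules; a hedged sketch of the pattern (the "container" type and the parent object are placeholders, not ivshmem code):

```c
#include "qemu/osdep.h"
#include "qom/object.h"
#include "qapi/error.h"

/* object_new() hands the caller one reference; object_property_add_child()
 * takes another, owned by the parent.  Dropping the creation reference is
 * what prevents the leak this hunk fixes. */
static void attach_child(Object *parent)
{
    Object *obj = object_new("container");        /* refcount 1 (caller) */
    object_property_add_child(parent, "child", obj,
                              &error_abort);      /* refcount 2          */
    object_unref(obj);                            /* refcount 1 (parent) */
}
```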
@@ -629,7 +629,15 @@ static void mch_class_init(ObjectClass *klass, void *data)
dc->desc = "Host bridge";
dc->vmsd = &vmstate_mch;
k->vendor_id = PCI_VENDOR_ID_INTEL;
k->device_id = PCI_DEVICE_ID_INTEL_Q35_MCH;
/*
* The 'q35' machine type implements an Intel Series 3 chipset,
* of which there are several variants. The key difference between
* the 82P35 MCH ('p35') and 82Q35 GMCH ('q35') variants is that
* the latter has an integrated graphics adapter. QEMU does not
* implement integrated graphics, so it uses the PCI ID for the 82P35
* chipset.
*/
k->device_id = PCI_DEVICE_ID_INTEL_P35_MCH;
k->revision = MCH_HOST_BRIDGE_REVISION_DEFAULT;
k->class_id = PCI_CLASS_BRIDGE_HOST;
/*
......
common-obj-y += scsi-disk.o
common-obj-y += scsi-disk.o emulation.o
common-obj-y += scsi-generic.o scsi-bus.o
common-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
common-obj-$(CONFIG_MPTSAS_SCSI_PCI) += mptsas.o mptconfig.o mptendian.o
......
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/bswap.h"
#include "hw/scsi/emulation.h"
int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl)
{
/* required VPD size with unmap support */
memset(outbuf, 0, 0x3c);
outbuf[0] = bl->wsnz; /* wsnz */
if (bl->max_io_sectors) {
/* optimal transfer length granularity. This field and the optimal
* transfer length can't be greater than the maximum transfer length.
*/
stw_be_p(outbuf + 2, MIN(bl->min_io_size, bl->max_io_sectors));
/* maximum transfer length */
stl_be_p(outbuf + 4, bl->max_io_sectors);
/* optimal transfer length */
stl_be_p(outbuf + 8, MIN(bl->opt_io_size, bl->max_io_sectors));
} else {
stw_be_p(outbuf + 2, bl->min_io_size);
stl_be_p(outbuf + 8, bl->opt_io_size);
}
/* max unmap LBA count */
stl_be_p(outbuf + 16, bl->max_unmap_sectors);
/* max unmap descriptors */
stl_be_p(outbuf + 20, bl->max_unmap_descr);
/* optimal unmap granularity; alignment is zero */
stl_be_p(outbuf + 24, bl->unmap_sectors);
/* max write same size, make it the same as maximum transfer length */
stl_be_p(outbuf + 36, bl->max_io_sectors);
return 0x3c;
}
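A hedged sketch of how a caller is expected to use the new helper, matching the two call sites later in this diff; the limit values are illustrative, not taken from any real device:

```c
#include "qemu/osdep.h"
#include "hw/scsi/emulation.h"

/* Limits are expressed in logical blocks.  The helper renders the
 * 0x3c-byte Block Limits payload; the caller supplies the 4-byte
 * VPD page header in front of it. */
static int fill_block_limits(uint8_t *outbuf)
{
    SCSIBlockLimits bl = {
        .wsnz = 1,
        .min_io_size = 8,               /* illustrative values */
        .opt_io_size = 128,
        .max_io_sectors = 2048,
        .max_unmap_sectors = 2 * 1024 * 1024,
        .max_unmap_descr = 255,
        .unmap_sectors = 8,
    };

    return 4 + scsi_emulate_block_limits(outbuf + 4, &bl);  /* 4 + 0x3c */
}
```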
@@ -861,10 +861,11 @@ static void lsi_do_status(LSIState *s)
static void lsi_do_msgin(LSIState *s)
{
int len;
uint8_t len;
trace_lsi_do_msgin(s->dbc, s->msg_len);
s->sfbr = s->msg[0];
len = s->msg_len;
assert(len > 0 && len <= LSI_MAX_MSGIN_LEN);
if (len > s->dbc)
len = s->dbc;
pci_dma_write(PCI_DEVICE(s), s->dnad, s->msg, len);
@@ -1705,8 +1706,10 @@ static uint8_t lsi_reg_readb(LSIState *s, int offset)
break;
case 0x58: /* SBDL */
/* Some drivers peek at the data bus during the MSG IN phase. */
if ((s->sstat1 & PHASE_MASK) == PHASE_MI)
if ((s->sstat1 & PHASE_MASK) == PHASE_MI) {
assert(s->msg_len > 0);
return s->msg[0];
}
ret = 0;
break;
case 0x59: /* SBDL high */
@@ -2103,11 +2106,23 @@ static int lsi_pre_save(void *opaque)
return 0;
}
static int lsi_post_load(void *opaque, int version_id)
{
LSIState *s = opaque;
if (s->msg_len < 0 || s->msg_len > LSI_MAX_MSGIN_LEN) {
return -EINVAL;
}
return 0;
}
static const VMStateDescription vmstate_lsi_scsi = {
.name = "lsiscsi",
.version_id = 0,
.minimum_version_id = 0,
.pre_save = lsi_pre_save,
.post_load = lsi_post_load,
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, LSIState),
......
@@ -33,6 +33,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/scsi/scsi.h"
#include "hw/scsi/emulation.h"
#include "scsi/constants.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
@@ -589,7 +590,7 @@ static uint8_t *scsi_get_buf(SCSIRequest *req)
return (uint8_t *)r->iov.iov_base;
}
int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
{
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
uint8_t page_code = req->cmd.buf[2];
@@ -691,89 +692,36 @@ int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
}
case 0xb0: /* block limits */
{
unsigned int unmap_sectors =
s->qdev.conf.discard_granularity / s->qdev.blocksize;
unsigned int min_io_size =
s->qdev.conf.min_io_size / s->qdev.blocksize;
unsigned int opt_io_size =
s->qdev.conf.opt_io_size / s->qdev.blocksize;
unsigned int max_unmap_sectors =
s->max_unmap_size / s->qdev.blocksize;
unsigned int max_io_sectors =
s->max_io_size / s->qdev.blocksize;
SCSIBlockLimits bl = {};
if (s->qdev.type == TYPE_ROM) {
DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n",
page_code);
return -1;
}
bl.wsnz = 1;
bl.unmap_sectors =
s->qdev.conf.discard_granularity / s->qdev.blocksize;
bl.min_io_size =
s->qdev.conf.min_io_size / s->qdev.blocksize;
bl.opt_io_size =
s->qdev.conf.opt_io_size / s->qdev.blocksize;
bl.max_unmap_sectors =
s->max_unmap_size / s->qdev.blocksize;
bl.max_io_sectors =
s->max_io_size / s->qdev.blocksize;
/* 255 descriptors fit in 4 KiB with an 8-byte header */
bl.max_unmap_descr = 255;
if (s->qdev.type == TYPE_DISK) {
int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk);
int max_io_sectors_blk =
max_transfer_blk / s->qdev.blocksize;
max_io_sectors =
MIN_NON_ZERO(max_io_sectors_blk, max_io_sectors);
/* min_io_size and opt_io_size can't be greater than
* max_io_sectors */
if (min_io_size) {
min_io_size = MIN(min_io_size, max_io_sectors);
}
if (opt_io_size) {
opt_io_size = MIN(opt_io_size, max_io_sectors);
}
bl.max_io_sectors =
MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors);
}
/* required VPD size with unmap support */
buflen = 0x40;
memset(outbuf + 4, 0, buflen - 4);
outbuf[4] = 0x1; /* wsnz */
/* optimal transfer length granularity */
outbuf[6] = (min_io_size >> 8) & 0xff;
outbuf[7] = min_io_size & 0xff;
/* maximum transfer length */
outbuf[8] = (max_io_sectors >> 24) & 0xff;
outbuf[9] = (max_io_sectors >> 16) & 0xff;
outbuf[10] = (max_io_sectors >> 8) & 0xff;
outbuf[11] = max_io_sectors & 0xff;
/* optimal transfer length */
outbuf[12] = (opt_io_size >> 24) & 0xff;
outbuf[13] = (opt_io_size >> 16) & 0xff;
outbuf[14] = (opt_io_size >> 8) & 0xff;
outbuf[15] = opt_io_size & 0xff;
/* max unmap LBA count, default is 1GB */
outbuf[20] = (max_unmap_sectors >> 24) & 0xff;
outbuf[21] = (max_unmap_sectors >> 16) & 0xff;
outbuf[22] = (max_unmap_sectors >> 8) & 0xff;
outbuf[23] = max_unmap_sectors & 0xff;
/* max unmap descriptors, 255 fit in 4 kb with an 8-byte header */
outbuf[24] = 0;
outbuf[25] = 0;
outbuf[26] = 0;
outbuf[27] = 255;
/* optimal unmap granularity */
outbuf[28] = (unmap_sectors >> 24) & 0xff;
outbuf[29] = (unmap_sectors >> 16) & 0xff;
outbuf[30] = (unmap_sectors >> 8) & 0xff;
outbuf[31] = unmap_sectors & 0xff;
/* max write same size */
outbuf[36] = 0;
outbuf[37] = 0;
outbuf[38] = 0;
outbuf[39] = 0;
outbuf[40] = (max_io_sectors >> 24) & 0xff;
outbuf[41] = (max_io_sectors >> 16) & 0xff;
outbuf[42] = (max_io_sectors >> 8) & 0xff;
outbuf[43] = max_io_sectors & 0xff;
buflen += scsi_emulate_block_limits(outbuf + buflen, &bl);
break;
}
case 0xb1: /* block device characteristics */
......
@@ -16,6 +16,7 @@
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "hw/scsi/scsi.h"
#include "hw/scsi/emulation.h"
#include "sysemu/block-backend.h"
#ifdef __linux__
@@ -144,7 +145,7 @@ static int execute_command(BlockBackend *blk,
static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
{
uint8_t page, page_len;
uint8_t page, page_idx;
/*
* EVPD set to zero returns the standard INQUIRY data.
@@ -181,7 +182,7 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
/* Also take care of the opt xfer len. */
stl_be_p(&r->buf[12],
MIN_NON_ZERO(max_transfer, ldl_be_p(&r->buf[12])));
} else if (page == 0x00 && s->needs_vpd_bl_emulation) {
} else if (s->needs_vpd_bl_emulation && page == 0x00) {
/*
* Now we're capable of supplying the VPD Block Limits
* response if the hardware can't. Add it in the INQUIRY
@@ -190,17 +191,43 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
*
* This way, the guest kernel will be aware of the support
* and will use it to properly set up the SCSI device.
*
* VPD page numbers must be sorted, so insert 0xb0 at the
* right place with an in-place insert. After the initialization
* part of the for loop is executed, the device response is
* at r[0] to r[page_idx - 1].
*/
page_len = r->buf[3];
r->buf[page_len + 4] = 0xb0;
r->buf[3] = ++page_len;
for (page_idx = lduw_be_p(r->buf + 2) + 4;
page_idx > 4 && r->buf[page_idx - 1] >= 0xb0;
page_idx--) {
if (page_idx < r->buflen) {
r->buf[page_idx] = r->buf[page_idx - 1];
}
}
r->buf[page_idx] = 0xb0;
stw_be_p(r->buf + 2, lduw_be_p(r->buf + 2) + 1);
}
}
}
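To see the in-place insert at work, here is a toy run on a fabricated page-0 response (hypothetical data, using the same bswap.h helpers):

```c
#include "qemu/osdep.h"
#include "qemu/bswap.h"       /* lduw_be_p(), stw_be_p() */

static void demo_insert_0xb0(void)
{
    /* buf[0..1]: device type / page code, buf[2..3]: big-endian page
     * count, buf[4..]: the sorted page list itself. */
    uint8_t buf[16] = { 0x00, 0x00, 0x00, 0x05,      /* 5 pages listed  */
                        0x00, 0x80, 0x83, 0xb1, 0xb2 };
    int idx;

    for (idx = lduw_be_p(buf + 2) + 4;           /* one past the last page */
         idx > 4 && buf[idx - 1] >= 0xb0;        /* shift pages >= 0xb0 up */
         idx--) {
        buf[idx] = buf[idx - 1];
    }
    buf[idx] = 0xb0;                             /* list: 00 80 83 b0 b1 b2 */
    stw_be_p(buf + 2, lduw_be_p(buf + 2) + 1);   /* count becomes 6         */
}
```

The real code additionally guards each store with `page_idx < r->buflen`, so a hostile page count reported by the device cannot write past the buffer.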
static int scsi_emulate_block_limits(SCSIGenericReq *r)
static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s)
{
r->buflen = scsi_disk_emulate_vpd_page(&r->req, r->buf);
int len;
uint8_t buf[64];
SCSIBlockLimits bl = {
.max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize
};
memset(r->buf, 0, r->buflen);
stb_p(buf, s->type);
stb_p(buf + 1, 0xb0);
len = scsi_emulate_block_limits(buf + 4, &bl);
assert(len <= sizeof(buf) - 4);
stw_be_p(buf + 2, len);
memcpy(r->buf, buf, MIN(r->buflen, len + 4));
r->io_header.sb_len_wr = 0;
/*
@@ -219,7 +246,6 @@ static void scsi_read_complete(void * opaque, int ret)
{
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
SCSIDevice *s = r->req.dev;
SCSISense sense;
int len;
assert(r->req.aiocb != NULL);
@@ -242,13 +268,15 @@ static void scsi_read_complete(void * opaque, int ret)
* resulted in sense error but would need emulation.
* In this case, emulate a valid VPD response.
*/
if (s->needs_vpd_bl_emulation) {
int is_vpd_bl = r->req.cmd.buf[0] == INQUIRY &&
r->req.cmd.buf[1] & 0x01 &&
r->req.cmd.buf[2] == 0xb0;
if (is_vpd_bl && sg_io_sense_from_errno(-ret, &r->io_header, &sense)) {
len = scsi_emulate_block_limits(r);
if (s->needs_vpd_bl_emulation && ret == 0 &&
(r->io_header.driver_status & SG_ERR_DRIVER_SENSE) &&
r->req.cmd.buf[0] == INQUIRY &&
(r->req.cmd.buf[1] & 0x01) &&
r->req.cmd.buf[2] == 0xb0) {
SCSISense sense =
scsi_parse_sense_buf(r->req.sense, r->io_header.sb_len_wr);
if (sense.key == ILLEGAL_REQUEST) {
len = scsi_generic_emulate_block_limits(r, s);
/*
* No need to let scsi_read_complete go on and handle an
* INQUIRY VPD BL request we created manually.
@@ -527,7 +555,7 @@ static void scsi_generic_set_vpd_bl_emulation(SCSIDevice *s)
}
page_len = buf[3];
for (i = 4; i < page_len + 4; i++) {
for (i = 4; i < MIN(sizeof(buf), page_len + 4); i++) {
if (buf[i] == 0xb0) {
s->needs_vpd_bl_emulation = false;
return;
......
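For reference, the reply assembled by scsi_generic_emulate_block_limits() has the following shape; the offsets below restate the header bytes built above and the stores made in emulation.c earlier, not a quotation from the SPC text:

```c
/* byte  0        peripheral device type (s->type)
 * byte  1        page code (0xb0, Block Limits)
 * bytes 2..3     page length, big-endian (0x3c)
 * payload, relative to byte 4:
 *   +0           WSNZ bit
 *   +2..3        optimal transfer length granularity
 *   +4..7        maximum transfer length
 *   +8..11       optimal transfer length
 *   +16..19      maximum unmap LBA count
 *   +20..23      maximum unmap block descriptor count
 *   +24..27      optimal unmap granularity
 *   +36..39      maximum WRITE SAME length
 */
```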
@@ -355,6 +355,7 @@ struct MemoryRegion {
bool ram;
bool subpage;
bool readonly; /* For RAM regions */
bool nonvolatile;
bool rom_device;
bool flush_coalesced_mmio;
bool global_locking;
@@ -480,6 +481,7 @@ static inline FlatView *address_space_to_flatview(AddressSpace *as)
* @offset_within_address_space: the address of the first byte of the section
* relative to the region's address space
* @readonly: writes to this section are ignored
* @nonvolatile: this section is non-volatile
*/
struct MemoryRegionSection {
MemoryRegion *mr;
@@ -488,6 +490,7 @@ struct MemoryRegionSection {
Int128 size;
hwaddr offset_within_address_space;
bool readonly;
bool nonvolatile;
};
/**
@@ -1170,6 +1173,17 @@ static inline bool memory_region_is_rom(MemoryRegion *mr)
return mr->ram && mr->readonly;
}
/**
* memory_region_is_nonvolatile: check whether a memory region is non-volatile
*
* Returns %true if a memory region is non-volatile memory.
*
* @mr: the memory region being queried
*/
static inline bool memory_region_is_nonvolatile(MemoryRegion *mr)
{
return mr->nonvolatile;
}
/**
* memory_region_get_fd: Get a file descriptor backing a RAM memory region.
@@ -1341,6 +1355,17 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
*/
void memory_region_set_readonly(MemoryRegion *mr, bool readonly);
/**
* memory_region_set_nonvolatile: Turn a memory region non-volatile
*
* Allows a memory region to be marked as non-volatile;
* only useful on RAM regions.
*
* @mr: the region being updated.
* @nonvolatile: whether the region is to be non-volatile.
*/
void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile);
/**
* memory_region_rom_device_set_romd: enable/disable ROMD mode
*
......
@@ -255,7 +255,7 @@
#define PCI_DEVICE_ID_INTEL_82801I_EHCI2 0x293c
#define PCI_DEVICE_ID_INTEL_82599_SFP_VF 0x10ed
#define PCI_DEVICE_ID_INTEL_Q35_MCH 0x29c0
#define PCI_DEVICE_ID_INTEL_P35_MCH 0x29c0
#define PCI_VENDOR_ID_XEN 0x5853
#define PCI_DEVICE_ID_XEN_PLATFORM 0x0001
......
#ifndef HW_SCSI_EMULATION_H
#define HW_SCSI_EMULATION_H 1
typedef struct SCSIBlockLimits {
bool wsnz;
uint16_t min_io_size;
uint32_t max_unmap_descr;
uint32_t opt_io_size;
uint32_t max_unmap_sectors;
uint32_t unmap_sectors;
uint32_t max_io_sectors;
} SCSIBlockLimits;
int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl);
#endif
@@ -189,7 +189,6 @@ void scsi_device_report_change(SCSIDevice *dev, SCSISense sense);
void scsi_device_unit_attention_reported(SCSIDevice *dev);
void scsi_generic_read_device_inquiry(SCSIDevice *dev);
int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed);
int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf);
int scsi_SG_IO_FROM_DEV(BlockBackend *blk, uint8_t *cmd, uint8_t cmd_size,
uint8_t *buf, uint8_t buf_size);
SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun);
......
@@ -162,7 +162,29 @@ void qemu_thread_exit(void *retval);
void qemu_thread_naming(bool enable);
struct Notifier;
/**
* qemu_thread_atexit_add:
* @notifier: Notifier to add
*
* Add the specified notifier to a list which will be run via
* notifier_list_notify() when this thread exits (either by calling
* qemu_thread_exit() or by returning from its start_routine).
* The usual usage is that the caller passes a Notifier which is
* a per-thread variable; it can then use the callback to free
* other per-thread data.
*
* If the thread exits as part of the entire process exiting,
* it is unspecified whether notifiers are called or not.
*/
void qemu_thread_atexit_add(struct Notifier *notifier);
/**
* qemu_thread_atexit_remove:
* @notifier: Notifier to remove
*
* Remove the specified notifier from the thread-exit notification
* list. It is not valid to try to remove a notifier which is not
* on the list.
*/
void qemu_thread_atexit_remove(struct Notifier *notifier);
struct QemuSpin {
......
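The documented "usual usage" in practice, as a minimal hedged sketch (my_scratch, my_exit_notifier and the callback are hypothetical names):

```c
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/notify.h"      /* Notifier */

/* The Notifier itself lives in TLS; its callback tears down other
 * per-thread state when this thread exits. */
static __thread char *my_scratch;
static __thread Notifier my_exit_notifier;

static void my_exit_cb(Notifier *n, void *unused)
{
    g_free(my_scratch);                        /* free per-thread data */
}

static void my_thread_init(void)
{
    my_scratch = g_malloc(4096);
    my_exit_notifier.notify = my_exit_cb;
    qemu_thread_atexit_add(&my_exit_notifier);
}
```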
@@ -216,6 +216,7 @@ struct FlatRange {
uint8_t dirty_log_mask;
bool romd_mode;
bool readonly;
bool nonvolatile;
};
#define FOR_EACH_FLAT_RANGE(var, view) \
@@ -231,6 +232,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv)
.size = fr->addr.size,
.offset_within_address_space = int128_get64(fr->addr.start),
.readonly = fr->readonly,
.nonvolatile = fr->nonvolatile,
};
}
@@ -240,7 +242,8 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
&& addrrange_equal(a->addr, b->addr)
&& a->offset_in_region == b->offset_in_region
&& a->romd_mode == b->romd_mode
&& a->readonly == b->readonly;
&& a->readonly == b->readonly
&& a->nonvolatile == b->nonvolatile;
}
static FlatView *flatview_new(MemoryRegion *mr_root)
@@ -312,7 +315,8 @@ static bool can_merge(FlatRange *r1, FlatRange *r2)
int128_make64(r2->offset_in_region))
&& r1->dirty_log_mask == r2->dirty_log_mask
&& r1->romd_mode == r2->romd_mode
&& r1->readonly == r2->readonly;
&& r1->readonly == r2->readonly
&& r1->nonvolatile == r2->nonvolatile;
}
/* Attempt to simplify a view by merging adjacent ranges */
@@ -592,7 +596,8 @@ static void render_memory_region(FlatView *view,
MemoryRegion *mr,
Int128 base,
AddrRange clip,
bool readonly)
bool readonly,
bool nonvolatile)
{
MemoryRegion *subregion;
unsigned i;
@@ -608,6 +613,7 @@ static void render_memory_region(FlatView *view,
int128_addto(&base, int128_make64(mr->addr));
readonly |= mr->readonly;
nonvolatile |= mr->nonvolatile;
tmp = addrrange_make(base, mr->size);
@@ -620,13 +626,15 @@
if (mr->alias) {
int128_subfrom(&base, int128_make64(mr->alias->addr));
int128_subfrom(&base, int128_make64(mr->alias_offset));
render_memory_region(view, mr->alias, base, clip, readonly);
render_memory_region(view, mr->alias, base, clip,
readonly, nonvolatile);
return;
}
/* Render subregions in priority order. */
QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
render_memory_region(view, subregion, base, clip, readonly);
render_memory_region(view, subregion, base, clip,
readonly, nonvolatile);
}
if (!mr->terminates) {
@@ -641,6 +649,7 @@ static void render_memory_region(FlatView *view,
fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
fr.romd_mode = mr->romd_mode;
fr.readonly = readonly;
fr.nonvolatile = nonvolatile;
/* Render the region itself into any gaps left by the current view. */
for (i = 0; i < view->nr && int128_nz(remain); ++i) {
@@ -726,7 +735,8 @@ static FlatView *generate_memory_topology(MemoryRegion *mr)
if (mr) {
render_memory_region(view, mr, int128_zero(),
addrrange_make(int128_zero(), int128_2_64()), false);
addrrange_make(int128_zero(), int128_2_64()),
false, false);
}
flatview_simplify(view);
@@ -2039,6 +2049,16 @@ void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
}
}
void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile)
{
if (mr->nonvolatile != nonvolatile) {
memory_region_transaction_begin();
mr->nonvolatile = nonvolatile;
memory_region_update_pending |= mr->enabled;
memory_region_transaction_commit();
}
}
void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode)
{
if (mr->romd_mode != romd_mode) {
@@ -2489,6 +2509,7 @@ static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr,
ret.size = range.size;
ret.offset_within_address_space = int128_get64(range.start);
ret.readonly = fr->readonly;
ret.nonvolatile = fr->nonvolatile;
return ret;
}
@@ -2839,10 +2860,11 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
QTAILQ_INSERT_TAIL(alias_print_queue, ml, mrqueue);
}
mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
" (prio %d, %s): alias %s @%s " TARGET_FMT_plx
" (prio %d, %s%s): alias %s @%s " TARGET_FMT_plx
"-" TARGET_FMT_plx "%s",
cur_start, cur_end,
mr->priority,
mr->nonvolatile ? "nv-" : "",
memory_region_type((MemoryRegion *)mr),
memory_region_name(mr),
memory_region_name(mr->alias),
@@ -2854,9 +2876,10 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
}
} else {
mon_printf(f,
TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s",
TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s%s): %s%s",
cur_start, cur_end,
mr->priority,
mr->nonvolatile ? "nv-" : "",
memory_region_type((MemoryRegion *)mr),
memory_region_name(mr),
mr->enabled ? "" : " [disabled]");
@@ -2941,19 +2964,21 @@ static void mtree_print_flatview(gpointer key, gpointer value,
mr = range->mr;
if (range->offset_in_region) {
p(f, MTREE_INDENT TARGET_FMT_plx "-"
TARGET_FMT_plx " (prio %d, %s): %s @" TARGET_FMT_plx,
TARGET_FMT_plx " (prio %d, %s%s): %s @" TARGET_FMT_plx,
int128_get64(range->addr.start),
int128_get64(range->addr.start) + MR_SIZE(range->addr.size),
mr->priority,
range->nonvolatile ? "nv-" : "",
range->readonly ? "rom" : memory_region_type(mr),
memory_region_name(mr),
range->offset_in_region);
} else {
p(f, MTREE_INDENT TARGET_FMT_plx "-"
TARGET_FMT_plx " (prio %d, %s): %s",
TARGET_FMT_plx " (prio %d, %s%s): %s",
int128_get64(range->addr.start),
int128_get64(range->addr.start) + MR_SIZE(range->addr.size),
mr->priority,
range->nonvolatile ? "nv-" : "",
range->readonly ? "rom" : memory_region_type(mr),
memory_region_name(mr));
}
......
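One visible effect of these printf changes: `info mtree` and `info mtree -f` now prefix non-volatile regions with `nv-`. An illustrative line for the nvdimm alias created earlier, hand-written from the format strings above rather than captured from a running monitor:

```
0000000100000000-000000017fffffff (prio 0, nv-ram): nvdimm-memory
```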
@@ -206,7 +206,8 @@ static void guest_phys_blocks_region_add(MemoryListener *listener,
/* we only care about RAM */
if (!memory_region_is_ram(section->mr) ||
memory_region_is_ram_device(section->mr)) {
memory_region_is_ram_device(section->mr) ||
memory_region_is_nonvolatile(section->mr)) {
return;
}
......
@@ -417,7 +417,9 @@ def get_guest_phys_blocks():
memory_region = flat_range["mr"].dereference()
# we only care about RAM
if not memory_region["ram"]:
if (not memory_region["ram"] or
memory_region["ram_device"] or
memory_region["nonvolatile"]):
continue
section_size = int128_get64(flat_range["addr"]["size"])
......
@@ -5732,6 +5732,7 @@ static Property x86_cpu_properties[] = {
DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false),
DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false),
DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false),
DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false),
DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false),
DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
......
@@ -1391,6 +1391,7 @@ struct X86CPU {
bool hyperv_frequencies;
bool hyperv_reenlightenment;
bool hyperv_tlbflush;
bool hyperv_evmcs;
bool hyperv_ipi;
bool check_cpuid;
bool enforce_cpuid;
......
@@ -18,6 +18,7 @@
#define HV_CPUID_FEATURES 0x40000003
#define HV_CPUID_ENLIGHTMENT_INFO 0x40000004
#define HV_CPUID_IMPLEMENT_LIMITS 0x40000005
#define HV_CPUID_NESTED_FEATURES 0x4000000A
#define HV_CPUID_MIN 0x40000005
#define HV_CPUID_MAX 0x4000ffff
#define HV_HYPERVISOR_PRESENT_BIT 0x80000000
@@ -60,6 +61,7 @@
#define HV_RELAXED_TIMING_RECOMMENDED (1u << 5)
#define HV_CLUSTER_IPI_RECOMMENDED (1u << 10)
#define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11)
#define HV_ENLIGHTENED_VMCS_RECOMMENDED (1u << 14)
/*
* Basic virtualized MSRs
......
@@ -869,6 +869,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
uint32_t unused;
struct kvm_cpuid_entry2 *c;
uint32_t signature[3];
uint16_t evmcs_version;
int kvm_base = KVM_CPUID_SIGNATURE;
int r;
Error *local_err = NULL;
@@ -912,7 +913,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
memset(signature, 0, 12);
memcpy(signature, cpu->hyperv_vendor_id, len);
}
c->eax = HV_CPUID_MIN;
c->eax = cpu->hyperv_evmcs ?
HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS;
c->ebx = signature[0];
c->ecx = signature[1];
c->edx = signature[2];
@@ -970,7 +972,16 @@ int kvm_arch_init_vcpu(CPUState *cs)
c->eax |= HV_CLUSTER_IPI_RECOMMENDED;
c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
}
if (cpu->hyperv_evmcs) {
if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
(uintptr_t)&evmcs_version)) {
fprintf(stderr, "Hyper-V Enlightened VMCS "
"(requested by 'hv-evmcs' cpu flag) "
"is not supported by kernel\n");
return -ENOSYS;
}
c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
}
c->ebx = cpu->hyperv_spinlock_attempts;
c = &cpuid_data.entries[cpuid_i++];
@@ -981,6 +992,21 @@ int kvm_arch_init_vcpu(CPUState *cs)
kvm_base = KVM_CPUID_SIGNATURE_NEXT;
has_msr_hv_hypercall = true;
if (cpu->hyperv_evmcs) {
__u32 function;
/* Create zeroed 0x40000006..0x40000009 leaves */
for (function = HV_CPUID_IMPLEMENT_LIMITS + 1;
function < HV_CPUID_NESTED_FEATURES; function++) {
c = &cpuid_data.entries[cpuid_i++];
c->function = function;
}
c = &cpuid_data.entries[cpuid_i++];
c->function = HV_CPUID_NESTED_FEATURES;
c->eax = evmcs_version;
}
}
if (cpu->expose_kvm) {
......
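Summarizing the kvm.c hunks, with hv-evmcs enabled the Hyper-V CPUID space presented to the guest changes as follows (a restatement of the code above using the leaf names from hyperv-proto.h, not dumped output):

```c
/* 0x40000000  EAX (max leaf): HV_CPUID_NESTED_FEATURES instead of
 *             HV_CPUID_IMPLEMENT_LIMITS
 * 0x40000004  (HV_CPUID_ENLIGHTMENT_INFO) EAX: gains
 *             HV_ENLIGHTENED_VMCS_RECOMMENDED
 * 0x40000006..0x40000009  present but zeroed padding leaves
 * 0x4000000A  (HV_CPUID_NESTED_FEATURES) EAX: the eVMCS version
 *             reported by KVM_CAP_HYPERV_ENLIGHTENED_VMCS
 */
```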
@@ -991,11 +991,11 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
int code64;
env->regs[R_ECX] = env->eip + next_eip_addend;
env->regs[11] = cpu_compute_eflags(env);
env->regs[11] = cpu_compute_eflags(env) & ~RF_MASK;
code64 = env->hflags & HF_CS64_MASK;
env->eflags &= ~env->fmask;
env->eflags &= ~(env->fmask | RF_MASK);
cpu_load_eflags(env, env->eflags, 0);
cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
0, 0xffffffff,
......
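RF is bit 16 of EFLAGS (RF_MASK is 0x00010000 in target/i386/cpu.h), so the fix is easy to check with concrete values; an illustrative computation, not code from the tree:

```c
#include <stdint.h>

#define RF_MASK 0x00010000              /* EFLAGS.RF, bit 16 (as in cpu.h) */

static uint32_t syscall_r11(uint32_t eflags)
{
    /* e.g. 0x00010202 (RF | IF | reserved bit 1) -> 0x00000202:
     * the guest now sees RF clear in the saved flags. */
    return eflags & ~RF_MASK;
}
```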
@@ -443,42 +443,34 @@ void qemu_event_wait(QemuEvent *ev)
}
}
static pthread_key_t exit_key;
union NotifierThreadData {
void *ptr;
NotifierList list;
};
QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *));
static __thread NotifierList thread_exit;
/*
* Note that in this implementation you can register a thread-exit
* notifier for the main thread, but it will never be called.
* This is OK because main thread exit can only happen when the
* entire process is exiting, and the API allows notifiers to not
* be called on process exit.
*/
void qemu_thread_atexit_add(Notifier *notifier)
{
union NotifierThreadData ntd;
ntd.ptr = pthread_getspecific(exit_key);
notifier_list_add(&ntd.list, notifier);
pthread_setspecific(exit_key, ntd.ptr);
notifier_list_add(&thread_exit, notifier);
}
void qemu_thread_atexit_remove(Notifier *notifier)
{
union NotifierThreadData ntd;
ntd.ptr = pthread_getspecific(exit_key);
notifier_remove(notifier);
pthread_setspecific(exit_key, ntd.ptr);
}
static void qemu_thread_atexit_run(void *arg)
{
union NotifierThreadData ntd = { .ptr = arg };
notifier_list_notify(&ntd.list, NULL);
}
static void __attribute__((constructor)) qemu_thread_atexit_init(void)
static void qemu_thread_atexit_notify(void *arg)
{
pthread_key_create(&exit_key, qemu_thread_atexit_run);
/*
* Called when a non-main thread exits (via qemu_thread_exit()
* or by returning from its start routine).
*/
notifier_list_notify(&thread_exit, NULL);
}
typedef struct {
void *(*start_routine)(void *);
void *arg;
@@ -490,6 +482,7 @@ static void *qemu_thread_start(void *args)
QemuThreadArgs *qemu_thread_args = args;
void *(*start_routine)(void *) = qemu_thread_args->start_routine;
void *arg = qemu_thread_args->arg;
void *r;
#ifdef CONFIG_PTHREAD_SETNAME_NP
/* Attempt to set the threads name; note that this is for debug, so
@@ -501,7 +494,10 @@ static void *qemu_thread_start(void *args)
#endif
g_free(qemu_thread_args->name);
g_free(qemu_thread_args);
return start_routine(arg);
pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
r = start_routine(arg);
pthread_cleanup_pop(1);
return r;
}
void qemu_thread_create(QemuThread *thread, const char *name,
......
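The rework drops the pthread_key trick in favour of a plain __thread NotifierList run from a pthread cleanup handler (the OSX breakage with the old scheme is the motivation named in the commit subject). A standalone sketch of the mechanism the new code relies on, in plain pthreads rather than QEMU types:

```c
#include <pthread.h>
#include <stdio.h>

static __thread int per_thread_state;

static void cleanup(void *arg)
{
    /* Runs via pthread_cleanup_pop(1) on normal return, and also if the
     * thread calls pthread_exit() between push and pop. */
    printf("cleanup, state=%d\n", per_thread_state);
}

static void *start(void *arg)
{
    pthread_cleanup_push(cleanup, NULL);
    per_thread_state = 42;
    pthread_cleanup_pop(1);              /* 1 = execute handler now */
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, start, NULL);
    pthread_join(t, NULL);
    return 0;
}
```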