/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#ifndef _WIN32
#endif

#include "qemu/cutils.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg.h"
#include "hw/qdev-core.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#include "hw/xen/xen.h"
#endif
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#else /* !CONFIG_USER_ONLY */
#include "hw/hw.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "exec/log.h"

#include "migration/vmstate.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

#endif

#ifdef TARGET_PAGE_BITS_VARY
int target_page_bits;
bool target_page_bits_decided;
#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

bool set_preferred_target_page_bits(int bits)
{
    /* The target page size is the lowest common denominator for all
     * the CPUs in the system, so we can only make it smaller, never
     * larger. And we can't make it smaller once we've committed to
     * a particular size.
     */
#ifdef TARGET_PAGE_BITS_VARY
    assert(bits >= TARGET_PAGE_BITS_MIN);
    if (target_page_bits == 0 || target_page_bits > bits) {
        if (target_page_bits_decided) {
            return false;
        }
        target_page_bits = bits;
    }
#endif
    return true;
}
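
/*
 * Example: if one CPU reports a preferred 16-bit page size and a later CPU
 * reports 12 bits, target_page_bits settles on 12 (the smaller size wins).
 * Once finalize_target_page_bits() has run, a request for a still smaller
 * size is refused and this function returns false.
 */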

#if !defined(CONFIG_USER_ONLY)

static void finalize_target_page_bits(void)
{
#ifdef TARGET_PAGE_BITS_VARY
    if (target_page_bits == 0) {
        target_page_bits = TARGET_PAGE_BITS_MIN;
    }
    target_page_bits_decided = true;
#endif
}

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE).
     * 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];
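
/*
 * Illustrative sizing, derived from the defines above: each node holds
 * P_L2_SIZE == 512 entries, and with 4 KiB target pages (TARGET_PAGE_BITS
 * == 12) a 64-bit address space needs P_L2_LEVELS == ((64 - 12 - 1) / 9) + 1
 * == 6 levels.  PHYS_MAP_NODE_NIL is simply the all-ones value of the 26-bit
 * 'ptr' field.
 */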

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    MemoryRegionSection *mru_section;
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;

/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    static unsigned alloc_hint = 16;
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
        alloc_hint = map->nodes_nb_alloc;
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
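
/*
 * Illustration of the recursion above: the page index is consumed 9 bits at
 * a time.  A chunk that is aligned to and at least as large as the current
 * level's step (e.g. 512 pages at the level just above the leaves) becomes a
 * single entry pointing straight at the section; smaller or unaligned
 * remainders recurse one level down.
 */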

/* Compact a non-leaf page entry.  Simply detect that the entry has a single
 * child, and update our entry so we can skip it and go directly to the
 * destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes);
    }
}
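
/*
 * Net effect of compaction (informal): chains of intermediate nodes with a
 * single valid child collapse into their parent, whose 'skip' count then
 * tells phys_page_find() how many levels to jump in one step, bounded by
 * what fits in the skip bit-field.
 */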

static inline bool section_covers_addr(const MemoryRegionSection *section,
                                       hwaddr addr)
{
    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
     * the section must cover the entire address space.
     */
    return int128_gethi(section->size) ||
           range_covers_byte(section->offset_within_address_space,
                             int128_getlo(section->size), addr);
}

static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (section_covers_addr(&sections[lp.ptr], addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}
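
/*
 * Lookup sketch: the page index (addr >> TARGET_PAGE_BITS) is examined 9
 * bits at a time, most significant slice first.  Each hop subtracts the
 * entry's 'skip' from the remaining level count, so compacted trees resolve
 * in fewer than P_L2_LEVELS steps; hitting PHYS_MAP_NODE_NIL falls back to
 * the unassigned section.
 */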

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section = atomic_read(&d->mru_section);
    subpage_t *subpage;
    bool update;

    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
        section_covers_addr(section, addr)) {
        update = false;
    } else {
        section = phys_page_find(d->phys_map, addr, d->map.nodes,
                                 d->map.sections);
        update = true;
    }
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    if (update) {
        atomic_set(&d->mru_section, section);
    }
    return section;
}
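
/*
 * Note on the most-recently-used cache: the section found is stashed in
 * d->mru_section with atomic_set() and re-read with atomic_read() above, so
 * concurrent readers see either the old or the new pointer.  Repeated
 * accesses that land in the same section skip the radix-tree walk entirely.
 */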

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

    section = address_space_translate_internal(d, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

static bool cpu_common_crash_occurred_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return cpu->crash_occurred;
}

static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
{
    CPUAddressSpace *newas;

    /* Target code should have set num_ases before calling us */
    assert(asidx < cpu->num_ases);

    if (asidx == 0) {
        /* address space 0 gets the convenience alias */
        cpu->as = as;
    }

    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    if (!cpu->cpu_ases) {
        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
    }

    newas = &cpu->cpu_ases[asidx];
    newas->cpu = cpu;
    newas->as = as;
    if (tcg_enabled()) {
        newas->tcg_as_listener.commit = tcg_commit;
        memory_listener_register(&newas->tcg_as_listener, as);
    }
}

AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
    /* Return the AddressSpace corresponding to the specified index */
    return cpu->cpu_ases[asidx].as;
}
#endif

void cpu_exec_unrealizefn(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    cpu_list_remove(cpu);

    if (cc->vmsd != NULL) {
        vmstate_unregister(NULL, cc->vmsd, cpu);
    }
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
    }
}

void cpu_exec_initfn(CPUState *cpu)
{
    cpu->as = NULL;
    cpu->num_ases = 0;

#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();

    /* This is a softmmu CPU object, so create a property for it
     * so users can wire up its memory. (This can't go in qom/cpu.c
     * because that file is compiled only once for both user-mode
     * and system builds.) The default if no link is set up is to use
     * the system address space.
     */
    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
                             (Object **)&cpu->memory,
                             qdev_prop_allow_set_link_before_realize,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
                             &error_abort);
    cpu->memory = system_memory;
    object_ref(OBJECT(cpu->memory));
#endif
}

void cpu_exec_realizefn(CPUState *cpu, Error **errp)
{
    CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);

    cpu_list_add(cpu);

#ifndef CONFIG_USER_ONLY
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
    }
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
    }
#endif
}

static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    /* Flush the whole TB as this will not have race conditions
     * even if we don't have proper locking yet.
     * Ideally we would just invalidate the TBs for the
     * specified PC.
     */
    tb_flush(cpu);
}

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)

{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}
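
/*
 * Worked example: a 4-byte watchpoint at 0x1000 covers 0x1000..0x1003
 * inclusive, so an access of length 2 at 0x1002 matches while one at 0x1004
 * does not.  Using inclusive end addresses keeps the test correct even when
 * a range ends at the very top of the address space, where an exclusive end
 * would wrap to zero.
 */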

#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(cpu);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_separate()) {
        qemu_log_lock();
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_unlock();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
    replay_finish();
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    bool dirty = false;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    while (page < end) {
        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);

        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
                                              offset, num);
        page += num;
    }

    rcu_read_unlock();

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}

/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        AddressSpaceDispatch *d;

        d = atomic_rcu_read(&section->address_space->dispatch);
        iotlb = section - d->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
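
/*
 * Encoding summary (informal): for RAM the returned value is the page's
 * ram_addr with PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM ORed into the low
 * bits; for MMIO it is the section index within the dispatch map plus the
 * offset.  Pages covered by a watchpoint are routed to PHYS_SECTION_WATCH
 * instead, so accesses trap into the watchpoint code.
 */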
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}
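
/*
 * The section index returned here is what later gets folded into TCG iotlb
 * values together with a page-aligned offset (see
 * memory_region_section_get_iotlb), which is why sections_nb is asserted to
 * stay below TARGET_PAGE_SIZE.
 */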

static void phys_section_destroy(MemoryRegion *mr)
{
    bool have_sub_page = mr->subpage;

    memory_region_unref(mr);

    if (have_sub_page) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}


static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
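
/*
 * Informal summary of mem_add(): a section whose start is not page-aligned
 * first registers its unaligned head through register_subpage(); the loop
 * then hands page-aligned, page-sized-or-larger pieces to
 * register_multipage() and any remaining partial page back to
 * register_subpage().
 */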

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__
static int64_t get_file_size(int fd)
{
    int64_t size = lseek(fd, 0, SEEK_END);
    if (size < 0) {
        return -errno;
    }
    return size;
}

static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path,
                            Error **errp)
{
    bool unlink_on_error = false;
    char *filename;
    char *sanitized_name;
    char *c;
    void *area = MAP_FAILED;
    int fd = -1;
    int64_t file_size;

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        return NULL;
    }

    for (;;) {
        fd = open(path, O_RDWR);
        if (fd >= 0) {
            /* @path names an existing file, use it */
            break;
        }
        if (errno == ENOENT) {
            /* @path names a file that doesn't exist, create it */
            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
            if (fd >= 0) {
                unlink_on_error = true;
                break;
            }
        } else if (errno == EISDIR) {
            /* @path names a directory, create a file there */
            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
            sanitized_name = g_strdup(memory_region_name(block->mr));
            for (c = sanitized_name; *c != '\0'; c++) {
                if (*c == '/') {
                    *c = '_';
                }
            }

            filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                                       sanitized_name);
            g_free(sanitized_name);

            fd = mkstemp(filename);
            if (fd >= 0) {
                unlink(filename);
                g_free(filename);
                break;
            }
            g_free(filename);
        }
        if (errno != EEXIST && errno != EINTR) {
            error_setg_errno(errp, errno,
                             "can't open backing store %s for guest RAM",
                             path);
            goto error;
        }
        /*
         * Try again on EINTR and EEXIST.  The latter happens when
         * something else creates the file between our two open().
         */
    }

    block->page_size = qemu_fd_getpagesize(fd);
    block->mr->align = block->page_size;
#if defined(__s390x__)
    if (kvm_enabled()) {
        block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
    }
#endif

    file_size = get_file_size(fd);

    if (memory < block->page_size) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than page size 0x%zx",
                   memory, block->page_size);
        goto error;
    }

    if (file_size > 0 && file_size < memory) {
        error_setg(errp, "backing store %s size 0x%" PRIx64
                   " does not match 'size' option 0x" RAM_ADDR_FMT,
                   path, file_size, memory);
        goto error;
    }

    memory = ROUND_UP(memory, block->page_size);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     *
     * Do not truncate the non-empty backend file to avoid corrupting
     * the existing data in the file. Disabling shrinking is not
     * enough. For example, the current vNVDIMM implementation stores
     * the guest NVDIMM labels at the end of the backend file. If the
     * backend file is later extended, QEMU will not be able to find
     * those labels. Therefore, extending the non-empty backend file
     * is disabled as well.
     */
    if (!file_size && ftruncate(fd, memory)) {
        perror("ftruncate");
    }

    area = qemu_ram_mmap(fd, memory, block->mr->align,
                         block->flags & RAM_SHARED);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for guest RAM");
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory, errp);
        if (errp && *errp) {
            goto error;
        }
    }

    block->fd = fd;
    return area;

error:
    if (area != MAP_FAILED) {
        qemu_ram_munmap(area, memory);
    }
    if (unlink_on_error) {
        unlink(path);
    }
    if (fd != -1) {
        close(fd);
    }
    return NULL;
}
#endif

/* Called with the ramlist lock held.  */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}
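
/*
 * Allocation policy sketch: the loop above scans every existing block,
 * computes the gap between that block's end and the start of the next block
 * above it, and picks the smallest gap that still fits the request (best
 * fit), aborting only if no gap is large enough.
 */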

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}

/* Called with iothread lock held.  */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
    RAMBlock *block;

    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block != new_block &&
            !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    rcu_read_unlock();
}

/* Called with iothread lock held.  */
void qemu_ram_unset_idstr(RAMBlock *block)
{
    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
}

size_t qemu_ram_pagesize(RAMBlock *rb)
{
    return rb->page_size;
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before the guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As the memory core doesn't know how memory is accessed, it is up to the
 * resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
{
    assert(block);

    newsize = HOST_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
    memory_region_set_size(block->mr, newsize);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}

/* Called with ram_list.mutex held */
static void dirty_memory_extend(ram_addr_t old_ram_size,
                                ram_addr_t new_ram_size)
{
    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    int i;

    /* Only need to extend if block count increased */
    if (new_num_blocks <= old_num_blocks) {
        return;
    }

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        DirtyMemoryBlocks *old_blocks;
        DirtyMemoryBlocks *new_blocks;
        int j;

        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
        new_blocks = g_malloc(sizeof(*new_blocks) +
                              sizeof(new_blocks->blocks[0]) * new_num_blocks);

        if (old_num_blocks) {
            memcpy(new_blocks->blocks, old_blocks->blocks,
                   old_num_blocks * sizeof(old_blocks->blocks[0]));
        }

        for (j = old_num_blocks; j < new_num_blocks; j++) {
            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
        }

        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);

        if (old_blocks) {
            g_free_rcu(old_blocks, rcu);
        }
    }
}
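
/*
 * RCU note for the above: new DirtyMemoryBlocks arrays are published with
 * atomic_rcu_set() and the old ones reclaimed with g_free_rcu(), so readers
 * walking ram_list.dirty_memory[] under rcu_read_lock() always see a
 * complete array; existing block pointers are copied over unchanged and
 * only the newly added blocks are freshly allocated.
 */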

static void ram_block_add(RAMBlock *new_block, Error **errp)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr, &err);
            if (err) {
                error_propagate(errp, err);
                qemu_mutex_unlock_ramlist();
                return;
            }
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        migration_bitmap_extend(old_ram_size, new_ram_size);
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
    }
}
B
bellard 已提交
1659

#ifdef __linux__
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   bool share, const char *mem_path,
                                   Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;

    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return NULL;
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
        error_setg(errp,
                   "-mem-path not supported with this accelerator");
        return NULL;
    }

    size = HOST_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
    if (!new_block->host) {
        g_free(new_block);
        return NULL;
    }

    ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}
#endif

static
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                  void (*resized)(const char*,
                                                  uint64_t length,
                                                  void *host),
                                  void *host, bool resizeable,
                                  MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;

    size = HOST_PAGE_ALIGN(size);
    max_size = HOST_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->page_size = getpagesize();
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
    ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}

RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}

RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}

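/*
 * Usage sketch (illustrative, not a call site in this file): a resizeable
 * block registers a callback that qemu_ram_resize() later invokes with the
 * block's idstr, the new used length and the host pointer.  The MemoryRegion
 * 'mr', the Error pointer 'err' and the sizes are assumed to come from the
 * caller:
 *
 *     static void my_ram_resized(const char *idstr, uint64_t length,
 *                                void *host)
 *     {
 *         // react to the new used_length, e.g. update derived device state
 *     }
 *
 *     RAMBlock *rb = qemu_ram_alloc_resizeable(16 * 1024 * 1024,
 *                                              64 * 1024 * 1024,
 *                                              my_ram_resized, mr, &err);
 */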
static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        qemu_ram_munmap(block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(RAMBlock *block)
{
    if (!block) {
        return;
    }

    qemu_mutex_lock_ramlist();
    QLIST_REMOVE_RCU(block, next);
    ram_list.mru_block = NULL;
    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    call_rcu(block, reclaim_ramblock, rcu);
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * Called within RCU critical section.
 */
void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
{
    RAMBlock *block = ram_block;

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
        addr -= block->offset;
    }

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }
    return ramblock_ptr(block, addr);
}

/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
 * but takes a size argument.
 *
 * Called within RCU critical section.
 */
static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
                                 hwaddr *size)
{
    RAMBlock *block = ram_block;
    if (*size == 0) {
        return NULL;
    }

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
        addr -= block->offset;
    }
    *size = MIN(*size, block->max_length - addr);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map the requested area.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, *size, 1);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }

    return ramblock_ptr(block, addr);
}

/*
 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
 * in that RAMBlock.
 *
 * ptr: Host pointer to look up
 * round_offset: If true round the result offset down to a page boundary
 * *ram_addr: set to result ram_addr
 * *offset: set to result offset within the RAMBlock
 *
 * Returns: RAMBlock (or NULL if not found)
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *offset)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        ram_addr_t ram_addr;
        rcu_read_lock();
        ram_addr = xen_ram_addr_from_mapcache(ptr);
        block = qemu_get_ram_block(ram_addr);
        if (block) {
            *offset = ram_addr - block->offset;
        }
        rcu_read_unlock();
        return block;
    }

    rcu_read_lock();
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    rcu_read_unlock();
    return NULL;

found:
    *offset = (host - block->host);
    if (round_offset) {
        *offset &= TARGET_PAGE_MASK;
    }
    rcu_read_unlock();
    return block;
}

/*
 * Finds the named RAMBlock
 *
 * name: The name of RAMBlock to find
 *
 * Returns: RAMBlock (or NULL if not found)
 */
RAMBlock *qemu_ram_block_by_name(const char *name)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (!strcmp(name, block->idstr)) {
            return block;
        }
    }

    return NULL;
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
ram_addr_t qemu_ram_addr_from_host(void *ptr)
{
    RAMBlock *block;
    ram_addr_t offset;

    block = qemu_ram_block_from_host(ptr, false, &offset);
    if (!block) {
        return RAM_ADDR_INVALID;
    }

    return block->offset + offset;
}

/* Called within RCU critical section.  */
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    bool locked = false;

    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        locked = true;
        tb_lock();
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
    switch (size) {
    case 1:
        stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    case 2:
        stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    case 4:
        stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
        break;
    default:
        abort();
    }

    if (locked) {
        tb_unlock();
    }

    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size,
                                        DIRTY_CLIENTS_NOCODE);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
    }
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    uint32_t cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                if (wp->flags & BP_CPU &&
                    !cc->debug_check_watchpoint(cpu, wp)) {
                    wp->flags &= ~BP_WATCHPOINT_HIT;
                    continue;
                }
                cpu->watchpoint_hit = wp;

                /* The tb_lock will be reset when cpu_loop_exit or
                 * cpu_loop_exit_noexc longjmp back into the cpu_exec
                 * main loop.
                 */
                tb_lock();
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_loop_exit_noexc(cpu);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
                                  unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    uint64_t data;
    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
    AddressSpace *as = current_cpu->cpu_ases[asidx].as;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
    switch (size) {
    case 1:
        data = address_space_ldub(as, addr, attrs, &res);
        break;
    case 2:
        data = address_space_lduw(as, addr, attrs, &res);
        break;
    case 4:
        data = address_space_ldl(as, addr, attrs, &res);
        break;
    default: abort();
    }
    *pdata = data;
    return res;
}

static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned size,
                                   MemTxAttrs attrs)
{
    MemTxResult res;
    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
    AddressSpace *as = current_cpu->cpu_ases[asidx].as;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
    switch (size) {
    case 1:
        address_space_stb(as, addr, val, attrs, &res);
        break;
    case 2:
        address_space_stw(as, addr, val, attrs, &res);
        break;
    case 4:
        address_space_stl(as, addr, val, attrs, &res);
        break;
    default: abort();
    }
    return res;
}

static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];
    MemTxResult res;

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    res = address_space_read(subpage->as, addr + subpage->base,
                             attrs, buf, len);
    if (res) {
        return res;
    }
    switch (len) {
    case 1:
        *data = ldub_p(buf);
        return MEMTX_OK;
    case 2:
        *data = lduw_p(buf);
        return MEMTX_OK;
    case 4:
        *data = ldl_p(buf);
        return MEMTX_OK;
    case 8:
        *data = ldq_p(buf);
        return MEMTX_OK;
    default:
        abort();
    }
}

static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    case 8:
        stq_p(buf, value);
        break;
    default:
        abort();
    }
    return address_space_write(subpage->as, addr + subpage->base,
                               attrs, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          NULL, TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}

static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
                              MemoryRegion *mr)
{
    assert(as);
    MemoryRegionSection section = {
        .address_space = as,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}

MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
{
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
    MemoryRegionSection *sections = d->map.sections;

    return sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          NULL, UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, as, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, as, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, as, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, as, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}

static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}

static void mem_commit(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    phys_page_compact_all(next, next->map.nodes_nb);

    atomic_rcu_set(&as->dispatch, next);
    if (cur) {
        call_rcu(cur, address_space_dispatch_free, rcu);
    }
}

static void tcg_commit(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;
    AddressSpaceDispatch *d;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    cpu_reloading_memory_map();
    /* The CPU and TLB are protected by the iothread lock.
     * We reload the dispatch pointer now because cpu_reloading_memory_map()
     * may have split the RCU critical section.
     */
    d = atomic_rcu_read(&cpuas->as->dispatch);
    atomic_rcu_set(&cpuas->memory_dispatch, d);
    tlb_flush(cpuas->cpu, 1);
}

void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    atomic_rcu_set(&as->dispatch, NULL);
    if (d) {
        call_rcu(d, address_space_dispatch_free, rcu);
    }
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
                                     hwaddr length)
{
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    addr += memory_region_get_ram_addr(mr);

    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        tb_lock();
        tb_invalidate_phys_range(addr, addr + length);
        tb_unlock();
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
    }
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    l = pow2floor(l);

    return l;
}

static bool prepare_mmio_access(MemoryRegion *mr)
{
    bool unlocked = !qemu_mutex_iothread_locked();
    bool release_lock = false;

    if (unlocked && mr->global_locking) {
        qemu_mutex_lock_iothread();
        unlocked = false;
        release_lock = true;
    }
    if (mr->flush_coalesced_mmio) {
        if (unlocked) {
            qemu_mutex_lock_iothread();
        }
        qemu_flush_coalesced_mmio_buffer();
        if (unlocked) {
            qemu_mutex_unlock_iothread();
        }
    }

    return release_lock;
}

/* Called within RCU critical section.  */
static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
                                                MemTxAttrs attrs,
                                                const uint8_t *buf,
                                                int len, hwaddr addr1,
                                                hwaddr l, MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, true)) {
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            /* XXX: could force current_cpu to NULL to avoid
               potential bugs */
            switch (l) {
            case 8:
                /* 64 bit write access */
                val = ldq_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                       attrs);
                break;
            case 4:
                /* 32 bit write access */
                val = ldl_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                       attrs);
                break;
            case 2:
                /* 16 bit write access */
                val = lduw_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                       attrs);
                break;
            case 1:
                /* 8 bit write access */
                val = ldub_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                       attrs);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
    }

    return result;
}

MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    if (len > 0) {
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
        result = address_space_write_continue(as, addr, attrs, buf, len,
                                              addr1, l, mr);
        rcu_read_unlock();
    }

    return result;
}

/* Called within RCU critical section.  */
MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
                                        MemTxAttrs attrs, uint8_t *buf,
                                        int len, hwaddr addr1, hwaddr l,
                                        MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, false)) {
            /* I/O case */
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            switch (l) {
            case 8:
                /* 64 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                      attrs);
                stq_p(buf, val);
                break;
            case 4:
                /* 32 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                      attrs);
                stl_p(buf, val);
                break;
            case 2:
                /* 16 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                      attrs);
                stw_p(buf, val);
                break;
            case 1:
                /* 8 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                      attrs);
                stb_p(buf, val);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            memcpy(buf, ptr, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
    }

    return result;
}

MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
                                    MemTxAttrs attrs, uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    if (len > 0) {
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
        result = address_space_read_continue(as, addr, attrs, buf, len,
                                             addr1, l, mr);
        rcu_read_unlock();
    }

    return result;
}

MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
{
    if (is_write) {
        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
    } else {
        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
    }
}

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}

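/*
 * Usage sketch (illustrative): cpu_physical_memory_rw() is the convenience
 * wrapper over address_space_rw() for the system address space.  'gpa' is a
 * hypothetical guest-physical address supplied by the caller:
 *
 *     uint8_t buf[4];
 *     cpu_physical_memory_rw(gpa, buf, sizeof(buf), 0);   // is_write == 0: read
 *     cpu_physical_memory_rw(gpa, buf, sizeof(buf), 1);   // is_write == 1: write
 */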
enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            l = memory_access_size(mr, l, addr1);
        } else {
            /* ROM/RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

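/*
 * Usage sketch (illustrative): a caller whose address_space_map() attempt
 * failed because the bounce buffer was in use can register a bottom half to
 * be scheduled once the buffer is released.  qemu_bh_new() is the usual
 * main-loop helper; 'retry_dma' and 'dev' are hypothetical names:
 *
 *     QEMUBH *bh = qemu_bh_new(retry_dma, dev);
 *     cpu_register_map_client(bh);
 *     ...
 *     cpu_unregister_map_client(bh);   // once the retry is no longer needed
 */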
void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    /* The data structures we set up here depend on knowing the page size,
     * so no more changes can be made after this point.
     * In an ideal world, nothing we did before we had finished the
     * machine setup would care about the target page size, and we could
     * do this much later, rather than requiring board models to state
     * up front what their requirements are.
     */
    finalize_target_page_bits();
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    rcu_read_unlock();
    return true;
}

static hwaddr
address_space_extend_translation(AddressSpace *as, hwaddr addr, hwaddr target_len,
                                 MemoryRegion *mr, hwaddr base, hwaddr len,
                                 bool is_write)
{
    hwaddr done = 0;
    hwaddr xlat;
    MemoryRegion *this_mr;

    for (;;) {
        target_len -= len;
        addr += len;
        done += len;
        if (target_len == 0) {
            return done;
        }

        len = target_len;
        this_mr = address_space_translate(as, addr, &xlat, &len, is_write);
        if (this_mr != mr || xlat != base + done) {
            return done;
        }
    }
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr l, xlat;
    MemoryRegion *mr;
    void *ptr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    memory_region_ref(mr);
    *plen = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
    ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen);
    rcu_read_unlock();

    return ptr;
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = memory_region_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

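/*
 * Usage sketch (illustrative): the map/unmap pair for a DMA-style access to
 * guest-physical memory.  'gpa' and 'size' are hypothetical caller values;
 * the mapping may be shortened, so the returned *plen must be honoured:
 *
 *     hwaddr plen = size;
 *     void *p = cpu_physical_memory_map(gpa, &plen, 1);   // 1 == write
 *     if (p) {
 *         memset(p, 0, plen);
 *         cpu_physical_memory_unmap(p, plen, 1, plen);
 *     }
 */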
#define ARG1_DECL                AddressSpace *as
#define ARG1                     as
#define SUFFIX
#define TRANSLATE(...)           address_space_translate(as, __VA_ARGS__)
#define IS_DIRECT(mr, is_write)  memory_access_is_direct(mr, is_write)
#define MAP_RAM(mr, ofs)         qemu_map_ram_ptr((mr)->ram_block, ofs)
#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
#define RCU_READ_LOCK(...)       rcu_read_lock()
#define RCU_READ_UNLOCK(...)     rcu_read_unlock()
#include "memory_ldst.inc.c"

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        int asidx;
        MemTxAttrs attrs;

        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
        asidx = cpu_asidx_from_attrs(cpu, attrs);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
                                          phys_addr, buf, l);
        } else {
            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
                             MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
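/*
 * Usage sketch (illustrative): this is the path typically used by the
 * gdbstub and the monitor to read guest-virtual memory.  'cpu' and 'vaddr'
 * are assumed to come from the caller:
 *
 *     uint32_t insn;
 *     if (cpu_memory_rw_debug(cpu, vaddr, (uint8_t *)&insn,
 *                             sizeof(insn), 0) < 0) {
 *         // page not mapped
 *     }
 */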

/*
 * Allows code that needs to deal with migration bitmaps etc to still be built
 * target independent.
 */
size_t qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
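/*
 * Usage sketch (illustrative): a RAMBlockIterFunc callback, with the
 * parameter order inferred from the call above (idstr, host pointer, offset,
 * used length, opaque).  Returning non-zero stops the walk:
 *
 *     static int dump_block(const char *idstr, void *host, ram_addr_t offset,
 *                           ram_addr_t length, void *opaque)
 *     {
 *         fprintf(stderr, "%s: " RAM_ADDR_FMT " bytes\n", idstr, length);
 *         return 0;
 *     }
 *
 *     qemu_ram_foreach_block(dump_block, NULL);
 */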
#endif