/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/range.h"

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of P_L2_SIZE).
     * 0 for a leaf.
     */
    uint32_t skip : 6;
    /* index into map->sections (skip == 0) or map->nodes (skip != 0) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
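/* With ptr being a 26-bit field, this shifted all-ones pattern
 * (0x03ffffff) is the reserved "no node" marker.
 */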

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
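/* For example, assuming 64-bit guest-physical addresses and 4 KiB target
 * pages (TARGET_PAGE_BITS == 12), this evaluates to
 * ((64 - 12 - 1) / 9) + 1 == 6 levels of 512-entry tables.
 */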

typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

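/* Allocate one node from the pre-reserved pool and initialize every
 * entry: a leaf node starts out mapping everything to the unassigned
 * section, an interior node starts out with no children.
 */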
static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

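/* Recursively fill in the page table: runs that are aligned to and at
 * least as large as this level's step get a leaf pointer right here;
 * anything smaller descends one level.
 */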
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry.  If the entry has only a single child,
 * update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

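/* Walk the tree from the root, consuming lp.skip levels per hop, and
 * return the section covering @addr (or the unassigned section if the
 * walk falls off the tree or the section does not cover the address).
 */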
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

/* Called from RCU critical section */
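/* Translate addr, iterating through any IOMMUs on the path.  Each hop
 * may redirect the access into the IOMMU's target address space and
 * clamps *plen so the access does not cross an IOMMU page.
 */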
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(cpu->memory_dispatch,
                                               addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        NULL
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
{
    /* We only support one address space per cpu at the moment.  */
    assert(cpu->as == as);

    if (cpu->tcg_as_listener) {
        memory_listener_unregister(cpu->tcg_as_listener);
    } else {
        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
    }
    cpu->tcg_as_listener->commit = tcg_commit;
    memory_listener_register(cpu->tcg_as_listener, as);
}
#endif

void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUState *some_cpu;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = 0;
    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&cpu->breakpoints);
    QTAILQ_INIT(&cpu->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
    cpu_reload_memory_map(cpu);
#endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint) {
        *watchpoint = wp;
    }
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}

#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            CPUArchState *env = cpu->env_ptr;
            tb_flush(env);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        goto found;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    unsigned long end, page;
    bool dirty;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;
    dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
                                         page, end - page);

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}

/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - section->address_space->dispatch->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}

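/* Append @section to the map's section table, growing the table as
 * needed, and return the index that PhysPageEntry.ptr will use to refer
 * to it.
 */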
static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    memory_region_unref(mr);

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}

static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

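/* Split an incoming section into page-aligned pieces: an unaligned head
 * and tail (and any piece smaller than a page) are registered as
 * subpages, the aligned middle as one multipage run.
 */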
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled()) {
        kvm_flush_coalesced_mmio_buffer();
    }
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

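/* Return the block size reported by the filesystem backing @path (the
 * huge page size on hugetlbfs); returns 0 and sets @errp on failure.
 */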
static long gethugepagesize(const char *path, Error **errp)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        error_setg_errno(errp, errno, "failed to get page size of file %s",
                         path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return fs.f_bsize;
}

static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path,
                            Error **errp)
{
    char *filename;
    char *sanitized_name;
    char *c;
    void *area = NULL;
    int fd;
    uint64_t hpagesize;
    Error *local_err = NULL;

    hpagesize = gethugepagesize(path, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }
    block->mr->align = hpagesize;

    if (memory < hpagesize) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than huge page size 0x%" PRIx64,
                   memory, hpagesize);
        goto error;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        goto error;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
    sanitized_name = g_strdup(memory_region_name(block->mr));
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/') {
            *c = '_';
        }
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);

    fd = mkstemp(filename);
    if (fd < 0) {
        error_setg_errno(errp, errno,
                         "unable to create backing store for hugepages");
        g_free(filename);
        goto error;
    }
    unlink(filename);
    g_free(filename);

    memory = (memory + hpagesize - 1) & ~(hpagesize - 1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory)) {
        perror("ftruncate");
    }

    area = mmap(0, memory, PROT_READ | PROT_WRITE,
                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
                fd, 0);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for hugepages");
        close(fd);
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory);
    }

    block->fd = fd;
    return area;

error:
    if (mem_prealloc) {
        error_report("%s", error_get_pretty(*errp));
        exit(1);
    }
    return NULL;
}
#endif

/* Called with the ramlist lock held.  */
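/* Best-fit search: pick the smallest gap between existing blocks that
 * still fits @size, to keep the ram_addr_t space densely packed.
 */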
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

/* Called within an RCU critical section, or while the ramlist lock
 * is held.
 */
static RAMBlock *find_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            return block;
        }
    }

    return NULL;
}

/* Called with iothread lock held.  */
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    rcu_read_lock();
    new_block = find_ram_block(addr);
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    rcu_read_unlock();
}

/* Called with iothread lock held.  */
void qemu_ram_unset_idstr(ram_addr_t addr)
{
    RAMBlock *block;

    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */

    rcu_read_lock();
    block = find_ram_block(addr);
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
    rcu_read_unlock();
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before the guest might have detected the memory size: e.g.
 * on incoming migration, or right after reset.
 *
 * As the memory core doesn't know how the memory is accessed, it is up
 * to the resize callback to update device state and/or add assertions
 * to detect misuse, if necessary.
 */
int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
{
    RAMBlock *block = find_ram_block(base);

    assert(block);

    newsize = TARGET_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
1366 1367
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
1368 1369 1370 1371 1372 1373 1374
    memory_region_set_size(block->mr, newsize);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}


static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr);
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return -1;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    if (new_ram_size > old_ram_size) {
        int i;

        /* ram_list.dirty_memory[] is protected by the iothread lock.  */
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            ram_list.dirty_memory[i] =
                bitmap_zero_extend(ram_list.dirty_memory[i],
                                   old_ram_size, new_ram_size);
        }
    }
    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
        if (kvm_enabled()) {
            kvm_setup_guest_memory(new_block->host, new_block->max_length);
        }
    }

    return new_block->offset;
}

#ifdef __linux__
ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    bool share, const char *mem_path,
                                    Error **errp)
{
    RAMBlock *new_block;
    ram_addr_t addr;
    Error *local_err = NULL;

    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return -1;
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
        error_setg(errp,
                   "-mem-path not supported with this accelerator");
        return -1;
    }

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
    if (!new_block->host) {
        g_free(new_block);
        return -1;
    }

    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
}
#endif

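/* Common back end for the qemu_ram_alloc_* wrappers: @host, if non-NULL,
 * supplies preallocated memory, and @resizeable allows used_length to be
 * changed later, up to @max_size, via qemu_ram_resize().
 */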
static
ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                   void (*resized)(const char*,
                                                   uint64_t length,
                                                   void *host),
                                   void *host, bool resizeable,
                                   MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    ram_addr_t addr;
    Error *local_err = NULL;

    size = TARGET_PAGE_ALIGN(size);
    max_size = TARGET_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
}

ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}

ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            g_free_rcu(block, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

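/* Undo whichever allocation path created the block: preallocated host
 * memory is left alone, Xen map-cache entries are invalidated,
 * file-backed memory is unmapped and its fd closed, and anonymous
 * memory is freed.
 */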
static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        munmap(block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            call_rcu(block, reclaim_ramblock, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

int qemu_get_ram_fd(ram_addr_t addr)
{
    RAMBlock *block;
    int fd;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    fd = block->fd;
    rcu_read_unlock();
    return fd;
}

void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    ptr = ramblock_ptr(block, 0);
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            ptr = xen_map_cache(addr, 0, 0);
            goto unlock;
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }
    ptr = ramblock_ptr(block, addr - block->offset);

unlock:
    rcu_read_unlock();
    return ptr;
}
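
/*
 * Illustrative sketch: a caller that is not in an RCU critical section and
 * does not hold the iothread lock must pin the region before using the
 * pointer returned above (hypothetical caller code):
 *
 *     memory_region_ref(mr);              // keeps the RAM block alive
 *     ptr = qemu_get_ram_ptr(ram_addr);
 *     ... use ptr ...
 *     memory_region_unref(mr);
 */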

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    void *ptr;
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;
        rcu_read_lock();
        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->max_length) {
                if (addr - block->offset + *size > block->max_length) {
                    *size = block->max_length - addr + block->offset;
                }
                ptr = ramblock_ptr(block, addr - block->offset);
                rcu_read_unlock();
                return ptr;
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

/* Some of the softmmu routines need to translate from a host pointer
 * (typically a TLB entry) back to a ram offset.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;
    MemoryRegion *mr;

    if (xen_enabled()) {
        rcu_read_lock();
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        mr = qemu_get_ram_block(*ram_addr)->mr;
        rcu_read_unlock();
        return mr;
    }

    rcu_read_lock();
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* This case appears when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    rcu_read_unlock();
    return NULL;

found:
    *ram_addr = block->offset + (host - block->host);
    mr = block->mr;
    rcu_read_unlock();
    return mr;
}
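
/*
 * Illustrative sketch: this is how address_space_unmap() below recovers the
 * RAM offset of a pointer handed out by address_space_map():
 *
 *     ram_addr_t addr1;
 *     MemoryRegion *mr = qemu_ram_addr_from_host(buffer, &addr1);
 *
 *     if (mr) {
 *         // addr1 is the ram_addr_t equivalent of 'buffer'
 *     } else {
 *         // 'buffer' does not point into guest RAM
 *     }
 */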

static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size,
                                        DIRTY_CLIENTS_NOCODE);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        CPUArchState *env = current_cpu->env_ptr;
        tlb_set_dirty(env, current_cpu->mem_io_vaddr);
    }
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                cpu->watchpoint_hit = wp;
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(cpu, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
                                  unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    uint64_t data;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
    switch (size) {
    case 1:
        data = address_space_ldub(&address_space_memory, addr, attrs, &res);
        break;
    case 2:
        data = address_space_lduw(&address_space_memory, addr, attrs, &res);
        break;
    case 4:
        data = address_space_ldl(&address_space_memory, addr, attrs, &res);
        break;
    default: abort();
    }
    *pdata = data;
    return res;
}

static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned size,
                                   MemTxAttrs attrs)
{
    MemTxResult res;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
    switch (size) {
    case 1:
        address_space_stb(&address_space_memory, addr, val, attrs, &res);
        break;
    case 2:
        address_space_stw(&address_space_memory, addr, val, attrs, &res);
        break;
    case 4:
        address_space_stl(&address_space_memory, addr, val, attrs, &res);
        break;
    default: abort();
    }
    return res;
}

static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];
    MemTxResult res;

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    res = address_space_read(subpage->as, addr + subpage->base,
                             attrs, buf, len);
    if (res) {
        return res;
    }
    switch (len) {
    case 1:
        *data = ldub_p(buf);
        return MEMTX_OK;
    case 2:
        *data = lduw_p(buf);
        return MEMTX_OK;
    case 4:
        *data = ldl_p(buf);
        return MEMTX_OK;
    case 8:
        *data = ldq_p(buf);
        return MEMTX_OK;
    default:
        abort();
    }
}

static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    case 8:
        stq_p(buf, value);
        break;
    default:
        abort();
    }
    return address_space_write(subpage->as, addr + subpage->base,
                               attrs, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) {
        return -1;
    }
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          NULL, TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}
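
/*
 * Illustrative sketch: when a MemoryRegionSection does not cover a whole
 * target page, register_subpage() (elsewhere in this file) allocates one of
 * these and maps only the covered byte range to the section, e.g. for a
 * 256-byte device at offset 0x100 of a page:
 *
 *     subpage_t *sp = subpage_init(as, page_base);
 *     subpage_register(sp, 0x100, 0x1ff, section_index);
 *
 * Accesses to the rest of the page keep hitting PHYS_SECTION_UNASSIGNED
 * from the subpage_register() call in subpage_init() above.
 */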

static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
                              MemoryRegion *mr)
{
    assert(as);
    MemoryRegionSection section = {
        .address_space = as,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}

MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
{
    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
    MemoryRegionSection *sections = d->map.sections;

    return sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          NULL, UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, as, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, as, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, as, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, as, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}

static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}

static void mem_commit(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    phys_page_compact_all(next, next->map.nodes_nb);

    atomic_rcu_set(&as->dispatch, next);
    if (cur) {
        call_rcu(cur, address_space_dispatch_free, rcu);
    }
}

static void tcg_commit(MemoryListener *listener)
{
    CPUState *cpu;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    CPU_FOREACH(cpu) {
        /* FIXME: Disentangle the cpu.h circular files deps so we can
           directly get the right CPU from listener.  */
        if (cpu->tcg_as_listener != listener) {
            continue;
        }
        cpu_reload_memory_map(cpu);
    }
}

void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    atomic_rcu_set(&as->dispatch, NULL);
    if (d) {
        call_rcu(d, address_space_dispatch_free, rcu);
    }
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID)) {
            return -1;
        }
        if (is_write) {
            if (!(flags & PAGE_WRITE)) {
                return -1;
            }
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0))) {
                return -1;
            }
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ)) {
                return -1;
            }
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1))) {
                return -1;
            }
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
                                     hwaddr length)
{
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_range(addr, addr + length);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
    }
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}
2290
static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2291
{
2292
    unsigned access_size_max = mr->ops->valid.max_access_size;
2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
2306
    }
2307 2308 2309 2310

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
2311
    }
2312 2313 2314
    if (l & (l - 1)) {
        l = 1 << (qemu_fls(l) - 1);
    }
2315 2316

    return l;
2317 2318
}

2319 2320
MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
B
bellard 已提交
2321
{
2322
    hwaddr l;
B
bellard 已提交
2323
    uint8_t *ptr;
2324
    uint64_t val;
2325
    hwaddr addr1;
2326
    MemoryRegion *mr;
2327
    MemTxResult result = MEMTX_OK;
2328

2329
    rcu_read_lock();
B
bellard 已提交
2330
    while (len > 0) {
2331
        l = len;
2332
        mr = address_space_translate(as, addr, &addr1, &l, is_write);
2333

B
bellard 已提交
2334
        if (is_write) {
2335 2336
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
2337
                /* XXX: could force current_cpu to NULL to avoid
B
bellard 已提交
2338
                   potential bugs */
2339 2340 2341 2342
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
2343 2344
                    result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                           attrs);
2345 2346
                    break;
                case 4:
B
bellard 已提交
2347
                    /* 32 bit write access */
B
bellard 已提交
2348
                    val = ldl_p(buf);
2349 2350
                    result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                           attrs);
2351 2352
                    break;
                case 2:
B
bellard 已提交
2353
                    /* 16 bit write access */
B
bellard 已提交
2354
                    val = lduw_p(buf);
2355 2356
                    result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                           attrs);
2357 2358
                    break;
                case 1:
B
bellard 已提交
2359
                    /* 8 bit write access */
B
bellard 已提交
2360
                    val = ldub_p(buf);
2361 2362
                    result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                           attrs);
2363 2364 2365
                    break;
                default:
                    abort();
B
bellard 已提交
2366
                }
2367
            } else {
2368
                addr1 += memory_region_get_ram_addr(mr);
B
bellard 已提交
2369
                /* RAM case */
P
pbrook 已提交
2370
                ptr = qemu_get_ram_ptr(addr1);
B
bellard 已提交
2371
                memcpy(ptr, buf, l);
2372
                invalidate_and_set_dirty(mr, addr1, l);
B
bellard 已提交
2373 2374
            }
        } else {
2375
            if (!memory_access_is_direct(mr, is_write)) {
B
bellard 已提交
2376
                /* I/O case */
2377
                l = memory_access_size(mr, l, addr1);
2378 2379 2380
                switch (l) {
                case 8:
                    /* 64 bit read access */
2381 2382
                    result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                          attrs);
2383 2384 2385
                    stq_p(buf, val);
                    break;
                case 4:
B
bellard 已提交
2386
                    /* 32 bit read access */
2387 2388
                    result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                          attrs);
B
bellard 已提交
2389
                    stl_p(buf, val);
2390 2391
                    break;
                case 2:
B
bellard 已提交
2392
                    /* 16 bit read access */
2393 2394
                    result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                          attrs);
B
bellard 已提交
2395
                    stw_p(buf, val);
2396 2397
                    break;
                case 1:
B
bellard 已提交
2398
                    /* 8 bit read access */
2399 2400
                    result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                          attrs);
B
bellard 已提交
2401
                    stb_p(buf, val);
2402 2403 2404
                    break;
                default:
                    abort();
B
bellard 已提交
2405 2406 2407
                }
            } else {
                /* RAM case */
2408
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2409
                memcpy(buf, ptr, l);
B
bellard 已提交
2410 2411 2412 2413 2414 2415
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
2416
    rcu_read_unlock();
2417

2418
    return result;
B
bellard 已提交
2419
}
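
/*
 * Illustrative sketch of a caller (hypothetical device code): read a guest
 * descriptor and check the transaction result instead of assuming success.
 *
 *     uint8_t desc[16];
 *     MemTxResult res;
 *
 *     res = address_space_rw(&address_space_memory, desc_gpa,
 *                            MEMTXATTRS_UNSPECIFIED, desc, sizeof(desc),
 *                            false);
 *     if (res != MEMTX_OK) {
 *         // desc_gpa hit an unassigned or error-returning region
 *     }
 */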

MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
}

MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                               uint8_t *buf, int len)
{
    return address_space_rw(as, addr, attrs, buf, len, false);
}


void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}

enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}
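
/*
 * Illustrative sketch: a firmware loader that patches guest code in place
 * would pair the ROM write with an icache flush so KVM/Xen guests do not
 * execute stale bytes (the address and buffer below are hypothetical):
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, 0xfffc0000,
 *                                   patch, patch_len);
 *     cpu_flush_icache_range(0xfffc0000, patch_len);
 */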

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}
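
/*
 * Illustrative sketch: a DMA user that loses the race for the single bounce
 * buffer registers a bottom half and retries when notified (the names below
 * are hypothetical):
 *
 *     static void retry_dma_bh(void *opaque)
 *     {
 *         MyDMAState *dma = opaque;
 *         my_dma_try_map_again(dma);   // calls address_space_map() again
 *     }
 *
 *     ...
 *     if (!address_space_map(as, addr, &len, is_write)) {
 *         cpu_register_map_client(dma->bh);  // bh made with qemu_bh_new()
 *     }
 *
 * The notification fires from cpu_notify_map_clients() once the bounce
 * buffer is released in address_space_unmap().
 */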

void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                rcu_read_unlock();
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    rcu_read_unlock();
    return true;
}
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    rcu_read_unlock();
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}
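
/*
 * Illustrative sketch of the intended map/use/unmap pattern (hypothetical
 * caller code):
 *
 *     hwaddr len = size;
 *     void *buf = address_space_map(as, gpa, &len, true);
 *
 *     if (!buf) {
 *         // resources exhausted: register a map client and retry later
 *     } else {
 *         // use at most 'len' bytes, which may be less than 'size'
 *         memset(buf, 0, len);
 *         address_space_unmap(as, buf, len, true, len);
 *     }
 */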

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
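
/*
 * Illustrative sketch: callers that care about bus errors should prefer the
 * attrs/result variants over the ldl_phys() convenience wrappers above
 * (hypothetical caller code):
 *
 *     MemTxResult res;
 *     uint32_t v = address_space_ldl(&address_space_memory, gpa,
 *                                    MEMTXATTRS_UNSPECIFIED, &res);
 *     if (res != MEMTX_OK) {
 *         // the device can raise an interrupt or set an error status here
 *     }
 */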

/* warning: addr must be aligned */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;
    MemTxResult r;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    rcu_read_unlock();
    return val;
}

uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    uint8_t dirty_log_mask;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    rcu_read_unlock();
}

void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
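
/*
 * Illustrative sketch: a target MMU that writes accessed/dirty bits back
 * into a guest page table entry uses the _notdirty variant so the store
 * does not itself set the very dirty bits used to track PTE modifications
 * (hypothetical target code):
 *
 *     pte |= PG_ACCESSED_MASK;
 *     stl_phys_notdirty(cs->as, pte_addr, pte);
 */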

/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

3156
/* warning: addr must be aligned */
3157 3158 3159 3160 3161
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
B
bellard 已提交
3162
{
3163
    uint8_t *ptr;
3164
    MemoryRegion *mr;
3165 3166
    hwaddr l = 2;
    hwaddr addr1;
3167
    MemTxResult r;
3168

3169
    rcu_read_lock();
3170
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    rcu_read_unlock();
}

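/* 16-bit analogues of the stl wrappers above. */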
void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize - byte-swaps val in place, then writes it through the
 * generic address_space_rw() path */
void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
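
/*
 * All of the *_phys store helpers above pass MEMTXATTRS_UNSPECIFIED and
 * a NULL result pointer: they are for callers that neither supply
 * transaction attributes nor care whether the store actually succeeded.
 */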

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
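
/*
 * Usage sketch (hypothetical, in the spirit of the gdbstub): read LEN
 * bytes of guest virtual memory from a stopped CPU for inspection.
 * LEN and vaddr are placeholders supplied by the caller.
 *
 *     uint8_t buf[LEN];
 *     if (cpu_memory_rw_debug(cpu, vaddr, buf, LEN, 0) < 0) {
 *         // some page in [vaddr, vaddr + LEN) had no physical mapping
 *     }
 *
 * Writes (is_write != 0) go through cpu_physical_memory_write_rom() so
 * that a debugger can plant breakpoints even in ROM-backed pages.
 */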
#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}
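
/*
 * Example (hypothetical caller): check whether a physical address is
 * backed by RAM (or ROMD) before attempting direct access:
 *
 *     if (!cpu_physical_memory_is_io(paddr)) {
 *         // paddr can be read/written as ordinary memory
 *     }
 */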

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
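
/*
 * Usage sketch (hypothetical callback, not part of this file): dump
 * every RAM block.  Iteration stops early if the callback returns
 * non-zero, and the block list is only guaranteed stable while the
 * call runs (it is walked under rcu_read_lock()).
 *
 *     static int dump_block(const char *idstr, void *host_addr,
 *                           ram_addr_t offset, ram_addr_t length,
 *                           void *opaque)
 *     {
 *         printf("%s: host %p offset " RAM_ADDR_FMT
 *                " len " RAM_ADDR_FMT "\n",
 *                idstr, host_addr, offset, length);
 *         return 0;
 *     }
 *
 *     qemu_ram_foreach_block(dump_block, NULL);
 */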
#endif