/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"

#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "qemu/cache-utils.h"

#include "qemu/range.h"

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
static bool in_migration;

RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
#endif

#if !defined(CONFIG_USER_ONLY)

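/* Ensure the node pool has room for "nodes" more Node arrays, growing
 * geometrically so repeated insertions stay cheap. */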
static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map)
{
    unsigned i;
    uint32_t ret;

    ret = map->nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);
    for (i = 0; i < P_L2_SIZE; ++i) {
        map->nodes[ret][i].skip = 1;
        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

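/* Recursively fill the radix tree for [*index, *index + *nb) with "leaf".
 * Fully aligned, step-sized runs are recorded at this level; smaller or
 * unaligned remainders recurse one level down. */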
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map);
        p = map->nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < P_L2_SIZE; i++) {
                p[i].skip = 0;
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
            }
        }
    } else {
        p = map->nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

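/* Walk the radix tree for "addr", following "skip" shortcuts, and return
 * the matching section, or the unassigned section if the address is not
 * covered by the leaf that was found. */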
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}
B
bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}
307
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
308 309
                                                        hwaddr addr,
                                                        bool resolve_subpage)
310
{
311 312 313
    MemoryRegionSection *section;
    subpage_t *subpage;

314
    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
315 316
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
317
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
318 319
    }
    return section;
320 321
}

322
static MemoryRegionSection *
323
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
324
                                 hwaddr *plen, bool resolve_subpage)
325 326
{
    MemoryRegionSection *section;
327
    Int128 diff;
328

329
    section = address_space_lookup_region(d, addr, resolve_subpage);
330 331 332 333 334 335 336
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    diff = int128_sub(section->mr->size, int128_make64(addr));
337
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
338 339
    return section;
}
340

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

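/* Translate "addr" in "as" to a MemoryRegion and offset, iterating through
 * IOMMUs until a terminal region is reached.  *plen is clamped so the
 * returned range does not cross a translation boundary, and direct RAM
 * accesses are further clamped to one page. */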
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    hwaddr len = *plen;

    for (;;) {
        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        len = MIN(page, len);
    }

    *plen = len;
    *xlat = addr;
    return mr;
}

MemoryRegionSection *
address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
                                  hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
#endif
}

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu->env_ptr, 1);

    return 0;
}

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
{
    /* We only support one address space per cpu at the moment.  */
    assert(cpu->as == as);

    if (cpu->tcg_as_listener) {
        memory_listener_unregister(cpu->tcg_as_listener);
    } else {
        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
    }
    cpu->tcg_as_listener->commit = tcg_commit;
    memory_listener_register(cpu->tcg_as_listener, as);
}
#endif

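/* Register a newly created CPU: assign the next free index, add it to the
 * global CPU list, and wire up its vmstate/savevm handlers. */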
void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUState *some_cpu;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = 0;
    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
#endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(env, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
                          CPUBreakpoint **breakpoint)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
    }

    breakpoint_invalidate(ENV_GET_CPU(env), pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(env, bp);
            return 0;
        }
    }
    return -ENOENT;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);

    g_free(breakpoint);
#endif
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
        if (bp->flags & mask)
            cpu_breakpoint_remove_by_ref(env, bp);
    }
#endif
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            CPUArchState *env = cpu->env_ptr;
            tb_flush(env);
        }
    }
#endif
}

void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    CPUState *cpu = ENV_GET_CPU(env);
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
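/* Return the RAMBlock containing "addr", checking the most recently used
 * block before scanning the whole list. */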
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    /* The list is protected by the iothread lock here.  */
    block = ram_list.mru_block;
    if (block && addr - block->offset < block->length) {
        goto found;
    }
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)block->host + (start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
}

/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
                                     unsigned client)
{
    if (length == 0)
        return;
    cpu_physical_memory_clear_dirty_range(start, length, client);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

static void cpu_physical_memory_set_dirty_tracking(bool enable)
{
    in_migration = enable;
}

hwaddr memory_region_section_get_iotlb(CPUArchState *env,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - section->address_space->dispatch->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    memory_region_unref(mr);

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        memory_region_destroy(&subpage->iomem);
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

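/* Register a section that does not cover a whole page: allocate (or reuse)
 * the subpage_t for that page and record the section in its table. */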
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}


static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

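/* Split an incoming section into page-aligned pieces: an unaligned head or
 * tail (or any piece smaller than a page) goes through register_subpage,
 * while the aligned middle is registered as one multipage run. */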
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}

static sigjmp_buf sigjump;

static void sigbus_handler(int signal)
{
    siglongjmp(sigjump, 1);
}

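/* Allocate guest RAM backed by a temporary file in a hugetlbfs mount:
 * create the file under "path", size it with ftruncate, mmap it, and
 * touch every huge page up front when -mem-prealloc is in effect. */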
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        goto error;
    }

    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        goto error;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
    sanitized_name = g_strdup(block->mr->name);
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/')
            *c = '_';
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        g_free(filename);
        goto error;
    }
    unlink(filename);
    g_free(filename);

    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        goto error;
    }

    if (mem_prealloc) {
        int ret, i;
        struct sigaction act, oldact;
        sigset_t set, oldset;

        memset(&act, 0, sizeof(act));
        act.sa_handler = &sigbus_handler;
        act.sa_flags = 0;

        ret = sigaction(SIGBUS, &act, &oldact);
        if (ret) {
            perror("file_ram_alloc: failed to install signal handler");
            exit(1);
        }

        /* unblock SIGBUS */
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, &oldset);

        if (sigsetjmp(sigjump, 1)) {
            fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
            exit(1);
        }

        /* MAP_POPULATE silently ignores failures */
        for (i = 0; i < (memory/hpagesize); i++) {
            memset(area + (hpagesize*i), 0, 1);
        }

        ret = sigaction(SIGBUS, &oldact, NULL);
        if (ret) {
            perror("file_ram_alloc: failed to reinstall signal handler");
            exit(1);
        }

        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
    }

    block->fd = fd;
    return area;

error:
    if (mem_prealloc) {
        exit(1);
    }
    return NULL;
}
#else
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    fprintf(stderr, "-mem-path not supported on this host\n");
    exit(1);
}
#endif

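/* Choose an offset for a new RAM block by scanning the existing blocks
 * for the smallest gap that can hold "size" bytes. */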
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QTAILQ_EMPTY(&ram_list.blocks))
        return 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
        last = MAX(last, block->offset + block->length);

    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!qemu_opt_get_bool(qemu_get_machine_opts(),
                           "dump-guest-core", true)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    new_block = NULL;
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            new_block = block;
            break;
        }
    }
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    qemu_mutex_unlock_ramlist();
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

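/* Create a RAMBlock of "size" bytes.  The backing storage is, in order of
 * preference: the caller-supplied "host" pointer, Xen's allocator, a file
 * under -mem-path, or the configured phys_mem_alloc hook. */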
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *block, *new_block;
    ram_addr_t old_ram_size, new_ram_size;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->fd = -1;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else if (xen_enabled()) {
        if (mem_path) {
            fprintf(stderr, "-mem-path not supported with Xen\n");
            exit(1);
        }
        xen_ram_alloc(new_block->offset, size, mr);
    } else {
        if (mem_path) {
            if (phys_mem_alloc != qemu_anon_ram_alloc) {
                /*
                 * file_ram_alloc() needs to allocate just like
                 * phys_mem_alloc, but we haven't bothered to provide
                 * a hook there.
                 */
                fprintf(stderr,
                        "-mem-path not supported with this accelerator\n");
                exit(1);
            }
            new_block->host = file_ram_alloc(new_block, size, mem_path);
        }
        if (!new_block->host) {
            new_block->host = phys_mem_alloc(size);
            if (!new_block->host) {
                fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
                        new_block->mr->name, strerror(errno));
                exit(1);
            }
            memory_try_enable_merging(new_block->host, size);
        }
    }
    new_block->length = size;

    /* Keep the list sorted from biggest to smallest block.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->length < new_block->length) {
            break;
        }
    }
    if (block) {
        QTAILQ_INSERT_BEFORE(block, new_block, next);
    } else {
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    if (new_ram_size > old_ram_size) {
        int i;
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            ram_list.dirty_memory[i] =
                bitmap_zero_extend(ram_list.dirty_memory[i],
                                   old_ram_size, new_ram_size);
        }
    }
    cpu_physical_memory_set_dirty_range(new_block->offset, size);

    qemu_ram_setup_dump(new_block->host, size);
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
    qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (xen_enabled()) {
                xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
            } else if (block->fd >= 0) {
                munmap(block->host, block->length);
                close(block->fd);
#endif
            } else {
                qemu_anon_ram_free(block->host, block->length);
            }
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (block->fd >= 0) {
#ifdef MAP_POPULATE
                    flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                        MAP_PRIVATE;
#else
                    flags |= MAP_PRIVATE;
#endif
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
            return;
        }
    }
}
#endif /* !_WIN32 */

/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block = qemu_get_ram_block(addr);

    if (xen_enabled()) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        } else if (block->host == NULL) {
            block->host =
                xen_map_cache(block->offset, block->length, 1);
        }
    }
    return block->host + (addr - block->offset);
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;

        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->length) {
                if (addr - block->offset + *size > block->length)
                    *size = block->length - addr + block->offset;
                return block->host + (addr - block->offset);
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return qemu_get_ram_block(*ram_addr)->mr;
    }

    block = ram_list.mru_block;
    if (block && block->host && host - block->host < block->length) {
        goto found;
    }

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        /* This can happen when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            goto found;
        }
    }

    return NULL;

found:
    *ram_addr = block->offset + (host - block->host);
    return block->mr;
}

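/* Slow-path write handler for pages containing translated code: invalidate
 * any TBs on the page before performing the store, mark the page dirty,
 * and switch back to the fast path once the page no longer holds code. */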
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        CPUArchState *env = current_cpu->env_ptr;
        tlb_set_dirty(env, current_cpu->mem_io_vaddr);
    }
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUState *cpu = current_cpu;
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
                tb_check_watchpoint(env);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(env);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(env, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

1612 1613 1614
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
    case 1: return ldub_phys(&address_space_memory, addr);
    case 2: return lduw_phys(&address_space_memory, addr);
    case 4: return ldl_phys(&address_space_memory, addr);
    default: abort();
    }
}

static void watch_mem_write(void *opaque, hwaddr addr,
                            uint64_t val, unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
        stb_phys(&address_space_memory, addr, val);
        break;
    case 2:
        stw_phys(&address_space_memory, addr, val);
        break;
    case 4:
        stl_phys(&address_space_memory, addr, val);
        break;
    default: abort();
    }
}

static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
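
/*
 * Illustrative use (hypothetical debugger code; signature as in this
 * tree): arming a write watchpoint makes the TLB route the page's
 * accesses through watch_mem_ops:
 *
 *     cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE | BP_GDB, NULL);
 *
 * check_watchpoint() then raises EXCP_DEBUG when the watched address
 * is hit.
 */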

static uint64_t subpage_read(void *opaque, hwaddr addr,
                             unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    address_space_read(subpage->as, addr + subpage->base, buf, len);
    switch (len) {
    case 1:
        return ldub_p(buf);
    case 2:
        return lduw_p(buf);
    case 4:
        return ldl_p(buf);
    default:
        abort();
    }
}

static void subpage_write(void *opaque, hwaddr addr,
                          uint64_t value, unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    default:
        abort();
    }
    address_space_write(subpage->as, addr + subpage->base, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) {
        return -1;
    }
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          "subpage", TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}
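
/*
 * Illustrative use (hypothetical values): a subpage splits a single
 * TARGET_PAGE_SIZE guest page between several sections, e.g. a small
 * device window carved out of an otherwise RAM-backed page:
 *
 *     subpage_t *sp = subpage_init(&address_space_memory, page_base);
 *     subpage_register(sp, 0x400, 0x7ff, dev_section_idx);
 *
 * Accesses that fall into [0x400, 0x7ff] within the page are then
 * re-dispatched through subpage_ops into the registered section.
 */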

static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
{
    MemoryRegionSection section = {
        .address_space = &address_space_memory,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}

MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
{
    return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL,
                          "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}

static void mem_commit(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    phys_page_compact_all(next, next->map.nodes_nb);

    as->dispatch = next;

    if (cur) {
        phys_sections_free(&cur->map);
        g_free(cur);
    }
}
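
/*
 * The begin/commit pair implements a shadow rebuild: mem_begin() starts
 * a fresh AddressSpaceDispatch in as->next_dispatch while the old one
 * keeps serving lookups, the region_add callbacks populate it, and
 * mem_commit() compacts the new radix tree, swings as->dispatch over to
 * it and frees the old table, so lookups never see a half-built map.
 */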

static void tcg_commit(MemoryListener *listener)
{
    CPUState *cpu;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        /* FIXME: Disentangle the cpu.h circular files deps so we can
           directly get the right CPU from listener.  */
        if (cpu->tcg_as_listener != listener) {
            continue;
        }
        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(true);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(false);
}

static MemoryListener core_memory_listener = {
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&as->dispatch_listener);
    g_free(d);
    as->dispatch = NULL;
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");

    memory_listener_register(&core_memory_listener, &address_space_memory);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}
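
/*
 * Example (hypothetical board code): machine models fetch the root
 * MemoryRegion and attach RAM or MMIO regions at fixed offsets:
 *
 *     MemoryRegion *sysmem = get_system_memory();
 *     memory_region_add_subregion(sysmem, 0x0, machine_ram);
 */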

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID)) {
            return -1;
        }
        if (is_write) {
            if (!(flags & PAGE_WRITE)) {
                return -1;
            }
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0))) {
                return -1;
            }
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ)) {
                return -1;
            }
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1))) {
                return -1;
            }
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (cpu_physical_memory_is_clean(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    }
    xen_modified_memory(addr, length);
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    /* Round a non-power-of-two length down to the nearest power of two. */
    if (l & (l - 1)) {
        l = 1 << (qemu_fls(l) - 1);
    }

    return l;
}
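
/*
 * Worked example (illustrative numbers): for a region advertising
 * valid.max_access_size = 4 and no unaligned support, a request of
 * l = 8 at addr = 0x1002 is capped by the 2-byte alignment of the
 * address (0x1002 & -0x1002 == 2), so a 2-byte access is issued and
 * the caller's loop handles the remaining bytes.
 */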

bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force current_cpu to NULL to avoid
                   potential bugs */
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
                    error |= io_mem_write(mr, addr1, val, 8);
                    break;
                case 4:
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                    break;
                case 2:
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                    break;
                case 1:
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                    break;
                default:
                    abort();
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                switch (l) {
                case 8:
                    /* 64 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 8);
                    stq_p(buf, val);
                    break;
                case 4:
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                    break;
                case 2:
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                    break;
                case 1:
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                    break;
                default:
                    abort();
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}

bool address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    return address_space_rw(as, addr, buf, len, false);
}
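
/*
 * Example (hypothetical device code): copying a descriptor out of guest
 * memory; the result is true if any part of the access failed, e.g. by
 * hitting an unassigned region:
 *
 *     uint8_t desc[16];
 *     bool err = address_space_read(&address_space_memory, desc_addr,
 *                                   desc, sizeof(desc));
 */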


void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

/* used for ROM loading: can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}
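
/*
 * Example (hypothetical loader code): after writing guest code pages
 * directly, the host instruction cache must be flushed when running
 * under KVM or Xen:
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, kernel_addr,
 *                                   blob, blob_size);
 *     cpu_flush_icache_range(kernel_addr, blob_size);
 */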

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (bounce.buffer) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len) {
                    l = access_len;
                }
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    cpu_notify_map_clients();
}
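
/*
 * Usage sketch (hypothetical DMA helper): the map/use/unmap pattern with
 * the bounce-buffer fallback; a NULL return means the single bounce
 * buffer is busy and cpu_register_map_client() will call back once it
 * frees up:
 *
 *     hwaddr plen = size;
 *     uint8_t *p = address_space_map(as, gpa, &plen, true);
 *     if (p == NULL) {
 *         cpu_register_map_client(opaque, retry_fn);
 *         return;
 *     }
 *     memcpy(p, data, plen);
 *     address_space_unmap(as, p, plen, 1, plen);
 *
 * plen may come back smaller than the requested size, in which case the
 * caller must loop over the remainder.
 */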

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    uint8_t val;
    address_space_rw(as, addr, &val, 1, 0);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
}
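
/*
 * Example (hypothetical device code): the fixed-endian helpers shield
 * callers from both host and target byte order, e.g. when reading a
 * little-endian ring descriptor:
 *
 *     uint64_t next  = ldq_le_phys(&address_space_memory, ring + 8);
 *     uint32_t flags = ldl_le_phys(&address_space_memory, ring + 16);
 */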

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (cpu_physical_memory_is_clean(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flag(addr1,
                                                   DIRTY_MEMORY_MIGRATION);
                cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
            }
        }
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(AddressSpace *as,
                                     hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    address_space_rw(as, addr, &v, 1, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(AddressSpace *as,
                                     hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    address_space_rw(as, addr, (void *) &val, 8, 1);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    address_space_rw(as, addr, (void *) &val, 8, 1);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    address_space_rw(as, addr, (void *) &val, 8, 1);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}

void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->length, opaque);
    }
}
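
/*
 * Example (hypothetical caller): vhost-style code walks every RAM block
 * to build a table of host mappings; the callback receives the host
 * pointer, the block's ram_addr_t offset and its length:
 *
 *     static void count_cb(void *host, ram_addr_t offset,
 *                          ram_addr_t length, void *opaque)
 *     {
 *         *(uint64_t *)opaque += length;
 *     }
 *
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(count_cb, &total);
 */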
#endif