/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"

#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "qemu/cache-utils.h"

#include "qemu/range.h"

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
static bool in_migration;

RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE).
       0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];
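
/* Worked example (illustration only, assuming a 4 KiB target page,
 * i.e. TARGET_PAGE_BITS == 12): each level of this radix tree resolves
 * P_L2_BITS == 9 bits of the page frame number, so
 * P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6 levels, covering
 * 6 * 9 = 54 bits -- enough for the 52-bit page frame number of a
 * 64-bit address space.  A PhysPageEntry with skip == 2 jumps over two
 * of those 9-bit levels in a single step.
 */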

typedef struct PhysPageMap {
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);

static MemoryRegion io_mem_watch;
#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map)
{
    unsigned i;
    uint32_t ret;

    ret = map->nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);
    for (i = 0; i < P_L2_SIZE; ++i) {
        map->nodes[ret][i].skip = 1;
        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map);
        p = map->nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < P_L2_SIZE; i++) {
                p[i].skip = 0;
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
            }
        }
    } else {
        p = map->nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry. Simply detect that the entry has a single
 * child, and update our entry so we can skip it and go directly to the
 * destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}
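
/* Compaction, illustrated (not part of the original source): suppose a
 * single 4 KiB page is mapped, so every interior node down the path has
 * exactly one valid child.  Folding that chain, an entry that pointed
 * through three single-child nodes (each contributing skip == 1) ends
 * up with skip == 3 and ptr aimed directly at the lowest node, so the
 * walk in phys_page_find() consumes those three levels in one
 * iteration.
 */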

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}
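
/* Lookup walk, illustrated (assuming a 4 KiB target page): in an
 * uncompacted tree every entry has skip == 1, so the loop first
 * computes i = 6 - 1 = 5 and indexes with bits [45..53] of the page
 * index, then descends one 9-bit slice per iteration until it reaches
 * a leaf (skip == 0) whose ptr names a MemoryRegionSection.  The final
 * range_covers_byte() check guards the compacted case: a skipped path
 * may land on a leaf whose section does not actually cover addr, in
 * which case the page is treated as unassigned.
 */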

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    diff = int128_sub(section->mr->size, int128_make64(addr));
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    return section;
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    hwaddr len = *plen;

    for (;;) {
        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        len = MIN(page, len);
    }

    *plen = len;
    *xlat = addr;
    return mr;
}
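
/* Translation example (illustrative, not from the original source): an
 * access that hits an IOMMU region loops back through the IOMMU's
 * target address space.  With iotlb.addr_mask == 0xfff (a 4 KiB
 * translation granule), translated_addr == 0x80000000 and an input
 * addr of 0x10000abc, the new addr becomes
 * 0x80000000 | 0xabc == 0x80000abc, and len is clipped to 0x544 bytes
 * so the access cannot cross the translation boundary.  The loop
 * repeats until a non-IOMMU region is reached or the permission check
 * fails, so nested IOMMUs compose naturally.
 */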

MemoryRegionSection *
address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
                                  hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
#endif
}

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu->env_ptr, 1);

    return 0;
}

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUState *some_cpu;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = 0;
    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();
#endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
        tb_invalidate_phys_addr(&address_space_memory,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}
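
/* Example of the sanity check above (illustration only): len must be a
 * power of two and addr aligned to it.  For len == 4, len_mask == ~3;
 * addr == 0x1000 passes (0x1000 & 3 == 0) while addr == 0x1003 is
 * rejected with -EINVAL (0x1003 & 3 != 0).  The stored len_mask is
 * later used by check_watchpoint() to match any access overlapping the
 * watched range.
 */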

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(env, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
                          CPUBreakpoint **breakpoint)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
    }

    breakpoint_invalidate(ENV_GET_CPU(env), pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(env, bp);
            return 0;
        }
    }
    return -ENOENT;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);

    g_free(breakpoint);
#endif
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
        if (bp->flags & mask)
            cpu_breakpoint_remove_by_ref(env, bp);
    }
#endif
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            CPUArchState *env = cpu->env_ptr;
            tb_flush(env);
        }
    }
#endif
}

void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    CPUState *cpu = ENV_GET_CPU(env);
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    /* The list is protected by the iothread lock here.  */
    block = ram_list.mru_block;
    if (block && addr - block->offset < block->length) {
        goto found;
    }
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    ram_list.mru_block = block;
    return block;
}
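
/* The single-entry mru_block cache above is worth a note: this lookup
 * is hot (every RAM access through the softmmu slow path ends up
 * here), and most lookups hit the same block -- typically the machine's
 * main RAM block, e.g. "pc.ram" on x86 -- so checking the
 * most-recently-used block first avoids walking the list in the common
 * case.  The list itself is kept sorted from biggest to smallest block
 * (see qemu_ram_alloc_from_ptr) for the same reason.
 */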

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)block->host + (start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
}

/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
                                     unsigned client)
{
    if (length == 0)
        return;
    cpu_physical_memory_clear_dirty_range(start, length, client);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

static void cpu_physical_memory_set_dirty_tracking(bool enable)
{
    in_migration = enable;
}

hwaddr memory_region_section_get_iotlb(CPUArchState *env,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - section->address_space->dispatch->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}
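
/* The assert above enforces the iotlb encoding trick: an iotlb value
 * for RAM is a page-aligned ram_addr with a small section number ORed
 * into the low TARGET_PAGE_BITS bits (see
 * memory_region_section_get_iotlb).  Illustration, assuming a 4 KiB
 * page: a writable RAM page at ram_addr 0x4000 encodes as
 * 0x4000 | PHYS_SECTION_NOTDIRTY == 0x4001, so section indexes must
 * stay below TARGET_PAGE_SIZE or they would corrupt the address part.
 */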

static void phys_section_destroy(MemoryRegion *mr)
{
    memory_region_unref(mr);

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        memory_region_destroy(&subpage->iomem);
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}

static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
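
/* Splitting example for mem_add (illustrative, assuming 4 KiB pages):
 * a section covering [0x0800, 0x3400) is registered in three steps:
 *   1. head   [0x0800, 0x1000) -> register_subpage (unaligned start)
 *   2. middle [0x1000, 0x3000) -> register_multipage (whole pages)
 *   3. tail   [0x3000, 0x3400) -> register_subpage (partial last page)
 * Only partial pages pay the cost of the per-byte sub_section table;
 * page-aligned spans go straight into the radix tree.
 */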

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}

static sigjmp_buf sigjump;

static void sigbus_handler(int signal)
{
    siglongjmp(sigjump, 1);
}

static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        return NULL;
    }

    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        return NULL;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
    sanitized_name = g_strdup(block->mr->name);
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/')
            *c = '_';
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        g_free(filename);
        return NULL;
    }
    unlink(filename);
    g_free(filename);

    memory = (memory + hpagesize - 1) & ~(hpagesize - 1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        return NULL;
    }

    if (mem_prealloc) {
        int ret, i;
        struct sigaction act, oldact;
        sigset_t set, oldset;

        memset(&act, 0, sizeof(act));
        act.sa_handler = &sigbus_handler;
        act.sa_flags = 0;

        ret = sigaction(SIGBUS, &act, &oldact);
        if (ret) {
            perror("file_ram_alloc: failed to install signal handler");
            exit(1);
        }

        /* unblock SIGBUS */
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, &oldset);

        if (sigsetjmp(sigjump, 1)) {
            fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
            exit(1);
        }

        /* MAP_POPULATE silently ignores failures */
        for (i = 0; i < (memory / hpagesize); i++) {
            memset(area + (hpagesize * i), 0, 1);
        }

        ret = sigaction(SIGBUS, &oldact, NULL);
        if (ret) {
            perror("file_ram_alloc: failed to reinstall signal handler");
            exit(1);
        }

        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
    }

    block->fd = fd;
    return area;
}
#else
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    fprintf(stderr, "-mem-path not supported on this host\n");
    exit(1);
}
#endif
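
/* Typical use of the hugetlbfs path above: start QEMU with
 * "-mem-path /dev/hugepages" (optionally adding "-mem-prealloc") so
 * guest RAM is backed by huge pages.  gethugepagesize() then reports
 * the mount's page size via statfs -- f_bsize is 2 MiB for the default
 * x86_64 hugetlbfs mount -- and the requested size is rounded up to a
 * multiple of it before mmap.
 */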

static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QTAILQ_EMPTY(&ram_list.blocks))
        return 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}
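
/* Best-fit illustration (not from the original source): with blocks at
 * [0x0, 0x8000000) and [0x10000000, 0x18000000), a request for
 * 0x4000000 bytes sees two gaps: 0x8000000..0x10000000 (0x8000000
 * bytes) and everything past 0x18000000 (effectively unbounded).  The
 * first gap is the smallest one that fits, so the new block lands at
 * offset 0x8000000.  These offsets name positions in the ram_addr_t
 * space, not guest-physical addresses.
 */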

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
        last = MAX(last, block->offset + block->length);

    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!qemu_opt_get_bool(qemu_get_machine_opts(),
                           "dump-guest-core", true)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    new_block = NULL;
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            new_block = block;
            break;
        }
    }
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    qemu_mutex_unlock_ramlist();
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *block, *new_block;
    ram_addr_t old_ram_size, new_ram_size;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->fd = -1;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else if (xen_enabled()) {
        if (mem_path) {
            fprintf(stderr, "-mem-path not supported with Xen\n");
            exit(1);
        }
        xen_ram_alloc(new_block->offset, size, mr);
    } else {
        if (mem_path) {
            if (phys_mem_alloc != qemu_anon_ram_alloc) {
                /*
                 * file_ram_alloc() needs to allocate just like
                 * phys_mem_alloc, but we haven't bothered to provide
                 * a hook there.
                 */
                fprintf(stderr,
                        "-mem-path not supported with this accelerator\n");
                exit(1);
            }
            new_block->host = file_ram_alloc(new_block, size, mem_path);
        }
        if (!new_block->host) {
            new_block->host = phys_mem_alloc(size);
            if (!new_block->host) {
                fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
                        new_block->mr->name, strerror(errno));
                exit(1);
            }
            memory_try_enable_merging(new_block->host, size);
        }
    }
    new_block->length = size;

    /* Keep the list sorted from biggest to smallest block.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->length < new_block->length) {
            break;
        }
    }
    if (block) {
        QTAILQ_INSERT_BEFORE(block, new_block, next);
    } else {
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    if (new_ram_size > old_ram_size) {
        int i;
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            ram_list.dirty_memory[i] =
                bitmap_zero_extend(ram_list.dirty_memory[i],
                                   old_ram_size, new_ram_size);
        }
    }
    cpu_physical_memory_set_dirty_range(new_block->offset, size);

    qemu_ram_setup_dump(new_block->host, size);
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
    qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}
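
/* Usage sketch (illustrative): board code does not usually call
 * qemu_ram_alloc() directly; it goes through the MemoryRegion API,
 * which allocates the backing RAMBlock and records its ram_addr_t
 * offset, roughly:
 *
 *     memory_region_init_ram(mr, owner, "pc.ram", ram_size);
 *     // internally: mr->ram_addr = qemu_ram_alloc(ram_size, mr);
 *
 * The returned offset is a handle into the ram_addr_t space used by
 * qemu_get_ram_ptr() and the dirty bitmaps, not a guest address.
 */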

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (xen_enabled()) {
                xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
            } else if (block->fd >= 0) {
                munmap(block->host, block->length);
                close(block->fd);
#endif
            } else {
                qemu_anon_ram_free(block->host, block->length);
            }
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (block->fd >= 0) {
#ifdef MAP_POPULATE
                    flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                        MAP_PRIVATE;
#else
                    flags |= MAP_PRIVATE;
#endif
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
            return;
        }
    }
}
#endif /* !_WIN32 */

/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block = qemu_get_ram_block(addr);

    if (xen_enabled()) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        } else if (block->host == NULL) {
            block->host =
                xen_map_cache(block->offset, block->length, 1);
        }
    }
    return block->host + (addr - block->offset);
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;

        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->length) {
                if (addr - block->offset + *size > block->length)
                    *size = block->length - addr + block->offset;
                return block->host + (addr - block->offset);
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return qemu_get_ram_block(*ram_addr)->mr;
    }

    block = ram_list.mru_block;
    if (block && block->host && host - block->host < block->length) {
        goto found;
    }

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        /* This case appears when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            goto found;
        }
    }

    return NULL;

found:
    *ram_addr = block->offset + (host - block->host);
    return block->mr;
}

static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        CPUArchState *env = current_cpu->env_ptr;
        tlb_set_dirty(env, env->mem_io_vaddr);
    }
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
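
/* Dirty-tracking flow in brief: a clean RAM page is entered in the TLB
 * with the PHYS_SECTION_NOTDIRTY iotlb encoding, so the first guest
 * store to it lands in notdirty_mem_write() above.  That invalidates
 * any translated code on the page, performs the store, and sets the
 * MIGRATION and VGA dirty bits; once the page is no longer clean,
 * tlb_set_dirty() re-enables fast direct writes, so this slow path is
 * taken only once per page between dirty-bitmap resets.
 */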

P
pbrook 已提交
1550
/* Generate a debug exception if a watchpoint has been hit.  */
1551
static void check_watchpoint(int offset, int len_mask, int flags)
P
pbrook 已提交
1552
{
1553
    CPUArchState *env = current_cpu->env_ptr;
1554
    target_ulong pc, cs_base;
P
pbrook 已提交
1555
    target_ulong vaddr;
1556
    CPUWatchpoint *wp;
1557
    int cpu_flags;
P
pbrook 已提交
1558

1559 1560 1561 1562
    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that is will trigger after the
         * current instruction. */
1563
        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1564 1565
        return;
    }
P
pbrook 已提交
1566
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
B
Blue Swirl 已提交
1567
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1568 1569
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1570 1571 1572
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
B
Blue Swirl 已提交
1573
                tb_check_watchpoint(env);
1574 1575
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
1576
                    cpu_loop_exit(env);
1577 1578 1579
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1580
                    cpu_resume_from_signal(env, NULL);
1581
                }
1582
            }
1583 1584
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
P
pbrook 已提交
1585 1586 1587 1588
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
    case 1: return ldub_phys(addr);
    case 2: return lduw_phys(addr);
    case 4: return ldl_phys(addr);
    default: abort();
    }
}

static void watch_mem_write(void *opaque, hwaddr addr,
                            uint64_t val, unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
        stb_phys(addr, val);
        break;
    case 2:
        stw_phys(addr, val);
        break;
    case 4:
        stl_phys(addr, val);
        break;
    default: abort();
    }
}

static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
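
/* Subpage support.  A subpage_t stands in for a target page that is
 * shared by more than one memory region (for example a small MMIO
 * region that does not start or end on a page boundary).  Its
 * callbacks below simply re-enter the owning address space at
 * subpage->base + addr, where the finer-grained lookup resolves the
 * access to the real region.
 */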

static uint64_t subpage_read(void *opaque, hwaddr addr,
                             unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    address_space_read(subpage->as, addr + subpage->base, buf, len);
    switch (len) {
    case 1:
        return ldub_p(buf);
    case 2:
        return lduw_p(buf);
    case 4:
        return ldl_p(buf);
    default:
        abort();
    }
}

static void subpage_write(void *opaque, hwaddr addr,
                          uint64_t value, unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    default:
        abort();
    }
    address_space_write(subpage->as, addr + subpage->base, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          "subpage", TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}

static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
{
    MemoryRegionSection section = {
        .address_space = &address_space_memory,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}

MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
{
    return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
}

A
Avi Kivity 已提交
1756 1757
static void io_mem_init(void)
{
1758 1759
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1760
                          "unassigned", UINT64_MAX);
1761
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1762
                          "notdirty", UINT64_MAX);
1763
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1764
                          "watch", UINT64_MAX);
A
Avi Kivity 已提交
1765 1766
}
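
/* Dispatch-tree rebuild.  Each topology update runs in two phases:
 * mem_begin() starts from an empty AddressSpaceDispatch and re-adds the
 * four fixed sections (the asserts pin their indices to the
 * PHYS_SECTION_* constants used on the TLB fill path), then mem_commit()
 * publishes the new tree and frees the previous one.
 */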

static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}

static void mem_commit(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    phys_page_compact_all(next, next->map.nodes_nb);

    as->dispatch = next;

    if (cur) {
        phys_sections_free(&cur->map);
        g_free(cur);
    }
}

static void tcg_commit(MemoryListener *listener)
{
    CPUState *cpu;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(true);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(false);
}

static MemoryListener core_memory_listener = {
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};

void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&as->dispatch_listener);
    g_free(d);
    as->dispatch = NULL;
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");

    memory_listener_register(&core_memory_listener, &address_space_memory);
    if (tcg_enabled()) {
        memory_listener_register(&tcg_memory_listener, &address_space_memory);
    }
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (cpu_physical_memory_is_clean(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    }
    xen_modified_memory(addr, length);
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    if (l & (l - 1)) {
        l = 1 << (qemu_fls(l) - 1);
    }

    return l;
}
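
/* Worked example for memory_access_size(): with max_access_size == 4,
 * impl.unaligned clear and an address whose lowest set bit is 2
 * (addr & -addr == 2), the effective maximum drops from 4 to 2, so an
 * 8-byte request is truncated to 2; a non-power-of-two leftover such
 * as l == 3 is rounded down to 2, the largest power of two below it,
 * by the qemu_fls() step.
 */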

bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force current_cpu to NULL to avoid
                   potential bugs */
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
                    error |= io_mem_write(mr, addr1, val, 8);
                    break;
                case 4:
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                    break;
                case 2:
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                    break;
                case 1:
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                    break;
                default:
                    abort();
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                switch (l) {
                case 8:
                    /* 64 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 8);
                    stq_p(buf, val);
                    break;
                case 4:
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                    break;
                case 2:
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                    break;
                case 1:
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                    break;
                default:
                    abort();
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}
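
/* Usage sketch (illustrative only): a device model that needs to push a
 * buffer into guest-physical memory can call
 *
 *     uint8_t data[64];
 *     bool err = address_space_rw(&address_space_memory, gpa, data,
 *                                 sizeof(data), true);
 *
 * where gpa is a hypothetical guest physical address; a true return
 * value means at least one underlying io_mem access reported an error.
 */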
bool address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    return address_space_rw(as, addr, buf, len, false);
}


void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    while (len > 0) {
        l = len;
        mr = address_space_translate(&address_space_memory,
                                     addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE);
}
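
/* Bounce-buffer machinery.  address_space_map() below falls back to the
 * single static BounceBuffer when the target is not directly mappable
 * RAM.  Since there is only one, a second concurrent mapping attempt
 * returns NULL; callers register a MapClient callback to learn when the
 * buffer is released again by address_space_unmap().
 */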

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}
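
/* address_space_access_valid() mirrors the translate-and-clamp loop of
 * address_space_rw(), but only asks each region whether it would accept
 * the access, so a transaction can be validated without performing it.
 */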

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (bounce.buffer) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}
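
/* Typical map/unmap pairing (sketch, error handling elided):
 *
 *     hwaddr maplen = len;
 *     void *p = address_space_map(as, addr, &maplen, is_write);
 *     if (p) {
 *         ... access at most maplen bytes at p ...
 *         address_space_unmap(as, p, maplen, is_write, access_len);
 *     }
 *
 * Callers must cope with maplen coming back smaller than requested and
 * with a NULL return while the bounce buffer is busy.
 */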

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
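
/* The ldq/lduw and st* helpers below follow the same pattern as
 * ldl_phys_internal() above: the _le/_be variants differ from the
 * native-endian accessor only in the byte swap applied on the I/O path
 * and in the ld*_p/st*_p flavour used on the RAM path.
 */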

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (cpu_physical_memory_is_clean(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flag(addr1,
                                                   DIRTY_MEMORY_MIGRATION);
                cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
            }
        }
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
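
/* cpu_memory_rw_debug() below is the gdbstub's entry point into guest
 * memory: it translates one guest-virtual page at a time with
 * cpu_get_phys_page_debug() and uses the ROM-capable write path, so a
 * debugger can plant breakpoints even in ROM-backed pages.
 */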

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}

void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->length, opaque);
    }
}
#endif