/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"

#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "qemu/cache-utils.h"

#include "qemu/range.h"

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
static bool in_migration;

RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];
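
/* Illustrative note (a sketch added for exposition, not functional code):
 * assuming a typical TARGET_PAGE_BITS of 12, the radix tree above has
 * P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6 levels, and a physical address
 * decomposes as
 *
 *     index = addr >> TARGET_PAGE_BITS;
 *     slot at level i = (index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1);
 *
 * which is exactly the indexing used by phys_page_set_level() and
 * phys_page_find() below.
 */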

typedef struct PhysPageMap {
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map)
{
    unsigned i;
    uint32_t ret;

    ret = map->nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);
    for (i = 0; i < P_L2_SIZE; ++i) {
        map->nodes[ret][i].skip = 1;
        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map);
        p = map->nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < P_L2_SIZE; i++) {
                p[i].skip = 0;
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
            }
        }
    } else {
        p = map->nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    diff = int128_sub(section->mr->size, int128_make64(addr));
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    return section;
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    hwaddr len = *plen;

    for (;;) {
        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        len = MIN(page, len);
    }

    *plen = len;
    *xlat = addr;
    return mr;
}
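
/* Usage sketch (illustrative only; "addr" and "buf" are hypothetical):
 * callers resolve a guest physical address to a MemoryRegion before
 * dispatching the access, roughly as
 *
 *     hwaddr xlat, plen = 4;
 *     MemoryRegion *mr = address_space_translate(&address_space_memory,
 *                                                addr, &xlat, &plen, false);
 *     if (memory_access_is_direct(mr, false)) {
 *         memcpy(buf, qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + xlat),
 *                plen);
 *     }
 *
 * plen can come back smaller than requested when the access would cross a
 * region (or, under Xen, a page) boundary.
 */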

MemoryRegionSection *
address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
                                  hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
#endif
}

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
{
    /* We only support one address space per cpu at the moment.  */
    assert(cpu->as == as);

    if (cpu->tcg_as_listener) {
        memory_listener_unregister(cpu->tcg_as_listener);
    } else {
        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
    }
    cpu->tcg_as_listener->commit = tcg_commit;
    memory_listener_register(cpu->tcg_as_listener, as);
}
#endif

void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUState *some_cpu;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = 0;
    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&cpu->breakpoints);
    QTAILQ_INIT(&cpu->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
#endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    vaddr len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}
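
/* Usage sketch (illustrative only): this is roughly how the gdbstub plants
 * a 4-byte write watchpoint; BP_GDB keeps it at the head of the list so
 * GDB-owned watchpoints are found first.
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(cpu, addr, 4, BP_GDB | BP_MEM_WRITE, &wp) < 0) {
 *         // rejected: len not a power of two, or addr unaligned
 *     }
 */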

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    vaddr len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}
#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
#endif
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
#endif
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            CPUArchState *env = cpu->env_ptr;
            tb_flush(env);
        }
    }
#endif
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    /* The list is protected by the iothread lock here.  */
    block = ram_list.mru_block;
    if (block && addr - block->offset < block->length) {
        goto found;
    }
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)block->host + (start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
}

/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
                                     unsigned client)
{
    if (length == 0)
        return;
    cpu_physical_memory_clear_dirty_range(start, length, client);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

static void cpu_physical_memory_set_dirty_tracking(bool enable)
{
    in_migration = enable;
}

hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - section->address_space->dispatch->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t))
{
    phys_mem_alloc = alloc;
}
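
/* Hypothetical accelerator hook (the my_accel_* names are invented for
 * illustration; a real callback must return page-aligned memory, like the
 * default qemu_anon_ram_alloc):
 *
 *     static void *my_accel_ram_alloc(size_t size)
 *     {
 *         return my_accel_map_guest_ram(size);
 *     }
 *
 *     phys_mem_set_alloc(my_accel_ram_alloc);
 */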

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    memory_region_unref(mr);

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        memory_region_destroy(&subpage->iomem);
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}


static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
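
/* Worked example (illustrative): with 4 KiB pages, a section covering guest
 * physical [0x0800, 0x2800) is split by mem_add() into a subpage for
 * [0x0800, 0x1000), one full page [0x1000, 0x2000) registered through
 * register_multipage(), and a trailing subpage for [0x2000, 0x2800).
 */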

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}

static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        goto error;
    }

    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        goto error;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
    sanitized_name = g_strdup(block->mr->name);
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/')
            *c = '_';
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        g_free(filename);
        goto error;
    }
    unlink(filename);
    g_free(filename);

    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory);
    }

    block->fd = fd;
    return area;

error:
    if (mem_prealloc) {
        exit(1);
    }
    return NULL;
}
#else
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    fprintf(stderr, "-mem-path not supported on this host\n");
    exit(1);
}
#endif

static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QTAILQ_EMPTY(&ram_list.blocks))
        return 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
        last = MAX(last, block->offset + block->length);

    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!qemu_opt_get_bool(qemu_get_machine_opts(),
                           "dump-guest-core", true)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

static RAMBlock *find_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            return block;
        }
    }

    return NULL;
}

void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block = find_ram_block(addr);
    RAMBlock *block;

    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    qemu_mutex_unlock_ramlist();
}

void qemu_ram_unset_idstr(ram_addr_t addr)
{
    RAMBlock *block = find_ram_block(addr);

    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

static ram_addr_t ram_block_add(RAMBlock *new_block)
{
    RAMBlock *block;
    ram_addr_t old_ram_size, new_ram_size;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
        } else {
            new_block->host = phys_mem_alloc(new_block->length);
            if (!new_block->host) {
                fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
                        new_block->mr->name, strerror(errno));
                exit(1);
            }
            memory_try_enable_merging(new_block->host, new_block->length);
        }
    }

    /* Keep the list sorted from biggest to smallest block.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->length < new_block->length) {
            break;
        }
    }
    if (block) {
        QTAILQ_INSERT_BEFORE(block, new_block, next);
    } else {
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    if (new_ram_size > old_ram_size) {
        int i;
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            ram_list.dirty_memory[i] =
                bitmap_zero_extend(ram_list.dirty_memory[i],
                                   old_ram_size, new_ram_size);
        }
    }
    cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);

    qemu_ram_setup_dump(new_block->host, new_block->length);
    qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
    qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);

    if (kvm_enabled()) {
        kvm_setup_guest_memory(new_block->host, new_block->length);
    }

    return new_block->offset;
}

ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    const char *mem_path)
{
    RAMBlock *new_block;

    if (xen_enabled()) {
        fprintf(stderr, "-mem-path not supported with Xen\n");
        exit(1);
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
        fprintf(stderr,
                "-mem-path not supported with this accelerator\n");
        exit(1);
    }

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->length = size;
    new_block->host = file_ram_alloc(new_block, size, mem_path);
    return ram_block_add(new_block);
}

ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *new_block;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->length = size;
    new_block->fd = -1;
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC_MASK;
    }
    return ram_block_add(new_block);
}
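
/* Illustrative call (hypothetical device code; in practice this path is
 * normally reached through memory_region_init_ram_ptr()):
 *
 *     void *vram = map_device_framebuffer();   // invented helper
 *     ram_addr_t offset = qemu_ram_alloc_from_ptr(vram_size, vram, &vram_mr);
 *
 * Passing host == NULL is also legal and defers the allocation to
 * phys_mem_alloc, which is what qemu_ram_alloc() below relies on.
 */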

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (xen_enabled()) {
                xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
            } else if (block->fd >= 0) {
                munmap(block->host, block->length);
                close(block->fd);
#endif
            } else {
                qemu_anon_ram_free(block->host, block->length);
            }
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (block->fd >= 0) {
#ifdef MAP_POPULATE
                    flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                        MAP_PRIVATE;
#else
                    flags |= MAP_PRIVATE;
#endif
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
            return;
        }
    }
}
#endif /* !_WIN32 */

/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block = qemu_get_ram_block(addr);

    if (xen_enabled()) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        } else if (block->host == NULL) {
            block->host =
                xen_map_cache(block->offset, block->length, 1);
        }
    }
    return block->host + (addr - block->offset);
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;

        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->length) {
                if (addr - block->offset + *size > block->length)
                    *size = block->length - addr + block->offset;
                return block->host + (addr - block->offset);
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return qemu_get_ram_block(*ram_addr)->mr;
    }

    block = ram_list.mru_block;
    if (block && block->host && host - block->host < block->length) {
        goto found;
    }

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            goto found;
        }
    }

    return NULL;

found:
    *ram_addr = block->offset + (host - block->host);
    return block->mr;
}
A
Alex Williamson 已提交
1527

A
Avi Kivity 已提交
1528
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1529
                               uint64_t val, unsigned size)
1530
{
1531
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1532
        tb_invalidate_phys_page_fast(ram_addr, size);
1533
    }
1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
1546
    }
1547 1548
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
B
bellard 已提交
1549 1550
    /* we remove the notdirty callback only if the code has been
       flushed */
1551
    if (!cpu_physical_memory_is_clean(ram_addr)) {
1552
        CPUArchState *env = current_cpu->env_ptr;
1553
        tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1554
    }
1555 1556
}

1557 1558 1559 1560 1561 1562
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

1563 1564
static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
1565
    .valid.accepts = notdirty_mem_accepts,
1566
    .endianness = DEVICE_NATIVE_ENDIAN,
1567 1568
};

P
pbrook 已提交
1569
/* Generate a debug exception if a watchpoint has been hit.  */
1570
static void check_watchpoint(int offset, int len_mask, int flags)
P
pbrook 已提交
1571
{
1572 1573
    CPUState *cpu = current_cpu;
    CPUArchState *env = cpu->env_ptr;
1574
    target_ulong pc, cs_base;
P
pbrook 已提交
1575
    target_ulong vaddr;
1576
    CPUWatchpoint *wp;
1577
    int cpu_flags;
P
pbrook 已提交
1578

1579
    if (cpu->watchpoint_hit) {
1580 1581 1582
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that is will trigger after the
         * current instruction. */
1583
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1584 1585
        return;
    }
1586
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1587
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1588 1589
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1590
            wp->flags |= BP_WATCHPOINT_HIT;
1591 1592
            if (!cpu->watchpoint_hit) {
                cpu->watchpoint_hit = wp;
1593
                tb_check_watchpoint(cpu);
1594
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1595
                    cpu->exception_index = EXCP_DEBUG;
1596
                    cpu_loop_exit(cpu);
1597 1598
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1599
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1600
                    cpu_resume_from_signal(cpu, NULL);
1601
                }
1602
            }
1603 1604
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
P
pbrook 已提交
1605 1606 1607 1608
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
    case 1: return ldub_phys(&address_space_memory, addr);
    case 2: return lduw_phys(&address_space_memory, addr);
    case 4: return ldl_phys(&address_space_memory, addr);
    default: abort();
    }
}

static void watch_mem_write(void *opaque, hwaddr addr,
                            uint64_t val, unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
        stb_phys(&address_space_memory, addr, val);
        break;
    case 2:
        stw_phys(&address_space_memory, addr, val);
        break;
    case 4:
        stl_phys(&address_space_memory, addr, val);
        break;
    default: abort();
    }
}

static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static uint64_t subpage_read(void *opaque, hwaddr addr,
                             unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    address_space_read(subpage->as, addr + subpage->base, buf, len);
    switch (len) {
    case 1:
        return ldub_p(buf);
    case 2:
        return lduw_p(buf);
    case 4:
        return ldl_p(buf);
    default:
        abort();
    }
}

static void subpage_write(void *opaque, hwaddr addr,
                          uint64_t value, unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    default:
        abort();
    }
    address_space_write(subpage->as, addr + subpage->base, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
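
/* A subpage covers one guest page that mixes sections (e.g. RAM and MMIO
 * on the same page).  Each access is bounced back into the owning address
 * space at subpage->base + addr, where the per-sub-section dispatch picks
 * the right target region.
 */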

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) {
        return -1;
    }
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          "subpage", TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}
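
/* A fresh subpage maps every sub-section to PHYS_SECTION_UNASSIGNED;
 * callers then carve out the ranges they own with subpage_register() as
 * real sections are added to the page.
 */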

static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
                              MemoryRegion *mr)
{
    assert(as);
    MemoryRegionSection section = {
        .address_space = as,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}

MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
{
    return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}
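
/* The four regions above are wrapped in per-address-space sections by
 * mem_begin(); the asserts there check that they land on the fixed
 * PHYS_SECTION_UNASSIGNED/NOTDIRTY/ROM/WATCH indices.
 */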

static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, as, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, as, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, as, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, as, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}

static void mem_commit(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    phys_page_compact_all(next, next->map.nodes_nb);

    as->dispatch = next;

    if (cur) {
        phys_sections_free(&cur->map);
        g_free(cur);
    }
}
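
/* Dispatch rebuilds are double-buffered: mem_begin() populates
 * as->next_dispatch while readers keep using as->dispatch, and
 * mem_commit() compacts the new page table, swaps it in, and frees the
 * old one.
 */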

static void tcg_commit(MemoryListener *listener)
{
    CPUState *cpu;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    CPU_FOREACH(cpu) {
        /* FIXME: Disentangle the cpu.h circular files deps so we can
           directly get the right CPU from listener.  */
        if (cpu->tcg_as_listener != listener) {
            continue;
        }
        tlb_flush(cpu, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(true);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(false);
}

static MemoryListener core_memory_listener = {
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&as->dispatch_listener);
    g_free(d);
    as->dispatch = NULL;
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");

    memory_listener_register(&core_memory_listener, &address_space_memory);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID)) {
            return -1;
        }
        if (is_write) {
            if (!(flags & PAGE_WRITE)) {
                return -1;
            }
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0))) {
                return -1;
            }
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ)) {
                return -1;
            }
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1))) {
                return -1;
            }
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (cpu_physical_memory_is_clean(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    }
    xen_modified_memory(addr, length);
}
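
/* All direct RAM stores in this file go through the helper above so that
 * TBs translated from the overwritten bytes are invalidated and the VGA
 * and migration dirty bitmaps stay coherent (stl_phys_notdirty() being
 * the deliberate exception).
 */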

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    if (l & (l - 1)) {
        l = 1 << (qemu_fls(l) - 1);
    }

    return l;
}
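
/* Worked example: for a device with .valid.max_access_size = 4 and no
 * unaligned support, an 8-byte transfer at address 0x1002 is clamped to
 * 2 bytes here (0x1002 & -0x1002 == 2), so the caller loops and issues
 * several smaller device accesses.
 */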

bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force current_cpu to NULL to avoid
                   potential bugs */
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
                    error |= io_mem_write(mr, addr1, val, 8);
                    break;
                case 4:
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                    break;
                case 2:
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                    break;
                case 1:
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                    break;
                default:
                    abort();
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                switch (l) {
                case 8:
                    /* 64 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 8);
                    stq_p(buf, val);
                    break;
                case 4:
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                    break;
                case 2:
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                    break;
                case 1:
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                    break;
                default:
                    abort();
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}
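
/* address_space_rw() re-translates on every loop iteration, so one call
 * may span RAM and MMIO: direct RAM chunks are memcpy'd, MMIO chunks are
 * cut down by memory_access_size() into 1/2/4/8 byte device accesses, and
 * any failing I/O access makes the whole call return true (error).
 */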

bool address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    return address_space_rw(as, addr, buf, len, false);
}

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}
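
/* Usage sketch (illustrative; "dev" and "dma_retry_cb" are hypothetical
 * caller names): a device whose address_space_map() returned NULL because
 * the single bounce buffer was busy can queue a retry:
 *
 *     client = cpu_register_map_client(dev, dma_retry_cb);
 *
 * When address_space_unmap() releases the bounce buffer, it calls
 * cpu_notify_map_clients(), which runs and unregisters every pending
 * callback.
 */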

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (bounce.buffer) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len) {
                    l = access_len;
                }
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    cpu_notify_map_clients();
}
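
/* Usage sketch (illustrative; process() stands in for a hypothetical
 * consumer, and error handling is elided):
 *
 *     hwaddr len = size;
 *     uint8_t *p = address_space_map(as, addr, &len, false);
 *     if (p) {
 *         process(p, len);
 *         address_space_unmap(as, p, len, false, len);
 *     }
 *
 * Note that len can come back smaller than requested; callers needing the
 * whole range must loop.
 */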

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}
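
/* In the I/O branch above, io_mem_read() returns the value in the
 * target's native byte order, so a bswap is needed only when the caller
 * asked for the opposite endianness; the RAM branch simply picks the
 * matching ldl_*_p accessor.
 */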

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    uint8_t val;
    address_space_rw(as, addr, &val, 1, 0);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (cpu_physical_memory_is_clean(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flag(addr1,
                                                   DIRTY_MEMORY_MIGRATION);
                cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
            }
        }
    }
}
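
/* Unlike stl_phys(), the variant above skips invalidate_and_set_dirty():
 * when the dirty bitmap itself is being used to track guest page-table
 * updates, marking the page dirty here would defeat that tracking.  Only
 * the migration/VGA bits are still updated while migration is active.
 */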

/* warning: addr must be aligned */
static inline void stl_phys_internal(AddressSpace *as,
                                     hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    address_space_rw(as, addr, &v, 1, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(AddressSpace *as,
                                     hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    address_space_rw(as, addr, (void *) &val, 8, 1);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    address_space_rw(as, addr, (void *) &val, 8, 1);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    address_space_rw(as, addr, (void *) &val, 8, 1);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
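
/* Debugger writes are routed through cpu_physical_memory_write_rom() so
 * that, for example, software breakpoints can be patched even into
 * ROM-backed pages.
 */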
#endif

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}

void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->length, opaque);
    }
}
#endif