/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/range.h"

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];
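
/* The dispatch table built from these entries is a radix tree over the
 * physical address space: each level consumes P_L2_BITS bits of the page
 * index, and a PhysPageEntry either points at the next-level Node
 * (skip != 0) or, at a leaf (skip == 0), at an entry in the sections
 * table (see phys_page_find() below).
 */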

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
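
/* A subpage splits a single target page among several MemoryRegionSections:
 * sub_section[] maps every byte offset within the page to the index of the
 * section covering it (see register_subpage() below).
 */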

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3
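
/* Well-known section indices; memory_region_section_get_iotlb() below
 * encodes them directly into TLB entries.
 */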

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry.  Simply detect that the entry has a single
 * child, and update our entry so we can skip it and go directly to the
 * destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(cpu->memory_dispatch,
                                               addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        NULL
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
{
    /* We only support one address space per cpu at the moment.  */
    assert(cpu->as == as);

    if (cpu->tcg_as_listener) {
        memory_listener_unregister(cpu->tcg_as_listener);
    } else {
        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
    }
    cpu->tcg_as_listener->commit = tcg_commit;
    memory_listener_register(cpu->tcg_as_listener, as);
}
#endif

#ifndef CONFIG_USER_ONLY
static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);

static int cpu_get_free_index(Error **errp)
{
    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);

    if (cpu >= MAX_CPUMASK_BITS) {
        error_setg(errp, "Trying to use more CPUs than max of %d",
                   MAX_CPUMASK_BITS);
        return -1;
    }

    bitmap_set(cpu_index_map, cpu, 1);
    return cpu;
}

void cpu_exec_exit(CPUState *cpu)
{
    if (cpu->cpu_index == -1) {
        /* cpu_index was never allocated by this @cpu or was already freed. */
        return;
    }

    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
    cpu->cpu_index = -1;
}
#else

static int cpu_get_free_index(Error **errp)
{
    CPUState *some_cpu;
    int cpu_index = 0;

    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    return cpu_index;
}

void cpu_exec_exit(CPUState *cpu)
{
}
#endif

void cpu_exec_init(CPUArchState *env, Error **errp)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    int cpu_index;
    Error *local_err = NULL;

#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
    cpu_reload_memory_map(cpu);
#endif

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
    if (local_err) {
        error_propagate(errp, local_err);
#if defined(CONFIG_USER_ONLY)
        cpu_list_unlock();
#endif
        return;
    }
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)

{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}

#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(cpu);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        goto found;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    unsigned long end, page;
    bool dirty;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;
    dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
                                         page, end - page);

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}

/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - section->address_space->dispatch->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

1017 1018
static void phys_section_destroy(MemoryRegion *mr)
{
P
Paolo Bonzini 已提交
1019 1020
    memory_region_unref(mr);

1021 1022
    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
P
Peter Crosthwaite 已提交
1023
        object_unref(OBJECT(&subpage->iomem));
1024 1025 1026 1027
        g_free(subpage);
    }
}

P
Paolo Bonzini 已提交
1028
static void phys_sections_free(PhysPageMap *map)
1029
{
1030 1031
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
1032 1033
        phys_section_destroy(section->mr);
    }
1034 1035
    g_free(map->sections);
    g_free(map->nodes);
1036 1037
}

A
Avi Kivity 已提交
1038
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1039 1040
{
    subpage_t *subpage;
A
Avi Kivity 已提交
1041
    hwaddr base = section->offset_within_address_space
1042
        & TARGET_PAGE_MASK;
1043
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1044
                                                   d->map.nodes, d->map.sections);
1045 1046
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
1047
        .size = int128_make64(TARGET_PAGE_SIZE),
1048
    };
A
Avi Kivity 已提交
1049
    hwaddr start, end;
1050

1051
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1052

1053
    if (!(existing->mr->subpage)) {
1054
        subpage = subpage_init(d->as, base);
1055
        subsection.address_space = d->as;
1056
        subsection.mr = &subpage->iomem;
A
Avi Kivity 已提交
1057
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1058
                      phys_section_add(&d->map, &subsection));
1059
    } else {
1060
        subpage = container_of(existing->mr, subpage_t, iomem);
1061 1062
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1063
    end = start + int128_get64(section->size) - 1;
1064 1065
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
1066 1067 1068
}


1069 1070
static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
1071
{
A
Avi Kivity 已提交
1072
    hwaddr start_addr = section->offset_within_address_space;
1073
    uint16_t section_index = phys_section_add(&d->map, section);
1074 1075
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));
1076

1077 1078
    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1079 1080
}

A
Avi Kivity 已提交
1081
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1082
{
1083
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1084
    AddressSpaceDispatch *d = as->next_dispatch;
1085
    MemoryRegionSection now = *section, remain = *section;
1086
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1087

1088 1089 1090 1091
    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

1092
        now.size = int128_min(int128_make64(left), now.size);
A
Avi Kivity 已提交
1093
        register_subpage(d, &now);
1094
    } else {
1095
        now.size = int128_zero();
1096
    }
1097 1098 1099 1100
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
1101
        now = remain;
1102
        if (int128_lt(remain.size, page_size)) {
1103
            register_subpage(d, &now);
1104
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1105
            now.size = page_size;
A
Avi Kivity 已提交
1106
            register_subpage(d, &now);
1107
        } else {
1108
            now.size = int128_and(now.size, int128_neg(page_size));
A
Avi Kivity 已提交
1109
            register_multipage(d, &now);
1110
        }
1111 1112 1113
    }
}

1114 1115 1116 1117 1118 1119
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

1120 1121 1122 1123 1124 1125 1126 1127 1128 1129
void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

1130
#ifdef __linux__
1131 1132 1133 1134 1135

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

1136
static long gethugepagesize(const char *path, Error **errp)
1137 1138 1139 1140 1141
{
    struct statfs fs;
    int ret;

    do {
Y
Yoshiaki Tamura 已提交
1142
        ret = statfs(path, &fs);
1143 1144 1145
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
1146 1147
        error_setg_errno(errp, errno, "failed to get page size of file %s",
                         path);
Y
Yoshiaki Tamura 已提交
1148
        return 0;
1149 1150 1151
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
Y
Yoshiaki Tamura 已提交
1152
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1153 1154 1155 1156

    return fs.f_bsize;
}

A
Alex Williamson 已提交
1157 1158
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
1159 1160
                            const char *path,
                            Error **errp)
1161 1162
{
    char *filename;
1163 1164
    char *sanitized_name;
    char *c;
1165
    void *area = NULL;
1166
    int fd;
1167
    uint64_t hpagesize;
1168
    Error *local_err = NULL;
1169

1170 1171 1172
    hpagesize = gethugepagesize(path, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
1173
        goto error;
1174
    }
1175
    block->mr->align = hpagesize;
1176 1177

    if (memory < hpagesize) {
1178 1179 1180 1181
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than huge page size 0x%" PRIx64,
                   memory, hpagesize);
        goto error;
1182 1183 1184
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
1185 1186
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
1187
        goto error;
1188 1189
    }

1190
    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1191
    sanitized_name = g_strdup(memory_region_name(block->mr));
1192 1193 1194 1195 1196 1197 1198 1199
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/')
            *c = '_';
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);
1200 1201 1202

    fd = mkstemp(filename);
    if (fd < 0) {
1203 1204
        error_setg_errno(errp, errno,
                         "unable to create backing store for hugepages");
1205
        g_free(filename);
1206
        goto error;
1207 1208
    }
    unlink(filename);
1209
    g_free(filename);
1210 1211 1212 1213 1214 1215 1216 1217 1218

    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
1219
    if (ftruncate(fd, memory)) {
Y
Yoshiaki Tamura 已提交
1220
        perror("ftruncate");
1221
    }
1222

1223 1224 1225
    area = mmap(0, memory, PROT_READ | PROT_WRITE,
                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
                fd, 0);
1226
    if (area == MAP_FAILED) {
1227 1228
        error_setg_errno(errp, errno,
                         "unable to map backing store for hugepages");
Y
Yoshiaki Tamura 已提交
1229
        close(fd);
1230
        goto error;
1231
    }
1232 1233

    if (mem_prealloc) {
1234
        os_mem_prealloc(fd, area, memory);
1235 1236
    }

A
Alex Williamson 已提交
1237
    block->fd = fd;
1238
    return area;
1239 1240 1241

error:
    if (mem_prealloc) {
1242
        error_report("%s", error_get_pretty(*errp));
1243 1244 1245
        exit(1);
    }
    return NULL;
1246 1247 1248
}
#endif

/* Called with the ramlist lock held.  */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last;
}

1299 1300 1301 1302 1303
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1304
    if (!machine_dump_guest_core(current_machine)) {
1305 1306 1307 1308 1309 1310 1311 1312 1313
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

M
Mike Day 已提交
1314 1315 1316
/* Called within an RCU critical section, or while the ramlist lock
 * is held.
 */
1317
static RAMBlock *find_ram_block(ram_addr_t addr)
1318
{
1319
    RAMBlock *block;
1320

M
Mike Day 已提交
1321
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1322
        if (block->offset == addr) {
1323
            return block;
1324 1325
        }
    }
1326 1327 1328 1329

    return NULL;
}

1330
/* Called with iothread lock held.  */
1331 1332
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
1333
    RAMBlock *new_block, *block;
1334

M
Mike Day 已提交
1335
    rcu_read_lock();
1336
    new_block = find_ram_block(addr);
1337 1338
    assert(new_block);
    assert(!new_block->idstr[0]);
1339

1340 1341
    if (dev) {
        char *id = qdev_get_dev_path(dev);
1342 1343
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1344
            g_free(id);
1345 1346 1347 1348
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

M
Mike Day 已提交
1349
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1350
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1351 1352 1353 1354 1355
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
M
Mike Day 已提交
1356
    rcu_read_unlock();
1357 1358
}

1359
/* Called with iothread lock held.  */
1360 1361
void qemu_ram_unset_idstr(ram_addr_t addr)
{
1362
    RAMBlock *block;
1363

1364 1365 1366 1367 1368
    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */

M
Mike Day 已提交
1369
    rcu_read_lock();
1370
    block = find_ram_block(addr);
1371 1372 1373
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
M
Mike Day 已提交
1374
    rcu_read_unlock();
1375 1376
}

1377 1378
static int memory_try_enable_merging(void *addr, size_t len)
{
1379
    if (!machine_mem_merge(current_machine)) {
1380 1381 1382 1383 1384 1385 1386
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before the guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As the memory core doesn't know how the memory is accessed, it is up to the
 * resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
{
    RAMBlock *block = find_ram_block(base);

    assert(block);

    newsize = TARGET_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
    memory_region_set_size(block->mr, newsize);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}

1433
static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1434
{
1435
    RAMBlock *block;
M
Mike Day 已提交
1436
    RAMBlock *last_block = NULL;
1437 1438 1439
    ram_addr_t old_ram_size, new_ram_size;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1440

1441
    qemu_mutex_lock_ramlist();
1442
    new_block->offset = find_ram_offset(new_block->max_length);
1443 1444 1445

    if (!new_block->host) {
        if (xen_enabled()) {
1446 1447
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr);
1448
        } else {
1449
            new_block->host = phys_mem_alloc(new_block->max_length,
1450
                                             &new_block->mr->align);
1451
            if (!new_block->host) {
1452 1453 1454 1455 1456
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return -1;
1457
            }
1458
            memory_try_enable_merging(new_block->host, new_block->max_length);
1459
        }
1460
    }
P
pbrook 已提交
1461

L
Li Zhijian 已提交
1462 1463 1464 1465 1466
    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        migration_bitmap_extend(old_ram_size, new_ram_size);
    }
M
Mike Day 已提交
1467 1468 1469 1470
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
M
Mike Day 已提交
1471
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
M
Mike Day 已提交
1472
        last_block = block;
1473
        if (block->max_length < new_block->max_length) {
1474 1475 1476 1477
            break;
        }
    }
    if (block) {
M
Mike Day 已提交
1478
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
M
Mike Day 已提交
1479
    } else if (last_block) {
M
Mike Day 已提交
1480
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
M
Mike Day 已提交
1481
    } else { /* list is empty */
M
Mike Day 已提交
1482
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1483
    }
1484
    ram_list.mru_block = NULL;
P
pbrook 已提交
1485

M
Mike Day 已提交
1486 1487
    /* Write list before version */
    smp_wmb();
U
Umesh Deshpande 已提交
1488
    ram_list.version++;
1489
    qemu_mutex_unlock_ramlist();
U
Umesh Deshpande 已提交
1490

1491 1492 1493
    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    if (new_ram_size > old_ram_size) {
1494
        int i;
1495 1496

        /* ram_list.dirty_memory[] is protected by the iothread lock.  */
1497 1498 1499 1500 1501
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            ram_list.dirty_memory[i] =
                bitmap_zero_extend(ram_list.dirty_memory[i],
                                   old_ram_size, new_ram_size);
       }
1502
    }
1503
    cpu_physical_memory_set_dirty_range(new_block->offset,
1504 1505
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);
P
pbrook 已提交
1506

1507 1508 1509 1510 1511 1512 1513
    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
        if (kvm_enabled()) {
            kvm_setup_guest_memory(new_block->host, new_block->max_length);
        }
1514
    }
1515

P
pbrook 已提交
1516 1517
    return new_block->offset;
}
B
bellard 已提交
1518

1519
#ifdef __linux__
1520
ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1521
                                    bool share, const char *mem_path,
1522
                                    Error **errp)
1523 1524
{
    RAMBlock *new_block;
1525 1526
    ram_addr_t addr;
    Error *local_err = NULL;
1527 1528

    if (xen_enabled()) {
1529 1530
        error_setg(errp, "-mem-path not supported with Xen");
        return -1;
1531 1532 1533 1534 1535 1536 1537 1538
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
1539 1540 1541
        error_setg(errp,
                   "-mem-path not supported with this accelerator");
        return -1;
1542 1543 1544 1545 1546
    }

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
1547 1548
    new_block->used_length = size;
    new_block->max_length = size;
1549
    new_block->flags = share ? RAM_SHARED : 0;
1550 1551 1552 1553 1554 1555 1556
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
    if (!new_block->host) {
        g_free(new_block);
        return -1;
    }

1557 1558 1559 1560 1561 1562 1563
    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
1564
}
1565
#endif
1566

1567 1568 1569 1570 1571 1572
static
ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                   void (*resized)(const char*,
                                                   uint64_t length,
                                                   void *host),
                                   void *host, bool resizeable,
1573
                                   MemoryRegion *mr, Error **errp)
1574 1575
{
    RAMBlock *new_block;
1576 1577
    ram_addr_t addr;
    Error *local_err = NULL;
1578 1579

    size = TARGET_PAGE_ALIGN(size);
1580
    max_size = TARGET_PAGE_ALIGN(max_size);
1581 1582
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
1583
    new_block->resized = resized;
1584 1585
    new_block->used_length = size;
    new_block->max_length = max_size;
1586
    assert(max_size >= size);
1587 1588 1589
    new_block->fd = -1;
    new_block->host = host;
    if (host) {
1590
        new_block->flags |= RAM_PREALLOC;
1591
    }
1592 1593 1594
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
1595 1596 1597 1598 1599 1600 1601
    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
1602 1603
}

1604 1605 1606 1607 1608 1609
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}

1610
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1611
{
1612 1613 1614 1615 1616 1617 1618 1619 1620 1621
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}

ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1622 1623
}

1624 1625 1626 1627
void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

1628
    qemu_mutex_lock_ramlist();
M
Mike Day 已提交
1629
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1630
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            g_free_rcu(block, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}
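
/*
 * RCU reclaim callback used by qemu_ram_free(): release the backing storage
 * in whatever way matches how it was allocated (preallocated host memory,
 * Xen map cache, file mapping or anonymous RAM) and free the RAMBlock.
 */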

static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        munmap(block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            call_rcu(block, reclaim_ramblock, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

int qemu_get_ram_fd(ram_addr_t addr)
{
    RAMBlock *block;
    int fd;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    fd = block->fd;
    rcu_read_unlock();
    return fd;
}

void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    ptr = ramblock_ptr(block, 0);
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            ptr = xen_map_cache(addr, 0, 0);
            goto unlock;
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }
    ptr = ramblock_ptr(block, addr - block->offset);

unlock:
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    void *ptr;
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;
        rcu_read_lock();
        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->max_length) {
                if (addr - block->offset + *size > block->max_length)
                    *size = block->max_length - addr + block->offset;
                ptr = ramblock_ptr(block, addr - block->offset);
                rcu_read_unlock();
                return ptr;
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

/* Some of the softmmu routines need to translate from a host pointer
 * (typically a TLB entry) back to a ram offset.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;
    MemoryRegion *mr;

    if (xen_enabled()) {
        rcu_read_lock();
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        mr = qemu_get_ram_block(*ram_addr)->mr;
        rcu_read_unlock();
        return mr;
    }

    rcu_read_lock();
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    rcu_read_unlock();
    return NULL;

found:
    *ram_addr = block->offset + (host - block->host);
    mr = block->mr;
    rcu_read_unlock();
    return mr;
}
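
/*
 * io_mem_notdirty catches writes to pages that some dirty-memory client
 * still considers clean: invalidate any TBs translated from the page,
 * perform the store, update the dirty bitmaps, and once no client sees the
 * page as clean let the TLB go back to fast RAM accesses.
 */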

static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size,
                                        DIRTY_CLIENTS_NOCODE);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        CPUArchState *env = current_cpu->env_ptr;
        tlb_set_dirty(env, current_cpu->mem_io_vaddr);
    }
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

P
pbrook 已提交
1918
/* Generate a debug exception if a watchpoint has been hit.  */
1919
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
P
pbrook 已提交
1920
{
1921 1922
    CPUState *cpu = current_cpu;
    CPUArchState *env = cpu->env_ptr;
1923
    target_ulong pc, cs_base;
P
pbrook 已提交
1924
    target_ulong vaddr;
1925
    CPUWatchpoint *wp;
1926
    int cpu_flags;
P
pbrook 已提交
1927

1928
    if (cpu->watchpoint_hit) {
1929 1930 1931
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
1932
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1933 1934
        return;
    }
1935
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1936
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1937 1938
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
1939 1940 1941 1942 1943 1944
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
1945
            wp->hitattrs = attrs;
1946 1947
            if (!cpu->watchpoint_hit) {
                cpu->watchpoint_hit = wp;
1948
                tb_check_watchpoint(cpu);
1949
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1950
                    cpu->exception_index = EXCP_DEBUG;
1951
                    cpu_loop_exit(cpu);
1952 1953
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1954
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1955
                    cpu_resume_from_signal(cpu, NULL);
1956
                }
1957
            }
1958 1959
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
P
pbrook 已提交
1960 1961 1962 1963
        }
    }
}

1964 1965 1966
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
1967 1968
static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
                                  unsigned size, MemTxAttrs attrs)
1969
{
1970 1971 1972 1973
    MemTxResult res;
    uint64_t data;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1974
    switch (size) {
1975 1976 1977 1978 1979 1980 1981 1982 1983
    case 1:
        data = address_space_ldub(&address_space_memory, addr, attrs, &res);
        break;
    case 2:
        data = address_space_lduw(&address_space_memory, addr, attrs, &res);
        break;
    case 4:
        data = address_space_ldl(&address_space_memory, addr, attrs, &res);
        break;
1984 1985
    default: abort();
    }
1986 1987
    *pdata = data;
    return res;
1988 1989
}

1990 1991 1992
static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned size,
                                   MemTxAttrs attrs)
1993
{
1994 1995 1996
    MemTxResult res;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1997
    switch (size) {
1998
    case 1:
1999
        address_space_stb(&address_space_memory, addr, val, attrs, &res);
2000 2001
        break;
    case 2:
2002
        address_space_stw(&address_space_memory, addr, val, attrs, &res);
2003 2004
        break;
    case 4:
2005
        address_space_stl(&address_space_memory, addr, val, attrs, &res);
2006
        break;
2007 2008
    default: abort();
    }
2009
    return res;
2010 2011
}

2012
static const MemoryRegionOps watch_mem_ops = {
2013 2014
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
2015
    .endianness = DEVICE_NATIVE_ENDIAN,
2016 2017
};
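
/*
 * Subpage handling: when a single target page is split between several
 * memory regions, accesses go through subpage_ops, which forwards each
 * access to the owning address space at subpage->base + addr.
 */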

static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
2020
{
2021
    subpage_t *subpage = opaque;
2022
    uint8_t buf[8];
2023
    MemTxResult res;
2024

2025
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2026
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2027
           subpage, len, addr);
2028
#endif
2029 2030 2031 2032
    res = address_space_read(subpage->as, addr + subpage->base,
                             attrs, buf, len);
    if (res) {
        return res;
2033
    }
2034 2035
    switch (len) {
    case 1:
2036 2037
        *data = ldub_p(buf);
        return MEMTX_OK;
2038
    case 2:
2039 2040
        *data = lduw_p(buf);
        return MEMTX_OK;
2041
    case 4:
2042 2043
        *data = ldl_p(buf);
        return MEMTX_OK;
2044
    case 8:
2045 2046
        *data = ldq_p(buf);
        return MEMTX_OK;
2047 2048 2049
    default:
        abort();
    }
2050 2051
}

2052 2053
static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2054
{
2055
    subpage_t *subpage = opaque;
2056
    uint8_t buf[8];
2057

2058
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2059
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2060 2061
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
2062
#endif
2063 2064 2065 2066 2067 2068 2069 2070 2071 2072
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
2073 2074 2075
    case 8:
        stq_p(buf, value);
        break;
2076 2077 2078
    default:
        abort();
    }
2079 2080
    return address_space_write(subpage->as, addr + subpage->base,
                               attrs, buf, len);
2081 2082
}

2083
static bool subpage_accepts(void *opaque, hwaddr addr,
A
Amos Kong 已提交
2084
                            unsigned len, bool is_write)
2085
{
2086
    subpage_t *subpage = opaque;
2087
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2088
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2089
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2090 2091
#endif

2092
    return address_space_access_valid(subpage->as, addr + subpage->base,
A
Amos Kong 已提交
2093
                                      len, is_write);
2094 2095
}

2096
static const MemoryRegionOps subpage_ops = {
2097 2098
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
2099 2100 2101 2102
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
2103
    .valid.accepts = subpage_accepts,
2104
    .endianness = DEVICE_NATIVE_ENDIAN,
2105 2106
};

A
Anthony Liguori 已提交
2107
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2108
                             uint16_t section)
2109 2110 2111 2112 2113 2114 2115 2116
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2117 2118
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
2119 2120
#endif
    for (; idx <= eidx; idx++) {
2121
        mmio->sub_section[idx] = section;
2122 2123 2124 2125 2126
    }

    return 0;
}

2127
static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2128
{
A
Anthony Liguori 已提交
2129
    subpage_t *mmio;
2130

2131
    mmio = g_malloc0(sizeof(subpage_t));
2132

2133
    mmio->as = as;
2134
    mmio->base = base;
2135
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
P
Peter Crosthwaite 已提交
2136
                          NULL, TARGET_PAGE_SIZE);
A
Avi Kivity 已提交
2137
    mmio->iomem.subpage = true;
2138
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2139 2140
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
2141
#endif
2142
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2143 2144 2145 2146

    return mmio;
}

2147 2148
static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
                              MemoryRegion *mr)
2149
{
2150
    assert(as);
2151
    MemoryRegionSection section = {
2152
        .address_space = as,
2153 2154 2155
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
2156
        .size = int128_2_64(),
2157 2158
    };

2159
    return phys_section_add(map, &section);
2160 2161
}

P
Paolo Bonzini 已提交
2162
MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2163
{
2164 2165
    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
    MemoryRegionSection *sections = d->map.sections;
P
Paolo Bonzini 已提交
2166 2167

    return sections[index & ~TARGET_PAGE_MASK].mr;
2168 2169
}

A
Avi Kivity 已提交
2170 2171
static void io_mem_init(void)
{
2172
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2173
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2174
                          NULL, UINT64_MAX);
2175
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2176
                          NULL, UINT64_MAX);
2177
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2178
                          NULL, UINT64_MAX);
A
Avi Kivity 已提交
2179 2180
}
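
/*
 * Dispatch tables are rebuilt in two phases: mem_begin() starts a fresh
 * AddressSpaceDispatch seeded with the fixed dummy sections, the listener's
 * region_add hook fills it in, and mem_commit() publishes it with
 * atomic_rcu_set(), freeing the previous table after a grace period.
 */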

static void mem_begin(MemoryListener *listener)
2182 2183
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2184 2185 2186
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

2187
    n = dummy_section(&d->map, as, &io_mem_unassigned);
2188
    assert(n == PHYS_SECTION_UNASSIGNED);
2189
    n = dummy_section(&d->map, as, &io_mem_notdirty);
2190
    assert(n == PHYS_SECTION_NOTDIRTY);
2191
    n = dummy_section(&d->map, as, &io_mem_rom);
2192
    assert(n == PHYS_SECTION_ROM);
2193
    n = dummy_section(&d->map, as, &io_mem_watch);
2194
    assert(n == PHYS_SECTION_WATCH);
2195

M
Michael S. Tsirkin 已提交
2196
    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2197 2198 2199 2200
    d->as = as;
    as->next_dispatch = d;
}

2201 2202 2203 2204 2205 2206
static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}

2207
static void mem_commit(MemoryListener *listener)
A
Avi Kivity 已提交
2208
{
2209
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2210 2211 2212
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

2213
    phys_page_compact_all(next, next->map.nodes_nb);
2214

2215
    atomic_rcu_set(&as->dispatch, next);
2216
    if (cur) {
2217
        call_rcu(cur, address_space_dispatch_free, rcu);
2218
    }
2219 2220
}

2221
static void tcg_commit(MemoryListener *listener)
2222
{
2223
    CPUState *cpu;
2224 2225 2226 2227

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
A
Andreas Färber 已提交
2228
    CPU_FOREACH(cpu) {
2229 2230 2231 2232 2233
        /* FIXME: Disentangle the cpu.h circular files deps so we can
           directly get the right CPU from listener.  */
        if (cpu->tcg_as_listener != listener) {
            continue;
        }
2234
        cpu_reload_memory_map(cpu);
2235
    }
2236 2237
}

A
Avi Kivity 已提交
2238 2239
void address_space_init_dispatch(AddressSpace *as)
{
2240
    as->dispatch = NULL;
2241
    as->dispatch_listener = (MemoryListener) {
A
Avi Kivity 已提交
2242
        .begin = mem_begin,
2243
        .commit = mem_commit,
A
Avi Kivity 已提交
2244 2245 2246 2247
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
2248
    memory_listener_register(&as->dispatch_listener, as);
A
Avi Kivity 已提交
2249 2250
}

2251 2252 2253 2254 2255
void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}

A
Avi Kivity 已提交
2256 2257 2258 2259
void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

2260 2261 2262 2263
    atomic_rcu_set(&as->dispatch, NULL);
    if (d) {
        call_rcu(d, address_space_dispatch_free, rcu);
    }
A
Avi Kivity 已提交
2264 2265
}

A
Avi Kivity 已提交
2266 2267
static void memory_map_init(void)
{
2268
    system_memory = g_malloc(sizeof(*system_memory));
2269

2270
    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2271
    address_space_init(&address_space_memory, system_memory, "memory");
2272

2273
    system_io = g_malloc(sizeof(*system_io));
2274 2275
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
2276
    address_space_init(&address_space_io, system_io, "I/O");
A
Avi Kivity 已提交
2277 2278 2279 2280 2281 2282 2283
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

2284 2285 2286 2287 2288
MemoryRegion *get_system_io(void)
{
    return system_io;
}

2289 2290
#endif /* !defined(CONFIG_USER_ONLY) */

B
bellard 已提交
2291 2292
/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
2293
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
P
Paul Brook 已提交
2294
                        uint8_t *buf, int len, int is_write)
B
bellard 已提交
2295 2296 2297
{
    int l, flags;
    target_ulong page;
2298
    void * p;
B
bellard 已提交
2299 2300 2301 2302 2303 2304 2305 2306

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
P
Paul Brook 已提交
2307
            return -1;
B
bellard 已提交
2308 2309
        if (is_write) {
            if (!(flags & PAGE_WRITE))
P
Paul Brook 已提交
2310
                return -1;
2311
            /* XXX: this code should not depend on lock_user */
A
aurel32 已提交
2312
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
P
Paul Brook 已提交
2313
                return -1;
A
aurel32 已提交
2314 2315
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
B
bellard 已提交
2316 2317
        } else {
            if (!(flags & PAGE_READ))
P
Paul Brook 已提交
2318
                return -1;
2319
            /* XXX: this code should not depend on lock_user */
A
aurel32 已提交
2320
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
P
Paul Brook 已提交
2321
                return -1;
A
aurel32 已提交
2322
            memcpy(buf, p, l);
A
aurel32 已提交
2323
            unlock_user(p, addr, 0);
B
bellard 已提交
2324 2325 2326 2327 2328
        }
        len -= l;
        buf += l;
        addr += l;
    }
P
Paul Brook 已提交
2329
    return 0;
B
bellard 已提交
2330
}
B
bellard 已提交
2331

B
bellard 已提交
2332
#else
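
/*
 * After writing directly into guest RAM, invalidate any translated code in
 * the affected range and mark it dirty for every client that still had it
 * clean.
 */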

static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
A
Avi Kivity 已提交
2335
                                     hwaddr length)
2336
{
2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_range(addr, addr + length);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2349
    }
2350
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2351 2352
}
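
/*
 * Clamp an MMIO access to what the region supports: at most
 * valid.max_access_size (4 by default), no wider than the alignment of the
 * address for regions without unaligned support, and always a power of two.
 */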

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2354
{
2355
    unsigned access_size_max = mr->ops->valid.max_access_size;
2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
2369
    }
2370 2371 2372 2373

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
2374
    }
2375 2376 2377
    if (l & (l - 1)) {
        l = 1 << (qemu_fls(l) - 1);
    }
2378 2379

    return l;
2380 2381
}
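
/*
 * MMIO to a device that relies on the global lock must run under the BQL:
 * take it here if the caller does not already hold it, and report whether
 * the caller needs to drop it again.  Coalesced MMIO is flushed as well.
 */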

static bool prepare_mmio_access(MemoryRegion *mr)
2383
{
2384 2385 2386 2387 2388 2389 2390 2391
    bool unlocked = !qemu_mutex_iothread_locked();
    bool release_lock = false;

    if (unlocked && mr->global_locking) {
        qemu_mutex_lock_iothread();
        unlocked = false;
        release_lock = true;
    }
2392
    if (mr->flush_coalesced_mmio) {
2393 2394 2395
        if (unlocked) {
            qemu_mutex_lock_iothread();
        }
2396
        qemu_flush_coalesced_mmio_buffer();
2397 2398 2399
        if (unlocked) {
            qemu_mutex_unlock_iothread();
        }
2400
    }
2401 2402

    return release_lock;
2403 2404
}
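
/*
 * Copy bytes between a caller-supplied buffer and an address space,
 * splitting the transfer at translation boundaries: direct RAM is memcpy'd
 * (with dirty tracking on writes), everything else is dispatched as MMIO in
 * the widest access size the target region accepts.
 */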

MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
B
bellard 已提交
2407
{
2408
    hwaddr l;
B
bellard 已提交
2409
    uint8_t *ptr;
2410
    uint64_t val;
2411
    hwaddr addr1;
2412
    MemoryRegion *mr;
2413
    MemTxResult result = MEMTX_OK;
2414
    bool release_lock = false;
2415

2416
    rcu_read_lock();
B
bellard 已提交
2417
    while (len > 0) {
2418
        l = len;
2419
        mr = address_space_translate(as, addr, &addr1, &l, is_write);
2420

B
bellard 已提交
2421
        if (is_write) {
2422
            if (!memory_access_is_direct(mr, is_write)) {
2423
                release_lock |= prepare_mmio_access(mr);
2424
                l = memory_access_size(mr, l, addr1);
2425
                /* XXX: could force current_cpu to NULL to avoid
B
bellard 已提交
2426
                   potential bugs */
2427 2428 2429 2430
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
2431 2432
                    result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                           attrs);
2433 2434
                    break;
                case 4:
B
bellard 已提交
2435
                    /* 32 bit write access */
B
bellard 已提交
2436
                    val = ldl_p(buf);
2437 2438
                    result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                           attrs);
2439 2440
                    break;
                case 2:
B
bellard 已提交
2441
                    /* 16 bit write access */
B
bellard 已提交
2442
                    val = lduw_p(buf);
2443 2444
                    result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                           attrs);
2445 2446
                    break;
                case 1:
B
bellard 已提交
2447
                    /* 8 bit write access */
B
bellard 已提交
2448
                    val = ldub_p(buf);
2449 2450
                    result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                           attrs);
2451 2452 2453
                    break;
                default:
                    abort();
B
bellard 已提交
2454
                }
2455
            } else {
2456
                addr1 += memory_region_get_ram_addr(mr);
B
bellard 已提交
2457
                /* RAM case */
P
pbrook 已提交
2458
                ptr = qemu_get_ram_ptr(addr1);
B
bellard 已提交
2459
                memcpy(ptr, buf, l);
2460
                invalidate_and_set_dirty(mr, addr1, l);
B
bellard 已提交
2461 2462
            }
        } else {
2463
            if (!memory_access_is_direct(mr, is_write)) {
B
bellard 已提交
2464
                /* I/O case */
2465
                release_lock |= prepare_mmio_access(mr);
2466
                l = memory_access_size(mr, l, addr1);
2467 2468 2469
                switch (l) {
                case 8:
                    /* 64 bit read access */
2470 2471
                    result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                          attrs);
2472 2473 2474
                    stq_p(buf, val);
                    break;
                case 4:
B
bellard 已提交
2475
                    /* 32 bit read access */
2476 2477
                    result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                          attrs);
B
bellard 已提交
2478
                    stl_p(buf, val);
2479 2480
                    break;
                case 2:
B
bellard 已提交
2481
                    /* 16 bit read access */
2482 2483
                    result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                          attrs);
B
bellard 已提交
2484
                    stw_p(buf, val);
2485 2486
                    break;
                case 1:
B
bellard 已提交
2487
                    /* 8 bit read access */
2488 2489
                    result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                          attrs);
B
bellard 已提交
2490
                    stb_p(buf, val);
2491 2492 2493
                    break;
                default:
                    abort();
B
bellard 已提交
2494 2495 2496
                }
            } else {
                /* RAM case */
2497
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2498
                memcpy(buf, ptr, l);
B
bellard 已提交
2499 2500
            }
        }
2501 2502 2503 2504 2505 2506

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

B
bellard 已提交
2507 2508 2509 2510
        len -= l;
        buf += l;
        addr += l;
    }
2511
    rcu_read_unlock();
2512

2513
    return result;
B
bellard 已提交
2514
}
B
bellard 已提交
2515

2516 2517
MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
A
Avi Kivity 已提交
2518
{
2519
    return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
A
Avi Kivity 已提交
2520 2521
}

2522 2523
MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                               uint8_t *buf, int len)
A
Avi Kivity 已提交
2524
{
2525
    return address_space_rw(as, addr, attrs, buf, len, false);
A
Avi Kivity 已提交
2526 2527 2528
}


A
Avi Kivity 已提交
2529
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
A
Avi Kivity 已提交
2530 2531
                            int len, int is_write)
{
2532 2533
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
A
Avi Kivity 已提交
2534 2535
}

2536 2537 2538 2539 2540
enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};
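
/*
 * Shared helper for cpu_physical_memory_write_rom() and
 * cpu_flush_icache_range(): walk the range and either copy data into
 * RAM/ROM (invalidating translated code) or just flush the host icache for
 * the contents already there.
 */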

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2542
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
B
bellard 已提交
2543
{
2544
    hwaddr l;
B
bellard 已提交
2545
    uint8_t *ptr;
2546
    hwaddr addr1;
2547
    MemoryRegion *mr;
2548

2549
    rcu_read_lock();
B
bellard 已提交
2550
    while (len > 0) {
2551
        l = len;
2552
        mr = address_space_translate(as, addr, &addr1, &l, true);
2553

2554 2555
        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
2556
            l = memory_access_size(mr, l, addr1);
B
bellard 已提交
2557
        } else {
2558
            addr1 += memory_region_get_ram_addr(mr);
B
bellard 已提交
2559
            /* ROM/RAM case */
P
pbrook 已提交
2560
            ptr = qemu_get_ram_ptr(addr1);
2561 2562 2563
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
2564
                invalidate_and_set_dirty(mr, addr1, l);
2565 2566 2567 2568 2569
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
B
bellard 已提交
2570 2571 2572 2573 2574
        }
        len -= l;
        buf += l;
        addr += l;
    }
2575
    rcu_read_unlock();
B
bellard 已提交
2576 2577
}

2578
/* used for ROM loading : can write in RAM and ROM */
2579
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2580 2581
                                   const uint8_t *buf, int len)
{
2582
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

2597 2598
    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
2599 2600
}

2601
typedef struct {
2602
    MemoryRegion *mr;
2603
    void *buffer;
A
Avi Kivity 已提交
2604 2605
    hwaddr addr;
    hwaddr len;
F
Fam Zheng 已提交
2606
    bool in_use;
2607 2608 2609 2610
} BounceBuffer;

static BounceBuffer bounce;

2611
typedef struct MapClient {
2612
    QEMUBH *bh;
B
Blue Swirl 已提交
2613
    QLIST_ENTRY(MapClient) link;
2614 2615
} MapClient;

2616
QemuMutex map_client_list_lock;
B
Blue Swirl 已提交
2617 2618
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
2619

2620 2621 2622 2623 2624 2625
static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

2626 2627 2628 2629 2630 2631
static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
2632 2633
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
2634 2635 2636
    }
}

2637
void cpu_register_map_client(QEMUBH *bh)
2638
{
2639
    MapClient *client = g_malloc(sizeof(*client));
2640

2641
    qemu_mutex_lock(&map_client_list_lock);
2642
    client->bh = bh;
B
Blue Swirl 已提交
2643
    QLIST_INSERT_HEAD(&map_client_list, client, link);
2644 2645 2646
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
2647
    qemu_mutex_unlock(&map_client_list_lock);
2648 2649
}

2650
void cpu_exec_init_all(void)
2651
{
2652 2653 2654 2655
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
    qemu_mutex_init(&map_client_list_lock);
2656 2657
}

2658
void cpu_unregister_map_client(QEMUBH *bh)
2659 2660 2661
{
    MapClient *client;

2662 2663 2664 2665 2666 2667
    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
2668
    }
2669
    qemu_mutex_unlock(&map_client_list_lock);
2670 2671 2672 2673
}

static void cpu_notify_map_clients(void)
{
2674
    qemu_mutex_lock(&map_client_list_lock);
2675
    cpu_notify_map_clients_locked();
2676
    qemu_mutex_unlock(&map_client_list_lock);
2677 2678
}

2679 2680
bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
2681
    MemoryRegion *mr;
2682 2683
    hwaddr l, xlat;

2684
    rcu_read_lock();
2685 2686
    while (len > 0) {
        l = len;
2687 2688 2689 2690
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2691 2692 2693 2694 2695 2696 2697
                return false;
            }
        }

        len -= l;
        addr += l;
    }
2698
    rcu_read_unlock();
2699 2700 2701
    return true;
}

2702 2703 2704 2705
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
2706 2707
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
2708
 */
A
Avi Kivity 已提交
2709
void *address_space_map(AddressSpace *as,
A
Avi Kivity 已提交
2710 2711
                        hwaddr addr,
                        hwaddr *plen,
A
Avi Kivity 已提交
2712
                        bool is_write)
2713
{
A
Avi Kivity 已提交
2714
    hwaddr len = *plen;
2715 2716 2717 2718
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;
2719

2720 2721 2722
    if (len == 0) {
        return NULL;
    }
2723

2724
    l = len;
2725
    rcu_read_lock();
2726
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
2727

2728
    if (!memory_access_is_direct(mr, is_write)) {
F
Fam Zheng 已提交
2729
        if (atomic_xchg(&bounce.in_use, true)) {
2730
            rcu_read_unlock();
2731
            return NULL;
2732
        }
2733 2734 2735
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2736 2737
        bounce.addr = addr;
        bounce.len = l;
2738 2739 2740

        memory_region_ref(mr);
        bounce.mr = mr;
2741
        if (!is_write) {
2742 2743
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
2744
        }
2745

2746
        rcu_read_unlock();
2747 2748 2749 2750 2751 2752 2753 2754
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
2755 2756
        len -= l;
        addr += l;
2757 2758 2759 2760 2761 2762 2763 2764 2765 2766
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
2767
    }
2768

2769
    memory_region_ref(mr);
2770
    rcu_read_unlock();
2771 2772
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
2773 2774
}

A
Avi Kivity 已提交
2775
/* Unmaps a memory region previously mapped by address_space_map().
2776 2777 2778
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
A
Avi Kivity 已提交
2779 2780
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
2781 2782
{
    if (buffer != bounce.buffer) {
2783 2784 2785 2786 2787
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
2788
        if (is_write) {
2789
            invalidate_and_set_dirty(mr, addr1, access_len);
2790
        }
2791
        if (xen_enabled()) {
J
Jan Kiszka 已提交
2792
            xen_invalidate_map_cache_entry(buffer);
A
Anthony PERARD 已提交
2793
        }
2794
        memory_region_unref(mr);
2795 2796 2797
        return;
    }
    if (is_write) {
2798 2799
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
2800
    }
2801
    qemu_vfree(bounce.buffer);
2802
    bounce.buffer = NULL;
2803
    memory_region_unref(bounce.mr);
F
Fam Zheng 已提交
2804
    atomic_mb_set(&bounce.in_use, false);
2805
    cpu_notify_map_clients();
2806
}
B
bellard 已提交
2807

A
Avi Kivity 已提交
2808 2809
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
A
Avi Kivity 已提交
2810 2811 2812 2813 2814
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

A
Avi Kivity 已提交
2815 2816
void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
A
Avi Kivity 已提交
2817 2818 2819 2820
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

B
bellard 已提交
2821
/* warning: addr must be aligned */
2822 2823 2824 2825
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
B
bellard 已提交
2826 2827
{
    uint8_t *ptr;
2828
    uint64_t val;
2829
    MemoryRegion *mr;
2830 2831
    hwaddr l = 4;
    hwaddr addr1;
2832
    MemTxResult r;
2833
    bool release_lock = false;
B
bellard 已提交
2834

2835
    rcu_read_lock();
2836
    mr = address_space_translate(as, addr, &addr1, &l, false);
2837
    if (l < 4 || !memory_access_is_direct(mr, false)) {
2838
        release_lock |= prepare_mmio_access(mr);
2839

B
bellard 已提交
2840
        /* I/O case */
2841
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2842 2843 2844 2845 2846 2847 2848 2849 2850
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
B
bellard 已提交
2851 2852
    } else {
        /* RAM case */
2853
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2854
                                & TARGET_PAGE_MASK)
2855
                               + addr1);
2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
2867 2868 2869 2870
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
B
bellard 已提交
2871
    }
2872 2873 2874
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
2875
    rcu_read_unlock();
B
bellard 已提交
2876 2877 2878
    return val;
}

2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899
uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

2900
uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2901
{
2902
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2903 2904
}

2905
uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2906
{
2907
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2908 2909
}

2910
uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2911
{
2912
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2913 2914
}

B
bellard 已提交
2915
/* warning: addr must be aligned */
2916 2917 2918 2919
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
B
bellard 已提交
2920 2921 2922
{
    uint8_t *ptr;
    uint64_t val;
2923
    MemoryRegion *mr;
2924 2925
    hwaddr l = 8;
    hwaddr addr1;
2926
    MemTxResult r;
2927
    bool release_lock = false;
B
bellard 已提交
2928

2929
    rcu_read_lock();
2930
    mr = address_space_translate(as, addr, &addr1, &l,
2931 2932
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
2933
        release_lock |= prepare_mmio_access(mr);
2934

B
bellard 已提交
2935
        /* I/O case */
2936
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2937 2938 2939 2940 2941 2942 2943 2944
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
B
bellard 已提交
2945 2946 2947
#endif
    } else {
        /* RAM case */
2948
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2949
                                & TARGET_PAGE_MASK)
2950
                               + addr1);
2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
2962 2963 2964 2965
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
B
bellard 已提交
2966
    }
2967 2968 2969
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
2970
    rcu_read_unlock();
B
bellard 已提交
2971 2972 2973
    return val;
}

2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994
uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

2995
uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2996
{
2997
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2998 2999
}

3000
uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3001
{
3002
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3003 3004
}

3005
uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3006
{
3007
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3008 3009
}

B
bellard 已提交
3010
/* XXX: optimize */
3011 3012
uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
B
bellard 已提交
3013 3014
{
    uint8_t val;
3015 3016 3017 3018 3019 3020
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
B
bellard 已提交
3021 3022 3023
    return val;
}

3024 3025 3026 3027 3028
uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

3029
/* warning: addr must be aligned */
3030 3031 3032 3033 3034
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
B
bellard 已提交
3035
{
3036 3037
    uint8_t *ptr;
    uint64_t val;
3038
    MemoryRegion *mr;
3039 3040
    hwaddr l = 2;
    hwaddr addr1;
3041
    MemTxResult r;
3042
    bool release_lock = false;
3043

3044
    rcu_read_lock();
3045
    mr = address_space_translate(as, addr, &addr1, &l,
3046 3047
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
3048
        release_lock |= prepare_mmio_access(mr);
3049

3050
        /* I/O case */
3051
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3052 3053 3054 3055 3056 3057 3058 3059 3060
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
3061 3062
    } else {
        /* RAM case */
3063
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3064
                                & TARGET_PAGE_MASK)
3065
                               + addr1);
3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
3077 3078 3079 3080
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
3081
    }
3082 3083 3084
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
3085
    rcu_read_unlock();
3086
    return val;
B
bellard 已提交
3087 3088
}

3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109
uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

3110
uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3111
{
3112
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3113 3114
}

3115
uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3116
{
3117
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3118 3119
}

3120
uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3121
{
3122
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3123 3124
}

B
bellard 已提交
3125 3126 3127
/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
3128 3129
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
B
bellard 已提交
3130 3131
{
    uint8_t *ptr;
3132
    MemoryRegion *mr;
3133 3134
    hwaddr l = 4;
    hwaddr addr1;
3135
    MemTxResult r;
3136
    uint8_t dirty_log_mask;
3137
    bool release_lock = false;
B
bellard 已提交
3138

3139
    rcu_read_lock();
3140
    mr = address_space_translate(as, addr, &addr1, &l,
3141 3142
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
3143
        release_lock |= prepare_mmio_access(mr);
3144

3145
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
B
bellard 已提交
3146
    } else {
3147
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
P
pbrook 已提交
3148
        ptr = qemu_get_ram_ptr(addr1);
B
bellard 已提交
3149
        stl_p(ptr, val);
A
aliguori 已提交
3150

3151 3152
        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3153
        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3154 3155 3156 3157
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
B
bellard 已提交
3158
    }
3159 3160 3161
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
3162
    rcu_read_unlock();
B
bellard 已提交
3163 3164
}

3165 3166 3167 3168 3169
void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

B
bellard 已提交
3170
/* warning: addr must be aligned */
3171 3172 3173 3174 3175
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}
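/*
 * Illustrative example: callers that care about bus errors can pass a
 * MemTxResult pointer instead of NULL and check it after the access:
 *
 *     MemTxResult res;
 *     address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, &res);
 *     if (res != MEMTX_OK) {
 *         // handle a decode or device error for this write
 *     }
 *
 * The stl_phys()/stl_le_phys()/stl_be_phys() wrappers below simply discard
 * the result.
 */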

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
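/*
 * Byte stores need no alignment or byte-swapping, so address_space_stb()
 * simply forwards to the generic address_space_rw() path; the XXX above
 * presumably refers to adding a dedicated fast path like the word-sized
 * helpers have.
 */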

/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}
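/*
 * As with the 32-bit helpers above, the native-, little- and big-endian
 * variants differ only in the device_endian value passed to
 * address_space_stw_internal(): a device model with fixed-endian registers
 * would typically use the _le or _be form, while the plain form stores in
 * the target's native byte order.
 */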

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
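/*
 * Illustrative example: the gdb stub and the monitor use this path to read
 * or patch guest memory by virtual address, e.g. reading 4 bytes at a
 * hypothetical "vaddr" on the current CPU:
 *
 *     uint8_t buf[4];
 *     if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) < 0) {
 *         // no physical page is mapped at vaddr
 *     }
 *
 * Writes (is_write != 0) go through cpu_physical_memory_write_rom() so
 * that, for example, breakpoints can be planted even in ROM areas.
 */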
#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
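/*
 * Illustrative example: a legacy device model that needs to know the
 * guest's compile-time byte order (rather than using the endian-aware
 * accessors above) could check it like this:
 *
 *     if (target_words_bigendian()) {
 *         val = bswap16(val);
 *     }
 *
 * As the comment above says, new code should not rely on this.
 */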

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}
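/*
 * Illustrative example: a caller of qemu_ram_foreach_block() below supplies
 * a RAMBlockIterFunc callback that is invoked once per RAM block, e.g.:
 *
 *     static int dump_block(const char *idstr, void *host_addr,
 *                           ram_addr_t offset, ram_addr_t length, void *opaque)
 *     {
 *         printf("%s: host %p offset " RAM_ADDR_FMT " length " RAM_ADDR_FMT "\n",
 *                idstr, host_addr, offset, length);
 *         return 0; // a non-zero return stops the iteration early
 *     }
 *
 *     qemu_ram_foreach_block(dump_block, NULL);
 */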

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
#endif