/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#ifndef _WIN32
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "hw/qdev.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "exec/log.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;

/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

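/* Allocate a new node from the map's pre-reserved pool and initialise every
 * entry: unassigned sections for a leaf node, NIL pointers otherwise.
 */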
static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

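/* Map [index, index + nb) target pages to the section 'leaf' in the
 * dispatch radix tree, allocating intermediate nodes as needed.
 */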
static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

static inline bool section_covers_addr(const MemoryRegionSection *section,
                                       hwaddr addr)
{
    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
     * the section must cover the entire address space.
     */
    return section->size.hi ||
           range_covers_byte(section->offset_within_address_space,
                             section->size.lo, addr);
}

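/* Walk the radix tree from the root and return the MemoryRegionSection
 * covering 'addr', or the unassigned section if nothing is mapped there.
 * An entry's 'skip' field says how many levels the lookup may jump at once.
 */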
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (section_covers_addr(&sections[lp.ptr], addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;

    section = address_space_translate_internal(d, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

static bool cpu_common_crash_occurred_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return cpu->crash_occurred;
}

static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
{
    CPUAddressSpace *newas;

    /* Target code should have set num_ases before calling us */
    assert(asidx < cpu->num_ases);

    if (asidx == 0) {
        /* address space 0 gets the convenience alias */
        cpu->as = as;
    }

    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    if (!cpu->cpu_ases) {
        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
    }

    newas = &cpu->cpu_ases[asidx];
    newas->cpu = cpu;
    newas->as = as;
    if (tcg_enabled()) {
        newas->tcg_as_listener.commit = tcg_commit;
        memory_listener_register(&newas->tcg_as_listener, as);
    }
}

AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
    /* Return the AddressSpace corresponding to the specified index */
    return cpu->cpu_ases[asidx].as;
}
#endif

#ifndef CONFIG_USER_ONLY
static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);

static int cpu_get_free_index(Error **errp)
{
    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);

    if (cpu >= MAX_CPUMASK_BITS) {
        error_setg(errp, "Trying to use more CPUs than max of %d",
                   MAX_CPUMASK_BITS);
        return -1;
    }

    bitmap_set(cpu_index_map, cpu, 1);
    return cpu;
}

void cpu_exec_exit(CPUState *cpu)
{
    if (cpu->cpu_index == -1) {
        /* cpu_index was never allocated by this @cpu or was already freed. */
        return;
    }

    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
    cpu->cpu_index = -1;
}
#else

static int cpu_get_free_index(Error **errp)
{
    CPUState *some_cpu;
    int cpu_index = 0;

    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    return cpu_index;
}

void cpu_exec_exit(CPUState *cpu)
{
}
#endif

void cpu_exec_init(CPUState *cpu, Error **errp)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    int cpu_index;
    Error *local_err = NULL;

    cpu->as = NULL;
    cpu->num_ases = 0;

#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();

    /* This is a softmmu CPU object, so create a property for it
     * so users can wire up its memory. (This can't go in qom/cpu.c
     * because that file is compiled only once for both user-mode
     * and system builds.) The default if no link is set up is to use
     * the system address space.
     */
    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
                             (Object **)&cpu->memory,
                             qdev_prop_allow_set_link_before_realize,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
                             &error_abort);
    cpu->memory = system_memory;
    object_ref(OBJECT(cpu->memory));
#endif

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
    if (local_err) {
        error_propagate(errp, local_err);
#if defined(CONFIG_USER_ONLY)
        cpu_list_unlock();
#endif
        return;
    }
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    MemTxAttrs attrs;
    hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    if (phys != -1) {
        tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)

{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}

#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(cpu);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_separate()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
    replay_finish();
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

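/* Reset the dirty-memory TLB state of every CPU for the guest physical
 * range [start, start + length); the whole range must belong to a single
 * RAMBlock.
 */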
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    bool dirty = false;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    while (page < end) {
        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);

        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
                                              offset, num);
        page += num;
    }

    rcu_read_unlock();

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}

/* Called from RCU critical section */
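/* Compute the iotlb value for a softmmu TLB entry covering 'vaddr': a
 * ram_addr_t (tagged NOTDIRTY or ROM when needed) for RAM, or a section
 * index for MMIO, forcing the slow path for pages covered by a watchpoint.
 */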
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        AddressSpaceDispatch *d;

        d = atomic_rcu_read(&section->address_space->dispatch);
        iotlb = section - d->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    bool have_sub_page = mr->subpage;

    memory_region_unref(mr);

    if (have_sub_page) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

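/* Register a section that does not cover a whole target page: route it
 * through a subpage_t so that several sections can share one page.
 */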
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}


static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

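/* MemoryListener callback used while rebuilding an AddressSpaceDispatch:
 * split the incoming section into a page-aligned middle part
 * (register_multipage) and partial-page head/tail pieces (register_subpage).
 */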
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path, Error **errp)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        error_setg_errno(errp, errno, "failed to get page size of file %s",
                         path);
        return 0;
    }

    return fs.f_bsize;
}

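/* Back a RAMBlock with a file, typically on hugetlbfs: create or open a
 * file under 'path', grow it to 'memory' bytes and mmap it into the host
 * address space.
 */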
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path,
                            Error **errp)
{
    struct stat st;
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
    uint64_t hpagesize;
    Error *local_err = NULL;

    hpagesize = gethugepagesize(path, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }
    block->mr->align = hpagesize;

    if (memory < hpagesize) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than huge page size 0x%" PRIx64,
                   memory, hpagesize);
        goto error;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        goto error;
    }

    if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
        /* Make name safe to use with mkstemp by replacing '/' with '_'. */
        sanitized_name = g_strdup(memory_region_name(block->mr));
        for (c = sanitized_name; *c != '\0'; c++) {
            if (*c == '/') {
                *c = '_';
            }
        }

        filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                                   sanitized_name);
        g_free(sanitized_name);

        fd = mkstemp(filename);
        if (fd >= 0) {
            unlink(filename);
        }
        g_free(filename);
    } else {
        fd = open(path, O_RDWR | O_CREAT, 0644);
    }

    if (fd < 0) {
        error_setg_errno(errp, errno,
                         "unable to create backing store for hugepages");
        goto error;
    }

    memory = ROUND_UP(memory, hpagesize);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory)) {
        perror("ftruncate");
    }

    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for hugepages");
        close(fd);
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory);
    }

    block->fd = fd;
    return area;

error:
    return NULL;
}
#endif

/* Called with the ramlist lock held.  */
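/* Pick a guest physical offset for a new block of 'size' bytes by searching
 * for the smallest gap between the already registered RAMBlocks.
 */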
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

/* Called within an RCU critical section, or while the ramlist lock
 * is held.
 */
static RAMBlock *find_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            return block;
        }
    }

    return NULL;
}

const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}

/* Called with iothread lock held.  */
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    rcu_read_lock();
    new_block = find_ram_block(addr);
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    rcu_read_unlock();
}

/* Called with iothread lock held.  */
void qemu_ram_unset_idstr(ram_addr_t addr)
{
    RAMBlock *block;

    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */

    rcu_read_lock();
    block = find_ram_block(addr);
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
    rcu_read_unlock();
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As memory core doesn't know how is memory accessed, it is up to
 * resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
{
    RAMBlock *block = find_ram_block(base);

    assert(block);

    newsize = HOST_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
    memory_region_set_size(block->mr, newsize);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}

/* Called with ram_list.mutex held */
static void dirty_memory_extend(ram_addr_t old_ram_size,
                                ram_addr_t new_ram_size)
{
    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    int i;

    /* Only need to extend if block count increased */
    if (new_num_blocks <= old_num_blocks) {
        return;
    }

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        DirtyMemoryBlocks *old_blocks;
        DirtyMemoryBlocks *new_blocks;
        int j;

        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
        new_blocks = g_malloc(sizeof(*new_blocks) +
                              sizeof(new_blocks->blocks[0]) * new_num_blocks);

        if (old_num_blocks) {
            memcpy(new_blocks->blocks, old_blocks->blocks,
                   old_num_blocks * sizeof(old_blocks->blocks[0]));
        }

        for (j = old_num_blocks; j < new_num_blocks; j++) {
            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
        }

        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);

        if (old_blocks) {
            g_free_rcu(old_blocks, rcu);
        }
    }
}

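/* Common tail of RAM allocation: assign an offset, allocate host memory if
 * the caller did not provide any, insert the block into ram_list (sorted by
 * size, largest first) and mark the new range dirty for every client.
 */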
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr, &err);
            if (err) {
                error_propagate(errp, err);
                qemu_mutex_unlock_ramlist();
            }
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        migration_bitmap_extend(old_ram_size, new_ram_size);
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
        if (kvm_enabled()) {
            kvm_setup_guest_memory(new_block->host, new_block->max_length);
        }
1640
    }
P
pbrook 已提交
1641
}
B
bellard 已提交
1642

#ifdef __linux__
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   bool share, const char *mem_path,
                                   Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;

    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return NULL;
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
        error_setg(errp,
                   "-mem-path not supported with this accelerator");
        return NULL;
    }

    size = HOST_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
    if (!new_block->host) {
        g_free(new_block);
        return NULL;
    }

    ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}
#endif
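/*
 * Usage sketch (illustrative, not part of the original source): a caller
 * that wants file-backed guest RAM (e.g. hugetlbfs via -mem-path) normally
 * reaches this through the MemoryRegion layer, but a direct call looks
 * roughly like the following.  The size and path are made-up example values.
 *
 *     Error *err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc_from_file(256 * 1024 * 1024, mr, true,
 *                                             "/dev/hugepages/guest", &err);
 *     if (!rb) {
 *         error_report_err(err);
 *     }
 */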

static
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                  void (*resized)(const char*,
                                                  uint64_t length,
                                                  void *host),
                                  void *host, bool resizeable,
                                  MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    Error *local_err = NULL;

    size = HOST_PAGE_ALIGN(size);
    max_size = HOST_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
    ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return NULL;
    }
    return new_block;
}

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}

RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}

RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}
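/*
 * Illustrative sketch (not from the original source): the resizeable variant
 * is intended for blocks whose used_length can change at reset time (e.g.
 * firmware/ACPI blobs).  A caller would pass a callback that is invoked when
 * the used length changes, roughly:
 *
 *     static void my_resized(const char *id, uint64_t length, void *host)
 *     {
 *         // react to the new used_length, e.g. update device state
 *     }
 *
 *     RAMBlock *rb = qemu_ram_alloc_resizeable(initial_sz, max_sz,
 *                                              my_resized, mr, &err);
 *
 * "my_resized", "initial_sz" and "max_sz" are placeholder names used only
 * for this example.
 */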

static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        qemu_ram_munmap(block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(RAMBlock *block)
{
    qemu_mutex_lock_ramlist();
    QLIST_REMOVE_RCU(block, next);
    ram_list.mru_block = NULL;
    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    call_rcu(block, reclaim_ramblock, rcu);
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

int qemu_get_ram_fd(ram_addr_t addr)
{
    RAMBlock *block;
    int fd;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    fd = block->fd;
    rcu_read_unlock();
    return fd;
}

void qemu_set_ram_fd(ram_addr_t addr, int fd)
{
    RAMBlock *block;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    block->fd = fd;
    rcu_read_unlock();
}

void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    ptr = ramblock_ptr(block, 0);
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * Called within RCU critical section.
 */
void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
{
    RAMBlock *block = ram_block;

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
    }

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }
    return ramblock_ptr(block, addr - block->offset);
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument.
 *
 * Called within RCU critical section.
 */
static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
                                 hwaddr *size)
{
    RAMBlock *block = ram_block;
    ram_addr_t offset_inside_block;
    if (*size == 0) {
        return NULL;
    }

    if (block == NULL) {
        block = qemu_get_ram_block(addr);
    }
    offset_inside_block = addr - block->offset;
    *size = MIN(*size, block->max_length - offset_inside_block);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map the requested area.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, *size, 1);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }

    return ramblock_ptr(block, offset_inside_block);
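/*
 * Illustrative sketch (not part of the original source): both helpers above
 * assume the caller already holds the RCU read lock, since the returned
 * pointer is only stable while the RAMBlock cannot go away.  A typical call
 * pattern is roughly:
 *
 *     rcu_read_lock();
 *     void *host = qemu_get_ram_ptr(NULL, ram_addr);
 *     // ... touch the guest memory behind "host" here ...
 *     rcu_read_unlock();
 *
 * "ram_addr" stands for a valid ram_addr_t obtained elsewhere.
 */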

/*
 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
 * in that RAMBlock.
 *
 * ptr: Host pointer to look up
 * round_offset: If true round the result offset down to a page boundary
 * *ram_addr: set to result ram_addr
 * *offset: set to result offset within the RAMBlock
 *
 * Returns: RAMBlock (or NULL if not found)
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *ram_addr,
                                   ram_addr_t *offset)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        rcu_read_lock();
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        block = qemu_get_ram_block(*ram_addr);
        if (block) {
            *offset = (host - block->host);
        }
        rcu_read_unlock();
        return block;
    }

    rcu_read_lock();
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    rcu_read_unlock();
    return NULL;

found:
    *offset = (host - block->host);
    if (round_offset) {
        *offset &= TARGET_PAGE_MASK;
    }
    *ram_addr = block->offset + *offset;
    rcu_read_unlock();
    return block;
}

/*
 * Finds the named RAMBlock
 *
 * name: The name of RAMBlock to find
 *
 * Returns: RAMBlock (or NULL if not found)
 */
RAMBlock *qemu_ram_block_by_name(const char *name)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (!strcmp(name, block->idstr)) {
            return block;
        }
    }

    return NULL;
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    ram_addr_t offset; /* Not used */

    block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);

    if (!block) {
        return NULL;
    }

    return block->mr;
}
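/*
 * Illustrative sketch (not part of the original source): callers such as
 * migration use these lookups to resolve a block by name and to map a host
 * pointer back to a (block, offset) pair, roughly:
 *
 *     RAMBlock *rb = qemu_ram_block_by_name("pc.ram");
 *
 *     ram_addr_t ram_addr, offset;
 *     RAMBlock *owner = qemu_ram_block_from_host(host_ptr, true,
 *                                                &ram_addr, &offset);
 *
 * "pc.ram" and "host_ptr" are example values only.  qemu_ram_block_by_name()
 * walks the RCU list without taking the RCU read lock itself, so the caller
 * presumably needs to hold it.
 */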

2022
/* Called within RCU critical section.  */
A
Avi Kivity 已提交
2023
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2024
                               uint64_t val, unsigned size)
2025
{
2026
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2027
        tb_invalidate_phys_page_fast(ram_addr, size);
2028
    }
2029 2030
    switch (size) {
    case 1:
2031
        stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2032 2033
        break;
    case 2:
2034
        stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2035 2036
        break;
    case 4:
2037
        stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2038 2039 2040
        break;
    default:
        abort();
2041
    }
2042 2043 2044 2045 2046
    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size,
                                        DIRTY_CLIENTS_NOCODE);
B
bellard 已提交
2047 2048
    /* we remove the notdirty callback only if the code has been
       flushed */
2049
    if (!cpu_physical_memory_is_clean(ram_addr)) {
2050
        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2051
    }
2052 2053
}

2054 2055 2056 2057 2058 2059
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

2060 2061
static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
2062
    .valid.accepts = notdirty_mem_accepts,
2063
    .endianness = DEVICE_NATIVE_ENDIAN,
2064 2065
};

P
pbrook 已提交
2066
/* Generate a debug exception if a watchpoint has been hit.  */
2067
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
P
pbrook 已提交
2068
{
2069
    CPUState *cpu = current_cpu;
2070
    CPUClass *cc = CPU_GET_CLASS(cpu);
2071
    CPUArchState *env = cpu->env_ptr;
2072
    target_ulong pc, cs_base;
P
pbrook 已提交
2073
    target_ulong vaddr;
2074
    CPUWatchpoint *wp;
2075
    int cpu_flags;
P
pbrook 已提交
2076

2077
    if (cpu->watchpoint_hit) {
2078 2079 2080
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that is will trigger after the
         * current instruction. */
2081
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2082 2083
        return;
    }
2084
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2085
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2086 2087
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
2088 2089 2090 2091 2092 2093
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
2094
            wp->hitattrs = attrs;
2095
            if (!cpu->watchpoint_hit) {
2096 2097 2098 2099 2100
                if (wp->flags & BP_CPU &&
                    !cc->debug_check_watchpoint(cpu, wp)) {
                    wp->flags &= ~BP_WATCHPOINT_HIT;
                    continue;
                }
2101
                cpu->watchpoint_hit = wp;
2102
                tb_check_watchpoint(cpu);
2103
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2104
                    cpu->exception_index = EXCP_DEBUG;
2105
                    cpu_loop_exit(cpu);
2106 2107
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2108
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2109
                    cpu_resume_from_signal(cpu, NULL);
2110
                }
2111
            }
2112 2113
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
P
pbrook 已提交
2114 2115 2116 2117
        }
    }
}

2118 2119 2120
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
2121 2122
static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
                                  unsigned size, MemTxAttrs attrs)
2123
{
2124 2125
    MemTxResult res;
    uint64_t data;
2126 2127
    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2128 2129

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2130
    switch (size) {
2131
    case 1:
2132
        data = address_space_ldub(as, addr, attrs, &res);
2133 2134
        break;
    case 2:
2135
        data = address_space_lduw(as, addr, attrs, &res);
2136 2137
        break;
    case 4:
2138
        data = address_space_ldl(as, addr, attrs, &res);
2139
        break;
2140 2141
    default: abort();
    }
2142 2143
    *pdata = data;
    return res;
2144 2145
}

2146 2147 2148
static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned size,
                                   MemTxAttrs attrs)
2149
{
2150
    MemTxResult res;
2151 2152
    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2153 2154

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2155
    switch (size) {
2156
    case 1:
2157
        address_space_stb(as, addr, val, attrs, &res);
2158 2159
        break;
    case 2:
2160
        address_space_stw(as, addr, val, attrs, &res);
2161 2162
        break;
    case 4:
2163
        address_space_stl(as, addr, val, attrs, &res);
2164
        break;
2165 2166
    default: abort();
    }
2167
    return res;
2168 2169
}

2170
static const MemoryRegionOps watch_mem_ops = {
2171 2172
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
2173
    .endianness = DEVICE_NATIVE_ENDIAN,
2174 2175
};

2176 2177
static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
2178
{
2179
    subpage_t *subpage = opaque;
2180
    uint8_t buf[8];
2181
    MemTxResult res;
2182

2183
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2184
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2185
           subpage, len, addr);
2186
#endif
2187 2188 2189 2190
    res = address_space_read(subpage->as, addr + subpage->base,
                             attrs, buf, len);
    if (res) {
        return res;
2191
    }
2192 2193
    switch (len) {
    case 1:
2194 2195
        *data = ldub_p(buf);
        return MEMTX_OK;
2196
    case 2:
2197 2198
        *data = lduw_p(buf);
        return MEMTX_OK;
2199
    case 4:
2200 2201
        *data = ldl_p(buf);
        return MEMTX_OK;
2202
    case 8:
2203 2204
        *data = ldq_p(buf);
        return MEMTX_OK;
2205 2206 2207
    default:
        abort();
    }
2208 2209
}

2210 2211
static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2212
{
2213
    subpage_t *subpage = opaque;
2214
    uint8_t buf[8];
2215

2216
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2217
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2218 2219
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
2220
#endif
2221 2222 2223 2224 2225 2226 2227 2228 2229 2230
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
2231 2232 2233
    case 8:
        stq_p(buf, value);
        break;
2234 2235 2236
    default:
        abort();
    }
2237 2238
    return address_space_write(subpage->as, addr + subpage->base,
                               attrs, buf, len);
2239 2240
}

2241
static bool subpage_accepts(void *opaque, hwaddr addr,
A
Amos Kong 已提交
2242
                            unsigned len, bool is_write)
2243
{
2244
    subpage_t *subpage = opaque;
2245
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2246
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2247
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2248 2249
#endif

2250
    return address_space_access_valid(subpage->as, addr + subpage->base,
A
Amos Kong 已提交
2251
                                      len, is_write);
2252 2253
}

2254
static const MemoryRegionOps subpage_ops = {
2255 2256
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
2257 2258 2259 2260
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
2261
    .valid.accepts = subpage_accepts,
2262
    .endianness = DEVICE_NATIVE_ENDIAN,
2263 2264
};

A
Anthony Liguori 已提交
2265
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2266
                             uint16_t section)
2267 2268 2269 2270 2271 2272 2273 2274
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2275 2276
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
2277 2278
#endif
    for (; idx <= eidx; idx++) {
2279
        mmio->sub_section[idx] = section;
2280 2281 2282 2283 2284
    }

    return 0;
}

2285
static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2286
{
A
Anthony Liguori 已提交
2287
    subpage_t *mmio;
2288

2289
    mmio = g_malloc0(sizeof(subpage_t));
2290

2291
    mmio->as = as;
2292
    mmio->base = base;
2293
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
P
Peter Crosthwaite 已提交
2294
                          NULL, TARGET_PAGE_SIZE);
A
Avi Kivity 已提交
2295
    mmio->iomem.subpage = true;
2296
#if defined(DEBUG_SUBPAGE)
A
Amos Kong 已提交
2297 2298
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
2299
#endif
2300
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2301 2302 2303 2304

    return mmio;
}

2305 2306
static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
                              MemoryRegion *mr)
2307
{
2308
    assert(as);
2309
    MemoryRegionSection section = {
2310
        .address_space = as,
2311 2312 2313
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
2314
        .size = int128_2_64(),
2315 2316
    };

2317
    return phys_section_add(map, &section);
2318 2319
}

2320
MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2321
{
2322 2323
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2324
    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2325
    MemoryRegionSection *sections = d->map.sections;
P
Paolo Bonzini 已提交
2326 2327

    return sections[index & ~TARGET_PAGE_MASK].mr;
2328 2329
}

A
Avi Kivity 已提交
2330 2331
static void io_mem_init(void)
{
2332
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2333
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2334
                          NULL, UINT64_MAX);
2335
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2336
                          NULL, UINT64_MAX);
2337
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2338
                          NULL, UINT64_MAX);
A
Avi Kivity 已提交
2339 2340
}

A
Avi Kivity 已提交
2341
static void mem_begin(MemoryListener *listener)
2342 2343
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2344 2345 2346
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

2347
    n = dummy_section(&d->map, as, &io_mem_unassigned);
2348
    assert(n == PHYS_SECTION_UNASSIGNED);
2349
    n = dummy_section(&d->map, as, &io_mem_notdirty);
2350
    assert(n == PHYS_SECTION_NOTDIRTY);
2351
    n = dummy_section(&d->map, as, &io_mem_rom);
2352
    assert(n == PHYS_SECTION_ROM);
2353
    n = dummy_section(&d->map, as, &io_mem_watch);
2354
    assert(n == PHYS_SECTION_WATCH);
2355

M
Michael S. Tsirkin 已提交
2356
    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2357 2358 2359 2360
    d->as = as;
    as->next_dispatch = d;
}

2361 2362 2363 2364 2365 2366
static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}

2367
static void mem_commit(MemoryListener *listener)
A
Avi Kivity 已提交
2368
{
2369
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2370 2371 2372
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

2373
    phys_page_compact_all(next, next->map.nodes_nb);
2374

2375
    atomic_rcu_set(&as->dispatch, next);
2376
    if (cur) {
2377
        call_rcu(cur, address_space_dispatch_free, rcu);
2378
    }
2379 2380
}

2381
static void tcg_commit(MemoryListener *listener)
2382
{
2383 2384
    CPUAddressSpace *cpuas;
    AddressSpaceDispatch *d;
2385 2386 2387

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
2388 2389 2390 2391 2392 2393 2394 2395 2396
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    cpu_reloading_memory_map();
    /* The CPU and TLB are protected by the iothread lock.
     * We reload the dispatch pointer now because cpu_reloading_memory_map()
     * may have split the RCU critical section.
     */
    d = atomic_rcu_read(&cpuas->as->dispatch);
    cpuas->memory_dispatch = d;
    tlb_flush(cpuas->cpu, 1);
2397 2398
}

A
Avi Kivity 已提交
2399 2400
void address_space_init_dispatch(AddressSpace *as)
{
2401
    as->dispatch = NULL;
2402
    as->dispatch_listener = (MemoryListener) {
A
Avi Kivity 已提交
2403
        .begin = mem_begin,
2404
        .commit = mem_commit,
A
Avi Kivity 已提交
2405 2406 2407 2408
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
2409
    memory_listener_register(&as->dispatch_listener, as);
A
Avi Kivity 已提交
2410 2411
}

2412 2413 2414 2415 2416
void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}

A
Avi Kivity 已提交
2417 2418 2419 2420
void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

2421 2422 2423 2424
    atomic_rcu_set(&as->dispatch, NULL);
    if (d) {
        call_rcu(d, address_space_dispatch_free, rcu);
    }
A
Avi Kivity 已提交
2425 2426
}

A
Avi Kivity 已提交
2427 2428
static void memory_map_init(void)
{
2429
    system_memory = g_malloc(sizeof(*system_memory));
2430

2431
    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2432
    address_space_init(&address_space_memory, system_memory, "memory");
2433

2434
    system_io = g_malloc(sizeof(*system_io));
2435 2436
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
2437
    address_space_init(&address_space_io, system_io, "I/O");
A
Avi Kivity 已提交
2438 2439 2440 2441 2442 2443 2444
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

2445 2446 2447 2448 2449
MemoryRegion *get_system_io(void)
{
    return system_io;
}

2450 2451
#endif /* !defined(CONFIG_USER_ONLY) */

B
bellard 已提交
2452 2453
/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
2454
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
P
Paul Brook 已提交
2455
                        uint8_t *buf, int len, int is_write)
B
bellard 已提交
2456 2457 2458
{
    int l, flags;
    target_ulong page;
2459
    void * p;
B
bellard 已提交
2460 2461 2462 2463 2464 2465 2466 2467

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
P
Paul Brook 已提交
2468
            return -1;
B
bellard 已提交
2469 2470
        if (is_write) {
            if (!(flags & PAGE_WRITE))
P
Paul Brook 已提交
2471
                return -1;
2472
            /* XXX: this code should not depend on lock_user */
A
aurel32 已提交
2473
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
P
Paul Brook 已提交
2474
                return -1;
A
aurel32 已提交
2475 2476
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
B
bellard 已提交
2477 2478
        } else {
            if (!(flags & PAGE_READ))
P
Paul Brook 已提交
2479
                return -1;
2480
            /* XXX: this code should not depend on lock_user */
A
aurel32 已提交
2481
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
P
Paul Brook 已提交
2482
                return -1;
A
aurel32 已提交
2483
            memcpy(buf, p, l);
A
aurel32 已提交
2484
            unlock_user(p, addr, 0);
B
bellard 已提交
2485 2486 2487 2488 2489
        }
        len -= l;
        buf += l;
        addr += l;
    }
P
Paul Brook 已提交
2490
    return 0;
B
bellard 已提交
2491
}
B
bellard 已提交
2492

B
bellard 已提交
2493
#else
2494

2495
static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
A
Avi Kivity 已提交
2496
                                     hwaddr length)
2497
{
2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_range(addr, addr + length);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2510
    }
2511
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2512 2513
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    l = pow2floor(l);

    return l;
}
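/*
 * Worked example (added for illustration, not in the original source): for a
 * region whose ops declare valid.max_access_size = 4 and do not allow
 * unaligned accesses, an 8-byte access at address 0x1002 is first capped to 4
 * by the region, then to 2 by the address alignment (0x1002 & -0x1002 == 2);
 * pow2floor() leaves it at 2, so the caller splits the access into several
 * smaller dispatches.
 */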

static bool prepare_mmio_access(MemoryRegion *mr)
{
    bool unlocked = !qemu_mutex_iothread_locked();
    bool release_lock = false;

    if (unlocked && mr->global_locking) {
        qemu_mutex_lock_iothread();
        unlocked = false;
        release_lock = true;
    }
    if (mr->flush_coalesced_mmio) {
        if (unlocked) {
            qemu_mutex_lock_iothread();
        }
        qemu_flush_coalesced_mmio_buffer();
        if (unlocked) {
            qemu_mutex_unlock_iothread();
        }
    }

    return release_lock;
}
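/*
 * Illustrative sketch (not part of the original source): the dispatch loops
 * below use prepare_mmio_access() so that devices relying on the global
 * iothread lock still get it, while devices marked as doing their own
 * locking are entered without it, roughly:
 *
 *     bool release_lock = false;
 *     ...
 *     release_lock |= prepare_mmio_access(mr);
 *     memory_region_dispatch_write(mr, addr1, val, 4, attrs);
 *     ...
 *     if (release_lock) {
 *         qemu_mutex_unlock_iothread();
 *     }
 */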

2564 2565 2566 2567 2568 2569
/* Called within RCU critical section.  */
static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
                                                MemTxAttrs attrs,
                                                const uint8_t *buf,
                                                int len, hwaddr addr1,
                                                hwaddr l, MemoryRegion *mr)
B
bellard 已提交
2570 2571
{
    uint8_t *ptr;
2572
    uint64_t val;
2573
    MemTxResult result = MEMTX_OK;
2574
    bool release_lock = false;
2575

2576
    for (;;) {
2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608
        if (!memory_access_is_direct(mr, true)) {
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            /* XXX: could force current_cpu to NULL to avoid
               potential bugs */
            switch (l) {
            case 8:
                /* 64 bit write access */
                val = ldq_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                       attrs);
                break;
            case 4:
                /* 32 bit write access */
                val = ldl_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                       attrs);
                break;
            case 2:
                /* 16 bit write access */
                val = lduw_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                       attrs);
                break;
            case 1:
                /* 8 bit write access */
                val = ldub_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                       attrs);
                break;
            default:
                abort();
B
bellard 已提交
2609 2610
            }
        } else {
2611 2612
            addr1 += memory_region_get_ram_addr(mr);
            /* RAM case */
2613
            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2614 2615
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
B
bellard 已提交
2616
        }
2617 2618 2619 2620 2621 2622

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

B
bellard 已提交
2623 2624 2625
        len -= l;
        buf += l;
        addr += l;
2626 2627 2628 2629 2630 2631 2632

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
B
bellard 已提交
2633
    }
2634

2635
    return result;
B
bellard 已提交
2636
}
B
bellard 已提交
2637

2638 2639
MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
A
Avi Kivity 已提交
2640
{
2641 2642 2643 2644 2645
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

2646 2647
    if (len > 0) {
        rcu_read_lock();
2648
        l = len;
2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667
        mr = address_space_translate(as, addr, &addr1, &l, true);
        result = address_space_write_continue(as, addr, attrs, buf, len,
                                              addr1, l, mr);
        rcu_read_unlock();
    }

    return result;
}

/* Called within RCU critical section.  */
MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
                                        MemTxAttrs attrs, uint8_t *buf,
                                        int len, hwaddr addr1, hwaddr l,
                                        MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;
2668

2669
    for (;;) {
2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703
        if (!memory_access_is_direct(mr, false)) {
            /* I/O case */
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            switch (l) {
            case 8:
                /* 64 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                      attrs);
                stq_p(buf, val);
                break;
            case 4:
                /* 32 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                      attrs);
                stl_p(buf, val);
                break;
            case 2:
                /* 16 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                      attrs);
                stw_p(buf, val);
                break;
            case 1:
                /* 8 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                      attrs);
                stb_p(buf, val);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
F
Fam Zheng 已提交
2704 2705
            ptr = qemu_get_ram_ptr(mr->ram_block,
                                   memory_region_get_ram_addr(mr) + addr1);
2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716
            memcpy(buf, ptr, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;
2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
    }

    return result;
}

2729 2730
MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
                                    MemTxAttrs attrs, uint8_t *buf, int len)
2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    if (len > 0) {
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
        result = address_space_read_continue(as, addr, attrs, buf, len,
                                             addr1, l, mr);
        rcu_read_unlock();
2744 2745 2746
    }

    return result;
A
Avi Kivity 已提交
2747 2748
}

2749 2750 2751 2752 2753 2754 2755 2756 2757
MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
{
    if (is_write) {
        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
    } else {
        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
    }
}
A
Avi Kivity 已提交
2758

A
Avi Kivity 已提交
2759
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
A
Avi Kivity 已提交
2760 2761
                            int len, int is_write)
{
2762 2763
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
A
Avi Kivity 已提交
2764 2765
}

2766 2767 2768 2769 2770
enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

2771
static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2772
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
B
bellard 已提交
2773
{
2774
    hwaddr l;
B
bellard 已提交
2775
    uint8_t *ptr;
2776
    hwaddr addr1;
2777
    MemoryRegion *mr;
2778

2779
    rcu_read_lock();
B
bellard 已提交
2780
    while (len > 0) {
2781
        l = len;
2782
        mr = address_space_translate(as, addr, &addr1, &l, true);
2783

2784 2785
        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
2786
            l = memory_access_size(mr, l, addr1);
B
bellard 已提交
2787
        } else {
2788
            addr1 += memory_region_get_ram_addr(mr);
B
bellard 已提交
2789
            /* ROM/RAM case */
2790
            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2791 2792 2793
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
2794
                invalidate_and_set_dirty(mr, addr1, l);
2795 2796 2797 2798 2799
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
B
bellard 已提交
2800 2801 2802 2803 2804
        }
        len -= l;
        buf += l;
        addr += l;
    }
2805
    rcu_read_unlock();
B
bellard 已提交
2806 2807
}

2808
/* used for ROM loading : can write in RAM and ROM */
2809
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2810 2811
                                   const uint8_t *buf, int len)
{
2812
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

2827 2828
    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
2829 2830
}

2831
typedef struct {
2832
    MemoryRegion *mr;
2833
    void *buffer;
A
Avi Kivity 已提交
2834 2835
    hwaddr addr;
    hwaddr len;
F
Fam Zheng 已提交
2836
    bool in_use;
2837 2838 2839 2840
} BounceBuffer;

static BounceBuffer bounce;

2841
typedef struct MapClient {
2842
    QEMUBH *bh;
B
Blue Swirl 已提交
2843
    QLIST_ENTRY(MapClient) link;
2844 2845
} MapClient;

2846
QemuMutex map_client_list_lock;
B
Blue Swirl 已提交
2847 2848
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
2849

2850 2851 2852 2853 2854 2855
static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

2856 2857 2858 2859 2860 2861
static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
2862 2863
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
2864 2865 2866
    }
}

2867
void cpu_register_map_client(QEMUBH *bh)
2868
{
2869
    MapClient *client = g_malloc(sizeof(*client));
2870

2871
    qemu_mutex_lock(&map_client_list_lock);
2872
    client->bh = bh;
B
Blue Swirl 已提交
2873
    QLIST_INSERT_HEAD(&map_client_list, client, link);
2874 2875 2876
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
2877
    qemu_mutex_unlock(&map_client_list_lock);
2878 2879
}
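/*
 * Illustrative sketch (not part of the original source): DMA helpers use this
 * to retry a failed address_space_map() once the single bounce buffer is free
 * again, roughly:
 *
 *     void *p = address_space_map(as, addr, &len, is_write);
 *     if (!p) {
 *         // register a bottom half that will re-drive the transfer later
 *         cpu_register_map_client(retry_bh);
 *     }
 *
 * "retry_bh" is a placeholder for a QEMUBH created by the caller.
 */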

2880
void cpu_exec_init_all(void)
2881
{
2882 2883
    qemu_mutex_init(&ram_list.mutex);
    io_mem_init();
2884
    memory_map_init();
2885
    qemu_mutex_init(&map_client_list_lock);
2886 2887
}

2888
void cpu_unregister_map_client(QEMUBH *bh)
2889 2890 2891
{
    MapClient *client;

2892 2893 2894 2895 2896 2897
    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
2898
    }
2899
    qemu_mutex_unlock(&map_client_list_lock);
2900 2901 2902 2903
}

static void cpu_notify_map_clients(void)
{
2904
    qemu_mutex_lock(&map_client_list_lock);
2905
    cpu_notify_map_clients_locked();
2906
    qemu_mutex_unlock(&map_client_list_lock);
2907 2908
}

2909 2910
bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
2911
    MemoryRegion *mr;
2912 2913
    hwaddr l, xlat;

2914
    rcu_read_lock();
2915 2916
    while (len > 0) {
        l = len;
2917 2918 2919 2920
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2921 2922 2923 2924 2925 2926 2927
                return false;
            }
        }

        len -= l;
        addr += l;
    }
2928
    rcu_read_unlock();
2929 2930 2931
    return true;
}

2932 2933 2934 2935
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
2936 2937
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
2938
 */
A
Avi Kivity 已提交
2939
void *address_space_map(AddressSpace *as,
A
Avi Kivity 已提交
2940 2941
                        hwaddr addr,
                        hwaddr *plen,
A
Avi Kivity 已提交
2942
                        bool is_write)
2943
{
A
Avi Kivity 已提交
2944
    hwaddr len = *plen;
2945 2946 2947 2948
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;
2949
    void *ptr;
2950

2951 2952 2953
    if (len == 0) {
        return NULL;
    }
2954

2955
    l = len;
2956
    rcu_read_lock();
2957
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
2958

2959
    if (!memory_access_is_direct(mr, is_write)) {
F
Fam Zheng 已提交
2960
        if (atomic_xchg(&bounce.in_use, true)) {
2961
            rcu_read_unlock();
2962
            return NULL;
2963
        }
2964 2965 2966
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2967 2968
        bounce.addr = addr;
        bounce.len = l;
2969 2970 2971

        memory_region_ref(mr);
        bounce.mr = mr;
2972
        if (!is_write) {
2973 2974
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
2975
        }
2976

2977
        rcu_read_unlock();
2978 2979 2980 2981 2982 2983 2984 2985
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
2986 2987
        len -= l;
        addr += l;
2988 2989 2990 2991 2992 2993 2994 2995 2996 2997
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
2998
    }
2999

3000
    memory_region_ref(mr);
3001
    *plen = done;
3002
    ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3003 3004 3005
    rcu_read_unlock();

    return ptr;
3006 3007
}

A
Avi Kivity 已提交
3008
/* Unmaps a memory region previously mapped by address_space_map().
3009 3010 3011
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
A
Avi Kivity 已提交
3012 3013
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
3014 3015
{
    if (buffer != bounce.buffer) {
3016 3017 3018 3019 3020
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
3021
        if (is_write) {
3022
            invalidate_and_set_dirty(mr, addr1, access_len);
3023
        }
3024
        if (xen_enabled()) {
J
Jan Kiszka 已提交
3025
            xen_invalidate_map_cache_entry(buffer);
A
Anthony PERARD 已提交
3026
        }
3027
        memory_region_unref(mr);
3028 3029 3030
        return;
    }
    if (is_write) {
3031 3032
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
3033
    }
3034
    qemu_vfree(bounce.buffer);
3035
    bounce.buffer = NULL;
3036
    memory_region_unref(bounce.mr);
F
Fam Zheng 已提交
3037
    atomic_mb_set(&bounce.in_use, false);
3038
    cpu_notify_map_clients();
3039
}
B
bellard 已提交
3040

A
Avi Kivity 已提交
3041 3042
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
A
Avi Kivity 已提交
3043 3044 3045 3046 3047
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

A
Avi Kivity 已提交
3048 3049
void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
A
Avi Kivity 已提交
3050 3051 3052 3053
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

B
bellard 已提交
3054
/* warning: addr must be aligned */
3055 3056 3057 3058
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
B
bellard 已提交
3059 3060
{
    uint8_t *ptr;
3061
    uint64_t val;
3062
    MemoryRegion *mr;
3063 3064
    hwaddr l = 4;
    hwaddr addr1;
3065
    MemTxResult r;
3066
    bool release_lock = false;
B
bellard 已提交
3067

3068
    rcu_read_lock();
3069
    mr = address_space_translate(as, addr, &addr1, &l, false);
3070
    if (l < 4 || !memory_access_is_direct(mr, false)) {
3071
        release_lock |= prepare_mmio_access(mr);
3072

B
bellard 已提交
3073
        /* I/O case */
3074
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3075 3076 3077 3078 3079 3080 3081 3082 3083
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
B
bellard 已提交
3084 3085
    } else {
        /* RAM case */
3086 3087
        ptr = qemu_get_ram_ptr(mr->ram_block,
                               (memory_region_get_ram_addr(mr)
3088
                                & TARGET_PAGE_MASK)
3089
                               + addr1);
3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
3101 3102 3103 3104
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
B
bellard 已提交
3105
    }
3106 3107 3108
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
3109
    rcu_read_unlock();
B
bellard 已提交
3110 3111 3112
    return val;
}

3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133
uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

3134
uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3135
{
3136
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3137 3138
}

3139
uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3140
{
3141
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3142 3143
}

3144
uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3145
{
3146
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3147 3148
}

B
bellard 已提交
3149
/* warning: addr must be aligned */
3150 3151 3152 3153
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
B
bellard 已提交
3154 3155 3156
{
    uint8_t *ptr;
    uint64_t val;
3157
    MemoryRegion *mr;
3158 3159
    hwaddr l = 8;
    hwaddr addr1;
3160
    MemTxResult r;
3161
    bool release_lock = false;
B
bellard 已提交
3162

3163
    rcu_read_lock();
3164
    mr = address_space_translate(as, addr, &addr1, &l,
3165 3166
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
3167
        release_lock |= prepare_mmio_access(mr);
3168

B
bellard 已提交
3169
        /* I/O case */
3170
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3171 3172 3173 3174 3175 3176 3177 3178
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
B
bellard 已提交
3179 3180 3181
#endif
    } else {
        /* RAM case */
3182 3183
        ptr = qemu_get_ram_ptr(mr->ram_block,
                               (memory_region_get_ram_addr(mr)
3184
                                & TARGET_PAGE_MASK)
3185
                               + addr1);
3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
3197 3198 3199 3200
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
B
bellard 已提交
3201
    }
3202 3203 3204
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
3205
    rcu_read_unlock();
B
bellard 已提交
3206 3207 3208
    return val;
}

3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229
uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}
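
/*
 * Endianness handling in the _internal load above: on the MMIO path the
 * value returned by memory_region_dispatch_read() is byte-swapped whenever
 * the requested device endianness differs from the target's compile-time
 * endianness (the TARGET_WORDS_BIGENDIAN #if branches); on the direct RAM
 * path the endian-specific ldq_le_p()/ldq_be_p()/ldq_p() accessors are used
 * instead, so no extra swap is needed.
 */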

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(mr->ram_block,
                               (memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    uint8_t dirty_log_mask;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
        stl_p(ptr, val);

        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
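
/*
 * Note: the _notdirty store above still records the write for the other
 * dirty-memory clients returned by memory_region_get_dirty_log_mask()
 * (e.g. migration or VGA tracking, when enabled); only DIRTY_MEMORY_CODE
 * is masked out, so translated code on the page is not invalidated.
 */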

/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
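
/*
 * The stq helpers above convert the value first (tswap64() for the target's
 * native byte order, cpu_to_le64()/cpu_to_be64() for the explicit variants)
 * and then write all 8 bytes through the generic address_space_rw() path;
 * unlike the 16/32-bit stores there is no dedicated _internal fast path,
 * hence the "XXX: optimize" note.
 */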

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        int asidx;
        MemTxAttrs attrs;

        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
        asidx = cpu_asidx_from_attrs(cpu, attrs);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
                                          phys_addr, buf, l);
        } else {
            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
                             MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
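
/*
 * Hypothetical caller, for illustration only (not part of this file): a
 * gdbstub-style debugger read of guest virtual memory could look like:
 *
 *     uint8_t buf[16];
 *     if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) == 0) {
 *         // buf now holds guest memory, read via the debug path
 *     }
 */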

/*
 * Allows code that needs to deal with migration bitmaps etc to still be built
 * target independent.
 */
size_t qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
#endif