/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#ifndef _WIN32
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "hw/qdev.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "exec/log.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];
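
/* Illustrative sketch, not part of the original file: the dispatch map is a
 * radix tree of Node arrays, P_L2_BITS wide per level.  With ADDR_SPACE_BITS
 * of 64, P_L2_BITS of 9 and a typical TARGET_PAGE_BITS of 12, P_L2_LEVELS
 * works out to 6.  The helper below only restates the indexing expression
 * used by phys_page_set_level() and phys_page_find(); its name is
 * hypothetical and it is guarded out so it does not affect the build.
 */
#if 0
static inline unsigned phys_map_level_index(hwaddr addr, int level)
{
    hwaddr index = addr >> TARGET_PAGE_BITS;

    return (index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1);
}
#endif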

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;

/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
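
/* Illustrative sketch, not part of the original file: register_multipage()
 * further down maps a page-aligned range onto a section index with a single
 * phys_page_set() call, roughly as below.  The wrapper and its argument names
 * are hypothetical; the snippet is guarded out so it does not affect the build.
 */
#if 0
static void example_map_pages(AddressSpaceDispatch *d, hwaddr start_addr,
                              uint64_t num_pages, uint16_t section_index)
{
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}
#endif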

/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}
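
/* Illustrative usage sketch, not part of the original file: a read-side caller
 * typically resolves a guest physical address inside an RCU critical section,
 * e.g.
 *
 *     rcu_read_lock();
 *     mr = address_space_translate(as, addr, &xlat, &len, false);
 *     ... access at most "len" bytes of "mr" starting at offset "xlat" ...
 *     rcu_read_unlock();
 *
 * The variable names are hypothetical; the address_space_rw() family of
 * callers later in this file follows this pattern.
 */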

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;

    section = address_space_translate_internal(d, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

static bool cpu_common_crash_occurred_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return cpu->crash_occurred;
}

static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
{
    CPUAddressSpace *newas;

    /* Target code should have set num_ases before calling us */
    assert(asidx < cpu->num_ases);

    if (asidx == 0) {
        /* address space 0 gets the convenience alias */
        cpu->as = as;
    }

    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    if (!cpu->cpu_ases) {
        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
    }

    newas = &cpu->cpu_ases[asidx];
    newas->cpu = cpu;
    newas->as = as;
    if (tcg_enabled()) {
        newas->tcg_as_listener.commit = tcg_commit;
        memory_listener_register(&newas->tcg_as_listener, as);
    }
}

AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
    /* Return the AddressSpace corresponding to the specified index */
    return cpu->cpu_ases[asidx].as;
}
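
/* Illustrative sketch, not part of the original file: a softmmu target that
 * models two buses would set num_ases first and then wire each index up as
 * below (KVM currently only supports a single address space, so this assumes
 * TCG).  The helper and AddressSpace pointers are hypothetical; the snippet
 * is guarded out so it does not affect the build.
 */
#if 0
static void example_wire_cpu_ases(CPUState *cpu, AddressSpace *as0,
                                  AddressSpace *as1)
{
    cpu->num_ases = 2;
    cpu_address_space_init(cpu, as0, 0);    /* index 0 also becomes cpu->as */
    cpu_address_space_init(cpu, as1, 1);
    assert(cpu_get_address_space(cpu, 1) == as1);
}
#endif
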
#endif

#ifndef CONFIG_USER_ONLY
static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);

static int cpu_get_free_index(Error **errp)
{
    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);

    if (cpu >= MAX_CPUMASK_BITS) {
        error_setg(errp, "Trying to use more CPUs than max of %d",
                   MAX_CPUMASK_BITS);
        return -1;
    }

    bitmap_set(cpu_index_map, cpu, 1);
    return cpu;
}

void cpu_exec_exit(CPUState *cpu)
{
    if (cpu->cpu_index == -1) {
        /* cpu_index was never allocated by this @cpu or was already freed. */
        return;
    }

    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
    cpu->cpu_index = -1;
}
#else

static int cpu_get_free_index(Error **errp)
{
    CPUState *some_cpu;
    int cpu_index = 0;

    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    return cpu_index;
}

void cpu_exec_exit(CPUState *cpu)
{
}
#endif

void cpu_exec_init(CPUState *cpu, Error **errp)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    int cpu_index;
    Error *local_err = NULL;

    cpu->as = NULL;
    cpu->num_ases = 0;

#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();

    /* This is a softmmu CPU object, so create a property for it
     * so users can wire up its memory. (This can't go in qom/cpu.c
     * because that file is compiled only once for both user-mode
     * and system builds.) The default if no link is set up is to use
     * the system address space.
     */
    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
                             (Object **)&cpu->memory,
                             qdev_prop_allow_set_link_before_realize,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
                             &error_abort);
    cpu->memory = system_memory;
    object_ref(OBJECT(cpu->memory));
#endif

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
    if (local_err) {
        error_propagate(errp, local_err);
#if defined(CONFIG_USER_ONLY)
        cpu_list_unlock();
#endif
        return;
    }
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

670
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
671 672 673 674
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
675
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
676
{
677 678 679
    MemTxAttrs attrs;
    hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
680
    if (phys != -1) {
681
        tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
682
                                phys | (pc & ~TARGET_PAGE_MASK));
683
    }
684
}
B
bellard 已提交
685
#endif
B
bellard 已提交
686

687
#if defined(CONFIG_USER_ONLY)
688
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
689 690 691 692

{
}

693 694 695 696 697 698 699 700 701 702
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

703
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
704 705 706 707 708
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
709
/* Add a watchpoint.  */
710
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
711
                          int flags, CPUWatchpoint **watchpoint)
712
{
713
    CPUWatchpoint *wp;
714

715
    /* forbid ranges which are empty or run off the end of the address space */
716
    if (len == 0 || (addr + len - 1) < addr) {
717 718
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
719 720
        return -EINVAL;
    }
721
    wp = g_malloc(sizeof(*wp));
722 723

    wp->vaddr = addr;
724
    wp->len = len;
725 726
    wp->flags = flags;

727
    /* keep all GDB-injected watchpoints in front */
728 729 730 731 732
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }
733

734
    tlb_flush_page(cpu, addr);
735 736 737 738

    if (watchpoint)
        *watchpoint = wp;
    return 0;
739 740
}

741
/* Remove a specific watchpoint.  */
742
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
743
                          int flags)
744
{
745
    CPUWatchpoint *wp;
746

747
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
748
        if (addr == wp->vaddr && len == wp->len
749
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
750
            cpu_watchpoint_remove_by_ref(cpu, wp);
751 752 753
            return 0;
        }
    }
754
    return -ENOENT;
755 756
}

757
/* Remove a specific watchpoint by reference.  */
758
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
759
{
760
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
761

762
    tlb_flush_page(cpu, watchpoint->vaddr);
763

764
    g_free(watchpoint);
765 766 767
}

/* Remove all matching watchpoints.  */
768
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
769
{
770
    CPUWatchpoint *wp, *next;
771

772
    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
773 774 775
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
776
    }
777
}
778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798

/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}
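
/* Worked example, not part of the original file: a watchpoint with
 * vaddr 0x1000 and len 4 covers [0x1000, 0x1003].  An access at addr 0x1002
 * with len 8 covers [0x1002, 0x1009], so the ranges overlap and this returns
 * true; an access starting at 0x1004 would not match.  The -1 adjustments
 * keep the comparison correct even when a range ends at the very top of the
 * address space.
 */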

#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(cpu);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_separate()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
    replay_finish();
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    bool dirty = false;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    while (page < end) {
        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);

        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
                                              offset, num);
        page += num;
    }

    rcu_read_unlock();

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}
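
/* Worked example, not part of the original file: dirty state is kept in
 * fixed-size bitmap blocks of DIRTY_MEMORY_BLOCK_SIZE pages, so a page index
 * of DIRTY_MEMORY_BLOCK_SIZE + 3 lands in blocks->blocks[1] at bit offset 3.
 * A range that straddles a block boundary is split above so that each
 * bitmap_test_and_clear_atomic() call stays within a single block.
 */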

/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        AddressSpaceDispatch *d;

        d = atomic_rcu_read(&section->address_space->dispatch);
        iotlb = section - d->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    bool have_sub_page = mr->subpage;

    memory_region_unref(mr);

    if (have_sub_page) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}


static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path, Error **errp)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        error_setg_errno(errp, errno, "failed to get page size of file %s",
                         path);
        return 0;
    }

    return fs.f_bsize;
}

static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path,
                            Error **errp)
{
    struct stat st;
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
    uint64_t hpagesize;
    Error *local_err = NULL;

    hpagesize = gethugepagesize(path, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }
    block->mr->align = hpagesize;

    if (memory < hpagesize) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than huge page size 0x%" PRIx64,
                   memory, hpagesize);
        goto error;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        goto error;
    }

    if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
        /* Make name safe to use with mkstemp by replacing '/' with '_'. */
        sanitized_name = g_strdup(memory_region_name(block->mr));
        for (c = sanitized_name; *c != '\0'; c++) {
            if (*c == '/') {
                *c = '_';
            }
        }

        filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                                   sanitized_name);
        g_free(sanitized_name);

        fd = mkstemp(filename);
        if (fd >= 0) {
            unlink(filename);
        }
        g_free(filename);
    } else {
        fd = open(path, O_RDWR | O_CREAT, 0644);
    }

    if (fd < 0) {
        error_setg_errno(errp, errno,
                         "unable to create backing store for hugepages");
        goto error;
    }

    memory = ROUND_UP(memory, hpagesize);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory)) {
        perror("ftruncate");
    }

    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for hugepages");
        close(fd);
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory);
    }

    block->fd = fd;
    return area;

error:
    return NULL;
}
#endif

/* Called with the ramlist lock held.  */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

/* Called within an RCU critical section, or while the ramlist lock
 * is held.
 */
static RAMBlock *find_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            return block;
        }
    }

    return NULL;
}

const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}

/* Called with iothread lock held.  */
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    rcu_read_lock();
    new_block = find_ram_block(addr);
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    rcu_read_unlock();
}

/* Called with iothread lock held.  */
void qemu_ram_unset_idstr(ram_addr_t addr)
{
    RAMBlock *block;

    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */

    rcu_read_lock();
    block = find_ram_block(addr);
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
    rcu_read_unlock();
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As memory core doesn't know how is memory accessed, it is up to
 * resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
{
    RAMBlock *block = find_ram_block(base);

    assert(block);

    newsize = HOST_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}

1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556
/* Called with ram_list.mutex held */
static void dirty_memory_extend(ram_addr_t old_ram_size,
                                ram_addr_t new_ram_size)
{
    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    int i;

    /* Only need to extend if block count increased */
    if (new_num_blocks <= old_num_blocks) {
        return;
    }

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        DirtyMemoryBlocks *old_blocks;
        DirtyMemoryBlocks *new_blocks;
        int j;

        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
        new_blocks = g_malloc(sizeof(*new_blocks) +
                              sizeof(new_blocks->blocks[0]) * new_num_blocks);

        if (old_num_blocks) {
            memcpy(new_blocks->blocks, old_blocks->blocks,
                   old_num_blocks * sizeof(old_blocks->blocks[0]));
        }

        for (j = old_num_blocks; j < new_num_blocks; j++) {
            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
        }

        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);

        if (old_blocks) {
            g_free_rcu(old_blocks, rcu);
        }
    }
}

1557
static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1558
{
1559
    RAMBlock *block;
M
Mike Day 已提交
1560
    RAMBlock *last_block = NULL;
1561
    ram_addr_t old_ram_size, new_ram_size;
1562
    Error *err = NULL;
1563 1564

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1565

1566
    qemu_mutex_lock_ramlist();
1567
    new_block->offset = find_ram_offset(new_block->max_length);
1568 1569 1570

    if (!new_block->host) {
        if (xen_enabled()) {
1571
            xen_ram_alloc(new_block->offset, new_block->max_length,
1572 1573 1574 1575 1576 1577
                          new_block->mr, &err);
            if (err) {
                error_propagate(errp, err);
                qemu_mutex_unlock_ramlist();
                return -1;
            }
1578
        } else {
1579
            new_block->host = phys_mem_alloc(new_block->max_length,
1580
                                             &new_block->mr->align);
1581
            if (!new_block->host) {
1582 1583 1584 1585 1586
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return -1;
1587
            }
1588
            memory_try_enable_merging(new_block->host, new_block->max_length);
1589
        }
1590
    }
P
pbrook 已提交
1591

L
Li Zhijian 已提交
1592 1593 1594 1595
    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        migration_bitmap_extend(old_ram_size, new_ram_size);
1596
        dirty_memory_extend(old_ram_size, new_ram_size);
L
Li Zhijian 已提交
1597
    }
M
Mike Day 已提交
1598 1599 1600 1601
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
M
Mike Day 已提交
1602
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
M
Mike Day 已提交
1603
        last_block = block;
1604
        if (block->max_length < new_block->max_length) {
1605 1606 1607 1608
            break;
        }
    }
    if (block) {
M
Mike Day 已提交
1609
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
M
Mike Day 已提交
1610
    } else if (last_block) {
M
Mike Day 已提交
1611
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
M
Mike Day 已提交
1612
    } else { /* list is empty */
M
Mike Day 已提交
1613
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1614
    }
1615
    ram_list.mru_block = NULL;
P
pbrook 已提交
1616

M
Mike Day 已提交
1617 1618
    /* Write list before version */
    smp_wmb();
U
Umesh Deshpande 已提交
1619
    ram_list.version++;
1620
    qemu_mutex_unlock_ramlist();
U
Umesh Deshpande 已提交
1621

1622
    cpu_physical_memory_set_dirty_range(new_block->offset,
1623 1624
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);
P
pbrook 已提交
1625

1626 1627 1628 1629 1630 1631 1632
    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
        if (kvm_enabled()) {
            kvm_setup_guest_memory(new_block->host, new_block->max_length);
        }
1633
    }
1634

P
pbrook 已提交
1635 1636
    return new_block->offset;
}

#ifdef __linux__
ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    bool share, const char *mem_path,
                                    Error **errp)
{
    RAMBlock *new_block;
    ram_addr_t addr;
    Error *local_err = NULL;

    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return -1;
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
        error_setg(errp,
                   "-mem-path not supported with this accelerator");
        return -1;
    }

    size = HOST_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
    if (!new_block->host) {
        g_free(new_block);
        return -1;
    }

    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
}
#endif

static
ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                   void (*resized)(const char*,
                                                   uint64_t length,
                                                   void *host),
                                   void *host, bool resizeable,
                                   MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    ram_addr_t addr;
    Error *local_err = NULL;

    size = HOST_PAGE_ALIGN(size);
    max_size = HOST_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }

    mr->ram_block = new_block;
    return addr;
}

ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}

ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}
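
/*
 * Illustrative sketch of how these helpers are usually reached (the call
 * chain below is an assumption about the memory API of this QEMU version
 * and may differ between releases):
 *
 *     memory_region_init_ram(mr, owner, "pc.ram", size, &err);
 *         -> qemu_ram_alloc(size, mr, &err)
 *             -> qemu_ram_alloc_internal(...)
 *                 -> ram_block_add(new_block, &err)
 *
 * Board and device code normally allocates guest RAM through the
 * MemoryRegion layer rather than calling qemu_ram_alloc() directly.
 */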

static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        qemu_ram_munmap(block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            call_rcu(block, reclaim_ramblock, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

int qemu_get_ram_fd(ram_addr_t addr)
{
    RAMBlock *block;
    int fd;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    fd = block->fd;
    rcu_read_unlock();
    return fd;
}

void qemu_set_ram_fd(ram_addr_t addr, int fd)
{
    RAMBlock *block;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    block->fd = fd;
    rcu_read_unlock();
}

void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    ptr = ramblock_ptr(block, 0);
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * Called within RCU critical section.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block = qemu_get_ram_block(addr);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }
    return ramblock_ptr(block, addr - block->offset);
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument.
 *
 * Called within RCU critical section.
 */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    RAMBlock *block;
    ram_addr_t offset_inside_block;
    if (*size == 0) {
        return NULL;
    }

    block = qemu_get_ram_block(addr);
    offset_inside_block = addr - block->offset;
    *size = MIN(*size, block->max_length - offset_inside_block);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map the requested area.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, *size, 1);
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }

    return ramblock_ptr(block, offset_inside_block);
}
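
/*
 * Illustrative sketch (not a real caller): code that is not already inside
 * an RCU critical section is expected to bracket these lookups itself, e.g.
 *
 *     rcu_read_lock();
 *     ptr = qemu_get_ram_ptr(addr);
 *     ... use ptr only while still inside the critical section ...
 *     rcu_read_unlock();
 *
 * because the RAMBlock (and therefore the host mapping) is only guaranteed
 * to stay alive for as long as the RCU read lock is held.
 */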

/*
 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
 * in that RAMBlock.
 *
 * ptr: Host pointer to look up
 * round_offset: If true round the result offset down to a page boundary
 * *ram_addr: set to result ram_addr
 * *offset: set to result offset within the RAMBlock
 *
 * Returns: RAMBlock (or NULL if not found)
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *ram_addr,
                                   ram_addr_t *offset)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        rcu_read_lock();
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        block = qemu_get_ram_block(*ram_addr);
        if (block) {
            *offset = (host - block->host);
        }
        rcu_read_unlock();
        return block;
    }

    rcu_read_lock();
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* This case appears when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    rcu_read_unlock();
    return NULL;

found:
    *offset = (host - block->host);
    if (round_offset) {
        *offset &= TARGET_PAGE_MASK;
    }
    *ram_addr = block->offset + *offset;
    rcu_read_unlock();
    return block;
}

/*
 * Finds the named RAMBlock
 *
 * name: The name of RAMBlock to find
 *
 * Returns: RAMBlock (or NULL if not found)
 */
RAMBlock *qemu_ram_block_by_name(const char *name)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (!strcmp(name, block->idstr)) {
            return block;
        }
    }

    return NULL;
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    ram_addr_t offset; /* Not used */

    block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);

    if (!block) {
        return NULL;
    }

    return block->mr;
}

/* Called within RCU critical section.  */
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size,
                                        DIRTY_CLIENTS_NOCODE);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
    }
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
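
/*
 * How the notdirty path is reached (a summary, assuming the usual softmmu
 * TLB behaviour): while a RAM page is still clean for DIRTY_MEMORY_CODE,
 * the TLB directs writes to io_mem_notdirty rather than straight to RAM.
 * The first write then lands in notdirty_mem_write(), which invalidates any
 * TBs translated from that page, performs the store and sets the dirty
 * bits; once the page is dirty, tlb_set_dirty() lets subsequent writes hit
 * RAM directly.
 */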

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                if (wp->flags & BP_CPU &&
                    !cc->debug_check_watchpoint(cpu, wp)) {
                    wp->flags &= ~BP_WATCHPOINT_HIT;
                    continue;
                }
                cpu->watchpoint_hit = wp;
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(cpu, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
                                  unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    uint64_t data;
    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
    AddressSpace *as = current_cpu->cpu_ases[asidx].as;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
    switch (size) {
    case 1:
        data = address_space_ldub(as, addr, attrs, &res);
        break;
    case 2:
        data = address_space_lduw(as, addr, attrs, &res);
        break;
    case 4:
        data = address_space_ldl(as, addr, attrs, &res);
        break;
    default: abort();
    }
    *pdata = data;
    return res;
}

static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned size,
                                   MemTxAttrs attrs)
{
    MemTxResult res;
    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
    AddressSpace *as = current_cpu->cpu_ases[asidx].as;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
    switch (size) {
    case 1:
        address_space_stb(as, addr, val, attrs, &res);
        break;
    case 2:
        address_space_stw(as, addr, val, attrs, &res);
        break;
    case 4:
        address_space_stl(as, addr, val, attrs, &res);
        break;
    default: abort();
    }
    return res;
}

static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];
    MemTxResult res;

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    res = address_space_read(subpage->as, addr + subpage->base,
                             attrs, buf, len);
    if (res) {
        return res;
    }
    switch (len) {
    case 1:
        *data = ldub_p(buf);
        return MEMTX_OK;
    case 2:
        *data = lduw_p(buf);
        return MEMTX_OK;
    case 4:
        *data = ldl_p(buf);
        return MEMTX_OK;
    case 8:
        *data = ldq_p(buf);
        return MEMTX_OK;
    default:
        abort();
    }
}

static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    case 8:
        stq_p(buf, value);
        break;
    default:
        abort();
    }
    return address_space_write(subpage->as, addr + subpage->base,
                               attrs, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
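
/*
 * Summary of the subpage mechanism (informal, based on the ops above and
 * the registration helpers below): when a target page is shared by more
 * than one MemoryRegionSection, the dispatch tree points that page at a
 * subpage_t instead.  Accesses are then bounced through subpage_ops, which
 * simply re-enters the address space at subpage->base + addr so that the
 * per-subsection lookup can resolve the real region.
 */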

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          NULL, TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}

static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
                              MemoryRegion *mr)
{
    assert(as);
    MemoryRegionSection section = {
        .address_space = as,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}

MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
{
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
    MemoryRegionSection *sections = d->map.sections;

    return sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          NULL, UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, as, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, as, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, as, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, as, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}

static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}

static void mem_commit(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    phys_page_compact_all(next, next->map.nodes_nb);

    atomic_rcu_set(&as->dispatch, next);
    if (cur) {
        call_rcu(cur, address_space_dispatch_free, rcu);
    }
}

static void tcg_commit(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;
    AddressSpaceDispatch *d;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    cpu_reloading_memory_map();
    /* The CPU and TLB are protected by the iothread lock.
     * We reload the dispatch pointer now because cpu_reloading_memory_map()
     * may have split the RCU critical section.
     */
    d = atomic_rcu_read(&cpuas->as->dispatch);
    cpuas->memory_dispatch = d;
    tlb_flush(cpuas->cpu, 1);
}
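
/*
 * Rough lifecycle of an AddressSpaceDispatch, as implied by the listeners
 * above: mem_begin() builds a fresh dispatch table in as->next_dispatch
 * while the memory transaction is replayed, mem_commit() compacts it and
 * publishes it with atomic_rcu_set(), and the previous table is freed after
 * a grace period via call_rcu().  tcg_commit() then refreshes the per-CPU
 * cached pointer and flushes the TLB so stale mappings cannot be used.
 */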

void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    atomic_rcu_set(&as->dispatch, NULL);
    if (d) {
        call_rcu(d, address_space_dispatch_free, rcu);
    }
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
                                     hwaddr length)
{
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_range(addr, addr + length);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
    }
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    l = pow2floor(l);

    return l;
}
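
/*
 * Worked example for memory_access_size() (illustrative numbers only): with
 * valid.max_access_size == 4 and impl.unaligned == false, a request of
 * l == 8 at an address whose low bits give an alignment of 2 yields
 * align_size_max == 2, so the access is clamped to 2 bytes; the read/write
 * loops below then keep issuing further accesses for the remaining bytes.
 */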

static bool prepare_mmio_access(MemoryRegion *mr)
{
    bool unlocked = !qemu_mutex_iothread_locked();
    bool release_lock = false;

    if (unlocked && mr->global_locking) {
        qemu_mutex_lock_iothread();
        unlocked = false;
        release_lock = true;
    }
    if (mr->flush_coalesced_mmio) {
        if (unlocked) {
            qemu_mutex_lock_iothread();
        }
        qemu_flush_coalesced_mmio_buffer();
        if (unlocked) {
            qemu_mutex_unlock_iothread();
        }
    }

    return release_lock;
}
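
/*
 * Callers use the return value to keep the lock/unlock pairing balanced,
 * typically as:
 *
 *     release_lock |= prepare_mmio_access(mr);
 *     ... dispatch the MMIO access ...
 *     if (release_lock) {
 *         qemu_mutex_unlock_iothread();
 *     }
 *
 * (this mirrors the pattern in the read/write loops below).
 */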

/* Called within RCU critical section.  */
static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
                                                MemTxAttrs attrs,
                                                const uint8_t *buf,
                                                int len, hwaddr addr1,
                                                hwaddr l, MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, true)) {
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            /* XXX: could force current_cpu to NULL to avoid
               potential bugs */
            switch (l) {
            case 8:
                /* 64 bit write access */
                val = ldq_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                       attrs);
                break;
            case 4:
                /* 32 bit write access */
                val = ldl_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                       attrs);
                break;
            case 2:
                /* 16 bit write access */
                val = lduw_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                       attrs);
                break;
            case 1:
                /* 8 bit write access */
                val = ldub_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                       attrs);
                break;
            default:
                abort();
            }
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
    }

    return result;
}

MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    if (len > 0) {
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
        result = address_space_write_continue(as, addr, attrs, buf, len,
                                              addr1, l, mr);
        rcu_read_unlock();
    }

    return result;
}

/* Called within RCU critical section.  */
MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
                                        MemTxAttrs attrs, uint8_t *buf,
                                        int len, hwaddr addr1, hwaddr l,
                                        MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, false)) {
            /* I/O case */
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            switch (l) {
            case 8:
                /* 64 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                      attrs);
                stq_p(buf, val);
                break;
            case 4:
                /* 32 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                      attrs);
                stl_p(buf, val);
                break;
            case 2:
                /* 16 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                      attrs);
                stw_p(buf, val);
                break;
            case 1:
                /* 8 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                      attrs);
                stb_p(buf, val);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
            ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
            memcpy(buf, ptr, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
    }

    return result;
}

MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
                                    MemTxAttrs attrs, uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    if (len > 0) {
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
        result = address_space_read_continue(as, addr, attrs, buf, len,
                                             addr1, l, mr);
        rcu_read_unlock();
    }

    return result;
}

MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
{
    if (is_write) {
        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
    } else {
        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
    }
}
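
/*
 * Typical call, sketched for illustration (the guest physical address and
 * buffer are hypothetical):
 *
 *     uint8_t buf[4];
 *     MemTxResult r = address_space_rw(&address_space_memory, gpa,
 *                                      MEMTXATTRS_UNSPECIFIED,
 *                                      buf, sizeof(buf), false);
 *     if (r != MEMTX_OK) {
 *         ... the access faulted or hit an unassigned region ...
 *     }
 */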

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}

enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            l = memory_access_size(mr, l, addr1);
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}
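
/*
 * Informal note on the bounce-buffer protocol implied above: there is a
 * single BounceBuffer, so at most one indirect (non-RAM-backed) mapping can
 * be outstanding at a time.  A caller whose address_space_map() attempt
 * fails can register a QEMUBH with cpu_register_map_client() and will have
 * that BH scheduled from cpu_notify_map_clients() once the buffer is
 * released in address_space_unmap().
 */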

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    rcu_read_unlock();
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;
    void *ptr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    ptr = qemu_ram_ptr_length(raddr + base, plen);
    rcu_read_unlock();

    return ptr;
}
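
/*
 * Sketch of the intended usage (the variables are illustrative):
 *
 *     hwaddr maplen = size;
 *     void *p = address_space_map(as, gpa, &maplen, true);
 *     if (p) {
 *         memcpy(p, data, maplen);      // maplen may be smaller than size
 *         address_space_unmap(as, p, maplen, true, maplen);
 *     } else {
 *         // register a map client and retry later, or fall back to
 *         // address_space_rw()
 *     }
 */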

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225
uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

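/* Single-byte load: endianness does not matter here, so this simply
 * forwards to address_space_rw(). */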
/* XXX: optimize */
uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    uint8_t dirty_log_mask;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

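/* Single-byte store: forwarded to address_space_rw(), no alignment or
 * endianness handling needed. */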
/* XXX: optimize */
void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

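/* 64-bit stores: byte-swap the value to the requested endianness and
 * forward it to address_space_rw() as a buffer write. */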
/* XXX: optimize */
void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        int asidx;
        MemTxAttrs attrs;

        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
        asidx = cpu_asidx_from_attrs(cpu, attrs);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
                                          phys_addr, buf, l);
        } else {
            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
                             MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

/*
 * Allows code that needs to deal with migration bitmaps etc to still be built
 * target independent.
 */
size_t qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#ifndef CONFIG_USER_ONLY
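/* Return true if the given physical address is backed by MMIO, i.e. by
 * something other than RAM or a ROM device region. */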
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

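/* Walk all RAMBlocks under the RCU read lock, calling func for each one
 * and stopping early if func returns a non-zero value. */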
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
#endif