/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

/* RAM is backed by an mmapped file. */
#define RAM_FILE (1 << 3)
#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many levels to skip to the next node (in units of L2_SIZE).
     * 0 for a leaf.
     */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];

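/* Illustrative note (not from the original source): for a target with
 * 4 KiB pages (TARGET_PAGE_BITS == 12), P_L2_LEVELS evaluates to
 * ((64 - 12 - 1) / 9) + 1 == 6, i.e. six radix-tree levels of
 * P_L2_SIZE == 512 entries each cover the whole 64-bit address space.
 */
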
typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;

/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

#endif

#if !defined(CONFIG_USER_ONLY)

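/* Grow map->nodes so that at least "nodes" more entries fit, doubling
 * the allocation (with a floor of 16 nodes) as needed.
 */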
static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

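/* Walk the radix tree from the root, honouring the "skip" shortcuts
 * installed by phys_page_compact(), and return the MemoryRegionSection
 * covering addr, or the unassigned section if the address has none.
 */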
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

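/* A "direct" access is one that can be performed on the host RAM buffer
 * with memcpy rather than through MemoryRegion callbacks: RAM (unless a
 * write targets a read-only region) and ROMD regions being read qualify.
 */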
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

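/* Translate addr within as to the MemoryRegion that implements it,
 * iterating through any IOMMUs on the path and clamping *plen to the
 * smallest translated window encountered; if an IOMMU denies the access,
 * the unassigned region is returned instead.
 */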
/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
                                               addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

static bool cpu_common_crash_occurred_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return cpu->crash_occurred;
}

static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
{
    /* We only support one address space per cpu at the moment.  */
    assert(cpu->as == as);

    if (cpu->cpu_ases) {
        /* We've already registered the listener for our only AS */
        return;
    }

    cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
    cpu->cpu_ases[0].cpu = cpu;
    cpu->cpu_ases[0].as = as;
    cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
    memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
}
#endif

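/* CPU index allocation: in system emulation, free indexes are tracked in
 * a bitmap so they can be recycled on CPU hot-unplug; in user mode the
 * index is simply the number of CPUs already on the list.
 */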
#ifndef CONFIG_USER_ONLY
static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);

static int cpu_get_free_index(Error **errp)
{
    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);

    if (cpu >= MAX_CPUMASK_BITS) {
        error_setg(errp, "Trying to use more CPUs than max of %d",
                   MAX_CPUMASK_BITS);
        return -1;
    }

    bitmap_set(cpu_index_map, cpu, 1);
    return cpu;
}

void cpu_exec_exit(CPUState *cpu)
{
    if (cpu->cpu_index == -1) {
        /* cpu_index was never allocated by this @cpu or was already freed. */
        return;
    }

    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
    cpu->cpu_index = -1;
}
#else

static int cpu_get_free_index(Error **errp)
{
    CPUState *some_cpu;
    int cpu_index = 0;

    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    return cpu_index;
}

void cpu_exec_exit(CPUState *cpu)
{
}
#endif

void cpu_exec_init(CPUState *cpu, Error **errp)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    int cpu_index;
    Error *local_err = NULL;

#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
#endif

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
    if (local_err) {
        error_propagate(errp, local_err);
#if defined(CONFIG_USER_ONLY)
        cpu_list_unlock();
#endif
        return;
    }
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, cpu->env_ptr);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (i.e. the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}
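
/* Worked example (illustrative, assuming a 64-bit vaddr): a watchpoint
 * with vaddr = 0xffffffffffffff00 and len = 0x100 ends exactly at the
 * top of the address space; computing the inclusive end as
 * vaddr + len - 1 avoids the wrap to zero, so an access at
 * 0xffffffffffffff80 still matches.
 */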

#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(cpu);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
    replay_finish();
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

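/* Re-arm write tracking for a RAM range whose dirty bits have just been
 * cleared: reset the matching TLB entries on every CPU so the next write
 * to each page takes the slow path and marks the page dirty again.
 */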
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    unsigned long end, page;
    bool dirty;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;
    dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
                                         page, end - page);

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}

/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        AddressSpaceDispatch *d;

        d = atomic_rcu_read(&section->address_space->dispatch);
        iotlb = section - d->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    memory_region_unref(mr);

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

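/* Register a section that does not cover a whole target page: the page
 * is backed by a subpage_t whose sub_section[] table maps each byte
 * offset within the page to the section that claims it.
 */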
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}

static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

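/* Split an incoming section into an unaligned head, a run of whole
 * target pages, and an unaligned tail; the partial pieces are registered
 * as subpages and the aligned middle as a multipage mapping.
 */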
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path, Error **errp)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        error_setg_errno(errp, errno, "failed to get page size of file %s",
                         path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}

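/* Back a RAMBlock with an mmap()ed file, typically on hugetlbfs: create
 * or open the backing file, size it with ftruncate(), and map it with
 * qemu_ram_mmap(), honouring the RAM_SHARED flag.
 */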
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path,
                            Error **errp)
{
    struct stat st;
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
    uint64_t hpagesize;
    Error *local_err = NULL;

    hpagesize = gethugepagesize(path, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }
    block->mr->align = hpagesize;

    if (memory < hpagesize) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than huge page size 0x%" PRIx64,
                   memory, hpagesize);
        goto error;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        goto error;
    }

    if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
        /* Make name safe to use with mkstemp by replacing '/' with '_'. */
        sanitized_name = g_strdup(memory_region_name(block->mr));
        for (c = sanitized_name; *c != '\0'; c++) {
            if (*c == '/') {
                *c = '_';
            }
        }

        filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                                   sanitized_name);
        g_free(sanitized_name);

        fd = mkstemp(filename);
        if (fd >= 0) {
            unlink(filename);
        }
        g_free(filename);
    } else {
        fd = open(path, O_RDWR | O_CREAT, 0644);
    }

    if (fd < 0) {
        error_setg_errno(errp, errno,
                         "unable to create backing store for hugepages");
        goto error;
    }

    memory = ROUND_UP(memory, hpagesize);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory)) {
        perror("ftruncate");
    }

    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for hugepages");
        close(fd);
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory);
    }

    block->fd = fd;
    return area;

error:
    return NULL;
}
#endif

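/* Best-fit search of the ram_addr_t space: examine the gap after each
 * existing block and return the start of the smallest gap that still
 * fits "size" bytes.
 */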
/* Called with the ramlist lock held.  */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

/* Called within an RCU critical section, or while the ramlist lock
 * is held.
 */
static RAMBlock *find_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            return block;
        }
    }

    return NULL;
}

/* Called with iothread lock held.  */
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    rcu_read_lock();
    new_block = find_ram_block(addr);
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    rcu_read_unlock();
}

/* Called with iothread lock held.  */
void qemu_ram_unset_idstr(ram_addr_t addr)
{
    RAMBlock *block;

    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */

    rcu_read_lock();
    block = find_ram_block(addr);
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
    rcu_read_unlock();
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As memory core doesn't know how is memory accessed, it is up to
 * resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
{
    RAMBlock *block = find_ram_block(base);

    assert(block);

    newsize = TARGET_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
    memory_region_set_size(block->mr, newsize);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}

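/* Allocate (or adopt) the host memory behind new_block, insert it into
 * ram_list keeping the list sorted from biggest to smallest block, grow
 * the dirty bitmaps, and mark the new range dirty for all clients.
 */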
static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr);
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return -1;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        migration_bitmap_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    if (new_ram_size > old_ram_size) {
        int i;

        /* ram_list.dirty_memory[] is protected by the iothread lock.  */
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            ram_list.dirty_memory[i] =
                bitmap_zero_extend(ram_list.dirty_memory[i],
                                   old_ram_size, new_ram_size);
        }
    }
    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
        if (kvm_enabled()) {
            kvm_setup_guest_memory(new_block->host, new_block->max_length);
        }
    }

    return new_block->offset;
}

#ifdef __linux__
ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    bool share, const char *mem_path,
                                    Error **errp)
{
    RAMBlock *new_block;
    ram_addr_t addr;
    Error *local_err = NULL;

    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return -1;
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
        error_setg(errp,
                   "-mem-path not supported with this accelerator");
        return -1;
    }

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
    new_block->flags |= RAM_FILE;
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
    if (!new_block->host) {
        g_free(new_block);
        return -1;
    }

    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
}
#endif

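/* Common back end of the qemu_ram_alloc_*() variants: build a RAMBlock
 * (optionally resizeable, optionally backed by caller-supplied host
 * memory) and hand it to ram_block_add().
 */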
static
ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                   void (*resized)(const char*,
                                                   uint64_t length,
                                                   void *host),
                                   void *host, bool resizeable,
                                   MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    ram_addr_t addr;
    Error *local_err = NULL;

    size = TARGET_PAGE_ALIGN(size);
    max_size = TARGET_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
}

ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}

ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}
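
/* Illustrative sketch (not part of the original file): using the
 * resizeable allocator above.  my_resized() and the sizes are
 * hypothetical; only used_length is migrated until a later resize.
 */
#if 0
static void my_resized(const char *id, uint64_t length, void *host)
{
    /* Invoked when used_length changes, e.g. on an incoming migration
     * from a guest configured with a different RAM size. */
}

static void my_alloc_resizeable_ram_example(MemoryRegion *mr, Error **errp)
{
    qemu_ram_alloc_resizeable(128 * 1024 * 1024,   /* initial used_length */
                              256 * 1024 * 1024,   /* max_length reserved */
                              my_resized, mr, errp);
}
#endif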

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            g_free_rcu(block, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        if (block->flags & RAM_FILE) {
            qemu_ram_munmap(block->host, block->max_length);
        } else {
            munmap(block->host, block->max_length);
        }
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            call_rcu(block, reclaim_ramblock, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

int qemu_get_ram_fd(ram_addr_t addr)
{
    RAMBlock *block;
    int fd;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    fd = block->fd;
    rcu_read_unlock();
    return fd;
}

void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    ptr = ramblock_ptr(block, 0);
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            ptr = xen_map_cache(addr, 0, 0);
            goto unlock;
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }
    ptr = ramblock_ptr(block, addr - block->offset);

unlock:
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    void *ptr;
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;
        rcu_read_lock();
        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->max_length) {
                if (addr - block->offset + *size > block->max_length) {
                    *size = block->max_length - addr + block->offset;
                }
                ptr = ramblock_ptr(block, addr - block->offset);
                rcu_read_unlock();
                return ptr;
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

/* Some of the softmmu routines need to translate from a host pointer
 * (typically a TLB entry) back to a ram offset.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;
    MemoryRegion *mr;

    if (xen_enabled()) {
        rcu_read_lock();
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        mr = qemu_get_ram_block(*ram_addr)->mr;
        rcu_read_unlock();
        return mr;
    }

    rcu_read_lock();
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    rcu_read_unlock();
    return NULL;

found:
    *ram_addr = block->offset + (host - block->host);
    mr = block->mr;
    rcu_read_unlock();
    return mr;
}

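/* Illustrative sketch (not part of the original file): round-tripping a
 * host pointer back to its RAM offset with the lookup above.  The caller
 * is hypothetical and assumes a RAM block exists at offset 0; note the
 * NULL return for pointers that do not fall inside guest RAM.
 */
#if 0
static void my_round_trip_example(void)
{
    ram_addr_t ram_addr;
    void *host = qemu_get_ram_ptr(0);

    if (qemu_ram_addr_from_host(host, &ram_addr) != NULL) {
        assert(ram_addr == 0);  /* offset 0 maps back to itself */
    }
}
#endif
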
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size,
                                        DIRTY_CLIENTS_NOCODE);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
    }
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                cpu->watchpoint_hit = wp;
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(cpu, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
                                  unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    uint64_t data;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
    switch (size) {
    case 1:
        data = address_space_ldub(&address_space_memory, addr, attrs, &res);
        break;
    case 2:
        data = address_space_lduw(&address_space_memory, addr, attrs, &res);
        break;
    case 4:
        data = address_space_ldl(&address_space_memory, addr, attrs, &res);
        break;
    default: abort();
    }
    *pdata = data;
    return res;
}

static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned size,
                                   MemTxAttrs attrs)
{
    MemTxResult res;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
    switch (size) {
    case 1:
        address_space_stb(&address_space_memory, addr, val, attrs, &res);
        break;
    case 2:
        address_space_stw(&address_space_memory, addr, val, attrs, &res);
        break;
    case 4:
        address_space_stl(&address_space_memory, addr, val, attrs, &res);
        break;
    default: abort();
    }
    return res;
}

static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];
    MemTxResult res;

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    res = address_space_read(subpage->as, addr + subpage->base,
                             attrs, buf, len);
    if (res) {
        return res;
    }
    switch (len) {
    case 1:
        *data = ldub_p(buf);
        return MEMTX_OK;
    case 2:
        *data = lduw_p(buf);
        return MEMTX_OK;
    case 4:
        *data = ldl_p(buf);
        return MEMTX_OK;
    case 8:
        *data = ldq_p(buf);
        return MEMTX_OK;
    default:
        abort();
    }
}

static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    case 8:
        stq_p(buf, value);
        break;
    default:
        abort();
    }
    return address_space_write(subpage->as, addr + subpage->base,
                               attrs, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) {
        return -1;
    }
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          NULL, TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}

static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
                              MemoryRegion *mr)
{
    assert(as);
    MemoryRegionSection section = {
        .address_space = as,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}

MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
{
    CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
    MemoryRegionSection *sections = d->map.sections;

    return sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          NULL, UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, as, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, as, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, as, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, as, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}

static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}

static void mem_commit(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    phys_page_compact_all(next, next->map.nodes_nb);

    atomic_rcu_set(&as->dispatch, next);
    if (cur) {
        call_rcu(cur, address_space_dispatch_free, rcu);
    }
}

static void tcg_commit(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;
    AddressSpaceDispatch *d;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    cpu_reloading_memory_map();
    /* The CPU and TLB are protected by the iothread lock.
     * We reload the dispatch pointer now because cpu_reloading_memory_map()
     * may have split the RCU critical section.
     */
    d = atomic_rcu_read(&cpuas->as->dispatch);
    cpuas->memory_dispatch = d;
    tlb_flush(cpuas->cpu, 1);
}

void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    atomic_rcu_set(&as->dispatch, NULL);
    if (d) {
        call_rcu(d, address_space_dispatch_free, rcu);
    }
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID)) {
            return -1;
        }
        if (is_write) {
            if (!(flags & PAGE_WRITE)) {
                return -1;
            }
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0))) {
                return -1;
            }
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ)) {
                return -1;
            }
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1))) {
                return -1;
            }
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
                                     hwaddr length)
{
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_range(addr, addr + length);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
    }
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    l = pow2floor(l);

    return l;
}

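/* Worked example (not in the original): for a region with
 * valid.max_access_size == 4 and impl.unaligned clear, an 8-byte access
 * at address 0x1002 is first bounded by the alignment of the address
 * (0x1002 & -0x1002 == 2), which is below the 4-byte maximum, and
 * pow2floor() keeps the result a power of two.  The dispatcher then
 * issues a 2-byte access and address_space_rw() loops for the rest.
 */
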
static bool prepare_mmio_access(MemoryRegion *mr)
{
    bool unlocked = !qemu_mutex_iothread_locked();
    bool release_lock = false;

    if (unlocked && mr->global_locking) {
        qemu_mutex_lock_iothread();
        unlocked = false;
        release_lock = true;
    }
    if (mr->flush_coalesced_mmio) {
        if (unlocked) {
            qemu_mutex_lock_iothread();
        }
        qemu_flush_coalesced_mmio_buffer();
        if (unlocked) {
            qemu_mutex_unlock_iothread();
        }
    }

    return release_lock;
}
2459 2460
MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
B
bellard 已提交
2461
{
2462
    hwaddr l;
B
bellard 已提交
2463
    uint8_t *ptr;
2464
    uint64_t val;
2465
    hwaddr addr1;
2466
    MemoryRegion *mr;
2467
    MemTxResult result = MEMTX_OK;
2468
    bool release_lock = false;
2469

2470
    rcu_read_lock();
B
bellard 已提交
2471
    while (len > 0) {
2472
        l = len;
2473
        mr = address_space_translate(as, addr, &addr1, &l, is_write);
2474

B
bellard 已提交
2475
        if (is_write) {
2476
            if (!memory_access_is_direct(mr, is_write)) {
2477
                release_lock |= prepare_mmio_access(mr);
2478
                l = memory_access_size(mr, l, addr1);
2479
                /* XXX: could force current_cpu to NULL to avoid
B
bellard 已提交
2480
                   potential bugs */
2481 2482 2483 2484
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
2485 2486
                    result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                           attrs);
2487 2488
                    break;
                case 4:
B
bellard 已提交
2489
                    /* 32 bit write access */
B
bellard 已提交
2490
                    val = ldl_p(buf);
2491 2492
                    result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                           attrs);
2493 2494
                    break;
                case 2:
B
bellard 已提交
2495
                    /* 16 bit write access */
B
bellard 已提交
2496
                    val = lduw_p(buf);
2497 2498
                    result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                           attrs);
2499 2500
                    break;
                case 1:
B
bellard 已提交
2501
                    /* 8 bit write access */
B
bellard 已提交
2502
                    val = ldub_p(buf);
2503 2504
                    result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                           attrs);
2505 2506 2507
                    break;
                default:
                    abort();
B
bellard 已提交
2508
                }
2509
            } else {
2510
                addr1 += memory_region_get_ram_addr(mr);
B
bellard 已提交
2511
                /* RAM case */
P
pbrook 已提交
2512
                ptr = qemu_get_ram_ptr(addr1);
B
bellard 已提交
2513
                memcpy(ptr, buf, l);
2514
                invalidate_and_set_dirty(mr, addr1, l);
B
bellard 已提交
2515 2516
            }
        } else {
2517
            if (!memory_access_is_direct(mr, is_write)) {
B
bellard 已提交
2518
                /* I/O case */
2519
                release_lock |= prepare_mmio_access(mr);
2520
                l = memory_access_size(mr, l, addr1);
2521 2522 2523
                switch (l) {
                case 8:
                    /* 64 bit read access */
2524 2525
                    result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                          attrs);
2526 2527 2528
                    stq_p(buf, val);
                    break;
                case 4:
B
bellard 已提交
2529
                    /* 32 bit read access */
2530 2531
                    result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                          attrs);
B
bellard 已提交
2532
                    stl_p(buf, val);
2533 2534
                    break;
                case 2:
B
bellard 已提交
2535
                    /* 16 bit read access */
2536 2537
                    result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                          attrs);
B
bellard 已提交
2538
                    stw_p(buf, val);
2539 2540
                    break;
                case 1:
B
bellard 已提交
2541
                    /* 8 bit read access */
2542 2543
                    result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                          attrs);
B
bellard 已提交
2544
                    stb_p(buf, val);
2545 2546 2547
                    break;
                default:
                    abort();
B
bellard 已提交
2548 2549 2550
                }
            } else {
                /* RAM case */
2551
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2552
                memcpy(buf, ptr, l);
B
bellard 已提交
2553 2554
            }
        }
2555 2556 2557 2558 2559 2560

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

B
bellard 已提交
2561 2562 2563 2564
        len -= l;
        buf += l;
        addr += l;
    }
2565
    rcu_read_unlock();
2566

2567
    return result;
B
bellard 已提交
2568
}
B
bellard 已提交
2569

MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
}

MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                               uint8_t *buf, int len)
{
    return address_space_rw(as, addr, attrs, buf, len, false);
}

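/* Illustrative sketch (not part of the original file): a hypothetical
 * device model issuing a bus-master write through the wrapper above and
 * checking the transaction result; the buffer and address are
 * placeholders.
 */
#if 0
static void my_dma_write_example(AddressSpace *as)
{
    uint8_t my_buf[16] = { 0 };

    if (address_space_write(as, 0x1000, MEMTXATTRS_UNSPECIFIED,
                            my_buf, sizeof(my_buf)) != MEMTX_OK) {
        /* The write hit unassigned memory or the device failed it. */
    }
}
#endif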

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}

enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            l = memory_access_size(mr, l, addr1);
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                rcu_read_unlock();
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    rcu_read_unlock();
    return true;
}

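/* Illustrative sketch (not part of the original file): probing a range
 * before committing to a transfer.  The caller and the range are
 * hypothetical.
 */
#if 0
static bool my_probe_example(AddressSpace *as)
{
    /* True only if every byte of [0x1000, 0x1100) accepts writes. */
    return address_space_access_valid(as, 0x1000, 0x100, true);
}
#endif
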
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    rcu_read_unlock();
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

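/* Illustrative sketch (not part of the original file): the zero-copy DMA
 * pattern over the two wrappers above.  When the target is not direct
 * RAM, the single page-sized bounce buffer may already be busy and the
 * map returns NULL; cpu_register_map_client() (see earlier) can then be
 * used to retry from a bottom half.  All names here are hypothetical.
 */
#if 0
static void my_dma_copy_example(hwaddr src, uint8_t *dst, hwaddr size)
{
    hwaddr plen = size;
    void *p = cpu_physical_memory_map(src, &plen, 0 /* read */);

    if (!p) {
        return;             /* bounce buffer in use: retry later */
    }
    memcpy(dst, p, plen);   /* plen may come back smaller than size */
    cpu_physical_memory_unmap(p, plen, 0, plen);
}
#endif
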
/* warning: addr must be aligned */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

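/* Illustrative note (not in the original): the _le/_be variants fix the
 * byte order regardless of TARGET_WORDS_BIGENDIAN, e.g. a hypothetical
 * read of a little-endian descriptor word:
 *
 *     uint32_t w = ldl_le_phys(&address_space_memory, desc_addr);
 *
 * while ldl_phys() follows the target's native byte order.
 */
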
/* warning: addr must be aligned */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    uint8_t dirty_log_mask;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
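
/*
 * Example (a sketch, not part of the original file): the notdirty variant
 * fits targets whose MMU helpers write accessed/dirty bits back into guest
 * page tables; marking the page as modified code there would invalidate
 * translated blocks on every PTE update.  "pte_addr" and PG_ACCESSED_BIT
 * are hypothetical stand-ins for a target's own definitions:
 *
 *     uint32_t pte = ldl_phys(cs->as, pte_addr);
 *     pte |= (1 << PG_ACCESSED_BIT);
 *     stl_phys_notdirty(cs->as, pte_addr, pte);
 */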

/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}
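
/*
 * Example (a sketch, not part of the original file): callers that care
 * about bus attributes or the transaction result use the address_space_*
 * forms directly; the *_phys convenience wrappers below discard both:
 *
 *     MemTxResult res;
 *     MemTxAttrs attrs = { .secure = 1 };  // e.g. a TrustZone secure access
 *     address_space_stl(as, addr, val, attrs, &res);
 *     if (res != MEMTX_OK) {
 *         // report or inject a bus error, as the device model sees fit
 *     }
 */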

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* XXX: optimize */
void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
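
/*
 * Example (a sketch, not part of the original file): a hypothetical device
 * publishing a 64-bit little-endian descriptor pointer into guest RAM would
 * use the fixed-endian store so the layout is stable across target byte
 * orders; "ring_base_gpa" and "desc_gpa" are assumed guest-physical
 * addresses:
 *
 *     stq_le_phys(&address_space_memory, ring_base_gpa, desc_gpa);
 */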

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
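
/*
 * Example (a sketch, not part of the original file): this is the path a
 * debugger stub takes when reading a guest virtual address that may be
 * unmapped, with "cpu" the CPUState under inspection and "vaddr" a
 * caller-supplied guest virtual address:
 *
 *     uint8_t buf[4];
 *     if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) < 0) {
 *         // no physical page mapped at vaddr: report failure upward
 *     }
 */
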
#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
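
/*
 * Example (a sketch, not part of the original file, and assuming a
 * little-endian host): legacy virtio uses the guest's native byte order,
 * so a transport might normalize a 16-bit field with a hypothetical helper
 * like this rather than hard-coding an endianness:
 *
 *     static uint16_t guest_tswap16(uint16_t v)
 *     {
 *         return target_words_bigendian() ? bswap16(v) : v;
 *     }
 */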

#ifndef CONFIG_USER_ONLY

bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}
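
/*
 * Example (a sketch, not part of the original file): a caller can use this
 * predicate to refuse operations that are only meaningful on RAM or ROM,
 * with "gpa" an assumed guest-physical address:
 *
 *     if (cpu_physical_memory_is_io(gpa)) {
 *         return -EINVAL;  // refuse to touch device registers
 *     }
 */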

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
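
/*
 * Example (a sketch, not part of the original file): the callback follows
 * RAMBlockIterFunc, and returning non-zero stops the walk.  A hypothetical
 * accounting pass that sums the used length of every RAM block:
 *
 *     static int add_block_size(const char *idstr, void *host_addr,
 *                               ram_addr_t offset, ram_addr_t length,
 *                               void *opaque)
 *     {
 *         *(uint64_t *)opaque += length;
 *         return 0;
 *     }
 *
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(add_block_size, &total);
 */
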
#endif