memory.c 73.5 KB
Newer Older
A
Avi Kivity 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Physical memory management
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

P
Peter Maydell 已提交
16
#include "qemu/osdep.h"
17 18 19
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/ioport.h"
20
#include "qapi/visitor.h"
21
#include "qemu/bitops.h"
P
Pavel Fedin 已提交
22
#include "qemu/error-report.h"
23
#include "qom/object.h"
24
#include "trace.h"
A
Avi Kivity 已提交
25

26
#include "exec/memory-internal.h"
27
#include "exec/ram_addr.h"
P
Pavel Fedin 已提交
28
#include "sysemu/kvm.h"
29
#include "sysemu/sysemu.h"
30

31 32
//#define DEBUG_UNASSIGNED

33 34
#define RAM_ADDR_INVALID (~(ram_addr_t)0)

35 36
static unsigned memory_region_transaction_depth;
static bool memory_region_update_pending;
37
static bool ioeventfd_update_pending;
38 39
static bool global_dirty_log = false;

40 41
static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners
    = QTAILQ_HEAD_INITIALIZER(memory_listeners);
A
Avi Kivity 已提交
42

43 44 45
static QTAILQ_HEAD(, AddressSpace) address_spaces
    = QTAILQ_HEAD_INITIALIZER(address_spaces);

A
Avi Kivity 已提交
46 47
typedef struct AddrRange AddrRange;

A
Avi Kivity 已提交
48
/*
49
 * Note that signed integers are needed for negative offsetting in aliases
A
Avi Kivity 已提交
50 51
 * (large MemoryRegion::alias_offset).
 */
A
Avi Kivity 已提交
52
struct AddrRange {
53 54
    Int128 start;
    Int128 size;
A
Avi Kivity 已提交
55 56
};

57
/* Build an AddrRange from a start address and a size. */
static AddrRange addrrange_make(Int128 start, Int128 size)
{
    return (AddrRange) { .start = start, .size = size };
}

/* Return true when both ranges have the same start and the same size. */
static bool addrrange_equal(AddrRange r1, AddrRange r2)
{
    return int128_eq(r1.start, r2.start) && int128_eq(r1.size, r2.size);
}

67
/* Return the exclusive end of the range, i.e. start + size. */
static Int128 addrrange_end(AddrRange r)
{
    return int128_add(r.start, r.size);
}

72
/* Return a copy of @range whose start is moved by @delta; the size is
 * unchanged.  @range is passed by value, so the caller's copy is untouched.
 */
static AddrRange addrrange_shift(AddrRange range, Int128 delta)
{
    int128_addto(&range.start, delta);
    return range;
}

78 79 80 81 82 83
/* Return true when @addr lies in [start, end) of @range. */
static bool addrrange_contains(AddrRange range, Int128 addr)
{
    if (!int128_ge(addr, range.start)) {
        /* below the first address of the range */
        return false;
    }
    return int128_lt(addr, addrrange_end(range));
}

A
Avi Kivity 已提交
84 85
/* Return true when the two ranges share at least one address: either the
 * start of @r2 falls inside @r1 or the start of @r1 falls inside @r2.
 */
static bool addrrange_intersects(AddrRange r1, AddrRange r2)
{
    return addrrange_contains(r1, r2.start)
        || addrrange_contains(r2, r1.start);
}

/* Return the overlap of @r1 and @r2: it starts at the larger of the two
 * starts and ends at the smaller of the two ends.  The result is only
 * meaningful when the ranges intersect; render_memory_region() checks
 * addrrange_intersects() before calling this.
 */
static AddrRange addrrange_intersection(AddrRange r1, AddrRange r2)
{
    Int128 start = int128_max(r1.start, r2.start);
    Int128 end = int128_min(addrrange_end(r1), addrrange_end(r2));
    return addrrange_make(start, int128_sub(end, start));
}

97 98
/* Walk order used by the MEMORY_LISTENER_CALL* macros over memory_listeners:
 * Forward iterates with QTAILQ_FOREACH, Reverse with QTAILQ_FOREACH_REVERSE.
 */
enum ListenerDirection { Forward, Reverse };

99 100 101 102 103 104 105 106
/* Decide whether @listener should be told about @section.  A listener
 * without an address_space_filter accepts every section; otherwise the
 * filter must be the section's address space.
 */
static bool memory_listener_match(MemoryListener *listener,
                                  MemoryRegionSection *section)
{
    if (!listener->address_space_filter) {
        return true;
    }
    return listener->address_space_filter == section->address_space;
}

/* Invoke _callback(listener, args...) on every registered listener that
 * implements it, walking memory_listeners in the order given by _direction
 * (Forward or Reverse).  Any other direction value aborts.
 */
#define MEMORY_LISTENER_CALL_GLOBAL(_callback, _direction, _args...)    \
    do {                                                                \
        MemoryListener *_listener;                                      \
                                                                        \
        switch (_direction) {                                           \
        case Forward:                                                   \
            QTAILQ_FOREACH(_listener, &memory_listeners, link) {        \
                if (_listener->_callback) {                             \
                    _listener->_callback(_listener, ##_args);           \
                }                                                       \
            }                                                           \
            break;                                                      \
        case Reverse:                                                   \
            QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners,        \
                                   memory_listeners, link) {            \
                if (_listener->_callback) {                             \
                    _listener->_callback(_listener, ##_args);           \
                }                                                       \
            }                                                           \
            break;                                                      \
        default:                                                        \
            abort();                                                    \
        }                                                               \
    } while (0)

131 132 133 134 135 136 137
/* Like MEMORY_LISTENER_CALL_GLOBAL, but section-specific: the callback is
 * invoked as _callback(listener, _section, args...) and only on listeners
 * for which memory_listener_match(listener, _section) is true.
 * Note that _section is expanded once per listener visited.
 */
#define MEMORY_LISTENER_CALL(_callback, _direction, _section, _args...) \
    do {                                                                \
        MemoryListener *_listener;                                      \
                                                                        \
        switch (_direction) {                                           \
        case Forward:                                                   \
            QTAILQ_FOREACH(_listener, &memory_listeners, link) {        \
                if (_listener->_callback                                \
                    && memory_listener_match(_listener, _section)) {    \
                    _listener->_callback(_listener, _section, ##_args); \
                }                                                       \
            }                                                           \
            break;                                                      \
        case Reverse:                                                   \
            QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners,        \
                                   memory_listeners, link) {            \
                if (_listener->_callback                                \
                    && memory_listener_match(_listener, _section)) {    \
                    _listener->_callback(_listener, _section, ##_args); \
                }                                                       \
            }                                                           \
            break;                                                      \
        default:                                                        \
            abort();                                                    \
        }                                                               \
    } while (0)

P
Paolo Bonzini 已提交
158
/* Build a temporary MemoryRegionSection describing FlatRange @fr inside
 * address space @as and pass it to MEMORY_LISTENER_CALL for @callback.
 *
 * No need to ref/unref .mr, the FlatRange keeps it alive.
 */
#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...)  \
    MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) {       \
        .mr = (fr)->mr,                                                 \
        .address_space = (as),                                          \
        .offset_within_region = (fr)->offset_in_region,                 \
        .size = (fr)->addr.size,                                        \
        .offset_within_address_space = int128_get64((fr)->addr.start),  \
        .readonly = (fr)->readonly,                                     \
              }), ##_args)
168

A
Avi Kivity 已提交
169 170 171 172 173
/* One address range kept as an element of a QTAILQ list. */
struct CoalescedMemoryRange {
    AddrRange addr;                         /* the range itself */
    QTAILQ_ENTRY(CoalescedMemoryRange) link; /* list linkage */
};

A
Avi Kivity 已提交
174 175 176 177
struct MemoryRegionIoeventfd {
    AddrRange addr;
    bool match_data;
    uint64_t data;
178
    EventNotifier *e;
A
Avi Kivity 已提交
179 180 181 182 183
};

/* Strict ordering of ioeventfd descriptions, used to keep the fd arrays
 * sorted.  Keys are compared in this order: addr.start, addr.size,
 * match_data, data (only when match_data is set), and finally the
 * notifier pointer.  Returns true when @a sorts before @b.
 */
static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd a,
                                           MemoryRegionIoeventfd b)
{
    if (int128_lt(a.addr.start, b.addr.start)) {
        return true;
    } else if (int128_gt(a.addr.start, b.addr.start)) {
        return false;
    } else if (int128_lt(a.addr.size, b.addr.size)) {
        return true;
    } else if (int128_gt(a.addr.size, b.addr.size)) {
        return false;
    } else if (a.match_data < b.match_data) {
        return true;
    } else if (a.match_data > b.match_data) {
        return false;
    } else if (a.match_data) {
        /* Both match on data (the flags are equal here): compare it. */
        if (a.data < b.data) {
            return true;
        } else if (a.data > b.data) {
            return false;
        }
    }
    /* Everything else is equal: the notifier pointer is the tie-breaker. */
    return a.e < b.e;
}

/* Two descriptions are equal when neither sorts before the other. */
static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd a,
                                          MemoryRegionIoeventfd b)
{
    if (memory_region_ioeventfd_before(a, b)) {
        return false;
    }
    return !memory_region_ioeventfd_before(b, a);
}

A
Avi Kivity 已提交
218 219 220 221 222 223
typedef struct FlatRange FlatRange;
typedef struct FlatView FlatView;

/* Range of memory in the global map.  Addresses are absolute. */
struct FlatRange {
    MemoryRegion *mr;
A
Avi Kivity 已提交
224
    hwaddr offset_in_region;
A
Avi Kivity 已提交
225
    AddrRange addr;
A
Avi Kivity 已提交
226
    uint8_t dirty_log_mask;
227
    bool romd_mode;
228
    bool readonly;
A
Avi Kivity 已提交
229 230 231 232 233 234
};

/* Flattened global view of current active memory hierarchy.  Kept in sorted
 * order.
 */
struct FlatView {
235
    struct rcu_head rcu;
236
    unsigned ref;
A
Avi Kivity 已提交
237 238 239 240 241
    FlatRange *ranges;
    unsigned nr;
    unsigned nr_allocated;
};

242 243
/* Forward declaration only; no definition of the struct is visible nearby. */
typedef struct AddressSpaceOps AddressSpaceOps;

A
Avi Kivity 已提交
244 245 246 247 248 249 250
/* Iterate @var (a FlatRange pointer) over the first (view)->nr entries of
 * (view)->ranges, in array order.  @view is evaluated on every iteration.
 */
#define FOR_EACH_FLAT_RANGE(var, view)          \
    for (var = (view)->ranges; var < (view)->ranges + (view)->nr; ++var)

/* Compare two flat ranges on region, address range, offset in the region,
 * romd_mode and readonly.  dirty_log_mask is deliberately not part of the
 * comparison; logging changes are handled separately by the topology pass.
 */
static bool flatrange_equal(FlatRange *a, FlatRange *b)
{
    return a->mr == b->mr
        && addrrange_equal(a->addr, b->addr)
        && a->offset_in_region == b->offset_in_region
        && a->romd_mode == b->romd_mode
        && a->readonly == b->readonly;
}

/* Initialise an empty view holding a single reference. */
static void flatview_init(FlatView *view)
{
    view->ref = 1;
    view->ranges = NULL;
    view->nr = 0;
    view->nr_allocated = 0;
}

/* Insert a range into a given position.  Caller is responsible for maintaining
 * sorting order.
 *
 * The array is grown (doubling, with a minimum of 10 entries) when full,
 * *range is copied into slot @pos, and a reference is taken on range->mr.
 */
static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
{
    if (view->nr == view->nr_allocated) {
        view->nr_allocated = MAX(2 * view->nr, 10);
        view->ranges = g_realloc(view->ranges,
                                 view->nr_allocated * sizeof(*view->ranges));
    }
    /* Open a gap at @pos by shifting the tail up by one element. */
    memmove(view->ranges + pos + 1, view->ranges + pos,
            (view->nr - pos) * sizeof(*view->ranges));
    view->ranges[pos] = *range;
    memory_region_ref(range->mr);
    ++view->nr;
}

/* Release a view: drop the region reference held by every range (taken in
 * flatview_insert()), then free the range array and the view itself.
 */
static void flatview_destroy(FlatView *view)
{
    unsigned i;     /* same type as view->nr, avoids a signed/unsigned compare */

    for (i = 0; i < view->nr; i++) {
        memory_region_unref(view->ranges[i].mr);
    }
    g_free(view->ranges);
    g_free(view);
}

292 293 294 295 296 297 298 299 300 301 302 303
/* Take one more reference on @view (atomic increment of view->ref). */
static void flatview_ref(FlatView *view)
{
    atomic_inc(&view->ref);
}

/* Drop one reference on @view; the view is destroyed when the value
 * fetched before the decrement was 1, i.e. this was the last reference.
 */
static void flatview_unref(FlatView *view)
{
    if (atomic_fetch_dec(&view->ref) != 1) {
        return;
    }
    flatview_destroy(view);
}

304 305
/* Return true when @r2 directly continues @r1: @r1 ends where @r2 starts,
 * both belong to the same region, the region offsets are contiguous as
 * well, and dirty_log_mask, romd_mode and readonly all agree.
 */
static bool can_merge(FlatRange *r1, FlatRange *r2)
{
    return int128_eq(addrrange_end(r1->addr), r2->addr.start)
        && r1->mr == r2->mr
        && int128_eq(int128_add(int128_make64(r1->offset_in_region),
                                r1->addr.size),
                     int128_make64(r2->offset_in_region))
        && r1->dirty_log_mask == r2->dirty_log_mask
        && r1->romd_mode == r2->romd_mode
        && r1->readonly == r2->readonly;
}

P
Peter Crosthwaite 已提交
316
/* Attempt to simplify a view by merging adjacent ranges */
317 318 319 320 321 322 323 324 325
static void flatview_simplify(FlatView *view)
{
    unsigned i, j;

    i = 0;
    while (i < view->nr) {
        j = i + 1;
        while (j < view->nr
               && can_merge(&view->ranges[j-1], &view->ranges[j])) {
326
            int128_addto(&view->ranges[i].addr.size, view->ranges[j].addr.size);
327 328 329 330 331 332 333 334 335
            ++j;
        }
        ++i;
        memmove(&view->ranges[i], &view->ranges[j],
                (view->nr - j) * sizeof(view->ranges[j]));
        view->nr -= j - i;
    }
}

336 337 338 339 340 341 342 343 344
/* Tell whether accesses to @mr are big-endian.  When TARGET_WORDS_BIGENDIAN
 * is defined, every endianness other than DEVICE_LITTLE_ENDIAN counts as
 * big-endian; otherwise only DEVICE_BIG_ENDIAN does.
 */
static bool memory_region_big_endian(MemoryRegion *mr)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const bool big = (mr->ops->endianness != DEVICE_LITTLE_ENDIAN);
#else
    const bool big = (mr->ops->endianness == DEVICE_BIG_ENDIAN);
#endif

    return big;
}

P
Paolo Bonzini 已提交
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
/* Tell whether the endianness declared by @mr is the opposite of the
 * target's: DEVICE_LITTLE_ENDIAN when TARGET_WORDS_BIGENDIAN is defined,
 * DEVICE_BIG_ENDIAN otherwise.
 */
static bool memory_region_wrong_endianness(MemoryRegion *mr)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const bool swapped = (mr->ops->endianness == DEVICE_LITTLE_ENDIAN);
#else
    const bool swapped = (mr->ops->endianness == DEVICE_BIG_ENDIAN);
#endif

    return swapped;
}

/* Byte-swap *data in place when @mr has the opposite endianness to the
 * target.  @size is the access width in bytes and must be 1, 2, 4 or 8;
 * any other value aborts (only checked when a swap is needed).
 */
static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
{
    if (!memory_region_wrong_endianness(mr)) {
        return;
    }

    switch (size) {
    case 1:
        /* a single byte has no byte order */
        break;
    case 2:
        *data = bswap16(*data);
        break;
    case 4:
        *data = bswap32(*data);
        break;
    case 8:
        *data = bswap64(*data);
        break;
    default:
        abort();
    }
}

375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
/* Read accessor for regions using the old_mmio callback table.  The
 * handler is picked with index ctz32(size); the masked result is shifted
 * into place and OR-ed into *value.  @attrs is unused.  Always returns
 * MEMTX_OK.
 */
static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
                                                       hwaddr addr,
                                                       uint64_t *value,
                                                       unsigned size,
                                                       unsigned shift,
                                                       uint64_t mask,
                                                       MemTxAttrs attrs)
{
    uint64_t raw = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);

    trace_memory_region_ops_read(mr, addr, raw, size);
    *value |= (raw & mask) << shift;

    return MEMTX_OK;
}

static MemTxResult  memory_region_read_accessor(MemoryRegion *mr,
392 393 394 395
                                                hwaddr addr,
                                                uint64_t *value,
                                                unsigned size,
                                                unsigned shift,
396 397
                                                uint64_t mask,
                                                MemTxAttrs attrs)
398 399 400
{
    uint64_t tmp;

401
    tmp = mr->ops->read(mr->opaque, addr, size);
402
    trace_memory_region_ops_read(mr, addr, tmp, size);
403
    *value |= (tmp & mask) << shift;
404
    return MEMTX_OK;
405 406
}

407 408 409 410 411 412 413
/* Read accessor for regions providing ops->read_with_attrs.  The value
 * stored by the callback (0 if it stores nothing) is masked, shifted and
 * OR-ed into *value; the callback's MemTxResult is returned unchanged.
 */
static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr,
                                                          hwaddr addr,
                                                          uint64_t *value,
                                                          unsigned size,
                                                          unsigned shift,
                                                          uint64_t mask,
                                                          MemTxAttrs attrs)
{
    uint64_t tmp = 0;
    MemTxResult r;

    r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs);
    trace_memory_region_ops_read(mr, addr, tmp, size);
    *value |= (tmp & mask) << shift;
    return r;
}

424 425 426 427 428 429 430
/* Write accessor for regions using the old_mmio callback table.  The
 * piece to write is extracted from *value with @shift and @mask and
 * passed to the handler at index ctz32(size).  @attrs is unused.  Always
 * returns MEMTX_OK.
 */
static MemTxResult memory_region_oldmmio_write_accessor(MemoryRegion *mr,
                                                        hwaddr addr,
                                                        uint64_t *value,
                                                        unsigned size,
                                                        unsigned shift,
                                                        uint64_t mask,
                                                        MemTxAttrs attrs)
{
    uint64_t tmp;

    tmp = (*value >> shift) & mask;
    trace_memory_region_ops_write(mr, addr, tmp, size);
    mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
    return MEMTX_OK;
}

440 441 442 443 444 445 446
/* Write accessor for regions providing ops->write.  The piece to write is
 * extracted from *value with @shift and @mask.  @attrs is unused.  Always
 * returns MEMTX_OK.
 */
static MemTxResult memory_region_write_accessor(MemoryRegion *mr,
                                                hwaddr addr,
                                                uint64_t *value,
                                                unsigned size,
                                                unsigned shift,
                                                uint64_t mask,
                                                MemTxAttrs attrs)
{
    uint64_t tmp;

    tmp = (*value >> shift) & mask;
    trace_memory_region_ops_write(mr, addr, tmp, size);
    mr->ops->write(mr->opaque, addr, tmp, size);
    return MEMTX_OK;
}

456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
/* Write accessor for regions providing ops->write_with_attrs.  The piece
 * to write is extracted from *value with @shift and @mask, traced, and
 * handed to the callback, whose MemTxResult is returned unchanged.
 */
static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr,
                                                           hwaddr addr,
                                                           uint64_t *value,
                                                           unsigned size,
                                                           unsigned shift,
                                                           uint64_t mask,
                                                           MemTxAttrs attrs)
{
    const uint64_t piece = (*value >> shift) & mask;
    MemTxResult result;

    trace_memory_region_ops_write(mr, addr, piece, size);
    result = mr->ops->write_with_attrs(mr->opaque, addr, piece, size, attrs);

    return result;
}

/* Perform an access of @size bytes at @addr as a sequence of accesses of a
 * width the region supports.
 *
 * @access_size_min / @access_size_max: supported widths in bytes; a value
 *     of 0 selects the default (1 and 4 respectively).
 * @access: accessor invoked once per piece with the piece's address,
 *     width, bit shift inside *value and bit mask.
 *
 * The piece width is @size clamped to [min, max].  For big-endian regions
 * the first piece maps to the most significant bits of *value, otherwise
 * to the least significant ones.  Returns the OR of all accessor results.
 */
static MemTxResult access_with_adjusted_size(hwaddr addr,
                                      uint64_t *value,
                                      unsigned size,
                                      unsigned access_size_min,
                                      unsigned access_size_max,
                                      MemTxResult (*access)(MemoryRegion *mr,
                                                            hwaddr addr,
                                                            uint64_t *value,
                                                            unsigned size,
                                                            unsigned shift,
                                                            uint64_t mask,
                                                            MemTxAttrs attrs),
                                      MemoryRegion *mr,
                                      MemTxAttrs attrs)
{
    uint64_t access_mask;
    unsigned access_size;
    unsigned i;
    MemTxResult r = MEMTX_OK;

    if (!access_size_min) {
        access_size_min = 1;
    }
    if (!access_size_max) {
        access_size_max = 4;
    }

    /* FIXME: support unaligned access? */
    access_size = MAX(MIN(size, access_size_max), access_size_min);
    access_mask = -1ULL >> (64 - access_size * 8);
    if (memory_region_big_endian(mr)) {
        for (i = 0; i < size; i += access_size) {
            r |= access(mr, addr + i, value, access_size,
                        (size - access_size - i) * 8, access_mask, attrs);
        }
    } else {
        for (i = 0; i < size; i += access_size) {
            r |= access(mr, addr + i, value, access_size, i * 8,
                        access_mask, attrs);
        }
    }
    return r;
}

515 516
/* Find the address space whose root is the outermost container of @mr.
 * Returns NULL when no registered address space has that root.
 */
static AddressSpace *memory_region_to_address_space(MemoryRegion *mr)
{
    AddressSpace *as;

    /* Climb to the top-level container. */
    while (mr->container) {
        mr = mr->container;
    }
    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        if (mr == as->root) {
            return as;
        }
    }
    return NULL;
}

A
Avi Kivity 已提交
530 531 532 533 534
/* Render a memory region into the global view.  Ranges in @view obscure
 * ranges in @mr.
 */
static void render_memory_region(FlatView *view,
                                 MemoryRegion *mr,
535
                                 Int128 base,
536 537
                                 AddrRange clip,
                                 bool readonly)
A
Avi Kivity 已提交
538 539 540
{
    MemoryRegion *subregion;
    unsigned i;
A
Avi Kivity 已提交
541
    hwaddr offset_in_region;
542 543
    Int128 remain;
    Int128 now;
A
Avi Kivity 已提交
544 545 546
    FlatRange fr;
    AddrRange tmp;

547 548 549 550
    if (!mr->enabled) {
        return;
    }

551
    int128_addto(&base, int128_make64(mr->addr));
552
    readonly |= mr->readonly;
A
Avi Kivity 已提交
553 554 555 556 557 558 559 560 561 562

    tmp = addrrange_make(base, mr->size);

    if (!addrrange_intersects(tmp, clip)) {
        return;
    }

    clip = addrrange_intersection(tmp, clip);

    if (mr->alias) {
563 564
        int128_subfrom(&base, int128_make64(mr->alias->addr));
        int128_subfrom(&base, int128_make64(mr->alias_offset));
565
        render_memory_region(view, mr->alias, base, clip, readonly);
A
Avi Kivity 已提交
566 567 568 569 570
        return;
    }

    /* Render subregions in priority order. */
    QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
571
        render_memory_region(view, subregion, base, clip, readonly);
A
Avi Kivity 已提交
572 573
    }

574
    if (!mr->terminates) {
A
Avi Kivity 已提交
575 576 577
        return;
    }

578
    offset_in_region = int128_get64(int128_sub(clip.start, base));
A
Avi Kivity 已提交
579 580 581
    base = clip.start;
    remain = clip.size;

582
    fr.mr = mr;
583
    fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
584 585 586
    fr.romd_mode = mr->romd_mode;
    fr.readonly = readonly;

A
Avi Kivity 已提交
587
    /* Render the region itself into any gaps left by the current view. */
588 589
    for (i = 0; i < view->nr && int128_nz(remain); ++i) {
        if (int128_ge(base, addrrange_end(view->ranges[i].addr))) {
A
Avi Kivity 已提交
590 591
            continue;
        }
592 593 594
        if (int128_lt(base, view->ranges[i].addr.start)) {
            now = int128_min(remain,
                             int128_sub(view->ranges[i].addr.start, base));
A
Avi Kivity 已提交
595 596 597 598
            fr.offset_in_region = offset_in_region;
            fr.addr = addrrange_make(base, now);
            flatview_insert(view, i, &fr);
            ++i;
599 600 601
            int128_addto(&base, now);
            offset_in_region += int128_get64(now);
            int128_subfrom(&remain, now);
A
Avi Kivity 已提交
602
        }
603 604 605 606 607 608
        now = int128_sub(int128_min(int128_add(base, remain),
                                    addrrange_end(view->ranges[i].addr)),
                         base);
        int128_addto(&base, now);
        offset_in_region += int128_get64(now);
        int128_subfrom(&remain, now);
A
Avi Kivity 已提交
609
    }
610
    if (int128_nz(remain)) {
A
Avi Kivity 已提交
611 612 613 614 615 616 617
        fr.offset_in_region = offset_in_region;
        fr.addr = addrrange_make(base, remain);
        flatview_insert(view, i, &fr);
    }
}

/* Render a memory topology into a list of disjoint absolute ranges. */
618
static FlatView *generate_memory_topology(MemoryRegion *mr)
A
Avi Kivity 已提交
619
{
620
    FlatView *view;
A
Avi Kivity 已提交
621

622 623
    view = g_new(FlatView, 1);
    flatview_init(view);
A
Avi Kivity 已提交
624

A
Avi Kivity 已提交
625
    if (mr) {
626
        render_memory_region(view, mr, int128_zero(),
A
Avi Kivity 已提交
627 628
                             addrrange_make(int128_zero(), int128_2_64()), false);
    }
629
    flatview_simplify(view);
A
Avi Kivity 已提交
630 631 632 633

    return view;
}

A
Avi Kivity 已提交
634 635 636 637 638 639 640
/* Notify listeners about the difference between two sorted ioeventfd
 * arrays: entries only in @fds_old trigger eventfd_del (listeners walked
 * Forward), entries only in @fds_new trigger eventfd_add (walked Reverse),
 * and entries present in both are left alone.  Both arrays must be sorted
 * according to memory_region_ioeventfd_before().
 */
static void address_space_add_del_ioeventfds(AddressSpace *as,
                                             MemoryRegionIoeventfd *fds_new,
                                             unsigned fds_new_nb,
                                             MemoryRegionIoeventfd *fds_old,
                                             unsigned fds_old_nb)
{
    unsigned iold, inew;
    MemoryRegionIoeventfd *fd;
    MemoryRegionSection section;

    /* Generate a symmetric difference of the old and new fd sets, adding
     * and deleting as necessary.
     */

    iold = inew = 0;
    while (iold < fds_old_nb || inew < fds_new_nb) {
        if (iold < fds_old_nb
            && (inew == fds_new_nb
                || memory_region_ioeventfd_before(fds_old[iold],
                                                  fds_new[inew]))) {
            fd = &fds_old[iold];
            section = (MemoryRegionSection) {
                .address_space = as,
                .offset_within_address_space = int128_get64(fd->addr.start),
                .size = fd->addr.size,
            };
            MEMORY_LISTENER_CALL(eventfd_del, Forward, &section,
                                 fd->match_data, fd->data, fd->e);
            ++iold;
        } else if (inew < fds_new_nb
                   && (iold == fds_old_nb
                       || memory_region_ioeventfd_before(fds_new[inew],
                                                         fds_old[iold]))) {
            fd = &fds_new[inew];
            section = (MemoryRegionSection) {
                .address_space = as,
                .offset_within_address_space = int128_get64(fd->addr.start),
                .size = fd->addr.size,
            };
            MEMORY_LISTENER_CALL(eventfd_add, Reverse, &section,
                                 fd->match_data, fd->data, fd->e);
            ++inew;
        } else {
            /* Same entry in both sets: nothing to report. */
            ++iold;
            ++inew;
        }
    }
}

683 684 685 686
/* Return the current FlatView of @as with an extra reference taken.  The
 * pointer is read and referenced inside an RCU read-side section; the
 * caller must release it with flatview_unref().
 */
static FlatView *address_space_get_flatview(AddressSpace *as)
{
    FlatView *view;

    rcu_read_lock();
    view = atomic_rcu_read(&as->current_map);
    flatview_ref(view);
    rcu_read_unlock();
    return view;
}

A
Avi Kivity 已提交
694 695
/* Recompute the ioeventfd list of @as from its current FlatView.
 *
 * Every ioeventfd of every region in the view is translated to an absolute
 * address and kept if it intersects the flat range it came from.  The
 * difference with the previous list is reported to listeners, after which
 * the new list replaces as->ioeventfds (the old array is freed).
 */
static void address_space_update_ioeventfds(AddressSpace *as)
{
    FlatView *view;
    FlatRange *fr;
    unsigned ioeventfd_nb = 0;
    MemoryRegionIoeventfd *ioeventfds = NULL;
    AddrRange tmp;
    unsigned i;

    view = address_space_get_flatview(as);
    FOR_EACH_FLAT_RANGE(fr, view) {
        for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
            /* Shift by (absolute start - offset of the range in its region). */
            tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
                                  int128_sub(fr->addr.start,
                                             int128_make64(fr->offset_in_region)));
            if (addrrange_intersects(fr->addr, tmp)) {
                ++ioeventfd_nb;
                ioeventfds = g_realloc(ioeventfds,
                                       ioeventfd_nb * sizeof(*ioeventfds));
                ioeventfds[ioeventfd_nb-1] = fr->mr->ioeventfds[i];
                ioeventfds[ioeventfd_nb-1].addr = tmp;
            }
        }
    }

    address_space_add_del_ioeventfds(as, ioeventfds, ioeventfd_nb,
                                     as->ioeventfds, as->ioeventfd_nb);

    g_free(as->ioeventfds);
    as->ioeventfds = ioeventfds;
    as->ioeventfd_nb = ioeventfd_nb;
    flatview_unref(view);
}

728
/* Walk @old_view and @new_view in parallel and notify listeners of the
 * differences.  With @adding false only removals are reported
 * (region_del); with @adding true unchanged ranges get region_nop plus
 * log_start/log_stop when their dirty_log_mask gained or lost bits, and
 * new ranges get region_add.
 */
static void address_space_update_topology_pass(AddressSpace *as,
                                               const FlatView *old_view,
                                               const FlatView *new_view,
                                               bool adding)
{
    unsigned iold, inew;
    FlatRange *frold, *frnew;

    /* Generate a symmetric difference of the old and new memory maps.
     * Kill ranges in the old map, and instantiate ranges in the new map.
     */
    iold = inew = 0;
    while (iold < old_view->nr || inew < new_view->nr) {
        if (iold < old_view->nr) {
            frold = &old_view->ranges[iold];
        } else {
            frold = NULL;
        }
        if (inew < new_view->nr) {
            frnew = &new_view->ranges[inew];
        } else {
            frnew = NULL;
        }

        if (frold
            && (!frnew
                || int128_lt(frold->addr.start, frnew->addr.start)
                || (int128_eq(frold->addr.start, frnew->addr.start)
                    && !flatrange_equal(frold, frnew)))) {
            /* In old but not in new, or in both but attributes changed. */

            if (!adding) {
                MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del);
            }

            ++iold;
        } else if (frold && frnew && flatrange_equal(frold, frnew)) {
            /* In both and unchanged (except logging may have changed) */

            if (adding) {
                MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop);
                if (frnew->dirty_log_mask & ~frold->dirty_log_mask) {
                    MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start,
                                                  frold->dirty_log_mask,
                                                  frnew->dirty_log_mask);
                }
                if (frold->dirty_log_mask & ~frnew->dirty_log_mask) {
                    MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop,
                                                  frold->dirty_log_mask,
                                                  frnew->dirty_log_mask);
                }
            }

            ++iold;
            ++inew;
        } else {
            /* In new */

            if (adding) {
                MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add);
            }

            ++inew;
        }
    }
}


/*
 * Rebuild the flat view of @as from its root region, run both listener
 * update passes against the old and new views, publish the new view and
 * finally refresh the address space's ioeventfds.
 */
static void address_space_update_topology(AddressSpace *as)
{
    FlatView *stale = address_space_get_flatview(as);
    FlatView *fresh = generate_memory_topology(as->root);
    int pass;

    /* Pass 0 runs with adding == false, pass 1 with adding == true. */
    for (pass = 0; pass < 2; pass++) {
        address_space_update_topology_pass(as, stale, fresh, pass == 1);
    }

    /* Writes are protected by the BQL.  */
    atomic_rcu_set(&as->current_map, fresh);
    call_rcu(stale, flatview_unref, rcu);

    /* Note that all the old MemoryRegions are still alive up to this
     * point.  This relieves most MemoryListeners from the need to
     * ref/unref the MemoryRegions they get---unless they use them
     * outside the iothread mutex, in which case precise reference
     * counting is necessary.
     */
    flatview_unref(stale);

    address_space_update_ioeventfds(as);
}

A
Avi Kivity 已提交
819 820
void memory_region_transaction_begin(void)
{
821
    qemu_flush_coalesced_mmio_buffer();
A
Avi Kivity 已提交
822 823 824
    ++memory_region_transaction_depth;
}

825 826 827 828 829 830
/* Reset both "update pending" flags. */
static void memory_region_clear_pending(void)
{
    ioeventfd_update_pending = false;
    memory_region_update_pending = false;
}

A
Avi Kivity 已提交
831 832
void memory_region_transaction_commit(void)
{
833 834
    AddressSpace *as;

A
Avi Kivity 已提交
835 836
    assert(memory_region_transaction_depth);
    --memory_region_transaction_depth;
837 838 839
    if (!memory_region_transaction_depth) {
        if (memory_region_update_pending) {
            MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
840

841 842 843
            QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
                address_space_update_topology(as);
            }
844

845 846 847 848 849 850 851 852
            MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
        } else if (ioeventfd_update_pending) {
            QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
                address_space_update_ioeventfds(as);
            }
        }
        memory_region_clear_pending();
   }
A
Avi Kivity 已提交
853 854
}

855 856 857 858 859 860 861 862 863
/* Destructor for regions that own no backing storage. */
static void memory_region_destructor_none(MemoryRegion *mr)
{
    /* Intentionally empty. */
}

/* Destructor for RAM regions: frees the RAM at mr->ram_addr. */
static void memory_region_destructor_ram(MemoryRegion *mr)
{
    ram_addr_t backing = mr->ram_addr;

    qemu_ram_free(backing);
}

864 865 866 867 868
/*
 * Destructor for ROM-device regions: frees the RAM at mr->ram_addr after
 * masking it with TARGET_PAGE_MASK.
 */
static void memory_region_destructor_rom_device(MemoryRegion *mr)
{
    ram_addr_t backing = mr->ram_addr & TARGET_PAGE_MASK;

    qemu_ram_free(backing);
}

P
Peter Crosthwaite 已提交
869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902
/* Return true for the characters '/', '[', '\\' and ']'. */
static bool memory_region_need_escape(char c)
{
    switch (c) {
    case '/':
    case '[':
    case '\\':
    case ']':
        return true;
    default:
        return false;
    }
}

/*
 * Return a newly allocated copy of @name in which every character flagged
 * by memory_region_need_escape() is replaced by the four characters
 * "\xNN" (NN = lowercase hex of the byte).  The result comes from
 * g_memdup()/g_malloc().
 */
static char *memory_region_escape_name(const char *name)
{
    static const char hex_digits[] = "0123456789abcdef";
    const char *src;
    char *result, *dst;
    size_t needed = 0;

    /* First pass: size of the escaped string, excluding the terminator. */
    for (src = name; *src; src++) {
        if (memory_region_need_escape(*src)) {
            needed += 4;
        } else {
            needed += 1;
        }
    }
    if (needed == src - name) {
        /* Nothing to escape: plain duplicate, terminator included. */
        return g_memdup(name, needed + 1);
    }

    result = g_malloc(needed + 1);
    dst = result;
    for (src = name; *src; src++) {
        uint8_t ch = *src;

        if (unlikely(memory_region_need_escape(ch))) {
            *dst++ = '\\';
            *dst++ = 'x';
            *dst++ = hex_digits[ch >> 4];
            *dst++ = hex_digits[ch & 15];
        } else {
            *dst++ = ch;
        }
    }
    *dst = 0;
    return result;
}

A
Avi Kivity 已提交
903
void memory_region_init(MemoryRegion *mr,
904
                        Object *owner,
A
Avi Kivity 已提交
905 906 907
                        const char *name,
                        uint64_t size)
{
908
    object_initialize(mr, sizeof(*mr), TYPE_MEMORY_REGION);
909 910 911 912
    mr->size = int128_make64(size);
    if (size == UINT64_MAX) {
        mr->size = int128_2_64();
    }
913
    mr->name = g_strdup(name);
914
    mr->owner = owner;
P
Peter Crosthwaite 已提交
915 916

    if (name) {
917 918
        char *escaped_name = memory_region_escape_name(name);
        char *name_array = g_strdup_printf("%s[*]", escaped_name);
919 920 921 922 923

        if (!owner) {
            owner = container_get(qdev_get_machine(), "/unattached");
        }

924
        object_property_add_child(owner, name_array, OBJECT(mr), &error_abort);
P
Peter Crosthwaite 已提交
925
        object_unref(OBJECT(mr));
926 927
        g_free(name_array);
        g_free(escaped_name);
P
Peter Crosthwaite 已提交
928 929 930
    }
}

931 932
/* Property getter for "addr": visits mr->addr as a uint64. */
static void memory_region_get_addr(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
    uint64_t addr = MEMORY_REGION(obj)->addr;

    visit_type_uint64(v, name, &addr, errp);
}

940 941 942
/*
 * Property getter for "container": visits the canonical path of
 * mr->container as a string, or the empty string when the region has no
 * container.
 */
static void memory_region_get_container(Object *obj, Visitor *v,
                                        const char *name, void *opaque,
                                        Error **errp)
{
    MemoryRegion *mr = MEMORY_REGION(obj);
    gchar *path;

    if (!mr->container) {
        /* Literal string: must not be freed. */
        path = (gchar *)"";
        visit_type_str(v, name, &path, errp);
        return;
    }

    path = object_get_canonical_path(OBJECT(mr->container));
    visit_type_str(v, name, &path, errp);
    g_free(path);
}

/* Resolve hook for the "container" property: returns mr->container. */
static Object *memory_region_resolve_container(Object *obj, void *opaque,
                                               const char *part)
{
    return OBJECT(MEMORY_REGION(obj)->container);
}

964 965 966
/*
 * Property getter for "priority": visits mr->priority as an int32.
 *
 * NOTE(review): memory_region_initfn() registers this property with the
 * type string "uint32" while the value is visited as int32 here -- confirm
 * which one is intended.
 */
static void memory_region_get_priority(Object *obj, Visitor *v,
                                       const char *name, void *opaque,
                                       Error **errp)
{
    int32_t priority = MEMORY_REGION(obj)->priority;

    visit_type_int32(v, name, &priority, errp);
}

/* Property getter for "may-overlap": returns mr->may_overlap. */
static bool memory_region_get_may_overlap(Object *obj, Error **errp)
{
    return MEMORY_REGION(obj)->may_overlap;
}

981 982
/* Property getter for "size": visits memory_region_size(mr) as a uint64. */
static void memory_region_get_size(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
    uint64_t size = memory_region_size(MEMORY_REGION(obj));

    visit_type_uint64(v, name, &size, errp);
}

P
Peter Crosthwaite 已提交
990 991 992
/*
 * Instance initializer for TYPE_MEMORY_REGION: sets field defaults and
 * registers the read-only "container", "addr", "priority", "may-overlap"
 * and "size" properties (no setters are installed).
 */
static void memory_region_initfn(Object *obj)
{
    MemoryRegion *mr = MEMORY_REGION(obj);
    ObjectProperty *container_prop;

    /* Field defaults. */
    mr->ops = &unassigned_mem_ops;
    mr->ram_addr = RAM_ADDR_INVALID;
    mr->enabled = true;
    mr->romd_mode = true;
    mr->global_locking = true;
    mr->destructor = memory_region_destructor_none;
    QTAILQ_INIT(&mr->subregions);
    QTAILQ_INIT(&mr->coalesced);

    /* "container" additionally gets a resolve hook. */
    container_prop = object_property_add(OBJECT(mr), "container",
                                         "link<" TYPE_MEMORY_REGION ">",
                                         memory_region_get_container,
                                         NULL, /* memory_region_set_container */
                                         NULL, NULL, &error_abort);
    container_prop->resolve = memory_region_resolve_container;

    object_property_add(OBJECT(mr), "addr", "uint64",
                        memory_region_get_addr,
                        NULL, /* memory_region_set_addr */
                        NULL, NULL, &error_abort);
    /*
     * NOTE(review): the getter visits an int32 but the type string here is
     * "uint32" -- left unchanged because it is visible through QOM.
     */
    object_property_add(OBJECT(mr), "priority", "uint32",
                        memory_region_get_priority,
                        NULL, /* memory_region_set_priority */
                        NULL, NULL, &error_abort);
    object_property_add_bool(OBJECT(mr), "may-overlap",
                             memory_region_get_may_overlap,
                             NULL, /* memory_region_set_may_overlap */
                             &error_abort);
    object_property_add(OBJECT(mr), "size", "uint64",
                        memory_region_get_size,
                        NULL, /* memory_region_set_size, */
                        NULL, NULL, &error_abort);
}

1029 1030 1031 1032 1033 1034
/*
 * Read handler for unassigned memory: reports the access to the current
 * CPU, if there is one, and always returns 0.
 */
static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
                                    unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
#endif
    if (!current_cpu) {
        return 0;
    }
    cpu_unassigned_access(current_cpu, addr, false, false, 0, size);
    return 0;
}

/*
 * Write handler for unassigned memory: reports the access to the current
 * CPU, if there is one; the value is otherwise discarded.
 */
static void unassigned_mem_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
#endif
    if (!current_cpu) {
        return;
    }
    cpu_unassigned_access(current_cpu, addr, true, false, 0, size);
}

1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062
/* .valid.accepts hook for unassigned memory: every access is refused. */
static bool unassigned_mem_accepts(void *opaque, hwaddr addr,
                                   unsigned size, bool is_write)
{
    bool accepted = false;

    return accepted;
}

/*
 * Default ops for regions without a device behind them: installed by
 * memory_region_initfn() and used by memory_region_init_io() when it is
 * passed NULL ops.  Only .valid.accepts is set, and that hook refuses
 * every access.
 */
const MemoryRegionOps unassigned_mem_ops = {
    .valid.accepts = unassigned_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

1063 1064 1065 1066
/*
 * Decide whether an access of @size bytes at @addr is valid for @mr.
 *
 * Misaligned accesses are refused unless ops->valid.unaligned is set.
 * Without a valid.accepts hook everything else is accepted.  Otherwise the
 * access is cut into pieces of size clamp(@size, min, max) -- min/max
 * taken from ops->valid, defaulting to 1 and 4 when zero -- and each piece
 * must be accepted by the hook.
 */
bool memory_region_access_valid(MemoryRegion *mr,
                                hwaddr addr,
                                unsigned size,
                                bool is_write)
{
    int smallest, largest;
    int piece, offset;

    if (!mr->ops->valid.unaligned && (addr & (size - 1))) {
        return false;
    }

    if (!mr->ops->valid.accepts) {
        return true;
    }

    smallest = mr->ops->valid.min_access_size
             ? mr->ops->valid.min_access_size : 1;
    largest = mr->ops->valid.max_access_size
            ? mr->ops->valid.max_access_size : 4;

    piece = MAX(MIN(size, largest), smallest);
    for (offset = 0; offset < size; offset += piece) {
        bool ok = mr->ops->valid.accepts(mr->opaque, addr + offset, piece,
                                         is_write);
        if (!ok) {
            return false;
        }
    }

    return true;
}

1100 1101 1102 1103 1104
/*
 * Perform a read on @mr through whichever read callback its ops provide:
 * ->read, else ->read_with_attrs, else the old-MMIO accessor (access sizes
 * 1..4).  *@pval is zeroed before the access.
 */
static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr,
                                                hwaddr addr,
                                                uint64_t *pval,
                                                unsigned size,
                                                MemTxAttrs attrs)
{
    *pval = 0;

    if (mr->ops->read) {
        return access_with_adjusted_size(addr, pval, size,
                                         mr->ops->impl.min_access_size,
                                         mr->ops->impl.max_access_size,
                                         memory_region_read_accessor,
                                         mr, attrs);
    }

    if (mr->ops->read_with_attrs) {
        return access_with_adjusted_size(addr, pval, size,
                                         mr->ops->impl.min_access_size,
                                         mr->ops->impl.max_access_size,
                                         memory_region_read_with_attrs_accessor,
                                         mr, attrs);
    }

    return access_with_adjusted_size(addr, pval, size, 1, 4,
                                     memory_region_oldmmio_read_accessor,
                                     mr, attrs);
}

1127 1128 1129 1130 1131
/*
 * Read @size bytes at @addr from @mr into *@pval.
 *
 * An invalid access is routed to unassigned_mem_read() and reported as
 * MEMTX_DECODE_ERROR; otherwise the read is dispatched and the result is
 * passed through adjust_endianness().
 */
MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
                                        hwaddr addr,
                                        uint64_t *pval,
                                        unsigned size,
                                        MemTxAttrs attrs)
{
    MemTxResult result;

    if (memory_region_access_valid(mr, addr, size, false)) {
        result = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
        adjust_endianness(mr, pval, size);
        return result;
    }

    *pval = unassigned_mem_read(mr, addr, size);
    return MEMTX_DECODE_ERROR;
}
A
Avi Kivity 已提交
1144

P
Pavel Fedin 已提交
1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
/*
 * Look for an ioeventfd of @mr matching a write of @data/@size at @addr
 * and signal the first match.
 *
 * Return true if an eventfd was signalled.
 */
static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
                                                    hwaddr addr,
                                                    uint64_t data,
                                                    unsigned size,
                                                    MemTxAttrs attrs)
{
    MemoryRegionIoeventfd probe = {
        .addr = addrrange_make(int128_make64(addr), int128_make64(size)),
        .data = data,
    };
    unsigned idx;

    for (idx = 0; idx < mr->ioeventfd_nb; idx++) {
        /*
         * Copy match_data and the notifier from the candidate, so the
         * comparison cannot differ in those two fields.
         */
        probe.match_data = mr->ioeventfds[idx].match_data;
        probe.e = mr->ioeventfds[idx].e;

        if (!memory_region_ioeventfd_equal(probe, mr->ioeventfds[idx])) {
            continue;
        }
        event_notifier_set(probe.e);
        return true;
    }

    return false;
}

1171 1172 1173 1174 1175
/*
 * Write @size bytes of @data at @addr to @mr.
 *
 * An invalid access is routed to unassigned_mem_write() and reported as
 * MEMTX_DECODE_ERROR.  When KVM eventfds are not enabled, a matching
 * ioeventfd is signalled here and the write completes with MEMTX_OK.
 * Otherwise the write goes through ->write, else ->write_with_attrs, else
 * the old-MMIO accessor (access sizes 1..4).
 */
MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
                                         hwaddr addr,
                                         uint64_t data,
                                         unsigned size,
                                         MemTxAttrs attrs)
{
    if (!memory_region_access_valid(mr, addr, size, true)) {
        unassigned_mem_write(mr, addr, data, size);
        return MEMTX_DECODE_ERROR;
    }

    adjust_endianness(mr, &data, size);

    if (!kvm_eventfds_enabled()) {
        if (memory_region_dispatch_write_eventfds(mr, addr, data, size,
                                                  attrs)) {
            return MEMTX_OK;
        }
    }

    if (mr->ops->write) {
        return access_with_adjusted_size(addr, &data, size,
                                         mr->ops->impl.min_access_size,
                                         mr->ops->impl.max_access_size,
                                         memory_region_write_accessor, mr,
                                         attrs);
    }

    if (mr->ops->write_with_attrs) {
        return access_with_adjusted_size(addr, &data, size,
                                         mr->ops->impl.min_access_size,
                                         mr->ops->impl.max_access_size,
                                         memory_region_write_with_attrs_accessor,
                                         mr, attrs);
    }

    return access_with_adjusted_size(addr, &data, size, 1, 4,
                                     memory_region_oldmmio_write_accessor,
                                     mr, attrs);
}

void memory_region_init_io(MemoryRegion *mr,
1210
                           Object *owner,
A
Avi Kivity 已提交
1211 1212 1213 1214 1215
                           const MemoryRegionOps *ops,
                           void *opaque,
                           const char *name,
                           uint64_t size)
{
1216
    memory_region_init(mr, owner, name, size);
1217
    mr->ops = ops ? ops : &unassigned_mem_ops;
A
Avi Kivity 已提交
1218
    mr->opaque = opaque;
1219
    mr->terminates = true;
A
Avi Kivity 已提交
1220 1221 1222
}

void memory_region_init_ram(MemoryRegion *mr,
1223
                            Object *owner,
A
Avi Kivity 已提交
1224
                            const char *name,
1225 1226
                            uint64_t size,
                            Error **errp)
A
Avi Kivity 已提交
1227
{
1228
    memory_region_init(mr, owner, name, size);
A
Avi Kivity 已提交
1229
    mr->ram = true;
1230
    mr->terminates = true;
1231
    mr->destructor = memory_region_destructor_ram;
1232
    mr->ram_addr = qemu_ram_alloc(size, mr, errp);
1233
    mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
1234 1235
}

1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250
/*
 * Initialize @mr as a RAM region of @size bytes allocated with
 * qemu_ram_alloc_resizeable(), passing along @max_size and the @resized
 * callback; allocation errors are reported through @errp.
 * DIRTY_MEMORY_CODE logging is enabled when TCG is in use.
 */
void memory_region_init_resizeable_ram(MemoryRegion *mr,
                                       Object *owner,
                                       const char *name,
                                       uint64_t size,
                                       uint64_t max_size,
                                       void (*resized)(const char*,
                                                       uint64_t length,
                                                       void *host),
                                       Error **errp)
{
    memory_region_init(mr, owner, name, size);
    mr->ram = true;
    mr->terminates = true;
    mr->destructor = memory_region_destructor_ram;
    mr->ram_addr = qemu_ram_alloc_resizeable(size, max_size, resized, mr, errp);
    if (tcg_enabled()) {
        mr->dirty_log_mask = (1 << DIRTY_MEMORY_CODE);
    } else {
        mr->dirty_log_mask = 0;
    }
}

1254 1255 1256 1257 1258
#ifdef __linux__
/*
 * Initialize @mr as a RAM region of @size bytes allocated with
 * qemu_ram_alloc_from_file() from @path (@share is passed through);
 * allocation errors are reported through @errp.  DIRTY_MEMORY_CODE
 * logging is enabled when TCG is in use.  Only built on Linux.
 */
void memory_region_init_ram_from_file(MemoryRegion *mr,
                                      struct Object *owner,
                                      const char *name,
                                      uint64_t size,
                                      bool share,
                                      const char *path,
                                      Error **errp)
{
    memory_region_init(mr, owner, name, size);
    mr->ram = true;
    mr->terminates = true;
    mr->destructor = memory_region_destructor_ram;
    mr->ram_addr = qemu_ram_alloc_from_file(size, mr, share, path, errp);
    if (tcg_enabled()) {
        mr->dirty_log_mask = (1 << DIRTY_MEMORY_CODE);
    } else {
        mr->dirty_log_mask = 0;
    }
}
#endif
A
Avi Kivity 已提交
1271 1272

void memory_region_init_ram_ptr(MemoryRegion *mr,
1273
                                Object *owner,
A
Avi Kivity 已提交
1274 1275 1276 1277
                                const char *name,
                                uint64_t size,
                                void *ptr)
{
1278
    memory_region_init(mr, owner, name, size);
A
Avi Kivity 已提交
1279
    mr->ram = true;
1280
    mr->terminates = true;
1281
    mr->destructor = memory_region_destructor_ram;
1282
    mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
1283 1284 1285

    /* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL.  */
    assert(ptr != NULL);
1286
    mr->ram_addr = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal);
A
Avi Kivity 已提交
1287 1288
}

1289 1290 1291 1292 1293
/* Set the skip_dump flag on @mr. */
void memory_region_set_skip_dump(MemoryRegion *mr)
{
    const bool skip = true;

    mr->skip_dump = skip;
}

A
Avi Kivity 已提交
1294
void memory_region_init_alias(MemoryRegion *mr,
1295
                              Object *owner,
A
Avi Kivity 已提交
1296 1297
                              const char *name,
                              MemoryRegion *orig,
A
Avi Kivity 已提交
1298
                              hwaddr offset,
A
Avi Kivity 已提交
1299 1300
                              uint64_t size)
{
1301
    memory_region_init(mr, owner, name, size);
A
Avi Kivity 已提交
1302 1303 1304 1305
    mr->alias = orig;
    mr->alias_offset = offset;
}

1306
void memory_region_init_rom_device(MemoryRegion *mr,
1307
                                   Object *owner,
1308
                                   const MemoryRegionOps *ops,
1309
                                   void *opaque,
1310
                                   const char *name,
1311 1312
                                   uint64_t size,
                                   Error **errp)
1313
{
1314
    memory_region_init(mr, owner, name, size);
1315
    mr->ops = ops;
1316
    mr->opaque = opaque;
1317
    mr->terminates = true;
A
Avi Kivity 已提交
1318
    mr->rom_device = true;
1319
    mr->destructor = memory_region_destructor_rom_device;
1320
    mr->ram_addr = qemu_ram_alloc(size, mr, errp);
1321 1322
}

A
Avi Kivity 已提交
1323
void memory_region_init_iommu(MemoryRegion *mr,
1324
                              Object *owner,
A
Avi Kivity 已提交
1325 1326 1327 1328
                              const MemoryRegionIOMMUOps *ops,
                              const char *name,
                              uint64_t size)
{
1329
    memory_region_init(mr, owner, name, size);
A
Avi Kivity 已提交
1330 1331
    mr->iommu_ops = ops,
    mr->terminates = true;  /* then re-forwards */
1332
    notifier_list_init(&mr->iommu_notify);
A
Avi Kivity 已提交
1333 1334
}

P
Peter Crosthwaite 已提交
1335
/*
 * Instance finalizer for TYPE_MEMORY_REGION: detaches all subregions, runs
 * the region's destructor, drops its coalescing ranges and frees the name
 * and ioeventfd array.  The region must no longer have a container.
 */
static void memory_region_finalize(Object *obj)
{
    MemoryRegion *mr = MEMORY_REGION(obj);

    assert(!mr->container);

    /* We know the region is not visible in any address space (it
     * does not have a container and cannot be a root either because
     * it has no references), so we can blindly clear mr->enabled.
     * memory_region_set_enabled instead could trigger a transaction
     * and cause an infinite loop.
     */
    mr->enabled = false;

    /* Remove the children one by one inside a single transaction. */
    memory_region_transaction_begin();
    while (!QTAILQ_EMPTY(&mr->subregions)) {
        MemoryRegion *child = QTAILQ_FIRST(&mr->subregions);

        memory_region_del_subregion(mr, child);
    }
    memory_region_transaction_commit();

    mr->destructor(mr);
    memory_region_clear_coalescing(mr);
    g_free((char *)mr->name);
    g_free(mr->ioeventfds);
}

P
Paolo Bonzini 已提交
1361 1362
/* Return the QOM parent of @mr. */
Object *memory_region_owner(MemoryRegion *mr)
{
    return OBJECT(mr)->parent;
}

P
Paolo Bonzini 已提交
1367 1368
/*
 * Take a reference on the owner of @mr.  Does nothing when @mr is NULL or
 * has no owner.
 */
void memory_region_ref(MemoryRegion *mr)
{
    /* MMIO callbacks most likely will access data that belongs
     * to the owner, hence the need to ref/unref the owner whenever
     * the memory region is in use.
     *
     * The memory region is a child of its owner.  As long as the
     * owner doesn't call unparent itself on the memory region,
     * ref-ing the owner will also keep the memory region alive.
     * Memory regions without an owner are supposed to never go away;
     * we do not ref/unref them because it slows down DMA sensibly.
     */
    if (!mr || !mr->owner) {
        return;
    }
    object_ref(mr->owner);
}

/*
 * Drop a reference on the owner of @mr.  Does nothing when @mr is NULL or
 * has no owner.
 */
void memory_region_unref(MemoryRegion *mr)
{
    if (!mr || !mr->owner) {
        return;
    }
    object_unref(mr->owner);
}

A
Avi Kivity 已提交
1391 1392
/*
 * Return the size of @mr as a 64-bit value.  A size of int128_2_64() is
 * reported as UINT64_MAX.
 */
uint64_t memory_region_size(MemoryRegion *mr)
{
    return int128_eq(mr->size, int128_2_64()) ? UINT64_MAX
                                              : int128_get64(mr->size);
}

1399
const char *memory_region_name(const MemoryRegion *mr)
1400
{
1401 1402 1403 1404
    if (!mr->name) {
        ((MemoryRegion *)mr)->name =
            object_get_canonical_path_component(OBJECT(mr));
    }
1405
    return mr->name;
1406 1407
}

1408 1409 1410 1411 1412
/* Return the skip_dump flag of @mr. */
bool memory_region_is_skip_dump(MemoryRegion *mr)
{
    bool skip = mr->skip_dump;

    return skip;
}

1413
/*
 * Return the dirty-log client mask of @mr, with the DIRTY_MEMORY_MIGRATION
 * bit added while global dirty logging is on.
 */
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
{
    if (!global_dirty_log) {
        return mr->dirty_log_mask;
    }
    return mr->dirty_log_mask | (1 << DIRTY_MEMORY_MIGRATION);
}

1422 1423 1424 1425 1426
/* Return true when dirty logging for @client is active on @mr. */
bool memory_region_is_logging(MemoryRegion *mr, uint8_t client)
{
    uint8_t active = memory_region_get_dirty_log_mask(mr);

    return (active & (1 << client)) != 0;
}

1427 1428 1429 1430 1431
/* Add @n to the IOMMU notifier list of @mr. */
void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n)
{
    NotifierList *list = &mr->iommu_notify;

    notifier_list_add(list, n);
}

1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451
/*
 * Walk @mr in steps of @granularity, translate each address through the
 * region's IOMMU ops and call @n for every translation whose permission
 * is not IOMMU_NONE.
 */
void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n,
                                hwaddr granularity, bool is_write)
{
    hwaddr addr = 0;

    while (addr < memory_region_size(mr)) {
        IOMMUTLBEntry iotlb = mr->iommu_ops->translate(mr, addr, is_write);

        if (iotlb.perm != IOMMU_NONE) {
            n->notify(n, &iotlb);
        }

        /* if (2^64 - MR size) < granularity, it's possible to get an
         * infinite loop here.  This should catch such a wraparound */
        if ((addr + granularity) < addr) {
            break;
        }
        addr += granularity;
    }
}

1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463
/* Remove @n from whichever notifier list it is on. */
void memory_region_unregister_iommu_notifier(Notifier *n)
{
    Notifier *victim = n;

    notifier_remove(victim);
}

/*
 * Pass @entry to every notifier registered on @mr, which must be an
 * IOMMU region (asserted).
 */
void memory_region_notify_iommu(MemoryRegion *mr,
                                IOMMUTLBEntry entry)
{
    NotifierList *list;

    assert(memory_region_is_iommu(mr));
    list = &mr->iommu_notify;
    notifier_list_notify(list, &entry);
}

A
Avi Kivity 已提交
1464 1465
/*
 * Turn dirty logging for @client on or off for @mr.  Only
 * DIRTY_MEMORY_VGA is accepted (asserted).
 *
 * Requests are counted in mr->vga_logging_count; the dirty log mask is
 * only rewritten, and a topology update only scheduled, when that count
 * moves between zero and non-zero.
 */
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
{
    uint8_t mask = 1 << client;
    uint8_t count_before;

    assert(client == DIRTY_MEMORY_VGA);
    count_before = mr->vga_logging_count;
    if (log) {
        mr->vga_logging_count += 1;
    } else {
        mr->vga_logging_count -= 1;
    }
    if ((count_before != 0) == (mr->vga_logging_count != 0)) {
        /* Logging state did not actually change. */
        return;
    }

    memory_region_transaction_begin();
    if (log) {
        mr->dirty_log_mask |= mask;
    } else {
        mr->dirty_log_mask &= ~mask;
    }
    memory_region_update_pending |= mr->enabled;
    memory_region_transaction_commit();
}

A
Avi Kivity 已提交
1482 1483
/*
 * Query the dirty state for @client over @size bytes at offset @addr of
 * @mr, which must have a valid ram_addr (asserted).
 */
bool memory_region_get_dirty(MemoryRegion *mr, hwaddr addr,
                             hwaddr size, unsigned client)
{
    bool dirty;

    assert(mr->ram_addr != RAM_ADDR_INVALID);
    dirty = cpu_physical_memory_get_dirty(mr->ram_addr + addr, size, client);
    return dirty;
}

A
Avi Kivity 已提交
1489 1490
/*
 * Mark @size bytes at offset @addr of @mr dirty for every client in the
 * region's current dirty log mask.  @mr must have a valid ram_addr
 * (asserted).
 */
void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
                             hwaddr size)
{
    uint8_t clients;

    assert(mr->ram_addr != RAM_ADDR_INVALID);
    clients = memory_region_get_dirty_log_mask(mr);
    cpu_physical_memory_set_dirty_range(mr->ram_addr + addr, size, clients);
}

1497 1498 1499
/*
 * Test and clear the dirty state for @client over @size bytes at offset
 * @addr of @mr, returning the result of the underlying test-and-clear.
 * @mr must have a valid ram_addr (asserted).
 */
bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr,
                                        hwaddr size, unsigned client)
{
    bool was_dirty;

    assert(mr->ram_addr != RAM_ADDR_INVALID);
    was_dirty = cpu_physical_memory_test_and_clear_dirty(mr->ram_addr + addr,
                                                         size, client);
    return was_dirty;
}


A
Avi Kivity 已提交
1506 1507
/*
 * Invoke the log_sync listener callback for every flat range, in every
 * address space, that is backed by @mr.
 */
void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
{
    AddressSpace *as;
    FlatView *view;
    FlatRange *range;

    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        view = address_space_get_flatview(as);
        FOR_EACH_FLAT_RANGE(range, view) {
            if (range->mr == mr) {
                MEMORY_LISTENER_UPDATE_REGION(range, as, Forward, log_sync);
            }
        }
        flatview_unref(view);
    }
}

/*
 * Change the read-only flag of @mr.  A real change is applied inside a
 * transaction and schedules a topology update if the region is enabled.
 */
void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
{
    if (mr->readonly == readonly) {
        return;
    }

    memory_region_transaction_begin();
    mr->readonly = readonly;
    memory_region_update_pending |= mr->enabled;
    memory_region_transaction_commit();
}

1532
/*
 * Change the romd_mode flag of @mr.  A real change is applied inside a
 * transaction and schedules a topology update if the region is enabled.
 */
void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode)
{
    if (mr->romd_mode == romd_mode) {
        return;
    }

    memory_region_transaction_begin();
    mr->romd_mode = romd_mode;
    memory_region_update_pending |= mr->enabled;
    memory_region_transaction_commit();
}

A
Avi Kivity 已提交
1542 1543
/*
 * Clear the dirty state for @client over @size bytes at offset @addr of
 * @mr; the previous state is discarded.  @mr must have a valid ram_addr
 * (asserted).
 */
void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
                               hwaddr size, unsigned client)
{
    assert(mr->ram_addr != RAM_ADDR_INVALID);
    (void)cpu_physical_memory_test_and_clear_dirty(mr->ram_addr + addr,
                                                   size, client);
}

1550 1551 1552 1553 1554 1555
/*
 * Return the file descriptor of the RAM backing @mr, following alias
 * links to the aliased region first.  The final region must have a valid
 * ram_addr (asserted).
 */
int memory_region_get_fd(MemoryRegion *mr)
{
    /* Resolve the alias chain down to the real region. */
    while (mr->alias) {
        mr = mr->alias;
    }

    assert(mr->ram_addr != RAM_ADDR_INVALID);

    return qemu_get_ram_fd(mr->ram_addr & TARGET_PAGE_MASK);
}

A
Avi Kivity 已提交
1561 1562
/*
 * Return a host pointer to the RAM backing @mr.  Alias links are followed
 * and their alias_offset values summed; the sum is added to the pointer
 * of the final region, which must have a valid ram_addr (asserted).  The
 * lookup runs inside an RCU read-side section.
 */
void *memory_region_get_ram_ptr(MemoryRegion *mr)
{
    uint64_t alias_delta = 0;
    void *base;

    rcu_read_lock();
    for (; mr->alias; mr = mr->alias) {
        alias_delta += mr->alias_offset;
    }
    assert(mr->ram_addr != RAM_ADDR_INVALID);
    base = qemu_get_ram_ptr(mr->ram_addr & TARGET_PAGE_MASK);
    rcu_read_unlock();

    return base + alias_delta;
}

1578 1579
/*
 * Resize the RAM backing @mr to @newsize via qemu_ram_resize(); errors
 * are reported through @errp.  @mr must have a valid ram_addr (asserted).
 */
void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp)
{
    ram_addr_t backing;

    assert(mr->ram_addr != RAM_ADDR_INVALID);

    backing = mr->ram_addr;
    qemu_ram_resize(backing, newsize, errp);
}

1585
/*
 * Re-announce the coalesced MMIO ranges of @mr within @as.
 *
 * For every flat range of @as backed by @mr, listeners are first told to
 * drop coalescing for the whole flat range (coalesced_mmio_del); then each
 * coalesced range of @mr is shifted from region to address-space
 * coordinates, clipped to the flat range, and announced with
 * coalesced_mmio_add.
 */
static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpace *as)
{
    FlatView *view = address_space_get_flatview(as);
    FlatRange *fr;
    CoalescedMemoryRange *cmr;

    FOR_EACH_FLAT_RANGE(fr, view) {
        if (fr->mr == mr) {
            MemoryRegionSection section = {
                .address_space = as,
                .offset_within_address_space = int128_get64(fr->addr.start),
                .size = fr->addr.size,
            };

            MEMORY_LISTENER_CALL(coalesced_mmio_del, Reverse, &section,
                                 int128_get64(fr->addr.start),
                                 int128_get64(fr->addr.size));
            QTAILQ_FOREACH(cmr, &mr->coalesced, link) {
                /* Where the coalesced range lands in the address space. */
                Int128 delta = int128_sub(fr->addr.start,
                                          int128_make64(fr->offset_in_region));
                AddrRange mapped = addrrange_shift(cmr->addr, delta);

                if (addrrange_intersects(mapped, fr->addr)) {
                    mapped = addrrange_intersection(mapped, fr->addr);
                    MEMORY_LISTENER_CALL(coalesced_mmio_add, Forward, &section,
                                         int128_get64(mapped.start),
                                         int128_get64(mapped.size));
                }
            }
        }
    }
    flatview_unref(view);
}

1622 1623 1624 1625 1626 1627 1628 1629 1630
/*
 * Refresh the coalesced MMIO ranges of @mr in every address space on the
 * global address_spaces list.
 */
static void memory_region_update_coalesced_range(MemoryRegion *mr)
{
    AddressSpace *space;

    QTAILQ_FOREACH(space, &address_spaces, address_spaces_link) {
        memory_region_update_coalesced_range_as(mr, space);
    }
}

A
Avi Kivity 已提交
1631 1632 1633
/*
 * Make the whole of @mr a single coalesced range: any existing coalesced
 * ranges are cleared first, then one range [0, size of @mr) is added.
 */
void memory_region_set_coalescing(MemoryRegion *mr)
{
    uint64_t whole_size;

    memory_region_clear_coalescing(mr);
    whole_size = int128_get64(mr->size);
    memory_region_add_coalescing(mr, 0, whole_size);
}

void memory_region_add_coalescing(MemoryRegion *mr,
A
Avi Kivity 已提交
1638
                                  hwaddr offset,
A
Avi Kivity 已提交
1639 1640
                                  uint64_t size)
{
1641
    CoalescedMemoryRange *cmr = g_malloc(sizeof(*cmr));
A
Avi Kivity 已提交
1642

1643
    cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size));
A
Avi Kivity 已提交
1644 1645
    QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link);
    memory_region_update_coalesced_range(mr);
1646
    memory_region_set_flush_coalesced(mr);
A
Avi Kivity 已提交
1647 1648 1649 1650 1651
}

/*
 * Drop every coalesced range of @mr.
 *
 * The coalesced MMIO buffer is flushed and the flush_coalesced_mmio flag
 * is cleared unconditionally.  The address spaces are only refreshed when
 * at least one range was actually removed.
 */
void memory_region_clear_coalescing(MemoryRegion *mr)
{
    CoalescedMemoryRange *range;
    bool had_ranges;

    qemu_flush_coalesced_mmio_buffer();
    mr->flush_coalesced_mmio = false;

    had_ranges = !QTAILQ_EMPTY(&mr->coalesced);
    while ((range = QTAILQ_FIRST(&mr->coalesced)) != NULL) {
        QTAILQ_REMOVE(&mr->coalesced, range, link);
        g_free(range);
    }

    if (had_ranges) {
        memory_region_update_coalesced_range(mr);
    }
}

1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681
/* Set the flush_coalesced_mmio flag on @mr. */
void memory_region_set_flush_coalesced(MemoryRegion *mr)
{
    mr->flush_coalesced_mmio = true;
}

/*
 * Flush the coalesced MMIO buffer and clear @mr's flush_coalesced_mmio
 * flag.  The flag is left set while @mr still has coalesced ranges.
 */
void memory_region_clear_flush_coalesced(MemoryRegion *mr)
{
    qemu_flush_coalesced_mmio_buffer();
    if (!QTAILQ_EMPTY(&mr->coalesced)) {
        return;
    }
    mr->flush_coalesced_mmio = false;
}

1682 1683 1684 1685 1686 1687 1688 1689 1690 1691
/* Set the global_locking flag on @mr. */
void memory_region_set_global_locking(MemoryRegion *mr)
{
    mr->global_locking = true;
}

/* Clear the global_locking flag on @mr. */
void memory_region_clear_global_locking(MemoryRegion *mr)
{
    mr->global_locking = false;
}

P
Pavel Fedin 已提交
1692 1693
/* Set once the "eventfd without MMIO binding" message has been reported,
 * so that memory_region_add_eventfd() emits it at most one time.
 */
static bool userspace_eventfd_warning;

A
Avi Kivity 已提交
1694
void memory_region_add_eventfd(MemoryRegion *mr,
A
Avi Kivity 已提交
1695
                               hwaddr addr,
A
Avi Kivity 已提交
1696 1697 1698
                               unsigned size,
                               bool match_data,
                               uint64_t data,
1699
                               EventNotifier *e)
A
Avi Kivity 已提交
1700 1701
{
    MemoryRegionIoeventfd mrfd = {
1702 1703
        .addr.start = int128_make64(addr),
        .addr.size = int128_make64(size),
A
Avi Kivity 已提交
1704 1705
        .match_data = match_data,
        .data = data,
1706
        .e = e,
A
Avi Kivity 已提交
1707 1708 1709
    };
    unsigned i;

P
Pavel Fedin 已提交
1710 1711 1712 1713 1714 1715 1716
    if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
                            userspace_eventfd_warning))) {
        userspace_eventfd_warning = true;
        error_report("Using eventfd without MMIO binding in KVM. "
                     "Suboptimal performance expected");
    }

1717 1718 1719
    if (size) {
        adjust_endianness(mr, &mrfd.data, size);
    }
1720
    memory_region_transaction_begin();
A
Avi Kivity 已提交
1721 1722 1723 1724 1725 1726
    for (i = 0; i < mr->ioeventfd_nb; ++i) {
        if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
            break;
        }
    }
    ++mr->ioeventfd_nb;
1727
    mr->ioeventfds = g_realloc(mr->ioeventfds,
A
Avi Kivity 已提交
1728 1729 1730 1731
                                  sizeof(*mr->ioeventfds) * mr->ioeventfd_nb);
    memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i],
            sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
    mr->ioeventfds[i] = mrfd;
1732
    ioeventfd_update_pending |= mr->enabled;
1733
    memory_region_transaction_commit();
A
Avi Kivity 已提交
1734 1735 1736
}

void memory_region_del_eventfd(MemoryRegion *mr,
A
Avi Kivity 已提交
1737
                               hwaddr addr,
A
Avi Kivity 已提交
1738 1739 1740
                               unsigned size,
                               bool match_data,
                               uint64_t data,
1741
                               EventNotifier *e)
A
Avi Kivity 已提交
1742 1743
{
    MemoryRegionIoeventfd mrfd = {
1744 1745
        .addr.start = int128_make64(addr),
        .addr.size = int128_make64(size),
A
Avi Kivity 已提交
1746 1747
        .match_data = match_data,
        .data = data,
1748
        .e = e,
A
Avi Kivity 已提交
1749 1750 1751
    };
    unsigned i;

1752 1753 1754
    if (size) {
        adjust_endianness(mr, &mrfd.data, size);
    }
1755
    memory_region_transaction_begin();
A
Avi Kivity 已提交
1756 1757 1758 1759 1760 1761 1762 1763 1764
    for (i = 0; i < mr->ioeventfd_nb; ++i) {
        if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
            break;
        }
    }
    assert(i != mr->ioeventfd_nb);
    memmove(&mr->ioeventfds[i], &mr->ioeventfds[i+1],
            sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb - (i+1)));
    --mr->ioeventfd_nb;
1765
    mr->ioeventfds = g_realloc(mr->ioeventfds,
A
Avi Kivity 已提交
1766
                                  sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
1767
    ioeventfd_update_pending |= mr->enabled;
1768
    memory_region_transaction_commit();
A
Avi Kivity 已提交
1769 1770
}

1771
/*
 * Link @subregion into the subregion list of its container.
 *
 * subregion->container and subregion->addr must already be set.  A
 * reference on @subregion is taken, and the region is inserted in front
 * of the first sibling whose priority is not higher than its own (or at
 * the tail if there is none).  The first loop only detects collisions
 * between non-overlappable siblings; the diagnostic it would print is
 * compiled out.
 */
static void memory_region_update_container_subregions(MemoryRegion *subregion)
{
    hwaddr offset = subregion->addr;
    MemoryRegion *container = subregion->container;
    MemoryRegion *sibling;

    memory_region_transaction_begin();

    memory_region_ref(subregion);

    QTAILQ_FOREACH(sibling, &container->subregions, subregions_link) {
        if (subregion->may_overlap || sibling->may_overlap) {
            continue;
        }
        /* Entirely above or entirely below the sibling: no collision. */
        if (int128_ge(int128_make64(offset),
                      int128_add(int128_make64(sibling->addr), sibling->size))
            || int128_le(int128_add(int128_make64(offset), subregion->size),
                         int128_make64(sibling->addr))) {
            continue;
        }
#if 0
        printf("warning: subregion collision %llx/%llx (%s) "
               "vs %llx/%llx (%s)\n",
               (unsigned long long)offset,
               (unsigned long long)int128_get64(subregion->size),
               subregion->name,
               (unsigned long long)sibling->addr,
               (unsigned long long)int128_get64(sibling->size),
               sibling->name);
#endif
    }

    /* QTAILQ_FOREACH leaves the cursor NULL when it runs to completion. */
    QTAILQ_FOREACH(sibling, &container->subregions, subregions_link) {
        if (subregion->priority >= sibling->priority) {
            break;
        }
    }
    if (sibling) {
        QTAILQ_INSERT_BEFORE(sibling, subregion, subregions_link);
    } else {
        QTAILQ_INSERT_TAIL(&container->subregions, subregion, subregions_link);
    }

    memory_region_update_pending |= container->enabled && subregion->enabled;
    memory_region_transaction_commit();
}

1813 1814 1815 1816
/*
 * Shared tail of the add_subregion variants: record @mr and @offset in
 * @subregion and link it into @mr's subregion list.  @subregion must not
 * already have a container (asserted).
 */
static void memory_region_add_subregion_common(MemoryRegion *mr,
                                               hwaddr offset,
                                               MemoryRegion *subregion)
{
    assert(subregion->container == NULL);

    subregion->addr = offset;
    subregion->container = mr;
    memory_region_update_container_subregions(subregion);
}
A
Avi Kivity 已提交
1822 1823

void memory_region_add_subregion(MemoryRegion *mr,
A
Avi Kivity 已提交
1824
                                 hwaddr offset,
A
Avi Kivity 已提交
1825 1826 1827 1828 1829 1830 1831 1832
                                 MemoryRegion *subregion)
{
    subregion->may_overlap = false;
    subregion->priority = 0;
    memory_region_add_subregion_common(mr, offset, subregion);
}

void memory_region_add_subregion_overlap(MemoryRegion *mr,
A
Avi Kivity 已提交
1833
                                         hwaddr offset,
A
Avi Kivity 已提交
1834
                                         MemoryRegion *subregion,
1835
                                         int priority)
A
Avi Kivity 已提交
1836 1837 1838 1839 1840 1841 1842 1843 1844
{
    subregion->may_overlap = true;
    subregion->priority = priority;
    memory_region_add_subregion_common(mr, offset, subregion);
}

/*
 * Remove @subregion from its container @mr.
 *
 * @subregion must currently be a child of @mr (asserted).  It is unlinked
 * from mr->subregions, its container pointer is cleared and the reference
 * taken when it was added is dropped.  A topology update is scheduled if
 * both regions were enabled.
 */
void memory_region_del_subregion(MemoryRegion *mr,
                                 MemoryRegion *subregion)
{
    bool was_visible;

    memory_region_transaction_begin();
    assert(subregion->container == mr);

    /* Sample the flags before the unref below: @subregion must not be
     * dereferenced once the reference held on it has been dropped.
     */
    was_visible = mr->enabled && subregion->enabled;

    subregion->container = NULL;
    QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);
    memory_region_unref(subregion);

    memory_region_update_pending |= was_visible;
    memory_region_transaction_commit();
}

/*
 * Enable or disable @mr.  Nothing happens when the state is unchanged;
 * otherwise the flag is updated inside a memory transaction and a
 * topology update is requested.
 */
void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
{
    if (mr->enabled != enabled) {
        memory_region_transaction_begin();
        mr->enabled = enabled;
        memory_region_update_pending = true;
        memory_region_transaction_commit();
    }
}
A
Avi Kivity 已提交
1864

1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880
/*
 * Change the size of @mr.
 *
 * A @size of UINT64_MAX is stored as int128_2_64(); every other value is
 * stored as given.  Nothing happens when the resulting size equals the
 * current one; otherwise the update runs inside a memory transaction.
 */
void memory_region_set_size(MemoryRegion *mr, uint64_t size)
{
    Int128 new_size = (size == UINT64_MAX) ? int128_2_64()
                                           : int128_make64(size);

    if (!int128_eq(new_size, mr->size)) {
        memory_region_transaction_begin();
        mr->size = new_size;
        memory_region_update_pending = true;
        memory_region_transaction_commit();
    }
}

1881
/*
 * Remove @mr from its container and insert it again so that its position
 * in the container's subregion list is recomputed.  Does nothing for a
 * region without a container.
 *
 * An extra reference is held across the operation because
 * memory_region_del_subregion() drops one.
 */
static void memory_region_readd_subregion(MemoryRegion *mr)
{
    MemoryRegion *parent = mr->container;

    if (!parent) {
        return;
    }

    memory_region_transaction_begin();
    memory_region_ref(mr);

    memory_region_del_subregion(parent, mr);
    /* del_subregion cleared the link; restore it before re-inserting. */
    mr->container = parent;
    memory_region_update_container_subregions(mr);

    memory_region_unref(mr);
    memory_region_transaction_commit();
}
1895

1896 1897 1898 1899 1900 1901
/*
 * Move @mr to offset @addr.  When the address actually changes, the
 * region is re-added to its container (if it has one).
 */
void memory_region_set_address(MemoryRegion *mr, hwaddr addr)
{
    if (mr->addr == addr) {
        return;
    }

    mr->addr = addr;
    memory_region_readd_subregion(mr);
}

A
Avi Kivity 已提交
1904
/*
 * Change the alias offset of @mr, which must be an alias region
 * (asserted).  No transaction is opened when @offset is already the
 * current value.
 */
void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset)
{
    assert(mr->alias);

    if (mr->alias_offset != offset) {
        memory_region_transaction_begin();
        mr->alias_offset = offset;
        memory_region_update_pending |= mr->enabled;
        memory_region_transaction_commit();
    }
}

1918 1919 1920 1921 1922
/* Return the value of @mr's align field. */
uint64_t memory_region_get_alignment(const MemoryRegion *mr)
{
    return mr->align;
}

1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935
/*
 * bsearch() comparator: @addr_ points to the AddrRange being looked up,
 * @fr_ to a FlatRange element.
 *
 * Returns -1 when the key ends at or before the start of the flat range,
 * 1 when it starts at or after its end, and 0 otherwise.
 */
static int cmp_flatrange_addr(const void *addr_, const void *fr_)
{
    const AddrRange *key = addr_;
    const FlatRange *candidate = fr_;

    if (int128_le(addrrange_end(*key), candidate->addr.start)) {
        return -1;
    }
    if (int128_ge(key->start, addrrange_end(candidate->addr))) {
        return 1;
    }
    return 0;
}

1936
/*
 * Binary-search @view for a flat range for which cmp_flatrange_addr()
 * returns 0 against @addr.  Returns NULL when there is none.
 */
static FlatRange *flatview_lookup(FlatView *view, AddrRange addr)
{
    return bsearch(&addr, view->ranges, view->nr,
                   sizeof(view->ranges[0]), cmp_flatrange_addr);
}

1942 1943 1944 1945 1946
/* Return true when @mr has a container. */
bool memory_region_is_mapped(MemoryRegion *mr)
{
    return mr->container != NULL;
}

1947 1948 1949 1950 1951
/*
 * Same as memory_region_find, but it does not add a reference to the
 * returned region.  It must be called from an RCU critical section.
 *
 * @addr is relative to @mr; it is converted to an absolute address by
 * adding the addr of @mr and of every container up to the root.  The
 * result describes the lowest flat range of the root's address space
 * that intersects [@addr, @addr + @size), clipped to that request.
 * ret.mr is NULL when the root is not attached to an address space or
 * nothing is mapped there.
 */
static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr,
                                                  hwaddr addr, uint64_t size)
{
    MemoryRegionSection ret = { .mr = NULL };
    MemoryRegion *root = mr;
    AddressSpace *as;
    AddrRange wanted, hit;
    FlatView *view;
    FlatRange *fr;

    /* Walk up to the root, accumulating the absolute address. */
    addr += root->addr;
    while (root->container) {
        root = root->container;
        addr += root->addr;
    }

    as = memory_region_to_address_space(root);
    if (!as) {
        return ret;
    }
    wanted = addrrange_make(int128_make64(addr), int128_make64(size));

    view = atomic_rcu_read(&as->current_map);
    fr = flatview_lookup(view, wanted);
    if (!fr) {
        return ret;
    }

    /* The lookup may land on any intersecting range; rewind to the first. */
    while (fr > view->ranges && addrrange_intersects(fr[-1].addr, wanted)) {
        --fr;
    }

    hit = addrrange_intersection(wanted, fr->addr);

    ret.mr = fr->mr;
    ret.address_space = as;
    ret.offset_within_region = fr->offset_in_region
        + int128_get64(int128_sub(hit.start, fr->addr.start));
    ret.size = hit.size;
    ret.offset_within_address_space = int128_get64(hit.start);
    ret.readonly = fr->readonly;
    return ret;
}

/*
 * Look up the section covering [@addr, @addr + @size) relative to @mr.
 *
 * The lookup runs under the RCU read lock.  When a region is found
 * (ret.mr != NULL) a reference is taken on it before the lock is
 * released.
 */
MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                       hwaddr addr, uint64_t size)
{
    MemoryRegionSection section;

    rcu_read_lock();
    section = memory_region_find_rcu(mr, addr, size);
    if (section.mr != NULL) {
        memory_region_ref(section.mr);
    }
    rcu_read_unlock();

    return section;
}

2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
/*
 * Return true when the byte at @addr (relative to @container) resolves to
 * a region other than @container itself.  No reference is taken on the
 * region that is found.
 */
bool memory_region_present(MemoryRegion *container, hwaddr addr)
{
    MemoryRegionSection hit;

    rcu_read_lock();
    hit = memory_region_find_rcu(container, addr, 1);
    rcu_read_unlock();

    return hit.mr != NULL && hit.mr != container;
}

2017
/*
 * Invoke the log_sync listener callback, in forward order, for every flat
 * range of the view returned by address_space_get_flatview(@as).
 */
void address_space_sync_dirty_bitmap(AddressSpace *as)
{
    FlatView *view = address_space_get_flatview(as);
    FlatRange *fr;

    FOR_EACH_FLAT_RANGE(fr, view) {
        MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
    }

    flatview_unref(view);
}

void memory_global_dirty_log_start(void)
{
    global_dirty_log = true;
2032

2033
    MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
2034 2035 2036 2037 2038

    /* Refresh DIRTY_LOG_MIGRATION bit.  */
    memory_region_transaction_begin();
    memory_region_update_pending = true;
    memory_region_transaction_commit();
2039 2040 2041 2042 2043
}

void memory_global_dirty_log_stop(void)
{
    global_dirty_log = false;
2044 2045 2046 2047 2048 2049

    /* Refresh DIRTY_LOG_MIGRATION bit.  */
    memory_region_transaction_begin();
    memory_region_update_pending = true;
    memory_region_transaction_commit();

2050
    MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse);
2051 2052 2053 2054 2055
}

/*
 * Bring @listener up to date with the current contents of @as.
 *
 * Nothing is done when the listener has a filter that names a different
 * address space.  Otherwise the callbacks run in this order, each only if
 * the listener provides it: begin, log_global_start (only while global
 * dirty logging is on), then per flat range log_start (only for a
 * non-zero dirty_log_mask) and region_add, and finally commit.
 */
static void listener_add_address_space(MemoryListener *listener,
                                       AddressSpace *as)
{
    AddressSpace *filter = listener->address_space_filter;
    FlatView *view;
    FlatRange *fr;

    if (filter != NULL && filter != as) {
        return;
    }

    if (listener->begin) {
        listener->begin(listener);
    }
    if (global_dirty_log && listener->log_global_start) {
        listener->log_global_start(listener);
    }

    view = address_space_get_flatview(as);
    FOR_EACH_FLAT_RANGE(fr, view) {
        MemoryRegionSection section = {
            .mr = fr->mr,
            .address_space = as,
            .offset_within_region = fr->offset_in_region,
            .size = fr->addr.size,
            .offset_within_address_space = int128_get64(fr->addr.start),
            .readonly = fr->readonly,
        };

        if (fr->dirty_log_mask && listener->log_start) {
            listener->log_start(listener, &section, 0, fr->dirty_log_mask);
        }
        if (listener->region_add) {
            listener->region_add(listener, &section);
        }
    }

    if (listener->commit) {
        listener->commit(listener);
    }
    flatview_unref(view);
}

2096
/*
 * Register @listener, optionally restricted to address space @filter.
 *
 * The listener is appended when the list is empty or its priority is not
 * lower than that of the current last entry; otherwise it is inserted in
 * front of the first entry with a strictly higher priority.  It is then
 * replayed against every existing address space.
 */
void memory_listener_register(MemoryListener *listener, AddressSpace *filter)
{
    MemoryListener *other = NULL;
    AddressSpace *as;
    bool append;

    listener->address_space_filter = filter;

    append = QTAILQ_EMPTY(&memory_listeners)
             || listener->priority >= QTAILQ_LAST(&memory_listeners,
                                                  memory_listeners)->priority;
    if (append) {
        QTAILQ_INSERT_TAIL(&memory_listeners, listener, link);
    } else {
        /* The last entry has a higher priority, so the scan always stops
         * on a valid element.
         */
        QTAILQ_FOREACH(other, &memory_listeners, link) {
            if (listener->priority < other->priority) {
                break;
            }
        }
        QTAILQ_INSERT_BEFORE(other, listener, link);
    }

    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        listener_add_address_space(listener, as);
    }
}

/*
 * Unlink @listener from the global listener list.  No listener callbacks
 * are invoked here.
 */
void memory_listener_unregister(MemoryListener *listener)
{
    QTAILQ_REMOVE(&memory_listeners, listener, link);
}
2124

2125
/*
 * Initialise the caller-provided @as with @root as its root region.
 *
 * A reference is taken on @root.  The address space starts with a
 * reference count of 1, a freshly initialised flat view and no
 * ioeventfds, is appended to the global address_spaces list and named
 * @name ("anonymous" when @name is NULL; the string is duplicated).
 * Everything happens inside one memory transaction.
 */
void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
{
    FlatView *initial_view;

    memory_region_ref(root);
    memory_region_transaction_begin();

    as->ref_count = 1;
    as->root = root;
    as->malloced = false;

    initial_view = g_new(FlatView, 1);
    flatview_init(initial_view);
    as->current_map = initial_view;

    as->ioeventfd_nb = 0;
    as->ioeventfds = NULL;

    QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
    as->name = g_strdup(name ? name : "anonymous");
    address_space_init_dispatch(as);

    memory_region_update_pending |= root->enabled;
    memory_region_transaction_commit();
}
A
Avi Kivity 已提交
2142

2143
/*
 * Final teardown of @as, scheduled through call_rcu() by
 * address_space_destroy().
 *
 * Asserts that no registered listener still filters on @as, then releases
 * the dispatch state, the flat view, the name, the ioeventfd array and
 * the reference on the root region.  The AddressSpace itself is freed
 * only when its malloced flag is set.
 */
static void do_address_space_destroy(AddressSpace *as)
{
    bool heap_allocated = as->malloced;
    MemoryListener *l;

    address_space_destroy_dispatch(as);

    QTAILQ_FOREACH(l, &memory_listeners, link) {
        assert(l->address_space_filter != as);
    }

    flatview_unref(as->current_map);
    g_free(as->name);
    g_free(as->ioeventfds);
    memory_region_unref(as->root);

    if (heap_allocated) {
        g_free(as);
    }
}

/*
 * Return a heap-allocated address space rooted at @root.
 *
 * If a malloced address space with the same root already exists, its
 * reference count is bumped and it is returned (@name is ignored in that
 * case).  Otherwise a new zero-filled AddressSpace is allocated,
 * initialised with address_space_init() and flagged as malloced.
 */
AddressSpace *address_space_init_shareable(MemoryRegion *root, const char *name)
{
    AddressSpace *existing;
    AddressSpace *fresh;

    QTAILQ_FOREACH(existing, &address_spaces, address_spaces_link) {
        if (existing->malloced && existing->root == root) {
            existing->ref_count++;
            return existing;
        }
    }

    fresh = g_new0(AddressSpace, 1);
    address_space_init(fresh, root, name);
    /* address_space_init() resets the flag, so set it afterwards. */
    fresh->malloced = true;
    return fresh;
}

2180 2181
/*
 * Drop one reference on @as and tear it down when it was the last.
 *
 * Teardown clears the root inside a transaction, removes @as from the
 * global list, unregisters it, and defers the actual release to
 * do_address_space_destroy() via call_rcu().
 */
void address_space_destroy(AddressSpace *as)
{
    MemoryRegion *root = as->root;

    if (--as->ref_count != 0) {
        return;
    }

    /* Flush out anything from MemoryListeners listening in on this */
    memory_region_transaction_begin();
    as->root = NULL;
    memory_region_transaction_commit();

    QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
    address_space_unregister(as);

    /* At this point, as->dispatch and as->current_map are dummy
     * entries that the guest should never use.  Wait for the old
     * values to expire before freeing the data.
     */
    as->root = root;    /* restored so the deferred destroy can unref it */
    call_rcu(as, do_address_space_destroy, rcu);
}

B
Blue Swirl 已提交
2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213
/* List node used by the mtree printing code below to queue regions. */
typedef struct MemoryRegionList MemoryRegionList;

struct MemoryRegionList {
    const MemoryRegion *mr;                 /* queued region */
    QTAILQ_ENTRY(MemoryRegionList) queue;   /* list linkage */
};

/* Head type for a tail queue of MemoryRegionList nodes. */
typedef QTAILQ_HEAD(queue, MemoryRegionList) MemoryRegionListHead;

static void mtree_print_mr(fprintf_function mon_printf, void *f,
                           const MemoryRegion *mr, unsigned int level,
A
Avi Kivity 已提交
2214
                           hwaddr base,
2215
                           MemoryRegionListHead *alias_print_queue)
B
Blue Swirl 已提交
2216
{
2217 2218
    MemoryRegionList *new_ml, *ml, *next_ml;
    MemoryRegionListHead submr_print_queue;
B
Blue Swirl 已提交
2219 2220 2221
    const MemoryRegion *submr;
    unsigned int i;

2222
    if (!mr) {
B
Blue Swirl 已提交
2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234
        return;
    }

    for (i = 0; i < level; i++) {
        mon_printf(f, "  ");
    }

    if (mr->alias) {
        MemoryRegionList *ml;
        bool found = false;

        /* check if the alias is already in the queue */
2235
        QTAILQ_FOREACH(ml, alias_print_queue, queue) {
P
Paolo Bonzini 已提交
2236
            if (ml->mr == mr->alias) {
B
Blue Swirl 已提交
2237 2238 2239 2240 2241 2242 2243
                found = true;
            }
        }

        if (!found) {
            ml = g_new(MemoryRegionList, 1);
            ml->mr = mr->alias;
2244
            QTAILQ_INSERT_TAIL(alias_print_queue, ml, queue);
B
Blue Swirl 已提交
2245
        }
2246 2247
        mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
                   " (prio %d, %c%c): alias %s @%s " TARGET_FMT_plx
2248
                   "-" TARGET_FMT_plx "%s\n",
B
Blue Swirl 已提交
2249
                   base + mr->addr,
2250
                   base + mr->addr
2251 2252 2253
                   + (int128_nz(mr->size) ?
                      (hwaddr)int128_get64(int128_sub(mr->size,
                                                      int128_one())) : 0),
J
Jan Kiszka 已提交
2254
                   mr->priority,
2255 2256 2257
                   mr->romd_mode ? 'R' : '-',
                   !mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
                                                                       : '-',
2258 2259
                   memory_region_name(mr),
                   memory_region_name(mr->alias),
B
Blue Swirl 已提交
2260
                   mr->alias_offset,
2261
                   mr->alias_offset
2262 2263
                   + (int128_nz(mr->size) ?
                      (hwaddr)int128_get64(int128_sub(mr->size,
2264 2265
                                                      int128_one())) : 0),
                   mr->enabled ? "" : " [disabled]");
B
Blue Swirl 已提交
2266
    } else {
2267
        mon_printf(f,
2268
                   TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): %s%s\n",
B
Blue Swirl 已提交
2269
                   base + mr->addr,
2270
                   base + mr->addr
2271 2272 2273
                   + (int128_nz(mr->size) ?
                      (hwaddr)int128_get64(int128_sub(mr->size,
                                                      int128_one())) : 0),
J
Jan Kiszka 已提交
2274
                   mr->priority,
2275 2276 2277
                   mr->romd_mode ? 'R' : '-',
                   !mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
                                                                       : '-',
2278 2279
                   memory_region_name(mr),
                   mr->enabled ? "" : " [disabled]");
B
Blue Swirl 已提交
2280
    }
2281 2282 2283

    QTAILQ_INIT(&submr_print_queue);

B
Blue Swirl 已提交
2284
    QTAILQ_FOREACH(submr, &mr->subregions, subregions_link) {
2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305
        new_ml = g_new(MemoryRegionList, 1);
        new_ml->mr = submr;
        QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
            if (new_ml->mr->addr < ml->mr->addr ||
                (new_ml->mr->addr == ml->mr->addr &&
                 new_ml->mr->priority > ml->mr->priority)) {
                QTAILQ_INSERT_BEFORE(ml, new_ml, queue);
                new_ml = NULL;
                break;
            }
        }
        if (new_ml) {
            QTAILQ_INSERT_TAIL(&submr_print_queue, new_ml, queue);
        }
    }

    QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
        mtree_print_mr(mon_printf, f, ml->mr, level + 1, base + mr->addr,
                       alias_print_queue);
    }

A
Avi Kivity 已提交
2306
    QTAILQ_FOREACH_SAFE(ml, &submr_print_queue, queue, next_ml) {
2307
        g_free(ml);
B
Blue Swirl 已提交
2308 2309 2310 2311 2312 2313 2314
    }
}

/*
 * Dump the memory topology through @mon_printf/@f.
 *
 * Every address space is printed with the tree below its root region.
 * Alias targets encountered on the way are collected in a queue and
 * printed afterwards as separate "memory-region" trees; printing those
 * may append further targets to the same queue while it is being walked.
 */
void mtree_info(fprintf_function mon_printf, void *f)
{
    MemoryRegionListHead alias_queue;
    MemoryRegionList *entry, *next_entry;
    AddressSpace *as;

    QTAILQ_INIT(&alias_queue);

    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        mon_printf(f, "address-space: %s\n", as->name);
        mtree_print_mr(mon_printf, f, as->root, 1, 0, &alias_queue);
        mon_printf(f, "\n");
    }

    /* print aliased regions */
    QTAILQ_FOREACH(entry, &alias_queue, queue) {
        mon_printf(f, "memory-region: %s\n", memory_region_name(entry->mr));
        mtree_print_mr(mon_printf, f, entry->mr, 1, 0, &alias_queue);
        mon_printf(f, "\n");
    }

    QTAILQ_FOREACH_SAFE(entry, &alias_queue, queue, next_entry) {
        g_free(entry);
    }
}
P
Peter Crosthwaite 已提交
2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350

/* QOM type description for TYPE_MEMORY_REGION: a TYPE_OBJECT subtype whose
 * instances are MemoryRegion structures, set up by memory_region_initfn()
 * and torn down by memory_region_finalize().
 */
static const TypeInfo memory_region_info = {
    .parent             = TYPE_OBJECT,
    .name               = TYPE_MEMORY_REGION,
    .instance_size      = sizeof(MemoryRegion),
    .instance_init      = memory_region_initfn,
    .instance_finalize  = memory_region_finalize,
};

/* Register the memory region type described by memory_region_info. */
static void memory_register_types(void)
{
    type_register_static(&memory_region_info);
}

/* Hand memory_register_types() to the type_init() registration hook. */
type_init(memory_register_types)