memory.c 73.5 KB
Newer Older
A
Avi Kivity 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Physical memory management
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

P
Peter Maydell 已提交
16
#include "qemu/osdep.h"
17 18 19
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/ioport.h"
20
#include "qapi/visitor.h"
21
#include "qemu/bitops.h"
P
Pavel Fedin 已提交
22
#include "qemu/error-report.h"
23
#include "qom/object.h"
24
#include "trace.h"
A
Avi Kivity 已提交
25

26
#include "exec/memory-internal.h"
27
#include "exec/ram_addr.h"
P
Pavel Fedin 已提交
28
#include "sysemu/kvm.h"
29
#include "sysemu/sysemu.h"
30

31 32
//#define DEBUG_UNASSIGNED

33 34
#define RAM_ADDR_INVALID (~(ram_addr_t)0)

35 36
static unsigned memory_region_transaction_depth;
static bool memory_region_update_pending;
37
static bool ioeventfd_update_pending;
38 39
static bool global_dirty_log = false;

40 41
static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners
    = QTAILQ_HEAD_INITIALIZER(memory_listeners);
A
Avi Kivity 已提交
42

43 44 45
static QTAILQ_HEAD(, AddressSpace) address_spaces
    = QTAILQ_HEAD_INITIALIZER(address_spaces);

A
Avi Kivity 已提交
46 47
typedef struct AddrRange AddrRange;

A
Avi Kivity 已提交
48
/*
49
 * Note that signed integers are needed for negative offsetting in aliases
A
Avi Kivity 已提交
50 51
 * (large MemoryRegion::alias_offset).
 */
A
Avi Kivity 已提交
52
struct AddrRange {
53 54
    Int128 start;
    Int128 size;
A
Avi Kivity 已提交
55 56
};

57
/* Build an AddrRange from a start address and a size. */
static AddrRange addrrange_make(Int128 start, Int128 size)
{
    AddrRange r;

    r.start = start;
    r.size = size;
    return r;
}

/* Return true when both ranges have the same start and the same size. */
static bool addrrange_equal(AddrRange r1, AddrRange r2)
{
    if (!int128_eq(r1.start, r2.start)) {
        return false;
    }
    return int128_eq(r1.size, r2.size);
}

67
/* Return the exclusive end of @r, i.e. start + size. */
static Int128 addrrange_end(AddrRange r)
{
    Int128 end = int128_add(r.start, r.size);

    return end;
}

72
/* Return a copy of @range moved by @delta; the size is left untouched. */
static AddrRange addrrange_shift(AddrRange range, Int128 delta)
{
    AddrRange shifted = range;

    shifted.start = int128_add(range.start, delta);
    return shifted;
}

78 79 80 81 82 83
/* Return true when start <= @addr < end of @range (the end is exclusive). */
static bool addrrange_contains(AddrRange range, Int128 addr)
{
    if (int128_lt(addr, range.start)) {
        return false;
    }
    return int128_lt(addr, addrrange_end(range));
}

A
Avi Kivity 已提交
84 85
/*
 * Two ranges overlap exactly when one of them contains the start of the
 * other.
 */
static bool addrrange_intersects(AddrRange r1, AddrRange r2)
{
    if (addrrange_contains(r1, r2.start)) {
        return true;
    }
    return addrrange_contains(r2, r1.start);
}

/*
 * Return the range running from the larger of the two starts to the smaller
 * of the two ends.  No overlap check is done here: for disjoint inputs the
 * size is simply end - start as computed.
 */
static AddrRange addrrange_intersection(AddrRange r1, AddrRange r2)
{
    Int128 lo = int128_max(r1.start, r2.start);
    Int128 hi = int128_min(addrrange_end(r1), addrrange_end(r2));
    Int128 len = int128_sub(hi, lo);

    return addrrange_make(lo, len);
}

97 98
/* Order in which the MEMORY_LISTENER_CALL* macros walk the listener list. */
enum ListenerDirection {
    Forward,
    Reverse
};

99 100 101 102 103 104 105 106
/*
 * Decide whether @listener should be told about @section: a listener with
 * no address_space_filter matches everything, otherwise the filter must be
 * the section's address space.
 */
static bool memory_listener_match(MemoryListener *listener,
                                  MemoryRegionSection *section)
{
    if (!listener->address_space_filter) {
        return true;
    }
    return listener->address_space_filter == section->address_space;
}

/*
 * Invoke @_callback on every registered listener that implements it, walking
 * the memory_listeners list head-to-tail (Forward) or tail-to-head (Reverse).
 * The listener itself is passed as first argument, followed by @_args.
 * Any other @_direction value aborts.
 */
#define MEMORY_LISTENER_CALL_GLOBAL(_callback, _direction, _args...)    \
    do {                                                                \
        MemoryListener *_listener;                                      \
                                                                        \
        switch (_direction) {                                           \
        case Forward:                                                   \
            QTAILQ_FOREACH(_listener, &memory_listeners, link) {        \
                if (_listener->_callback) {                             \
                    _listener->_callback(_listener, ##_args);           \
                }                                                       \
            }                                                           \
            break;                                                      \
        case Reverse:                                                   \
            QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners,        \
                                   memory_listeners, link) {            \
                if (_listener->_callback) {                             \
                    _listener->_callback(_listener, ##_args);           \
                }                                                       \
            }                                                           \
            break;                                                      \
        default:                                                        \
            abort();                                                    \
        }                                                               \
    } while (0)

131 132 133 134 135 136 137
/*
 * Like MEMORY_LISTENER_CALL_GLOBAL, but section-scoped: a listener is only
 * called when it implements @_callback and memory_listener_match() accepts
 * @_section.  The callback receives the listener, @_section, then @_args.
 * Any @_direction other than Forward or Reverse aborts.
 */
#define MEMORY_LISTENER_CALL(_callback, _direction, _section, _args...) \
    do {                                                                \
        MemoryListener *_listener;                                      \
                                                                        \
        switch (_direction) {                                           \
        case Forward:                                                   \
            QTAILQ_FOREACH(_listener, &memory_listeners, link) {        \
                if (_listener->_callback                                \
                    && memory_listener_match(_listener, _section)) {    \
                    _listener->_callback(_listener, _section, ##_args); \
                }                                                       \
            }                                                           \
            break;                                                      \
        case Reverse:                                                   \
            QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners,        \
                                   memory_listeners, link) {            \
                if (_listener->_callback                                \
                    && memory_listener_match(_listener, _section)) {    \
                    _listener->_callback(_listener, _section, ##_args); \
                }                                                       \
            }                                                           \
            break;                                                      \
        default:                                                        \
            abort();                                                    \
        }                                                               \
    } while (0)

P
Paolo Bonzini 已提交
158
/*
 * Build a temporary MemoryRegionSection describing FlatRange @fr inside
 * address space @as and hand it to MEMORY_LISTENER_CALL for @callback.
 *
 * No need to ref/unref .mr, the FlatRange keeps it alive.
 */
#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...)  \
    MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) {       \
        .mr = (fr)->mr,                                                 \
        .address_space = (as),                                          \
        .offset_within_region = (fr)->offset_in_region,                 \
        .size = (fr)->addr.size,                                        \
        .offset_within_address_space = int128_get64((fr)->addr.start),  \
        .readonly = (fr)->readonly,                                     \
              }), ##_args)
168

A
Avi Kivity 已提交
169 170 171 172 173
/* One address range, kept as an entry of a QTAILQ list via @link. */
struct CoalescedMemoryRange {
    AddrRange addr;
    QTAILQ_ENTRY(CoalescedMemoryRange) link;
};

A
Avi Kivity 已提交
174 175 176 177
struct MemoryRegionIoeventfd {
    AddrRange addr;
    bool match_data;
    uint64_t data;
178
    EventNotifier *e;
A
Avi Kivity 已提交
179 180 181 182 183
};

/*
 * Strict ordering of ioeventfds: compare start address, then size, then
 * match_data, then (only when match_data is set) data, and finally the
 * EventNotifier pointer.  Returns true when @a sorts before @b.
 */
static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd a,
                                           MemoryRegionIoeventfd b)
{
    if (!int128_eq(a.addr.start, b.addr.start)) {
        return int128_lt(a.addr.start, b.addr.start);
    }
    if (!int128_eq(a.addr.size, b.addr.size)) {
        return int128_lt(a.addr.size, b.addr.size);
    }
    if (a.match_data != b.match_data) {
        return a.match_data < b.match_data;
    }
    /* From here on both entries agree on match_data. */
    if (a.match_data && a.data != b.data) {
        return a.data < b.data;
    }
    return a.e < b.e;
}

/* Two ioeventfds are equal when neither sorts before the other. */
static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd a,
                                          MemoryRegionIoeventfd b)
{
    if (memory_region_ioeventfd_before(a, b)) {
        return false;
    }
    return !memory_region_ioeventfd_before(b, a);
}

A
Avi Kivity 已提交
218 219 220 221 222 223
typedef struct FlatRange FlatRange;
typedef struct FlatView FlatView;

/* Range of memory in the global map.  Addresses are absolute. */
struct FlatRange {
    MemoryRegion *mr;
A
Avi Kivity 已提交
224
    hwaddr offset_in_region;
A
Avi Kivity 已提交
225
    AddrRange addr;
A
Avi Kivity 已提交
226
    uint8_t dirty_log_mask;
227
    bool romd_mode;
228
    bool readonly;
A
Avi Kivity 已提交
229 230 231 232 233 234
};

/* Flattened global view of current active memory hierarchy.  Kept in sorted
 * order.
 */
struct FlatView {
235
    struct rcu_head rcu;
236
    unsigned ref;
A
Avi Kivity 已提交
237 238 239 240 241
    FlatRange *ranges;
    unsigned nr;
    unsigned nr_allocated;
};

242 243
/* Forward declaration only; the struct body is not defined nearby. */
typedef struct AddressSpaceOps AddressSpaceOps;

A
Avi Kivity 已提交
244 245 246 247 248 249 250
/*
 * Loop header: step @var over each of the (view)->nr elements of
 * (view)->ranges, in array order.  The end bound is re-read each iteration.
 */
#define FOR_EACH_FLAT_RANGE(var, view)                                  \
    for ((var) = (view)->ranges;                                        \
         (var) < &(view)->ranges[(view)->nr];                           \
         ++(var))

/*
 * Compare two flat ranges on region, address range, offset in region,
 * romd_mode and readonly.  dirty_log_mask is deliberately not part of the
 * comparison.
 */
static bool flatrange_equal(FlatRange *a, FlatRange *b)
{
    if (a->mr != b->mr) {
        return false;
    }
    if (!addrrange_equal(a->addr, b->addr)) {
        return false;
    }
    if (a->offset_in_region != b->offset_in_region) {
        return false;
    }
    if (a->romd_mode != b->romd_mode) {
        return false;
    }
    return a->readonly == b->readonly;
}

/* Reset @view to an empty range array holding a single reference. */
static void flatview_init(FlatView *view)
{
    view->ranges = NULL;
    view->nr_allocated = 0;
    view->nr = 0;
    view->ref = 1;
}

/*
 * Copy *@range into slot @pos of @view, shifting later entries up by one
 * and taking a reference on the range's MemoryRegion.  The array is grown
 * (doubling, with a minimum of 10 entries) when it is full.  Keeping the
 * array sorted is the caller's job.
 */
static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
{
    unsigned tail = view->nr - pos;
    FlatRange *slot;

    if (view->nr == view->nr_allocated) {
        view->nr_allocated = MAX(2 * view->nr, 10);
        view->ranges = g_realloc(view->ranges,
                                 view->nr_allocated * sizeof(*view->ranges));
    }

    /* Take the slot address only after the array may have moved. */
    slot = view->ranges + pos;
    memmove(slot + 1, slot, tail * sizeof(FlatRange));
    *slot = *range;
    memory_region_ref(range->mr);
    ++view->nr;
}

/*
 * Drop the MemoryRegion reference held by every range of @view (taken in
 * flatview_insert()), then free the range array and the view itself.
 */
static void flatview_destroy(FlatView *view)
{
    /* Same type as view->nr, so the loop compare is not signed/unsigned. */
    unsigned i;

    for (i = 0; i < view->nr; i++) {
        memory_region_unref(view->ranges[i].mr);
    }
    g_free(view->ranges);
    g_free(view);
}

292 293 294 295 296 297 298 299 300 301 302 303
/* Take one more reference on @view by atomically incrementing view->ref. */
static void flatview_ref(FlatView *view)
{
    atomic_inc(&view->ref);
}

/*
 * Release one reference on @view; the view is destroyed when the count
 * read before the decrement was 1, i.e. this was the last reference.
 */
static void flatview_unref(FlatView *view)
{
    if (atomic_fetch_dec(&view->ref) != 1) {
        return;
    }
    flatview_destroy(view);
}

304 305
/*
 * Return true when @r2 can be folded into @r1: @r2 starts exactly where @r1
 * ends, both in the address space and inside the same MemoryRegion, and
 * the two agree on dirty_log_mask, romd_mode and readonly.
 */
static bool can_merge(FlatRange *r1, FlatRange *r2)
{
    Int128 r1_region_end;

    if (!int128_eq(addrrange_end(r1->addr), r2->addr.start)) {
        return false;
    }
    if (r1->mr != r2->mr) {
        return false;
    }
    r1_region_end = int128_add(int128_make64(r1->offset_in_region),
                               r1->addr.size);
    if (!int128_eq(r1_region_end, int128_make64(r2->offset_in_region))) {
        return false;
    }
    if (r1->dirty_log_mask != r2->dirty_log_mask) {
        return false;
    }
    if (r1->romd_mode != r2->romd_mode) {
        return false;
    }
    return r1->readonly == r2->readonly;
}

P
Peter Crosthwaite 已提交
316
/* Attempt to simplify a view by merging adjacent ranges */
317 318 319 320 321 322 323 324 325
static void flatview_simplify(FlatView *view)
{
    unsigned i, j;

    i = 0;
    while (i < view->nr) {
        j = i + 1;
        while (j < view->nr
               && can_merge(&view->ranges[j-1], &view->ranges[j])) {
326
            int128_addto(&view->ranges[i].addr.size, view->ranges[j].addr.size);
327 328 329 330 331 332 333 334 335
            ++j;
        }
        ++i;
        memmove(&view->ranges[i], &view->ranges[j],
                (view->nr - j) * sizeof(view->ranges[j]));
        view->nr -= j - i;
    }
}

336 337 338 339 340 341 342 343 344
/*
 * Return true when @mr is big-endian.  On a big-endian target that is any
 * endianness other than DEVICE_LITTLE_ENDIAN; otherwise it must be
 * DEVICE_BIG_ENDIAN.
 */
static bool memory_region_big_endian(MemoryRegion *mr)
{
#ifdef TARGET_WORDS_BIGENDIAN
    bool big = mr->ops->endianness != DEVICE_LITTLE_ENDIAN;
#else
    bool big = mr->ops->endianness == DEVICE_BIG_ENDIAN;
#endif

    return big;
}

P
Paolo Bonzini 已提交
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
/*
 * Return true when the endianness declared by @mr's ops is the opposite of
 * the target's: DEVICE_LITTLE_ENDIAN on a big-endian target, or
 * DEVICE_BIG_ENDIAN on a little-endian one.
 */
static bool memory_region_wrong_endianness(MemoryRegion *mr)
{
#ifdef TARGET_WORDS_BIGENDIAN
    bool swapped = mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
#else
    bool swapped = mr->ops->endianness == DEVICE_BIG_ENDIAN;
#endif

    return swapped;
}

/*
 * Byte-swap *@data in place when @mr has the opposite endianness of the
 * target.  @size is the access width in bytes; anything other than
 * 1, 2, 4 or 8 aborts (only checked when a swap is needed).
 */
static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
{
    if (!memory_region_wrong_endianness(mr)) {
        return;
    }

    switch (size) {
    case 1:
        /* A single byte has no byte order. */
        break;
    case 2:
        *data = bswap16(*data);
        break;
    case 4:
        *data = bswap32(*data);
        break;
    case 8:
        *data = bswap64(*data);
        break;
    default:
        abort();
    }
}

375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
/*
 * Read @size bytes at @addr through the old_mmio.read table (indexed by
 * ctz32(@size)), trace the access, and OR the masked result into *@value
 * at bit position @shift.  Always returns MEMTX_OK; @attrs is unused.
 */
static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
                                                       hwaddr addr,
                                                       uint64_t *value,
                                                       unsigned size,
                                                       unsigned shift,
                                                       uint64_t mask,
                                                       MemTxAttrs attrs)
{
    uint64_t raw = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);

    trace_memory_region_ops_read(mr, addr, raw, size);
    *value |= (raw & mask) << shift;
    return MEMTX_OK;
}

static MemTxResult  memory_region_read_accessor(MemoryRegion *mr,
392 393 394 395
                                                hwaddr addr,
                                                uint64_t *value,
                                                unsigned size,
                                                unsigned shift,
396 397
                                                uint64_t mask,
                                                MemTxAttrs attrs)
398 399 400
{
    uint64_t tmp;

401
    tmp = mr->ops->read(mr->opaque, addr, size);
402
    trace_memory_region_ops_read(mr, addr, tmp, size);
403
    *value |= (tmp & mask) << shift;
404
    return MEMTX_OK;
405 406
}

407 408 409 410 411 412 413
/*
 * Read @size bytes at @addr through mr->ops->read_with_attrs, trace the
 * access, and OR the masked result into *@value at bit position @shift.
 * The callback's MemTxResult is returned unchanged.
 */
static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr,
                                                          hwaddr addr,
                                                          uint64_t *value,
                                                          unsigned size,
                                                          unsigned shift,
                                                          uint64_t mask,
                                                          MemTxAttrs attrs)
{
    /* Starts at zero so the merge below is defined even if the callback
     * does not store anything. */
    uint64_t raw = 0;
    MemTxResult res;

    res = mr->ops->read_with_attrs(mr->opaque, addr, &raw, size, attrs);
    trace_memory_region_ops_read(mr, addr, raw, size);
    *value |= (raw & mask) << shift;
    return res;
}

424 425 426 427 428 429 430
/*
 * Extract the @mask-wide field at bit @shift from *@value, trace it, and
 * write it at @addr through the old_mmio.write table (indexed by
 * ctz32(@size)).  Always returns MEMTX_OK; @attrs is unused.
 */
static MemTxResult memory_region_oldmmio_write_accessor(MemoryRegion *mr,
                                                        hwaddr addr,
                                                        uint64_t *value,
                                                        unsigned size,
                                                        unsigned shift,
                                                        uint64_t mask,
                                                        MemTxAttrs attrs)
{
    uint64_t piece = (*value >> shift) & mask;

    trace_memory_region_ops_write(mr, addr, piece, size);
    mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, piece);
    return MEMTX_OK;
}

440 441 442 443 444 445 446
/*
 * Extract the @mask-wide field at bit @shift from *@value, trace it, and
 * write it at @addr through mr->ops->write.  Always returns MEMTX_OK;
 * @attrs is unused.
 */
static MemTxResult memory_region_write_accessor(MemoryRegion *mr,
                                                hwaddr addr,
                                                uint64_t *value,
                                                unsigned size,
                                                unsigned shift,
                                                uint64_t mask,
                                                MemTxAttrs attrs)
{
    uint64_t piece = (*value >> shift) & mask;

    trace_memory_region_ops_write(mr, addr, piece, size);
    mr->ops->write(mr->opaque, addr, piece, size);
    return MEMTX_OK;
}

456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
/*
 * Extract the @mask-wide field at bit @shift from *@value, trace it, and
 * write it at @addr through mr->ops->write_with_attrs, returning that
 * callback's MemTxResult.
 */
static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr,
                                                           hwaddr addr,
                                                           uint64_t *value,
                                                           unsigned size,
                                                           unsigned shift,
                                                           uint64_t mask,
                                                           MemTxAttrs attrs)
{
    uint64_t piece = (*value >> shift) & mask;

    trace_memory_region_ops_write(mr, addr, piece, size);
    return mr->ops->write_with_attrs(mr->opaque, addr, piece, size, attrs);
}

/*
 * Split a @size-byte access at @addr into pieces of a width the device
 * accepts and run @access on each piece.
 *
 * The piece width is @size clamped to [@access_size_min, @access_size_max]
 * (a zero minimum means 1, a zero maximum means 4).  Each piece is placed
 * at its own bit offset inside *@value: ascending offsets for little-endian
 * regions, descending for big-endian ones.  The results of all pieces are
 * OR-ed together and returned.
 */
static MemTxResult access_with_adjusted_size(hwaddr addr,
                                      uint64_t *value,
                                      unsigned size,
                                      unsigned access_size_min,
                                      unsigned access_size_max,
                                      MemTxResult (*access)(MemoryRegion *mr,
                                                            hwaddr addr,
                                                            uint64_t *value,
                                                            unsigned size,
                                                            unsigned shift,
                                                            uint64_t mask,
                                                            MemTxAttrs attrs),
                                      MemoryRegion *mr,
                                      MemTxAttrs attrs)
{
    MemTxResult r = MEMTX_OK;
    uint64_t access_mask;
    unsigned access_size;
    unsigned i;
    bool big_endian;

    if (!access_size_min) {
        access_size_min = 1;
    }
    if (!access_size_max) {
        access_size_max = 4;
    }

    /* FIXME: support unaligned access? */
    access_size = MAX(MIN(size, access_size_max), access_size_min);
    access_mask = -1ULL >> (64 - access_size * 8);
    big_endian = memory_region_big_endian(mr);

    for (i = 0; i < size; i += access_size) {
        /*
         * NOTE(review): when access_size_min exceeds size, access_size is
         * larger than size and the big-endian expression below wraps around
         * as unsigned arithmetic.
         */
        unsigned shift = big_endian ? (size - access_size - i) * 8 : i * 8;

        r |= access(mr, addr + i, value, access_size, shift,
                    access_mask, attrs);
    }
    return r;
}

515 516
/*
 * Follow the container chain from @mr up to its topmost region and return
 * the AddressSpace whose root is that region, or NULL when none is.
 */
static AddressSpace *memory_region_to_address_space(MemoryRegion *mr)
{
    MemoryRegion *top;
    AddressSpace *as;

    for (top = mr; top->container; top = top->container) {
        /* climb to the outermost container */
    }

    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        if (as->root == top) {
            return as;
        }
    }
    return NULL;
}

A
Avi Kivity 已提交
530 531 532 533 534
/* Render a memory region into the global view.  Ranges in @view obscure
 * ranges in @mr.
 */
static void render_memory_region(FlatView *view,
                                 MemoryRegion *mr,
535
                                 Int128 base,
536 537
                                 AddrRange clip,
                                 bool readonly)
A
Avi Kivity 已提交
538 539 540
{
    MemoryRegion *subregion;
    unsigned i;
A
Avi Kivity 已提交
541
    hwaddr offset_in_region;
542 543
    Int128 remain;
    Int128 now;
A
Avi Kivity 已提交
544 545 546
    FlatRange fr;
    AddrRange tmp;

547 548 549 550
    if (!mr->enabled) {
        return;
    }

551
    int128_addto(&base, int128_make64(mr->addr));
552
    readonly |= mr->readonly;
A
Avi Kivity 已提交
553 554 555 556 557 558 559 560 561 562

    tmp = addrrange_make(base, mr->size);

    if (!addrrange_intersects(tmp, clip)) {
        return;
    }

    clip = addrrange_intersection(tmp, clip);

    if (mr->alias) {
563 564
        int128_subfrom(&base, int128_make64(mr->alias->addr));
        int128_subfrom(&base, int128_make64(mr->alias_offset));
565
        render_memory_region(view, mr->alias, base, clip, readonly);
A
Avi Kivity 已提交
566 567 568 569 570
        return;
    }

    /* Render subregions in priority order. */
    QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
571
        render_memory_region(view, subregion, base, clip, readonly);
A
Avi Kivity 已提交
572 573
    }

574
    if (!mr->terminates) {
A
Avi Kivity 已提交
575 576 577
        return;
    }

578
    offset_in_region = int128_get64(int128_sub(clip.start, base));
A
Avi Kivity 已提交
579 580 581
    base = clip.start;
    remain = clip.size;

582
    fr.mr = mr;
583
    fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
584 585 586
    fr.romd_mode = mr->romd_mode;
    fr.readonly = readonly;

A
Avi Kivity 已提交
587
    /* Render the region itself into any gaps left by the current view. */
588 589
    for (i = 0; i < view->nr && int128_nz(remain); ++i) {
        if (int128_ge(base, addrrange_end(view->ranges[i].addr))) {
A
Avi Kivity 已提交
590 591
            continue;
        }
592 593 594
        if (int128_lt(base, view->ranges[i].addr.start)) {
            now = int128_min(remain,
                             int128_sub(view->ranges[i].addr.start, base));
A
Avi Kivity 已提交
595 596 597 598
            fr.offset_in_region = offset_in_region;
            fr.addr = addrrange_make(base, now);
            flatview_insert(view, i, &fr);
            ++i;
599 600 601
            int128_addto(&base, now);
            offset_in_region += int128_get64(now);
            int128_subfrom(&remain, now);
A
Avi Kivity 已提交
602
        }
603 604 605 606 607 608
        now = int128_sub(int128_min(int128_add(base, remain),
                                    addrrange_end(view->ranges[i].addr)),
                         base);
        int128_addto(&base, now);
        offset_in_region += int128_get64(now);
        int128_subfrom(&remain, now);
A
Avi Kivity 已提交
609
    }
610
    if (int128_nz(remain)) {
A
Avi Kivity 已提交
611 612 613 614 615 616 617
        fr.offset_in_region = offset_in_region;
        fr.addr = addrrange_make(base, remain);
        flatview_insert(view, i, &fr);
    }
}

/* Render a memory topology into a list of disjoint absolute ranges. */
618
static FlatView *generate_memory_topology(MemoryRegion *mr)
A
Avi Kivity 已提交
619
{
620
    FlatView *view;
A
Avi Kivity 已提交
621

622 623
    view = g_new(FlatView, 1);
    flatview_init(view);
A
Avi Kivity 已提交
624

A
Avi Kivity 已提交
625
    if (mr) {
626
        render_memory_region(view, mr, int128_zero(),
A
Avi Kivity 已提交
627 628
                             addrrange_make(int128_zero(), int128_2_64()), false);
    }
629
    flatview_simplify(view);
A
Avi Kivity 已提交
630 631 632 633

    return view;
}

A
Avi Kivity 已提交
634 635 636 637 638 639 640
/*
 * Walk the old and new ioeventfd arrays in parallel, using
 * memory_region_ioeventfd_before() as the ordering, and notify listeners of
 * the differences: entries found only in @fds_old trigger eventfd_del
 * (Forward), entries found only in @fds_new trigger eventfd_add (Reverse),
 * and entries present in both are skipped.
 */
static void address_space_add_del_ioeventfds(AddressSpace *as,
                                             MemoryRegionIoeventfd *fds_new,
                                             unsigned fds_new_nb,
                                             MemoryRegionIoeventfd *fds_old,
                                             unsigned fds_old_nb)
{
    unsigned iold, inew;
    MemoryRegionIoeventfd *fd;
    MemoryRegionSection section;

    /* Generate a symmetric difference of the old and new fd sets, adding
     * and deleting as necessary.
     */

    iold = inew = 0;
    while (iold < fds_old_nb || inew < fds_new_nb) {
        if (iold < fds_old_nb
            && (inew == fds_new_nb
                || memory_region_ioeventfd_before(fds_old[iold],
                                                  fds_new[inew]))) {
            /* Only in the old set: delete. */
            fd = &fds_old[iold];
            section = (MemoryRegionSection) {
                .address_space = as,
                .offset_within_address_space = int128_get64(fd->addr.start),
                .size = fd->addr.size,
            };
            MEMORY_LISTENER_CALL(eventfd_del, Forward, &section,
                                 fd->match_data, fd->data, fd->e);
            ++iold;
        } else if (inew < fds_new_nb
                   && (iold == fds_old_nb
                       || memory_region_ioeventfd_before(fds_new[inew],
                                                         fds_old[iold]))) {
            /* Only in the new set: add. */
            fd = &fds_new[inew];
            section = (MemoryRegionSection) {
                .address_space = as,
                .offset_within_address_space = int128_get64(fd->addr.start),
                .size = fd->addr.size,
            };
            MEMORY_LISTENER_CALL(eventfd_add, Reverse, &section,
                                 fd->match_data, fd->data, fd->e);
            ++inew;
        } else {
            /* Neither sorts before the other: present in both, no change. */
            ++iold;
            ++inew;
        }
    }
}

683 684 685 686
/*
 * Return @as's current FlatView with an extra reference taken.  The pointer
 * is read and referenced inside an RCU read-side section; the caller must
 * drop the reference with flatview_unref().
 */
static FlatView *address_space_get_flatview(AddressSpace *as)
{
    FlatView *snapshot;

    rcu_read_lock();
    snapshot = atomic_rcu_read(&as->current_map);
    flatview_ref(snapshot);
    rcu_read_unlock();

    return snapshot;
}

A
Avi Kivity 已提交
694 695
/*
 * Recompute the ioeventfd list of @as from its current flat view.
 *
 * Every ioeventfd of every rendered region is translated to an absolute
 * address and kept when it overlaps the flat range it came from.  Listeners
 * are told about the difference with the previous list, which is then
 * freed and replaced.
 */
static void address_space_update_ioeventfds(AddressSpace *as)
{
    FlatView *view = address_space_get_flatview(as);
    MemoryRegionIoeventfd *fds = NULL;
    unsigned nfds = 0;
    FlatRange *fr;

    FOR_EACH_FLAT_RANGE(fr, view) {
        MemoryRegion *mr = fr->mr;
        /* Region-relative to absolute translation for this flat range. */
        Int128 delta = int128_sub(fr->addr.start,
                                  int128_make64(fr->offset_in_region));
        unsigned k;

        for (k = 0; k < mr->ioeventfd_nb; ++k) {
            AddrRange shifted = addrrange_shift(mr->ioeventfds[k].addr, delta);

            if (!addrrange_intersects(fr->addr, shifted)) {
                continue;
            }
            ++nfds;
            fds = g_realloc(fds, nfds * sizeof(*fds));
            fds[nfds - 1] = mr->ioeventfds[k];
            fds[nfds - 1].addr = shifted;
        }
    }

    address_space_add_del_ioeventfds(as, fds, nfds,
                                     as->ioeventfds, as->ioeventfd_nb);

    g_free(as->ioeventfds);
    as->ioeventfds = fds;
    as->ioeventfd_nb = nfds;
    flatview_unref(view);
}

728
/*
 * One pass of a two-pass diff between @old_view and @new_view, both sorted
 * by address.
 *
 * With @adding false, only region_del is issued (Reverse) for ranges that
 * vanished or whose attributes changed.  With @adding true, region_add is
 * issued (Forward) for new ranges, and region_nop plus log_start/log_stop
 * for ranges that are unchanged apart from their dirty_log_mask.
 */
static void address_space_update_topology_pass(AddressSpace *as,
                                               const FlatView *old_view,
                                               const FlatView *new_view,
                                               bool adding)
{
    unsigned iold, inew;
    FlatRange *frold, *frnew;

    /* Generate a symmetric difference of the old and new memory maps.
     * Kill ranges in the old map, and instantiate ranges in the new map.
     */
    iold = inew = 0;
    while (iold < old_view->nr || inew < new_view->nr) {
        if (iold < old_view->nr) {
            frold = &old_view->ranges[iold];
        } else {
            frold = NULL;
        }
        if (inew < new_view->nr) {
            frnew = &new_view->ranges[inew];
        } else {
            frnew = NULL;
        }

        if (frold
            && (!frnew
                || int128_lt(frold->addr.start, frnew->addr.start)
                || (int128_eq(frold->addr.start, frnew->addr.start)
                    && !flatrange_equal(frold, frnew)))) {
            /* In old but not in new, or in both but attributes changed. */

            if (!adding) {
                MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del);
            }

            ++iold;
        } else if (frold && frnew && flatrange_equal(frold, frnew)) {
            /* In both and unchanged (except logging may have changed) */

            if (adding) {
                MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop);
                /* Bits set in the new mask only: logging starts. */
                if (frnew->dirty_log_mask & ~frold->dirty_log_mask) {
                    MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start,
                                                  frold->dirty_log_mask,
                                                  frnew->dirty_log_mask);
                }
                /* Bits set in the old mask only: logging stops. */
                if (frold->dirty_log_mask & ~frnew->dirty_log_mask) {
                    MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop,
                                                  frold->dirty_log_mask,
                                                  frnew->dirty_log_mask);
                }
            }

            ++iold;
            ++inew;
        } else {
            /* In new */

            if (adding) {
                MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add);
            }

            ++inew;
        }
    }
}


/*
 * Rebuild the flat view of @as from its root region and publish it.
 *
 * Listeners are notified in two passes over the old and new views (first
 * with adding == false, then with adding == true) before the new view is
 * installed as as->current_map.
 */
static void address_space_update_topology(AddressSpace *as)
{
    FlatView *previous;
    FlatView *rebuilt;

    previous = address_space_get_flatview(as);
    rebuilt = generate_memory_topology(as->root);

    address_space_update_topology_pass(as, previous, rebuilt, false);
    address_space_update_topology_pass(as, previous, rebuilt, true);

    /* Writers are serialized by the BQL. */
    atomic_rcu_set(&as->current_map, rebuilt);
    call_rcu(previous, flatview_unref, rcu);

    /*
     * Every MemoryRegion of the old view has stayed alive up to here, so
     * most MemoryListeners do not have to ref/unref the regions they are
     * handed.  Listeners that touch regions outside the iothread mutex
     * still need precise reference counting of their own.
     */
    flatview_unref(previous);

    address_space_update_ioeventfds(as);
}

A
Avi Kivity 已提交
819 820
void memory_region_transaction_begin(void)
{
821
    qemu_flush_coalesced_mmio_buffer();
A
Avi Kivity 已提交
822 823 824
    ++memory_region_transaction_depth;
}

825 826 827 828 829 830
/* Reset both "update pending" flags once a transaction has been applied. */
static void memory_region_clear_pending(void)
{
    ioeventfd_update_pending = false;
    memory_region_update_pending = false;
}

A
Avi Kivity 已提交
831 832
void memory_region_transaction_commit(void)
{
833 834
    AddressSpace *as;

A
Avi Kivity 已提交
835 836
    assert(memory_region_transaction_depth);
    --memory_region_transaction_depth;
837 838 839
    if (!memory_region_transaction_depth) {
        if (memory_region_update_pending) {
            MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
840

841 842 843
            QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
                address_space_update_topology(as);
            }
844

845 846 847 848 849 850 851 852
            MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
        } else if (ioeventfd_update_pending) {
            QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
                address_space_update_ioeventfds(as);
            }
        }
        memory_region_clear_pending();
   }
A
Avi Kivity 已提交
853 854
}

855 856 857 858 859 860 861 862 863
/* Destructor for regions that own no backing storage: nothing to release. */
static void memory_region_destructor_none(MemoryRegion *mr)
{
    /* Intentionally empty. */
}

/* Destructor for RAM-backed regions: hand the RAM block back. */
static void memory_region_destructor_ram(MemoryRegion *mr)
{
    ram_addr_t block = mr->ram_addr;

    qemu_ram_free(block);
}

864 865 866 867 868
/*
 * Destructor for ROM-device regions: free the RAM block, addressing it by
 * ram_addr masked with TARGET_PAGE_MASK.
 */
static void memory_region_destructor_rom_device(MemoryRegion *mr)
{
    ram_addr_t block = mr->ram_addr & TARGET_PAGE_MASK;

    qemu_ram_free(block);
}

P
Peter Crosthwaite 已提交
869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902
/*
 * Tell whether @c must be escaped in a region name.
 *
 * Returns true for '/', '[', ']' and '\\', false for any other character.
 */
static bool memory_region_need_escape(char c)
{
    switch (c) {
    case '/':
    case '[':
    case '\\':
    case ']':
        return true;
    default:
        return false;
    }
}

/*
 * Return a newly allocated copy of @name in which every character flagged
 * by memory_region_need_escape() is replaced by the four-character
 * sequence "\xHH" (HH = lowercase hex value of the character).
 *
 * The result is always heap-allocated, even when nothing needed escaping;
 * the caller releases it with g_free().
 */
static char *memory_region_escape_name(const char *name)
{
    static const char hex_digits[] = "0123456789abcdef";
    const char *p;
    char *escaped, *q;
    size_t bytes = 0;
    bool needs_escaping = false;

    /* First pass: compute the escaped length. */
    for (p = name; *p; p++) {
        if (memory_region_need_escape(*p)) {
            needs_escaping = true;
            bytes += 4;
        } else {
            bytes += 1;
        }
    }

    if (!needs_escaping) {
        /*
         * Plain copy.  g_strdup() is used rather than g_memdup() because
         * the latter takes a guint byte count and would truncate a
         * size_t length.
         */
        return g_strdup(name);
    }

    /* Second pass: emit the escaped string. */
    escaped = g_malloc(bytes + 1);
    for (p = name, q = escaped; *p; p++) {
        uint8_t c = *p;

        if (unlikely(memory_region_need_escape(c))) {
            *q++ = '\\';
            *q++ = 'x';
            *q++ = hex_digits[c >> 4];
            *q++ = hex_digits[c & 15];
        } else {
            *q++ = c;
        }
    }
    *q = 0;
    return escaped;
}

A
Avi Kivity 已提交
903
void memory_region_init(MemoryRegion *mr,
904
                        Object *owner,
A
Avi Kivity 已提交
905 906 907
                        const char *name,
                        uint64_t size)
{
908
    object_initialize(mr, sizeof(*mr), TYPE_MEMORY_REGION);
909 910 911 912
    mr->size = int128_make64(size);
    if (size == UINT64_MAX) {
        mr->size = int128_2_64();
    }
913
    mr->name = g_strdup(name);
914
    mr->owner = owner;
P
Peter Crosthwaite 已提交
915 916

    if (name) {
917 918
        char *escaped_name = memory_region_escape_name(name);
        char *name_array = g_strdup_printf("%s[*]", escaped_name);
919 920 921 922 923

        if (!owner) {
            owner = container_get(qdev_get_machine(), "/unattached");
        }

924
        object_property_add_child(owner, name_array, OBJECT(mr), &error_abort);
P
Peter Crosthwaite 已提交
925
        object_unref(OBJECT(mr));
926 927
        g_free(name_array);
        g_free(escaped_name);
P
Peter Crosthwaite 已提交
928 929 930
    }
}

931 932 933 934 935 936
/* QOM getter for the "addr" property: visits mr->addr as a uint64. */
static void memory_region_get_addr(Object *obj, Visitor *v, void *opaque,
                                   const char *name, Error **errp)
{
    MemoryRegion *region = MEMORY_REGION(obj);
    uint64_t region_addr = region->addr;

    visit_type_uint64(v, name, &region_addr, errp);
}

/*
 * QOM getter for the "container" property.
 *
 * Visits the canonical path of the region's container, or the empty
 * string when the region has no container.  The path string is freed
 * only in the container case, where it was allocated.
 */
static void memory_region_get_container(Object *obj, Visitor *v, void *opaque,
                                        const char *name, Error **errp)
{
    MemoryRegion *mr = MEMORY_REGION(obj);
    gchar *path;

    path = mr->container
        ? object_get_canonical_path(OBJECT(mr->container))
        : (gchar *)"";

    visit_type_str(v, name, &path, errp);

    if (mr->container) {
        g_free(path);
    }
}

/* Resolve callback of the "container" property: the region's container. */
static Object *memory_region_resolve_container(Object *obj, void *opaque,
                                               const char *part)
{
    MemoryRegion *region = MEMORY_REGION(obj);
    Object *container = OBJECT(region->container);

    return container;
}

963 964 965 966 967 968
/* QOM getter for the "priority" property: visits mr->priority as int32. */
static void memory_region_get_priority(Object *obj, Visitor *v, void *opaque,
                                       const char *name, Error **errp)
{
    MemoryRegion *region = MEMORY_REGION(obj);
    int32_t prio = region->priority;

    visit_type_int32(v, name, &prio, errp);
}

/* QOM getter for the "may-overlap" property: returns mr->may_overlap. */
static bool memory_region_get_may_overlap(Object *obj, Error **errp)
{
    MemoryRegion *region = MEMORY_REGION(obj);
    bool overlap_allowed = region->may_overlap;

    return overlap_allowed;
}

979 980 981 982 983 984
/*
 * QOM getter for the "size" property: visits memory_region_size() of the
 * region as a uint64.
 */
static void memory_region_get_size(Object *obj, Visitor *v, void *opaque,
                                   const char *name, Error **errp)
{
    MemoryRegion *region = MEMORY_REGION(obj);
    uint64_t bytes = memory_region_size(region);

    visit_type_uint64(v, name, &bytes, errp);
}

P
Peter Crosthwaite 已提交
988 989 990
/*
 * Instance initializer for TYPE_MEMORY_REGION.
 *
 * Sets the field defaults (unassigned ops, no RAM block, enabled, ROMD
 * mode, global locking, no-op destructor, empty subregion and coalesced
 * lists) and registers the read-only QOM properties "container", "addr",
 * "priority", "may-overlap" and "size".
 */
static void memory_region_initfn(Object *obj)
{
    MemoryRegion *mr = MEMORY_REGION(obj);
    ObjectProperty *container_prop;

    /* Field defaults. */
    mr->destructor = memory_region_destructor_none;
    mr->ops = &unassigned_mem_ops;
    mr->ram_addr = RAM_ADDR_INVALID;
    mr->global_locking = true;
    mr->romd_mode = true;
    mr->enabled = true;
    QTAILQ_INIT(&mr->subregions);
    QTAILQ_INIT(&mr->coalesced);

    /* Introspection properties; none of them has a setter. */
    container_prop = object_property_add(obj, "container",
                                         "link<" TYPE_MEMORY_REGION ">",
                                         memory_region_get_container,
                                         NULL, /* memory_region_set_container */
                                         NULL, NULL, &error_abort);
    container_prop->resolve = memory_region_resolve_container;

    object_property_add(obj, "addr", "uint64",
                        memory_region_get_addr,
                        NULL, /* memory_region_set_addr */
                        NULL, NULL, &error_abort);

    /*
     * NOTE(review): the type string is "uint32" while the getter visits
     * an int32_t -- confirm which one is intended before changing it.
     */
    object_property_add(obj, "priority", "uint32",
                        memory_region_get_priority,
                        NULL, /* memory_region_set_priority */
                        NULL, NULL, &error_abort);

    object_property_add_bool(obj, "may-overlap",
                             memory_region_get_may_overlap,
                             NULL, /* memory_region_set_may_overlap */
                             &error_abort);

    object_property_add(obj, "size", "uint64",
                        memory_region_get_size,
                        NULL, /* memory_region_set_size, */
                        NULL, NULL, &error_abort);
}

1027 1028 1029 1030 1031 1032
/*
 * Read handler for unassigned memory.
 *
 * Reports the access to the current CPU, if there is one, via
 * cpu_unassigned_access() and always returns 0.  With DEBUG_UNASSIGNED
 * defined, the address is also printed.
 */
static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
                                    unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
#endif
    if (current_cpu) {
        cpu_unassigned_access(current_cpu, addr, false, false, 0, size);
    }

    return 0;
}

/*
 * Write handler for unassigned memory.
 *
 * Reports the access to the current CPU, if there is one, via
 * cpu_unassigned_access(); the written value is otherwise discarded.
 * With DEBUG_UNASSIGNED defined, address and value are also printed.
 */
static void unassigned_mem_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
#endif
    if (!current_cpu) {
        return;
    }

    cpu_unassigned_access(current_cpu, addr, true, false, 0, size);
}

1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060
/* "accepts" callback for unassigned memory: every access is rejected. */
static bool unassigned_mem_accepts(void *opaque, hwaddr addr,
                                   unsigned size, bool is_write)
{
    const bool accepted = false;

    return accepted;
}

/*
 * Ops used for regions without a real backend: native endianness and an
 * "accepts" hook that refuses every access.
 */
const MemoryRegionOps unassigned_mem_ops = {
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .accepts = unassigned_mem_accepts,
    },
};

1061 1062 1063 1064
/*
 * Check whether an access of @size bytes at @addr is acceptable for @mr.
 *
 * Returns false when the access is unaligned and the ops do not allow
 * unaligned accesses.  Returns true when the ops have no valid.accepts
 * hook.  Otherwise the access is cut into pieces of a size clamped
 * between valid.min_access_size (default 1) and valid.max_access_size
 * (default 4), and every piece must be accepted by the hook.
 */
bool memory_region_access_valid(MemoryRegion *mr,
                                hwaddr addr,
                                unsigned size,
                                bool is_write)
{
    int access_size_min, access_size_max;
    int access_size, i;

    if (!mr->ops->valid.unaligned && (addr & (size - 1))) {
        return false;
    }

    if (!mr->ops->valid.accepts) {
        return true;
    }

    /* A zero limit in the ops means "use the default". */
    access_size_min = mr->ops->valid.min_access_size
        ? mr->ops->valid.min_access_size : 1;
    access_size_max = mr->ops->valid.max_access_size
        ? mr->ops->valid.max_access_size : 4;

    access_size = MAX(MIN(size, access_size_max), access_size_min);

    i = 0;
    while (i < size) {
        if (!mr->ops->valid.accepts(mr->opaque, addr + i, access_size,
                                    is_write)) {
            return false;
        }
        i += access_size;
    }

    return true;
}

1098 1099 1100 1101 1102
/*
 * Perform a read on @mr through whichever read callback its ops provide.
 *
 * *pval is zeroed first.  ops->read is preferred, then
 * ops->read_with_attrs; with neither, the old-style MMIO accessor is used
 * with access sizes limited to 1..4 bytes.  Returns the result of
 * access_with_adjusted_size().
 */
static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr,
                                                hwaddr addr,
                                                uint64_t *pval,
                                                unsigned size,
                                                MemTxAttrs attrs)
{
    const MemoryRegionOps *ops;

    *pval = 0;
    ops = mr->ops;

    if (ops->read) {
        return access_with_adjusted_size(addr, pval, size,
                                         ops->impl.min_access_size,
                                         ops->impl.max_access_size,
                                         memory_region_read_accessor,
                                         mr, attrs);
    }

    if (ops->read_with_attrs) {
        return access_with_adjusted_size(addr, pval, size,
                                         ops->impl.min_access_size,
                                         ops->impl.max_access_size,
                                         memory_region_read_with_attrs_accessor,
                                         mr, attrs);
    }

    return access_with_adjusted_size(addr, pval, size, 1, 4,
                                     memory_region_oldmmio_read_accessor,
                                     mr, attrs);
}

1125 1126 1127 1128 1129
/*
 * Read @size bytes at @addr from @mr into *pval.
 *
 * An access rejected by memory_region_access_valid() is handled as an
 * unassigned read and yields MEMTX_DECODE_ERROR.  Otherwise the read is
 * dispatched, the value is passed through adjust_endianness(), and the
 * dispatch result is returned.
 */
MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
                                        hwaddr addr,
                                        uint64_t *pval,
                                        unsigned size,
                                        MemTxAttrs attrs)
{
    MemTxResult result;

    if (memory_region_access_valid(mr, addr, size, false)) {
        result = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
        adjust_endianness(mr, pval, size);
        return result;
    }

    *pval = unassigned_mem_read(mr, addr, size);
    return MEMTX_DECODE_ERROR;
}
A
Avi Kivity 已提交
1142

P
Pavel Fedin 已提交
1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168
/*
 * Look for an ioeventfd of @mr matching a write of @data/@size at @addr
 * and signal the first match.
 *
 * Returns true if an eventfd was signalled, false if none matched.
 */
static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
                                                    hwaddr addr,
                                                    uint64_t data,
                                                    unsigned size,
                                                    MemTxAttrs attrs)
{
    MemoryRegionIoeventfd probe = {
        .addr = addrrange_make(int128_make64(addr), int128_make64(size)),
        .data = data,
    };
    unsigned idx;

    for (idx = 0; idx < mr->ioeventfd_nb; idx++) {
        /* Borrow the fields that do not depend on the access itself. */
        probe.match_data = mr->ioeventfds[idx].match_data;
        probe.e = mr->ioeventfds[idx].e;

        if (!memory_region_ioeventfd_equal(probe, mr->ioeventfds[idx])) {
            continue;
        }

        event_notifier_set(probe.e);
        return true;
    }

    return false;
}

1169 1170 1171 1172 1173
/*
 * Write @data (@size bytes) at @addr to @mr.
 *
 * An access rejected by memory_region_access_valid() is handled as an
 * unassigned write and yields MEMTX_DECODE_ERROR.  After
 * adjust_endianness(), and only when kvm_eventfds_enabled() is false, a
 * matching ioeventfd is signalled instead of calling the device, giving
 * MEMTX_OK.  Otherwise the write goes to ops->write, else
 * ops->write_with_attrs, else the old-style MMIO accessor (1..4 bytes).
 */
MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
                                         hwaddr addr,
                                         uint64_t data,
                                         unsigned size,
                                         MemTxAttrs attrs)
{
    const MemoryRegionOps *ops;

    if (!memory_region_access_valid(mr, addr, size, true)) {
        unassigned_mem_write(mr, addr, data, size);
        return MEMTX_DECODE_ERROR;
    }

    adjust_endianness(mr, &data, size);

    if (!kvm_eventfds_enabled()
        && memory_region_dispatch_write_eventfds(mr, addr, data, size,
                                                 attrs)) {
        return MEMTX_OK;
    }

    ops = mr->ops;

    if (ops->write) {
        return access_with_adjusted_size(addr, &data, size,
                                         ops->impl.min_access_size,
                                         ops->impl.max_access_size,
                                         memory_region_write_accessor, mr,
                                         attrs);
    }

    if (ops->write_with_attrs) {
        return access_with_adjusted_size(addr, &data, size,
                                         ops->impl.min_access_size,
                                         ops->impl.max_access_size,
                                         memory_region_write_with_attrs_accessor,
                                         mr, attrs);
    }

    return access_with_adjusted_size(addr, &data, size, 1, 4,
                                     memory_region_oldmmio_write_accessor,
                                     mr, attrs);
}

void memory_region_init_io(MemoryRegion *mr,
1208
                           Object *owner,
A
Avi Kivity 已提交
1209 1210 1211 1212 1213
                           const MemoryRegionOps *ops,
                           void *opaque,
                           const char *name,
                           uint64_t size)
{
1214
    memory_region_init(mr, owner, name, size);
1215
    mr->ops = ops ? ops : &unassigned_mem_ops;
A
Avi Kivity 已提交
1216
    mr->opaque = opaque;
1217
    mr->terminates = true;
A
Avi Kivity 已提交
1218 1219 1220
}

void memory_region_init_ram(MemoryRegion *mr,
1221
                            Object *owner,
A
Avi Kivity 已提交
1222
                            const char *name,
1223 1224
                            uint64_t size,
                            Error **errp)
A
Avi Kivity 已提交
1225
{
1226
    memory_region_init(mr, owner, name, size);
A
Avi Kivity 已提交
1227
    mr->ram = true;
1228
    mr->terminates = true;
1229
    mr->destructor = memory_region_destructor_ram;
1230
    mr->ram_addr = qemu_ram_alloc(size, mr, errp);
1231
    mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
1232 1233
}

1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248
/*
 * Initialize @mr as a RAM region whose backing block is allocated with
 * qemu_ram_alloc_resizeable().
 *
 * @size, @max_size and the @resized callback are passed through to the
 * allocator together with @errp.  The dirty log mask gets the
 * DIRTY_MEMORY_CODE bit when tcg_enabled(), and is zero otherwise.
 */
void memory_region_init_resizeable_ram(MemoryRegion *mr,
                                       Object *owner,
                                       const char *name,
                                       uint64_t size,
                                       uint64_t max_size,
                                       void (*resized)(const char*,
                                                       uint64_t length,
                                                       void *host),
                                       Error **errp)
{
    memory_region_init(mr, owner, name, size);

    mr->destructor = memory_region_destructor_ram;
    mr->terminates = true;
    mr->ram = true;

    mr->ram_addr = qemu_ram_alloc_resizeable(size, max_size, resized, mr, errp);

    if (tcg_enabled()) {
        mr->dirty_log_mask = 1 << DIRTY_MEMORY_CODE;
    } else {
        mr->dirty_log_mask = 0;
    }
}

1252 1253 1254 1255 1256
#ifdef __linux__
/*
 * Initialize @mr as a RAM region whose backing block is allocated with
 * qemu_ram_alloc_from_file(), given @share and @path.  Only built on
 * Linux.
 *
 * The dirty log mask gets the DIRTY_MEMORY_CODE bit when tcg_enabled(),
 * and is zero otherwise.
 */
void memory_region_init_ram_from_file(MemoryRegion *mr,
                                      struct Object *owner,
                                      const char *name,
                                      uint64_t size,
                                      bool share,
                                      const char *path,
                                      Error **errp)
{
    memory_region_init(mr, owner, name, size);

    mr->destructor = memory_region_destructor_ram;
    mr->terminates = true;
    mr->ram = true;

    mr->ram_addr = qemu_ram_alloc_from_file(size, mr, share, path, errp);

    if (tcg_enabled()) {
        mr->dirty_log_mask = 1 << DIRTY_MEMORY_CODE;
    } else {
        mr->dirty_log_mask = 0;
    }
}
#endif
A
Avi Kivity 已提交
1269 1270

void memory_region_init_ram_ptr(MemoryRegion *mr,
1271
                                Object *owner,
A
Avi Kivity 已提交
1272 1273 1274 1275
                                const char *name,
                                uint64_t size,
                                void *ptr)
{
1276
    memory_region_init(mr, owner, name, size);
A
Avi Kivity 已提交
1277
    mr->ram = true;
1278
    mr->terminates = true;
1279
    mr->destructor = memory_region_destructor_ram;
1280
    mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
1281 1282 1283

    /* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL.  */
    assert(ptr != NULL);
1284
    mr->ram_addr = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal);
A
Avi Kivity 已提交
1285 1286
}

1287 1288 1289 1290 1291
/* Set the skip_dump flag of @mr (see memory_region_is_skip_dump()). */
void memory_region_set_skip_dump(MemoryRegion *mr)
{
    const bool skip = true;

    mr->skip_dump = skip;
}

A
Avi Kivity 已提交
1292
void memory_region_init_alias(MemoryRegion *mr,
1293
                              Object *owner,
A
Avi Kivity 已提交
1294 1295
                              const char *name,
                              MemoryRegion *orig,
A
Avi Kivity 已提交
1296
                              hwaddr offset,
A
Avi Kivity 已提交
1297 1298
                              uint64_t size)
{
1299
    memory_region_init(mr, owner, name, size);
A
Avi Kivity 已提交
1300 1301 1302 1303
    mr->alias = orig;
    mr->alias_offset = offset;
}

1304
void memory_region_init_rom_device(MemoryRegion *mr,
1305
                                   Object *owner,
1306
                                   const MemoryRegionOps *ops,
1307
                                   void *opaque,
1308
                                   const char *name,
1309 1310
                                   uint64_t size,
                                   Error **errp)
1311
{
1312
    memory_region_init(mr, owner, name, size);
1313
    mr->ops = ops;
1314
    mr->opaque = opaque;
1315
    mr->terminates = true;
A
Avi Kivity 已提交
1316
    mr->rom_device = true;
1317
    mr->destructor = memory_region_destructor_rom_device;
1318
    mr->ram_addr = qemu_ram_alloc(size, mr, errp);
1319 1320
}

A
Avi Kivity 已提交
1321
void memory_region_init_iommu(MemoryRegion *mr,
1322
                              Object *owner,
A
Avi Kivity 已提交
1323 1324 1325 1326
                              const MemoryRegionIOMMUOps *ops,
                              const char *name,
                              uint64_t size)
{
1327
    memory_region_init(mr, owner, name, size);
A
Avi Kivity 已提交
1328 1329
    mr->iommu_ops = ops,
    mr->terminates = true;  /* then re-forwards */
1330
    notifier_list_init(&mr->iommu_notify);
A
Avi Kivity 已提交
1331 1332
}

P
Peter Crosthwaite 已提交
1333
/*
 * Instance finalizer for TYPE_MEMORY_REGION.
 *
 * The region must already be detached from its container (asserted).
 * All remaining subregions are removed inside one transaction, then the
 * region's destructor runs, coalescing state is cleared, and the name and
 * ioeventfd array are freed.
 */
static void memory_region_finalize(Object *obj)
{
    MemoryRegion *mr = MEMORY_REGION(obj);
    MemoryRegion *child;

    assert(!mr->container);

    /*
     * The region cannot be visible in any address space: it has no
     * container, and it cannot be a root either because it has no
     * references.  So mr->enabled can be cleared directly; going through
     * memory_region_set_enabled could trigger a transaction and cause an
     * infinite loop.
     */
    mr->enabled = false;

    memory_region_transaction_begin();
    while ((child = QTAILQ_FIRST(&mr->subregions)) != NULL) {
        memory_region_del_subregion(mr, child);
    }
    memory_region_transaction_commit();

    mr->destructor(mr);
    memory_region_clear_coalescing(mr);
    g_free((char *)mr->name);
    g_free(mr->ioeventfds);
}

P
Paolo Bonzini 已提交
1359 1360
/* Return the QOM parent of @mr, which is reported as its owner. */
Object *memory_region_owner(MemoryRegion *mr)
{
    return OBJECT(mr)->parent;
}

P
Paolo Bonzini 已提交
1365 1366
/*
 * Take a reference that keeps @mr usable.  NULL and owner-less regions
 * are ignored.
 */
void memory_region_ref(MemoryRegion *mr)
{
    /*
     * MMIO callbacks will most likely touch data belonging to the owner,
     * so the owner is what gets referenced while the region is in use.
     *
     * The region is a child of its owner: unless the owner unparents the
     * region itself, a reference on the owner keeps the region alive too.
     * Regions without an owner are expected never to go away; they are
     * not ref-counted here because doing so slows down DMA noticeably.
     */
    if (!mr || !mr->owner) {
        return;
    }

    object_ref(mr->owner);
}

/*
 * Drop a reference taken with memory_region_ref(): unref the owner of
 * @mr.  NULL and owner-less regions are ignored.
 */
void memory_region_unref(MemoryRegion *mr)
{
    if (!mr || !mr->owner) {
        return;
    }

    object_unref(mr->owner);
}

A
Avi Kivity 已提交
1389 1390
/*
 * Return the size of @mr in bytes as a 64-bit value.  A region of 2^64
 * bytes is reported as UINT64_MAX.
 */
uint64_t memory_region_size(MemoryRegion *mr)
{
    return int128_eq(mr->size, int128_2_64())
        ? UINT64_MAX
        : int128_get64(mr->size);
}

1397
const char *memory_region_name(const MemoryRegion *mr)
1398
{
1399 1400 1401 1402
    if (!mr->name) {
        ((MemoryRegion *)mr)->name =
            object_get_canonical_path_component(OBJECT(mr));
    }
1403
    return mr->name;
1404 1405
}

1406 1407 1408 1409 1410
/* Return the skip_dump flag of @mr. */
bool memory_region_is_skip_dump(MemoryRegion *mr)
{
    bool skip = mr->skip_dump;

    return skip;
}

1411
/*
 * Return the effective dirty log mask of @mr: its own dirty_log_mask,
 * plus the DIRTY_MEMORY_MIGRATION bit while global_dirty_log is set.
 */
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
{
    uint8_t effective = mr->dirty_log_mask;

    if (!global_dirty_log) {
        return effective;
    }

    effective |= (1 << DIRTY_MEMORY_MIGRATION);
    return effective;
}

1420 1421 1422 1423 1424
/* Tell whether the bit for @client is set in @mr's effective log mask. */
bool memory_region_is_logging(MemoryRegion *mr, uint8_t client)
{
    uint8_t effective = memory_region_get_dirty_log_mask(mr);

    return (effective & (1 << client)) != 0;
}

1425 1426 1427 1428 1429
/* Append notifier @n to the IOMMU notifier list of @mr. */
void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n)
{
    NotifierList *listeners = &mr->iommu_notify;

    notifier_list_add(listeners, n);
}

1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449
/*
 * Walk @mr in steps of @granularity, translate each address through the
 * region's iommu_ops, and call @n for every entry whose permission is
 * not IOMMU_NONE.
 */
void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n,
                                hwaddr granularity, bool is_write)
{
    hwaddr addr = 0;
    hwaddr next;
    IOMMUTLBEntry iotlb;

    while (addr < memory_region_size(mr)) {
        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        if (iotlb.perm != IOMMU_NONE) {
            n->notify(n, &iotlb);
        }

        /*
         * If (2^64 - MR size) < granularity the address can wrap around
         * and the loop would never end; stop as soon as that happens.
         */
        next = addr + granularity;
        if (next < addr) {
            break;
        }
        addr = next;
    }
}

1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461
/* Remove notifier @n from the notifier list it is registered on. */
void memory_region_unregister_iommu_notifier(Notifier *n)
{
    Notifier *registered = n;

    notifier_remove(registered);
}

/*
 * Pass @entry to every notifier registered on @mr, which must be an
 * IOMMU region (asserted).
 */
void memory_region_notify_iommu(MemoryRegion *mr,
                                IOMMUTLBEntry entry)
{
    NotifierList *listeners = &mr->iommu_notify;

    assert(memory_region_is_iommu(mr));
    notifier_list_notify(listeners, &entry);
}

A
Avi Kivity 已提交
1462 1463
/*
 * Enable (@log true) or disable (@log false) dirty logging of @mr for
 * @client, which must be DIRTY_MEMORY_VGA (asserted).
 *
 * Requests are counted in mr->vga_logging_count.  The dirty log mask is
 * only changed, inside a transaction, when the count moves between zero
 * and non-zero.
 */
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
{
    uint8_t mask = 1 << client;
    uint8_t old_logging;

    assert(client == DIRTY_MEMORY_VGA);

    old_logging = mr->vga_logging_count;
    if (log) {
        mr->vga_logging_count++;
    } else {
        mr->vga_logging_count--;
    }

    if ((old_logging != 0) == (mr->vga_logging_count != 0)) {
        /* Logging state did not flip: nothing to propagate. */
        return;
    }

    memory_region_transaction_begin();
    if (log) {
        mr->dirty_log_mask = mr->dirty_log_mask | mask;
    } else {
        mr->dirty_log_mask = mr->dirty_log_mask & ~mask;
    }
    memory_region_update_pending |= mr->enabled;
    memory_region_transaction_commit();
}

A
Avi Kivity 已提交
1480 1481
/*
 * Query the dirty state for @client of @size bytes at offset @addr in
 * @mr, which must have a RAM block (asserted).
 */
bool memory_region_get_dirty(MemoryRegion *mr, hwaddr addr,
                             hwaddr size, unsigned client)
{
    bool dirty;

    assert(mr->ram_addr != RAM_ADDR_INVALID);
    dirty = cpu_physical_memory_get_dirty(mr->ram_addr + addr, size, client);
    return dirty;
}

A
Avi Kivity 已提交
1487 1488
/*
 * Mark @size bytes at offset @addr in @mr as dirty for every client in
 * the region's effective dirty log mask.  @mr must have a RAM block
 * (asserted).
 */
void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
                             hwaddr size)
{
    uint8_t clients;

    assert(mr->ram_addr != RAM_ADDR_INVALID);
    clients = memory_region_get_dirty_log_mask(mr);
    cpu_physical_memory_set_dirty_range(mr->ram_addr + addr, size, clients);
}

1495 1496 1497
/*
 * Test and clear the dirty state for @client of @size bytes at offset
 * @addr in @mr, returning the result of
 * cpu_physical_memory_test_and_clear_dirty().  @mr must have a RAM block
 * (asserted).
 */
bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr,
                                        hwaddr size, unsigned client)
{
    bool was_dirty;

    assert(mr->ram_addr != RAM_ADDR_INVALID);
    was_dirty = cpu_physical_memory_test_and_clear_dirty(mr->ram_addr + addr,
                                                         size, client);
    return was_dirty;
}


A
Avi Kivity 已提交
1504 1505
/*
 * Invoke the log_sync listener callback for every flat range that maps
 * @mr, in every address space.
 */
void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
{
    AddressSpace *as;
    FlatRange *fr;
    FlatView *snapshot;

    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        snapshot = address_space_get_flatview(as);

        FOR_EACH_FLAT_RANGE(fr, snapshot) {
            if (fr->mr == mr) {
                MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
            }
        }

        /* Release the view obtained above. */
        flatview_unref(snapshot);
    }
}

/*
 * Change the read-only flag of @mr.  When the value actually changes, the
 * update is wrapped in a transaction and a topology update is requested
 * if the region is enabled.
 */
void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
{
    if (mr->readonly == readonly) {
        return;
    }

    memory_region_transaction_begin();
    mr->readonly = readonly;
    memory_region_update_pending |= mr->enabled;
    memory_region_transaction_commit();
}

1530
/*
 * Change the romd_mode flag of @mr.  When the value actually changes, the
 * update is wrapped in a transaction and a topology update is requested
 * if the region is enabled.
 */
void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode)
{
    if (mr->romd_mode == romd_mode) {
        return;
    }

    memory_region_transaction_begin();
    mr->romd_mode = romd_mode;
    memory_region_update_pending |= mr->enabled;
    memory_region_transaction_commit();
}

A
Avi Kivity 已提交
1540 1541
/*
 * Clear the dirty state for @client of @size bytes at offset @addr in
 * @mr; the previous state is discarded.  @mr must have a RAM block
 * (asserted).
 */
void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
                               hwaddr size, unsigned client)
{
    assert(mr->ram_addr != RAM_ADDR_INVALID);

    cpu_physical_memory_test_and_clear_dirty(mr->ram_addr + addr,
                                             size,
                                             client);
}

1548 1549 1550 1551 1552 1553
/*
 * Return the result of qemu_get_ram_fd() for the RAM block behind @mr.
 *
 * Aliases are followed down to the final target, which must have a RAM
 * block (asserted).
 */
int memory_region_get_fd(MemoryRegion *mr)
{
    MemoryRegion *target = mr;

    /* Resolve the whole alias chain first. */
    while (target->alias) {
        target = target->alias;
    }

    assert(target->ram_addr != RAM_ADDR_INVALID);

    return qemu_get_ram_fd(target->ram_addr & TARGET_PAGE_MASK);
}

A
Avi Kivity 已提交
1559 1560
/*
 * Return a host pointer to the RAM behind @mr.
 *
 * Aliases are followed to the final target while their alias offsets are
 * summed; the sum is added to the target's RAM pointer.  The target must
 * have a RAM block (asserted).  The lookup runs under rcu_read_lock().
 */
void *memory_region_get_ram_ptr(MemoryRegion *mr)
{
    MemoryRegion *target;
    uint64_t total_offset = 0;
    void *base;

    rcu_read_lock();
    for (target = mr; target->alias; target = target->alias) {
        total_offset += target->alias_offset;
    }
    assert(target->ram_addr != RAM_ADDR_INVALID);
    base = qemu_get_ram_ptr(target->ram_addr & TARGET_PAGE_MASK);
    rcu_read_unlock();

    return base + total_offset;
}

1576 1577
/*
 * Resize the RAM block of @mr to @newsize via qemu_ram_resize(), which
 * receives @errp.  @mr must have a RAM block (asserted).
 */
void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp)
{
    ram_addr_t block;

    assert(mr->ram_addr != RAM_ADDR_INVALID);
    block = mr->ram_addr;

    qemu_ram_resize(block, newsize, errp);
}

1583
/*
 * Refresh the coalesced MMIO registrations of @mr within address space
 * @as.
 *
 * For each flat range that maps @mr, listeners first get
 * coalesced_mmio_del for the whole range, then coalesced_mmio_add for
 * every coalesced range of @mr, shifted into address-space coordinates
 * and clipped to the flat range.
 */
static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpace *as)
{
    FlatView *view = address_space_get_flatview(as);
    FlatRange *fr;
    CoalescedMemoryRange *cmr;
    AddrRange tmp;
    Int128 delta;
    MemoryRegionSection section;

    FOR_EACH_FLAT_RANGE(fr, view) {
        if (fr->mr == mr) {
            section = (MemoryRegionSection) {
                .address_space = as,
                .offset_within_address_space = int128_get64(fr->addr.start),
                .size = fr->addr.size,
            };

            MEMORY_LISTENER_CALL(coalesced_mmio_del, Reverse, &section,
                                 int128_get64(fr->addr.start),
                                 int128_get64(fr->addr.size));

            /* Offset that maps region-relative to address-space addresses. */
            delta = int128_sub(fr->addr.start,
                               int128_make64(fr->offset_in_region));

            QTAILQ_FOREACH(cmr, &mr->coalesced, link) {
                tmp = addrrange_shift(cmr->addr, delta);
                if (addrrange_intersects(tmp, fr->addr)) {
                    tmp = addrrange_intersection(tmp, fr->addr);
                    MEMORY_LISTENER_CALL(coalesced_mmio_add, Forward, &section,
                                         int128_get64(tmp.start),
                                         int128_get64(tmp.size));
                }
            }
        }
    }

    flatview_unref(view);
}

1620 1621 1622 1623 1624 1625 1626 1627 1628
/*
 * Refresh the coalesced MMIO state of @mr in every registered address
 * space.
 */
static void memory_region_update_coalesced_range(MemoryRegion *mr)
{
    AddressSpace *space;

    QTAILQ_FOREACH(space, &address_spaces, address_spaces_link) {
        memory_region_update_coalesced_range_as(mr, space);
    }
}

A
Avi Kivity 已提交
1629 1630 1631
/*
 * Make the whole of @mr a single coalesced MMIO range: any existing
 * coalesced ranges are dropped, then one range spanning [0, size) is added.
 */
void memory_region_set_coalescing(MemoryRegion *mr)
{
    uint64_t whole;

    memory_region_clear_coalescing(mr);
    whole = int128_get64(mr->size);
    memory_region_add_coalescing(mr, 0, whole);
}

void memory_region_add_coalescing(MemoryRegion *mr,
A
Avi Kivity 已提交
1636
                                  hwaddr offset,
A
Avi Kivity 已提交
1637 1638
                                  uint64_t size)
{
1639
    CoalescedMemoryRange *cmr = g_malloc(sizeof(*cmr));
A
Avi Kivity 已提交
1640

1641
    cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size));
A
Avi Kivity 已提交
1642 1643
    QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link);
    memory_region_update_coalesced_range(mr);
1644
    memory_region_set_flush_coalesced(mr);
A
Avi Kivity 已提交
1645 1646 1647 1648 1649
}

/*
 * Remove every coalesced MMIO range from @mr.
 *
 * The coalesced MMIO buffer is flushed first and the region's flush flag
 * is cleared.  Listeners are only re-notified when at least one range was
 * actually removed.
 */
void memory_region_clear_coalescing(MemoryRegion *mr)
{
    CoalescedMemoryRange *entry;
    bool removed_any;

    qemu_flush_coalesced_mmio_buffer();
    mr->flush_coalesced_mmio = false;

    removed_any = !QTAILQ_EMPTY(&mr->coalesced);
    while ((entry = QTAILQ_FIRST(&mr->coalesced)) != NULL) {
        QTAILQ_REMOVE(&mr->coalesced, entry, link);
        g_free(entry);
    }

    if (removed_any) {
        memory_region_update_coalesced_range(mr);
    }
}

1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679
/* Mark @mr as requiring a coalesced MMIO flush (sets flush_coalesced_mmio). */
void memory_region_set_flush_coalesced(MemoryRegion *mr)
{
    mr->flush_coalesced_mmio = true;
}

/*
 * Flush the coalesced MMIO buffer and clear @mr's flush flag.
 *
 * The flag is left set while @mr still owns coalesced ranges.
 */
void memory_region_clear_flush_coalesced(MemoryRegion *mr)
{
    qemu_flush_coalesced_mmio_buffer();

    if (!QTAILQ_EMPTY(&mr->coalesced)) {
        return;
    }
    mr->flush_coalesced_mmio = false;
}

1680 1681 1682 1683 1684 1685 1686 1687 1688 1689
/* Set the global_locking flag of @mr. */
void memory_region_set_global_locking(MemoryRegion *mr)
{
    mr->global_locking = true;
}

/* Clear the global_locking flag of @mr. */
void memory_region_clear_global_locking(MemoryRegion *mr)
{
    mr->global_locking = false;
}

P
Pavel Fedin 已提交
1690 1691
/*
 * Latched by memory_region_add_eventfd() once the "eventfd without MMIO
 * binding" message has been reported, so that it is printed only once.
 */
static bool userspace_eventfd_warning;

A
Avi Kivity 已提交
1692
void memory_region_add_eventfd(MemoryRegion *mr,
A
Avi Kivity 已提交
1693
                               hwaddr addr,
A
Avi Kivity 已提交
1694 1695 1696
                               unsigned size,
                               bool match_data,
                               uint64_t data,
1697
                               EventNotifier *e)
A
Avi Kivity 已提交
1698 1699
{
    MemoryRegionIoeventfd mrfd = {
1700 1701
        .addr.start = int128_make64(addr),
        .addr.size = int128_make64(size),
A
Avi Kivity 已提交
1702 1703
        .match_data = match_data,
        .data = data,
1704
        .e = e,
A
Avi Kivity 已提交
1705 1706 1707
    };
    unsigned i;

P
Pavel Fedin 已提交
1708 1709 1710 1711 1712 1713 1714
    if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
                            userspace_eventfd_warning))) {
        userspace_eventfd_warning = true;
        error_report("Using eventfd without MMIO binding in KVM. "
                     "Suboptimal performance expected");
    }

1715 1716 1717
    if (size) {
        adjust_endianness(mr, &mrfd.data, size);
    }
1718
    memory_region_transaction_begin();
A
Avi Kivity 已提交
1719 1720 1721 1722 1723 1724
    for (i = 0; i < mr->ioeventfd_nb; ++i) {
        if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
            break;
        }
    }
    ++mr->ioeventfd_nb;
1725
    mr->ioeventfds = g_realloc(mr->ioeventfds,
A
Avi Kivity 已提交
1726 1727 1728 1729
                                  sizeof(*mr->ioeventfds) * mr->ioeventfd_nb);
    memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i],
            sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
    mr->ioeventfds[i] = mrfd;
1730
    ioeventfd_update_pending |= mr->enabled;
1731
    memory_region_transaction_commit();
A
Avi Kivity 已提交
1732 1733 1734
}

void memory_region_del_eventfd(MemoryRegion *mr,
A
Avi Kivity 已提交
1735
                               hwaddr addr,
A
Avi Kivity 已提交
1736 1737 1738
                               unsigned size,
                               bool match_data,
                               uint64_t data,
1739
                               EventNotifier *e)
A
Avi Kivity 已提交
1740 1741
{
    MemoryRegionIoeventfd mrfd = {
1742 1743
        .addr.start = int128_make64(addr),
        .addr.size = int128_make64(size),
A
Avi Kivity 已提交
1744 1745
        .match_data = match_data,
        .data = data,
1746
        .e = e,
A
Avi Kivity 已提交
1747 1748 1749
    };
    unsigned i;

1750 1751 1752
    if (size) {
        adjust_endianness(mr, &mrfd.data, size);
    }
1753
    memory_region_transaction_begin();
A
Avi Kivity 已提交
1754 1755 1756 1757 1758 1759 1760 1761 1762
    for (i = 0; i < mr->ioeventfd_nb; ++i) {
        if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
            break;
        }
    }
    assert(i != mr->ioeventfd_nb);
    memmove(&mr->ioeventfds[i], &mr->ioeventfds[i+1],
            sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb - (i+1)));
    --mr->ioeventfd_nb;
1763
    mr->ioeventfds = g_realloc(mr->ioeventfds,
A
Avi Kivity 已提交
1764
                                  sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
1765
    ioeventfd_update_pending |= mr->enabled;
1766
    memory_region_transaction_commit();
A
Avi Kivity 已提交
1767 1768
}

1769
/*
 * Link @subregion into the subregion list of its container.
 *
 * subregion->container and subregion->addr must already be set.  A
 * reference is taken on @subregion, and it is inserted in front of the
 * first sibling whose priority is not higher than its own (or at the tail
 * if there is none).  Runs inside a memory transaction and schedules a
 * topology update when both container and subregion are enabled.
 */
static void memory_region_update_container_subregions(MemoryRegion *subregion)
{
    MemoryRegion *parent = subregion->container;
    hwaddr start = subregion->addr;
    MemoryRegion *sibling;

    memory_region_transaction_begin();

    memory_region_ref(subregion);

    /* Collision detection; the diagnostic itself is compiled out. */
    QTAILQ_FOREACH(sibling, &parent->subregions, subregions_link) {
        if (subregion->may_overlap || sibling->may_overlap) {
            continue;
        }
        if (int128_ge(int128_make64(start),
                      int128_add(int128_make64(sibling->addr), sibling->size))
            || int128_le(int128_add(int128_make64(start), subregion->size),
                         int128_make64(sibling->addr))) {
            continue;
        }
#if 0
        printf("warning: subregion collision %llx/%llx (%s) "
               "vs %llx/%llx (%s)\n",
               (unsigned long long)start,
               (unsigned long long)int128_get64(subregion->size),
               subregion->name,
               (unsigned long long)sibling->addr,
               (unsigned long long)int128_get64(sibling->size),
               sibling->name);
#endif
    }

    /* The loop variable is NULL when no suitable sibling was found. */
    QTAILQ_FOREACH(sibling, &parent->subregions, subregions_link) {
        if (subregion->priority >= sibling->priority) {
            break;
        }
    }
    if (sibling) {
        QTAILQ_INSERT_BEFORE(sibling, subregion, subregions_link);
    } else {
        QTAILQ_INSERT_TAIL(&parent->subregions, subregion, subregions_link);
    }

    memory_region_update_pending |= parent->enabled && subregion->enabled;
    memory_region_transaction_commit();
}

1811 1812 1813 1814
/*
 * Shared tail of the add_subregion variants: record @mr and @offset in
 * @subregion, then link it into the container's list.  @subregion must not
 * already have a container (asserted).
 */
static void memory_region_add_subregion_common(MemoryRegion *mr,
                                               hwaddr offset,
                                               MemoryRegion *subregion)
{
    assert(subregion->container == NULL);

    subregion->addr = offset;
    subregion->container = mr;
    memory_region_update_container_subregions(subregion);
}
A
Avi Kivity 已提交
1820 1821

void memory_region_add_subregion(MemoryRegion *mr,
A
Avi Kivity 已提交
1822
                                 hwaddr offset,
A
Avi Kivity 已提交
1823 1824 1825 1826 1827 1828 1829 1830
                                 MemoryRegion *subregion)
{
    subregion->may_overlap = false;
    subregion->priority = 0;
    memory_region_add_subregion_common(mr, offset, subregion);
}

void memory_region_add_subregion_overlap(MemoryRegion *mr,
A
Avi Kivity 已提交
1831
                                         hwaddr offset,
A
Avi Kivity 已提交
1832
                                         MemoryRegion *subregion,
1833
                                         int priority)
A
Avi Kivity 已提交
1834 1835 1836 1837 1838 1839 1840 1841 1842
{
    subregion->may_overlap = true;
    subregion->priority = priority;
    memory_region_add_subregion_common(mr, offset, subregion);
}

/*
 * Detach @subregion from its container @mr.
 *
 * @subregion must currently be a child of @mr (asserted).  It is unlinked
 * from mr->subregions, the reference taken when it was linked is dropped,
 * and a topology update is scheduled when both regions are enabled.  All
 * of this happens inside a memory transaction.
 */
void memory_region_del_subregion(MemoryRegion *mr,
                                 MemoryRegion *subregion)
{
    bool was_visible;

    memory_region_transaction_begin();
    assert(subregion->container == mr);
    subregion->container = NULL;
    QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);

    /*
     * Sample the enabled bits before dropping the reference: should the
     * unref below release the last reference, @subregion must not be
     * dereferenced afterwards.
     */
    was_visible = mr->enabled && subregion->enabled;
    memory_region_unref(subregion);

    memory_region_update_pending |= was_visible;
    memory_region_transaction_commit();
}

/*
 * Enable or disable @mr.  Nothing happens when the state is unchanged;
 * otherwise the flag is flipped inside a memory transaction and a topology
 * update is requested.
 */
void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
{
    if (mr->enabled != enabled) {
        memory_region_transaction_begin();
        mr->enabled = enabled;
        memory_region_update_pending = true;
        memory_region_transaction_commit();
    }
}
A
Avi Kivity 已提交
1862

1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878
/*
 * Change the size of @mr.
 *
 * A @size of UINT64_MAX is stored as int128_2_64(); any other value is
 * stored as is.  When the size actually changes, the update is performed
 * inside a memory transaction and a topology update is requested.
 */
void memory_region_set_size(MemoryRegion *mr, uint64_t size)
{
    Int128 wanted = (size == UINT64_MAX) ? int128_2_64()
                                         : int128_make64(size);

    if (!int128_eq(wanted, mr->size)) {
        memory_region_transaction_begin();
        mr->size = wanted;
        memory_region_update_pending = true;
        memory_region_transaction_commit();
    }
}

1879
/*
 * Remove @mr from its container and insert it again, so that its position
 * in the container's list is recomputed.  No-op for a region without a
 * container.
 *
 * A temporary reference is held on @mr across the removal, and the whole
 * sequence runs inside one memory transaction.
 */
static void memory_region_readd_subregion(MemoryRegion *mr)
{
    MemoryRegion *parent = mr->container;

    if (!parent) {
        return;
    }

    memory_region_transaction_begin();
    memory_region_ref(mr);

    memory_region_del_subregion(parent, mr);
    mr->container = parent;
    memory_region_update_container_subregions(mr);

    memory_region_unref(mr);
    memory_region_transaction_commit();
}
1893

1894 1895 1896 1897 1898 1899
/*
 * Move @mr to offset @addr within its container.  Does nothing when the
 * address is unchanged; otherwise the region is re-added to its container.
 */
void memory_region_set_address(MemoryRegion *mr, hwaddr addr)
{
    if (mr->addr == addr) {
        return;
    }

    mr->addr = addr;
    memory_region_readd_subregion(mr);
}

A
Avi Kivity 已提交
1902
/*
 * Change the offset of alias region @mr into its target.  @mr must be an
 * alias (asserted).  An unchanged offset is a no-op; otherwise the offset
 * is updated inside a memory transaction and, if @mr is enabled, a topology
 * update is requested.
 */
void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset)
{
    assert(mr->alias);

    if (mr->alias_offset != offset) {
        memory_region_transaction_begin();
        mr->alias_offset = offset;
        memory_region_update_pending |= mr->enabled;
        memory_region_transaction_commit();
    }
}

1916 1917 1918 1919 1920
/* Return the value of the align field of @mr. */
uint64_t memory_region_get_alignment(const MemoryRegion *mr)
{
    return mr->align;
}

1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933
/*
 * bsearch() comparator: @addr_ points to the AddrRange being looked up,
 * @fr_ to a FlatRange array element.
 *
 * Returns -1 when the key ends at or before the element's start, 1 when it
 * starts at or after the element's end, and 0 otherwise.
 */
static int cmp_flatrange_addr(const void *addr_, const void *fr_)
{
    const AddrRange *key = addr_;
    const FlatRange *elem = fr_;

    if (int128_le(addrrange_end(*key), elem->addr.start)) {
        return -1;
    }
    if (int128_ge(key->start, addrrange_end(elem->addr))) {
        return 1;
    }
    return 0;
}

1934
/*
 * Binary-search @view for a flat range that compares equal to @addr under
 * cmp_flatrange_addr().  Returns NULL when there is none.
 */
static FlatRange *flatview_lookup(FlatView *view, AddrRange addr)
{
    FlatRange *hit = bsearch(&addr, view->ranges, view->nr,
                             sizeof(FlatRange), cmp_flatrange_addr);

    return hit;
}

1940 1941 1942 1943 1944
/* Report whether @mr currently has a container. */
bool memory_region_is_mapped(MemoryRegion *mr)
{
    return mr->container != NULL;
}

1945 1946 1947 1948 1949
/*
 * Same as memory_region_find(), except that no reference is taken on the
 * returned region.  Must be called from within an RCU critical section.
 *
 * @addr is relative to @mr and @size is the length of the range to look
 * up.  The returned section has a NULL .mr when the root of @mr's
 * container chain is not attached to an address space, or when nothing in
 * the current flat view matches the range.
 */
static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr,
                                                  hwaddr addr, uint64_t size)
{
    MemoryRegionSection ret = { .mr = NULL };
    MemoryRegion *root = mr;
    AddressSpace *as;
    AddrRange range;
    FlatView *view;
    FlatRange *fr;

    /* Accumulate the offsets of @mr and of all of its containers. */
    addr += root->addr;
    while (root->container) {
        root = root->container;
        addr += root->addr;
    }

    as = memory_region_to_address_space(root);
    if (!as) {
        return ret;
    }
    range = addrrange_make(int128_make64(addr), int128_make64(size));

    view = atomic_rcu_read(&as->current_map);
    fr = flatview_lookup(view, range);
    if (!fr) {
        return ret;
    }

    /* Step back to the first flat range that intersects the request. */
    while (fr > view->ranges && addrrange_intersects(fr[-1].addr, range)) {
        --fr;
    }

    range = addrrange_intersection(range, fr->addr);

    ret.mr = fr->mr;
    ret.address_space = as;
    ret.offset_within_region =
        fr->offset_in_region
        + int128_get64(int128_sub(range.start, fr->addr.start));
    ret.size = range.size;
    ret.offset_within_address_space = int128_get64(range.start);
    ret.readonly = fr->readonly;
    return ret;
}

/*
 * Look up the range [@addr, @addr + @size) relative to @mr.
 *
 * The lookup runs under the RCU read lock.  When a region is found, a
 * reference is taken on it before the lock is released; a section with a
 * NULL .mr is returned otherwise.
 */
MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                       hwaddr addr, uint64_t size)
{
    MemoryRegionSection section;

    rcu_read_lock();
    section = memory_region_find_rcu(mr, addr, size);
    if (section.mr != NULL) {
        memory_region_ref(section.mr);
    }
    rcu_read_unlock();

    return section;
}

2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
/*
 * Report whether a one-byte lookup at @addr, relative to @container,
 * resolves to a region other than @container itself.
 */
bool memory_region_present(MemoryRegion *container, hwaddr addr)
{
    MemoryRegion *found;

    rcu_read_lock();
    found = memory_region_find_rcu(container, addr, 1).mr;
    rcu_read_unlock();

    if (found == NULL) {
        return false;
    }
    return found != container;
}

2015
/*
 * Invoke the log_sync listener callback for every flat range of @as, in
 * forward listener order.
 */
void address_space_sync_dirty_bitmap(AddressSpace *as)
{
    FlatView *view = address_space_get_flatview(as);
    FlatRange *fr;

    FOR_EACH_FLAT_RANGE(fr, view) {
        MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
    }

    flatview_unref(view);
}

void memory_global_dirty_log_start(void)
{
    global_dirty_log = true;
2030

2031
    MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
2032 2033 2034 2035 2036

    /* Refresh DIRTY_LOG_MIGRATION bit.  */
    memory_region_transaction_begin();
    memory_region_update_pending = true;
    memory_region_transaction_commit();
2037 2038 2039 2040 2041
}

void memory_global_dirty_log_stop(void)
{
    global_dirty_log = false;
2042 2043 2044 2045 2046 2047

    /* Refresh DIRTY_LOG_MIGRATION bit.  */
    memory_region_transaction_begin();
    memory_region_update_pending = true;
    memory_region_transaction_commit();

2048
    MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse);
2049 2050 2051 2052 2053
}

/*
 * Replay the current state of address space @as to @listener.
 *
 * Skipped entirely when the listener is filtered to a different address
 * space.  Otherwise the listener sees, in order: begin, log_global_start
 * (only while global dirty logging is on), then for each flat range
 * log_start (only when the range has a dirty log mask) and region_add, and
 * finally commit.  Every callback is optional.
 */
static void listener_add_address_space(MemoryListener *listener,
                                       AddressSpace *as)
{
    AddressSpace *wanted = listener->address_space_filter;
    FlatView *view;
    FlatRange *fr;

    if (wanted && wanted != as) {
        return;
    }

    if (listener->begin) {
        listener->begin(listener);
    }
    if (global_dirty_log && listener->log_global_start) {
        listener->log_global_start(listener);
    }

    view = address_space_get_flatview(as);
    FOR_EACH_FLAT_RANGE(fr, view) {
        MemoryRegionSection section = {
            .mr = fr->mr,
            .address_space = as,
            .offset_within_region = fr->offset_in_region,
            .size = fr->addr.size,
            .offset_within_address_space = int128_get64(fr->addr.start),
            .readonly = fr->readonly,
        };

        if (fr->dirty_log_mask && listener->log_start) {
            listener->log_start(listener, &section, 0, fr->dirty_log_mask);
        }
        if (listener->region_add) {
            listener->region_add(listener, &section);
        }
    }

    if (listener->commit) {
        listener->commit(listener);
    }
    flatview_unref(view);
}

2094
/*
 * Register @listener, optionally restricted to address space @filter.
 *
 * The listener goes to the tail of the global list when the list is empty
 * or its priority is not lower than the last entry's; otherwise it is
 * inserted before the first entry with a strictly higher priority.  The
 * current state of every address space is then replayed to it.
 */
void memory_listener_register(MemoryListener *listener, AddressSpace *filter)
{
    MemoryListener *pos = NULL;
    AddressSpace *space;
    bool append;

    listener->address_space_filter = filter;

    append = QTAILQ_EMPTY(&memory_listeners)
             || listener->priority >= QTAILQ_LAST(&memory_listeners,
                                                  memory_listeners)->priority;
    if (append) {
        QTAILQ_INSERT_TAIL(&memory_listeners, listener, link);
    } else {
        QTAILQ_FOREACH(pos, &memory_listeners, link) {
            if (listener->priority < pos->priority) {
                break;
            }
        }
        QTAILQ_INSERT_BEFORE(pos, listener, link);
    }

    QTAILQ_FOREACH(space, &address_spaces, address_spaces_link) {
        listener_add_address_space(listener, space);
    }
}

/* Unlink @listener from the global list of memory listeners. */
void memory_listener_unregister(MemoryListener *listener)
{
    QTAILQ_REMOVE(&memory_listeners, listener, link);
}
2122

2123
/*
 * Initialise caller-provided address space @as with @root as its root
 * region.
 *
 * @name is duplicated; "anonymous" is used when it is NULL.  A reference
 * is taken on @root, the address space starts with a reference count of 1
 * and a freshly initialised flat view, and is appended to the global list
 * of address spaces.  A topology update is requested when @root is
 * enabled.
 */
void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
{
    const char *label = name ? name : "anonymous";

    memory_region_ref(root);
    memory_region_transaction_begin();

    as->ref_count = 1;
    as->root = root;
    as->malloced = false;

    as->current_map = g_new(FlatView, 1);
    flatview_init(as->current_map);

    as->ioeventfd_nb = 0;
    as->ioeventfds = NULL;

    QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
    as->name = g_strdup(label);
    address_space_init_dispatch(as);

    memory_region_update_pending |= root->enabled;
    memory_region_transaction_commit();
}
A
Avi Kivity 已提交
2140

2141
/*
 * Final teardown of @as, scheduled by address_space_destroy() through
 * call_rcu().
 *
 * Asserts that no registered listener is still filtered to @as, then
 * releases the dispatch state, the flat view, the name, the ioeventfd
 * table and the reference on the root region.  The structure itself is
 * freed only when it is flagged as malloced.
 */
static void do_address_space_destroy(AddressSpace *as)
{
    /* Read the flag up front, before any of the teardown below. */
    bool heap_allocated = as->malloced;
    MemoryListener *listener;

    address_space_destroy_dispatch(as);

    QTAILQ_FOREACH(listener, &memory_listeners, link) {
        assert(listener->address_space_filter != as);
    }

    flatview_unref(as->current_map);
    g_free(as->name);
    g_free(as->ioeventfds);
    memory_region_unref(as->root);

    if (heap_allocated) {
        g_free(as);
    }
}

/*
 * Return a heap-allocated address space rooted at @root.
 *
 * When a malloced address space with the same root already exists, its
 * reference count is bumped and it is returned (@name is ignored in that
 * case).  Otherwise a new one is allocated, initialised with @name and
 * flagged as malloced.
 */
AddressSpace *address_space_init_shareable(MemoryRegion *root, const char *name)
{
    AddressSpace *as;

    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        if (as->malloced && root == as->root) {
            break;
        }
    }
    if (as) {
        as->ref_count++;
        return as;
    }

    as = g_malloc0(sizeof *as);
    address_space_init(as, root, name);
    as->malloced = true;
    return as;
}

2178 2179
/*
 * Drop one reference on @as; tear it down once the count reaches zero.
 *
 * Teardown clears the root inside a memory transaction, unlinks @as from
 * the global list, unregisters it, and defers the final release to
 * do_address_space_destroy() via call_rcu().
 */
void address_space_destroy(AddressSpace *as)
{
    MemoryRegion *saved_root = as->root;

    if (--as->ref_count) {
        return;
    }

    /* Flush out anything from MemoryListeners listening in on this */
    memory_region_transaction_begin();
    as->root = NULL;
    memory_region_transaction_commit();

    QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
    address_space_unregister(as);

    /*
     * From here on as->dispatch and as->current_map are dummy entries that
     * the guest should never use.  Put the root back so the deferred
     * destructor can unref it, and wait for the old values to expire
     * before the data is freed.
     */
    as->root = saved_root;
    call_rcu(as, do_address_space_destroy, rcu);
}

B
Blue Swirl 已提交
2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211
/* List node used by the mtree printing code to queue memory regions. */
typedef struct MemoryRegionList MemoryRegionList;

struct MemoryRegionList {
    const MemoryRegion *mr;                 /* queued region */
    QTAILQ_ENTRY(MemoryRegionList) queue;   /* list linkage */
};

/* Head of a queue of MemoryRegionList nodes. */
typedef QTAILQ_HEAD(queue, MemoryRegionList) MemoryRegionListHead;

static void mtree_print_mr(fprintf_function mon_printf, void *f,
                           const MemoryRegion *mr, unsigned int level,
A
Avi Kivity 已提交
2212
                           hwaddr base,
2213
                           MemoryRegionListHead *alias_print_queue)
B
Blue Swirl 已提交
2214
{
2215 2216
    MemoryRegionList *new_ml, *ml, *next_ml;
    MemoryRegionListHead submr_print_queue;
B
Blue Swirl 已提交
2217 2218 2219
    const MemoryRegion *submr;
    unsigned int i;

2220
    if (!mr) {
B
Blue Swirl 已提交
2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232
        return;
    }

    for (i = 0; i < level; i++) {
        mon_printf(f, "  ");
    }

    if (mr->alias) {
        MemoryRegionList *ml;
        bool found = false;

        /* check if the alias is already in the queue */
2233
        QTAILQ_FOREACH(ml, alias_print_queue, queue) {
P
Paolo Bonzini 已提交
2234
            if (ml->mr == mr->alias) {
B
Blue Swirl 已提交
2235 2236 2237 2238 2239 2240 2241
                found = true;
            }
        }

        if (!found) {
            ml = g_new(MemoryRegionList, 1);
            ml->mr = mr->alias;
2242
            QTAILQ_INSERT_TAIL(alias_print_queue, ml, queue);
B
Blue Swirl 已提交
2243
        }
2244 2245
        mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
                   " (prio %d, %c%c): alias %s @%s " TARGET_FMT_plx
2246
                   "-" TARGET_FMT_plx "%s\n",
B
Blue Swirl 已提交
2247
                   base + mr->addr,
2248
                   base + mr->addr
2249 2250 2251
                   + (int128_nz(mr->size) ?
                      (hwaddr)int128_get64(int128_sub(mr->size,
                                                      int128_one())) : 0),
J
Jan Kiszka 已提交
2252
                   mr->priority,
2253 2254 2255
                   mr->romd_mode ? 'R' : '-',
                   !mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
                                                                       : '-',
2256 2257
                   memory_region_name(mr),
                   memory_region_name(mr->alias),
B
Blue Swirl 已提交
2258
                   mr->alias_offset,
2259
                   mr->alias_offset
2260 2261
                   + (int128_nz(mr->size) ?
                      (hwaddr)int128_get64(int128_sub(mr->size,
2262 2263
                                                      int128_one())) : 0),
                   mr->enabled ? "" : " [disabled]");
B
Blue Swirl 已提交
2264
    } else {
2265
        mon_printf(f,
2266
                   TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): %s%s\n",
B
Blue Swirl 已提交
2267
                   base + mr->addr,
2268
                   base + mr->addr
2269 2270 2271
                   + (int128_nz(mr->size) ?
                      (hwaddr)int128_get64(int128_sub(mr->size,
                                                      int128_one())) : 0),
J
Jan Kiszka 已提交
2272
                   mr->priority,
2273 2274 2275
                   mr->romd_mode ? 'R' : '-',
                   !mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
                                                                       : '-',
2276 2277
                   memory_region_name(mr),
                   mr->enabled ? "" : " [disabled]");
B
Blue Swirl 已提交
2278
    }
2279 2280 2281

    QTAILQ_INIT(&submr_print_queue);

B
Blue Swirl 已提交
2282
    QTAILQ_FOREACH(submr, &mr->subregions, subregions_link) {
2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303
        new_ml = g_new(MemoryRegionList, 1);
        new_ml->mr = submr;
        QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
            if (new_ml->mr->addr < ml->mr->addr ||
                (new_ml->mr->addr == ml->mr->addr &&
                 new_ml->mr->priority > ml->mr->priority)) {
                QTAILQ_INSERT_BEFORE(ml, new_ml, queue);
                new_ml = NULL;
                break;
            }
        }
        if (new_ml) {
            QTAILQ_INSERT_TAIL(&submr_print_queue, new_ml, queue);
        }
    }

    QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
        mtree_print_mr(mon_printf, f, ml->mr, level + 1, base + mr->addr,
                       alias_print_queue);
    }

A
Avi Kivity 已提交
2304
    QTAILQ_FOREACH_SAFE(ml, &submr_print_queue, queue, next_ml) {
2305
        g_free(ml);
B
Blue Swirl 已提交
2306 2307 2308 2309 2310 2311 2312
    }
}

/*
 * Dump the memory topology through @mon_printf on stream @f.
 *
 * Every address space is printed as a tree starting at its root region.
 * The targets of the alias regions met along the way are then printed as
 * trees of their own; printing them may queue further alias targets,
 * which are handled by the same loop.
 */
void mtree_info(fprintf_function mon_printf, void *f)
{
    MemoryRegionListHead aliases;
    MemoryRegionList *node;
    AddressSpace *as;

    QTAILQ_INIT(&aliases);

    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        mon_printf(f, "address-space: %s\n", as->name);
        mtree_print_mr(mon_printf, f, as->root, 1, 0, &aliases);
        mon_printf(f, "\n");
    }

    /* print aliased regions */
    QTAILQ_FOREACH(node, &aliases, queue) {
        mon_printf(f, "memory-region: %s\n", memory_region_name(node->mr));
        mtree_print_mr(mon_printf, f, node->mr, 1, 0, &aliases);
        mon_printf(f, "\n");
    }

    /* Release the queue nodes. */
    while ((node = QTAILQ_FIRST(&aliases)) != NULL) {
        QTAILQ_REMOVE(&aliases, node, queue);
        g_free(node);
    }
}
P
Peter Crosthwaite 已提交
2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348

/*
 * QOM type description for TYPE_MEMORY_REGION: a TYPE_OBJECT subclass whose
 * instances are MemoryRegion structures, set up by memory_region_initfn()
 * and torn down by memory_region_finalize().
 */
static const TypeInfo memory_region_info = {
    .parent             = TYPE_OBJECT,
    .name               = TYPE_MEMORY_REGION,
    .instance_size      = sizeof(MemoryRegion),
    .instance_init      = memory_region_initfn,
    .instance_finalize  = memory_region_finalize,
};

/* Register the memory region type description with the type system. */
static void memory_register_types(void)
{
    type_register_static(&memory_region_info);
}

/*
 * NOTE(review): presumably arranges for memory_register_types() to run
 * during type initialisation -- confirm against the type_init() macro.
 */
type_init(memory_register_types)