/*
 * Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

25 26 27 28 29 30 31
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/cardTableRS.hpp"
#include "memory/sharedHeap.hpp"
#include "memory/space.inline.hpp"
#include "memory/universe.hpp"
32
#include "oops/oop.inline.hpp"
33 34 35
#include "runtime/java.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/virtualspace.hpp"
D
duke 已提交
36

37
void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
38 39
                                                             OopsInGenClosure* cl,
                                                             CardTableRS* ct,
40 41 42 43 44 45 46 47 48 49 50 51
                                                             int n_threads) {
  assert(n_threads > 0, "Error: expected n_threads > 0");
  assert((n_threads == 1 && ParallelGCThreads == 0) ||
         n_threads <= (int)ParallelGCThreads,
         "# worker threads != # requested!");
  // Make sure the LNC array is valid for the space.
  jbyte**   lowest_non_clean;
  uintptr_t lowest_non_clean_base_chunk_index;
  size_t    lowest_non_clean_chunk_size;
  get_LNC_array_for_space(sp, lowest_non_clean,
                          lowest_non_clean_base_chunk_index,
                          lowest_non_clean_chunk_size);
D
duke 已提交
52

53
  int n_strides = n_threads * ParGCStridesPerThread;
54 55 56
  SequentialSubTasksDone* pst = sp->par_seq_tasks();
  pst->set_n_threads(n_threads);
  pst->set_n_tasks(n_strides);
D
duke 已提交
57

58 59
  int stride = 0;
  while (!pst->is_task_claimed(/* reference */ stride)) {
60
    process_stride(sp, mr, stride, n_strides, cl, ct,
61 62 63 64 65 66 67 68 69 70 71 72 73
                   lowest_non_clean,
                   lowest_non_clean_base_chunk_index,
                   lowest_non_clean_chunk_size);
  }
  if (pst->all_tasks_completed()) {
    // Clear lowest_non_clean array for next time.
    intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
    uintptr_t last_chunk_index  = addr_to_chunk_index(mr.last());
    for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
      intptr_t ind = ch - lowest_non_clean_base_chunk_index;
      assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
             "Bounds error");
      lowest_non_clean[ind] = NULL;
D
duke 已提交
74 75 76 77 78 79 80 81 82
    }
  }
}

void
CardTableModRefBS::
process_stride(Space* sp,
               MemRegion used,
               jint stride, int n_strides,
83 84
               OopsInGenClosure* cl,
               CardTableRS* ct,
D
duke 已提交
85 86 87
               jbyte** lowest_non_clean,
               uintptr_t lowest_non_clean_base_chunk_index,
               size_t    lowest_non_clean_chunk_size) {
88 89
  // We go from higher to lower addresses here; it wouldn't help that much
  // because of the strided parallelism pattern used here.
D
duke 已提交
90 91 92 93 94 95 96 97 98 99 100 101

  // Find the first card address of the first chunk in the stride that is
  // at least "bottom" of the used region.
  jbyte*    start_card  = byte_for(used.start());
  jbyte*    end_card    = byte_after(used.last());
  uintptr_t start_chunk = addr_to_chunk_index(used.start());
  uintptr_t start_chunk_stride_num = start_chunk % n_strides;
  jbyte* chunk_card_start;

  if ((uintptr_t)stride >= start_chunk_stride_num) {
    chunk_card_start = (jbyte*)(start_card +
                                (stride - start_chunk_stride_num) *
102
                                ParGCCardsPerStrideChunk);
D
duke 已提交
103 104 105 106
  } else {
    // Go ahead to the next chunk group boundary, then to the requested stride.
    chunk_card_start = (jbyte*)(start_card +
                                (n_strides - start_chunk_stride_num + stride) *
107
                                ParGCCardsPerStrideChunk);
D
duke 已提交
108 109 110
  }

  while (chunk_card_start < end_card) {
111 112 113 114 115 116 117 118
    // Even though we go from lower to higher addresses below, the
    // strided parallelism can interleave the actual processing of the
    // dirty pages in various ways. For a specific chunk within this
    // stride, we take care to avoid double scanning or missing a card
    // by suitably initializing the "min_done" field in process_chunk_boundaries()
    // below, together with the dirty region extension accomplished in
    // DirtyCardToOopClosure::do_MemRegion().
    jbyte*    chunk_card_end = chunk_card_start + ParGCCardsPerStrideChunk;
D
duke 已提交
119 120 121 122 123 124 125
    // Invariant: chunk_mr should be fully contained within the "used" region.
    MemRegion chunk_mr       = MemRegion(addr_for(chunk_card_start),
                                         chunk_card_end >= end_card ?
                                           used.end() : addr_for(chunk_card_end));
    assert(chunk_mr.word_size() > 0, "[chunk_card_start > used_end)");
    assert(used.contains(chunk_mr), "chunk_mr should be subset of used");

126 127 128 129 130
    DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(),
                                                     cl->gen_boundary());
    ClearNoncleanCardWrapper clear_cl(dcto_cl, ct);


D
duke 已提交
131 132 133 134 135 136 137 138 139
    // Process the chunk.
    process_chunk_boundaries(sp,
                             dcto_cl,
                             chunk_mr,
                             used,
                             lowest_non_clean,
                             lowest_non_clean_base_chunk_index,
                             lowest_non_clean_chunk_size);

140 141 142 143 144
    // We want the LNC array updates above in process_chunk_boundaries
    // to be visible before any of the card table value changes as a
    // result of the dirty card iteration below.
    OrderAccess::storestore();

145
    // We do not call the non_clean_card_iterate_serial() version because
146 147 148
    // we want to clear the cards: clear_cl here does the work of finding
    // contiguous dirty ranges of cards to process and clear.
    clear_cl.do_MemRegion(chunk_mr);
D
duke 已提交
149 150

    // Find the next chunk of the stride.
151
    chunk_card_start += ParGCCardsPerStrideChunk * n_strides;
D
duke 已提交
152 153 154
  }
}

155 156 157 158 159 160 161 162 163

// If you want a talkative process_chunk_boundaries,
// then #define NOISY(x) x
#ifdef NOISY
#error "Encountered a global preprocessor flag, NOISY, which might clash with local definition to follow"
#else
#define NOISY(x)
#endif

D
duke 已提交
164 165 166 167 168 169 170 171 172 173
void
CardTableModRefBS::
process_chunk_boundaries(Space* sp,
                         DirtyCardToOopClosure* dcto_cl,
                         MemRegion chunk_mr,
                         MemRegion used,
                         jbyte** lowest_non_clean,
                         uintptr_t lowest_non_clean_base_chunk_index,
                         size_t    lowest_non_clean_chunk_size)
{
174 175 176 177 178 179 180 181 182 183 184 185
  // We must worry about non-array objects that cross chunk boundaries,
  // because such objects are both precisely and imprecisely marked:
  // .. if the head of such an object is dirty, the entire object
  //    needs to be scanned, under the interpretation that this
  //    was an imprecise mark
  // .. if the head of such an object is not dirty, we can assume
  //    precise marking and it's efficient to scan just the dirty
  //    cards.
  // In either case, each scanned reference must be scanned precisely
  // once so as to avoid cloning of a young referent. For efficiency,
  // our closures depend on this property and do not protect against
  // double scans.
D
duke 已提交
186 187 188 189

  uintptr_t cur_chunk_index = addr_to_chunk_index(chunk_mr.start());
  cur_chunk_index           = cur_chunk_index - lowest_non_clean_base_chunk_index;

190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
  NOISY(tty->print_cr("===========================================================================");)
  NOISY(tty->print_cr(" process_chunk_boundary: Called with [" PTR_FORMAT "," PTR_FORMAT ")",
                      chunk_mr.start(), chunk_mr.end());)

  // First, set "our" lowest_non_clean entry, which would be
  // used by the thread scanning an adjoining left chunk with
  // a non-array object straddling the mutual boundary.
  // Find the object that spans our boundary, if one exists.
  // first_block is the block possibly straddling our left boundary.
  HeapWord* first_block = sp->block_start(chunk_mr.start());
  assert((chunk_mr.start() != used.start()) || (first_block == chunk_mr.start()),
         "First chunk should always have a co-initial block");
  // Does the block straddle the chunk's left boundary, and is it
  // a non-array object?
  if (first_block < chunk_mr.start()        // first block straddles left bdry
      && sp->block_is_obj(first_block)      // first block is an object
      && !(oop(first_block)->is_objArray()  // first block is not an array (arrays are precisely dirtied)
           || oop(first_block)->is_typeArray())) {
    // Find our least non-clean card, so that a left neighbour
    // does not scan an object straddling the mutual boundary
    // too far to the right, and attempt to scan a portion of
    // that object twice.
    jbyte* first_dirty_card = NULL;
    jbyte* last_card_of_first_obj =
        byte_for(first_block + sp->block_size(first_block) - 1);
    jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start());
    jbyte* last_card_of_cur_chunk = byte_for(chunk_mr.last());
    jbyte* last_card_to_check =
      (jbyte*) MIN2((intptr_t) last_card_of_cur_chunk,
                    (intptr_t) last_card_of_first_obj);
    // Note that this does not need to go beyond our last card
    // if our first object completely straddles this chunk.
    for (jbyte* cur = first_card_of_cur_chunk;
         cur <= last_card_to_check; cur++) {
      jbyte val = *cur;
      if (card_will_be_scanned(val)) {
        first_dirty_card = cur; break;
      } else {
        assert(!card_may_have_been_dirty(val), "Error");
      }
    }
    if (first_dirty_card != NULL) {
      NOISY(tty->print_cr(" LNC: Found a dirty card at " PTR_FORMAT " in current chunk",
                    first_dirty_card);)
      assert(0 <= cur_chunk_index && cur_chunk_index < lowest_non_clean_chunk_size,
             "Bounds error.");
      assert(lowest_non_clean[cur_chunk_index] == NULL,
             "Write exactly once : value should be stable hereafter for this round");
      lowest_non_clean[cur_chunk_index] = first_dirty_card;
    } NOISY(else {
      tty->print_cr(" LNC: Found no dirty card in current chunk; leaving LNC entry NULL");
      // In the future, we could have this thread look for a non-NULL value to copy from its
      // right neighbour (up to the end of the first object).
      if (last_card_of_cur_chunk < last_card_of_first_obj) {
        tty->print_cr(" LNC: BEWARE!!! first obj straddles past right end of chunk:\n"
                      "   might be efficient to get value from right neighbour?");
      }
    })
  } else {
    // In this case we can help our neighbour by just asking them
    // to stop at our first card (even though it may not be dirty).
    NOISY(tty->print_cr(" LNC: first block is not a non-array object; setting LNC to first card of current chunk");)
    assert(lowest_non_clean[cur_chunk_index] == NULL, "Write once : value should be stable hereafter");
    jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start());
    lowest_non_clean[cur_chunk_index] = first_card_of_cur_chunk;
  }
  NOISY(tty->print_cr(" process_chunk_boundary: lowest_non_clean[" INTPTR_FORMAT "] = " PTR_FORMAT
                "   which corresponds to the heap address " PTR_FORMAT,
                cur_chunk_index, lowest_non_clean[cur_chunk_index],
                (lowest_non_clean[cur_chunk_index] != NULL)
                ? addr_for(lowest_non_clean[cur_chunk_index])
                : NULL);)
  NOISY(tty->print_cr("---------------------------------------------------------------------------");)

  // Next, set our own max_to_do, which will strictly/exclusively bound
  // the highest address that we will scan past the right end of our chunk.
  HeapWord* max_to_do = NULL;
D
duke 已提交
267
  if (chunk_mr.end() < used.end()) {
268 269 270 271 272
    // This is not the last chunk in the used region.
    // What is our last block? We check the first block of
    // the next (right) chunk rather than strictly check our last block
    // because it's potentially more efficient to do so.
    HeapWord* const last_block = sp->block_start(chunk_mr.end());
D
duke 已提交
273
    assert(last_block <= chunk_mr.end(), "In case this property changes.");
274 275 276 277
    if ((last_block == chunk_mr.end())     // our last block does not straddle boundary
        || !sp->block_is_obj(last_block)   // last_block isn't an object
        || oop(last_block)->is_objArray()  // last_block is an array (precisely marked)
        || oop(last_block)->is_typeArray()) {
D
duke 已提交
278
      max_to_do = chunk_mr.end();
279 280
      NOISY(tty->print_cr(" process_chunk_boundary: Last block on this card is not a non-array object;\n"
                         "   max_to_do left at " PTR_FORMAT, max_to_do);)
D
duke 已提交
281
    } else {
282 283
      assert(last_block < chunk_mr.end(), "Tautology");
      // It is a non-array object that straddles the right boundary of this chunk.
D
duke 已提交
284 285 286
      // last_obj_card is the card corresponding to the start of the last object
      // in the chunk.  Note that the last object may not start in
      // the chunk.
287 288 289 290 291
      jbyte* const last_obj_card = byte_for(last_block);
      const jbyte val = *last_obj_card;
      if (!card_will_be_scanned(val)) {
        assert(!card_may_have_been_dirty(val), "Error");
        // The card containing the head is not dirty.  Any marks on
D
duke 已提交
292
        // subsequent cards still in this chunk must have been made
293
        // precisely; we can cap processing at the end of our chunk.
D
duke 已提交
294
        max_to_do = chunk_mr.end();
295 296 297
        NOISY(tty->print_cr(" process_chunk_boundary: Head of last object on this card is not dirty;\n"
                            "   max_to_do left at " PTR_FORMAT,
                            max_to_do);)
D
duke 已提交
298 299 300 301 302
      } else {
        // The last object must be considered dirty, and extends onto the
        // following chunk.  Look for a dirty card in that chunk that will
        // bound our processing.
        jbyte* limit_card = NULL;
303 304
        const size_t last_block_size = sp->block_size(last_block);
        jbyte* const last_card_of_last_obj =
D
duke 已提交
305
          byte_for(last_block + last_block_size - 1);
306
        jbyte* const first_card_of_next_chunk = byte_for(chunk_mr.end());
D
duke 已提交
307
        // This search potentially goes a long distance looking
308 309 310 311 312
        // for the next card that will be scanned, terminating
        // at the end of the last_block, if no earlier dirty card
        // is found.
        assert(byte_for(chunk_mr.end()) - byte_for(chunk_mr.start()) == ParGCCardsPerStrideChunk,
               "last card of next chunk may be wrong");
D
duke 已提交
313
        for (jbyte* cur = first_card_of_next_chunk;
314 315 316 317 318
             cur <= last_card_of_last_obj; cur++) {
          const jbyte val = *cur;
          if (card_will_be_scanned(val)) {
            NOISY(tty->print_cr(" Found a non-clean card " PTR_FORMAT " with value 0x%x",
                                cur, (int)val);)
D
duke 已提交
319
            limit_card = cur; break;
320 321
          } else {
            assert(!card_may_have_been_dirty(val), "Error: card can't be skipped");
D
duke 已提交
322 323 324 325
          }
        }
        if (limit_card != NULL) {
          max_to_do = addr_for(limit_card);
326 327 328 329 330
          assert(limit_card != NULL && max_to_do != NULL, "Error");
          NOISY(tty->print_cr(" process_chunk_boundary: Found a dirty card at " PTR_FORMAT
                        "   max_to_do set at " PTR_FORMAT " which is before end of last block in chunk: "
                        PTR_FORMAT " + " PTR_FORMAT " = " PTR_FORMAT,
                        limit_card, max_to_do, last_block, last_block_size, (last_block+last_block_size));)
D
duke 已提交
331
        } else {
332 333 334 335 336 337
          // The following is a pessimistic value, because it's possible
          // that a dirty card on a subsequent chunk has been cleared by
          // the time we get to look at it; we'll correct for that further below,
          // using the LNC array which records the least non-clean card
          // before cards were cleared in a particular chunk.
          limit_card = last_card_of_last_obj;
D
duke 已提交
338
          max_to_do = last_block + last_block_size;
339 340 341 342 343
          assert(limit_card != NULL && max_to_do != NULL, "Error");
          NOISY(tty->print_cr(" process_chunk_boundary: Found no dirty card before end of last block in chunk\n"
                              "   Setting limit_card to " PTR_FORMAT
                              " and max_to_do " PTR_FORMAT " + " PTR_FORMAT " = " PTR_FORMAT,
                              limit_card, last_block, last_block_size, max_to_do);)
D
duke 已提交
344
        }
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
        assert(0 < cur_chunk_index+1 && cur_chunk_index+1 < lowest_non_clean_chunk_size,
               "Bounds error.");
        // It is possible that a dirty card for the last object may have been
        // cleared before we had a chance to examine it. In that case, the value
        // will have been logged in the LNC for that chunk.
        // We need to examine as many chunks to the right as this object
        // covers.
        const uintptr_t last_chunk_index_to_check = addr_to_chunk_index(last_block + last_block_size - 1)
                                                    - lowest_non_clean_base_chunk_index;
        DEBUG_ONLY(const uintptr_t last_chunk_index = addr_to_chunk_index(used.end())
                                                      - lowest_non_clean_base_chunk_index;)
        assert(last_chunk_index_to_check <= last_chunk_index,
               err_msg("Out of bounds: last_chunk_index_to_check " INTPTR_FORMAT
                       " exceeds last_chunk_index " INTPTR_FORMAT,
                       last_chunk_index_to_check, last_chunk_index));
        for (uintptr_t lnc_index = cur_chunk_index + 1;
             lnc_index <= last_chunk_index_to_check;
             lnc_index++) {
          jbyte* lnc_card = lowest_non_clean[lnc_index];
          if (lnc_card != NULL) {
            // we can stop at the first non-NULL entry we find
            if (lnc_card <= limit_card) {
              NOISY(tty->print_cr(" process_chunk_boundary: LNC card " PTR_FORMAT " is lower than limit_card " PTR_FORMAT,
                                  "   max_to_do will be lowered to " PTR_FORMAT " from " PTR_FORMAT,
                                  lnc_card, limit_card, addr_for(lnc_card), max_to_do);)
              limit_card = lnc_card;
              max_to_do = addr_for(limit_card);
              assert(limit_card != NULL && max_to_do != NULL, "Error");
            }
            // In any case, we break now
            break;
          }  // else continue to look for a non-NULL entry if any
        }
        assert(limit_card != NULL && max_to_do != NULL, "Error");
D
duke 已提交
379
      }
380
      assert(max_to_do != NULL, "OOPS 1 !");
D
duke 已提交
381
    }
382
    assert(max_to_do != NULL, "OOPS 2!");
D
duke 已提交
383 384
  } else {
    max_to_do = used.end();
385 386 387
    NOISY(tty->print_cr(" process_chunk_boundary: Last chunk of this space;\n"
                  "   max_to_do left at " PTR_FORMAT,
                  max_to_do);)
D
duke 已提交
388
  }
389
  assert(max_to_do != NULL, "OOPS 3!");
D
duke 已提交
390 391 392 393 394 395
  // Now we can set the closure we're using so it doesn't to beyond
  // max_to_do.
  dcto_cl->set_min_done(max_to_do);
#ifndef PRODUCT
  dcto_cl->set_last_bottom(max_to_do);
#endif
396
  NOISY(tty->print_cr("===========================================================================\n");)
D
duke 已提交
397 398
}

399 400
#undef NOISY

D
duke 已提交
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415
// Returns, through the three reference parameters, the LNC ("lowest
// non-clean") array that belongs to the covered region containing
// sp->bottom(), together with the chunk index that corresponds to its
// element 0 and its length in chunks.
//
// The array is (re)allocated here when it does not exist yet or when its
// length no longer matches the number of chunks needed to cover the region.
// That check is made at most once per collection for each covered region
// (tracked in _last_LNC_resizing_collection) and is performed while
// holding ParGCRareEvent_lock.
void
CardTableModRefBS::
get_LNC_array_for_space(Space* sp,
                        jbyte**& lowest_non_clean,
                        uintptr_t& lowest_non_clean_base_chunk_index,
                        size_t& lowest_non_clean_chunk_size) {

  int       i        = find_covering_region_containing(sp->bottom());
  MemRegion covered  = _covered[i];
  size_t    n_chunks = chunks_to_cover(covered);

  // Only the first thread to obtain the lock will resize the
  // LNC array for the covered region.  Any later expansion can't affect
  // the used_at_save_marks region.
  // (I observed a bug in which the first thread to execute this would
  // resize, and then it would cause "expand_and_allocate" that would
  // increase the number of chunks in the covered region.  Then a second
  // thread would come and execute this, see that the size didn't match,
  // and free and allocate again.  So the first thread would be using a
  // freed "_lowest_non_clean" array.)

  // Do a dirty read here. If we pass the conditional then take the rare
  // event lock and do the read again in case some other thread had already
  // succeeded and done the resize.
  int cur_collection = Universe::heap()->total_collections();
  if (_last_LNC_resizing_collection[i] != cur_collection) {
    MutexLocker x(ParGCRareEvent_lock);
    if (_last_LNC_resizing_collection[i] != cur_collection) {
      if (_lowest_non_clean[i] == NULL ||
          n_chunks != _lowest_non_clean_chunk_size[i]) {

        // Release a stale array of the wrong size, if there is one.
        if (_lowest_non_clean[i] != NULL) {
          assert(n_chunks != _lowest_non_clean_chunk_size[i],
                 "logical consequence");
          FREE_C_HEAP_ARRAY(CardPtr, _lowest_non_clean[i]);
          _lowest_non_clean[i] = NULL;
        }
        // At this point there is no array for this region; allocate one of
        // the right size and start with every entry cleared.
        _lowest_non_clean[i]                  = NEW_C_HEAP_ARRAY(CardPtr, n_chunks);
        _lowest_non_clean_chunk_size[i]       = n_chunks;
        _lowest_non_clean_base_chunk_index[i] = addr_to_chunk_index(covered.start());
        // Index with size_t: n_chunks is a size_t and must not be narrowed.
        for (size_t j = 0; j < n_chunks; j++) {
          _lowest_non_clean[i][j] = NULL;
        }
      }
      _last_LNC_resizing_collection[i] = cur_collection;
    }
  }
  // In any case, now do the initialization.
  lowest_non_clean                  = _lowest_non_clean[i];
  lowest_non_clean_base_chunk_index = _lowest_non_clean_base_chunk_index[i];
  lowest_non_clean_chunk_size       = _lowest_non_clean_chunk_size[i];
}