symbolTable.cpp 38.2 KB
Newer Older
D
duke 已提交
1
/*
2
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
D
duke 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
19 20 21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
D
duke 已提交
22 23 24
 *
 */

25
#include "precompiled.hpp"
26
#include "classfile/altHashing.hpp"
27 28 29 30
#include "classfile/javaClasses.hpp"
#include "classfile/symbolTable.hpp"
#include "classfile/systemDictionary.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
31
#include "memory/allocation.inline.hpp"
32 33 34 35 36 37
#include "memory/filemap.hpp"
#include "memory/gcLocker.inline.hpp"
#include "oops/oop.inline.hpp"
#include "oops/oop.inline2.hpp"
#include "runtime/mutexLocker.hpp"
#include "utilities/hashtable.inline.hpp"
P
pliden 已提交
38
#if INCLUDE_ALL_GCS
39
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
P
pliden 已提交
40 41
#include "gc_implementation/g1/g1StringDedup.hpp"
#endif
D
duke 已提交
42

43 44
PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC

D
duke 已提交
45 46
// --------------------------------------------------------------------------

47 48 49
// the number of buckets a thread claims
const int ClaimChunkSize = 32;

D
duke 已提交
50
SymbolTable* SymbolTable::_the_table = NULL;
51 52
// Static arena for symbols that are not deallocated
Arena* SymbolTable::_arena = NULL;
53
bool SymbolTable::_needs_rehashing = false;
D
duke 已提交
54

55
Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
56 57
  assert (len <= Symbol::max_length(), "should be checked by caller");

58
  Symbol* sym;
59

60 61 62 63
  if (DumpSharedSpaces) {
    // Allocate all symbols to CLD shared metaspace
    sym = new (len, ClassLoaderData::the_null_class_loader_data(), THREAD) Symbol(name, len, -1);
  } else if (c_heap) {
64 65
    // refcount starts as 1
    sym = new (len, THREAD) Symbol(name, len, 1);
66 67
    assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
  } else {
68
    // Allocate to global arena
69 70
    sym = new (len, arena(), THREAD) Symbol(name, len, -1);
  }
71 72 73
  return sym;
}

74 75 76
void SymbolTable::initialize_symbols(int arena_alloc_size) {
  // Initialize the arena for global symbols, size passed in depends on CDS.
  if (arena_alloc_size == 0) {
77
    _arena = new (mtSymbol) Arena(mtSymbol);
78
  } else {
79
    _arena = new (mtSymbol) Arena(mtSymbol, arena_alloc_size);
80 81 82 83 84 85 86
  }
}

// Call function for all symbols in the symbol table.
void SymbolTable::symbols_do(SymbolClosure *cl) {
  const int n = the_table()->table_size();
  for (int i = 0; i < n; i++) {
Z
zgu 已提交
87
    for (HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
88 89 90 91 92 93 94
         p != NULL;
         p = p->next()) {
      cl->do_symbol(p->literal_addr());
    }
  }
}

95 96 97
int SymbolTable::_symbols_removed = 0;
int SymbolTable::_symbols_counted = 0;
volatile int SymbolTable::_parallel_claimed_idx = 0;
98

99
void SymbolTable::buckets_unlink(int start_idx, int end_idx, BucketUnlinkContext* context, size_t* memory_total) {
100
  for (int i = start_idx; i < end_idx; ++i) {
Z
zgu 已提交
101 102
    HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
    HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
103 104 105 106 107 108
    while (entry != NULL) {
      // Shared entries are normally at the end of the bucket and if we run into
      // a shared entry, then there is nothing more to remove. However, if we
      // have rehashed the table, then the shared entries are no longer at the
      // end of the bucket.
      if (entry->is_shared() && !use_alternate_hashcode()) {
109 110 111
        break;
      }
      Symbol* s = entry->literal();
112
      (*memory_total) += s->size();
113
      context->_num_processed++;
114 115 116
      assert(s != NULL, "just checking");
      // If reference count is zero, remove.
      if (s->refcount() == 0) {
117
        assert(!entry->is_shared(), "shared entries should be kept live");
118 119
        delete s;
        *p = entry->next();
120
        context->free_entry(entry);
121 122 123
      } else {
        p = entry->next_addr();
      }
124
      // get next entry
Z
zgu 已提交
125
      entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
126 127
    }
  }
128 129 130 131 132 133
}

// Remove unreferenced symbols from the symbol table
// This is done late during GC.
void SymbolTable::unlink(int* processed, int* removed) {
  size_t memory_total = 0;
134 135 136 137 138 139 140 141
  BucketUnlinkContext context;
  buckets_unlink(0, the_table()->table_size(), &context, &memory_total);
  _the_table->bulk_free_entries(&context);
  *processed = context._num_processed;
  *removed = context._num_removed;

  _symbols_removed = context._num_removed;
  _symbols_counted = context._num_processed;
142 143 144 145 146 147 148 149 150 151 152 153 154
  // Exclude printing for normal PrintGCDetails because people parse
  // this output.
  if (PrintGCDetails && Verbose && WizardMode) {
    gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", *processed,
                        (memory_total*HeapWordSize)/1024);
  }
}

void SymbolTable::possibly_parallel_unlink(int* processed, int* removed) {
  const int limit = the_table()->table_size();

  size_t memory_total = 0;

155
  BucketUnlinkContext context;
156 157 158 159 160 161 162 163 164
  for (;;) {
    // Grab next set of buckets to scan
    int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
    if (start_idx >= limit) {
      // End of table
      break;
    }

    int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
165
    buckets_unlink(start_idx, end_idx, &context, &memory_total);
166
  }
167 168 169 170 171 172 173

  _the_table->bulk_free_entries(&context);
  *processed = context._num_processed;
  *removed = context._num_removed;

  Atomic::add(context._num_processed, &_symbols_counted);
  Atomic::add(context._num_removed, &_symbols_removed);
174 175 176
  // Exclude printing for normal PrintGCDetails because people parse
  // this output.
  if (PrintGCDetails && Verbose && WizardMode) {
177
    gclog_or_tty->print(" [Symbols: scanned=%d removed=%d size=" SIZE_FORMAT "K] ", *processed, *removed,
178 179 180 181
                        (memory_total*HeapWordSize)/1024);
  }
}

182 183 184 185
// Create a new table and using alternate hash code, populate the new table
// with the existing strings.   Set flag to use the alternate hash code afterwards.
void SymbolTable::rehash_table() {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
186 187
  // This should never happen with -Xshare:dump but it might in testing mode.
  if (DumpSharedSpaces) return;
188 189 190 191 192 193 194 195 196 197 198 199
  // Create a new symbol table
  SymbolTable* new_table = new SymbolTable();

  the_table()->move_to(new_table);

  // Delete the table and buckets (entries are reused in new table).
  delete _the_table;
  // Don't check if we need rehashing until the table gets unbalanced again.
  // Then rehash with a new global seed.
  _needs_rehashing = false;
  _the_table = new_table;
}
200

D
duke 已提交
201 202
// Lookup a symbol in a bucket.

203
Symbol* SymbolTable::lookup(int index, const char* name,
D
duke 已提交
204
                              int len, unsigned int hash) {
205
  int count = 0;
Z
zgu 已提交
206
  for (HashtableEntry<Symbol*, mtSymbol>* e = bucket(index); e != NULL; e = e->next()) {
207
    count++;  // count all entries in this bucket, not just ones with same hash
D
duke 已提交
208
    if (e->hash() == hash) {
209
      Symbol* sym = e->literal();
D
duke 已提交
210
      if (sym->equals(name, len)) {
211 212
        // something is referencing this symbol now.
        sym->increment_refcount();
D
duke 已提交
213 214 215 216
        return sym;
      }
    }
  }
217
  // If the bucket size is too deep check if this hash code is insufficient.
218
  if (count >= rehash_count && !needs_rehashing()) {
219 220
    _needs_rehashing = check_rehash_table(count);
  }
D
duke 已提交
221 222 223
  return NULL;
}

224 225
// Pick hashing algorithm.
unsigned int SymbolTable::hash_symbol(const char* s, int len) {
226 227
  return use_alternate_hashcode() ?
           AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
228
           java_lang_String::hash_code(s, len);
229 230
}

D
duke 已提交
231 232 233 234 235 236 237 238

// We take care not to be blocking while holding the
// SymbolTable_lock. Otherwise, the system might deadlock, since the
// symboltable is used during compilation (VM_thread) The lock free
// synchronization is simplified by the fact that we do not delete
// entries in the symbol table during normal execution (only during
// safepoints).

239
Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
D
duke 已提交
240 241 242
  unsigned int hashValue = hash_symbol(name, len);
  int index = the_table()->hash_to_index(hashValue);

243
  Symbol* s = the_table()->lookup(index, name, len, hashValue);
D
duke 已提交
244 245 246 247

  // Found
  if (s != NULL) return s;

248 249 250
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

D
duke 已提交
251
  // Otherwise, add to symbol to table
252
  return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
D
duke 已提交
253 254
}

255
Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
D
duke 已提交
256 257 258 259 260 261 262 263 264 265 266
  char* buffer;
  int index, len;
  unsigned int hashValue;
  char* name;
  {
    debug_only(No_Safepoint_Verifier nsv;)

    name = (char*)sym->base() + begin;
    len = end - begin;
    hashValue = hash_symbol(name, len);
    index = the_table()->hash_to_index(hashValue);
267
    Symbol* s = the_table()->lookup(index, name, len, hashValue);
D
duke 已提交
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287

    // Found
    if (s != NULL) return s;
  }

  // Otherwise, add to symbol to table. Copy to a C string first.
  char stack_buf[128];
  ResourceMark rm(THREAD);
  if (len <= 128) {
    buffer = stack_buf;
  } else {
    buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
  }
  for (int i=0; i<len; i++) {
    buffer[i] = name[i];
  }
  // Make sure there is no safepoint in the code above since name can't move.
  // We can't include the code in No_Safepoint_Verifier because of the
  // ResourceMark.

288 289 290
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

291
  return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
D
duke 已提交
292 293
}

294
Symbol* SymbolTable::lookup_only(const char* name, int len,
D
duke 已提交
295 296 297 298
                                   unsigned int& hash) {
  hash = hash_symbol(name, len);
  int index = the_table()->hash_to_index(hash);

299 300
  Symbol* s = the_table()->lookup(index, name, len, hash);
  return s;
D
duke 已提交
301 302
}

303 304 305 306 307 308 309
// Look up the address of the literal in the SymbolTable for this Symbol*
// Do not create any new symbols
// Do not increment the reference count to keep this alive
Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
  unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
  int index = the_table()->hash_to_index(hash);

Z
zgu 已提交
310
  for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
311 312 313 314 315 316 317 318 319 320
    if (e->hash() == hash) {
      Symbol* literal_sym = e->literal();
      if (sym == literal_sym) {
        return e->literal_addr();
      }
    }
  }
  return NULL;
}

321 322
// Suggestion: Push unicode-based lookup all the way into the hashing
// and probing logic, so there is no need for convert_to_utf8 until
323 324
// an actual new Symbol* is created.
Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
325 326 327 328 329 330 331 332 333 334 335 336 337 338
  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
  char stack_buf[128];
  if (utf8_length < (int) sizeof(stack_buf)) {
    char* chars = stack_buf;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup(chars, utf8_length, THREAD);
  } else {
    ResourceMark rm(THREAD);
    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup(chars, utf8_length, THREAD);
  }
}

339
Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
                                           unsigned int& hash) {
  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
  char stack_buf[128];
  if (utf8_length < (int) sizeof(stack_buf)) {
    char* chars = stack_buf;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup_only(chars, utf8_length, hash);
  } else {
    ResourceMark rm;
    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup_only(chars, utf8_length, hash);
  }
}

355
void SymbolTable::add(ClassLoaderData* loader_data, constantPoolHandle cp,
356
                      int names_count,
D
duke 已提交
357 358
                      const char** names, int* lengths, int* cp_indices,
                      unsigned int* hashValues, TRAPS) {
359 360 361
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

D
duke 已提交
362
  SymbolTable* table = the_table();
363
  bool added = table->basic_add(loader_data, cp, names_count, names, lengths,
D
duke 已提交
364 365 366 367 368
                                cp_indices, hashValues, CHECK);
  if (!added) {
    // do it the hard way
    for (int i=0; i<names_count; i++) {
      int index = table->hash_to_index(hashValues[i]);
369
      bool c_heap = !loader_data->is_the_null_class_loader_data();
370
      Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
D
duke 已提交
371 372 373 374 375
      cp->symbol_at_put(cp_indices[i], sym);
    }
  }
}

376 377 378 379 380 381
Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
  unsigned int hash;
  Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
  if (result != NULL) {
    return result;
  }
382 383 384
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

385 386 387 388 389
  SymbolTable* table = the_table();
  int index = table->hash_to_index(hash);
  return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
}

390
Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
391
                               unsigned int hashValue_arg, bool c_heap, TRAPS) {
392
  assert(!Universe::heap()->is_in_reserved(name),
D
duke 已提交
393 394
         "proposed name of symbol must be stable");

395 396 397 398 399 400 401 402
  // Don't allow symbols to be created which cannot fit in a Symbol*.
  if (len > Symbol::max_length()) {
    THROW_MSG_0(vmSymbols::java_lang_InternalError(),
                "name is too long to represent");
  }

  // Cannot hit a safepoint in this function because the "this" pointer can move.
  No_Safepoint_Verifier nsv;
D
duke 已提交
403

404
  // Check if the symbol table has been rehashed, if so, need to recalculate
405 406 407 408 409 410 411 412 413 414
  // the hash value and index.
  unsigned int hashValue;
  int index;
  if (use_alternate_hashcode()) {
    hashValue = hash_symbol((const char*)name, len);
    index = hash_to_index(hashValue);
  } else {
    hashValue = hashValue_arg;
    index = index_arg;
  }
415

D
duke 已提交
416 417
  // Since look-up was done lock-free, we need to check if another
  // thread beat us in the race to insert the symbol.
418
  Symbol* test = lookup(index, (char*)name, len, hashValue);
D
duke 已提交
419
  if (test != NULL) {
420
    // A race occurred and another thread introduced the symbol.
421
    assert(test->refcount() != 0, "lookup should have incremented the count");
D
duke 已提交
422 423 424
    return test;
  }

425 426 427 428
  // Create a new symbol.
  Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
  assert(sym->equals((char*)name, len), "symbol must be properly initialized");

Z
zgu 已提交
429
  HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
D
duke 已提交
430
  add_entry(index, entry);
431
  return sym;
D
duke 已提交
432 433
}

434 435
// This version of basic_add adds symbols in batch from the constant pool
// parsing.
436
bool SymbolTable::basic_add(ClassLoaderData* loader_data, constantPoolHandle cp,
437
                            int names_count,
D
duke 已提交
438 439 440
                            const char** names, int* lengths,
                            int* cp_indices, unsigned int* hashValues,
                            TRAPS) {
441 442 443 444 445 446 447

  // Check symbol names are not too long.  If any are too long, don't add any.
  for (int i = 0; i< names_count; i++) {
    if (lengths[i] > Symbol::max_length()) {
      THROW_MSG_0(vmSymbols::java_lang_InternalError(),
                  "name is too long to represent");
    }
D
duke 已提交
448 449
  }

450 451
  // Cannot hit a safepoint in this function because the "this" pointer can move.
  No_Safepoint_Verifier nsv;
D
duke 已提交
452

453
  for (int i=0; i<names_count; i++) {
454 455
    // Check if the symbol table has been rehashed, if so, need to recalculate
    // the hash value.
456 457 458 459 460 461
    unsigned int hashValue;
    if (use_alternate_hashcode()) {
      hashValue = hash_symbol(names[i], lengths[i]);
    } else {
      hashValue = hashValues[i];
    }
D
duke 已提交
462 463
    // Since look-up was done lock-free, we need to check if another
    // thread beat us in the race to insert the symbol.
464 465
    int index = hash_to_index(hashValue);
    Symbol* test = lookup(index, names[i], lengths[i], hashValue);
D
duke 已提交
466
    if (test != NULL) {
T
twisti 已提交
467
      // A race occurred and another thread introduced the symbol, this one
D
duke 已提交
468 469
      // will be dropped and collected. Use test instead.
      cp->symbol_at_put(cp_indices[i], test);
470
      assert(test->refcount() != 0, "lookup should have incremented the count");
D
duke 已提交
471
    } else {
472 473
      // Create a new symbol.  The null class loader is never unloaded so these
      // are allocated specially in a permanent arena.
474
      bool c_heap = !loader_data->is_the_null_class_loader_data();
475 476
      Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
      assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
Z
zgu 已提交
477
      HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
D
duke 已提交
478 479 480 481 482 483 484 485 486 487
      add_entry(index, entry);
      cp->symbol_at_put(cp_indices[i], sym);
    }
  }
  return true;
}


void SymbolTable::verify() {
  for (int i = 0; i < the_table()->table_size(); ++i) {
Z
zgu 已提交
488
    HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
D
duke 已提交
489
    for ( ; p != NULL; p = p->next()) {
490
      Symbol* s = (Symbol*)(p->literal());
D
duke 已提交
491 492 493 494 495 496 497 498 499
      guarantee(s != NULL, "symbol is NULL");
      unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
      guarantee(p->hash() == h, "broken hash in symbol table entry");
      guarantee(the_table()->hash_to_index(h) == i,
                "wrong index in symbol table");
    }
  }
}

500
void SymbolTable::dump(outputStream* st) {
501
  the_table()->dump_table(st, "SymbolTable");
502 503
}

D
duke 已提交
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523

//---------------------------------------------------------------------------
// Non-product code

#ifndef PRODUCT

void SymbolTable::print_histogram() {
  MutexLocker ml(SymbolTable_lock);
  const int results_length = 100;
  int results[results_length];
  int i,j;

  // initialize results to zero
  for (j = 0; j < results_length; j++) {
    results[j] = 0;
  }

  int total = 0;
  int max_symbols = 0;
  int out_of_range = 0;
524 525
  int memory_total = 0;
  int count = 0;
D
duke 已提交
526
  for (i = 0; i < the_table()->table_size(); i++) {
Z
zgu 已提交
527
    HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
D
duke 已提交
528
    for ( ; p != NULL; p = p->next()) {
529
      memory_total += p->literal()->size();
530 531
      count++;
      int counter = p->literal()->utf8_length();
D
duke 已提交
532 533 534 535 536 537 538 539 540 541
      total += counter;
      if (counter < results_length) {
        results[counter]++;
      } else {
        out_of_range++;
      }
      max_symbols = MAX2(max_symbols, counter);
    }
  }
  tty->print_cr("Symbol Table:");
542 543 544
  tty->print_cr("Total number of symbols  %5d", count);
  tty->print_cr("Total size in memory     %5dK",
          (memory_total*HeapWordSize)/1024);
545 546 547
  tty->print_cr("Total counted            %5d", _symbols_counted);
  tty->print_cr("Total removed            %5d", _symbols_removed);
  if (_symbols_counted > 0) {
548
    tty->print_cr("Percent removed          %3.2f",
549
          ((float)_symbols_removed/(float)_symbols_counted)* 100);
550 551
  }
  tty->print_cr("Reference counts         %5d", Symbol::_total_count);
552 553
  tty->print_cr("Symbol arena size        %5d used %5d",
                 arena()->size_in_bytes(), arena()->used());
554
  tty->print_cr("Histogram of symbol length:");
D
duke 已提交
555 556 557 558 559 560 561 562 563 564 565
  tty->print_cr("%8s %5d", "Total  ", total);
  tty->print_cr("%8s %5d", "Maximum", max_symbols);
  tty->print_cr("%8s %3.2f", "Average",
          ((float) total / (float) the_table()->table_size()));
  tty->print_cr("%s", "Histogram:");
  tty->print_cr(" %s %29s", "Length", "Number chains that length");
  for (i = 0; i < results_length; i++) {
    if (results[i] > 0) {
      tty->print_cr("%6d %10d", i, results[i]);
    }
  }
566 567 568 569 570 571 572 573 574 575 576 577 578
  if (Verbose) {
    int line_length = 70;
    tty->print_cr("%s %30s", " Length", "Number chains that length");
    for (i = 0; i < results_length; i++) {
      if (results[i] > 0) {
        tty->print("%4d", i);
        for (j = 0; (j < results[i]) && (j < line_length);  j++) {
          tty->print("%1s", "*");
        }
        if (j == line_length) {
          tty->print("%1s", "+");
        }
        tty->cr();
D
duke 已提交
579 580 581 582 583 584 585
      }
    }
  }
  tty->print_cr(" %s %d: %d\n", "Number chains longer than",
                    results_length, out_of_range);
}

586 587
void SymbolTable::print() {
  for (int i = 0; i < the_table()->table_size(); ++i) {
Z
zgu 已提交
588 589
    HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
    HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
590 591 592 593 594 595
    if (entry != NULL) {
      while (entry != NULL) {
        tty->print(PTR_FORMAT " ", entry->literal());
        entry->literal()->print();
        tty->print(" %d", entry->literal()->refcount());
        p = entry->next_addr();
Z
zgu 已提交
596
        entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
597 598 599 600 601
      }
      tty->cr();
    }
  }
}
D
duke 已提交
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
#endif // PRODUCT

// --------------------------------------------------------------------------

#ifdef ASSERT
class StableMemoryChecker : public StackObj {
  enum { _bufsize = wordSize*4 };

  address _region;
  jint    _size;
  u1      _save_buf[_bufsize];

  int sample(u1* save_buf) {
    if (_size <= _bufsize) {
      memcpy(save_buf, _region, _size);
      return _size;
    } else {
      // copy head and tail
      memcpy(&save_buf[0],          _region,                      _bufsize/2);
      memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
      return (_bufsize/2)*2;
    }
  }

 public:
  StableMemoryChecker(const void* region, jint size) {
    _region = (address) region;
    _size   = size;
    sample(_save_buf);
  }

  bool verify() {
    u1 check_buf[sizeof(_save_buf)];
    int check_size = sample(check_buf);
    return (0 == memcmp(_save_buf, check_buf, check_size));
  }

  void set_region(const void* region) { _region = (address) region; }
};
#endif


// --------------------------------------------------------------------------
StringTable* StringTable::_the_table = NULL;

647 648
bool StringTable::_needs_rehashing = false;

649 650
volatile int StringTable::_parallel_claimed_idx = 0;

651
// Pick hashing algorithm
652
unsigned int StringTable::hash_string(const jchar* s, int len) {
653
  return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
654
                                    java_lang_String::hash_code(s, len);
655 656
}

D
duke 已提交
657 658
oop StringTable::lookup(int index, jchar* name,
                        int len, unsigned int hash) {
659
  int count = 0;
Z
zgu 已提交
660
  for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) {
661
    count++;
D
duke 已提交
662 663 664 665 666 667
    if (l->hash() == hash) {
      if (java_lang_String::equals(l->literal(), name, len)) {
        return l->literal();
      }
    }
  }
668
  // If the bucket size is too deep check if this hash code is insufficient.
669
  if (count >= rehash_count && !needs_rehashing()) {
670 671
    _needs_rehashing = check_rehash_table(count);
  }
D
duke 已提交
672 673 674 675
  return NULL;
}


676
oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
677
                           int len, unsigned int hashValue_arg, TRAPS) {
D
duke 已提交
678 679 680

  assert(java_lang_String::equals(string(), name, len),
         "string must be properly initialized");
681 682
  // Cannot hit a safepoint in this function because the "this" pointer can move.
  No_Safepoint_Verifier nsv;
D
duke 已提交
683

684
  // Check if the symbol table has been rehashed, if so, need to recalculate
685 686 687 688 689 690 691 692 693 694
  // the hash value and index before second lookup.
  unsigned int hashValue;
  int index;
  if (use_alternate_hashcode()) {
    hashValue = hash_string(name, len);
    index = hash_to_index(hashValue);
  } else {
    hashValue = hashValue_arg;
    index = index_arg;
  }
695

D
duke 已提交
696 697 698 699 700 701 702 703 704
  // Since look-up was done lock-free, we need to check if another
  // thread beat us in the race to insert the symbol.

  oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
  if (test != NULL) {
    // Entry already added
    return test;
  }

Z
zgu 已提交
705
  HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string());
D
duke 已提交
706 707 708 709 710
  add_entry(index, entry);
  return string();
}


711
oop StringTable::lookup(Symbol* symbol) {
D
duke 已提交
712 713 714
  ResourceMark rm;
  int length;
  jchar* chars = symbol->as_unicode(length);
715 716 717
  return lookup(chars, length);
}

718 719 720 721 722 723 724 725 726 727 728
// Tell the GC that this string was looked up in the StringTable.
static void ensure_string_alive(oop string) {
  // A lookup in the StringTable could return an object that was previously
  // considered dead. The SATB part of G1 needs to get notified about this
  // potential resurrection, otherwise the marking might not find the object.
#if INCLUDE_ALL_GCS
  if (UseG1GC && string != NULL) {
    G1SATBCardTableModRefBS::enqueue(string);
  }
#endif
}
729 730 731 732

oop StringTable::lookup(jchar* name, int len) {
  unsigned int hash = hash_string(name, len);
  int index = the_table()->hash_to_index(hash);
733 734 735 736 737
  oop string = the_table()->lookup(index, name, len, hash);

  ensure_string_alive(string);

  return string;
D
duke 已提交
738 739 740 741 742
}


oop StringTable::intern(Handle string_or_null, jchar* name,
                        int len, TRAPS) {
743
  unsigned int hashValue = hash_string(name, len);
D
duke 已提交
744
  int index = the_table()->hash_to_index(hashValue);
745
  oop found_string = the_table()->lookup(index, name, len, hashValue);
D
duke 已提交
746 747

  // Found
748 749 750 751
  if (found_string != NULL) {
    ensure_string_alive(found_string);
    return found_string;
  }
752 753

  debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
754
  assert(!Universe::heap()->is_in_reserved(name),
755 756 757 758
         "proposed name of symbol must be stable");

  Handle string;
  // try to reuse the string if possible
759
  if (!string_or_null.is_null()) {
760 761
    string = string_or_null;
  } else {
762
    string = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
763 764
  }

P
pliden 已提交
765 766 767 768 769 770 771 772 773
#if INCLUDE_ALL_GCS
  if (G1StringDedup::is_enabled()) {
    // Deduplicate the string before it is interned. Note that we should never
    // deduplicate a string after it has been interned. Doing so will counteract
    // compiler optimizations done on e.g. interned string literals.
    G1StringDedup::deduplicate(string());
  }
#endif

774 775
  // Grab the StringTable_lock before getting the_table() because it could
  // change at safepoint.
776 777 778 779 780 781 782
  oop added_or_found;
  {
    MutexLocker ml(StringTable_lock, THREAD);
    // Otherwise, add to symbol to table
    added_or_found = the_table()->basic_add(index, string, name, len,
                                  hashValue, CHECK_NULL);
  }
D
duke 已提交
783

784 785 786
  ensure_string_alive(added_or_found);

  return added_or_found;
D
duke 已提交
787 788
}

789
oop StringTable::intern(Symbol* symbol, TRAPS) {
D
duke 已提交
790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805
  if (symbol == NULL) return NULL;
  ResourceMark rm(THREAD);
  int length;
  jchar* chars = symbol->as_unicode(length);
  Handle string;
  oop result = intern(string, chars, length, CHECK_NULL);
  return result;
}


oop StringTable::intern(oop string, TRAPS)
{
  if (string == NULL) return NULL;
  ResourceMark rm(THREAD);
  int length;
  Handle h_string (THREAD, string);
806
  jchar* chars = java_lang_String::as_unicode_string(string, length, CHECK_NULL);
D
duke 已提交
807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
  oop result = intern(h_string, chars, length, CHECK_NULL);
  return result;
}


oop StringTable::intern(const char* utf8_string, TRAPS) {
  if (utf8_string == NULL) return NULL;
  ResourceMark rm(THREAD);
  int length = UTF8::unicode_length(utf8_string);
  jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
  UTF8::convert_to_unicode(utf8_string, chars, length);
  Handle string;
  oop result = intern(string, chars, length, CHECK_NULL);
  return result;
}

823
void StringTable::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
824 825 826 827 828
  BucketUnlinkContext context;
  buckets_unlink_or_oops_do(is_alive, f, 0, the_table()->table_size(), &context);
  _the_table->bulk_free_entries(&context);
  *processed = context._num_processed;
  *removed = context._num_removed;
829 830 831
}

void StringTable::possibly_parallel_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
832 833 834
  // Readers of the table are unlocked, so we should only be removing
  // entries at a safepoint.
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
835
  const int limit = the_table()->table_size();
836

837
  BucketUnlinkContext context;
838 839 840 841 842 843
  for (;;) {
    // Grab next set of buckets to scan
    int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
    if (start_idx >= limit) {
      // End of table
      break;
844
    }
845 846

    int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
847
    buckets_unlink_or_oops_do(is_alive, f, start_idx, end_idx, &context);
848
  }
849 850 851
  _the_table->bulk_free_entries(&context);
  *processed = context._num_processed;
  *removed = context._num_removed;
852 853
}

854
void StringTable::buckets_oops_do(OopClosure* f, int start_idx, int end_idx) {
855 856 857
  const int limit = the_table()->table_size();

  assert(0 <= start_idx && start_idx <= limit,
858
         err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
859
  assert(0 <= end_idx && end_idx <= limit,
860
         err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
861
  assert(start_idx <= end_idx,
862
         err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
863 864 865
                 start_idx, end_idx));

  for (int i = start_idx; i < end_idx; i += 1) {
Z
zgu 已提交
866
    HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
867
    while (entry != NULL) {
868 869
      assert(!entry->is_shared(), "CDS not used for the StringTable");

870 871
      f->do_oop((oop*)entry->literal_addr());

872
      entry = entry->next();
873 874 875 876
    }
  }
}

877
void StringTable::buckets_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int start_idx, int end_idx, BucketUnlinkContext* context) {
878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900
  const int limit = the_table()->table_size();

  assert(0 <= start_idx && start_idx <= limit,
         err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
  assert(0 <= end_idx && end_idx <= limit,
         err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
  assert(start_idx <= end_idx,
         err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
                 start_idx, end_idx));

  for (int i = start_idx; i < end_idx; ++i) {
    HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
    HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
    while (entry != NULL) {
      assert(!entry->is_shared(), "CDS not used for the StringTable");

      if (is_alive->do_object_b(entry->literal())) {
        if (f != NULL) {
          f->do_oop((oop*)entry->literal_addr());
        }
        p = entry->next_addr();
      } else {
        *p = entry->next();
901
        context->free_entry(entry);
902
      }
903
      context->_num_processed++;
904 905 906 907 908
      entry = *p;
    }
  }
}

909
void StringTable::oops_do(OopClosure* f) {
910
  buckets_oops_do(f, 0, the_table()->table_size());
911 912 913 914 915 916 917 918 919 920 921 922 923 924
}

void StringTable::possibly_parallel_oops_do(OopClosure* f) {
  const int limit = the_table()->table_size();

  for (;;) {
    // Grab next set of buckets to scan
    int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
    if (start_idx >= limit) {
      // End of table
      break;
    }

    int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
925
    buckets_oops_do(f, start_idx, end_idx);
926 927 928
  }
}

929 930
// This verification is part of Universe::verify() and needs to be quick.
// See StringTable::verify_and_compare() below for exhaustive verification.
D
duke 已提交
931 932
void StringTable::verify() {
  for (int i = 0; i < the_table()->table_size(); ++i) {
Z
zgu 已提交
933
    HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
D
duke 已提交
934 935 936
    for ( ; p != NULL; p = p->next()) {
      oop s = p->literal();
      guarantee(s != NULL, "interned string is NULL");
937
      unsigned int h = java_lang_String::hash_string(s);
D
duke 已提交
938 939 940 941 942 943
      guarantee(p->hash() == h, "broken hash in string table entry");
      guarantee(the_table()->hash_to_index(h) == i,
                "wrong index in string table");
    }
  }
}
944 945

void StringTable::dump(outputStream* st) {
946
  the_table()->dump_table(st, "StringTable");
947 948
}

949 950 951 952 953 954 955 956 957 958 959 960 961
StringTable::VerifyRetTypes StringTable::compare_entries(
                                      int bkt1, int e_cnt1,
                                      HashtableEntry<oop, mtSymbol>* e_ptr1,
                                      int bkt2, int e_cnt2,
                                      HashtableEntry<oop, mtSymbol>* e_ptr2) {
  // These entries are sanity checked by verify_and_compare_entries()
  // before this function is called.
  oop str1 = e_ptr1->literal();
  oop str2 = e_ptr2->literal();

  if (str1 == str2) {
    tty->print_cr("ERROR: identical oop values (0x" PTR_FORMAT ") "
                  "in entry @ bucket[%d][%d] and entry @ bucket[%d][%d]",
962
                  (void *)str1, bkt1, e_cnt1, bkt2, e_cnt2);
963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104
    return _verify_fail_continue;
  }

  if (java_lang_String::equals(str1, str2)) {
    tty->print_cr("ERROR: identical String values in entry @ "
                  "bucket[%d][%d] and entry @ bucket[%d][%d]",
                  bkt1, e_cnt1, bkt2, e_cnt2);
    return _verify_fail_continue;
  }

  return _verify_pass;
}

StringTable::VerifyRetTypes StringTable::verify_entry(int bkt, int e_cnt,
                                      HashtableEntry<oop, mtSymbol>* e_ptr,
                                      StringTable::VerifyMesgModes mesg_mode) {

  VerifyRetTypes ret = _verify_pass;  // be optimistic

  oop str = e_ptr->literal();
  if (str == NULL) {
    if (mesg_mode == _verify_with_mesgs) {
      tty->print_cr("ERROR: NULL oop value in entry @ bucket[%d][%d]", bkt,
                    e_cnt);
    }
    // NULL oop means no more verifications are possible
    return _verify_fail_done;
  }

  if (str->klass() != SystemDictionary::String_klass()) {
    if (mesg_mode == _verify_with_mesgs) {
      tty->print_cr("ERROR: oop is not a String in entry @ bucket[%d][%d]",
                    bkt, e_cnt);
    }
    // not a String means no more verifications are possible
    return _verify_fail_done;
  }

  unsigned int h = java_lang_String::hash_string(str);
  if (e_ptr->hash() != h) {
    if (mesg_mode == _verify_with_mesgs) {
      tty->print_cr("ERROR: broken hash value in entry @ bucket[%d][%d], "
                    "bkt_hash=%d, str_hash=%d", bkt, e_cnt, e_ptr->hash(), h);
    }
    ret = _verify_fail_continue;
  }

  if (the_table()->hash_to_index(h) != bkt) {
    if (mesg_mode == _verify_with_mesgs) {
      tty->print_cr("ERROR: wrong index value for entry @ bucket[%d][%d], "
                    "str_hash=%d, hash_to_index=%d", bkt, e_cnt, h,
                    the_table()->hash_to_index(h));
    }
    ret = _verify_fail_continue;
  }

  return ret;
}

// See StringTable::verify() above for the quick verification that is
// part of Universe::verify(). This verification is exhaustive and
// reports on every issue that is found. StringTable::verify() only
// reports on the first issue that is found.
//
// StringTable::verify_entry() checks:
// - oop value != NULL (same as verify())
// - oop value is a String
// - hash(String) == hash in entry (same as verify())
// - index for hash == index of entry (same as verify())
//
// StringTable::compare_entries() checks:
// - oops are unique across all entries
// - String values are unique across all entries
//
int StringTable::verify_and_compare_entries() {
  assert(StringTable_lock->is_locked(), "sanity check");

  int  fail_cnt = 0;

  // first, verify all the entries individually:
  for (int bkt = 0; bkt < the_table()->table_size(); bkt++) {
    HashtableEntry<oop, mtSymbol>* e_ptr = the_table()->bucket(bkt);
    for (int e_cnt = 0; e_ptr != NULL; e_ptr = e_ptr->next(), e_cnt++) {
      VerifyRetTypes ret = verify_entry(bkt, e_cnt, e_ptr, _verify_with_mesgs);
      if (ret != _verify_pass) {
        fail_cnt++;
      }
    }
  }

  // Optimization: if the above check did not find any failures, then
  // the comparison loop below does not need to call verify_entry()
  // before calling compare_entries(). If there were failures, then we
  // have to call verify_entry() to see if the entry can be passed to
  // compare_entries() safely. When we call verify_entry() in the loop
  // below, we do so quietly to void duplicate messages and we don't
  // increment fail_cnt because the failures have already been counted.
  bool need_entry_verify = (fail_cnt != 0);

  // second, verify all entries relative to each other:
  for (int bkt1 = 0; bkt1 < the_table()->table_size(); bkt1++) {
    HashtableEntry<oop, mtSymbol>* e_ptr1 = the_table()->bucket(bkt1);
    for (int e_cnt1 = 0; e_ptr1 != NULL; e_ptr1 = e_ptr1->next(), e_cnt1++) {
      if (need_entry_verify) {
        VerifyRetTypes ret = verify_entry(bkt1, e_cnt1, e_ptr1,
                                          _verify_quietly);
        if (ret == _verify_fail_done) {
          // cannot use the current entry to compare against other entries
          continue;
        }
      }

      for (int bkt2 = bkt1; bkt2 < the_table()->table_size(); bkt2++) {
        HashtableEntry<oop, mtSymbol>* e_ptr2 = the_table()->bucket(bkt2);
        int e_cnt2;
        for (e_cnt2 = 0; e_ptr2 != NULL; e_ptr2 = e_ptr2->next(), e_cnt2++) {
          if (bkt1 == bkt2 && e_cnt2 <= e_cnt1) {
            // skip the entries up to and including the one that
            // we're comparing against
            continue;
          }

          if (need_entry_verify) {
            VerifyRetTypes ret = verify_entry(bkt2, e_cnt2, e_ptr2,
                                              _verify_quietly);
            if (ret == _verify_fail_done) {
              // cannot compare against this entry
              continue;
            }
          }

          // compare two entries, report and count any failures:
          if (compare_entries(bkt1, e_cnt1, e_ptr1, bkt2, e_cnt2, e_ptr2)
              != _verify_pass) {
            fail_cnt++;
          }
        }
      }
    }
  }
  return fail_cnt;
}
1105 1106 1107 1108 1109

// Create a new table and using alternate hash code, populate the new table
// with the existing strings.   Set flag to use the alternate hash code afterwards.
void StringTable::rehash_table() {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
1110 1111
  // This should never happen with -Xshare:dump but it might in testing mode.
  if (DumpSharedSpaces) return;
1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123
  StringTable* new_table = new StringTable();

  // Rehash the table
  the_table()->move_to(new_table);

  // Delete the table and buckets (entries are reused in new table).
  delete _the_table;
  // Don't check if we need rehashing until the table gets unbalanced again.
  // Then rehash with a new global seed.
  _needs_rehashing = false;
  _the_table = new_table;
}