symbolTable.cpp 37.7 KB
Newer Older
D
duke 已提交
1
/*
P
pliden 已提交
2
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
D
duke 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
19 20 21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
D
duke 已提交
22 23 24
 *
 */

25
#include "precompiled.hpp"
26
#include "classfile/altHashing.hpp"
27 28 29 30
#include "classfile/javaClasses.hpp"
#include "classfile/symbolTable.hpp"
#include "classfile/systemDictionary.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
31
#include "memory/allocation.inline.hpp"
32 33 34 35 36 37
#include "memory/filemap.hpp"
#include "memory/gcLocker.inline.hpp"
#include "oops/oop.inline.hpp"
#include "oops/oop.inline2.hpp"
#include "runtime/mutexLocker.hpp"
#include "utilities/hashtable.inline.hpp"
P
pliden 已提交
38
#if INCLUDE_ALL_GCS
39
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
P
pliden 已提交
40 41
#include "gc_implementation/g1/g1StringDedup.hpp"
#endif
D
duke 已提交
42

43 44
PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC

D
duke 已提交
45 46
// --------------------------------------------------------------------------

47 48 49
// the number of buckets a thread claims
const int ClaimChunkSize = 32;

D
duke 已提交
50
SymbolTable* SymbolTable::_the_table = NULL;
51 52
// Static arena for symbols that are not deallocated
Arena* SymbolTable::_arena = NULL;
53
bool SymbolTable::_needs_rehashing = false;
D
duke 已提交
54

55
Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
56 57
  assert (len <= Symbol::max_length(), "should be checked by caller");

58
  Symbol* sym;
59

60 61 62 63
  if (DumpSharedSpaces) {
    // Allocate all symbols to CLD shared metaspace
    sym = new (len, ClassLoaderData::the_null_class_loader_data(), THREAD) Symbol(name, len, -1);
  } else if (c_heap) {
64 65
    // refcount starts as 1
    sym = new (len, THREAD) Symbol(name, len, 1);
66 67
    assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
  } else {
68
    // Allocate to global arena
69 70
    sym = new (len, arena(), THREAD) Symbol(name, len, -1);
  }
71 72 73
  return sym;
}

74 75 76
void SymbolTable::initialize_symbols(int arena_alloc_size) {
  // Initialize the arena for global symbols, size passed in depends on CDS.
  if (arena_alloc_size == 0) {
77
    _arena = new (mtSymbol) Arena(mtSymbol);
78
  } else {
79
    _arena = new (mtSymbol) Arena(mtSymbol, arena_alloc_size);
80 81 82 83 84 85 86
  }
}

// Call function for all symbols in the symbol table.
void SymbolTable::symbols_do(SymbolClosure *cl) {
  const int n = the_table()->table_size();
  for (int i = 0; i < n; i++) {
Z
zgu 已提交
87
    for (HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
88 89 90 91 92 93 94
         p != NULL;
         p = p->next()) {
      cl->do_symbol(p->literal_addr());
    }
  }
}

95 96 97
int SymbolTable::_symbols_removed = 0;
int SymbolTable::_symbols_counted = 0;
volatile int SymbolTable::_parallel_claimed_idx = 0;
98

99 100
void SymbolTable::buckets_unlink(int start_idx, int end_idx, int* processed, int* removed, size_t* memory_total) {
  for (int i = start_idx; i < end_idx; ++i) {
Z
zgu 已提交
101 102
    HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
    HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
103 104 105 106 107 108
    while (entry != NULL) {
      // Shared entries are normally at the end of the bucket and if we run into
      // a shared entry, then there is nothing more to remove. However, if we
      // have rehashed the table, then the shared entries are no longer at the
      // end of the bucket.
      if (entry->is_shared() && !use_alternate_hashcode()) {
109 110 111
        break;
      }
      Symbol* s = entry->literal();
112 113
      (*memory_total) += s->size();
      (*processed)++;
114 115 116
      assert(s != NULL, "just checking");
      // If reference count is zero, remove.
      if (s->refcount() == 0) {
117
        assert(!entry->is_shared(), "shared entries should be kept live");
118
        delete s;
119
        (*removed)++;
120 121 122 123 124
        *p = entry->next();
        the_table()->free_entry(entry);
      } else {
        p = entry->next_addr();
      }
125
      // get next entry
Z
zgu 已提交
126
      entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
127 128
    }
  }
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
}

// Remove unreferenced symbols from the symbol table
// This is done late during GC.
void SymbolTable::unlink(int* processed, int* removed) {
  size_t memory_total = 0;
  buckets_unlink(0, the_table()->table_size(), processed, removed, &memory_total);
  _symbols_removed += *removed;
  _symbols_counted += *processed;
  // Exclude printing for normal PrintGCDetails because people parse
  // this output.
  if (PrintGCDetails && Verbose && WizardMode) {
    gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", *processed,
                        (memory_total*HeapWordSize)/1024);
  }
}

void SymbolTable::possibly_parallel_unlink(int* processed, int* removed) {
  const int limit = the_table()->table_size();

  size_t memory_total = 0;

  for (;;) {
    // Grab next set of buckets to scan
    int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
    if (start_idx >= limit) {
      // End of table
      break;
    }

    int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
    buckets_unlink(start_idx, end_idx, processed, removed, &memory_total);
  }
  Atomic::add(*processed, &_symbols_counted);
  Atomic::add(*removed, &_symbols_removed);
164 165 166
  // Exclude printing for normal PrintGCDetails because people parse
  // this output.
  if (PrintGCDetails && Verbose && WizardMode) {
167
    gclog_or_tty->print(" [Symbols: scanned=%d removed=%d size=" SIZE_FORMAT "K] ", *processed, *removed,
168 169 170 171
                        (memory_total*HeapWordSize)/1024);
  }
}

172 173 174 175
// Create a new table and using alternate hash code, populate the new table
// with the existing strings.   Set flag to use the alternate hash code afterwards.
void SymbolTable::rehash_table() {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
176 177
  // This should never happen with -Xshare:dump but it might in testing mode.
  if (DumpSharedSpaces) return;
178 179 180 181 182 183 184 185 186 187 188 189
  // Create a new symbol table
  SymbolTable* new_table = new SymbolTable();

  the_table()->move_to(new_table);

  // Delete the table and buckets (entries are reused in new table).
  delete _the_table;
  // Don't check if we need rehashing until the table gets unbalanced again.
  // Then rehash with a new global seed.
  _needs_rehashing = false;
  _the_table = new_table;
}
190

D
duke 已提交
191 192
// Lookup a symbol in a bucket.

193
Symbol* SymbolTable::lookup(int index, const char* name,
D
duke 已提交
194
                              int len, unsigned int hash) {
195
  int count = 0;
Z
zgu 已提交
196
  for (HashtableEntry<Symbol*, mtSymbol>* e = bucket(index); e != NULL; e = e->next()) {
197
    count++;  // count all entries in this bucket, not just ones with same hash
D
duke 已提交
198
    if (e->hash() == hash) {
199
      Symbol* sym = e->literal();
D
duke 已提交
200
      if (sym->equals(name, len)) {
201 202
        // something is referencing this symbol now.
        sym->increment_refcount();
D
duke 已提交
203 204 205 206
        return sym;
      }
    }
  }
207
  // If the bucket size is too deep check if this hash code is insufficient.
Z
zgu 已提交
208
  if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
209 210
    _needs_rehashing = check_rehash_table(count);
  }
D
duke 已提交
211 212 213
  return NULL;
}

214 215
// Pick hashing algorithm.
unsigned int SymbolTable::hash_symbol(const char* s, int len) {
216 217
  return use_alternate_hashcode() ?
           AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
218
           java_lang_String::hash_code(s, len);
219 220
}

D
duke 已提交
221 222 223 224 225 226 227 228

// We take care not to be blocking while holding the
// SymbolTable_lock. Otherwise, the system might deadlock, since the
// symboltable is used during compilation (VM_thread) The lock free
// synchronization is simplified by the fact that we do not delete
// entries in the symbol table during normal execution (only during
// safepoints).

229
Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
D
duke 已提交
230 231 232
  unsigned int hashValue = hash_symbol(name, len);
  int index = the_table()->hash_to_index(hashValue);

233
  Symbol* s = the_table()->lookup(index, name, len, hashValue);
D
duke 已提交
234 235 236 237

  // Found
  if (s != NULL) return s;

238 239 240
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

D
duke 已提交
241
  // Otherwise, add to symbol to table
242
  return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
D
duke 已提交
243 244
}

245
Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
D
duke 已提交
246 247 248 249 250 251 252 253 254 255 256
  char* buffer;
  int index, len;
  unsigned int hashValue;
  char* name;
  {
    debug_only(No_Safepoint_Verifier nsv;)

    name = (char*)sym->base() + begin;
    len = end - begin;
    hashValue = hash_symbol(name, len);
    index = the_table()->hash_to_index(hashValue);
257
    Symbol* s = the_table()->lookup(index, name, len, hashValue);
D
duke 已提交
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277

    // Found
    if (s != NULL) return s;
  }

  // Otherwise, add to symbol to table. Copy to a C string first.
  char stack_buf[128];
  ResourceMark rm(THREAD);
  if (len <= 128) {
    buffer = stack_buf;
  } else {
    buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
  }
  for (int i=0; i<len; i++) {
    buffer[i] = name[i];
  }
  // Make sure there is no safepoint in the code above since name can't move.
  // We can't include the code in No_Safepoint_Verifier because of the
  // ResourceMark.

278 279 280
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

281
  return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
D
duke 已提交
282 283
}

284
Symbol* SymbolTable::lookup_only(const char* name, int len,
D
duke 已提交
285 286 287 288
                                   unsigned int& hash) {
  hash = hash_symbol(name, len);
  int index = the_table()->hash_to_index(hash);

289 290
  Symbol* s = the_table()->lookup(index, name, len, hash);
  return s;
D
duke 已提交
291 292
}

293 294 295 296 297 298 299
// Look up the address of the literal in the SymbolTable for this Symbol*
// Do not create any new symbols
// Do not increment the reference count to keep this alive
Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
  unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
  int index = the_table()->hash_to_index(hash);

Z
zgu 已提交
300
  for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
301 302 303 304 305 306 307 308 309 310
    if (e->hash() == hash) {
      Symbol* literal_sym = e->literal();
      if (sym == literal_sym) {
        return e->literal_addr();
      }
    }
  }
  return NULL;
}

311 312
// Suggestion: Push unicode-based lookup all the way into the hashing
// and probing logic, so there is no need for convert_to_utf8 until
313 314
// an actual new Symbol* is created.
Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
315 316 317 318 319 320 321 322 323 324 325 326 327 328
  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
  char stack_buf[128];
  if (utf8_length < (int) sizeof(stack_buf)) {
    char* chars = stack_buf;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup(chars, utf8_length, THREAD);
  } else {
    ResourceMark rm(THREAD);
    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup(chars, utf8_length, THREAD);
  }
}

329
Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
                                           unsigned int& hash) {
  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
  char stack_buf[128];
  if (utf8_length < (int) sizeof(stack_buf)) {
    char* chars = stack_buf;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup_only(chars, utf8_length, hash);
  } else {
    ResourceMark rm;
    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup_only(chars, utf8_length, hash);
  }
}

345
void SymbolTable::add(ClassLoaderData* loader_data, constantPoolHandle cp,
346
                      int names_count,
D
duke 已提交
347 348
                      const char** names, int* lengths, int* cp_indices,
                      unsigned int* hashValues, TRAPS) {
349 350 351
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

D
duke 已提交
352
  SymbolTable* table = the_table();
353
  bool added = table->basic_add(loader_data, cp, names_count, names, lengths,
D
duke 已提交
354 355 356 357 358
                                cp_indices, hashValues, CHECK);
  if (!added) {
    // do it the hard way
    for (int i=0; i<names_count; i++) {
      int index = table->hash_to_index(hashValues[i]);
359
      bool c_heap = !loader_data->is_the_null_class_loader_data();
360
      Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
D
duke 已提交
361 362 363 364 365
      cp->symbol_at_put(cp_indices[i], sym);
    }
  }
}

366 367 368 369 370 371
Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
  unsigned int hash;
  Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
  if (result != NULL) {
    return result;
  }
372 373 374
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

375 376 377 378 379
  SymbolTable* table = the_table();
  int index = table->hash_to_index(hash);
  return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
}

380
Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
381
                               unsigned int hashValue_arg, bool c_heap, TRAPS) {
382
  assert(!Universe::heap()->is_in_reserved(name),
D
duke 已提交
383 384
         "proposed name of symbol must be stable");

385 386 387 388 389 390 391 392
  // Don't allow symbols to be created which cannot fit in a Symbol*.
  if (len > Symbol::max_length()) {
    THROW_MSG_0(vmSymbols::java_lang_InternalError(),
                "name is too long to represent");
  }

  // Cannot hit a safepoint in this function because the "this" pointer can move.
  No_Safepoint_Verifier nsv;
D
duke 已提交
393

394
  // Check if the symbol table has been rehashed, if so, need to recalculate
395 396 397 398 399 400 401 402 403 404
  // the hash value and index.
  unsigned int hashValue;
  int index;
  if (use_alternate_hashcode()) {
    hashValue = hash_symbol((const char*)name, len);
    index = hash_to_index(hashValue);
  } else {
    hashValue = hashValue_arg;
    index = index_arg;
  }
405

D
duke 已提交
406 407
  // Since look-up was done lock-free, we need to check if another
  // thread beat us in the race to insert the symbol.
408
  Symbol* test = lookup(index, (char*)name, len, hashValue);
D
duke 已提交
409
  if (test != NULL) {
410
    // A race occurred and another thread introduced the symbol.
411
    assert(test->refcount() != 0, "lookup should have incremented the count");
D
duke 已提交
412 413 414
    return test;
  }

415 416 417 418
  // Create a new symbol.
  Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
  assert(sym->equals((char*)name, len), "symbol must be properly initialized");

Z
zgu 已提交
419
  HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
D
duke 已提交
420
  add_entry(index, entry);
421
  return sym;
D
duke 已提交
422 423
}

424 425
// This version of basic_add adds symbols in batch from the constant pool
// parsing.
426
bool SymbolTable::basic_add(ClassLoaderData* loader_data, constantPoolHandle cp,
427
                            int names_count,
D
duke 已提交
428 429 430
                            const char** names, int* lengths,
                            int* cp_indices, unsigned int* hashValues,
                            TRAPS) {
431 432 433 434 435 436 437

  // Check symbol names are not too long.  If any are too long, don't add any.
  for (int i = 0; i< names_count; i++) {
    if (lengths[i] > Symbol::max_length()) {
      THROW_MSG_0(vmSymbols::java_lang_InternalError(),
                  "name is too long to represent");
    }
D
duke 已提交
438 439
  }

440 441
  // Cannot hit a safepoint in this function because the "this" pointer can move.
  No_Safepoint_Verifier nsv;
D
duke 已提交
442

443
  for (int i=0; i<names_count; i++) {
444 445
    // Check if the symbol table has been rehashed, if so, need to recalculate
    // the hash value.
446 447 448 449 450 451
    unsigned int hashValue;
    if (use_alternate_hashcode()) {
      hashValue = hash_symbol(names[i], lengths[i]);
    } else {
      hashValue = hashValues[i];
    }
D
duke 已提交
452 453
    // Since look-up was done lock-free, we need to check if another
    // thread beat us in the race to insert the symbol.
454 455
    int index = hash_to_index(hashValue);
    Symbol* test = lookup(index, names[i], lengths[i], hashValue);
D
duke 已提交
456
    if (test != NULL) {
T
twisti 已提交
457
      // A race occurred and another thread introduced the symbol, this one
D
duke 已提交
458 459
      // will be dropped and collected. Use test instead.
      cp->symbol_at_put(cp_indices[i], test);
460
      assert(test->refcount() != 0, "lookup should have incremented the count");
D
duke 已提交
461
    } else {
462 463
      // Create a new symbol.  The null class loader is never unloaded so these
      // are allocated specially in a permanent arena.
464
      bool c_heap = !loader_data->is_the_null_class_loader_data();
465 466
      Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
      assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
Z
zgu 已提交
467
      HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
D
duke 已提交
468 469 470 471 472 473 474 475 476 477
      add_entry(index, entry);
      cp->symbol_at_put(cp_indices[i], sym);
    }
  }
  return true;
}


void SymbolTable::verify() {
  for (int i = 0; i < the_table()->table_size(); ++i) {
Z
zgu 已提交
478
    HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
D
duke 已提交
479
    for ( ; p != NULL; p = p->next()) {
480
      Symbol* s = (Symbol*)(p->literal());
D
duke 已提交
481 482 483 484 485 486 487 488 489
      guarantee(s != NULL, "symbol is NULL");
      unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
      guarantee(p->hash() == h, "broken hash in symbol table entry");
      guarantee(the_table()->hash_to_index(h) == i,
                "wrong index in symbol table");
    }
  }
}

490
void SymbolTable::dump(outputStream* st) {
491
  the_table()->dump_table(st, "SymbolTable");
492 493
}

D
duke 已提交
494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513

//---------------------------------------------------------------------------
// Non-product code

#ifndef PRODUCT

void SymbolTable::print_histogram() {
  MutexLocker ml(SymbolTable_lock);
  const int results_length = 100;
  int results[results_length];
  int i,j;

  // initialize results to zero
  for (j = 0; j < results_length; j++) {
    results[j] = 0;
  }

  int total = 0;
  int max_symbols = 0;
  int out_of_range = 0;
514 515
  int memory_total = 0;
  int count = 0;
D
duke 已提交
516
  for (i = 0; i < the_table()->table_size(); i++) {
Z
zgu 已提交
517
    HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
D
duke 已提交
518
    for ( ; p != NULL; p = p->next()) {
519
      memory_total += p->literal()->size();
520 521
      count++;
      int counter = p->literal()->utf8_length();
D
duke 已提交
522 523 524 525 526 527 528 529 530 531
      total += counter;
      if (counter < results_length) {
        results[counter]++;
      } else {
        out_of_range++;
      }
      max_symbols = MAX2(max_symbols, counter);
    }
  }
  tty->print_cr("Symbol Table:");
532 533 534
  tty->print_cr("Total number of symbols  %5d", count);
  tty->print_cr("Total size in memory     %5dK",
          (memory_total*HeapWordSize)/1024);
535 536 537
  tty->print_cr("Total counted            %5d", _symbols_counted);
  tty->print_cr("Total removed            %5d", _symbols_removed);
  if (_symbols_counted > 0) {
538
    tty->print_cr("Percent removed          %3.2f",
539
          ((float)_symbols_removed/(float)_symbols_counted)* 100);
540 541
  }
  tty->print_cr("Reference counts         %5d", Symbol::_total_count);
542 543
  tty->print_cr("Symbol arena size        %5d used %5d",
                 arena()->size_in_bytes(), arena()->used());
544
  tty->print_cr("Histogram of symbol length:");
D
duke 已提交
545 546 547 548 549 550 551 552 553 554 555
  tty->print_cr("%8s %5d", "Total  ", total);
  tty->print_cr("%8s %5d", "Maximum", max_symbols);
  tty->print_cr("%8s %3.2f", "Average",
          ((float) total / (float) the_table()->table_size()));
  tty->print_cr("%s", "Histogram:");
  tty->print_cr(" %s %29s", "Length", "Number chains that length");
  for (i = 0; i < results_length; i++) {
    if (results[i] > 0) {
      tty->print_cr("%6d %10d", i, results[i]);
    }
  }
556 557 558 559 560 561 562 563 564 565 566 567 568
  if (Verbose) {
    int line_length = 70;
    tty->print_cr("%s %30s", " Length", "Number chains that length");
    for (i = 0; i < results_length; i++) {
      if (results[i] > 0) {
        tty->print("%4d", i);
        for (j = 0; (j < results[i]) && (j < line_length);  j++) {
          tty->print("%1s", "*");
        }
        if (j == line_length) {
          tty->print("%1s", "+");
        }
        tty->cr();
D
duke 已提交
569 570 571 572 573 574 575
      }
    }
  }
  tty->print_cr(" %s %d: %d\n", "Number chains longer than",
                    results_length, out_of_range);
}

576 577
void SymbolTable::print() {
  for (int i = 0; i < the_table()->table_size(); ++i) {
Z
zgu 已提交
578 579
    HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
    HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
580 581 582 583 584 585
    if (entry != NULL) {
      while (entry != NULL) {
        tty->print(PTR_FORMAT " ", entry->literal());
        entry->literal()->print();
        tty->print(" %d", entry->literal()->refcount());
        p = entry->next_addr();
Z
zgu 已提交
586
        entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
587 588 589 590 591
      }
      tty->cr();
    }
  }
}
D
duke 已提交
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
#endif // PRODUCT

// --------------------------------------------------------------------------

#ifdef ASSERT
class StableMemoryChecker : public StackObj {
  enum { _bufsize = wordSize*4 };

  address _region;
  jint    _size;
  u1      _save_buf[_bufsize];

  int sample(u1* save_buf) {
    if (_size <= _bufsize) {
      memcpy(save_buf, _region, _size);
      return _size;
    } else {
      // copy head and tail
      memcpy(&save_buf[0],          _region,                      _bufsize/2);
      memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
      return (_bufsize/2)*2;
    }
  }

 public:
  StableMemoryChecker(const void* region, jint size) {
    _region = (address) region;
    _size   = size;
    sample(_save_buf);
  }

  bool verify() {
    u1 check_buf[sizeof(_save_buf)];
    int check_size = sample(check_buf);
    return (0 == memcmp(_save_buf, check_buf, check_size));
  }

  void set_region(const void* region) { _region = (address) region; }
};
#endif


// --------------------------------------------------------------------------
StringTable* StringTable::_the_table = NULL;

637 638
bool StringTable::_needs_rehashing = false;

639 640
volatile int StringTable::_parallel_claimed_idx = 0;

641
// Pick hashing algorithm
642
unsigned int StringTable::hash_string(const jchar* s, int len) {
643
  return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
644
                                    java_lang_String::hash_code(s, len);
645 646
}

D
duke 已提交
647 648
oop StringTable::lookup(int index, jchar* name,
                        int len, unsigned int hash) {
649
  int count = 0;
Z
zgu 已提交
650
  for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) {
651
    count++;
D
duke 已提交
652 653 654 655 656 657
    if (l->hash() == hash) {
      if (java_lang_String::equals(l->literal(), name, len)) {
        return l->literal();
      }
    }
  }
658
  // If the bucket size is too deep check if this hash code is insufficient.
Z
zgu 已提交
659
  if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
660 661
    _needs_rehashing = check_rehash_table(count);
  }
D
duke 已提交
662 663 664 665
  return NULL;
}


666
oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
667
                           int len, unsigned int hashValue_arg, TRAPS) {
D
duke 已提交
668 669 670

  assert(java_lang_String::equals(string(), name, len),
         "string must be properly initialized");
671 672
  // Cannot hit a safepoint in this function because the "this" pointer can move.
  No_Safepoint_Verifier nsv;
D
duke 已提交
673

674
  // Check if the symbol table has been rehashed, if so, need to recalculate
675 676 677 678 679 680 681 682 683 684
  // the hash value and index before second lookup.
  unsigned int hashValue;
  int index;
  if (use_alternate_hashcode()) {
    hashValue = hash_string(name, len);
    index = hash_to_index(hashValue);
  } else {
    hashValue = hashValue_arg;
    index = index_arg;
  }
685

D
duke 已提交
686 687 688 689 690 691 692 693 694
  // Since look-up was done lock-free, we need to check if another
  // thread beat us in the race to insert the symbol.

  oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
  if (test != NULL) {
    // Entry already added
    return test;
  }

Z
zgu 已提交
695
  HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string());
D
duke 已提交
696 697 698 699 700
  add_entry(index, entry);
  return string();
}


701
oop StringTable::lookup(Symbol* symbol) {
D
duke 已提交
702 703 704
  ResourceMark rm;
  int length;
  jchar* chars = symbol->as_unicode(length);
705 706 707
  return lookup(chars, length);
}

708 709 710 711 712 713 714 715 716 717 718
// Tell the GC that this string was looked up in the StringTable.
static void ensure_string_alive(oop string) {
  // A lookup in the StringTable could return an object that was previously
  // considered dead. The SATB part of G1 needs to get notified about this
  // potential resurrection, otherwise the marking might not find the object.
#if INCLUDE_ALL_GCS
  if (UseG1GC && string != NULL) {
    G1SATBCardTableModRefBS::enqueue(string);
  }
#endif
}
719 720 721 722

oop StringTable::lookup(jchar* name, int len) {
  unsigned int hash = hash_string(name, len);
  int index = the_table()->hash_to_index(hash);
723 724 725 726 727
  oop string = the_table()->lookup(index, name, len, hash);

  ensure_string_alive(string);

  return string;
D
duke 已提交
728 729 730 731 732
}


oop StringTable::intern(Handle string_or_null, jchar* name,
                        int len, TRAPS) {
733
  unsigned int hashValue = hash_string(name, len);
D
duke 已提交
734
  int index = the_table()->hash_to_index(hashValue);
735
  oop found_string = the_table()->lookup(index, name, len, hashValue);
D
duke 已提交
736 737

  // Found
738 739 740 741
  if (found_string != NULL) {
    ensure_string_alive(found_string);
    return found_string;
  }
742 743

  debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
744
  assert(!Universe::heap()->is_in_reserved(name),
745 746 747 748
         "proposed name of symbol must be stable");

  Handle string;
  // try to reuse the string if possible
749
  if (!string_or_null.is_null()) {
750 751
    string = string_or_null;
  } else {
752
    string = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
753 754
  }

P
pliden 已提交
755 756 757 758 759 760 761 762 763
#if INCLUDE_ALL_GCS
  if (G1StringDedup::is_enabled()) {
    // Deduplicate the string before it is interned. Note that we should never
    // deduplicate a string after it has been interned. Doing so will counteract
    // compiler optimizations done on e.g. interned string literals.
    G1StringDedup::deduplicate(string());
  }
#endif

764 765
  // Grab the StringTable_lock before getting the_table() because it could
  // change at safepoint.
766 767 768 769 770 771 772
  oop added_or_found;
  {
    MutexLocker ml(StringTable_lock, THREAD);
    // Otherwise, add to symbol to table
    added_or_found = the_table()->basic_add(index, string, name, len,
                                  hashValue, CHECK_NULL);
  }
D
duke 已提交
773

774 775 776
  ensure_string_alive(added_or_found);

  return added_or_found;
D
duke 已提交
777 778
}

779
oop StringTable::intern(Symbol* symbol, TRAPS) {
D
duke 已提交
780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795
  if (symbol == NULL) return NULL;
  ResourceMark rm(THREAD);
  int length;
  jchar* chars = symbol->as_unicode(length);
  Handle string;
  oop result = intern(string, chars, length, CHECK_NULL);
  return result;
}


oop StringTable::intern(oop string, TRAPS)
{
  if (string == NULL) return NULL;
  ResourceMark rm(THREAD);
  int length;
  Handle h_string (THREAD, string);
796
  jchar* chars = java_lang_String::as_unicode_string(string, length, CHECK_NULL);
D
duke 已提交
797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812
  oop result = intern(h_string, chars, length, CHECK_NULL);
  return result;
}


oop StringTable::intern(const char* utf8_string, TRAPS) {
  if (utf8_string == NULL) return NULL;
  ResourceMark rm(THREAD);
  int length = UTF8::unicode_length(utf8_string);
  jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
  UTF8::convert_to_unicode(utf8_string, chars, length);
  Handle string;
  oop result = intern(string, chars, length, CHECK_NULL);
  return result;
}

813 814 815 816 817
void StringTable::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
  buckets_unlink_or_oops_do(is_alive, f, 0, the_table()->table_size(), processed, removed);
}

void StringTable::possibly_parallel_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
818 819 820
  // Readers of the table are unlocked, so we should only be removing
  // entries at a safepoint.
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
821
  const int limit = the_table()->table_size();
822

823 824 825 826 827 828
  for (;;) {
    // Grab next set of buckets to scan
    int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
    if (start_idx >= limit) {
      // End of table
      break;
829
    }
830 831 832

    int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
    buckets_unlink_or_oops_do(is_alive, f, start_idx, end_idx, processed, removed);
833 834 835
  }
}

836
void StringTable::buckets_oops_do(OopClosure* f, int start_idx, int end_idx) {
837 838 839
  const int limit = the_table()->table_size();

  assert(0 <= start_idx && start_idx <= limit,
840
         err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
841
  assert(0 <= end_idx && end_idx <= limit,
842
         err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
843
  assert(start_idx <= end_idx,
844
         err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
845 846 847
                 start_idx, end_idx));

  for (int i = start_idx; i < end_idx; i += 1) {
Z
zgu 已提交
848
    HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
849
    while (entry != NULL) {
850 851
      assert(!entry->is_shared(), "CDS not used for the StringTable");

852 853
      f->do_oop((oop*)entry->literal_addr());

854
      entry = entry->next();
855 856 857 858
    }
  }
}

859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891
void StringTable::buckets_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int start_idx, int end_idx, int* processed, int* removed) {
  const int limit = the_table()->table_size();

  assert(0 <= start_idx && start_idx <= limit,
         err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
  assert(0 <= end_idx && end_idx <= limit,
         err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
  assert(start_idx <= end_idx,
         err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
                 start_idx, end_idx));

  for (int i = start_idx; i < end_idx; ++i) {
    HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
    HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
    while (entry != NULL) {
      assert(!entry->is_shared(), "CDS not used for the StringTable");

      if (is_alive->do_object_b(entry->literal())) {
        if (f != NULL) {
          f->do_oop((oop*)entry->literal_addr());
        }
        p = entry->next_addr();
      } else {
        *p = entry->next();
        the_table()->free_entry(entry);
        (*removed)++;
      }
      (*processed)++;
      entry = *p;
    }
  }
}

892
void StringTable::oops_do(OopClosure* f) {
893
  buckets_oops_do(f, 0, the_table()->table_size());
894 895 896 897 898 899 900 901 902 903 904 905 906 907
}

void StringTable::possibly_parallel_oops_do(OopClosure* f) {
  const int limit = the_table()->table_size();

  for (;;) {
    // Grab next set of buckets to scan
    int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
    if (start_idx >= limit) {
      // End of table
      break;
    }

    int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
908
    buckets_oops_do(f, start_idx, end_idx);
909 910 911
  }
}

912 913
// This verification is part of Universe::verify() and needs to be quick.
// See StringTable::verify_and_compare() below for exhaustive verification.
D
duke 已提交
914 915
void StringTable::verify() {
  for (int i = 0; i < the_table()->table_size(); ++i) {
Z
zgu 已提交
916
    HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
D
duke 已提交
917 918 919
    for ( ; p != NULL; p = p->next()) {
      oop s = p->literal();
      guarantee(s != NULL, "interned string is NULL");
920
      unsigned int h = java_lang_String::hash_string(s);
D
duke 已提交
921 922 923 924 925 926
      guarantee(p->hash() == h, "broken hash in string table entry");
      guarantee(the_table()->hash_to_index(h) == i,
                "wrong index in string table");
    }
  }
}
927 928

void StringTable::dump(outputStream* st) {
929
  the_table()->dump_table(st, "StringTable");
930 931
}

932 933 934 935 936 937 938 939 940 941 942 943 944
StringTable::VerifyRetTypes StringTable::compare_entries(
                                      int bkt1, int e_cnt1,
                                      HashtableEntry<oop, mtSymbol>* e_ptr1,
                                      int bkt2, int e_cnt2,
                                      HashtableEntry<oop, mtSymbol>* e_ptr2) {
  // These entries are sanity checked by verify_and_compare_entries()
  // before this function is called.
  oop str1 = e_ptr1->literal();
  oop str2 = e_ptr2->literal();

  if (str1 == str2) {
    tty->print_cr("ERROR: identical oop values (0x" PTR_FORMAT ") "
                  "in entry @ bucket[%d][%d] and entry @ bucket[%d][%d]",
945
                  (void *)str1, bkt1, e_cnt1, bkt2, e_cnt2);
946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087
    return _verify_fail_continue;
  }

  if (java_lang_String::equals(str1, str2)) {
    tty->print_cr("ERROR: identical String values in entry @ "
                  "bucket[%d][%d] and entry @ bucket[%d][%d]",
                  bkt1, e_cnt1, bkt2, e_cnt2);
    return _verify_fail_continue;
  }

  return _verify_pass;
}

StringTable::VerifyRetTypes StringTable::verify_entry(int bkt, int e_cnt,
                                      HashtableEntry<oop, mtSymbol>* e_ptr,
                                      StringTable::VerifyMesgModes mesg_mode) {

  VerifyRetTypes ret = _verify_pass;  // be optimistic

  oop str = e_ptr->literal();
  if (str == NULL) {
    if (mesg_mode == _verify_with_mesgs) {
      tty->print_cr("ERROR: NULL oop value in entry @ bucket[%d][%d]", bkt,
                    e_cnt);
    }
    // NULL oop means no more verifications are possible
    return _verify_fail_done;
  }

  if (str->klass() != SystemDictionary::String_klass()) {
    if (mesg_mode == _verify_with_mesgs) {
      tty->print_cr("ERROR: oop is not a String in entry @ bucket[%d][%d]",
                    bkt, e_cnt);
    }
    // not a String means no more verifications are possible
    return _verify_fail_done;
  }

  unsigned int h = java_lang_String::hash_string(str);
  if (e_ptr->hash() != h) {
    if (mesg_mode == _verify_with_mesgs) {
      tty->print_cr("ERROR: broken hash value in entry @ bucket[%d][%d], "
                    "bkt_hash=%d, str_hash=%d", bkt, e_cnt, e_ptr->hash(), h);
    }
    ret = _verify_fail_continue;
  }

  if (the_table()->hash_to_index(h) != bkt) {
    if (mesg_mode == _verify_with_mesgs) {
      tty->print_cr("ERROR: wrong index value for entry @ bucket[%d][%d], "
                    "str_hash=%d, hash_to_index=%d", bkt, e_cnt, h,
                    the_table()->hash_to_index(h));
    }
    ret = _verify_fail_continue;
  }

  return ret;
}

// See StringTable::verify() above for the quick verification that is
// part of Universe::verify(). This verification is exhaustive and
// reports on every issue that is found. StringTable::verify() only
// reports on the first issue that is found.
//
// StringTable::verify_entry() checks:
// - oop value != NULL (same as verify())
// - oop value is a String
// - hash(String) == hash in entry (same as verify())
// - index for hash == index of entry (same as verify())
//
// StringTable::compare_entries() checks:
// - oops are unique across all entries
// - String values are unique across all entries
//
int StringTable::verify_and_compare_entries() {
  assert(StringTable_lock->is_locked(), "sanity check");

  int  fail_cnt = 0;

  // first, verify all the entries individually:
  for (int bkt = 0; bkt < the_table()->table_size(); bkt++) {
    HashtableEntry<oop, mtSymbol>* e_ptr = the_table()->bucket(bkt);
    for (int e_cnt = 0; e_ptr != NULL; e_ptr = e_ptr->next(), e_cnt++) {
      VerifyRetTypes ret = verify_entry(bkt, e_cnt, e_ptr, _verify_with_mesgs);
      if (ret != _verify_pass) {
        fail_cnt++;
      }
    }
  }

  // Optimization: if the above check did not find any failures, then
  // the comparison loop below does not need to call verify_entry()
  // before calling compare_entries(). If there were failures, then we
  // have to call verify_entry() to see if the entry can be passed to
  // compare_entries() safely. When we call verify_entry() in the loop
  // below, we do so quietly to void duplicate messages and we don't
  // increment fail_cnt because the failures have already been counted.
  bool need_entry_verify = (fail_cnt != 0);

  // second, verify all entries relative to each other:
  for (int bkt1 = 0; bkt1 < the_table()->table_size(); bkt1++) {
    HashtableEntry<oop, mtSymbol>* e_ptr1 = the_table()->bucket(bkt1);
    for (int e_cnt1 = 0; e_ptr1 != NULL; e_ptr1 = e_ptr1->next(), e_cnt1++) {
      if (need_entry_verify) {
        VerifyRetTypes ret = verify_entry(bkt1, e_cnt1, e_ptr1,
                                          _verify_quietly);
        if (ret == _verify_fail_done) {
          // cannot use the current entry to compare against other entries
          continue;
        }
      }

      for (int bkt2 = bkt1; bkt2 < the_table()->table_size(); bkt2++) {
        HashtableEntry<oop, mtSymbol>* e_ptr2 = the_table()->bucket(bkt2);
        int e_cnt2;
        for (e_cnt2 = 0; e_ptr2 != NULL; e_ptr2 = e_ptr2->next(), e_cnt2++) {
          if (bkt1 == bkt2 && e_cnt2 <= e_cnt1) {
            // skip the entries up to and including the one that
            // we're comparing against
            continue;
          }

          if (need_entry_verify) {
            VerifyRetTypes ret = verify_entry(bkt2, e_cnt2, e_ptr2,
                                              _verify_quietly);
            if (ret == _verify_fail_done) {
              // cannot compare against this entry
              continue;
            }
          }

          // compare two entries, report and count any failures:
          if (compare_entries(bkt1, e_cnt1, e_ptr1, bkt2, e_cnt2, e_ptr2)
              != _verify_pass) {
            fail_cnt++;
          }
        }
      }
    }
  }
  return fail_cnt;
}
1088 1089 1090 1091 1092

// Create a new table and using alternate hash code, populate the new table
// with the existing strings.   Set flag to use the alternate hash code afterwards.
void StringTable::rehash_table() {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
1093 1094
  // This should never happen with -Xshare:dump but it might in testing mode.
  if (DumpSharedSpaces) return;
1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106
  StringTable* new_table = new StringTable();

  // Rehash the table
  the_table()->move_to(new_table);

  // Delete the table and buckets (entries are reused in new table).
  delete _the_table;
  // Don't check if we need rehashing until the table gets unbalanced again.
  // Then rehash with a new global seed.
  _needs_rehashing = false;
  _the_table = new_table;
}