symbolTable.cpp 29.4 KB
Newer Older
D
duke 已提交
1
/*
2
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
D
duke 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
19 20 21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
D
duke 已提交
22 23 24
 *
 */

25
#include "precompiled.hpp"
26
#include "classfile/altHashing.hpp"
27 28 29 30
#include "classfile/javaClasses.hpp"
#include "classfile/symbolTable.hpp"
#include "classfile/systemDictionary.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
31
#include "memory/allocation.inline.hpp"
32 33 34 35 36 37
#include "memory/filemap.hpp"
#include "memory/gcLocker.inline.hpp"
#include "oops/oop.inline.hpp"
#include "oops/oop.inline2.hpp"
#include "runtime/mutexLocker.hpp"
#include "utilities/hashtable.inline.hpp"
38
#include "utilities/numberSeq.hpp"
D
duke 已提交
39 40 41 42

// --------------------------------------------------------------------------

SymbolTable* SymbolTable::_the_table = NULL;
43 44
// Static arena for symbols that are not deallocated
Arena* SymbolTable::_arena = NULL;
45 46
bool SymbolTable::_needs_rehashing = false;
jint SymbolTable::_seed = 0;
D
duke 已提交
47

48
Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
49 50
  assert (len <= Symbol::max_length(), "should be checked by caller");

51 52 53 54 55 56 57 58 59
  Symbol* sym;
  // Allocate symbols in the C heap when dumping shared spaces in case there
  // are temporary symbols we can remove.
  if (c_heap || DumpSharedSpaces) {
    // refcount starts as 1
    sym = new (len, THREAD) Symbol(name, len, 1);
  } else {
    sym = new (len, arena(), THREAD) Symbol(name, len, -1);
  }
60 61 62 63
  assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
  return sym;
}

64 65 66 67 68 69
void SymbolTable::initialize_symbols(int arena_alloc_size) {
  // Initialize the arena for global symbols, size passed in depends on CDS.
  if (arena_alloc_size == 0) {
    _arena = new Arena();
  } else {
    _arena = new Arena(arena_alloc_size);
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
  }
}

// Call function for all symbols in the symbol table.
void SymbolTable::symbols_do(SymbolClosure *cl) {
  const int n = the_table()->table_size();
  for (int i = 0; i < n; i++) {
    for (HashtableEntry<Symbol*>* p = the_table()->bucket(i);
         p != NULL;
         p = p->next()) {
      cl->do_symbol(p->literal_addr());
    }
  }
}

int SymbolTable::symbols_removed = 0;
int SymbolTable::symbols_counted = 0;

// Remove unreferenced symbols from the symbol table
89
// This is done late during GC.
90 91 92
void SymbolTable::unlink() {
  int removed = 0;
  int total = 0;
93
  size_t memory_total = 0;
94
  for (int i = 0; i < the_table()->table_size(); ++i) {
95 96 97 98 99 100 101 102
    HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i);
    HashtableEntry<Symbol*>* entry = the_table()->bucket(i);
    while (entry != NULL) {
      // Shared entries are normally at the end of the bucket and if we run into
      // a shared entry, then there is nothing more to remove. However, if we
      // have rehashed the table, then the shared entries are no longer at the
      // end of the bucket.
      if (entry->is_shared() && !use_alternate_hashcode()) {
103 104 105 106 107 108 109 110
        break;
      }
      Symbol* s = entry->literal();
      memory_total += s->object_size();
      total++;
      assert(s != NULL, "just checking");
      // If reference count is zero, remove.
      if (s->refcount() == 0) {
111
        assert(!entry->is_shared(), "shared entries should be kept live");
112 113 114 115 116 117 118
        delete s;
        removed++;
        *p = entry->next();
        the_table()->free_entry(entry);
      } else {
        p = entry->next_addr();
      }
119 120
      // get next entry
      entry = (HashtableEntry<Symbol*>*)HashtableEntry<Symbol*>::make_ptr(*p);
121 122 123 124
    }
  }
  symbols_removed += removed;
  symbols_counted += total;
125 126 127 128
  // Exclude printing for normal PrintGCDetails because people parse
  // this output.
  if (PrintGCDetails && Verbose && WizardMode) {
    gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", total,
129 130 131 132
                        (memory_total*HeapWordSize)/1024);
  }
}

133 134 135 136 137 138 139 140 141 142
unsigned int SymbolTable::new_hash(Symbol* sym) {
  ResourceMark rm;
  // Use alternate hashing algorithm on this symbol.
  return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length());
}

// Create a new table and using alternate hash code, populate the new table
// with the existing strings.   Set flag to use the alternate hash code afterwards.
void SymbolTable::rehash_table() {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
143 144
  // This should never happen with -Xshare:dump but it might in testing mode.
  if (DumpSharedSpaces) return;
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
  // Create a new symbol table
  SymbolTable* new_table = new SymbolTable();

  // Initialize the global seed for hashing.
  _seed = AltHashing::compute_seed();
  assert(seed() != 0, "shouldn't be zero");

  the_table()->move_to(new_table);

  // Delete the table and buckets (entries are reused in new table).
  delete _the_table;
  // Don't check if we need rehashing until the table gets unbalanced again.
  // Then rehash with a new global seed.
  _needs_rehashing = false;
  _the_table = new_table;
}
161

D
duke 已提交
162 163
// Lookup a symbol in a bucket.

164
Symbol* SymbolTable::lookup(int index, const char* name,
D
duke 已提交
165
                              int len, unsigned int hash) {
166
  int count = 0;
167
  for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
168
    count++;  // count all entries in this bucket, not just ones with same hash
D
duke 已提交
169
    if (e->hash() == hash) {
170
      Symbol* sym = e->literal();
D
duke 已提交
171
      if (sym->equals(name, len)) {
172 173
        // something is referencing this symbol now.
        sym->increment_refcount();
D
duke 已提交
174 175 176 177
        return sym;
      }
    }
  }
178 179 180 181
  // If the bucket size is too deep check if this hash code is insufficient.
  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
    _needs_rehashing = check_rehash_table(count);
  }
D
duke 已提交
182 183 184
  return NULL;
}

185 186
// Pick hashing algorithm.
unsigned int SymbolTable::hash_symbol(const char* s, int len) {
187 188
  return use_alternate_hashcode() ?
           AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
189
           java_lang_String::to_hash(s, len);
190 191
}

D
duke 已提交
192 193 194 195 196 197 198 199

// We take care not to be blocking while holding the
// SymbolTable_lock. Otherwise, the system might deadlock, since the
// symboltable is used during compilation (VM_thread) The lock free
// synchronization is simplified by the fact that we do not delete
// entries in the symbol table during normal execution (only during
// safepoints).

200
Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
D
duke 已提交
201 202 203
  unsigned int hashValue = hash_symbol(name, len);
  int index = the_table()->hash_to_index(hashValue);

204
  Symbol* s = the_table()->lookup(index, name, len, hashValue);
D
duke 已提交
205 206 207 208

  // Found
  if (s != NULL) return s;

209 210 211
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

D
duke 已提交
212
  // Otherwise, add to symbol to table
213
  return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
D
duke 已提交
214 215
}

216
Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
D
duke 已提交
217 218 219 220 221 222 223 224 225 226 227
  char* buffer;
  int index, len;
  unsigned int hashValue;
  char* name;
  {
    debug_only(No_Safepoint_Verifier nsv;)

    name = (char*)sym->base() + begin;
    len = end - begin;
    hashValue = hash_symbol(name, len);
    index = the_table()->hash_to_index(hashValue);
228
    Symbol* s = the_table()->lookup(index, name, len, hashValue);
D
duke 已提交
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248

    // Found
    if (s != NULL) return s;
  }

  // Otherwise, add to symbol to table. Copy to a C string first.
  char stack_buf[128];
  ResourceMark rm(THREAD);
  if (len <= 128) {
    buffer = stack_buf;
  } else {
    buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
  }
  for (int i=0; i<len; i++) {
    buffer[i] = name[i];
  }
  // Make sure there is no safepoint in the code above since name can't move.
  // We can't include the code in No_Safepoint_Verifier because of the
  // ResourceMark.

249 250 251
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

252
  return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
D
duke 已提交
253 254
}

255
Symbol* SymbolTable::lookup_only(const char* name, int len,
D
duke 已提交
256 257 258 259
                                   unsigned int& hash) {
  hash = hash_symbol(name, len);
  int index = the_table()->hash_to_index(hash);

260 261
  Symbol* s = the_table()->lookup(index, name, len, hash);
  return s;
D
duke 已提交
262 263
}

264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
// Look up the address of the literal in the SymbolTable for this Symbol*
// Do not create any new symbols
// Do not increment the reference count to keep this alive
Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
  unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
  int index = the_table()->hash_to_index(hash);

  for (HashtableEntry<Symbol*>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
    if (e->hash() == hash) {
      Symbol* literal_sym = e->literal();
      if (sym == literal_sym) {
        return e->literal_addr();
      }
    }
  }
  return NULL;
}

282 283
// Suggestion: Push unicode-based lookup all the way into the hashing
// and probing logic, so there is no need for convert_to_utf8 until
284 285
// an actual new Symbol* is created.
Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
286 287 288 289 290 291 292 293 294 295 296 297 298 299
  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
  char stack_buf[128];
  if (utf8_length < (int) sizeof(stack_buf)) {
    char* chars = stack_buf;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup(chars, utf8_length, THREAD);
  } else {
    ResourceMark rm(THREAD);
    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup(chars, utf8_length, THREAD);
  }
}

300
Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
                                           unsigned int& hash) {
  int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
  char stack_buf[128];
  if (utf8_length < (int) sizeof(stack_buf)) {
    char* chars = stack_buf;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup_only(chars, utf8_length, hash);
  } else {
    ResourceMark rm;
    char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
    UNICODE::convert_to_utf8(name, utf16_length, chars);
    return lookup_only(chars, utf8_length, hash);
  }
}

316 317
void SymbolTable::add(Handle class_loader, constantPoolHandle cp,
                      int names_count,
D
duke 已提交
318 319
                      const char** names, int* lengths, int* cp_indices,
                      unsigned int* hashValues, TRAPS) {
320 321 322
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

D
duke 已提交
323
  SymbolTable* table = the_table();
324
  bool added = table->basic_add(class_loader, cp, names_count, names, lengths,
D
duke 已提交
325 326 327 328 329
                                cp_indices, hashValues, CHECK);
  if (!added) {
    // do it the hard way
    for (int i=0; i<names_count; i++) {
      int index = table->hash_to_index(hashValues[i]);
330 331
      bool c_heap = class_loader() != NULL;
      Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
D
duke 已提交
332 333 334 335 336
      cp->symbol_at_put(cp_indices[i], sym);
    }
  }
}

337 338 339 340 341 342
Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
  unsigned int hash;
  Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
  if (result != NULL) {
    return result;
  }
343 344 345
  // Grab SymbolTable_lock first.
  MutexLocker ml(SymbolTable_lock, THREAD);

346 347 348 349 350
  SymbolTable* table = the_table();
  int index = table->hash_to_index(hash);
  return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
}

351
Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
352
                               unsigned int hashValue_arg, bool c_heap, TRAPS) {
D
duke 已提交
353 354 355
  assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
         "proposed name of symbol must be stable");

356 357 358 359 360 361 362 363
  // Don't allow symbols to be created which cannot fit in a Symbol*.
  if (len > Symbol::max_length()) {
    THROW_MSG_0(vmSymbols::java_lang_InternalError(),
                "name is too long to represent");
  }

  // Cannot hit a safepoint in this function because the "this" pointer can move.
  No_Safepoint_Verifier nsv;
D
duke 已提交
364

365
  // Check if the symbol table has been rehashed, if so, need to recalculate
366 367 368 369 370 371 372 373 374 375
  // the hash value and index.
  unsigned int hashValue;
  int index;
  if (use_alternate_hashcode()) {
    hashValue = hash_symbol((const char*)name, len);
    index = hash_to_index(hashValue);
  } else {
    hashValue = hashValue_arg;
    index = index_arg;
  }
376

D
duke 已提交
377 378
  // Since look-up was done lock-free, we need to check if another
  // thread beat us in the race to insert the symbol.
379
  Symbol* test = lookup(index, (char*)name, len, hashValue);
D
duke 已提交
380
  if (test != NULL) {
381
    // A race occurred and another thread introduced the symbol.
382
    assert(test->refcount() != 0, "lookup should have incremented the count");
D
duke 已提交
383 384 385
    return test;
  }

386 387 388 389
  // Create a new symbol.
  Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
  assert(sym->equals((char*)name, len), "symbol must be properly initialized");

390
  HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
D
duke 已提交
391
  add_entry(index, entry);
392
  return sym;
D
duke 已提交
393 394
}

395 396 397 398
// This version of basic_add adds symbols in batch from the constant pool
// parsing.
bool SymbolTable::basic_add(Handle class_loader, constantPoolHandle cp,
                            int names_count,
D
duke 已提交
399 400 401
                            const char** names, int* lengths,
                            int* cp_indices, unsigned int* hashValues,
                            TRAPS) {
402 403 404 405 406 407 408

  // Check symbol names are not too long.  If any are too long, don't add any.
  for (int i = 0; i< names_count; i++) {
    if (lengths[i] > Symbol::max_length()) {
      THROW_MSG_0(vmSymbols::java_lang_InternalError(),
                  "name is too long to represent");
    }
D
duke 已提交
409 410
  }

411 412
  // Cannot hit a safepoint in this function because the "this" pointer can move.
  No_Safepoint_Verifier nsv;
D
duke 已提交
413

414
  for (int i=0; i<names_count; i++) {
415 416
    // Check if the symbol table has been rehashed, if so, need to recalculate
    // the hash value.
417 418 419 420 421 422
    unsigned int hashValue;
    if (use_alternate_hashcode()) {
      hashValue = hash_symbol(names[i], lengths[i]);
    } else {
      hashValue = hashValues[i];
    }
D
duke 已提交
423 424
    // Since look-up was done lock-free, we need to check if another
    // thread beat us in the race to insert the symbol.
425 426
    int index = hash_to_index(hashValue);
    Symbol* test = lookup(index, names[i], lengths[i], hashValue);
D
duke 已提交
427
    if (test != NULL) {
T
twisti 已提交
428
      // A race occurred and another thread introduced the symbol, this one
D
duke 已提交
429 430
      // will be dropped and collected. Use test instead.
      cp->symbol_at_put(cp_indices[i], test);
431
      assert(test->refcount() != 0, "lookup should have incremented the count");
D
duke 已提交
432
    } else {
433 434 435 436 437
      // Create a new symbol.  The null class loader is never unloaded so these
      // are allocated specially in a permanent arena.
      bool c_heap = class_loader() != NULL;
      Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
      assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
438
      HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
D
duke 已提交
439 440 441 442 443 444 445 446 447 448
      add_entry(index, entry);
      cp->symbol_at_put(cp_indices[i], sym);
    }
  }
  return true;
}


void SymbolTable::verify() {
  for (int i = 0; i < the_table()->table_size(); ++i) {
449
    HashtableEntry<Symbol*>* p = the_table()->bucket(i);
D
duke 已提交
450
    for ( ; p != NULL; p = p->next()) {
451
      Symbol* s = (Symbol*)(p->literal());
D
duke 已提交
452 453 454 455 456 457 458 459 460
      guarantee(s != NULL, "symbol is NULL");
      unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
      guarantee(p->hash() == h, "broken hash in symbol table entry");
      guarantee(the_table()->hash_to_index(h) == i,
                "wrong index in symbol table");
    }
  }
}

461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
void SymbolTable::dump(outputStream* st) {
  NumberSeq summary;
  for (int i = 0; i < the_table()->table_size(); ++i) {
    int count = 0;
    for (HashtableEntry<Symbol*>* e = the_table()->bucket(i);
       e != NULL; e = e->next()) {
      count++;
    }
    summary.add((double)count);
  }
  st->print_cr("SymbolTable statistics:");
  st->print_cr("Number of buckets       : %7d", summary.num());
  st->print_cr("Average bucket size     : %7.0f", summary.avg());
  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
}

D
duke 已提交
479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498

//---------------------------------------------------------------------------
// Non-product code

#ifndef PRODUCT

void SymbolTable::print_histogram() {
  MutexLocker ml(SymbolTable_lock);
  const int results_length = 100;
  int results[results_length];
  int i,j;

  // initialize results to zero
  for (j = 0; j < results_length; j++) {
    results[j] = 0;
  }

  int total = 0;
  int max_symbols = 0;
  int out_of_range = 0;
499 500
  int memory_total = 0;
  int count = 0;
D
duke 已提交
501
  for (i = 0; i < the_table()->table_size(); i++) {
502
    HashtableEntry<Symbol*>* p = the_table()->bucket(i);
D
duke 已提交
503
    for ( ; p != NULL; p = p->next()) {
504 505 506
      memory_total += p->literal()->object_size();
      count++;
      int counter = p->literal()->utf8_length();
D
duke 已提交
507 508 509 510 511 512 513 514 515 516
      total += counter;
      if (counter < results_length) {
        results[counter]++;
      } else {
        out_of_range++;
      }
      max_symbols = MAX2(max_symbols, counter);
    }
  }
  tty->print_cr("Symbol Table:");
517 518 519 520 521 522 523 524 525 526
  tty->print_cr("Total number of symbols  %5d", count);
  tty->print_cr("Total size in memory     %5dK",
          (memory_total*HeapWordSize)/1024);
  tty->print_cr("Total counted            %5d", symbols_counted);
  tty->print_cr("Total removed            %5d", symbols_removed);
  if (symbols_counted > 0) {
    tty->print_cr("Percent removed          %3.2f",
          ((float)symbols_removed/(float)symbols_counted)* 100);
  }
  tty->print_cr("Reference counts         %5d", Symbol::_total_count);
527 528
  tty->print_cr("Symbol arena size        %5d used %5d",
                 arena()->size_in_bytes(), arena()->used());
529
  tty->print_cr("Histogram of symbol length:");
D
duke 已提交
530 531 532 533 534 535 536 537 538 539 540
  tty->print_cr("%8s %5d", "Total  ", total);
  tty->print_cr("%8s %5d", "Maximum", max_symbols);
  tty->print_cr("%8s %3.2f", "Average",
          ((float) total / (float) the_table()->table_size()));
  tty->print_cr("%s", "Histogram:");
  tty->print_cr(" %s %29s", "Length", "Number chains that length");
  for (i = 0; i < results_length; i++) {
    if (results[i] > 0) {
      tty->print_cr("%6d %10d", i, results[i]);
    }
  }
541 542 543 544 545 546 547 548 549 550 551 552 553
  if (Verbose) {
    int line_length = 70;
    tty->print_cr("%s %30s", " Length", "Number chains that length");
    for (i = 0; i < results_length; i++) {
      if (results[i] > 0) {
        tty->print("%4d", i);
        for (j = 0; (j < results[i]) && (j < line_length);  j++) {
          tty->print("%1s", "*");
        }
        if (j == line_length) {
          tty->print("%1s", "+");
        }
        tty->cr();
D
duke 已提交
554 555 556 557 558 559 560
      }
    }
  }
  tty->print_cr(" %s %d: %d\n", "Number chains longer than",
                    results_length, out_of_range);
}

561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576
void SymbolTable::print() {
  for (int i = 0; i < the_table()->table_size(); ++i) {
    HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i);
    HashtableEntry<Symbol*>* entry = the_table()->bucket(i);
    if (entry != NULL) {
      while (entry != NULL) {
        tty->print(PTR_FORMAT " ", entry->literal());
        entry->literal()->print();
        tty->print(" %d", entry->literal()->refcount());
        p = entry->next_addr();
        entry = (HashtableEntry<Symbol*>*)HashtableEntry<Symbol*>::make_ptr(*p);
      }
      tty->cr();
    }
  }
}
D
duke 已提交
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
#endif // PRODUCT

// --------------------------------------------------------------------------

#ifdef ASSERT
class StableMemoryChecker : public StackObj {
  enum { _bufsize = wordSize*4 };

  address _region;
  jint    _size;
  u1      _save_buf[_bufsize];

  int sample(u1* save_buf) {
    if (_size <= _bufsize) {
      memcpy(save_buf, _region, _size);
      return _size;
    } else {
      // copy head and tail
      memcpy(&save_buf[0],          _region,                      _bufsize/2);
      memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
      return (_bufsize/2)*2;
    }
  }

 public:
  StableMemoryChecker(const void* region, jint size) {
    _region = (address) region;
    _size   = size;
    sample(_save_buf);
  }

  bool verify() {
    u1 check_buf[sizeof(_save_buf)];
    int check_size = sample(check_buf);
    return (0 == memcmp(_save_buf, check_buf, check_size));
  }

  void set_region(const void* region) { _region = (address) region; }
};
#endif


// --------------------------------------------------------------------------
StringTable* StringTable::_the_table = NULL;

622 623 624 625
bool StringTable::_needs_rehashing = false;
jint StringTable::_seed = 0;

// Pick hashing algorithm
626
unsigned int StringTable::hash_string(const jchar* s, int len) {
627
  return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
628
                                    java_lang_String::to_hash(s, len);
629 630
}

D
duke 已提交
631 632
oop StringTable::lookup(int index, jchar* name,
                        int len, unsigned int hash) {
633
  int count = 0;
634
  for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
635
    count++;
D
duke 已提交
636 637 638 639 640 641
    if (l->hash() == hash) {
      if (java_lang_String::equals(l->literal(), name, len)) {
        return l->literal();
      }
    }
  }
642 643 644 645
  // If the bucket size is too deep check if this hash code is insufficient.
  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
    _needs_rehashing = check_rehash_table(count);
  }
D
duke 已提交
646 647 648 649
  return NULL;
}


650
oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
651
                           int len, unsigned int hashValue_arg, TRAPS) {
D
duke 已提交
652 653 654

  assert(java_lang_String::equals(string(), name, len),
         "string must be properly initialized");
655 656
  // Cannot hit a safepoint in this function because the "this" pointer can move.
  No_Safepoint_Verifier nsv;
D
duke 已提交
657

658
  // Check if the symbol table has been rehashed, if so, need to recalculate
659 660 661 662 663 664 665 666 667 668
  // the hash value and index before second lookup.
  unsigned int hashValue;
  int index;
  if (use_alternate_hashcode()) {
    hashValue = hash_string(name, len);
    index = hash_to_index(hashValue);
  } else {
    hashValue = hashValue_arg;
    index = index_arg;
  }
669

D
duke 已提交
670 671 672 673 674 675 676 677 678
  // Since look-up was done lock-free, we need to check if another
  // thread beat us in the race to insert the symbol.

  oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
  if (test != NULL) {
    // Entry already added
    return test;
  }

679
  HashtableEntry<oop>* entry = new_entry(hashValue, string());
D
duke 已提交
680 681 682 683 684
  add_entry(index, entry);
  return string();
}


685
oop StringTable::lookup(Symbol* symbol) {
D
duke 已提交
686 687 688
  ResourceMark rm;
  int length;
  jchar* chars = symbol->as_unicode(length);
689
  unsigned int hashValue = hash_string(chars, length);
D
duke 已提交
690 691 692 693 694 695 696
  int index = the_table()->hash_to_index(hashValue);
  return the_table()->lookup(index, chars, length, hashValue);
}


oop StringTable::intern(Handle string_or_null, jchar* name,
                        int len, TRAPS) {
697
  unsigned int hashValue = hash_string(name, len);
D
duke 已提交
698
  int index = the_table()->hash_to_index(hashValue);
699
  oop found_string = the_table()->lookup(index, name, len, hashValue);
D
duke 已提交
700 701

  // Found
702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718
  if (found_string != NULL) return found_string;

  debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
  assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
         "proposed name of symbol must be stable");

  Handle string;
  // try to reuse the string if possible
  if (!string_or_null.is_null() && (!JavaObjectsInPerm || string_or_null()->is_perm())) {
    string = string_or_null;
  } else {
    string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
  }

  // Grab the StringTable_lock before getting the_table() because it could
  // change at safepoint.
  MutexLocker ml(StringTable_lock, THREAD);
D
duke 已提交
719 720

  // Otherwise, add to symbol to table
721
  return the_table()->basic_add(index, string, name, len,
D
duke 已提交
722 723 724
                                hashValue, CHECK_NULL);
}

725
oop StringTable::intern(Symbol* symbol, TRAPS) {
D
duke 已提交
726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758
  if (symbol == NULL) return NULL;
  ResourceMark rm(THREAD);
  int length;
  jchar* chars = symbol->as_unicode(length);
  Handle string;
  oop result = intern(string, chars, length, CHECK_NULL);
  return result;
}


oop StringTable::intern(oop string, TRAPS)
{
  if (string == NULL) return NULL;
  ResourceMark rm(THREAD);
  int length;
  Handle h_string (THREAD, string);
  jchar* chars = java_lang_String::as_unicode_string(string, length);
  oop result = intern(h_string, chars, length, CHECK_NULL);
  return result;
}


oop StringTable::intern(const char* utf8_string, TRAPS) {
  if (utf8_string == NULL) return NULL;
  ResourceMark rm(THREAD);
  int length = UTF8::unicode_length(utf8_string);
  jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
  UTF8::convert_to_unicode(utf8_string, chars, length);
  Handle string;
  oop result = intern(string, chars, length, CHECK_NULL);
  return result;
}

759 760 761 762 763
void StringTable::unlink(BoolObjectClosure* is_alive) {
  // Readers of the table are unlocked, so we should only be removing
  // entries at a safepoint.
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
  for (int i = 0; i < the_table()->table_size(); ++i) {
764 765 766 767 768 769 770 771
    HashtableEntry<oop>** p = the_table()->bucket_addr(i);
    HashtableEntry<oop>* entry = the_table()->bucket(i);
    while (entry != NULL) {
      // Shared entries are normally at the end of the bucket and if we run into
      // a shared entry, then there is nothing more to remove. However, if we
      // have rehashed the table, then the shared entries are no longer at the
      // end of the bucket.
      if (entry->is_shared() && !use_alternate_hashcode()) {
772 773 774
        break;
      }
      assert(entry->literal() != NULL, "just checking");
775
      if (entry->is_shared() || is_alive->do_object_b(entry->literal())) {
776 777 778 779 780
        p = entry->next_addr();
      } else {
        *p = entry->next();
        the_table()->free_entry(entry);
      }
781
      entry = (HashtableEntry<oop>*)HashtableEntry<oop>::make_ptr(*p);
782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805
    }
  }
}

void StringTable::oops_do(OopClosure* f) {
  for (int i = 0; i < the_table()->table_size(); ++i) {
    HashtableEntry<oop>** p = the_table()->bucket_addr(i);
    HashtableEntry<oop>* entry = the_table()->bucket(i);
    while (entry != NULL) {
      f->do_oop((oop*)entry->literal_addr());

      // Did the closure remove the literal from the table?
      if (entry->literal() == NULL) {
        assert(!entry->is_shared(), "immutable hashtable entry?");
        *p = entry->next();
        the_table()->free_entry(entry);
      } else {
        p = entry->next_addr();
      }
      entry = (HashtableEntry<oop>*)HashtableEntry<oop>::make_ptr(*p);
    }
  }
}

D
duke 已提交
806 807
void StringTable::verify() {
  for (int i = 0; i < the_table()->table_size(); ++i) {
808
    HashtableEntry<oop>* p = the_table()->bucket(i);
D
duke 已提交
809 810 811
    for ( ; p != NULL; p = p->next()) {
      oop s = p->literal();
      guarantee(s != NULL, "interned string is NULL");
812
      guarantee(s->is_perm() || !JavaObjectsInPerm, "interned string not in permspace");
813
      unsigned int h = java_lang_String::hash_string(s);
D
duke 已提交
814 815 816 817 818 819
      guarantee(p->hash() == h, "broken hash in string table entry");
      guarantee(the_table()->hash_to_index(h) == i,
                "wrong index in string table");
    }
  }
}
820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851

void StringTable::dump(outputStream* st) {
  NumberSeq summary;
  for (int i = 0; i < the_table()->table_size(); ++i) {
    HashtableEntry<oop>* p = the_table()->bucket(i);
    int count = 0;
    for ( ; p != NULL; p = p->next()) {
      count++;
    }
    summary.add((double)count);
  }
  st->print_cr("StringTable statistics:");
  st->print_cr("Number of buckets       : %7d", summary.num());
  st->print_cr("Average bucket size     : %7.0f", summary.avg());
  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
}


unsigned int StringTable::new_hash(oop string) {
  ResourceMark rm;
  int length;
  jchar* chars = java_lang_String::as_unicode_string(string, length);
  // Use alternate hashing algorithm on the string
  return AltHashing::murmur3_32(seed(), chars, length);
}

// Create a new table and using alternate hash code, populate the new table
// with the existing strings.   Set flag to use the alternate hash code afterwards.
void StringTable::rehash_table() {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
852 853
  // This should never happen with -Xshare:dump but it might in testing mode.
  if (DumpSharedSpaces) return;
854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869
  StringTable* new_table = new StringTable();

  // Initialize new global seed for hashing.
  _seed = AltHashing::compute_seed();
  assert(seed() != 0, "shouldn't be zero");

  // Rehash the table
  the_table()->move_to(new_table);

  // Delete the table and buckets (entries are reused in new table).
  delete _the_table;
  // Don't check if we need rehashing until the table gets unbalanced again.
  // Then rehash with a new global seed.
  _needs_rehashing = false;
  _the_table = new_table;
}