提交 7a40ef32 编写于 作者: Y ysr

6786503: Overflow list performance can be improved

Summary: Avoid overflow list walk in CMS & ParNew when it is unnecessary. Fix a couple of correctness issues, including a C-heap leak, in ParNew at the intersection of promotion failure, work queue overflow and object array chunking. Add stress testing option and related assertion checking.
Reviewed-by: jmasa
上级 8b1dc5fb
......@@ -8508,7 +8508,7 @@ bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
size_t i = num;
oop cur = _overflow_list;
const markOop proto = markOopDesc::prototype();
NOT_PRODUCT(size_t n = 0;)
NOT_PRODUCT(ssize_t n = 0;)
for (oop next; i > 0 && cur != NULL; cur = next, i--) {
next = oop(cur->mark());
cur->set_mark(proto); // until proven otherwise
......@@ -8525,45 +8525,131 @@ bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
return !stack->isEmpty();
}
// Multi-threaded; use CAS to break off a prefix
#define BUSY (oop(0x1aff1aff))
// (MT-safe) Get a prefix of at most "num" from the list.
// The overflow list is chained through the mark word of
// each object in the list. We fetch the entire list,
// break off a prefix of the right size and return the
// remainder. If other threads try to take objects from
// the overflow list at that time, they will wait for
// some time to see if data becomes available. If (and
// only if) another thread places one or more object(s)
// on the global list before we have returned the suffix
// to the global list, we will walk down our local list
// to find its end and append the global list to
// our suffix before returning it. This suffix walk can
// prove to be expensive (quadratic in the amount of traffic)
// when there are many objects in the overflow list and
// there is much producer-consumer contention on the list.
// *NOTE*: The overflow list manipulation code here and
// in ParNewGeneration:: are very similar in shape,
// except that in the ParNew case we use the old (from/eden)
// copy of the object to thread the list via its klass word.
// Because of the common code, if you make any changes in
// the code below, please check the ParNew version to see if
// similar changes might be needed.
// CR 6797058 has been filed to consolidate the common code.
bool CMSCollector::par_take_from_overflow_list(size_t num,
OopTaskQueue* work_q) {
assert(work_q->size() == 0, "That's the current policy");
assert(work_q->size() == 0, "First empty local work queue");
assert(num < work_q->max_elems(), "Can't bite more than we can chew");
if (_overflow_list == NULL) {
return false;
}
// Grab the entire list; we'll put back a suffix
oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
if (prefix == NULL) { // someone grabbed it before we did ...
// ... we could spin for a short while, but for now we don't
return false;
oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
Thread* tid = Thread::current();
size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
size_t sleep_time_millis = MAX2((size_t)1, num/100);
// If the list is busy, we spin for a short while,
// sleeping between attempts to get the list.
for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
os::sleep(tid, sleep_time_millis, false);
if (_overflow_list == NULL) {
// Nothing left to take
return false;
} else if (_overflow_list != BUSY) {
// Try and grab the prefix
prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
}
}
// If the list was found to be empty, or we spun long
// enough, we give up and return empty-handed. If we leave
// the list in the BUSY state below, it must be the case that
// some other thread holds the overflow list and will set it
// to a non-BUSY state in the future.
if (prefix == NULL || prefix == BUSY) {
// Nothing to take or waited long enough
if (prefix == NULL) {
// Write back the NULL in case we overwrote it with BUSY above
// and it is still the same value.
(void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
}
return false;
}
assert(prefix != NULL && prefix != BUSY, "Error");
size_t i = num;
oop cur = prefix;
// Walk down the first "num" objects, unless we reach the end.
for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
if (cur->mark() != NULL) {
if (cur->mark() == NULL) {
// We have "num" or fewer elements in the list, so there
// is nothing to return to the global list.
// Write back the NULL in lieu of the BUSY we wrote
// above, if it is still the same value.
if (_overflow_list == BUSY) {
(void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
}
} else {
// Chop off the suffix and rerturn it to the global list.
assert(cur->mark() != BUSY, "Error");
oop suffix_head = cur->mark(); // suffix will be put back on global list
cur->set_mark(NULL); // break off suffix
// Find tail of suffix so we can prepend suffix to global list
for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
oop suffix_tail = cur;
assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
"Tautology");
// It's possible that the list is still in the empty(busy) state
// we left it in a short while ago; in that case we may be
// able to place back the suffix without incurring the cost
// of a walk down the list.
oop observed_overflow_list = _overflow_list;
do {
cur = observed_overflow_list;
suffix_tail->set_mark(markOop(cur));
oop cur_overflow_list = observed_overflow_list;
bool attached = false;
while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
observed_overflow_list =
(oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur);
} while (cur != observed_overflow_list);
(oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
if (cur_overflow_list == observed_overflow_list) {
attached = true;
break;
} else cur_overflow_list = observed_overflow_list;
}
if (!attached) {
// Too bad, someone else sneaked in (at least) an element; we'll need
// to do a splice. Find tail of suffix so we can prepend suffix to global
// list.
for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
oop suffix_tail = cur;
assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
"Tautology");
observed_overflow_list = _overflow_list;
do {
cur_overflow_list = observed_overflow_list;
if (cur_overflow_list != BUSY) {
// Do the splice ...
suffix_tail->set_mark(markOop(cur_overflow_list));
} else { // cur_overflow_list == BUSY
suffix_tail->set_mark(NULL);
}
// ... and try to place spliced list back on overflow_list ...
observed_overflow_list =
(oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
} while (cur_overflow_list != observed_overflow_list);
// ... until we have succeeded in doing so.
}
}
// Push the prefix elements on work_q
assert(prefix != NULL, "control point invariant");
const markOop proto = markOopDesc::prototype();
oop next;
NOT_PRODUCT(size_t n = 0;)
NOT_PRODUCT(ssize_t n = 0;)
for (cur = prefix; cur != NULL; cur = next) {
next = oop(cur->mark());
cur->set_mark(proto); // until proven otherwise
......@@ -8597,11 +8683,16 @@ void CMSCollector::par_push_on_overflow_list(oop p) {
oop cur_overflow_list;
do {
cur_overflow_list = observed_overflow_list;
p->set_mark(markOop(cur_overflow_list));
if (cur_overflow_list != BUSY) {
p->set_mark(markOop(cur_overflow_list));
} else {
p->set_mark(NULL);
}
observed_overflow_list =
(oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
} while (cur_overflow_list != observed_overflow_list);
}
#undef BUSY
// Single threaded
// General Note on GrowableArray: pushes may silently fail
......@@ -8610,7 +8701,7 @@ void CMSCollector::par_push_on_overflow_list(oop p) {
// a lot of code in the JVM. The prudent thing for GrowableArray
// to do (for now) is to exit with an error. However, that may
// be too draconian in some cases because the caller may be
// able to recover without much harm. For suych cases, we
// able to recover without much harm. For such cases, we
// should probably introduce a "soft_push" method which returns
// an indication of success or failure with the assumption that
// the caller may be able to recover from a failure; code in
......@@ -8618,8 +8709,6 @@ void CMSCollector::par_push_on_overflow_list(oop p) {
// failures where possible, thus, incrementally hardening the VM
// in such low resource situations.
void CMSCollector::preserve_mark_work(oop p, markOop m) {
int PreserveMarkStackSize = 128;
if (_preserved_oop_stack == NULL) {
assert(_preserved_mark_stack == NULL,
"bijection with preserved_oop_stack");
......
......@@ -595,7 +595,7 @@ class CMSCollector: public CHeapObj {
size_t _ser_kac_preclean_ovflw;
size_t _ser_kac_ovflw;
size_t _par_kac_ovflw;
NOT_PRODUCT(size_t _num_par_pushes;)
NOT_PRODUCT(ssize_t _num_par_pushes;)
// ("Weak") Reference processing support
ReferenceProcessor* _ref_processor;
......
......@@ -77,6 +77,7 @@ parNewGeneration.cpp resourceArea.hpp
parNewGeneration.cpp sharedHeap.hpp
parNewGeneration.cpp space.hpp
parNewGeneration.cpp spaceDecorator.hpp
parNewGeneration.cpp thread.hpp
parNewGeneration.cpp workgroup.hpp
parNewGeneration.hpp defNewGeneration.hpp
......
......@@ -404,6 +404,8 @@ void ParEvacuateFollowersClosure::do_void() {
if (terminator()->offer_termination()) break;
par_scan_state()->end_term_time();
}
assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0,
"Broken overflow list?");
// Finish the last termination pause.
par_scan_state()->end_term_time();
}
......@@ -456,6 +458,8 @@ ParNewGeneration(ReservedSpace rs, size_t initial_byte_size, int level)
_is_alive_closure(this),
_plab_stats(YoungPLABSize, PLABWeight)
{
NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;)
NOT_PRODUCT(_num_par_pushes = 0;)
_task_queues = new ObjToScanQueueSet(ParallelGCThreads);
guarantee(_task_queues != NULL, "task_queues allocation failure.");
......@@ -993,12 +997,19 @@ oop ParNewGeneration::copy_to_survivor_space_avoiding_promotion_undo(
"push forwarded object");
}
// Push it on one of the queues of to-be-scanned objects.
if (!par_scan_state->work_queue()->push(obj_to_push)) {
bool simulate_overflow = false;
NOT_PRODUCT(
if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
// simulate a stack overflow
simulate_overflow = true;
}
)
if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
// Add stats for overflow pushes.
if (Verbose && PrintGCDetails) {
gclog_or_tty->print("queue overflow!\n");
}
push_on_overflow_list(old);
push_on_overflow_list(old, par_scan_state);
par_scan_state->note_overflow_push();
}
par_scan_state->note_push();
......@@ -1110,9 +1121,16 @@ oop ParNewGeneration::copy_to_survivor_space_with_undo(
"push forwarded object");
}
// Push it on one of the queues of to-be-scanned objects.
if (!par_scan_state->work_queue()->push(obj_to_push)) {
bool simulate_overflow = false;
NOT_PRODUCT(
if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
// simulate a stack overflow
simulate_overflow = true;
}
)
if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
// Add stats for overflow pushes.
push_on_overflow_list(old);
push_on_overflow_list(old, par_scan_state);
par_scan_state->note_overflow_push();
}
par_scan_state->note_push();
......@@ -1135,89 +1153,190 @@ oop ParNewGeneration::copy_to_survivor_space_with_undo(
return forward_ptr;
}
void ParNewGeneration::push_on_overflow_list(oop from_space_obj) {
oop cur_overflow_list = _overflow_list;
#ifndef PRODUCT
// It's OK to call this multi-threaded; the worst thing
// that can happen is that we'll get a bunch of closely
// spaced simulated oveflows, but that's OK, in fact
// probably good as it would exercise the overflow code
// under contention.
bool ParNewGeneration::should_simulate_overflow() {
if (_overflow_counter-- <= 0) { // just being defensive
_overflow_counter = ParGCWorkQueueOverflowInterval;
return true;
} else {
return false;
}
}
#endif
#define BUSY (oop(0x1aff1aff))
void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
// if the object has been forwarded to itself, then we cannot
// use the klass pointer for the linked list. Instead we have
// to allocate an oopDesc in the C-Heap and use that for the linked list.
// XXX This is horribly inefficient when a promotion failure occurs
// and should be fixed. XXX FIX ME !!!
#ifndef PRODUCT
Atomic::inc_ptr(&_num_par_pushes);
assert(_num_par_pushes > 0, "Tautology");
#endif
if (from_space_obj->forwardee() == from_space_obj) {
oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
listhead->forward_to(from_space_obj);
from_space_obj = listhead;
}
while (true) {
from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
oop observed_overflow_list =
(oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
if (observed_overflow_list == cur_overflow_list) break;
// Otherwise...
oop observed_overflow_list = _overflow_list;
oop cur_overflow_list;
do {
cur_overflow_list = observed_overflow_list;
}
if (cur_overflow_list != BUSY) {
from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
} else {
from_space_obj->set_klass_to_list_ptr(NULL);
}
observed_overflow_list =
(oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
} while (cur_overflow_list != observed_overflow_list);
}
// *NOTE*: The overflow list manipulation code here and
// in CMSCollector:: are very similar in shape,
// except that in the CMS case we thread the objects
// directly into the list via their mark word, and do
// not need to deal with special cases below related
// to chunking of object arrays and promotion failure
// handling.
// CR 6797058 has been filed to attempt consolidation of
// the common code.
// Because of the common code, if you make any changes in
// the code below, please check the CMS version to see if
// similar changes might be needed.
// See CMSCollector::par_take_from_overflow_list() for
// more extensive documentation comments.
bool
ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
ObjToScanQueue* work_q = par_scan_state->work_queue();
assert(work_q->size() == 0, "Should first empty local work queue");
// How many to take?
int objsFromOverflow = MIN2(work_q->max_elems()/4,
(juint)ParGCDesiredObjsFromOverflowList);
size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4,
(size_t)ParGCDesiredObjsFromOverflowList);
if (_overflow_list == NULL) return false;
// Otherwise, there was something there; try claiming the list.
oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
if (prefix == NULL) {
return false;
}
oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
// Trim off a prefix of at most objsFromOverflow items
int i = 1;
Thread* tid = Thread::current();
size_t spin_count = (size_t)ParallelGCThreads;
size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100);
for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) {
// someone grabbed it before we did ...
// ... we spin for a short while...
os::sleep(tid, sleep_time_millis, false);
if (_overflow_list == NULL) {
// nothing left to take
return false;
} else if (_overflow_list != BUSY) {
// try and grab the prefix
prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
}
}
if (prefix == NULL || prefix == BUSY) {
// Nothing to take or waited long enough
if (prefix == NULL) {
// Write back the NULL in case we overwrote it with BUSY above
// and it is still the same value.
(void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
}
return false;
}
assert(prefix != NULL && prefix != BUSY, "Error");
size_t i = 1;
oop cur = prefix;
while (i < objsFromOverflow && cur->klass_or_null() != NULL) {
i++; cur = oop(cur->klass());
}
// Reattach remaining (suffix) to overflow list
if (cur->klass_or_null() != NULL) {
oop suffix = oop(cur->klass());
cur->set_klass_to_list_ptr(NULL);
// Find last item of suffix list
oop last = suffix;
while (last->klass_or_null() != NULL) {
last = oop(last->klass());
if (cur->klass_or_null() == NULL) {
// Write back the NULL in lieu of the BUSY we wrote
// above and it is still the same value.
if (_overflow_list == BUSY) {
(void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
}
// Atomically prepend suffix to current overflow list
oop cur_overflow_list = _overflow_list;
while (true) {
last->set_klass_to_list_ptr(cur_overflow_list);
oop observed_overflow_list =
(oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
if (observed_overflow_list == cur_overflow_list) break;
// Otherwise...
cur_overflow_list = observed_overflow_list;
} else {
assert(cur->klass_or_null() != BUSY, "Error");
oop suffix = oop(cur->klass()); // suffix will be put back on global list
cur->set_klass_to_list_ptr(NULL); // break off suffix
// It's possible that the list is still in the empty(busy) state
// we left it in a short while ago; in that case we may be
// able to place back the suffix.
oop observed_overflow_list = _overflow_list;
oop cur_overflow_list = observed_overflow_list;
bool attached = false;
while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
observed_overflow_list =
(oop) Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
if (cur_overflow_list == observed_overflow_list) {
attached = true;
break;
} else cur_overflow_list = observed_overflow_list;
}
if (!attached) {
// Too bad, someone else got in in between; we'll need to do a splice.
// Find the last item of suffix list
oop last = suffix;
while (last->klass_or_null() != NULL) {
last = oop(last->klass());
}
// Atomically prepend suffix to current overflow list
observed_overflow_list = _overflow_list;
do {
cur_overflow_list = observed_overflow_list;
if (cur_overflow_list != BUSY) {
// Do the splice ...
last->set_klass_to_list_ptr(cur_overflow_list);
} else { // cur_overflow_list == BUSY
last->set_klass_to_list_ptr(NULL);
}
observed_overflow_list =
(oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
} while (cur_overflow_list != observed_overflow_list);
}
}
// Push objects on prefix list onto this thread's work queue
assert(cur != NULL, "program logic");
assert(prefix != NULL && prefix != BUSY, "program logic");
cur = prefix;
int n = 0;
ssize_t n = 0;
while (cur != NULL) {
oop obj_to_push = cur->forwardee();
oop next = oop(cur->klass_or_null());
cur->set_klass(obj_to_push->klass());
if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
obj_to_push = cur;
// This may be an array object that is self-forwarded. In that case, the list pointer
// space, cur, is not in the Java heap, but rather in the C-heap and should be freed.
if (!is_in_reserved(cur)) {
// This can become a scaling bottleneck when there is work queue overflow coincident
// with promotion failure.
oopDesc* f = cur;
FREE_C_HEAP_ARRAY(oopDesc, f);
} else if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
obj_to_push = cur;
}
work_q->push(obj_to_push);
bool ok = work_q->push(obj_to_push);
assert(ok, "Should have succeeded");
cur = next;
n++;
}
par_scan_state->note_overflow_refill(n);
#ifndef PRODUCT
assert(_num_par_pushes >= n, "Too many pops?");
Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
#endif
return true;
}
#undef BUSY
void ParNewGeneration::ref_processor_init()
{
......
......@@ -278,6 +278,7 @@ class ParNewGeneration: public DefNewGeneration {
friend class ParNewRefProcTask;
friend class ParNewRefProcTaskExecutor;
friend class ParScanThreadStateSet;
friend class ParEvacuateFollowersClosure;
private:
// XXX use a global constant instead of 64!
......@@ -296,6 +297,7 @@ class ParNewGeneration: public DefNewGeneration {
// klass-pointers (klass information already copied to the forwarded
// image.) Manipulated with CAS.
oop _overflow_list;
NOT_PRODUCT(ssize_t _num_par_pushes;)
// If true, older generation does not support promotion undo, so avoid.
static bool _avoid_promotion_undo;
......@@ -372,8 +374,12 @@ class ParNewGeneration: public DefNewGeneration {
oop copy_to_survivor_space_with_undo(ParScanThreadState* par_scan_state,
oop obj, size_t obj_sz, markOop m);
// in support of testing overflow code
NOT_PRODUCT(int _overflow_counter;)
NOT_PRODUCT(bool should_simulate_overflow();)
// Push the given (from-space) object on the global overflow list.
void push_on_overflow_list(oop from_space_obj);
void push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state);
// If the global overflow list is non-empty, move some tasks from it
// onto "work_q" (which must be empty). No more than 1/4 of the
......
......@@ -721,12 +721,6 @@ ReferenceProcessor::process_phase3(DiscoveredList& refs_list,
iter.obj(), iter.obj()->blueprint()->internal_name());
}
assert(iter.obj()->is_oop(UseConcMarkSweepGC), "Adding a bad reference");
// If discovery is concurrent, we may have objects with null referents,
// being those that were concurrently cleared after they were discovered
// (and not subsequently precleaned).
assert( (discovery_is_atomic() && iter.referent()->is_oop())
|| (!discovery_is_atomic() && iter.referent()->is_oop_or_null(UseConcMarkSweepGC)),
"Adding a bad referent");
iter.next();
}
// Remember to keep sentinel pointer around
......
......@@ -1307,7 +1307,14 @@ class CommandLineFlags {
product(intx, ParGCArrayScanChunk, 50, \
"Scan a subset and push remainder, if array is bigger than this") \
\
product(intx, ParGCDesiredObjsFromOverflowList, 20, \
notproduct(bool, ParGCWorkQueueOverflowALot, false, \
"Whether we should simulate work queue overflow in ParNew") \
\
notproduct(uintx, ParGCWorkQueueOverflowInterval, 1000, \
"An `interval' counter that determines how frequently" \
" we simulate overflow; a smaller number increases frequency") \
\
product(uintx, ParGCDesiredObjsFromOverflowList, 20, \
"The desired number of objects to claim from the overflow list") \
\
product(uintx, CMSParPromoteBlocksToClaim, 50, \
......@@ -1429,8 +1436,8 @@ class CommandLineFlags {
"Whether we should simulate frequent marking stack / work queue" \
" overflow") \
\
notproduct(intx, CMSMarkStackOverflowInterval, 1000, \
"A per-thread `interval' counter that determines how frequently" \
notproduct(uintx, CMSMarkStackOverflowInterval, 1000, \
"An `interval' counter that determines how frequently" \
" we simulate overflow; a smaller number increases frequency") \
\
product(uintx, CMSMaxAbortablePrecleanLoops, 0, \
......@@ -1648,7 +1655,7 @@ class CommandLineFlags {
develop(uintx, WorkStealingYieldsBeforeSleep, 1000, \
"Number of yields before a sleep is done during workstealing") \
\
product(uintx, PreserveMarkStackSize, 40, \
product(uintx, PreserveMarkStackSize, 1024, \
"Size for stack used in promotion failure handling") \
\
product_pd(bool, UseTLAB, "Use thread-local object allocation") \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册