提交 98236fb1 编写于 作者: S Sergei Petrunia 提交者: Facebook GitHub Bot

LockTree library, originally from PerconaFT (#7753)

Summary:
To be used for implementing Range Locking.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7753

Reviewed By: zhichao-cao

Differential Revision: D25378980

Pulled By: cheng-chang

fbshipit-source-id: 801a9c5cd92a84654ca2586b73e8f69001e89320
上级 7b2216c9
......@@ -831,6 +831,20 @@ set(SOURCES
utilities/write_batch_with_index/write_batch_with_index_internal.cc
$<TARGET_OBJECTS:build_version>)
# Range locking support: the LockTree library (originally from PerconaFT).
# These files must be appended to SOURCES -- the list populated by the
# preceding set(SOURCES ...) call -- otherwise they end up in an unrelated
# variable and are never added to the build.
list(APPEND SOURCES
  utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc
  utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc
  utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc
  utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc
  utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc
  utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc
  utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc
  utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc
  utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc
  utilities/transactions/lock/range/range_tree/lib/standalone_port.cc
  utilities/transactions/lock/range/range_tree/lib/util/dbt.cc
  utilities/transactions/lock/range/range_tree/lib/util/memarena.cc)
if(HAVE_SSE42 AND NOT MSVC)
set_source_files_properties(
util/crc32c.cc
......
......@@ -388,6 +388,18 @@ cpp_library(
"utilities/transactions/lock/lock_manager.cc",
"utilities/transactions/lock/point/point_lock_manager.cc",
"utilities/transactions/lock/point/point_lock_tracker.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc",
"utilities/transactions/lock/range/range_tree/lib/standalone_port.cc",
"utilities/transactions/lock/range/range_tree/lib/util/dbt.cc",
"utilities/transactions/lock/range/range_tree/lib/util/memarena.cc",
"utilities/transactions/optimistic_transaction.cc",
"utilities/transactions/optimistic_transaction_db_impl.cc",
"utilities/transactions/pessimistic_transaction.cc",
......@@ -678,6 +690,18 @@ cpp_library(
"utilities/transactions/lock/lock_manager.cc",
"utilities/transactions/lock/point/point_lock_manager.cc",
"utilities/transactions/lock/point/point_lock_tracker.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc",
"utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc",
"utilities/transactions/lock/range/range_tree/lib/standalone_port.cc",
"utilities/transactions/lock/range/range_tree/lib/util/dbt.cc",
"utilities/transactions/lock/range/range_tree/lib/util/memarena.cc",
"utilities/transactions/optimistic_transaction.cc",
"utilities/transactions/optimistic_transaction_db_impl.cc",
"utilities/transactions/pessimistic_transaction.cc",
......
......@@ -255,6 +255,18 @@ LIB_SOURCES = \
utilities/transactions/lock/lock_manager.cc \
utilities/transactions/lock/point/point_lock_tracker.cc \
utilities/transactions/lock/point/point_lock_manager.cc \
utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc \
utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc \
utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc \
utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc \
utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc \
utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc \
utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc \
utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc \
utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc \
utilities/transactions/lock/range/range_tree/lib/standalone_port.cc \
utilities/transactions/lock/range/range_tree/lib/util/dbt.cc \
utilities/transactions/lock/range/range_tree/lib/util/memarena.cc \
utilities/transactions/optimistic_transaction.cc \
utilities/transactions/optimistic_transaction_db_impl.cc \
utilities/transactions/pessimistic_transaction.cc \
......
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
The files in this directory originally come from
https://github.com/percona/PerconaFT/.
This directory only includes the "locktree" part of PerconaFT, and its
dependencies.
The following modifications were made:
- Make locktree usable outside of PerconaFT library
- Add shared read-only lock support
The files named *_subst.* are substitutes for PerconaFT's files; they
contain replacements for PerconaFT's functionality.
#ifndef _DB_H
#define _DB_H
#include <stdint.h>
#include <sys/types.h>
// Forward declaration of the DBT type; struct __toku_dbt itself is defined
// further down in this header.
typedef struct __toku_dbt DBT;
// port: this is currently not used
// A bare length-plus-pointer pair.
struct simple_dbt {
uint32_t len;  // presumably the number of bytes at `data` -- TODO confirm
void *data;
};
// engine status info
// engine status is passed to handlerton as an array of
// TOKU_ENGINE_STATUS_ROW_S[]
//
// Tag describing how the value stored in a status row is to be interpreted.
// Only the first enumerator has an explicit value (0); the rest follow it
// consecutively.
typedef enum {
STATUS_FS_STATE = 0, // interpret as file system state (redzone) enum
STATUS_UINT64, // interpret as uint64_t
STATUS_CHARSTR, // interpret as char *
STATUS_UNIXTIME, // interpret as time_t
STATUS_TOKUTIME, // interpret as tokutime_t
STATUS_PARCOUNT, // interpret as PARTITIONED_COUNTER
STATUS_DOUBLE // interpret as double
} toku_engine_status_display_type;
// Flags naming the kinds of status query a row is reported to. Each
// enumerator is a distinct single bit.
// NOTE(review): presumably the bits may be OR-ed together -- confirm at the
// places where rows are initialized.
typedef enum {
TOKU_ENGINE_STATUS = (1ULL << 0), // Include when asking for engine status
TOKU_GLOBAL_STATUS =
(1ULL << 1), // Include when asking for information_schema.global_status
} toku_engine_status_include_type;
// One row of engine status: identifying strings, a display-type tag, an
// include mask and the value itself. The typedef introduces two names:
// TOKU_ENGINE_STATUS_ROW (pointer to a row) and TOKU_ENGINE_STATUS_ROW_S
// (the row struct).
typedef struct __toku_engine_status_row {
const char *keyname; // info schema key, should not change across revisions
// without good reason
const char
*columnname; // column for mysql, e.g. information_schema.global_status.
// TOKUDB_ will automatically be prefixed.
const char *legend; // the text that will appear at user interface
toku_engine_status_display_type type; // how to interpret the value
toku_engine_status_include_type
include; // which kinds of callers should get to read this row?
// The value; which member is meaningful is indicated by `type`.
union {
double dnum;
uint64_t num;
const char *str;
char datebuf[26];  // presumably holds a formatted timestamp -- TODO confirm
struct partitioned_counter *parcount;
} value;
} * TOKU_ENGINE_STATUS_ROW, TOKU_ENGINE_STATUS_ROW_S;
// Negative error codes.
// NOTE(review): the -309xx values look like they follow Berkeley DB's error
// numbering -- confirm before changing any of them.
#define DB_BUFFER_SMALL -30999
#define DB_LOCK_DEADLOCK -30995
#define DB_LOCK_NOTGRANTED -30994
#define DB_NOTFOUND -30989
#define DB_KEYEXIST -30996
// DB_DBT_XXX values for the `flags` field of a DBT.
// NOTE(review): presumably they describe who owns the memory behind
// DBT::data -- confirm against the DBT helper functions.
#define DB_DBT_MALLOC 8
#define DB_DBT_REALLOC 64
#define DB_DBT_USERMEM 256
/* PerconaFT specific error codes */
#define TOKUDB_OUT_OF_LOCKS -100000
// Callback type taking an opaque argument and two transaction ids.
// NOTE(review): going by the names, it is presumably invoked when
// `requesting_txnid` has to wait for a lock held by `blocking_txnid` --
// confirm at the call sites.
typedef void (*lock_wait_callback)(void *arg, uint64_t requesting_txnid,
uint64_t blocking_txnid);
// The DBT type (see the `DBT` typedef near the top of this header): a data
// pointer together with size information and a flags word.
struct __toku_dbt {
void *data;
size_t size;  // presumably the number of valid bytes at `data` -- TODO confirm
size_t ulen;  // presumably the capacity of a caller-supplied buffer -- TODO confirm
// One of DB_DBT_XXX flags
uint32_t flags;
};
#endif
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#pragma once
#include <string.h>
#include "../db.h"
#include "../portability/memory.h"
#include "../util/dbt.h"
// Signature of a key comparison callback: it receives an opaque argument and
// the two keys to compare, and returns an int.
// NOTE(review): presumably <0 / 0 / >0 for a<b / a==b / a>b -- confirm.
typedef int (*ft_compare_func)(void *arg, const DBT *a, const DBT *b);
// Compares two keys given as raw pointer-plus-length buffers.
// NOTE(review): presumably a memcmp-style ordering -- confirm against the
// definition, which is not in this header.
int toku_keycompare(const void *key1, size_t key1len, const void *key2,
size_t key2len);
// Comparison of two DBTs that takes no extra argument; declared with default
// symbol visibility.
int toku_builtin_compare_fun(const DBT *, const DBT *)
__attribute__((__visibility__("default")));
namespace toku {
// A comparator object encapsulates the data necessary for comparing two
// keys in a fractal tree: a comparison callback, the opaque argument passed
// to it, and an optional one-byte "memcmp magic". It further understands
// that points may be positive or negative infinity.
class comparator {
  // Stores the callback, its opaque argument and the memcmp magic byte.
  void init(ft_compare_func cmp, void *cmp_arg, uint8_t memcmp_magic) {
    _cmp = cmp;
    _cmp_arg = cmp_arg;
    _memcmp_magic = memcmp_magic;
  }

 public:
  // This magic value is reserved to mean that the magic has not been set.
  static const uint8_t MEMCMP_MAGIC_NONE = 0;

  // Initializes the comparator with a callback and its argument. The memcmp
  // magic defaults to "not set".
  void create(ft_compare_func cmp, void *cmp_arg,
              uint8_t memcmp_magic = MEMCMP_MAGIC_NONE) {
    init(cmp, cmp_arg, memcmp_magic);
  }

  // Takes over the callback, callback argument and memcmp magic of another
  // comparator.
  // requires: the other comparator has a non-null callback (checked with
  //           invariant_notnull).
  void inherit(const comparator &cmp) {
    invariant_notnull(cmp._cmp);
    init(cmp._cmp, cmp._cmp_arg, cmp._memcmp_magic);
  }

  // Like inherit, but doesn't require that this comparator was already
  // created.
  void create_from(const comparator &cmp) { inherit(cmp); }

  // Nothing to release: the comparator owns no resources.
  void destroy() {}

  ft_compare_func get_compare_func() const { return _cmp; }

  uint8_t get_memcmp_magic() const { return _memcmp_magic; }

  // returns: true once a callback has been set.
  bool valid() const { return _cmp != nullptr; }

  // returns: true if the first byte of the key equals the memcmp magic.
  // The byte is read as uint8_t so that it is compared in the same type as
  // _memcmp_magic; reading it as (possibly signed) char would make magic
  // values >= 0x80 impossible to match.
  // requires: dbt->data points to at least one readable byte.
  inline bool dbt_has_memcmp_magic(const DBT *dbt) const {
    return *reinterpret_cast<const uint8_t *>(dbt->data) == _memcmp_magic;
  }

  // Compares two keys.
  //  - If either key is infinite, the result comes from
  //    toku_dbt_infinite_compare().
  //  - If a memcmp magic is set and both keys start with it, the builtin
  //    comparison would be used; this port asserts that this never happens.
  //  - Otherwise the result is whatever the callback returns.
  int operator()(const DBT *a, const DBT *b) const {
    if (__builtin_expect(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b),
                         0)) {
      return toku_dbt_infinite_compare(a, b);
    } else if (_memcmp_magic != MEMCMP_MAGIC_NONE
               // If `a' has the memcmp magic..
               && dbt_has_memcmp_magic(a)
               // ..then we expect `b' to also have the memcmp magic
               && __builtin_expect(dbt_has_memcmp_magic(b), 1)) {
      assert(0);  // psergey: this branch should not be taken.
      return toku_builtin_compare_fun(a, b);
    } else {
      // yikes, const sadness here
      return _cmp(_cmp_arg, a, b);
    }
  }

 private:
  ft_compare_func _cmp;
  void *_cmp_arg;
  uint8_t _memcmp_magic;
};
} /* namespace toku */
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#pragma once
#include "../db.h"
#include "../portability/toku_race_tools.h"
#include "../util/status.h"
//
// Lock Tree Manager statistics
//
// Holds the lock tree manager's status rows: one TOKU_ENGINE_STATUS_ROW_S
// per LTM_* enumerator below.
class LTM_STATUS_S {
public:
// Row identifiers; the status[] array is sized by LTM_STATUS_NUM_ROWS.
// NOTE(review): the meaning of each counter is implied only by its name
// here -- see the implementation of init() for the legends.
enum {
LTM_SIZE_CURRENT = 0,
LTM_SIZE_LIMIT,
LTM_ESCALATION_COUNT,
LTM_ESCALATION_TIME,
LTM_ESCALATION_LATEST_RESULT,
LTM_NUM_LOCKTREES,
LTM_LOCK_REQUESTS_PENDING,
LTM_STO_NUM_ELIGIBLE,
LTM_STO_END_EARLY_COUNT,
LTM_STO_END_EARLY_TIME,
LTM_WAIT_COUNT,
LTM_WAIT_TIME,
LTM_LONG_WAIT_COUNT,
LTM_LONG_WAIT_TIME,
LTM_TIMEOUT_COUNT,
LTM_WAIT_ESCALATION_COUNT,
LTM_WAIT_ESCALATION_TIME,
LTM_LONG_WAIT_ESCALATION_COUNT,
LTM_LONG_WAIT_ESCALATION_TIME,
LTM_STATUS_NUM_ROWS // must be last
};
void init(void);
void destroy(void);
// The rows themselves, one per enumerator above.
TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS];
private:
// Starts out false.
// NOTE(review): presumably records whether init() has run -- confirm in
// the implementation.
bool m_initialized = false;
};
// Pointer alias for the status object type.
typedef LTM_STATUS_S* LTM_STATUS;
// The status object; only declared here, its definition lives elsewhere.
extern LTM_STATUS_S ltm_status;
// Expands to the `num` member of the value of row `x` in ltm_status, where
// `x` is the name of one of the LTM_STATUS_S enumerators.
#define LTM_STATUS_VAL(x) ltm_status.status[LTM_STATUS_S::x].value.num
void toku_status_init(void); // just call ltm_status.init();
void toku_status_destroy(void); // just call ltm_status.destroy();
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ifndef ROCKSDB_LITE
#ifndef OS_WIN
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#include "concurrent_tree.h"
// PORT #include <toku_assert.h>
namespace toku {
void concurrent_tree::create(const comparator *cmp) {
// start with an empty root node. we do this instead of
// setting m_root to null so there's always a root to lock
m_root.create_root(cmp);
}
void concurrent_tree::destroy(void) { m_root.destroy_root(); }
// The tree is empty exactly when its root node reports being empty.
bool concurrent_tree::is_empty() {
  const bool root_is_empty = m_root.is_empty();
  return root_is_empty;
}
// Reports the per-insertion memory overhead, which is the size of one
// treenode object.
uint64_t concurrent_tree::get_insertion_memory_overhead() {
  const uint64_t bytes_per_node = sizeof(treenode);
  return bytes_per_node;
}
// First step of acquiring a locked keyrange: remember the tree, start out
// covering the infinite range at the root, and lock the root's mutex.
void concurrent_tree::locked_keyrange::prepare(concurrent_tree *tree) {
  m_tree = tree;
  m_range = keyrange::get_infinite_range();
  m_subtree = &tree->m_root;
  // the root stays locked until acquire() or release() is called
  m_subtree->mutex_lock();
}
void concurrent_tree::locked_keyrange::acquire(const keyrange &range) {
treenode *const root = &m_tree->m_root;
treenode *subtree;
if (root->is_empty() || root->range_overlaps(range)) {
subtree = root;
} else {
// we do not have a precomputed comparison hint, so pass null
const keyrange::comparison *cmp_hint = nullptr;
subtree = root->find_node_with_overlapping_child(range, cmp_hint);
}
// subtree is locked. it will be unlocked when this is release()'d
invariant_notnull(subtree);
m_range = range;
m_subtree = subtree;
}
// Registers `new_owner` on `range` by inserting into the locked subtree
// with the shared flag set; the subtree's insert result is passed back.
bool concurrent_tree::locked_keyrange::add_shared_owner(const keyrange &range,
                                                        TXNID new_owner) {
  const bool as_shared = true;
  return m_subtree->insert(range, new_owner, as_shared);
}
void concurrent_tree::locked_keyrange::release(void) {
m_subtree->mutex_unlock();
}
// Adds `range`, owned by `txnid`, to the locked subtree.
void concurrent_tree::locked_keyrange::insert(const keyrange &range,
                                              TXNID txnid, bool is_shared) {
  if (!m_subtree->is_empty()) {
    // regular case: let the subtree place the new range
    m_subtree->insert(range, txnid, is_shared);
    return;
  }
  // An empty node has no children, and only the root should ever be empty:
  // store the range directly in that node.
  m_subtree->set_range_and_txnid(range, txnid, is_shared);
}
void concurrent_tree::locked_keyrange::remove(const keyrange &range,
TXNID txnid) {
invariant(!m_subtree->is_empty());
treenode *new_subtree = m_subtree->remove(range, txnid);
// if removing range changed the root of the subtree,
// then the subtree must be the root of the entire tree.
if (new_subtree == nullptr) {
invariant(m_subtree->is_root());
invariant(m_subtree->is_empty());
}
}
void concurrent_tree::locked_keyrange::remove_all(void) {
m_subtree->recursive_remove();
}
} /* namespace toku */
#endif // OS_WIN
#endif // ROCKSDB_LITE
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=2:softtabstop=2:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#pragma once
#include "../ft/comparator.h"
#include "keyrange.h"
#include "treenode.h"
namespace toku {
// A concurrent_tree stores non-overlapping ranges.
// Access to disjoint parts of the tree usually occurs concurrently.
class concurrent_tree {
public:
// A locked_keyrange gives you exclusive access to read and write
// operations that occur on any keys in that range. You only have
// the right to operate on keys in that range or keys that were read
// from the keyrange using iterate()
//
// Access model:
// - user prepares a locked keyrange. all threads serialize behind prepare().
// - user breaks the serialization point by acquiring a range, or releasing.
// - one thread operates on a certain locked_keyrange object at a time.
// - when the thread is finished, it releases
class locked_keyrange {
public:
// effect: prepare to acquire a locked keyrange over the given
// concurrent_tree, preventing other threads from preparing
// until this thread either does acquire() or release().
// note: operations performed on a prepared keyrange are equivalent
// to ones performed on an acquired keyrange over -inf, +inf.
// rationale: this provides the user with a serialization point for
// descending or modifying the tree. it also provides a convenient
// way of doing serializable operations on the tree.
// There are two valid sequences of calls:
// - prepare, acquire, [operations], release
// - prepare, [operations], release
void prepare(concurrent_tree *tree);
// requires: the locked keyrange was prepare()'d
// effect: acquire a locked keyrange over the given concurrent_tree.
// the locked keyrange represents the range of keys overlapped
// by the given range
void acquire(const keyrange &range);
// effect: releases a locked keyrange and the mutex it holds
void release(void);
// effect: iterate over each range this locked_keyrange represents,
// calling function->fn() on each node's keyrange and txnid
// until there are no more or the function returns false
template <class F>
void iterate(F *function) const {
// if the subtree is non-empty, traverse it by calling the given
// function on each range, txnid pair found that overlaps.
if (!m_subtree->is_empty()) {
m_subtree->traverse_overlaps(m_range, function);
}
}
// Adds another owner to the lock on the specified keyrange.
// requires: the keyrange contains one treenode whose bounds are
// exactly equal to the specified range (no sub/supersets)
bool add_shared_owner(const keyrange &range, TXNID new_owner);
// inserts the given range into the tree, with an associated txnid.
// requires: range does not overlap with anything in this locked_keyrange
// rationale: caller is responsible for only inserting unique ranges
void insert(const keyrange &range, TXNID txnid, bool is_shared);
// effect: removes the given range from the tree.
// - txnid=TXNID_ANY means remove the range no matter what its
// owners are
// - Other value means remove the specified txnid from
// ownership (if the range has other owners, it will remain
// in the tree)
// requires: range exists exactly in this locked_keyrange
// rationale: caller is responsible for only removing existing ranges
void remove(const keyrange &range, TXNID txnid);
// effect: removes all of the keys represented by this locked keyrange
// rationale: we'd like a fast way to empty out a tree
void remove_all(void);
private:
// the concurrent tree this locked keyrange is for
concurrent_tree *m_tree;
// the range of keys this locked keyrange represents
keyrange m_range;
// the subtree under which all overlapping ranges exist
treenode *m_subtree;
friend class concurrent_tree_unit_test;
};
// effect: initialize the tree to an empty state
void create(const comparator *cmp);
// effect: destroy the tree.
// requires: tree is empty
void destroy(void);
// returns: true iff the tree is empty
bool is_empty(void);
// returns: the memory overhead of a single insertion into the tree
static uint64_t get_insertion_memory_overhead(void);
private:
// the root needs to always exist so there's a lock to grab
// even if the tree is empty. that's why we store a treenode
// here and not a pointer to one.
treenode m_root;
friend class concurrent_tree_unit_test;
};
} /* namespace toku */
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ifndef ROCKSDB_LITE
#ifndef OS_WIN
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#include "keyrange.h"
#include "../util/dbt.h"
namespace toku {
// Builds a keyrange that merely borrows the two endpoint pointers: nothing
// is copied and no infinity checks are required.
void keyrange::create(const DBT *left, const DBT *right) {
  // reset everything first, then point at the caller's keys
  init_empty();
  m_right_key = right;
  m_left_key = left;
}
// destroy the key copies. if they were never set, then destroy does nothing.
void keyrange::destroy(void) {
toku_destroy_dbt(&m_left_key_copy);
toku_destroy_dbt(&m_right_key_copy);
}
// create a keyrange by copying the keys from the given range.
void keyrange::create_copy(const keyrange &range) {
// start with an initialized, empty range
init_empty();
// optimize the case where the left and right keys are the same.
// we'd like to only have one copy of the data.
if (toku_dbt_equals(range.get_left_key(), range.get_right_key())) {
set_both_keys(range.get_left_key());
} else {
// replace our empty left and right keys with
// copies of the range's left and right keys
replace_left_key(range.get_left_key());
replace_right_key(range.get_right_key());
}
}
// extend this keyrange by choosing the leftmost and rightmost
// endpoints between this range and the given. replaced keys
// in this range are freed and inherited keys are copied.
void keyrange::extend(const comparator &cmp, const keyrange &range) {
const DBT *range_left = range.get_left_key();
const DBT *range_right = range.get_right_key();
if (cmp(range_left, get_left_key()) < 0) {
replace_left_key(range_left);
}
if (cmp(range_right, get_right_key()) > 0) {
replace_right_key(range_right);
}
}
// how much memory does this keyrange take?
// - the size of the left and right keys
// --- ignore the fact that we may have optimized the point case.
// it complicates things for little gain.
// - the size of the keyrange class itself
uint64_t keyrange::get_memory_size(void) const {
const DBT *left_key = get_left_key();
const DBT *right_key = get_right_key();
return left_key->size + right_key->size + sizeof(keyrange);
}
// Classifies how this range relates to `range`.
keyrange::comparison keyrange::compare(const comparator &cmp,
                                       const keyrange &range) const {
  const DBT *const my_left = get_left_key();
  const DBT *const my_right = get_right_key();
  const DBT *const other_left = range.get_left_key();
  const DBT *const other_right = range.get_right_key();

  // our right end lies before the other range begins
  if (cmp(my_right, other_left) < 0) {
    return comparison::LESS_THAN;
  }
  // our left end lies after the other range ends
  if (cmp(my_left, other_right) > 0) {
    return comparison::GREATER_THAN;
  }
  // both endpoints coincide
  if (cmp(my_left, other_left) == 0 && cmp(my_right, other_right) == 0) {
    return comparison::EQUALS;
  }
  return comparison::OVERLAPS;
}
bool keyrange::overlaps(const comparator &cmp, const keyrange &range) const {
// equality is a stronger form of overlapping.
// so two ranges "overlap" if they're either equal or just overlapping.
comparison c = compare(cmp, range);
return c == comparison::EQUALS || c == comparison::OVERLAPS;
}
// Produces the range running from negative to positive infinity.
keyrange keyrange::get_infinite_range() {
  keyrange everything;
  everything.create(toku_dbt_negative_infinity(),
                    toku_dbt_positive_infinity());
  return everything;
}
// Resets the keyrange: no borrowed keys, initialized key copies, and not a
// point range.
void keyrange::init_empty() {
  m_point_range = false;
  m_left_key = m_right_key = nullptr;
  toku_init_dbt(&m_left_key_copy);
  toku_init_dbt(&m_right_key_copy);
}
// The left key is the borrowed pointer when one is set, and the left key
// copy otherwise.
const DBT *keyrange::get_left_key() const {
  return m_left_key ? m_left_key : &m_left_key_copy;
}
// The right key is the borrowed pointer when one is set, and the right key
// copy otherwise.
const DBT *keyrange::get_right_key() const {
  if (m_right_key != nullptr) {
    return m_right_key;
  }
  return &m_right_key_copy;
}
// Point-range optimization: the given key becomes both the left and the
// right endpoint while being copied at most once.
void keyrange::set_both_keys(const DBT *key) {
  m_point_range = true;
  if (!toku_dbt_is_infinite(key)) {
    // clone into the left copy, then let the right copy refer to it
    toku_clone_dbt(&m_left_key_copy, *key);
    toku_copyref_dbt(&m_right_key_copy, m_left_key_copy);
    return;
  }
  // infinite keys are only pointed at
  m_left_key = m_right_key = key;
}
// destroy the current left key. set and possibly copy the new one
void keyrange::replace_left_key(const DBT *key) {
// a little magic:
//
// if this is a point range, then the left and right keys share
// one copy of the data, and it lives in the left key copy. so
// if we're replacing the left key, move the real data to the
// right key copy instead of destroying it. now, the memory is
// owned by the right key and the left key may be replaced.
if (m_point_range) {
m_right_key_copy = m_left_key_copy;
} else {
toku_destroy_dbt(&m_left_key_copy);
}
if (toku_dbt_is_infinite(key)) {
m_left_key = key;
} else {
toku_clone_dbt(&m_left_key_copy, *key);
m_left_key = nullptr;
}
m_point_range = false;
}
// Destroys the current right key copy, then sets (and, unless it is
// infinite, copies) the new right key. Afterwards the range is no longer a
// point range.
void keyrange::replace_right_key(const DBT *key) {
  toku_destroy_dbt(&m_right_key_copy);
  m_point_range = false;
  if (!toku_dbt_is_infinite(key)) {
    toku_clone_dbt(&m_right_key_copy, *key);
    // a null pointer means "the key lives in the copy"
    m_right_key = nullptr;
    return;
  }
  // infinite keys are only pointed at
  m_right_key = key;
}
} /* namespace toku */
#endif // OS_WIN
#endif // ROCKSDB_LITE
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#pragma once
#include "../ft/comparator.h"
namespace toku {
// A keyrange has a left and right key as endpoints.
//
// When a keyrange is created it owns no memory, but when it copies
// or extends another keyrange, it copies memory as necessary. This
// means it is cheap in the common case.
class keyrange {
public:
// effect: constructor that borrows left and right key pointers.
// no memory is allocated or copied.
void create(const DBT *left_key, const DBT *right_key);
// effect: constructor that allocates and copies another keyrange's points.
void create_copy(const keyrange &range);
// effect: destroys the keyrange, freeing any allocated memory
void destroy(void);
// effect: extends the keyrange by choosing the leftmost and rightmost
// endpoints from this range and the given range.
// replaced keys in this range are freed, new keys are copied.
void extend(const comparator &cmp, const keyrange &range);
// returns: the amount of memory this keyrange takes. does not account
// for point optimizations or malloc overhead.
uint64_t get_memory_size(void) const;
// returns: pointer to the left key of this range
const DBT *get_left_key(void) const;
// returns: pointer to the right key of this range
const DBT *get_right_key(void) const;
// two ranges are either equal, lt, gt, or overlapping
enum comparison { EQUALS, LESS_THAN, GREATER_THAN, OVERLAPS };
// effect: compares this range to the given range
// returns: LESS_THAN if given range is strictly to the left
// GREATER_THAN if given range is strictly to the right
// EQUALS if given range has the same left and right endpoints
// OVERLAPS if at least one of the given range's endpoints falls
// between this range's endpoints
// NOTE(review): the visible definition in keyrange.cc returns LESS_THAN
// when *this* range's right key compares below the given range's left key
// (and GREATER_THAN in the mirrored case) -- confirm that the wording above
// describes the intended direction.
comparison compare(const comparator &cmp, const keyrange &range) const;
// returns: true if the range and the given range are equal or overlapping
bool overlaps(const comparator &cmp, const keyrange &range) const;
// returns: a keyrange representing -inf, +inf
static keyrange get_infinite_range(void);
private:
// some keys should be copied, some keys should not be.
//
// to support both, we use two DBTs for copies and two pointers
// for temporaries. the access rule is:
// - if a pointer is non-null, then it represents the key.
// - otherwise the pointer is null, and the key is in the copy.
DBT m_left_key_copy;
DBT m_right_key_copy;
const DBT *m_left_key;
const DBT *m_right_key;
// if this range is a point range, then m_left_key == m_right_key
// and the actual data is stored exactly once in m_left_key_copy.
bool m_point_range;
// effect: initializes a keyrange to be empty
void init_empty(void);
// effect: copies the given key once into the left key copy
// and sets the right key copy to share the left.
// rationale: optimization for point ranges to only do one malloc
void set_both_keys(const DBT *key);
// effect: destroys the current left key. sets and copies the new one.
void replace_left_key(const DBT *key);
// effect: destroys the current right key. sets and copies the new one.
void replace_right_key(const DBT *key);
};
} /* namespace toku */
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ifndef ROCKSDB_LITE
#ifndef OS_WIN
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#include "lock_request.h"
#include "../portability/toku_race_tools.h"
#include "../portability/txn_subst.h"
#include "../util/dbt.h"
#include "locktree.h"
namespace toku {
// Initialize a lock request's internals.
//
// Every data member is given a defined starting value, the request is put
// into the UNINITIALIZED state, and the wait condition is created through
// `mutex_factory`. set() must still be called before the request is started.
void lock_request::create(toku_external_mutex_factory_t mutex_factory) {
  m_txnid = TXNID_NONE;
  m_conflicting_txnid = TXNID_NONE;
  m_start_time = 0;
  m_left_key = nullptr;
  m_right_key = nullptr;
  toku_init_dbt(&m_left_key_copy);
  toku_init_dbt(&m_right_key_copy);
  m_type = type::UNKNOWN;
  m_lt = nullptr;
  m_complete_r = 0;
  m_state = state::UNINITIALIZED;
  m_info = nullptr;
  // set() overwrites these two, but give them defined values here so that
  // get_extra() (or any other reader) on a freshly created request does not
  // read an indeterminate value.
  m_big_txn = false;
  m_extra = nullptr;
  // psergey-todo: this condition is for interruptible wait
  toku_external_cond_init(mutex_factory, &m_wait_cond);
  // test hooks are disabled until a test installs them
  m_start_test_callback = nullptr;
  m_start_before_pending_test_callback = nullptr;
  m_retry_test_callback = nullptr;
}
// Tear down a lock request.
// requires: the request is neither PENDING nor already DESTROYED.
// effect: marks the request DESTROYED, releases both key copies and
//         destroys the wait condition.
void lock_request::destroy() {
  invariant(m_state != state::PENDING);
  invariant(m_state != state::DESTROYED);

  m_state = state::DESTROYED;

  toku_destroy_dbt(&m_left_key_copy);
  toku_destroy_dbt(&m_right_key_copy);
  toku_external_cond_destroy(&m_wait_cond);
}
// (Re)configure a lock request so that the same object can be reused.
// requires: the request is not PENDING.
// effect: stores the locktree, txnid, unowned key pointers, lock type,
//         big_txn flag and caller cookie; discards any key copies left
//         over from an earlier use; moves the request to INITIALIZED.
void lock_request::set(locktree *lt, TXNID txnid, const DBT *left_key,
                       const DBT *right_key, lock_request::type lock_type,
                       bool big_txn, void *extra) {
  invariant(m_state != state::PENDING);

  // who is asking, and on which tree
  m_lt = lt;
  m_txnid = txnid;

  // the keys are borrowed from the caller until copy_keys() is called
  m_left_key = left_key;
  m_right_key = right_key;
  toku_destroy_dbt(&m_left_key_copy);
  toku_destroy_dbt(&m_right_key_copy);

  m_type = lock_type;
  m_state = state::INITIALIZED;

  // a request without a locktree has no shared request info either
  if (lt) {
    m_info = lt->get_lock_request_info();
  } else {
    m_info = nullptr;
  }

  m_big_txn = big_txn;
  m_extra = extra;
}
// Take private copies of the request's keys.
// For each end-point that toku_dbt_is_infinite() does not report as
// infinite, the key is cloned into the matching *_copy member and the key
// pointer is redirected to that clone. Infinite end-points are left as is.
void lock_request::copy_keys() {
  const bool left_is_finite = !toku_dbt_is_infinite(m_left_key);
  if (left_is_finite) {
    toku_clone_dbt(&m_left_key_copy, *m_left_key);
    m_left_key = &m_left_key_copy;
  }

  const bool right_is_finite = !toku_dbt_is_infinite(m_right_key);
  if (right_is_finite) {
    toku_clone_dbt(&m_right_key_copy, *m_right_key);
    m_right_key = &m_right_key_copy;
  }
}
// Ask the locktree which txnids conflict with this pending request.
// requires: the request is PENDING.
// effect: forwards the request's type, txnid and key range to
//         locktree::get_conflicts(), which fills in `conflicts`.
void lock_request::get_conflicts(txnid_set *conflicts) {
  invariant(m_state == state::PENDING);
  m_lt->get_conflicts(m_type == type::WRITE, m_txnid, m_left_key, m_right_key,
                      conflicts);
}
// Build a wait-for-graph for this lock request (A) and the given conflict set.
// For each transaction B in `conflicts`:
//   if B itself has a pending lock request then
//     add the edge (A,B) to the WFG and, if B was not yet a node in the
//     graph, recursively fill in the WFG starting from B's own conflicts.
void lock_request::build_wait_graph(wfg *wait_graph,
                                    const txnid_set &conflicts) {
  const uint32_t conflict_count = conflicts.size();
  for (uint32_t idx = 0; idx != conflict_count; ++idx) {
    const TXNID blocker_txnid = conflicts.get(idx);
    lock_request *const blocker = find_lock_request(blocker_txnid);

    // a request never conflicts with itself
    invariant(blocker_txnid != m_txnid);
    invariant(blocker != this);

    // a blocker that is not waiting on anything adds no edge
    if (blocker == nullptr) {
      continue;
    }

    const bool visited_before = wait_graph->node_exists(blocker_txnid);
    wait_graph->add_edge(m_txnid, blocker_txnid);
    if (visited_before) {
      // its outgoing edges were (or are being) added by an earlier visit
      continue;
    }

    // recurse: expand the graph from the blocker's own set of conflicts
    txnid_set blocker_conflicts;
    blocker_conflicts.create();
    blocker->get_conflicts(&blocker_conflicts);
    blocker->build_wait_graph(wait_graph, blocker_conflicts);
    blocker_conflicts.destroy();
  }
}
// returns: true if the current set of lock requests contains
// a deadlock, false otherwise.
bool lock_request::deadlock_exists(const txnid_set &conflicts) {
wfg wait_graph;
wait_graph.create();
build_wait_graph(&wait_graph, conflicts);
std::function<void(TXNID)> reporter;
if (m_deadlock_cb) {
reporter = [this](TXNID a) {
lock_request *req = find_lock_request(a);
if (req) {
m_deadlock_cb(req->m_txnid, (req->m_type == lock_request::WRITE),
req->m_left_key, req->m_right_key);
}
};
}
bool deadlock = wait_graph.cycle_exists_from_txnid(m_txnid, reporter);
wait_graph.destroy();
return deadlock;
}
// Try to acquire the lock described by this lock request.
// returns: the result of locktree::acquire_[write,read]_lock(), or
//          DB_LOCK_DEADLOCK if waiting for the lock would deadlock.
// effect: when the lock is not granted, the keys are copied, the request
//         becomes PENDING and is added to the set of pending requests;
//         if that creates a deadlock it is removed again. Any result other
//         than DB_LOCK_NOTGRANTED completes the request.
int lock_request::start() {
  txnid_set conflicts;
  conflicts.create();

  int r;
  switch (m_type) {
    case type::WRITE:
      r = m_lt->acquire_write_lock(m_txnid, m_left_key, m_right_key,
                                   &conflicts, m_big_txn);
      break;
    default:
      invariant(m_type == type::READ);
      r = m_lt->acquire_read_lock(m_txnid, m_left_key, m_right_key,
                                  &conflicts, m_big_txn);
      break;
  }

  if (r == DB_LOCK_NOTGRANTED) {
    // we are about to wait: own the keys and remember when/why we blocked
    copy_keys();
    m_state = state::PENDING;
    m_start_time = toku_current_time_microsec() / 1000;
    m_conflicting_txnid = conflicts.get(0);
    if (m_start_before_pending_test_callback) {
      m_start_before_pending_test_callback();
    }

    // register as pending, then back out again if that closes a cycle
    toku_external_mutex_lock(&m_info->mutex);
    insert_into_lock_requests();
    if (deadlock_exists(conflicts)) {
      remove_from_lock_requests();
      r = DB_LOCK_DEADLOCK;
    }
    toku_external_mutex_unlock(&m_info->mutex);

    if (m_start_test_callback) {
      m_start_test_callback();  // test callback
    }
  }

  // granted, deadlocked or failed: the request is finished either way
  if (r != DB_LOCK_NOTGRANTED) {
    complete(r);
  }

  conflicts.destroy();
  return r;
}
// Sleep on the lock request until it is resolved or `wait_time_ms` has
// elapsed. Convenience overload: no kill polling interval and no kill
// callback are passed to the full wait().
int lock_request::wait(uint64_t wait_time_ms) {
  return wait(wait_time_ms, /*killed_time_ms=*/0, /*killed_callback=*/nullptr);
}
// Block until this request leaves the PENDING state or the timeout expires.
//
// wait_time_ms:       overall timeout, in milliseconds.
// killed_time_ms:     if non-zero, each individual sleep is capped at this
//                     many milliseconds so that killed_callback gets polled.
// killed_callback:    optional; a non-zero return aborts the wait and
//                     completes the request with DB_LOCK_NOTGRANTED.
// lock_wait_callback: optional; passed to report_waits() together with
//                     callback_arg for every conflict collected by the
//                     initial retry, if the request is still pending.
// returns: m_complete_r, i.e. the value the request was completed with.
//
// The whole body between the lock/unlock calls runs with m_info->mutex held.
int lock_request::wait(uint64_t wait_time_ms, uint64_t killed_time_ms,
int (*killed_callback)(void),
void (*lock_wait_callback)(void *, TXNID, TXNID),
void *callback_arg) {
// all t_* values below are in the unit returned by
// toku_current_time_microsec(); wait_time_ms is scaled by 1000 to match.
// NOTE(review): wait_time_ms * 1000 (and the addition) are unchecked
// unsigned arithmetic and wrap around for very large timeouts - confirm
// callers never pass such values.
uint64_t t_now = toku_current_time_microsec();
uint64_t t_start = t_now;
uint64_t t_end = t_start + wait_time_ms * 1000;
toku_external_mutex_lock(&m_info->mutex);
// check again, this time locking out other retry calls
if (m_state == state::PENDING) {
GrowableArray<TXNID> conflicts_collector;
conflicts_collector.init();
retry(&conflicts_collector);
// only report who we are waiting for if the retry did not resolve us
if (m_state == state::PENDING) {
report_waits(&conflicts_collector, lock_wait_callback, callback_arg);
}
conflicts_collector.deinit();
}
while (m_state == state::PENDING) {
// check if this thread is killed
if (killed_callback && killed_callback()) {
remove_from_lock_requests();
complete(DB_LOCK_NOTGRANTED);
// complete() left the PENDING state, so `continue` ends the loop
continue;
}
// compute the time until we should wait
uint64_t t_wait;
if (killed_time_ms == 0) {
// no kill polling: sleep straight through to the deadline
t_wait = t_end;
} else {
// wake up after killed_time_ms to poll killed_callback again,
// but never sleep past the overall deadline
t_wait = t_now + killed_time_ms * 1000;
if (t_wait > t_end) t_wait = t_end;
}
// the third argument is the remaining sleep interval (t_wait - t_now)
int r = toku_external_cond_timedwait(&m_wait_cond, &m_info->mutex,
(int64_t)(t_wait - t_now));
invariant(r == 0 || r == ETIMEDOUT);
t_now = toku_current_time_microsec();
if (m_state == state::PENDING && (t_now >= t_end)) {
m_info->counters.timeout_count += 1;
// if we're still pending and we timed out, then remove our
// request from the set of lock requests and fail.
remove_from_lock_requests();
// complete sets m_state to COMPLETE, breaking us out of the loop
complete(DB_LOCK_NOTGRANTED);
}
}
// account for the time spent in this call in the shared wait counters
uint64_t t_real_end = toku_current_time_microsec();
uint64_t duration = t_real_end - t_start;
m_info->counters.wait_count += 1;
m_info->counters.wait_time += duration;
// waits of at least 1000000 time units (one second, if the clock is in
// microseconds as its name says) are additionally counted as "long" waits
if (duration >= 1000000) {
m_info->counters.long_wait_count += 1;
m_info->counters.long_wait_time += duration;
}
toku_external_mutex_unlock(&m_info->mutex);
invariant(m_state == state::COMPLETE);
return m_complete_r;
}
// Finish this lock request: record `complete_r` as its final result and
// move it to the COMPLETE state. Does not wake up any waiter by itself.
void lock_request::complete(int complete_r) {
  m_complete_r = complete_r;
  m_state = state::COMPLETE;
}
// Left end-point of the requested range.
const DBT *lock_request::get_left_key() const {
  return m_left_key;
}

// Right end-point of the requested range.
const DBT *lock_request::get_right_key() const {
  return m_right_key;
}

// Transaction that issued this request.
TXNID lock_request::get_txnid() const {
  return m_txnid;
}

// Time recorded by start() when the request became pending (0 otherwise).
uint64_t lock_request::get_start_time() const {
  return m_start_time;
}

// First conflicting txnid seen by the most recent failed acquisition.
TXNID lock_request::get_conflicting_txnid() const {
  return m_conflicting_txnid;
}
// Try once more to acquire the lock for a PENDING request.
// returns: 0 if the lock was acquired, otherwise the locktree's error code.
// effect: on success the request is removed from the pending set,
//         completed, and every thread sleeping on m_wait_cond is woken.
//         On failure the first conflicting txnid is remembered and all
//         (this txnid, conflicting txnid) pairs are appended to
//         `conflicts_collector`.
int lock_request::retry(GrowableArray<TXNID> *conflicts_collector) {
  invariant(m_state == state::PENDING);

  txnid_set conflicts;
  conflicts.create();

  const int r = (m_type == type::WRITE)
                    ? m_lt->acquire_write_lock(m_txnid, m_left_key,
                                               m_right_key, &conflicts,
                                               m_big_txn)
                    : m_lt->acquire_read_lock(m_txnid, m_left_key,
                                              m_right_key, &conflicts,
                                              m_big_txn);

  if (r != 0) {
    // still blocked: record who is in the way
    m_conflicting_txnid = conflicts.get(0);
    add_conflicts_to_waits(&conflicts, conflicts_collector);
  } else {
    // got the lock: leave the pending set and wake the waiting thread
    remove_from_lock_requests();
    complete(r);
    if (m_retry_test_callback) {
      m_retry_test_callback();  // test callback
    }
    toku_external_cond_broadcast(&m_wait_cond);
  }

  conflicts.destroy();
  return r;
}
// Retry every pending lock request of `lt`, coalescing concurrent callers.
//
// lt:                 locktree whose pending requests should be retried.
// lock_wait_callback: optional; passed to report_waits() with callback_arg
//                     for each conflict collected during the retries.
// after_retry_all_test_callback: optional test hook, invoked right after
//                     the retry pass while retry_mutex is released.
void lock_request::retry_all_lock_requests(
locktree *lt, void (*lock_wait_callback)(void *, TXNID, TXNID),
void *callback_arg, void (*after_retry_all_test_callback)(void)) {
lt_lock_request_info *info = lt->get_lock_request_info();
// if there are no pending lock requests then there is nothing to do.
// the unlocked data race on pending_is_empty is OK since lock requests
// are retried after added to the pending set.
if (info->pending_is_empty) return;
// get my retry generation: the value of retry_want after adding one to it.
// note that this happens before retry_mutex is taken.
unsigned long long my_retry_want = (info->retry_want += 1);
toku_mutex_lock(&info->retry_mutex);
GrowableArray<TXNID> conflicts_collector;
conflicts_collector.init();
// here is the group retry algorithm.
// get the latest retry_want count and use it as the generation number of
// this retry operation. if this retry generation is > the last retry
// generation, then do the lock retries. otherwise, no lock retries
// are needed.
// concretely: only the caller whose increment directly follows retry_done
// performs a retry pass; that pass sets retry_done to the current
// retry_want, so callers that incremented in between skip the block below.
if ((my_retry_want - 1) == info->retry_done) {
for (;;) {
if (!info->running_retry) {
info->running_retry = true;
info->retry_done = info->retry_want;
// the retry pass itself runs without retry_mutex held
toku_mutex_unlock(&info->retry_mutex);
retry_all_lock_requests_info(info, &conflicts_collector);
if (after_retry_all_test_callback) after_retry_all_test_callback();
toku_mutex_lock(&info->retry_mutex);
info->running_retry = false;
// wake any caller waiting in the else branch below
toku_cond_broadcast(&info->retry_cv);
break;
} else {
// another retry pass is in flight; wait for it to finish, then loop
toku_cond_wait(&info->retry_cv, &info->retry_mutex);
}
}
}
toku_mutex_unlock(&info->retry_mutex);
// callbacks are invoked after retry_mutex has been released
report_waits(&conflicts_collector, lock_wait_callback, callback_arg);
conflicts_collector.deinit();
}
// Retry each request in info->pending_lock_requests once, holding
// info->mutex for the whole pass. Conflicts of requests that stay blocked
// are appended to `collector` by retry().
void lock_request::retry_all_lock_requests_info(
    lt_lock_request_info *info, GrowableArray<TXNID> *collector) {
  toku_external_mutex_lock(&info->mutex);

  uint32_t idx = 0;
  while (idx < info->pending_lock_requests.size()) {
    lock_request *candidate;
    const int fetch_r = info->pending_lock_requests.fetch(idx, &candidate);
    invariant_zero(fetch_r);

    // a successful retry removes the request from the set, so the next
    // candidate slides into slot `idx`; only advance past requests that
    // are still pending.
    const int retry_r = candidate->retry(collector);
    if (retry_r != 0) {
      ++idx;
    }
  }

  // future threads should only retry lock requests if some still exist
  info->should_retry_lock_requests = info->pending_lock_requests.size() > 0;

  toku_external_mutex_unlock(&info->mutex);
}
// Append one (blocked txnid, blocking txnid) pair to `wait_conflicts` for
// every txnid in `conflicts`. The blocked txnid is always this request's;
// the two values of a pair are pushed back to back, blocked first.
void lock_request::add_conflicts_to_waits(
    txnid_set *conflicts, GrowableArray<TXNID> *wait_conflicts) {
  const uint32_t conflict_count = conflicts->size();
  uint32_t idx = 0;
  while (idx < conflict_count) {
    wait_conflicts->push(m_txnid);
    wait_conflicts->push(conflicts->get(idx));
    ++idx;
  }
}
// Hand every collected wait edge to `lock_wait_callback`.
// `wait_conflicts` is read as consecutive (blocked, blocking) txnid pairs,
// the layout produced by add_conflicts_to_waits(). Does nothing when no
// callback is supplied.
void lock_request::report_waits(GrowableArray<TXNID> *wait_conflicts,
                                void (*lock_wait_callback)(void *, TXNID,
                                                           TXNID),
                                void *callback_arg) {
  if (lock_wait_callback == nullptr) {
    return;
  }

  const size_t entry_count = wait_conflicts->get_size();
  size_t pos = 0;
  while (pos < entry_count) {
    const TXNID waiter = wait_conflicts->fetch_unchecked(pos);
    const TXNID holder = wait_conflicts->fetch_unchecked(pos + 1);
    lock_wait_callback(callback_arg, waiter, holder);
    pos += 2;
  }
}
// Opaque caller cookie that was passed to set().
void *lock_request::get_extra() const {
  return m_extra;
}
// Abort this pending request: take it out of the pending set, complete it
// with DB_LOCK_NOTGRANTED and wake every thread sleeping on m_wait_cond.
// The pending set is modified, so the caller is expected to hold the lock
// request info mutex (the static overload does).
void lock_request::kill_waiter() {
  remove_from_lock_requests();
  complete(DB_LOCK_NOTGRANTED);
  toku_external_cond_broadcast(&m_wait_cond);
}
// Abort the first pending request of `lt` whose get_extra() equals `extra`.
// At most one request is killed; nothing happens if none matches.
void lock_request::kill_waiter(locktree *lt, void *extra) {
  lt_lock_request_info *info = lt->get_lock_request_info();

  toku_external_mutex_lock(&info->mutex);
  for (uint32_t idx = 0; idx < info->pending_lock_requests.size(); ++idx) {
    lock_request *candidate;
    const int r = info->pending_lock_requests.fetch(idx, &candidate);
    if (r != 0) {
      continue;
    }
    if (candidate->get_extra() != extra) {
      continue;
    }
    candidate->kill_waiter();
    break;
  }
  toku_external_mutex_unlock(&info->mutex);
}
// Look up the pending lock request issued by `txnid`. Must hold the mutex.
// returns: the matching request, or nullptr if the lookup fails.
lock_request *lock_request::find_lock_request(const TXNID &txnid) {
  lock_request *found;
  const int r = m_info->pending_lock_requests.find_zero<TXNID, find_by_txnid>(
      txnid, &found, nullptr);
  return (r == 0) ? found : nullptr;
}
// insert this lock request into the locktree's set. must hold the mutex.
void lock_request::insert_into_lock_requests(void) {
uint32_t idx;
lock_request *request;
int r = m_info->pending_lock_requests.find_zero<TXNID, find_by_txnid>(
m_txnid, &request, &idx);
invariant(r == DB_NOTFOUND);
r = m_info->pending_lock_requests.insert_at(this, idx);
invariant_zero(r);
m_info->pending_is_empty = false;
}
// remove this lock request from the locktree's set. must hold the mutex.
void lock_request::remove_from_lock_requests(void) {
uint32_t idx;
lock_request *request;
int r = m_info->pending_lock_requests.find_zero<TXNID, find_by_txnid>(
m_txnid, &request, &idx);
invariant_zero(r);
invariant(request == this);
r = m_info->pending_lock_requests.delete_at(idx);
invariant_zero(r);
if (m_info->pending_lock_requests.size() == 0)
m_info->pending_is_empty = true;
}
// Three-way comparison used for txnid lookups in the pending set.
// returns: -1 if the request's txnid is smaller than `txnid`,
//           0 if they are equal, 1 if it is larger.
int lock_request::find_by_txnid(lock_request *const &request,
                                const TXNID &txnid) {
  const TXNID candidate = request->m_txnid;
  if (candidate == txnid) {
    return 0;
  }
  return (candidate < txnid) ? -1 : 1;
}
void lock_request::set_start_test_callback(void (*f)(void)) {
m_start_test_callback = f;
}
void lock_request::set_start_before_pending_test_callback(void (*f)(void)) {
m_start_before_pending_test_callback = f;
}
void lock_request::set_retry_test_callback(void (*f)(void)) {
m_retry_test_callback = f;
}
} /* namespace toku */
#endif // OS_WIN
#endif // ROCKSDB_LITE
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#pragma once
#include "../db.h"
#include "../ft/comparator.h"
#include "../portability/toku_pthread.h"
#include "locktree.h"
#include "txnid_set.h"
#include "wfg.h"
namespace toku {
// A lock request contains the db, the key range, the lock type, and
// the transaction id that describes a potential row range lock.
//
// the typical use case is:
// - initialize a lock request
// - start to try to acquire the lock
// - do something else
// - wait for the lock request to be resolved on a timed condition
// - destroy the lock request
// a lock request is resolved when its state is no longer pending, or
// when it becomes granted, or timedout, or deadlocked. when resolved, the
// state of the lock request is changed and any waiting threads are awakened.
// One request by a transaction for a READ or WRITE lock on a key range of a
// locktree. Lifecycle as implemented in lock_request.cc:
// create() -> set() -> start() -> [wait()] -> destroy(); set() may be
// called again to reuse the object once it is no longer pending.
class lock_request {
public:
// kind of lock requested; UNKNOWN until set() has been called
enum type { UNKNOWN, READ, WRITE };
// effect: Initializes a lock request.
// mutex_factory is used to create the request's wait condition.
void create(toku_external_mutex_factory_t mutex_factory);
// effect: Destroys a lock request.
// requires: the request is not pending and was not destroyed before
void destroy(void);
// effect: Resets the lock request parameters, allowing it to be reused.
// requires: Lock request was already created at some point
// note: the keys are not copied here; they must stay valid until start()
// returns (start() copies them if the request has to wait).
// `extra` is an opaque cookie, returned by get_extra() and matched
// by the static kill_waiter().
void set(locktree *lt, TXNID txnid, const DBT *left_key, const DBT *right_key,
type lock_type, bool big_txn, void *extra = nullptr);
// effect: Tries to acquire a lock described by this lock request.
// returns: The return code of locktree::acquire_[write,read]_lock()
// or DB_LOCK_DEADLOCK if this request would end up deadlocked.
int start(void);
// effect: Sleeps until either the request is granted or the wait time
// expires.
// returns: The return code of locktree::acquire_[write,read]_lock()
// or simply DB_LOCK_NOTGRANTED if the wait time expired.
int wait(uint64_t wait_time_ms);
// Same as above, with additional controls:
// - killed_time_ms: if non-zero, the request wakes up at least this often
// to poll killed_callback
// - killed_callback: if it returns non-zero the wait is abandoned with
// DB_LOCK_NOTGRANTED
// - lock_wait_callback/callback_arg: receives (callback_arg, blocked txnid,
// blocking txnid) for each conflict found when the wait begins
int wait(uint64_t wait_time_ms, uint64_t killed_time_ms,
int (*killed_callback)(void),
void (*lock_wait_callback)(void *, TXNID, TXNID) = nullptr,
void *callback_arg = nullptr);
// return: left end-point of the lock range
const DBT *get_left_key(void) const;
// return: right end-point of the lock range
const DBT *get_right_key(void) const;
// return: the txnid waiting for a lock
TXNID get_txnid(void) const;
// return: when this lock request started, as milliseconds from epoch
uint64_t get_start_time(void) const;
// return: which txnid is blocking this request (there may be more, though)
TXNID get_conflicting_txnid(void) const;
// effect: Retries all of the lock requests for the given locktree.
// Any lock requests successfully restarted is completed and woken
// up.
// The rest remain pending.
static void retry_all_lock_requests(
locktree *lt, void (*lock_wait_callback)(void *, TXNID, TXNID) = nullptr,
void *callback_arg = nullptr,
void (*after_retry_test_callback)(void) = nullptr);
// effect: Retries every request in info's pending set once, under
// info's mutex; conflicts of still-blocked requests are appended
// to collector.
static void retry_all_lock_requests_info(lt_lock_request_info *info,
GrowableArray<TXNID> *collector);
// test-only hooks invoked from start() and retry()
void set_start_test_callback(void (*f)(void));
void set_start_before_pending_test_callback(void (*f)(void));
void set_retry_test_callback(void (*f)(void));
// return: the `extra` cookie given to set()
void *get_extra(void) const;
// effect: Removes this request from the pending set, completes it with
// DB_LOCK_NOTGRANTED and wakes its waiters.
void kill_waiter(void);
// effect: Does the above for the first pending request of lt whose extra
// cookie equals `extra`, holding the lock request info mutex.
static void kill_waiter(locktree *lt, void *extra);
private:
enum state {
UNINITIALIZED,
INITIALIZED,
PENDING,
COMPLETE,
DESTROYED,
};
// The keys for a lock request are stored "unowned" in m_left_key
// and m_right_key. When the request is about to go to sleep, it
// copies each non-infinite key into m_left_key_copy / m_right_key_copy
// and redirects the corresponding pointer to that copy (see copy_keys()).
TXNID m_txnid;
TXNID m_conflicting_txnid;
uint64_t m_start_time;
const DBT *m_left_key;
const DBT *m_right_key;
DBT m_left_key_copy;
DBT m_right_key_copy;
// The lock request type and associated locktree
type m_type;
locktree *m_lt;
// If the lock request is in the completed state, then its
// final return value is stored in m_complete_r
int m_complete_r;
state m_state;
// waiters sleep on this condition together with m_info->mutex
toku_external_cond_t m_wait_cond;
// forwarded unchanged to locktree::acquire_[write,read]_lock()
bool m_big_txn;
// the lock request info state stored in the
// locktree that this lock request is for.
struct lt_lock_request_info *m_info;
// opaque caller cookie given to set()
void *m_extra;
// effect: tries again to acquire the lock described by this lock request
// returns: 0 if retrying the request succeeded and is now complete
int retry(GrowableArray<TXNID> *conflict_collector);
// effect: stores the final return value and moves to the COMPLETE state
void complete(int complete_r);
// effect: Finds another lock request by txnid.
// requires: The lock request info mutex is held
lock_request *find_lock_request(const TXNID &txnid);
// effect: Insert this lock request into the locktree's set.
// requires: the locktree's mutex is held
void insert_into_lock_requests(void);
// effect: Removes this lock request from the locktree's set.
// requires: The lock request info mutex is held
void remove_from_lock_requests(void);
// effect: Asks this request's locktree which txnids are preventing
// us from getting the lock described by this request.
// returns: conflicts is populated with the txnid's that this request
// is blocked on
void get_conflicts(txnid_set *conflicts);
// effect: Builds a wait-for-graph for this lock request and the given
// conflict set
void build_wait_graph(wfg *wait_graph, const txnid_set &conflicts);
// returns: True if this lock request is in deadlock with the given conflicts
// set
bool deadlock_exists(const txnid_set &conflicts);
// effect: Replaces the unowned key pointers by pointers to private copies
void copy_keys(void);
// effect: three-way comparison of request's txnid against txnid,
// used for lookups in the pending set
static int find_by_txnid(lock_request *const &request, const TXNID &txnid);
// Report list of conflicts to lock wait callback.
static void report_waits(GrowableArray<TXNID> *wait_conflicts,
void (*lock_wait_callback)(void *, TXNID, TXNID),
void *callback_arg);
// Append (this txnid, conflicting txnid) pairs to wait_conflicts.
void add_conflicts_to_waits(txnid_set *conflicts,
GrowableArray<TXNID> *wait_conflicts);
// test-only hooks, see the set_*_test_callback() setters
void (*m_start_test_callback)(void);
void (*m_start_before_pending_test_callback)(void);
void (*m_retry_test_callback)(void);
public:
// Optional deadlock reporter. When set, deadlock_exists() invokes it with
// (txnid, is_write_request, left key, right key) of pending requests
// reported by the wait-for-graph cycle search.
// NOTE(review): std::function needs <functional>, which this header does
// not include directly - presumably it arrives via another include; verify.
std::function<void(TXNID, bool, const DBT *, const DBT *)> m_deadlock_cb;
friend class lock_request_unit_test;
};
// PORT: lock_request is not a POD anymore due to use of toku_external_cond_t
// This is ok as the PODness is not really required: lock_request objects are
// not moved in memory or anything.
// ENSURE_POD(lock_request);
} /* namespace toku */
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ifndef ROCKSDB_LITE
#ifndef OS_WIN
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#include "range_buffer.h"
#include <string.h>
#include "../portability/memory.h"
#include "../util/dbt.h"
namespace toku {
// True when the left end-point is flagged as either -infinity or +infinity.
bool range_buffer::record_header::left_is_infinite() const {
  if (left_neg_inf) {
    return true;
  }
  return left_pos_inf;
}

// True when the right end-point is flagged as either -infinity or +infinity.
bool range_buffer::record_header::right_is_infinite() const {
  if (right_neg_inf) {
    return true;
  }
  return right_pos_inf;
}
// Fill in a record header for the range [left_key, right_key].
// A null right_key denotes a point range: the right infinity flags mirror
// the left ones and right_key_size is 0. Infinite keys get a size of 0.
void range_buffer::record_header::init(const DBT *left_key,
                                       const DBT *right_key,
                                       bool is_exclusive) {
  is_exclusive_lock = is_exclusive;

  // left end-point
  left_neg_inf = (left_key == toku_dbt_negative_infinity());
  left_pos_inf = (left_key == toku_dbt_positive_infinity());
  if (toku_dbt_is_infinite(left_key)) {
    left_key_size = 0;
  } else {
    left_key_size = left_key->size;
  }

  // point range: the right end-point shares the left one
  if (!right_key) {
    right_neg_inf = left_neg_inf;
    right_pos_inf = left_pos_inf;
    right_key_size = 0;
    return;
  }

  // right end-point
  right_neg_inf = (right_key == toku_dbt_negative_infinity());
  right_pos_inf = (right_key == toku_dbt_positive_infinity());
  if (toku_dbt_is_infinite(right_key)) {
    right_key_size = 0;
  } else {
    right_key_size = right_key->size;
  }
}
// Left end-point of the record: one of the shared infinity DBTs when the
// header flags it as infinite, otherwise the record's own _left_key.
const DBT *range_buffer::iterator::record::get_left_key() const {
  if (_header.left_neg_inf) {
    return toku_dbt_negative_infinity();
  }
  if (_header.left_pos_inf) {
    return toku_dbt_positive_infinity();
  }
  return &_left_key;
}
// Right end-point of the record: one of the shared infinity DBTs when the
// header flags it as infinite, otherwise the record's own _right_key.
const DBT *range_buffer::iterator::record::get_right_key() const {
  if (_header.right_neg_inf) {
    return toku_dbt_negative_infinity();
  }
  if (_header.right_pos_inf) {
    return toku_dbt_positive_infinity();
  }
  return &_right_key;
}
// Number of bytes this record occupies in the buffer: the header followed
// by the left key bytes and the right key bytes.
size_t range_buffer::iterator::record::size() const {
  size_t total = sizeof(record_header);
  total += _header.left_key_size;
  total += _header.right_key_size;
  return total;
}
// Decode the record that starts at `buf`.
// The header is copied out; the key DBTs are pointed into `buf` rather than
// copied, so the record is only usable while `buf` stays valid.
// A finite right key with right_key_size == 0 is the point-range encoding:
// the right key then refers to the same bytes as the left key.
void range_buffer::iterator::record::deserialize(const char *buf) {
  // header first
  memcpy(&_header, buf, sizeof(record_header));
  const char *cursor = buf + sizeof(record_header);

  // left key, stored right behind the header unless it is infinite
  if (!_header.left_is_infinite()) {
    toku_fill_dbt(&_left_key, cursor, _header.left_key_size);
    cursor += _header.left_key_size;
  }

  // right key, if there is anything to decode
  if (_header.right_is_infinite()) {
    return;
  }
  if (_header.right_key_size == 0) {
    toku_copyref_dbt(&_right_key, _left_key);
  } else {
    toku_fill_dbt(&_right_key, cursor, _header.right_key_size);
  }
}
toku::range_buffer::iterator::iterator()
: _ma_chunk_iterator(nullptr),
_current_chunk_base(nullptr),
_current_chunk_offset(0),
_current_chunk_max(0),
_current_rec_size(0) {}
toku::range_buffer::iterator::iterator(const range_buffer *buffer)
: _ma_chunk_iterator(&buffer->_arena),
_current_chunk_base(nullptr),
_current_chunk_offset(0),
_current_chunk_max(0),
_current_rec_size(0) {
reset_current_chunk();
}
// Rewind to the beginning of the chunk the arena iterator currently points
// at, refreshing the cached chunk base and chunk size.
void range_buffer::iterator::reset_current_chunk() {
  _current_chunk_offset = 0;
  _current_chunk_base = _ma_chunk_iterator.current(&_current_chunk_max);
}
// Decode the record at the iterator's position into `rec`.
// returns: false, leaving `rec` untouched, when the position is at or past
//          the end of the current chunk; true otherwise. On success the
//          record's size is remembered so that next() knows how far to move.
bool range_buffer::iterator::current(record *rec) {
  if (_current_chunk_offset >= _current_chunk_max) {
    return false;
  }
  const char *chunk = reinterpret_cast<const char *>(_current_chunk_base);
  rec->deserialize(chunk + _current_chunk_offset);
  _current_rec_size = rec->size();
  return true;
}
// Advance the iterator to the next record in the buffer.
// requires: current() succeeded since the last move, so the size of the
//           record being skipped is known.
void range_buffer::iterator::next() {
  invariant(_current_chunk_offset < _current_chunk_max);
  invariant(_current_rec_size > 0);

  // skip over the record we are standing on; the size of the record we
  // land on is unknown until current() decodes it
  _current_chunk_offset += _current_rec_size;
  _current_rec_size = 0;

  // ran off the end of this chunk: continue in the next one, if any
  const bool chunk_exhausted = _current_chunk_offset >= _current_chunk_max;
  if (chunk_exhausted && _ma_chunk_iterator.more()) {
    _ma_chunk_iterator.next();
    reset_current_chunk();
  }
}
// Set up an empty range buffer.
// The arena is created with an initial size of 0 so that buffer space is
// allocated lazily: no malloc/free is done if the transaction ends up
// taking no locks.
void range_buffer::create() {
  _num_ranges = 0;
  _arena.create(0);
}
// Append the range [left_key, right_key] to the buffer and count it.
// If the two keys are equal only a single copy is stored (point record).
// Key sizes are asserted to be at most MAX_KEY_SIZE.
void range_buffer::append(const DBT *left_key, const DBT *right_key,
                          bool is_write_request) {
  const bool is_point = toku_dbt_equals(left_key, right_key);

  invariant(left_key->size <= MAX_KEY_SIZE);
  if (is_point) {
    append_point(left_key, is_write_request);
  } else {
    invariant(right_key->size <= MAX_KEY_SIZE);
    append_range(left_key, right_key, is_write_request);
  }

  ++_num_ranges;
}
// True when the buffer's arena has no bytes in use.
bool range_buffer::is_empty() const {
  return total_memory_size() == 0;
}

// Bytes currently in use in the buffer's arena.
uint64_t range_buffer::total_memory_size() const {
  return _arena.total_size_in_use();
}

// Number of ranges appended since create().
int range_buffer::get_num_ranges() const {
  return _num_ranges;
}

// Release the buffer's arena.
void range_buffer::destroy() {
  _arena.destroy();
}
void range_buffer::append_range(const DBT *left_key, const DBT *right_key,
bool is_exclusive) {
size_t record_length =
sizeof(record_header) + left_key->size + right_key->size;
char *buf = reinterpret_cast<char *>(_arena.malloc_from_arena(record_length));
record_header h;
h.init(left_key, right_key, is_exclusive);
// serialize the header
memcpy(buf, &h, sizeof(record_header));
buf += sizeof(record_header);
// serialize the left key if necessary
if (!h.left_is_infinite()) {
memcpy(buf, left_key->data, left_key->size);
buf += left_key->size;
}
// serialize the right key if necessary
if (!h.right_is_infinite()) {
memcpy(buf, right_key->data, right_key->size);
}
}
void range_buffer::append_point(const DBT *key, bool is_exclusive) {
size_t record_length = sizeof(record_header) + key->size;
char *buf = reinterpret_cast<char *>(_arena.malloc_from_arena(record_length));
record_header h;
h.init(key, nullptr, is_exclusive);
// serialize the header
memcpy(buf, &h, sizeof(record_header));
buf += sizeof(record_header);
// serialize the key if necessary
if (!h.left_is_infinite()) {
memcpy(buf, key->data, key->size);
}
}
} /* namespace toku */
#endif // OS_WIN
#endif // ROCKSDB_LITE
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
======= */
#ident \
"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#pragma once
#include <inttypes.h>
#include <stdint.h>
#include "../util/dbt.h"
#include "../util/memarena.h"
namespace toku {
// a key range buffer represents a set of key ranges that can
// be stored, iterated over, and then destroyed all at once.
class range_buffer {
 private:
  // the key range buffer is a bunch of records in a row.
  // each record has the following header, followed by the
  // left key and right key data payload, if applicable.

  // key lengths are stored in the header as 2 bytes (uint16_t), so the
  // largest key length that can be represented is 2^16 - 1. a limit of
  // 2^16 would let through a key whose length does not fit in the
  // header's size fields.
  static const size_t MAX_KEY_SIZE = (1 << 16) - 1;

  struct record_header {
    bool left_neg_inf;
    bool left_pos_inf;
    bool right_pos_inf;
    bool right_neg_inf;
    // key lengths; bounded by MAX_KEY_SIZE
    uint16_t left_key_size;
    uint16_t right_key_size;
    bool is_exclusive_lock;

    bool left_is_infinite(void) const;
    bool right_is_infinite(void) const;

    void init(const DBT *left_key, const DBT *right_key, bool is_exclusive);
  };
  // PORT static_assert(sizeof(record_header) == 8, "record header format is
  // off");

 public:
  // the iterator abstracts reading over a buffer of variable length
  // records one by one until there are no more left.
  class iterator {
   public:
    iterator();
    iterator(const range_buffer *buffer);

    // a record represents the user-view of a serialized key range.
    // it handles positive and negative infinity and the optimized
    // point range case, where left and right points share memory.
    class record {
     public:
      // get a read-only pointer to the left key of this record's range
      const DBT *get_left_key(void) const;

      // get a read-only pointer to the right key of this record's range
      const DBT *get_right_key(void) const;

      // how big is this record? this tells us where the next record is
      size_t size(void) const;

      // the is_exclusive_lock flag stored in this record's header
      bool get_exclusive_flag() const { return _header.is_exclusive_lock; }

      // populate a record header and point our DBT's
      // buffers into ours if they are not infinite.
      void deserialize(const char *buf);

     private:
      record_header _header;
      DBT _left_key;
      DBT _right_key;
    };

    // populate the given record object with the current record.
    // the memory referred to by record is valid for only
    // as long as the record exists.
    bool current(record *rec);

    // move the iterator to the next record in the buffer
    void next(void);

   private:
    void reset_current_chunk();

    // the key range buffer we are iterating over, the current
    // offset in that buffer, and the size of the current record.
    memarena::chunk_iterator _ma_chunk_iterator;
    const void *_current_chunk_base;
    size_t _current_chunk_offset;
    size_t _current_chunk_max;
    size_t _current_rec_size;
  };

  // allocate buffer space lazily instead of on creation. this way,
  // no malloc/free is done if the transaction ends up taking no locks.
  void create(void);

  // append a left/right key range to the buffer.
  // if the keys are equal, then only one copy is stored.
  // is_write_request is recorded with the range as its exclusive-lock flag.
  void append(const DBT *left_key, const DBT *right_key,
              bool is_write_request = false);

  // is this range buffer empty?
  bool is_empty(void) const;

  // how much memory is being used by this range buffer?
  uint64_t total_memory_size(void) const;

  // how many ranges are stored in this range buffer?
  int get_num_ranges(void) const;

  void destroy(void);

 private:
  // backing storage for the serialized records
  memarena _arena;
  // number of ranges appended so far
  int _num_ranges;

  // append a range with distinct left and right keys to the buffer.
  void append_range(const DBT *left_key, const DBT *right_key,
                    bool is_write_request);

  // append a point to the buffer. this is the space/time saving
  // optimization for key ranges where left == right.
  void append_point(const DBT *key, bool is_write_request);
};
} /* namespace toku */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册