diff --git a/libraries/chain/chain_controller.cpp b/libraries/chain/chain_controller.cpp index 6d964e0afbb3be0d4abfa6db6d01602cf46c9fba..40aa6739b7c34bfe634857d8f65630a561ee76fe 100644 --- a/libraries/chain/chain_controller.cpp +++ b/libraries/chain/chain_controller.cpp @@ -35,11 +35,14 @@ #include #include +#include +#include + #include #include #include -#include +#include #include #include @@ -677,20 +680,24 @@ void chain_controller::require_account(const types::AccountName& name) const { } const producer_object& chain_controller::validate_block_header(uint32_t skip, const signed_block& next_block)const { - FC_ASSERT(head_block_id() == next_block.previous, "", - ("head_block_id",head_block_id())("next.prev",next_block.previous)); - FC_ASSERT(head_block_time() < next_block.timestamp, "", - ("head_block_time",head_block_time())("next",next_block.timestamp)("blocknum",next_block.block_num())); + EOS_ASSERT(head_block_id() == next_block.previous, block_validate_exception, "", + ("head_block_id",head_block_id())("next.prev",next_block.previous)); + EOS_ASSERT(head_block_time() < next_block.timestamp, block_validate_exception, "", + ("head_block_time",head_block_time())("next",next_block.timestamp)("blocknum",next_block.block_num())); + if (next_block.block_num() % config::ProducerCount != 0) + EOS_ASSERT(next_block.producer_changes.empty(), block_validate_exception, + "Producer changes may only occur at the end of a round."); const producer_object& producer = get_producer(get_scheduled_producer(get_slot_at_time(next_block.timestamp))); if(!(skip&skip_producer_signature)) - FC_ASSERT(next_block.validate_signee(producer.signing_key), - "Incorrect block producer key: expected ${e} but got ${a}", - ("e", producer.signing_key)("a", public_key_type(next_block.signee()))); + EOS_ASSERT(next_block.validate_signee(producer.signing_key), block_validate_exception, + "Incorrect block producer key: expected ${e} but got ${a}", + ("e", producer.signing_key)("a", public_key_type(next_block.signee()))); if(!(skip&skip_producer_schedule_check)) { - FC_ASSERT(next_block.producer == producer.owner, "Producer produced block at wrong time", - ("block producer",next_block.producer)("scheduled producer",producer.owner)); + EOS_ASSERT(next_block.producer == producer.owner, block_validate_exception, + "Producer produced block at wrong time", + ("block producer",next_block.producer)("scheduled producer",producer.owner)); } return producer; @@ -704,14 +711,13 @@ void chain_controller::create_block_summary(const signed_block& next_block) { } void chain_controller::update_global_properties(const signed_block& b) { - // If we're at the end of a round... + // If we're at the end of a round, update the BlockchainConfiguration and producer schedule if (b.block_num() % config::ProducerCount == 0) { - // Get the new schedule and blockchain configuration - auto schedule = _admin->get_next_round(_db); + auto schedule = calculate_next_round(b); auto config = _admin->get_blockchain_configuration(_db, schedule); - _db.modify(get_global_properties(), - [schedule = std::move(schedule), config = std::move(config)] (global_property_object& gpo) { + const auto& gpo = get_global_properties(); + _db.modify(gpo, [schedule = std::move(schedule), config = std::move(config)] (global_property_object& gpo) { gpo.active_producers = std::move(schedule); gpo.configuration = std::move(config); }); @@ -920,6 +926,30 @@ void chain_controller::spinup_fork_db() } } +ProducerRound chain_controller::calculate_next_round(const signed_block& next_block) { + auto schedule = _admin->get_next_round(_db); + std::sort(schedule.begin(), schedule.end()); + + auto ReplaceOldProducers = + boost::adaptors::transformed([changes = next_block.producer_changes] (AccountName producer) { + auto itr = changes.find(producer); + if (itr != changes.end()) + return itr->second; + return producer; + }); + + ProducerRound round; + boost::copy(get_global_properties().active_producers | ReplaceOldProducers, round.begin()); + std::sort(round.begin(), round.end()); + EOS_ASSERT(boost::range::equal(schedule, round), block_validate_exception, + "Unexpected round changes in new block header"); + + randutils::seed_seq_fe<1> seed{next_block.timestamp.sec_since_epoch()}; + randutils::random_generator rng(seed); + rng.shuffle(schedule); + return schedule; +} + void chain_controller::update_global_dynamic_data(const signed_block& b) { const dynamic_global_property_object& _dgp = _db.get(); diff --git a/libraries/chain/include/eos/chain/block.hpp b/libraries/chain/include/eos/chain/block.hpp index 2a2b59151b238df90c4e45710543e6571e113f67..99c7827603a1cc3a990af832d38408c736bc85d8 100644 --- a/libraries/chain/include/eos/chain/block.hpp +++ b/libraries/chain/include/eos/chain/block.hpp @@ -28,8 +28,8 @@ namespace eos { namespace chain { struct block_header { - digest_type digest()const; - uint32_t block_num()const { return num_from_id(previous) + 1; } + digest_type digest() const; + uint32_t block_num() const { return num_from_id(previous) + 1; } static uint32_t num_from_id(const block_id_type& id); @@ -37,21 +37,22 @@ namespace eos { namespace chain { fc::time_point_sec timestamp; checksum_type transaction_merkle_root; AccountName producer; + map producer_changes; }; struct signed_block_header : public block_header { - block_id_type id()const; - fc::ecc::public_key signee()const; - void sign( const fc::ecc::private_key& signer ); - bool validate_signee( const fc::ecc::public_key& expected_signee )const; + block_id_type id() const; + fc::ecc::public_key signee() const; + void sign(const fc::ecc::private_key& signer); + bool validate_signee(const fc::ecc::public_key& expected_signee) const; signature_type producer_signature; }; struct thread { vector generated_input; - vector user_input; + vector user_input; vector output_transactions; digest_type merkle_digest() const; @@ -61,13 +62,13 @@ namespace eos { namespace chain { struct signed_block : public signed_block_header { - checksum_type calculate_merkle_root()const; + checksum_type calculate_merkle_root() const; vector cycles; }; } } // eos::chain -FC_REFLECT(eos::chain::block_header, (previous)(timestamp)(transaction_merkle_root)(producer) ) +FC_REFLECT(eos::chain::block_header, (previous)(timestamp)(transaction_merkle_root)(producer)(producer_changes)) FC_REFLECT_DERIVED(eos::chain::signed_block_header, (eos::chain::block_header), (producer_signature)) FC_REFLECT(eos::chain::thread, (generated_input)(user_input)(output_transactions)) FC_REFLECT_DERIVED(eos::chain::signed_block, (eos::chain::signed_block_header), (cycles)) diff --git a/libraries/chain/include/eos/chain/chain_administration_interface.hpp b/libraries/chain/include/eos/chain/chain_administration_interface.hpp index 6f85eb190aecc8f9a10608566c78def3c4419bb6..41300c35dd2d37044c3694451b559e591ae566b7 100644 --- a/libraries/chain/include/eos/chain/chain_administration_interface.hpp +++ b/libraries/chain/include/eos/chain/chain_administration_interface.hpp @@ -20,8 +20,6 @@ class chain_controller; */ class chain_administration_interface { public: - using ProducerRound = std::array; - virtual ~chain_administration_interface(); virtual ProducerRound get_next_round(const chainbase::database& db) = 0; diff --git a/libraries/chain/include/eos/chain/chain_controller.hpp b/libraries/chain/include/eos/chain/chain_controller.hpp index 2275c9ed23b61547b87afc6d386865f192004126..5f377c83b0ebedcddc64a47eb1df35a50fc8b211 100644 --- a/libraries/chain/include/eos/chain/chain_controller.hpp +++ b/libraries/chain/include/eos/chain/chain_controller.hpp @@ -307,6 +307,8 @@ namespace eos { namespace chain { void spinup_db(); void spinup_fork_db(); + ProducerRound calculate_next_round(const signed_block& next_block); + database& _db; fork_database& _fork_db; block_log& _block_log; diff --git a/libraries/chain/include/eos/chain/types.hpp b/libraries/chain/include/eos/chain/types.hpp index 5faebeb5374f861320e6afb3966155f37760a896..968fb176dc85b86ae3f9d433eb2d9eb4a3d32356 100644 --- a/libraries/chain/include/eos/chain/types.hpp +++ b/libraries/chain/include/eos/chain/types.hpp @@ -110,7 +110,6 @@ namespace eos { namespace chain { using private_key_type = fc::ecc::private_key; using chain_id_type = fc::sha256; - using eos::types::AccountName; using eos::types::PermissionName; using eos::types::Asset; @@ -136,6 +135,8 @@ namespace eos { namespace chain { using eos::types::Int128; using eos::types::Int256; + using ProducerRound = std::array; + /** * List all object types from all namespaces here so they can * be easily reflected and displayed in debug output. If a 3rd party diff --git a/libraries/native_contract/include/eos/native_contract/native_contract_chain_administrator.hpp b/libraries/native_contract/include/eos/native_contract/native_contract_chain_administrator.hpp index 2eb1054b700c0464cd540864d1618fd229029b04..68eb3d11860f126bd2403ca211d0d8134bff7ff8 100644 --- a/libraries/native_contract/include/eos/native_contract/native_contract_chain_administrator.hpp +++ b/libraries/native_contract/include/eos/native_contract/native_contract_chain_administrator.hpp @@ -3,6 +3,7 @@ #include namespace eos { namespace native_contract { +using chain::ProducerRound; class native_contract_chain_administrator : public chain::chain_administration_interface { ProducerRound get_next_round(const chainbase::database& db); diff --git a/libraries/native_contract/native_contract_chain_administrator.cpp b/libraries/native_contract/native_contract_chain_administrator.cpp index 8365172fc30a54a405dcce5c4388d686d7955d6a..a397a0746a1cdc107bd6e389ba45fffe822ff9f5 100644 --- a/libraries/native_contract/native_contract_chain_administrator.cpp +++ b/libraries/native_contract/native_contract_chain_administrator.cpp @@ -11,7 +11,7 @@ namespace eos { namespace native_contract { using administrator = native_contract_chain_administrator; -administrator::ProducerRound administrator::get_next_round(const chainbase::database& db) { +ProducerRound administrator::get_next_round(const chainbase::database& db) { #warning TODO: Implement me return db.get(chain::global_property_object::id_type()).active_producers; } diff --git a/libraries/utilities/include/eos/utilities/pcg-random/LICENSE-MIT.txt b/libraries/utilities/include/eos/utilities/pcg-random/LICENSE-MIT.txt new file mode 100644 index 0000000000000000000000000000000000000000..23159603635d3962f8dd7bd7b86d27a6a253b8a2 --- /dev/null +++ b/libraries/utilities/include/eos/utilities/pcg-random/LICENSE-MIT.txt @@ -0,0 +1,19 @@ +Copyright (c) 2014-2017 Melissa O'Neill and PCG Project contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/libraries/utilities/include/eos/utilities/pcg-random/pcg_extras.hpp b/libraries/utilities/include/eos/utilities/pcg-random/pcg_extras.hpp new file mode 100644 index 0000000000000000000000000000000000000000..70dbf5c209e067ee3d69b3ca7d1b7bad13dc96e8 --- /dev/null +++ b/libraries/utilities/include/eos/utilities/pcg-random/pcg_extras.hpp @@ -0,0 +1,640 @@ +/* + * PCG Random Number Generation for C++ + * + * Copyright 2014-2017 Melissa O'Neill , + * and the PCG Project contributors. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + * Licensed under the Apache License, Version 2.0 (provided in + * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) + * or under the MIT license (provided in LICENSE-MIT.txt and at + * http://opensource.org/licenses/MIT), at your option. This file may not + * be copied, modified, or distributed except according to those terms. + * + * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See your chosen license for details. + * + * For additional information about the PCG random number generation scheme, + * visit http://www.pcg-random.org/. + */ + +/* + * This file provides support code that is useful for random-number generation + * but not specific to the PCG generation scheme, including: + * - 128-bit int support for platforms where it isn't available natively + * - bit twiddling operations + * - I/O of 128-bit and 8-bit integers + * - Handling the evilness of SeedSeq + * - Support for efficiently producing random numbers less than a given + * bound + */ + +#ifndef PCG_EXTRAS_HPP_INCLUDED +#define PCG_EXTRAS_HPP_INCLUDED 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __GNUC__ + #include +#endif + +/* + * Abstractions for compiler-specific directives + */ + +#ifdef __GNUC__ + #define PCG_NOINLINE __attribute__((noinline)) +#else + #define PCG_NOINLINE +#endif + +/* + * Some members of the PCG library use 128-bit math. When compiling on 64-bit + * platforms, both GCC and Clang provide 128-bit integer types that are ideal + * for the job. + * + * On 32-bit platforms (or with other compilers), we fall back to a C++ + * class that provides 128-bit unsigned integers instead. It may seem + * like we're reinventing the wheel here, because libraries already exist + * that support large integers, but most existing libraries provide a very + * generic multiprecision code, but here we're operating at a fixed size. + * Also, most other libraries are fairly heavyweight. So we use a direct + * implementation. Sadly, it's much slower than hand-coded assembly or + * direct CPU support. + * + */ +#if __SIZEOF_INT128__ + namespace pcg_extras { + typedef __uint128_t pcg128_t; + } + #define PCG_128BIT_CONSTANT(high,low) \ + ((pcg128_t(high) << 64) + low) +#else + #include "pcg_uint128.hpp" + namespace pcg_extras { + typedef pcg_extras::uint_x4 pcg128_t; + } + #define PCG_128BIT_CONSTANT(high,low) \ + pcg128_t(high,low) + #define PCG_EMULATED_128BIT_MATH 1 +#endif + + +namespace pcg_extras { + +/* + * We often need to represent a "number of bits". When used normally, these + * numbers are never greater than 128, so an unsigned char is plenty. + * If you're using a nonstandard generator of a larger size, you can set + * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers + * might produce faster code if you set it to an unsigned int.) + */ + +#ifndef PCG_BITCOUNT_T + typedef uint8_t bitcount_t; +#else + typedef PCG_BITCOUNT_T bitcount_t; +#endif + +/* + * C++ requires us to be able to serialize RNG state by printing or reading + * it from a stream. Because we use 128-bit ints, we also need to be able + * ot print them, so here is code to do so. + * + * This code provides enough functionality to print 128-bit ints in decimal + * and zero-padded in hex. It's not a full-featured implementation. + */ + +template +std::basic_ostream& +operator<<(std::basic_ostream& out, pcg128_t value) +{ + auto desired_base = out.flags() & out.basefield; + bool want_hex = desired_base == out.hex; + + if (want_hex) { + uint64_t highpart = uint64_t(value >> 64); + uint64_t lowpart = uint64_t(value); + auto desired_width = out.width(); + if (desired_width > 16) { + out.width(desired_width - 16); + } + if (highpart != 0 || desired_width > 16) + out << highpart; + CharT oldfill = '\0'; + if (highpart != 0) { + out.width(16); + oldfill = out.fill('0'); + } + auto oldflags = out.setf(decltype(desired_base){}, out.showbase); + out << lowpart; + out.setf(oldflags); + if (highpart != 0) { + out.fill(oldfill); + } + return out; + } + constexpr size_t MAX_CHARS_128BIT = 40; + + char buffer[MAX_CHARS_128BIT]; + char* pos = buffer+sizeof(buffer); + *(--pos) = '\0'; + constexpr auto BASE = pcg128_t(10ULL); + do { + auto div = value / BASE; + auto mod = uint32_t(value - (div * BASE)); + *(--pos) = '0' + char(mod); + value = div; + } while(value != pcg128_t(0ULL)); + return out << pos; +} + +template +std::basic_istream& +operator>>(std::basic_istream& in, pcg128_t& value) +{ + typename std::basic_istream::sentry s(in); + + if (!s) + return in; + + constexpr auto BASE = pcg128_t(10ULL); + pcg128_t current(0ULL); + bool did_nothing = true; + bool overflow = false; + for(;;) { + CharT wide_ch = in.get(); + if (!in.good()) + break; + auto ch = in.narrow(wide_ch, '\0'); + if (ch < '0' || ch > '9') { + in.unget(); + break; + } + did_nothing = false; + pcg128_t digit(uint32_t(ch - '0')); + pcg128_t timesbase = current*BASE; + overflow = overflow || timesbase < current; + current = timesbase + digit; + overflow = overflow || current < digit; + } + + if (did_nothing || overflow) { + in.setstate(std::ios::failbit); + if (overflow) + current = ~pcg128_t(0ULL); + } + + value = current; + + return in; +} + +/* + * Likewise, if people use tiny rngs, we'll be serializing uint8_t. + * If we just used the provided IO operators, they'd read/write chars, + * not ints, so we need to define our own. We *can* redefine this operator + * here because we're in our own namespace. + */ + +template +std::basic_ostream& +operator<<(std::basic_ostream&out, uint8_t value) +{ + return out << uint32_t(value); +} + +template +std::basic_istream& +operator>>(std::basic_istream& in, uint8_t& target) +{ + uint32_t value = 0xdecea5edU; + in >> value; + if (!in && value == 0xdecea5edU) + return in; + if (value > uint8_t(~0)) { + in.setstate(std::ios::failbit); + value = ~0U; + } + target = uint8_t(value); + return in; +} + +/* Unfortunately, the above functions don't get found in preference to the + * built in ones, so we create some more specific overloads that will. + * Ugh. + */ + +inline std::ostream& operator<<(std::ostream& out, uint8_t value) +{ + return pcg_extras::operator<< (out, value); +} + +inline std::istream& operator>>(std::istream& in, uint8_t& value) +{ + return pcg_extras::operator>> (in, value); +} + + + +/* + * Useful bitwise operations. + */ + +/* + * XorShifts are invertable, but they are someting of a pain to invert. + * This function backs them out. It's used by the whacky "inside out" + * generator defined later. + */ + +template +inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) +{ + if (2*shift >= bits) { + return x ^ (x >> shift); + } + itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1; + itype highmask1 = ~lowmask1; + itype top1 = x; + itype bottom1 = x & lowmask1; + top1 ^= top1 >> shift; + top1 &= highmask1; + x = top1 | bottom1; + itype lowmask2 = (itype(1U) << (bits - shift)) - 1; + itype bottom2 = x & lowmask2; + bottom2 = unxorshift(bottom2, bits - shift, shift); + bottom2 &= lowmask1; + return top1 | bottom2; +} + +/* + * Rotate left and right. + * + * In ideal world, compilers would spot idiomatic rotate code and convert it + * to a rotate instruction. Of course, opinions vary on what the correct + * idiom is and how to spot it. For clang, sometimes it generates better + * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM. + */ + +template +inline itype rotl(itype value, bitcount_t rot) +{ + constexpr bitcount_t bits = sizeof(itype) * 8; + constexpr bitcount_t mask = bits - 1; +#if PCG_USE_ZEROCHECK_ROTATE_IDIOM + return rot ? (value << rot) | (value >> (bits - rot)) : value; +#else + return (value << rot) | (value >> ((- rot) & mask)); +#endif +} + +template +inline itype rotr(itype value, bitcount_t rot) +{ + constexpr bitcount_t bits = sizeof(itype) * 8; + constexpr bitcount_t mask = bits - 1; +#if PCG_USE_ZEROCHECK_ROTATE_IDIOM + return rot ? (value >> rot) | (value << (bits - rot)) : value; +#else + return (value >> rot) | (value << ((- rot) & mask)); +#endif +} + +/* Unfortunately, both Clang and GCC sometimes perform poorly when it comes + * to properly recognizing idiomatic rotate code, so for we also provide + * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. + * (I hope that these compilers get better so that this code can die.) + * + * These overloads will be preferred over the general template code above. + */ +#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) + +inline uint8_t rotr(uint8_t value, bitcount_t rot) +{ + asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +} + +inline uint16_t rotr(uint16_t value, bitcount_t rot) +{ + asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +} + +inline uint32_t rotr(uint32_t value, bitcount_t rot) +{ + asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +} + +#if __x86_64__ +inline uint64_t rotr(uint64_t value, bitcount_t rot) +{ + asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +} +#endif // __x86_64__ + +#endif // PCG_USE_INLINE_ASM + + +/* + * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of + * 32-bit integers with seed data, but sometimes we want to produce + * larger or smaller integers. + * + * The following code handles this annoyance. + * + * uneven_copy will copy an array of 32-bit ints to an array of larger or + * smaller ints (actually, the code is general it only needing forward + * iterators). The copy is identical to the one that would be performed if + * we just did memcpy on a standard little-endian machine, but works + * regardless of the endian of the machine (or the weirdness of the ints + * involved). + * + * generate_to initializes an array of integers using a SeedSeq + * object. It is given the size as a static constant at compile time and + * tries to avoid memory allocation. If we're filling in 32-bit constants + * we just do it directly. If we need a separate buffer and it's small, + * we allocate it on the stack. Otherwise, we fall back to heap allocation. + * Ugh. + * + * generate_one produces a single value of some integral type using a + * SeedSeq object. + */ + + /* uneven_copy helper, case where destination ints are less than 32 bit. */ + +template +SrcIter uneven_copy_impl( + SrcIter src_first, DestIter dest_first, DestIter dest_last, + std::true_type) +{ + typedef typename std::iterator_traits::value_type src_t; + typedef typename std::iterator_traits::value_type dest_t; + + constexpr bitcount_t SRC_SIZE = sizeof(src_t); + constexpr bitcount_t DEST_SIZE = sizeof(dest_t); + constexpr bitcount_t DEST_BITS = DEST_SIZE * 8; + constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE; + + size_t count = 0; + src_t value = 0; + + while (dest_first != dest_last) { + if ((count++ % SCALE) == 0) + value = *src_first++; // Get more bits + else + value >>= DEST_BITS; // Move down bits + + *dest_first++ = dest_t(value); // Truncates, ignores high bits. + } + return src_first; +} + + /* uneven_copy helper, case where destination ints are more than 32 bit. */ + +template +SrcIter uneven_copy_impl( + SrcIter src_first, DestIter dest_first, DestIter dest_last, + std::false_type) +{ + typedef typename std::iterator_traits::value_type src_t; + typedef typename std::iterator_traits::value_type dest_t; + + constexpr auto SRC_SIZE = sizeof(src_t); + constexpr auto SRC_BITS = SRC_SIZE * 8; + constexpr auto DEST_SIZE = sizeof(dest_t); + constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE; + + while (dest_first != dest_last) { + dest_t value(0UL); + unsigned int shift = 0; + + for (size_t i = 0; i < SCALE; ++i) { + value |= dest_t(*src_first++) << shift; + shift += SRC_BITS; + } + + *dest_first++ = value; + } + return src_first; +} + +/* uneven_copy, call the right code for larger vs. smaller */ + +template +inline SrcIter uneven_copy(SrcIter src_first, + DestIter dest_first, DestIter dest_last) +{ + typedef typename std::iterator_traits::value_type src_t; + typedef typename std::iterator_traits::value_type dest_t; + + constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t); + + return uneven_copy_impl(src_first, dest_first, dest_last, + std::integral_constant{}); +} + +/* generate_to, fill in a fixed-size array of integral type using a SeedSeq + * (actually works for any random-access iterator) + */ + +template +inline void generate_to_impl(SeedSeq&& generator, DestIter dest, + std::true_type) +{ + generator.generate(dest, dest+size); +} + +template +void generate_to_impl(SeedSeq&& generator, DestIter dest, + std::false_type) +{ + typedef typename std::iterator_traits::value_type dest_t; + constexpr auto DEST_SIZE = sizeof(dest_t); + constexpr auto GEN_SIZE = sizeof(uint32_t); + + constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; + constexpr size_t FROM_ELEMS = + GEN_IS_SMALLER + ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE) + : (size + (GEN_SIZE / DEST_SIZE) - 1) + / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); + // this odd code ^^^^^^^^^^^^^^^^^ is work-around for + // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 + + if (FROM_ELEMS <= 1024) { + uint32_t buffer[FROM_ELEMS]; + generator.generate(buffer, buffer+FROM_ELEMS); + uneven_copy(buffer, dest, dest+size); + } else { + uint32_t* buffer = static_cast(malloc(GEN_SIZE * FROM_ELEMS)); + generator.generate(buffer, buffer+FROM_ELEMS); + uneven_copy(buffer, dest, dest+size); + free(static_cast(buffer)); + } +} + +template +inline void generate_to(SeedSeq&& generator, DestIter dest) +{ + typedef typename std::iterator_traits::value_type dest_t; + constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); + + generate_to_impl(std::forward(generator), dest, + std::integral_constant{}); +} + +/* generate_one, produce a value of integral type using a SeedSeq + * (optionally, we can have it produce more than one and pick which one + * we want) + */ + +template +inline UInt generate_one(SeedSeq&& generator) +{ + UInt result[N]; + generate_to(std::forward(generator), result); + return result[i]; +} + +template +auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) + -> typename RngType::result_type +{ + typedef typename RngType::result_type rtype; + rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) + % upper_bound; + for (;;) { + rtype r = rng() - RngType::min(); + if (r >= threshold) + return r % upper_bound; + } +} + +template +void shuffle(Iter from, Iter to, RandType&& rng) +{ + typedef typename std::iterator_traits::difference_type delta_t; + typedef typename std::remove_reference::type::result_type result_t; + auto count = to - from; + while (count > 1) { + delta_t chosen = delta_t(bounded_rand(rng, result_t(count))); + --count; + --to; + using std::swap; + swap(*(from + chosen), *to); + } +} + +/* + * Although std::seed_seq is useful, it isn't everything. Often we want to + * initialize a random-number generator some other way, such as from a random + * device. + * + * Technically, it does not meet the requirements of a SeedSequence because + * it lacks some of the rarely-used member functions (some of which would + * be impossible to provide). However the C++ standard is quite specific + * that actual engines only called the generate method, so it ought not to be + * a problem in practice. + */ + +template +class seed_seq_from { +private: + RngType rng_; + + typedef uint_least32_t result_type; + +public: + template + seed_seq_from(Args&&... args) : + rng_(std::forward(args)...) + { + // Nothing (else) to do... + } + + template + void generate(Iter start, Iter finish) + { + for (auto i = start; i != finish; ++i) + *i = result_type(rng_()); + } + + constexpr size_t size() const + { + return (sizeof(typename RngType::result_type) > sizeof(result_type) + && RngType::max() > ~size_t(0UL)) + ? ~size_t(0UL) + : size_t(RngType::max()); + } +}; + +/* + * Sometimes you might want a distinct seed based on when the program + * was compiled. That way, a particular instance of the program will + * behave the same way, but when recompiled it'll produce a different + * value. + */ + +template +struct static_arbitrary_seed { +private: + static constexpr IntType fnv(IntType hash, const char* pos) { + return *pos == '\0' + ? hash + : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1)); + } + +public: + static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)), + __DATE__ __TIME__ __FILE__); +}; + +// Sometimes, when debugging or testing, it's handy to be able print the name +// of a (in human-readable form). This code allows the idiom: +// +// cout << printable_typename() +// +// to print out my_foo_type_t (or its concrete type if it is a synonym) + +#if __cpp_rtti || __GXX_RTTI + +template +struct printable_typename {}; + +template +std::ostream& operator<<(std::ostream& out, printable_typename) { + const char *implementation_typename = typeid(T).name(); +#ifdef __GNUC__ + int status; + char* pretty_name = + abi::__cxa_demangle(implementation_typename, NULL, NULL, &status); + if (status == 0) + out << pretty_name; + free(static_cast(pretty_name)); + if (status == 0) + return out; +#endif + out << implementation_typename; + return out; +} + +#endif // __cpp_rtti || __GXX_RTTI + +} // namespace pcg_extras + +#endif // PCG_EXTRAS_HPP_INCLUDED diff --git a/libraries/utilities/include/eos/utilities/pcg-random/pcg_random.hpp b/libraries/utilities/include/eos/utilities/pcg-random/pcg_random.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ca197da17dec900e0663664e026e9fdc9015f7b0 --- /dev/null +++ b/libraries/utilities/include/eos/utilities/pcg-random/pcg_random.hpp @@ -0,0 +1,1755 @@ +/* + * PCG Random Number Generation for C++ + * + * Copyright 2014-2017 Melissa O'Neill , + * and the PCG Project contributors. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + * Licensed under the Apache License, Version 2.0 (provided in + * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) + * or under the MIT license (provided in LICENSE-MIT.txt and at + * http://opensource.org/licenses/MIT), at your option. This file may not + * be copied, modified, or distributed except according to those terms. + * + * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See your chosen license for details. + * + * For additional information about the PCG random number generation scheme, + * visit http://www.pcg-random.org/. + */ + +/* + * This code provides the reference implementation of the PCG family of + * random number generators. The code is complex because it implements + * + * - several members of the PCG family, specifically members corresponding + * to the output functions: + * - XSH RR (good for 64-bit state, 32-bit output) + * - XSH RS (good for 64-bit state, 32-bit output) + * - XSL RR (good for 128-bit state, 64-bit output) + * - RXS M XS (statistically most powerful generator) + * - XSL RR RR (good for 128-bit state, 128-bit output) + * - and RXS, RXS M, XSH, XSL (mostly for testing) + * - at potentially *arbitrary* bit sizes + * - with four different techniques for random streams (MCG, one-stream + * LCG, settable-stream LCG, unique-stream LCG) + * - and the extended generation schemes allowing arbitrary periods + * - with all features of C++11 random number generation (and more), + * some of which are somewhat painful, including + * - initializing with a SeedSequence which writes 32-bit values + * to memory, even though the state of the generator may not + * use 32-bit values (it might use smaller or larger integers) + * - I/O for RNGs and a prescribed format, which needs to handle + * the issue that 8-bit and 128-bit integers don't have working + * I/O routines (e.g., normally 8-bit = char, not integer) + * - equality and inequality for RNGs + * - and a number of convenience typedefs to mask all the complexity + * + * The code employes a fairly heavy level of abstraction, and has to deal + * with various C++ minutia. If you're looking to learn about how the PCG + * scheme works, you're probably best of starting with one of the other + * codebases (see www.pcg-random.org). But if you're curious about the + * constants for the various output functions used in those other, simpler, + * codebases, this code shows how they are calculated. + * + * On the positive side, at least there are convenience typedefs so that you + * can say + * + * pcg32 myRNG; + * + * rather than: + * + * pcg_detail::engine< + * uint32_t, // Output Type + * uint64_t, // State Type + * pcg_detail::xsh_rr_mixin, true, // Output Func + * pcg_detail::specific_stream, // Stream Kind + * pcg_detail::default_multiplier // LCG Mult + * > myRNG; + * + */ + +#ifndef PCG_RAND_HPP_INCLUDED +#define PCG_RAND_HPP_INCLUDED 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The pcg_extras namespace contains some support code that is likley to + * be useful for a variety of RNGs, including: + * - 128-bit int support for platforms where it isn't available natively + * - bit twiddling operations + * - I/O of 128-bit and 8-bit integers + * - Handling the evilness of SeedSeq + * - Support for efficiently producing random numbers less than a given + * bound + */ + +#include "pcg_extras.hpp" + +namespace pcg_detail { + +using namespace pcg_extras; + +/* + * The LCG generators need some constants to function. This code lets you + * look up the constant by *type*. For example + * + * default_multiplier::multiplier() + * + * gives you the default multipler for 32-bit integers. We use the name + * of the constant and not a generic word like value to allow these classes + * to be used as mixins. + */ + +template +struct default_multiplier { + // Not defined for an arbitrary type +}; + +template +struct default_increment { + // Not defined for an arbitrary type +}; + +#define PCG_DEFINE_CONSTANT(type, what, kind, constant) \ + template <> \ + struct what ## _ ## kind { \ + static constexpr type kind() { \ + return constant; \ + } \ + }; + +PCG_DEFINE_CONSTANT(uint8_t, default, multiplier, 141U) +PCG_DEFINE_CONSTANT(uint8_t, default, increment, 77U) + +PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, 12829U) +PCG_DEFINE_CONSTANT(uint16_t, default, increment, 47989U) + +PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, 747796405U) +PCG_DEFINE_CONSTANT(uint32_t, default, increment, 2891336453U) + +PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, 6364136223846793005ULL) +PCG_DEFINE_CONSTANT(uint64_t, default, increment, 1442695040888963407ULL) + +PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier, + PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL)) +PCG_DEFINE_CONSTANT(pcg128_t, default, increment, + PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL)) + + +/* + * Each PCG generator is available in four variants, based on how it applies + * the additive constant for its underlying LCG; the variations are: + * + * single stream - all instances use the same fixed constant, thus + * the RNG always somewhere in same sequence + * mcg - adds zero, resulting in a single stream and reduced + * period + * specific stream - the constant can be changed at any time, selecting + * a different random sequence + * unique stream - the constant is based on the memory addresss of the + * object, thus every RNG has its own unique sequence + * + * This variation is provided though mixin classes which define a function + * value called increment() that returns the nesessary additive constant. + */ + + + +/* + * unique stream + */ + + +template +class unique_stream { +protected: + static constexpr bool is_mcg = false; + + // Is never called, but is provided for symmetry with specific_stream + void set_stream(...) + { + abort(); + } + +public: + typedef itype state_type; + + constexpr itype increment() const { + return itype(reinterpret_cast(this) | 1); + } + + constexpr itype stream() const + { + return increment() >> 1; + } + + static constexpr bool can_specify_stream = false; + + static constexpr size_t streams_pow2() + { + return (sizeof(itype) < sizeof(size_t) ? sizeof(itype) + : sizeof(size_t))*8 - 1u; + } + +protected: + constexpr unique_stream() = default; +}; + + +/* + * no stream (mcg) + */ + +template +class no_stream { +protected: + static constexpr bool is_mcg = true; + + // Is never called, but is provided for symmetry with specific_stream + void set_stream(...) + { + abort(); + } + +public: + typedef itype state_type; + + static constexpr itype increment() { + return 0; + } + + static constexpr bool can_specify_stream = false; + + static constexpr size_t streams_pow2() + { + return 0u; + } + +protected: + constexpr no_stream() = default; +}; + + +/* + * single stream/sequence (oneseq) + */ + +template +class oneseq_stream : public default_increment { +protected: + static constexpr bool is_mcg = false; + + // Is never called, but is provided for symmetry with specific_stream + void set_stream(...) + { + abort(); + } + +public: + typedef itype state_type; + + static constexpr itype stream() + { + return default_increment::increment() >> 1; + } + + static constexpr bool can_specify_stream = false; + + static constexpr size_t streams_pow2() + { + return 0u; + } + +protected: + constexpr oneseq_stream() = default; +}; + + +/* + * specific stream + */ + +template +class specific_stream { +protected: + static constexpr bool is_mcg = false; + + itype inc_ = default_increment::increment(); + +public: + typedef itype state_type; + typedef itype stream_state; + + constexpr itype increment() const { + return inc_; + } + + itype stream() + { + return inc_ >> 1; + } + + void set_stream(itype specific_seq) + { + inc_ = (specific_seq << 1) | 1; + } + + static constexpr bool can_specify_stream = true; + + static constexpr size_t streams_pow2() + { + return (sizeof(itype)*8) - 1u; + } + +protected: + specific_stream() = default; + + specific_stream(itype specific_seq) + : inc_(itype(specific_seq << 1) | itype(1U)) + { + // Nothing (else) to do. + } +}; + + +/* + * This is where it all comes together. This function joins together three + * mixin classes which define + * - the LCG additive constant (the stream) + * - the LCG multiplier + * - the output function + * in addition, we specify the type of the LCG state, and the result type, + * and whether to use the pre-advance version of the state for the output + * (increasing instruction-level parallelism) or the post-advance version + * (reducing register pressure). + * + * Given the high level of parameterization, the code has to use some + * template-metaprogramming tricks to handle some of the suble variations + * involved. + */ + +template , + typename multiplier_mixin = default_multiplier > +class engine : protected output_mixin, + public stream_mixin, + protected multiplier_mixin { +protected: + itype state_; + + struct can_specify_stream_tag {}; + struct no_specifiable_stream_tag {}; + + using stream_mixin::increment; + using multiplier_mixin::multiplier; + +public: + typedef xtype result_type; + typedef itype state_type; + + static constexpr size_t period_pow2() + { + return sizeof(state_type)*8 - 2*stream_mixin::is_mcg; + } + + // It would be nice to use std::numeric_limits for these, but + // we can't be sure that it'd be defined for the 128-bit types. + + static constexpr result_type min() + { + return result_type(0UL); + } + + static constexpr result_type max() + { + return result_type(~result_type(0UL)); + } + +protected: + itype bump(itype state) + { + return state * multiplier() + increment(); + } + + itype base_generate() + { + return state_ = bump(state_); + } + + itype base_generate0() + { + itype old_state = state_; + state_ = bump(state_); + return old_state; + } + +public: + result_type operator()() + { + if (output_previous) + return this->output(base_generate0()); + else + return this->output(base_generate()); + } + + result_type operator()(result_type upper_bound) + { + return bounded_rand(*this, upper_bound); + } + +protected: + static itype advance(itype state, itype delta, + itype cur_mult, itype cur_plus); + + static itype distance(itype cur_state, itype newstate, itype cur_mult, + itype cur_plus, itype mask = ~itype(0U)); + + itype distance(itype newstate, itype mask = itype(~itype(0U))) const + { + return distance(state_, newstate, multiplier(), increment(), mask); + } + +public: + void advance(itype delta) + { + state_ = advance(state_, delta, this->multiplier(), this->increment()); + } + + void backstep(itype delta) + { + advance(-delta); + } + + void discard(itype delta) + { + advance(delta); + } + + bool wrapped() + { + if (stream_mixin::is_mcg) { + // For MCGs, the low order two bits never change. In this + // implementation, we keep them fixed at 3 to make this test + // easier. + return state_ == 3; + } else { + return state_ == 0; + } + } + + engine(itype state = itype(0xcafef00dd15ea5e5ULL)) + : state_(this->is_mcg ? state|state_type(3U) + : bump(state + this->increment())) + { + // Nothing else to do. + } + + // This function may or may not exist. It thus has to be a template + // to use SFINAE; users don't have to worry about its template-ness. + + template + engine(itype state, typename sm::stream_state stream_seed) + : stream_mixin(stream_seed), + state_(this->is_mcg ? state|state_type(3U) + : bump(state + this->increment())) + { + // Nothing else to do. + } + + template + engine(SeedSeq&& seedSeq, typename std::enable_if< + !stream_mixin::can_specify_stream + && !std::is_convertible::value + && !std::is_convertible::value, + no_specifiable_stream_tag>::type = {}) + : engine(generate_one(std::forward(seedSeq))) + { + // Nothing else to do. + } + + template + engine(SeedSeq&& seedSeq, typename std::enable_if< + stream_mixin::can_specify_stream + && !std::is_convertible::value + && !std::is_convertible::value, + can_specify_stream_tag>::type = {}) + : engine(generate_one(seedSeq), + generate_one(seedSeq)) + { + // Nothing else to do. + } + + + template + void seed(Args&&... args) + { + new (this) engine(std::forward(args)...); + } + + template + friend bool operator==(const engine&, + const engine&); + + template + friend itype1 operator-(const engine&, + const engine&); + + template + friend std::basic_ostream& + operator<<(std::basic_ostream& out, + const engine&); + + template + friend std::basic_istream& + operator>>(std::basic_istream& in, + engine& rng); +}; + +template +std::basic_ostream& +operator<<(std::basic_ostream& out, + const engine& rng) +{ + auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); + auto space = out.widen(' '); + auto orig_fill = out.fill(); + + out << rng.multiplier() << space + << rng.increment() << space + << rng.state_; + + out.flags(orig_flags); + out.fill(orig_fill); + return out; +} + + +template +std::basic_istream& +operator>>(std::basic_istream& in, + engine& rng) +{ + auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); + + itype multiplier, increment, state; + in >> multiplier >> increment >> state; + + if (!in.fail()) { + bool good = true; + if (multiplier != rng.multiplier()) { + good = false; + } else if (rng.can_specify_stream) { + rng.set_stream(increment >> 1); + } else if (increment != rng.increment()) { + good = false; + } + if (good) { + rng.state_ = state; + } else { + in.clear(std::ios::failbit); + } + } + + in.flags(orig_flags); + return in; +} + + +template +itype engine::advance( + itype state, itype delta, itype cur_mult, itype cur_plus) +{ + // The method used here is based on Brown, "Random Number Generation + // with Arbitrary Stride,", Transactions of the American Nuclear + // Society (Nov. 1994). The algorithm is very similar to fast + // exponentiation. + // + // Even though delta is an unsigned integer, we can pass a + // signed integer to go backwards, it just goes "the long way round". + + constexpr itype ZERO = 0u; // itype may be a non-trivial types, so + constexpr itype ONE = 1u; // we define some ugly constants. + itype acc_mult = 1; + itype acc_plus = 0; + while (delta > ZERO) { + if (delta & ONE) { + acc_mult *= cur_mult; + acc_plus = acc_plus*cur_mult + cur_plus; + } + cur_plus = (cur_mult+ONE)*cur_plus; + cur_mult *= cur_mult; + delta >>= 1; + } + return acc_mult * state + acc_plus; +} + +template +itype engine::distance( + itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask) +{ + constexpr itype ONE = 1u; // itype could be weird, so use constant + itype the_bit = stream_mixin::is_mcg ? itype(4u) : itype(1u); + itype distance = 0u; + while ((cur_state & mask) != (newstate & mask)) { + if ((cur_state & the_bit) != (newstate & the_bit)) { + cur_state = cur_state * cur_mult + cur_plus; + distance |= the_bit; + } + assert((cur_state & the_bit) == (newstate & the_bit)); + the_bit <<= 1; + cur_plus = (cur_mult+ONE)*cur_plus; + cur_mult *= cur_mult; + } + return stream_mixin::is_mcg ? distance >> 2 : distance; +} + +template +itype operator-(const engine& lhs, + const engine& rhs) +{ + static_assert( + std::is_same::value && + std::is_same::value, + "Incomparable generators"); + return rhs.distance(lhs.state_); +} + + +template +bool operator==(const engine& lhs, + const engine& rhs) +{ + return (lhs.multiplier() == rhs.multiplier()) + && (lhs.increment() == rhs.increment()) + && (lhs.state_ == rhs.state_); +} + +template +inline bool operator!=(const engine& lhs, + const engine& rhs) +{ + return !operator==(lhs,rhs); +} + + +template class output_mixin, + bool output_previous = (sizeof(itype) <= 8)> +using oneseq_base = engine, output_previous, + oneseq_stream >; + +template class output_mixin, + bool output_previous = (sizeof(itype) <= 8)> +using unique_base = engine, output_previous, + unique_stream >; + +template class output_mixin, + bool output_previous = (sizeof(itype) <= 8)> +using setseq_base = engine, output_previous, + specific_stream >; + +template class output_mixin, + bool output_previous = (sizeof(itype) <= 8)> +using mcg_base = engine, output_previous, + no_stream >; + +/* + * OUTPUT FUNCTIONS. + * + * These are the core of the PCG generation scheme. They specify how to + * turn the base LCG's internal state into the output value of the final + * generator. + * + * They're implemented as mixin classes. + * + * All of the classes have code that is written to allow it to be applied + * at *arbitrary* bit sizes, although in practice they'll only be used at + * standard sizes supported by C++. + */ + +/* + * XSH RS -- high xorshift, followed by a random shift + * + * Fast. A good performer. + */ + +template +struct xsh_rs_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t opbits = + sparebits-5 >= 64 ? 5 + : sparebits-4 >= 32 ? 4 + : sparebits-3 >= 16 ? 3 + : sparebits-2 >= 4 ? 2 + : sparebits-1 >= 1 ? 1 + : 0; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t maxrandshift = mask; + constexpr bitcount_t topspare = opbits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = topspare + (xtypebits+maxrandshift)/2; + bitcount_t rshift = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + internal ^= internal >> xshift; + xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift)); + return result; + } +}; + +/* + * XSH RR -- high xorshift, followed by a random rotate + * + * Fast. A good performer. Slightly better statistically than XSH RS. + */ + +template +struct xsh_rr_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t wantedopbits = + xtypebits >= 128 ? 7 + : xtypebits >= 64 ? 6 + : xtypebits >= 32 ? 5 + : xtypebits >= 16 ? 4 + : 3; + constexpr bitcount_t opbits = + sparebits >= wantedopbits ? wantedopbits + : sparebits; + constexpr bitcount_t amplifier = wantedopbits - opbits; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t topspare = opbits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits)/2; + bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask + : 0; + bitcount_t amprot = (rot << amplifier) & mask; + internal ^= internal >> xshift; + xtype result = xtype(internal >> bottomspare); + result = rotr(result, amprot); + return result; + } +}; + +/* + * RXS -- random xorshift + */ + +template +struct rxs_mixin { +static xtype output_rxs(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); + constexpr bitcount_t shift = bits - xtypebits; + constexpr bitcount_t extrashift = (xtypebits - shift)/2; + bitcount_t rshift = shift > 64+8 ? (internal >> (bits - 6)) & 63 + : shift > 32+4 ? (internal >> (bits - 5)) & 31 + : shift > 16+2 ? (internal >> (bits - 4)) & 15 + : shift > 8+1 ? (internal >> (bits - 3)) & 7 + : shift > 4+1 ? (internal >> (bits - 2)) & 3 + : shift > 2+1 ? (internal >> (bits - 1)) & 1 + : 0; + internal ^= internal >> (shift + extrashift - rshift); + xtype result = internal >> rshift; + return result; + } +}; + +/* + * RXS M XS -- random xorshift, mcg multiply, fixed xorshift + * + * The most statistically powerful generator, but all those steps + * make it slower than some of the others. We give it the rottenest jobs. + * + * Because it's usually used in contexts where the state type and the + * result type are the same, it is a permutation and is thus invertable. + * We thus provide a function to invert it. This function is used to + * for the "inside out" generator used by the extended generator. + */ + +/* Defined type-based concepts for the multiplication step. They're actually + * all derived by truncating the 128-bit, which was computed to be a good + * "universal" constant. + */ + +template +struct mcg_multiplier { + // Not defined for an arbitrary type +}; + +template +struct mcg_unmultiplier { + // Not defined for an arbitrary type +}; + +PCG_DEFINE_CONSTANT(uint8_t, mcg, multiplier, 217U) +PCG_DEFINE_CONSTANT(uint8_t, mcg, unmultiplier, 105U) + +PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier, 62169U) +PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, 28009U) + +PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier, 277803737U) +PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, 2897767785U) + +PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier, 12605985483714917081ULL) +PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, 15009553638781119849ULL) + +PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier, + PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL)) +PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier, + PCG_128BIT_CONSTANT(14422606686972528997ULL, 15009553638781119849ULL)) + + +template +struct rxs_m_xs_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t opbits = xtypebits >= 128 ? 6 + : xtypebits >= 64 ? 5 + : xtypebits >= 32 ? 4 + : xtypebits >= 16 ? 3 + : 2; + constexpr bitcount_t shift = bits - xtypebits; + constexpr bitcount_t mask = (1 << opbits) - 1; + bitcount_t rshift = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + internal ^= internal >> (opbits + rshift); + internal *= mcg_multiplier::multiplier(); + xtype result = internal >> shift; + result ^= result >> ((2U*xtypebits+2U)/3U); + return result; + } + + static itype unoutput(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t opbits = bits >= 128 ? 6 + : bits >= 64 ? 5 + : bits >= 32 ? 4 + : bits >= 16 ? 3 + : 2; + constexpr bitcount_t mask = (1 << opbits) - 1; + + internal = unxorshift(internal, bits, (2U*bits+2U)/3U); + + internal *= mcg_unmultiplier::unmultiplier(); + + bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + internal = unxorshift(internal, bits, opbits + rshift); + + return internal; + } +}; + + +/* + * RXS M -- random xorshift, mcg multiply + */ + +template +struct rxs_m_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t opbits = xtypebits >= 128 ? 6 + : xtypebits >= 64 ? 5 + : xtypebits >= 32 ? 4 + : xtypebits >= 16 ? 3 + : 2; + constexpr bitcount_t shift = bits - xtypebits; + constexpr bitcount_t mask = (1 << opbits) - 1; + bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + internal ^= internal >> (opbits + rshift); + internal *= mcg_multiplier::multiplier(); + xtype result = internal >> shift; + return result; + } +}; + +/* + * XSL RR -- fixed xorshift (to low bits), random rotate + * + * Useful for 128-bit types that are split across two CPU registers. + */ + +template +struct xsl_rr_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7 + : xtypebits >= 64 ? 6 + : xtypebits >= 32 ? 5 + : xtypebits >= 16 ? 4 + : 3; + constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits + : sparebits; + constexpr bitcount_t amplifier = wantedopbits - opbits; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t topspare = sparebits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + + bitcount_t rot = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + bitcount_t amprot = (rot << amplifier) & mask; + internal ^= internal >> xshift; + xtype result = xtype(internal >> bottomspare); + result = rotr(result, amprot); + return result; + } +}; + + +/* + * XSL RR RR -- fixed xorshift (to low bits), random rotate (both parts) + * + * Useful for 128-bit types that are split across two CPU registers. + * If you really want an invertable 128-bit RNG, I guess this is the one. + */ + +template struct halfsize_trait {}; +template <> struct halfsize_trait { typedef uint64_t type; }; +template <> struct halfsize_trait { typedef uint32_t type; }; +template <> struct halfsize_trait { typedef uint16_t type; }; +template <> struct halfsize_trait { typedef uint8_t type; }; + +template +struct xsl_rr_rr_mixin { + typedef typename halfsize_trait::type htype; + + static itype output(itype internal) + { + constexpr bitcount_t htypebits = bitcount_t(sizeof(htype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - htypebits; + constexpr bitcount_t wantedopbits = htypebits >= 128 ? 7 + : htypebits >= 64 ? 6 + : htypebits >= 32 ? 5 + : htypebits >= 16 ? 4 + : 3; + constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits + : sparebits; + constexpr bitcount_t amplifier = wantedopbits - opbits; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t topspare = sparebits; + constexpr bitcount_t xshift = (topspare + htypebits) / 2; + + bitcount_t rot = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + bitcount_t amprot = (rot << amplifier) & mask; + internal ^= internal >> xshift; + htype lowbits = htype(internal); + lowbits = rotr(lowbits, amprot); + htype highbits = htype(internal >> topspare); + bitcount_t rot2 = lowbits & mask; + bitcount_t amprot2 = (rot2 << amplifier) & mask; + highbits = rotr(highbits, amprot2); + return (itype(highbits) << topspare) ^ itype(lowbits); + } +}; + + +/* + * XSH -- fixed xorshift (to high bits) + * + * You shouldn't use this at 64-bits or less. + */ + +template +struct xsh_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t topspare = 0; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + + internal ^= internal >> xshift; + xtype result = internal >> bottomspare; + return result; + } +}; + +/* + * XSL -- fixed xorshift (to low bits) + * + * You shouldn't use this at 64-bits or less. + */ + +template +struct xsl_mixin { + inline xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t topspare = sparebits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + + internal ^= internal >> xshift; + xtype result = internal >> bottomspare; + return result; + } +}; + +/* ---- End of Output Functions ---- */ + + +template +struct inside_out : private baseclass { + inside_out() = delete; + + typedef typename baseclass::result_type result_type; + typedef typename baseclass::state_type state_type; + static_assert(sizeof(result_type) == sizeof(state_type), + "Require a RNG whose output function is a permutation"); + + static bool external_step(result_type& randval, size_t i) + { + state_type state = baseclass::unoutput(randval); + state = state * baseclass::multiplier() + baseclass::increment() + + state_type(i*2); + result_type result = baseclass::output(state); + randval = result; + state_type zero = + baseclass::is_mcg ? state & state_type(3U) : state_type(0U); + return result == zero; + } + + static bool external_advance(result_type& randval, size_t i, + result_type delta, bool forwards = true) + { + state_type state = baseclass::unoutput(randval); + state_type mult = baseclass::multiplier(); + state_type inc = baseclass::increment() + state_type(i*2); + state_type zero = + baseclass::is_mcg ? state & state_type(3U) : state_type(0U); + state_type dist_to_zero = baseclass::distance(state, zero, mult, inc); + bool crosses_zero = + forwards ? dist_to_zero <= delta + : (-dist_to_zero) <= delta; + if (!forwards) + delta = -delta; + state = baseclass::advance(state, delta, mult, inc); + randval = baseclass::output(state); + return crosses_zero; + } +}; + + +template +class extended : public baseclass { +public: + typedef typename baseclass::state_type state_type; + typedef typename baseclass::result_type result_type; + typedef inside_out insideout; + +private: + static constexpr bitcount_t rtypebits = sizeof(result_type)*8; + static constexpr bitcount_t stypebits = sizeof(state_type)*8; + + static constexpr bitcount_t tick_limit_pow2 = 64U; + + static constexpr size_t table_size = 1UL << table_pow2; + static constexpr size_t table_shift = stypebits - table_pow2; + static constexpr state_type table_mask = + (state_type(1U) << table_pow2) - state_type(1U); + + static constexpr bool may_tick = + (advance_pow2 < stypebits) && (advance_pow2 < tick_limit_pow2); + static constexpr size_t tick_shift = stypebits - advance_pow2; + static constexpr state_type tick_mask = + may_tick ? state_type( + (uint64_t(1) << (advance_pow2*may_tick)) - 1) + // ^-- stupidity to appease GCC warnings + : ~state_type(0U); + + static constexpr bool may_tock = stypebits < tick_limit_pow2; + + result_type data_[table_size]; + + PCG_NOINLINE void advance_table(); + + PCG_NOINLINE void advance_table(state_type delta, bool isForwards = true); + + result_type& get_extended_value() + { + state_type state = this->state_; + if (kdd && baseclass::is_mcg) { + // The low order bits of an MCG are constant, so drop them. + state >>= 2; + } + size_t index = kdd ? state & table_mask + : state >> table_shift; + + if (may_tick) { + bool tick = kdd ? (state & tick_mask) == state_type(0u) + : (state >> tick_shift) == state_type(0u); + if (tick) + advance_table(); + } + if (may_tock) { + bool tock = state == state_type(0u); + if (tock) + advance_table(); + } + return data_[index]; + } + +public: + static constexpr size_t period_pow2() + { + return baseclass::period_pow2() + table_size*extvalclass::period_pow2(); + } + + __attribute__((always_inline)) result_type operator()() + { + result_type rhs = get_extended_value(); + result_type lhs = this->baseclass::operator()(); + return lhs ^ rhs; + } + + result_type operator()(result_type upper_bound) + { + return bounded_rand(*this, upper_bound); + } + + void set(result_type wanted) + { + result_type& rhs = get_extended_value(); + result_type lhs = this->baseclass::operator()(); + rhs = lhs ^ wanted; + } + + void advance(state_type distance, bool forwards = true); + + void backstep(state_type distance) + { + advance(distance, false); + } + + extended(const result_type* data) + : baseclass() + { + datainit(data); + } + + extended(const result_type* data, state_type seed) + : baseclass(seed) + { + datainit(data); + } + + // This function may or may not exist. It thus has to be a template + // to use SFINAE; users don't have to worry about its template-ness. + + template + extended(const result_type* data, state_type seed, + typename bc::stream_state stream_seed) + : baseclass(seed, stream_seed) + { + datainit(data); + } + + extended() + : baseclass() + { + selfinit(); + } + + extended(state_type seed) + : baseclass(seed) + { + selfinit(); + } + + // This function may or may not exist. It thus has to be a template + // to use SFINAE; users don't have to worry about its template-ness. + + template + extended(state_type seed, typename bc::stream_state stream_seed) + : baseclass(seed, stream_seed) + { + selfinit(); + } + +private: + void selfinit(); + void datainit(const result_type* data); + +public: + + template::value + && !std::is_convertible::value>::type> + extended(SeedSeq&& seedSeq) + : baseclass(seedSeq) + { + generate_to(seedSeq, data_); + } + + template + void seed(Args&&... args) + { + new (this) extended(std::forward(args)...); + } + + template + friend bool operator==(const extended&, + const extended&); + + template + friend std::basic_ostream& + operator<<(std::basic_ostream& out, + const extended&); + + template + friend std::basic_istream& + operator>>(std::basic_istream& in, + extended&); + +}; + + +template +void extended::datainit( + const result_type* data) +{ + for (size_t i = 0; i < table_size; ++i) + data_[i] = data[i]; +} + +template +void extended::selfinit() +{ + // We need to fill the extended table with something, and we have + // very little provided data, so we use the base generator to + // produce values. Although not ideal (use a seed sequence, folks!), + // unexpected correlations are mitigated by + // - using XOR differences rather than the number directly + // - the way the table is accessed, its values *won't* be accessed + // in the same order the were written. + // - any strange correlations would only be apparent if we + // were to backstep the generator so that the base generator + // was generating the same values again + result_type xdiff = baseclass::operator()() - baseclass::operator()(); + for (size_t i = 0; i < table_size; ++i) { + data_[i] = baseclass::operator()() ^ xdiff; + } +} + +template +bool operator==(const extended& lhs, + const extended& rhs) +{ + auto& base_lhs = static_cast(lhs); + auto& base_rhs = static_cast(rhs); + return base_lhs == base_rhs + && !std::equal( + std::begin(lhs.data_), std::end(lhs.data_), + std::begin(rhs.data_) + ); +} + +template +inline bool operator!=(const extended& lhs, + const extended& rhs) +{ + return !operator==(lhs, rhs); +} + +template +std::basic_ostream& +operator<<(std::basic_ostream& out, + const extended& rng) +{ + auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); + auto space = out.widen(' '); + auto orig_fill = out.fill(); + + out << rng.multiplier() << space + << rng.increment() << space + << rng.state_; + + for (const auto& datum : rng.data_) + out << space << datum; + + out.flags(orig_flags); + out.fill(orig_fill); + return out; +} + +template +std::basic_istream& +operator>>(std::basic_istream& in, + extended& rng) +{ + extended new_rng; + auto& base_rng = static_cast(new_rng); + in >> base_rng; + + if (in.fail()) + return in; + + auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); + + for (auto& datum : new_rng.data_) { + in >> datum; + if (in.fail()) + goto bail; + } + + rng = new_rng; + +bail: + in.flags(orig_flags); + return in; +} + + + +template +void +extended::advance_table() +{ + bool carry = false; + for (size_t i = 0; i < table_size; ++i) { + if (carry) { + carry = insideout::external_step(data_[i],i+1); + } + bool carry2 = insideout::external_step(data_[i],i+1); + carry = carry || carry2; + } +} + +template +void +extended::advance_table( + state_type delta, bool isForwards) +{ + typedef typename baseclass::state_type base_state_t; + typedef typename extvalclass::state_type ext_state_t; + constexpr bitcount_t basebits = sizeof(base_state_t)*8; + constexpr bitcount_t extbits = sizeof(ext_state_t)*8; + static_assert(basebits <= extbits || advance_pow2 > 0, + "Current implementation might overflow its carry"); + + base_state_t carry = 0; + for (size_t i = 0; i < table_size; ++i) { + base_state_t total_delta = carry + delta; + ext_state_t trunc_delta = ext_state_t(total_delta); + if (basebits > extbits) { + carry = total_delta >> extbits; + } else { + carry = 0; + } + carry += + insideout::external_advance(data_[i],i+1, trunc_delta, isForwards); + } +} + +template +void extended::advance( + state_type distance, bool forwards) +{ + static_assert(kdd, + "Efficient advance is too hard for non-kdd extension. " + "For a weak advance, cast to base class"); + state_type zero = + baseclass::is_mcg ? this->state_ & state_type(3U) : state_type(0U); + if (may_tick) { + state_type ticks = distance >> (advance_pow2*may_tick); + // ^-- stupidity to appease GCC + // warnings + state_type adv_mask = + baseclass::is_mcg ? tick_mask << 2 : tick_mask; + state_type next_advance_distance = this->distance(zero, adv_mask); + if (!forwards) + next_advance_distance = (-next_advance_distance) & tick_mask; + if (next_advance_distance < (distance & tick_mask)) { + ++ticks; + } + if (ticks) + advance_table(ticks, forwards); + } + if (forwards) { + if (may_tock && this->distance(zero) <= distance) + advance_table(); + baseclass::advance(distance); + } else { + if (may_tock && -(this->distance(zero)) <= distance) + advance_table(state_type(1U), false); + baseclass::advance(-distance); + } +} + +} // namespace pcg_detail + +namespace pcg_engines { + +using namespace pcg_detail; + +/* Predefined types for XSH RS */ + +typedef oneseq_base oneseq_xsh_rs_16_8; +typedef oneseq_base oneseq_xsh_rs_32_16; +typedef oneseq_base oneseq_xsh_rs_64_32; +typedef oneseq_base oneseq_xsh_rs_128_64; + +typedef unique_base unique_xsh_rs_16_8; +typedef unique_base unique_xsh_rs_32_16; +typedef unique_base unique_xsh_rs_64_32; +typedef unique_base unique_xsh_rs_128_64; + +typedef setseq_base setseq_xsh_rs_16_8; +typedef setseq_base setseq_xsh_rs_32_16; +typedef setseq_base setseq_xsh_rs_64_32; +typedef setseq_base setseq_xsh_rs_128_64; + +typedef mcg_base mcg_xsh_rs_16_8; +typedef mcg_base mcg_xsh_rs_32_16; +typedef mcg_base mcg_xsh_rs_64_32; +typedef mcg_base mcg_xsh_rs_128_64; + +/* Predefined types for XSH RR */ + +typedef oneseq_base oneseq_xsh_rr_16_8; +typedef oneseq_base oneseq_xsh_rr_32_16; +typedef oneseq_base oneseq_xsh_rr_64_32; +typedef oneseq_base oneseq_xsh_rr_128_64; + +typedef unique_base unique_xsh_rr_16_8; +typedef unique_base unique_xsh_rr_32_16; +typedef unique_base unique_xsh_rr_64_32; +typedef unique_base unique_xsh_rr_128_64; + +typedef setseq_base setseq_xsh_rr_16_8; +typedef setseq_base setseq_xsh_rr_32_16; +typedef setseq_base setseq_xsh_rr_64_32; +typedef setseq_base setseq_xsh_rr_128_64; + +typedef mcg_base mcg_xsh_rr_16_8; +typedef mcg_base mcg_xsh_rr_32_16; +typedef mcg_base mcg_xsh_rr_64_32; +typedef mcg_base mcg_xsh_rr_128_64; + + +/* Predefined types for RXS M XS */ + +typedef oneseq_base oneseq_rxs_m_xs_8_8; +typedef oneseq_base oneseq_rxs_m_xs_16_16; +typedef oneseq_base oneseq_rxs_m_xs_32_32; +typedef oneseq_base oneseq_rxs_m_xs_64_64; +typedef oneseq_base oneseq_rxs_m_xs_128_128; + +typedef unique_base unique_rxs_m_xs_8_8; +typedef unique_base unique_rxs_m_xs_16_16; +typedef unique_base unique_rxs_m_xs_32_32; +typedef unique_base unique_rxs_m_xs_64_64; +typedef unique_base unique_rxs_m_xs_128_128; + +typedef setseq_base setseq_rxs_m_xs_8_8; +typedef setseq_base setseq_rxs_m_xs_16_16; +typedef setseq_base setseq_rxs_m_xs_32_32; +typedef setseq_base setseq_rxs_m_xs_64_64; +typedef setseq_base setseq_rxs_m_xs_128_128; + + // MCG versions don't make sense here, so aren't defined. + +/* Predefined types for XSL RR (only defined for "large" types) */ + +typedef oneseq_base oneseq_xsl_rr_64_32; +typedef oneseq_base oneseq_xsl_rr_128_64; + +typedef unique_base unique_xsl_rr_64_32; +typedef unique_base unique_xsl_rr_128_64; + +typedef setseq_base setseq_xsl_rr_64_32; +typedef setseq_base setseq_xsl_rr_128_64; + +typedef mcg_base mcg_xsl_rr_64_32; +typedef mcg_base mcg_xsl_rr_128_64; + + +/* Predefined types for XSL RR RR (only defined for "large" types) */ + +typedef oneseq_base + oneseq_xsl_rr_rr_64_64; +typedef oneseq_base + oneseq_xsl_rr_rr_128_128; + +typedef unique_base + unique_xsl_rr_rr_64_64; +typedef unique_base + unique_xsl_rr_rr_128_128; + +typedef setseq_base + setseq_xsl_rr_rr_64_64; +typedef setseq_base + setseq_xsl_rr_rr_128_128; + + // MCG versions don't make sense here, so aren't defined. + +/* Extended generators */ + +template +using ext_std8 = extended; + +template +using ext_std16 = extended; + +template +using ext_std32 = extended; + +template +using ext_std64 = extended; + + +template +using ext_oneseq_rxs_m_xs_32_32 = + ext_std32; + +template +using ext_mcg_xsh_rs_64_32 = + ext_std32; + +template +using ext_oneseq_xsh_rs_64_32 = + ext_std32; + +template +using ext_setseq_xsh_rr_64_32 = + ext_std32; + +template +using ext_mcg_xsl_rr_128_64 = + ext_std64; + +template +using ext_oneseq_xsl_rr_128_64 = + ext_std64; + +template +using ext_setseq_xsl_rr_128_64 = + ext_std64; + +} // namespace pcg_engines + +typedef pcg_engines::setseq_xsh_rr_64_32 pcg32; +typedef pcg_engines::oneseq_xsh_rr_64_32 pcg32_oneseq; +typedef pcg_engines::unique_xsh_rr_64_32 pcg32_unique; +typedef pcg_engines::mcg_xsh_rs_64_32 pcg32_fast; + +typedef pcg_engines::setseq_xsl_rr_128_64 pcg64; +typedef pcg_engines::oneseq_xsl_rr_128_64 pcg64_oneseq; +typedef pcg_engines::unique_xsl_rr_128_64 pcg64_unique; +typedef pcg_engines::mcg_xsl_rr_128_64 pcg64_fast; + +typedef pcg_engines::setseq_rxs_m_xs_8_8 pcg8_once_insecure; +typedef pcg_engines::setseq_rxs_m_xs_16_16 pcg16_once_insecure; +typedef pcg_engines::setseq_rxs_m_xs_32_32 pcg32_once_insecure; +typedef pcg_engines::setseq_rxs_m_xs_64_64 pcg64_once_insecure; +typedef pcg_engines::setseq_xsl_rr_rr_128_128 pcg128_once_insecure; + +typedef pcg_engines::oneseq_rxs_m_xs_8_8 pcg8_oneseq_once_insecure; +typedef pcg_engines::oneseq_rxs_m_xs_16_16 pcg16_oneseq_once_insecure; +typedef pcg_engines::oneseq_rxs_m_xs_32_32 pcg32_oneseq_once_insecure; +typedef pcg_engines::oneseq_rxs_m_xs_64_64 pcg64_oneseq_once_insecure; +typedef pcg_engines::oneseq_xsl_rr_rr_128_128 pcg128_oneseq_once_insecure; + + +// These two extended RNGs provide two-dimensionally equidistributed +// 32-bit generators. pcg32_k2_fast occupies the same space as pcg64, +// and can be called twice to generate 64 bits, but does not required +// 128-bit math; on 32-bit systems, it's faster than pcg64 as well. + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<1,16,true> pcg32_k2; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<1,32,true> pcg32_k2_fast; + +// These eight extended RNGs have about as much state as arc4random +// +// - the k variants are k-dimensionally equidistributed +// - the c variants offer better crypographic security +// +// (just how good the cryptographic security is is an open question) + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,true> pcg32_k64; +typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,true> pcg32_k64_oneseq; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,true> pcg32_k64_fast; + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,false> pcg32_c64; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,false> pcg32_c64_oneseq; +typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,false> pcg32_c64_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,true> pcg64_k32; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,true> pcg64_k32_oneseq; +typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,true> pcg64_k32_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,false> pcg64_c32; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,false> pcg64_c32_oneseq; +typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,false> pcg64_c32_fast; + +// These eight extended RNGs have more state than the Mersenne twister +// +// - the k variants are k-dimensionally equidistributed +// - the c variants offer better crypographic security +// +// (just how good the cryptographic security is is an open question) + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,true> pcg32_k1024; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,true> pcg32_k1024_fast; + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,false> pcg32_c1024; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,false> pcg32_c1024_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,true> pcg64_k1024; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,true> pcg64_k1024_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,false> pcg64_c1024; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,false> pcg64_c1024_fast; + +// These generators have an insanely huge period (2^524352), and is suitable +// for silly party tricks, such as dumping out 64 KB ZIP files at an arbitrary +// point in the future. [Actually, over the full period of the generator, it +// will produce every 64 KB ZIP file 2^64 times!] + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<14,16,true> pcg32_k16384; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true> pcg32_k16384_fast; + +#endif // PCG_RAND_HPP_INCLUDED diff --git a/libraries/utilities/include/eos/utilities/pcg-random/pcg_uint128.hpp b/libraries/utilities/include/eos/utilities/pcg-random/pcg_uint128.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ec8e74f80af396e1c389f00e2adf679b5d22d7e0 --- /dev/null +++ b/libraries/utilities/include/eos/utilities/pcg-random/pcg_uint128.hpp @@ -0,0 +1,748 @@ +/* + * PCG Random Number Generation for C++ + * + * Copyright 2014-2017 Melissa O'Neill , + * and the PCG Project contributors. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + * Licensed under the Apache License, Version 2.0 (provided in + * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) + * or under the MIT license (provided in LICENSE-MIT.txt and at + * http://opensource.org/licenses/MIT), at your option. This file may not + * be copied, modified, or distributed except according to those terms. + * + * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See your chosen license for details. + * + * For additional information about the PCG random number generation scheme, + * visit http://www.pcg-random.org/. + */ + +/* + * This code provides a a C++ class that can provide 128-bit (or higher) + * integers. To produce 2K-bit integers, it uses two K-bit integers, + * placed in a union that allowes the code to also see them as four K/2 bit + * integers (and access them either directly name, or by index). + * + * It may seem like we're reinventing the wheel here, because several + * libraries already exist that support large integers, but most existing + * libraries provide a very generic multiprecision code, but here we're + * operating at a fixed size. Also, most other libraries are fairly + * heavyweight. So we use a direct implementation. Sadly, it's much slower + * than hand-coded assembly or direct CPU support. + */ + +#ifndef PCG_UINT128_HPP_INCLUDED +#define PCG_UINT128_HPP_INCLUDED 1 + +#include +#include +#include +#include +#include +#include +#include + +/* + * We want to lay the type out the same way that a native type would be laid + * out, which means we must know the machine's endian, at compile time. + * This ugliness attempts to do so. + */ + +#ifndef PCG_LITTLE_ENDIAN + #if defined(__BYTE_ORDER__) + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define PCG_LITTLE_ENDIAN 1 + #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + #define PCG_LITTLE_ENDIAN 0 + #else + #error __BYTE_ORDER__ does not match a standard endian, pick a side + #endif + #elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN + #define PCG_LITTLE_ENDIAN 1 + #elif __BIG_ENDIAN__ || _BIG_ENDIAN + #define PCG_LITTLE_ENDIAN 0 + #elif __x86_64 || __x86_64__ || __i386 || __i386__ + #define PCG_LITTLE_ENDIAN 1 + #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \ + || __m68k__ || __mc68000__ + #define PCG_LITTLE_ENDIAN 0 + #else + #error Unable to determine target endianness + #endif +#endif + +namespace pcg_extras { + +// Recent versions of GCC have intrinsics we can use to quickly calculate +// the number of leading and trailing zeros in a number. If possible, we +// use them, otherwise we fall back to old-fashioned bit twiddling to figure +// them out. + +#ifndef PCG_BITCOUNT_T + typedef uint8_t bitcount_t; +#else + typedef PCG_BITCOUNT_T bitcount_t; +#endif + +/* + * Provide some useful helper functions + * * flog2 floor(log2(x)) + * * trailingzeros number of trailing zero bits + */ + +#ifdef __GNUC__ // Any GNU-compatible compiler supporting C++11 has + // some useful intrinsics we can use. + +inline bitcount_t flog2(uint32_t v) +{ + return 31 - __builtin_clz(v); +} + +inline bitcount_t trailingzeros(uint32_t v) +{ + return __builtin_ctz(v); +} + +inline bitcount_t flog2(uint64_t v) +{ +#if UINT64_MAX == ULONG_MAX + return 63 - __builtin_clzl(v); +#elif UINT64_MAX == ULLONG_MAX + return 63 - __builtin_clzll(v); +#else + #error Cannot find a function for uint64_t +#endif +} + +inline bitcount_t trailingzeros(uint64_t v) +{ +#if UINT64_MAX == ULONG_MAX + return __builtin_ctzl(v); +#elif UINT64_MAX == ULLONG_MAX + return __builtin_ctzll(v); +#else + #error Cannot find a function for uint64_t +#endif +} + +#else // Otherwise, we fall back to bit twiddling + // implementations + +inline bitcount_t flog2(uint32_t v) +{ + // Based on code by Eric Cole and Mark Dickinson, which appears at + // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn + + static const uint8_t multiplyDeBruijnBitPos[32] = { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 + }; + + v |= v >> 1; // first round down to one less than a power of 2 + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + + return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27]; +} + +inline bitcount_t trailingzeros(uint32_t v) +{ + static const uint8_t multiplyDeBruijnBitPos[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27]; +} + +inline bitcount_t flog2(uint64_t v) +{ + uint32_t high = v >> 32; + uint32_t low = uint32_t(v); + + return high ? 32+flog2(high) : flog2(low); +} + +inline bitcount_t trailingzeros(uint64_t v) +{ + uint32_t high = v >> 32; + uint32_t low = uint32_t(v); + + return low ? trailingzeros(low) : trailingzeros(high)+32; +} + +#endif + +template +inline bitcount_t clog2(UInt v) +{ + return flog2(v) + ((v & (-v)) != v); +} + +template +inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout) +{ + UInt half_result = y + carryin; + UInt result = x + half_result; + *carryout = (half_result < y) || (result < x); + return result; +} + +template +inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout) +{ + UInt half_result = y + carryin; + UInt result = x - half_result; + *carryout = (half_result < y) || (result > x); + return result; +} + + +template +class uint_x4 { +// private: +public: + union { +#if PCG_LITTLE_ENDIAN + struct { + UInt v0, v1, v2, v3; + } w; + struct { + UIntX2 v01, v23; + } d; +#else + struct { + UInt v3, v2, v1, v0; + } w; + struct { + UIntX2 v23, v01; + } d; +#endif + // For the array access versions, the code that uses the array + // must handle endian itself. Yuck. + UInt wa[4]; + UIntX2 da[2]; + }; + +public: + uint_x4() = default; + + constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0) +#if PCG_LITTLE_ENDIAN + : w{v0, v1, v2, v3} +#else + : w{v3, v2, v1, v0} +#endif + { + // Nothing (else) to do + } + + constexpr uint_x4(UIntX2 v23, UIntX2 v01) +#if PCG_LITTLE_ENDIAN + : d{v01,v23} +#else + : d{v23,v01} +#endif + { + // Nothing (else) to do + } + + template::value + && sizeof(Integral) <= sizeof(UIntX2)) + >::type* = nullptr> + constexpr uint_x4(Integral v01) +#if PCG_LITTLE_ENDIAN + : d{UIntX2(v01),0UL} +#else + : d{0UL,UIntX2(v01)} +#endif + { + // Nothing (else) to do + } + + explicit constexpr operator uint64_t() const + { + return d.v01; + } + + explicit constexpr operator uint32_t() const + { + return w.v0; + } + + explicit constexpr operator int() const + { + return w.v0; + } + + explicit constexpr operator uint16_t() const + { + return w.v0; + } + + explicit constexpr operator uint8_t() const + { + return w.v0; + } + + typedef typename std::conditional::value, + unsigned long long, + unsigned long>::type + uint_missing_t; + + explicit constexpr operator uint_missing_t() const + { + return d.v01; + } + + explicit constexpr operator bool() const + { + return d.v01 || d.v23; + } + + template + friend uint_x4 operator*(const uint_x4&, const uint_x4&); + + template + friend std::pair< uint_x4,uint_x4 > + divmod(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator+(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator-(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator<<(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator>>(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator&(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator|(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator^(const uint_x4&, const uint_x4&); + + template + friend bool operator==(const uint_x4&, const uint_x4&); + + template + friend bool operator!=(const uint_x4&, const uint_x4&); + + template + friend bool operator<(const uint_x4&, const uint_x4&); + + template + friend bool operator<=(const uint_x4&, const uint_x4&); + + template + friend bool operator>(const uint_x4&, const uint_x4&); + + template + friend bool operator>=(const uint_x4&, const uint_x4&); + + template + friend uint_x4 operator~(const uint_x4&); + + template + friend uint_x4 operator-(const uint_x4&); + + template + friend bitcount_t flog2(const uint_x4&); + + template + friend bitcount_t trailingzeros(const uint_x4&); + + uint_x4& operator*=(const uint_x4& rhs) + { + uint_x4 result = *this * rhs; + return *this = result; + } + + uint_x4& operator/=(const uint_x4& rhs) + { + uint_x4 result = *this / rhs; + return *this = result; + } + + uint_x4& operator%=(const uint_x4& rhs) + { + uint_x4 result = *this % rhs; + return *this = result; + } + + uint_x4& operator+=(const uint_x4& rhs) + { + uint_x4 result = *this + rhs; + return *this = result; + } + + uint_x4& operator-=(const uint_x4& rhs) + { + uint_x4 result = *this - rhs; + return *this = result; + } + + uint_x4& operator&=(const uint_x4& rhs) + { + uint_x4 result = *this & rhs; + return *this = result; + } + + uint_x4& operator|=(const uint_x4& rhs) + { + uint_x4 result = *this | rhs; + return *this = result; + } + + uint_x4& operator^=(const uint_x4& rhs) + { + uint_x4 result = *this ^ rhs; + return *this = result; + } + + uint_x4& operator>>=(bitcount_t shift) + { + uint_x4 result = *this >> shift; + return *this = result; + } + + uint_x4& operator<<=(bitcount_t shift) + { + uint_x4 result = *this << shift; + return *this = result; + } + +}; + +template +bitcount_t flog2(const uint_x4& v) +{ +#if PCG_LITTLE_ENDIAN + for (uint8_t i = 4; i !=0; /* dec in loop */) { + --i; +#else + for (uint8_t i = 0; i < 4; ++i) { +#endif + if (v.wa[i] == 0) + continue; + return flog2(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i; + } + abort(); +} + +template +bitcount_t trailingzeros(const uint_x4& v) +{ +#if PCG_LITTLE_ENDIAN + for (uint8_t i = 0; i < 4; ++i) { +#else + for (uint8_t i = 4; i !=0; /* dec in loop */) { + --i; +#endif + if (v.wa[i] != 0) + return trailingzeros(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i; + } + return (sizeof(U)*CHAR_BIT)*4; +} + +template +std::pair< uint_x4, uint_x4 > + divmod(const uint_x4& orig_dividend, + const uint_x4& divisor) +{ + // If the dividend is less than the divisor, the answer is always zero. + // This takes care of boundary cases like 0/x (which would otherwise be + // problematic because we can't take the log of zero. (The boundary case + // of division by zero is undefined.) + if (orig_dividend < divisor) + return { uint_x4(0UL), orig_dividend }; + + auto dividend = orig_dividend; + + auto log2_divisor = flog2(divisor); + auto log2_dividend = flog2(dividend); + // assert(log2_dividend >= log2_divisor); + bitcount_t logdiff = log2_dividend - log2_divisor; + + constexpr uint_x4 ONE(1UL); + if (logdiff == 0) + return { ONE, dividend - divisor }; + + // Now we change the log difference to + // floor(log2(divisor)) - ceil(log2(dividend)) + // to ensure that we *underestimate* the result. + logdiff -= 1; + + uint_x4 quotient(0UL); + + auto qfactor = ONE << logdiff; + auto factor = divisor << logdiff; + + do { + dividend -= factor; + quotient += qfactor; + while (dividend < factor) { + factor >>= 1; + qfactor >>= 1; + } + } while (dividend >= divisor); + + return { quotient, dividend }; +} + +template +uint_x4 operator/(const uint_x4& dividend, + const uint_x4& divisor) +{ + return divmod(dividend, divisor).first; +} + +template +uint_x4 operator%(const uint_x4& dividend, + const uint_x4& divisor) +{ + return divmod(dividend, divisor).second; +} + + +template +uint_x4 operator*(const uint_x4& a, + const uint_x4& b) +{ + uint_x4 r = {0U, 0U, 0U, 0U}; + bool carryin = false; + bool carryout; + UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0); + r.w.v0 = UInt(a0b0); + r.w.v1 = UInt(a0b0 >> 32); + + UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0); + r.w.v2 = UInt(a1b0 >> 32); + r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout); + carryin = carryout; + r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); + + UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1); + carryin = false; + r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> 32), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); + + carryin = false; + r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout); + carryin = carryout; + r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); + + UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1); + carryin = false; + r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> 32), carryin, &carryout); + + r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01; + + return r; +} + + +template +uint_x4 operator+(const uint_x4& a, + const uint_x4& b) +{ + uint_x4 r = {0U, 0U, 0U, 0U}; + + bool carryin = false; + bool carryout; + r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout); + carryin = carryout; + r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout); + carryin = carryout; + r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout); + carryin = carryout; + r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout); + + return r; +} + +template +uint_x4 operator-(const uint_x4& a, + const uint_x4& b) +{ + uint_x4 r = {0U, 0U, 0U, 0U}; + + bool carryin = false; + bool carryout; + r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout); + carryin = carryout; + r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout); + carryin = carryout; + r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout); + carryin = carryout; + r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout); + + return r; +} + + +template +uint_x4 operator&(const uint_x4& a, + const uint_x4& b) +{ + return uint_x4(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01); +} + +template +uint_x4 operator|(const uint_x4& a, + const uint_x4& b) +{ + return uint_x4(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01); +} + +template +uint_x4 operator^(const uint_x4& a, + const uint_x4& b) +{ + return uint_x4(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01); +} + +template +uint_x4 operator~(const uint_x4& v) +{ + return uint_x4(~v.d.v23, ~v.d.v01); +} + +template +uint_x4 operator-(const uint_x4& v) +{ + return uint_x4(0UL,0UL) - v; +} + +template +bool operator==(const uint_x4& a, const uint_x4& b) +{ + return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23); +} + +template +bool operator!=(const uint_x4& a, const uint_x4& b) +{ + return !operator==(a,b); +} + + +template +bool operator<(const uint_x4& a, const uint_x4& b) +{ + return (a.d.v23 < b.d.v23) + || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01)); +} + +template +bool operator>(const uint_x4& a, const uint_x4& b) +{ + return operator<(b,a); +} + +template +bool operator<=(const uint_x4& a, const uint_x4& b) +{ + return !(operator<(b,a)); +} + +template +bool operator>=(const uint_x4& a, const uint_x4& b) +{ + return !(operator<(a,b)); +} + + + +template +uint_x4 operator<<(const uint_x4& v, + const bitcount_t shift) +{ + uint_x4 r = {0U, 0U, 0U, 0U}; + const bitcount_t bits = sizeof(UInt) * CHAR_BIT; + const bitcount_t bitmask = bits - 1; + const bitcount_t shiftdiv = shift / bits; + const bitcount_t shiftmod = shift & bitmask; + + if (shiftmod) { + UInt carryover = 0; +#if PCG_LITTLE_ENDIAN + for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { +#else + for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { + --out, --in; +#endif + r.wa[out] = (v.wa[in] << shiftmod) | carryover; + carryover = (v.wa[in] >> (bits - shiftmod)); + } + } else { +#if PCG_LITTLE_ENDIAN + for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { +#else + for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { + --out, --in; +#endif + r.wa[out] = v.wa[in]; + } + } + + return r; +} + +template +uint_x4 operator>>(const uint_x4& v, + const bitcount_t shift) +{ + uint_x4 r = {0U, 0U, 0U, 0U}; + const bitcount_t bits = sizeof(UInt) * CHAR_BIT; + const bitcount_t bitmask = bits - 1; + const bitcount_t shiftdiv = shift / bits; + const bitcount_t shiftmod = shift & bitmask; + + if (shiftmod) { + UInt carryover = 0; +#if PCG_LITTLE_ENDIAN + for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { + --out, --in; +#else + for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { +#endif + r.wa[out] = (v.wa[in] >> shiftmod) | carryover; + carryover = (v.wa[in] << (bits - shiftmod)); + } + } else { +#if PCG_LITTLE_ENDIAN + for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { + --out, --in; +#else + for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { +#endif + r.wa[out] = v.wa[in]; + } + } + + return r; +} + +} // namespace pcg_extras + +#endif // PCG_UINT128_HPP_INCLUDED diff --git a/libraries/utilities/include/eos/utilities/randutils.hpp b/libraries/utilities/include/eos/utilities/randutils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0d0ae008a061a4022313f82de3c79c5dc177caa6 --- /dev/null +++ b/libraries/utilities/include/eos/utilities/randutils.hpp @@ -0,0 +1,810 @@ +/* + * Random-Number Utilities (randutil) + * Addresses common issues with C++11 random number generation. + * Makes good seeding easier, and makes using RNGs easy while retaining + * all the power. + * + * The MIT License (MIT) + * + * Copyright (c) 2015 Melissa E. O'Neill + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RANDUTILS_HPP +#define RANDUTILS_HPP 1 + +/* + * This header includes three class templates that can help make C++11 + * random number generation easier to use. + * + * randutils::seed_seq_fe + * + * Fixed-Entropy Seed sequence + * + * Provides a replacement for std::seed_seq that avoids problems with bias, + * performs better in empirical statistical tests, and executes faster in + * normal-sized use cases. + * + * In normal use, it's accessed via one of the following type aliases + * + * randutils::seed_seq_fe128 + * randutils::seed_seq_fe256 + * + * It's discussed in detail at + * http://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html + * and the motivation for its creation (what's wrong with std::seed_seq) here + * http://www.pcg-random.org/posts/cpp-seeding-surprises.html + * + * + * randutils::auto_seeded + * + * Extends a seed sequence class with a nondeterministic default constructor. + * Uses a variety of local sources of entropy to portably initialize any + * seed sequence to a good default state. + * + * In normal use, it's accessed via one of the following type aliases, which + * use seed_seq_fe128 and seed_seq_fe256 above. + * + * randutils::auto_seed_128 + * randutils::auto_seed_256 + * + * It's discussed in detail at + * http://www.pcg-random.org/posts/simple-portable-cpp-seed-entropy.html + * and its motivation (why you can't just use std::random_device) here + * http://www.pcg-random.org/posts/cpps-random_device.html + * + * + * randutils::random_generator + * + * An Easy-to-Use Random API + * + * Provides all the power of C++11's random number facility in an easy-to + * use wrapper. + * + * In normal use, it's accessed via one of the following type aliases, which + * also use auto_seed_256 by default + * + * randutils::default_rng + * randutils::mt19937_rng + * + * It's discussed in detail at + * http://www.pcg-random.org/posts/ease-of-use-without-loss-of-power.html + */ + +#include +#include +#include +#include +#include +#include // for std::hash +#include +#include +#include +#include +#include +#include +#include + +// Ugly platform-specific code for auto_seeded + +#if !defined(RANDUTILS_CPU_ENTROPY) && defined(__has_builtin) + #if __has_builtin(__builtin_readcyclecounter) + #define RANDUTILS_CPU_ENTROPY __builtin_readcyclecounter() + #endif +#endif +#if !defined(RANDUTILS_CPU_ENTROPY) + #if __i386__ + #if __GNUC__ + #define RANDUTILS_CPU_ENTROPY __builtin_ia32_rdtsc() + #else + #include + #define RANDUTILS_CPU_ENTROPY __rdtsc() + #endif + #else + #define RANDUTILS_CPU_ENTROPY 0 + #endif +#endif + +#if defined(RANDUTILS_GETPID) + // Already defined externally +#elif defined(_WIN64) || defined(_WIN32) + #include + #define RANDUTILS_GETPID _getpid() +#elif defined(__unix__) || defined(__unix) \ + || (defined(__APPLE__) && defined(__MACH__)) + #include + #define RANDUTILS_GETPID getpid() +#else + #define RANDUTILS_GETPID 0 +#endif + +#if __cpp_constexpr >= 201304L + #define RANDUTILS_GENERALIZED_CONSTEXPR constexpr +#else + #define RANDUTILS_GENERALIZED_CONSTEXPR +#endif + + + +namespace randutils { + +////////////////////////////////////////////////////////////////////////////// +// +// seed_seq_fe +// +////////////////////////////////////////////////////////////////////////////// + +/* + * seed_seq_fe implements a fixed-entropy seed sequence; it conforms to all + * the requirements of a Seed Sequence concept. + * + * seed_seq_fe implements a seed sequence which seeds based on a store of + * N * 32 bits of entropy. Typically, it would be initialized with N or more + * integers. + * + * seed_seq_fe128 and seed_seq_fe256 are provided as convenience typedefs for + * 128- and 256-bit entropy stores respectively. These variants outperform + * std::seed_seq, while being better mixing the bits it is provided as entropy. + * In almost all common use cases, they serve as better drop-in replacements + * for seed_seq. + * + * Technical details + * + * Assuming it constructed with M seed integers as input, it exhibits the + * following properties + * + * * Diffusion/Avalanche: A single-bit change in any of the M inputs has a + * 50% chance of flipping every bit in the bitstream produced by generate. + * Initializing the N-word entropy store with M words requires O(N * M) + * time precisely because of the avalanche requirements. Once constructed, + * calls to generate are linear in the number of words generated. + * + * * Bias freedom/Bijection: If M == N, the state of the entropy store is a + * bijection from the M inputs (i.e., no states occur twice, none are + * omitted). If M > N the number of times each state can occur is the same + * (each state occurs 2**(32*(M-N)) times, where ** is the power function). + * If M < N, some states cannot occur (bias) but no state occurs more + * than once (it's impossible to avoid bias if M < N; ideally N should not + * be chosen so that it is more than M). + * + * Likewise, the generate function has similar properties (with the entropy + * store as the input data). If more outputs are requested than there is + * entropy, some outputs cannot occur. For example, the Mersenne Twister + * will request 624 outputs, to initialize it's 19937-bit state, which is + * much larger than a 128-bit or 256-bit entropy pool. But in practice, + * limiting the Mersenne Twister to 2**128 possible initializations gives + * us enough initializations to give a unique initialization to trillions + * of computers for billions of years. If you really have 624 words of + * *real* high-quality entropy you want to use, you probably don't need + * an entropy mixer like this class at all. But if you *really* want to, + * nothing is stopping you from creating a randutils::seed_seq_fe<624>. + * + * * As a consequence of the above properties, if all parts of the provided + * seed data are kept constant except one, and the remaining part is varied + * through K different states, K different output sequences will be produced. + * + * * Also, because the amount of entropy stored is fixed, this class never + * performs dynamic allocation and is free of the possibility of generating + * an exception. + * + * Ideas used to implement this code include hashing, a simple PCG generator + * based on an MCG base with an XorShift output function and permutation + * functions on tuples. + * + * More detail at + * http://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html + */ + +template +struct seed_seq_fe { +public: + // types + typedef IntRep result_type; + +private: + static constexpr uint32_t INIT_A = 0x43b0d7e5; + static constexpr uint32_t MULT_A = 0x931e8875; + + static constexpr uint32_t INIT_B = 0x8b51f9dd; + static constexpr uint32_t MULT_B = 0x58f38ded; + + static constexpr uint32_t MIX_MULT_L = 0xca01f9dd; + static constexpr uint32_t MIX_MULT_R = 0x4973f715; + static constexpr uint32_t XSHIFT = sizeof(IntRep)*8/2; + + RANDUTILS_GENERALIZED_CONSTEXPR + static IntRep fast_exp(IntRep x, IntRep power) + { + IntRep result = IntRep(1); + IntRep multiplier = x; + while (power != IntRep(0)) { + IntRep thismult = power & IntRep(1) ? multiplier : IntRep(1); + result *= thismult; + power >>= 1; + multiplier *= multiplier; + } + return result; + } + + std::array mixer_; + + template + void mix_entropy(InputIter begin, InputIter end); + +public: + seed_seq_fe(const seed_seq_fe&) = delete; + void operator=(const seed_seq_fe&) = delete; + + template + seed_seq_fe(std::initializer_list init) + { + seed(init.begin(), init.end()); + } + + template + seed_seq_fe(InputIter begin, InputIter end) + { + seed(begin, end); + } + + // generating functions + template + void generate(RandomAccessIterator first, RandomAccessIterator last) const; + + static constexpr size_t size() + { + return count; + } + + template + void param(OutputIterator dest) const; + + template + void seed(InputIter begin, InputIter end) + { + mix_entropy(begin, end); + // For very small sizes, we do some additional mixing. For normal + // sizes, this loop never performs any iterations. + for (size_t i = 1; i < mix_rounds; ++i) + stir(); + } + + seed_seq_fe& stir() + { + mix_entropy(mixer_.begin(), mixer_.end()); + return *this; + } + +}; + +template +template +void seed_seq_fe::mix_entropy(InputIter begin, InputIter end) +{ + auto hash_const = INIT_A; + auto hash = [&](IntRep value) { + value ^= hash_const; + hash_const *= MULT_A; + value *= hash_const; + value ^= value >> XSHIFT; + return value; + }; + auto mix = [](IntRep x, IntRep y) { + IntRep result = MIX_MULT_L*x - MIX_MULT_R*y; + result ^= result >> XSHIFT; + return result; + }; + + InputIter current = begin; + for (auto& elem : mixer_) { + if (current != end) + elem = hash(*current++); + else + elem = hash(0U); + } + for (auto& src : mixer_) + for (auto& dest : mixer_) + if (&src != &dest) + dest = mix(dest,hash(src)); + for (; current != end; ++current) + for (auto& dest : mixer_) + dest = mix(dest,hash(*current)); +} + +template +template +void seed_seq_fe::param(OutputIterator dest) const +{ + const IntRep INV_A = fast_exp(MULT_A, IntRep(-1)); + const IntRep MIX_INV_L = fast_exp(MIX_MULT_L, IntRep(-1)); + + auto mixer_copy = mixer_; + for (size_t round = 0; round < mix_rounds; ++round) { + // Advance to the final value. We'll backtrack from that. + auto hash_const = INIT_A*fast_exp(MULT_A, IntRep(count * count)); + + for (auto src = mixer_copy.rbegin(); src != mixer_copy.rend(); ++src) + for (auto dest = mixer_copy.rbegin(); dest != mixer_copy.rend(); + ++dest) + if (src != dest) { + IntRep revhashed = *src; + auto mult_const = hash_const; + hash_const *= INV_A; + revhashed ^= hash_const; + revhashed *= mult_const; + revhashed ^= revhashed >> XSHIFT; + IntRep unmixed = *dest; + unmixed ^= unmixed >> XSHIFT; + unmixed += MIX_MULT_R*revhashed; + unmixed *= MIX_INV_L; + *dest = unmixed; + } + for (auto i = mixer_copy.rbegin(); i != mixer_copy.rend(); ++i) { + IntRep unhashed = *i; + unhashed ^= unhashed >> XSHIFT; + unhashed *= fast_exp(hash_const, IntRep(-1)); + hash_const *= INV_A; + unhashed ^= hash_const; + *i = unhashed; + } + } + std::copy(mixer_copy.begin(), mixer_copy.end(), dest); +} + + +template +template +void seed_seq_fe::generate( + RandomAccessIterator dest_begin, + RandomAccessIterator dest_end) const +{ + auto src_begin = mixer_.begin(); + auto src_end = mixer_.end(); + auto src = src_begin; + auto hash_const = INIT_B; + for (auto dest = dest_begin; dest != dest_end; ++dest) { + auto dataval = *src; + if (++src == src_end) + src = src_begin; + dataval ^= hash_const; + hash_const *= MULT_B; + dataval *= hash_const; + dataval ^= dataval >> XSHIFT; + *dest = dataval; + } +} + +using seed_seq_fe128 = seed_seq_fe<4, uint32_t>; +using seed_seq_fe256 = seed_seq_fe<8, uint32_t>; + + +////////////////////////////////////////////////////////////////////////////// +// +// auto_seeded +// +////////////////////////////////////////////////////////////////////////////// + +/* + * randutils::auto_seeded + * + * Extends a seed sequence class with a nondeterministic default constructor. + * Uses a variety of local sources of entropy to portably initialize any + * seed sequence to a good default state. + * + * In normal use, it's accessed via one of the following type aliases, which + * use seed_seq_fe128 and seed_seq_fe256 above. + * + * randutils::auto_seed_128 + * randutils::auto_seed_256 + * + * It's discussed in detail at + * http://www.pcg-random.org/posts/simple-portable-cpp-seed-entropy.html + * and its motivation (why you can't just use std::random_device) here + * http://www.pcg-random.org/posts/cpps-random_device.html + */ + +template +class auto_seeded : public SeedSeq { + using default_seeds = std::array; + + template + static uint32_t crushto32(T value) + { + if (sizeof(T) <= 4) + return uint32_t(value); + else { + uint64_t result = uint64_t(value); + result *= 0xbc2ad017d719504d; + return uint32_t(result ^ (result >> 32)); + } + } + + template + static uint32_t hash(T&& value) + { + return crushto32(std::hash::type>::type>{}( + std::forward(value))); + } + + static constexpr uint32_t fnv(uint32_t hash, const char* pos) + { + return *pos == '\0' ? hash : fnv((hash * 16777619U) ^ *pos, pos+1); + } + + default_seeds local_entropy() + { + // This is a constant that changes every time we compile the code + constexpr uint32_t compile_stamp = + fnv(2166136261U, __DATE__ __TIME__ __FILE__); + + // Some people think you shouldn't use the random device much because + // on some platforms it could be expensive to call or "use up" vital + // system-wide entropy, so we just call it once. + static uint32_t random_int = std::random_device{}(); + + // The heap can vary from run to run as well. + void* malloc_addr = malloc(sizeof(int)); + free(malloc_addr); + auto heap = hash(malloc_addr); + auto stack = hash(&malloc_addr); + + // Every call, we increment our random int. We don't care about race + // conditons. The more, the merrier. + random_int += 0xedf19156; + + // Classic seed, the time. It ought to change, especially since + // this is (hopefully) nanosecond resolution time. + auto hitime = std::chrono::high_resolution_clock::now() + .time_since_epoch().count(); + + // Address of the thing being initialized. That can mean that + // different seed sequences in different places in memory will be + // different. Even for the same object, it may vary from run to + // run in systems with ASLR, such as OS X, but on Linux it might not + // unless we compile with -fPIC -pic. + auto self_data = hash(this); + + // The address of the time function. It should hopefully be in + // a system library that hopefully isn't always in the same place + // (might not change until system is rebooted though) + auto time_func = hash(&std::chrono::high_resolution_clock::now); + + // The address of the exit function. It should hopefully be in + // a system library that hopefully isn't always in the same place + // (might not change until system is rebooted though). Hopefully + // it's in a different library from time_func. + auto exit_func = hash(&_Exit); + + // The address of a local function. That may be in a totally + // different part of memory. On OS X it'll vary from run to run thanks + // to ASLR, on Linux it might not unless we compile with -fPIC -pic. + // Need the cast because it's an overloaded + // function and we need to pick the right one. + auto self_func = hash( + static_cast( + &auto_seeded::crushto32)); + + // Hash our thread id. It seems to vary from run to run on OS X, not + // so much on Linux. + auto thread_id = hash(std::this_thread::get_id()); + + // Hash of the ID of a type. May or may not vary, depending on + // implementation. + #if __cpp_rtti || __GXX_RTTI + auto type_id = crushto32(typeid(*this).hash_code()); + #else + uint32_t type_id = 0; + #endif + + // Platform-specific entropy + auto pid = crushto32(RANDUTILS_GETPID); + auto cpu = crushto32(RANDUTILS_CPU_ENTROPY); + + return {{random_int, crushto32(hitime), stack, heap, self_data, + self_func, exit_func, thread_id, type_id, pid, cpu}}; + } + + +public: + using SeedSeq::SeedSeq; + + using base_seed_seq = SeedSeq; + + const base_seed_seq& base() const + { + return *this; + } + + base_seed_seq& base() + { + return *this; + } + + auto_seeded(default_seeds seeds) + : SeedSeq(seeds.begin(), seeds.end()) + { + // Nothing else to do + } + + auto_seeded() + : auto_seeded(local_entropy()) + { + // Nothing else to do + } +}; + +using auto_seed_128 = auto_seeded; +using auto_seed_256 = auto_seeded; + + +////////////////////////////////////////////////////////////////////////////// +// +// uniform_distribution +// +////////////////////////////////////////////////////////////////////////////// + +/* + * This template typedef provides either + * - uniform_int_distribution, or + * - uniform_real_distribution + * depending on the provided type + */ + +template +using uniform_distribution = typename std::conditional< + std::is_integral::value, + std::uniform_int_distribution, + std::uniform_real_distribution >::type; + + + +////////////////////////////////////////////////////////////////////////////// +// +// random_generator +// +////////////////////////////////////////////////////////////////////////////// + +/* + * randutils::random_generator + * + * An Easy-to-Use Random API + * + * Provides all the power of C++11's random number facility in an easy-to + * use wrapper. + * + * In normal use, it's accessed via one of the following type aliases, which + * also use auto_seed_256 by default + * + * randutils::default_rng + * randutils::mt19937_rng + * + * It's discussed in detail at + * http://www.pcg-random.org/posts/ease-of-use-without-loss-of-power.html + */ + +template +class random_generator { +public: + using engine_type = RandomEngine; + using default_seed_type = DefaultSeedSeq; +private: + engine_type engine_; + + // This SFNAE evilness provides a mechanism to cast classes that aren't + // themselves (technically) Seed Sequences but derive from a seed + // sequence to be passed to functions that require actual Seed Squences. + // To do so, the class should provide a the type base_seed_seq and a + // base() member function. + + template + static constexpr bool has_base_seed_seq(typename T::base_seed_seq*) + { + return true; + } + + template + static constexpr bool has_base_seed_seq(...) + { + return false; + } + + + template + static auto seed_seq_cast(SeedSeqBased&& seq, + typename std::enable_if< + has_base_seed_seq(0)>::type* = 0) + -> decltype(seq.base()) + { + return seq.base(); + } + + template + static SeedSeq seed_seq_cast(SeedSeq&& seq, + typename std::enable_if< + !has_base_seed_seq(0)>::type* = 0) + { + return seq; + } + +public: + template + random_generator(Seeding&& seeding = default_seed_type{}) + : engine_{seed_seq_cast(std::forward(seeding))} + { + // Nothing (else) to do + } + + // Work around Clang DR777 bug in Clang 3.6 and earlier by adding a + // redundant overload rather than mixing parameter packs and default + // arguments. + // https://llvm.org/bugs/show_bug.cgi?id=23029 + template + random_generator(Seeding&& seeding, Params&&... params) + : engine_{seed_seq_cast(std::forward(seeding)), + std::forward(params)...} + { + // Nothing (else) to do + } + + template + void seed(Seeding&& seeding = default_seed_type{}) + { + engine_.seed(seed_seq_cast(seeding)); + } + + // Work around Clang DR777 bug in Clang 3.6 and earlier by adding a + // redundant overload rather than mixing parameter packs and default + // arguments. + // https://llvm.org/bugs/show_bug.cgi?id=23029 + template + void seed(Seeding&& seeding, Params&&... params) + { + engine_.seed(seed_seq_cast(seeding), std::forward(params)...); + } + + + RandomEngine& engine() + { + return engine_; + } + + template class DistTmpl = std::normal_distribution, + typename... Params> + ResultType variate(Params&&... params) + { + DistTmpl dist(std::forward(params)...); + + return dist(engine_); + } + + template + Numeric uniform(Numeric lower, Numeric upper) + { + return variate(lower, upper); + } + + template