diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3220b488dd1e043551d08b8031043fbd70c0b517..f100bd958b8896d594d74012273137387e156b57 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -79,7 +79,6 @@ #include #include #include -#include #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -3596,8 +3595,9 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl */ static u32 bond_rr_gen_slave_id(struct bonding *bond) { - int packets_per_slave = bond->params.packets_per_slave; u32 slave_id; + struct reciprocal_value reciprocal_packets_per_slave; + int packets_per_slave = bond->params.packets_per_slave; switch (packets_per_slave) { case 0: @@ -3607,8 +3607,10 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond) slave_id = bond->rr_tx_counter; break; default: + reciprocal_packets_per_slave = + bond->params.reciprocal_packets_per_slave; slave_id = reciprocal_divide(bond->rr_tx_counter, - packets_per_slave); + reciprocal_packets_per_slave); break; } bond->rr_tx_counter++; @@ -4343,10 +4345,18 @@ static int bond_check_params(struct bond_params *params) params->resend_igmp = resend_igmp; params->min_links = min_links; params->lp_interval = lp_interval; - if (packets_per_slave > 1) - params->packets_per_slave = reciprocal_value(packets_per_slave); - else - params->packets_per_slave = packets_per_slave; + params->packets_per_slave = packets_per_slave; + if (packets_per_slave > 0) { + params->reciprocal_packets_per_slave = + reciprocal_value(packets_per_slave); + } else { + /* reciprocal_packets_per_slave is unused if + * packets_per_slave is 0 or 1, just initialize it + */ + params->reciprocal_packets_per_slave = + (struct reciprocal_value) { 0 }; + } + if (primary) { strncpy(params->primary, primary, IFNAMSIZ); params->primary[IFNAMSIZ - 1] = 0; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 21c648854a8c4861d7e26bdca97d940f8c28c466..e8526552790cdab3e1c6cfe5cfb6558af2776947 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "bonding.h" int bond_get_slave(struct net_device *slave_dev, struct sk_buff *skb) @@ -452,9 +451,6 @@ static int bond_fill_info(struct sk_buff *skb, goto nla_put_failure; packets_per_slave = bond->params.packets_per_slave; - if (packets_per_slave > 1) - packets_per_slave = reciprocal_value(packets_per_slave); - if (nla_put_u32(skb, IFLA_BOND_PACKETS_PER_SLAVE, packets_per_slave)) goto nla_put_failure; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 945a6668da8376ad125356a300b9d34a72ea4166..85e434886f2e116a8b3ec13962dd70ba0ea884b4 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "bonding.h" int bond_option_mode_set(struct bonding *bond, int mode) @@ -671,11 +670,17 @@ int bond_option_packets_per_slave_set(struct bonding *bond, pr_warn("%s: Warning: packets_per_slave has effect only in balance-rr mode\n", bond->dev->name); - if (packets_per_slave > 1) - bond->params.packets_per_slave = + bond->params.packets_per_slave = packets_per_slave; + if (packets_per_slave > 0) { + bond->params.reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); - else - bond->params.packets_per_slave = packets_per_slave; + } else { + /* reciprocal_packets_per_slave is unused if + * packets_per_slave is 0 or 1, just initialize it + */ + bond->params.reciprocal_packets_per_slave = + (struct reciprocal_value) { 0 }; + } return 0; } diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 011f163c2c6771a9cbd335a507dfb390b081eca2..c083e9a66ece736493295ba6044ea2377b446d11 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -39,7 +39,6 @@ #include #include #include -#include #include "bonding.h" @@ -1374,10 +1373,6 @@ static ssize_t bonding_show_packets_per_slave(struct device *d, { struct bonding *bond = to_bond(d); unsigned int packets_per_slave = bond->params.packets_per_slave; - - if (packets_per_slave > 1) - packets_per_slave = reciprocal_value(packets_per_slave); - return sprintf(buf, "%u\n", packets_per_slave); } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 8a935f8f2b3c558ec7d288149ced77e3c60cc281..0a616c41dc9496c3cce31f6890ba3eeb992a96d5 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -23,6 +23,8 @@ #include #include #include +#include + #include "bond_3ad.h" #include "bond_alb.h" @@ -171,6 +173,7 @@ struct bond_params { int resend_igmp; int lp_interval; int packets_per_slave; + struct reciprocal_value reciprocal_packets_per_slave; }; struct bond_parm_tbl { diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c index 7f032e2113437f1ca6b2952ba4ccc28e56dd20ba..cd2f692b8074e6b543a511b5f76a775a7ab17f47 100644 --- a/drivers/net/team/team_mode_random.c +++ b/drivers/net/team/team_mode_random.c @@ -13,20 +13,14 @@ #include #include #include -#include #include -static u32 random_N(unsigned int N) -{ - return reciprocal_divide(prandom_u32(), N); -} - static bool rnd_transmit(struct team *team, struct sk_buff *skb) { struct team_port *port; int port_index; - port_index = random_N(team->en_port_count); + port_index = prandom_u32_max(team->en_port_count); port = team_get_port_by_index_rcu(team, port_index); if (unlikely(!port)) goto drop; diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 6843cf193a445134cde17f7d2db6893399ac8cda..b6efb0c64408b35b7874726ca22c72f2e5fe21f3 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -2,6 +2,7 @@ #define _FLEX_ARRAY_H #include +#include #include #define FLEX_ARRAY_PART_SIZE PAGE_SIZE @@ -22,7 +23,7 @@ struct flex_array { int element_size; int total_nr_elements; int elems_per_part; - u32 reciprocal_elems; + struct reciprocal_value reciprocal_elems; struct flex_array_part *parts[]; }; /* diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ecb87544cc5d888be056d903a2e0b227dd24c461..03d8a6b0e2e803dc3c586041aa6e2dc126c7e80f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -193,6 +193,25 @@ extern int _cond_resched(void); (__x < 0) ? -__x : __x; \ }) +/** + * reciprocal_scale - "scale" a value into range [0, ep_ro) + * @val: value + * @ep_ro: right open interval endpoint + * + * Perform a "reciprocal multiplication" in order to "scale" a value into + * range [0, ep_ro), where the upper interval endpoint is right-open. + * This is useful, e.g. for accessing a index of an array containing + * ep_ro elements, for example. Think of it as sort of modulus, only that + * the result isn't that of modulo. ;) Note that if initial input is a + * small value, then result will return 0. + * + * Return: a result based on val in interval [0, ep_ro). + */ +static inline u32 reciprocal_scale(u32 val, u32 ep_ro) +{ + return (u32)(((u64) val * ep_ro) >> 32); +} + #if defined(CONFIG_MMU) && \ (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)) void might_fault(void); diff --git a/include/linux/random.h b/include/linux/random.h index 4002b3df4c8566d4497a9dae222e6fc1e6e37b66..1cfce0e24dbdb09a527d914ab511e6b6280fa364 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -8,7 +8,6 @@ #include - extern void add_device_randomness(const void *, unsigned int); extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); @@ -38,6 +37,23 @@ struct rnd_state { u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes); +/** + * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) + * @ep_ro: right open interval endpoint + * + * Returns a pseudo-random number that is in interval [0, ep_ro). Note + * that the result depends on PRNG being well distributed in [0, ~0U] + * u32 space. Here we use maximally equidistributed combined Tausworthe + * generator, that is, prandom_u32(). This is useful when requesting a + * random index of an array containing ep_ro elements, for example. + * + * Returns: pseudo-random number in interval [0, ep_ro) + */ +static inline u32 prandom_u32_max(u32 ep_ro) +{ + return (u32)(((u64) prandom_u32() * ep_ro) >> 32); +} + /* * Handle minimum values for seeds */ diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h index f9c90b33285b24b2ef9870ee3d3fe3cf78f9283d..8c5a3fb6c6c57821720dc307f3eeec944c56f974 100644 --- a/include/linux/reciprocal_div.h +++ b/include/linux/reciprocal_div.h @@ -4,29 +4,32 @@ #include /* - * This file describes reciprocical division. + * This algorithm is based on the paper "Division by Invariant + * Integers Using Multiplication" by Torbjörn Granlund and Peter + * L. Montgomery. * - * This optimizes the (A/B) problem, when A and B are two u32 - * and B is a known value (but not known at compile time) + * The assembler implementation from Agner Fog, which this code is + * based on, can be found here: + * http://www.agner.org/optimize/asmlib.zip * - * The math principle used is : - * Let RECIPROCAL_VALUE(B) be (((1LL << 32) + (B - 1))/ B) - * Then A / B = (u32)(((u64)(A) * (R)) >> 32) - * - * This replaces a divide by a multiply (and a shift), and - * is generally less expensive in CPU cycles. + * This optimization for A/B is helpful if the divisor B is mostly + * runtime invariant. The reciprocal of B is calculated in the + * slow-path with reciprocal_value(). The fast-path can then just use + * a much faster multiplication operation with a variable dividend A + * to calculate the division A/B. */ -/* - * Computes the reciprocal value (R) for the value B of the divisor. - * Should not be called before each reciprocal_divide(), - * or else the performance is slower than a normal divide. - */ -extern u32 reciprocal_value(u32 B); +struct reciprocal_value { + u32 m; + u8 sh1, sh2; +}; +struct reciprocal_value reciprocal_value(u32 d); -static inline u32 reciprocal_divide(u32 A, u32 R) +static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) { - return (u32)(((u64)A * R) >> 32); + u32 t = (u32)(((u64)a * R.m) >> 32); + return (t + ((a - t) >> R.sh1)) >> R.sh2; } -#endif + +#endif /* _LINUX_RECIPROCAL_DIV_H */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 09bfffb08a56db285caa27146202f04e2188b480..96e8abae19a943375e39da1c779016a72cc07b7e 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -1,6 +1,8 @@ #ifndef _LINUX_SLAB_DEF_H #define _LINUX_SLAB_DEF_H +#include + /* * Definitions unique to the original Linux SLAB allocator. */ @@ -12,7 +14,7 @@ struct kmem_cache { unsigned int shared; unsigned int size; - u32 reciprocal_buffer_size; + struct reciprocal_value reciprocal_buffer_size; /* 2) touched by every alloc & free from the backend */ unsigned int flags; /* constant flags */ diff --git a/include/net/codel.h b/include/net/codel.h index 3b04ff5f6f8d60489a13171d3071ffa597c2c140..fe0eab32ce76dd42845e8bc466c8bade5f429157 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -46,7 +46,6 @@ #include #include #include -#include /* Controlling Queue Delay (CoDel) algorithm * ========================================= @@ -211,10 +210,9 @@ static codel_time_t codel_control_law(codel_time_t t, codel_time_t interval, u32 rec_inv_sqrt) { - return t + reciprocal_divide(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); + return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); } - static bool codel_should_drop(const struct sk_buff *skb, struct Qdisc *sch, struct codel_vars *vars, diff --git a/include/net/red.h b/include/net/red.h index 168bb2f495f207511bffe485c1070ea8bb6df30a..76e0b5f922c6c475e7f47a73d072282dfec53149 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -130,7 +130,8 @@ struct red_parms { u32 qth_max; /* Max avg length threshold: Wlog scaled */ u32 Scell_max; u32 max_P; /* probability, [0 .. 1.0] 32 scaled */ - u32 max_P_reciprocal; /* reciprocal_value(max_P / qth_delta) */ + /* reciprocal_value(max_P / qth_delta) */ + struct reciprocal_value max_P_reciprocal; u32 qth_delta; /* max_th - min_th */ u32 target_min; /* min_th + 0.4*(max_th - min_th) */ u32 target_max; /* min_th + 0.6*(max_th - min_th) */ diff --git a/lib/flex_array.c b/lib/flex_array.c index 6948a6692fc4f6b70c2bfc0700afb7a971d7059e..2eed22fa507c7cb0756d7ef643f8a3454eb455ec 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -90,8 +90,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, { struct flex_array *ret; int elems_per_part = 0; - int reciprocal_elems = 0; int max_size = 0; + struct reciprocal_value reciprocal_elems = { 0 }; if (element_size) { elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); @@ -119,6 +119,11 @@ EXPORT_SYMBOL(flex_array_alloc); static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { + /* + * if element_size == 0 we don't get here, so we never touch + * the zeroed fa->reciprocal_elems, which would yield invalid + * results + */ return reciprocal_divide(element_nr, fa->reciprocal_elems); } diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index 75510e94f7d0b2fcd5736a4e0aeb9ff282da11d1..464152410c5123e191324a4cfb3ca5ea8ff66687 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,11 +1,27 @@ +#include #include #include #include -u32 reciprocal_value(u32 k) +/* + * For a description of the algorithm please have a look at + * include/linux/reciprocal_div.h + */ + +struct reciprocal_value reciprocal_value(u32 d) { - u64 val = (1LL << 32) + (k - 1); - do_div(val, k); - return (u32)val; + struct reciprocal_value R; + u64 m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + do_div(m, d); + ++m; + R.m = (u32)m; + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; } EXPORT_SYMBOL(reciprocal_value); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 59fb3db2e71333074c106449b154ddbbc2ed814a..97346162803d87f7a4c5355b0ae54dca17296f53 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -88,7 +88,6 @@ #include #include #include -#include #include #ifdef CONFIG_INET #include @@ -1262,7 +1261,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_divide(skb->rxhash, num); + return reciprocal_scale(skb->rxhash, num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1289,7 +1288,7 @@ static unsigned int fanout_demux_rnd(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_divide(prandom_u32(), num); + return prandom_u32_max(num); } static unsigned int fanout_demux_rollover(struct packet_fanout *f, diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index ddd73cb2d7ba4080f0e8f75ddc715cf9cb2a1587..2aee02802c2760fafabb47d6bf4229d08d8ef907 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -77,12 +76,6 @@ struct choke_sched_data { struct sk_buff **tab; }; -/* deliver a random number between 0 and N - 1 */ -static u32 random_N(unsigned int N) -{ - return reciprocal_divide(prandom_u32(), N); -} - /* number of elements in queue including holes */ static unsigned int choke_len(const struct choke_sched_data *q) { @@ -233,7 +226,7 @@ static struct sk_buff *choke_peek_random(const struct choke_sched_data *q, int retrys = 3; do { - *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask; + *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask; skb = q->tab[*pidx]; if (skb) return skb; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a2bfc371b44a3339709be67d58004a00400abe8e..de1059af6da14c563115147852c4f52b80a9387d 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -91,7 +91,7 @@ struct netem_sched_data { u64 rate; s32 packet_overhead; u32 cell_size; - u32 cell_size_reciprocal; + struct reciprocal_value cell_size_reciprocal; s32 cell_overhead; struct crndstate { @@ -725,9 +725,11 @@ static void get_rate(struct Qdisc *sch, const struct nlattr *attr) q->rate = r->rate; q->packet_overhead = r->packet_overhead; q->cell_size = r->cell_size; + q->cell_overhead = r->cell_overhead; if (q->cell_size) q->cell_size_reciprocal = reciprocal_value(q->cell_size); - q->cell_overhead = r->cell_overhead; + else + q->cell_size_reciprocal = (struct reciprocal_value) { 0 }; } static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)