diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 007c290f74d4557b38161fe1ef9378fc4919f0bb..8bf4bacb5051a2df3e8b71dde9c1c464512b851c 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -432,7 +432,10 @@ struct dccp_sock { struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; + struct timeval dccps_epoch; enum dccp_role dccps_role:2; + __u8 dccps_hc_rx_insert_options:1; + __u8 dccps_hc_tx_insert_options:1; }; static inline struct dccp_sock *dccp_sk(const struct sock *sk) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 7bf3b3a91e97407a856f06acf7543df2dc39149d..ea30012dd19545cb925a94456bedb6e88addb7b2 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -43,12 +43,22 @@ #include "ccid3.h" /* - * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. + * Reason for maths here is to avoid 32 bit overflow when a is big. + * With this we get close to the limit. */ static inline u32 usecs_div(const u32 a, const u32 b) { - const u32 tmp = a * (USEC_PER_SEC / 10); - return b > 20 ? tmp / (b / 10) : tmp; + const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 : + a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 : + a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 : + a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 : + a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 : + a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 : + a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 : + a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 : + 100000; + const u32 tmp = a * (USEC_PER_SEC / div); + return (b >= 2 * div) ? tmp / (b / div) : tmp; } static int ccid3_debug; @@ -102,8 +112,7 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", @@ -144,8 +153,7 @@ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) */ static void ccid3_hc_tx_update_x(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); /* To avoid large error in calcX */ if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { @@ -159,7 +167,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk) } else { struct timeval now; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, @@ -174,9 +182,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk) static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - struct dccp_sock *dp = dccp_sk(sk); unsigned long next_tmout = 0; - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -274,7 +281,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, int len) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct dccp_tx_hist_entry *new_packet; struct timeval now; long delay; @@ -307,7 +314,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); } - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: @@ -348,18 +355,20 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, } /* Can we send? if so add options and add to packet history */ - if (rc == 0) + if (rc == 0) { + dp->dccps_hc_tx_insert_options = 1; new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + } out: return rc; } static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct timeval now; BUG_ON(hctx == NULL); @@ -370,7 +379,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) return; } - do_gettimeofday(&now); + dccp_timestamp(sk, &now); /* check if we have sent a data packet */ if (len > 0) { @@ -445,10 +454,11 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; + struct timeval now; unsigned long next_tmout; u32 t_elapsed; u32 pinv; @@ -471,7 +481,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) opt_recv = &hctx->ccid3hctx_options_received; - t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; + t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10; x_recv = opt_recv->ccid3or_receive_rate; pinv = opt_recv->ccid3or_loss_event_rate; @@ -496,9 +506,14 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update RTT */ - r_sample = timeval_now_delta(&packet->dccphtx_tstamp); - /* FIXME: */ - // r_sample -= usecs_to_jiffies(t_elapsed * 10); + dccp_timestamp(sk, &now); + r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); + if (unlikely(r_sample <= t_elapsed)) + LIMIT_NETDEBUG(KERN_WARNING + "%s: r_sample=%uus, t_elapsed=%uus\n", + __FUNCTION__, r_sample, t_elapsed); + else + r_sample -= t_elapsed; /* Update RTT estimate by * If (No feedback recv) @@ -591,8 +606,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) @@ -606,8 +620,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, unsigned char *value) { int rc = 0; - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; if (hctx == NULL) @@ -670,11 +684,11 @@ static int ccid3_hc_tx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), - gfp_any()); - if (hctx == NULL) + dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); + if (dp->dccps_hc_tx_ccid_private == NULL) return -ENOMEM; + hctx = ccid3_hc_tx_sk(sk); memset(hctx, 0, sizeof(*hctx)); if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && @@ -696,7 +710,7 @@ static int ccid3_hc_tx_init(struct sock *sk) static void ccid3_hc_tx_exit(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); BUG_ON(hctx == NULL); @@ -738,8 +752,7 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", @@ -751,14 +764,14 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk, static void ccid3_hc_rx_send_feedback(struct sock *sk) { + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *packet; struct timeval now; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: @@ -767,11 +780,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) case TFRC_RSTATE_DATA: { const u32 delta = timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_feedback); - - hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * - USEC_PER_SEC); - if (likely(delta > 1)) - hcrx->ccid3hcrx_x_recv /= delta; + hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, + delta); } break; default: @@ -801,14 +811,14 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_pinv = ~0; else hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dp->dccps_hc_rx_insert_options = 1; dccp_send_ack(sk); } static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); u32 x_recv, pinv; - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) @@ -837,8 +847,7 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; struct timeval tstamp = { 0, }; @@ -889,10 +898,9 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) if (rtt == 0) rtt = 1; - delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); - x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; - if (likely(delta > 1)) - x_recv /= delta; + dccp_timestamp(sk, &tstamp); + delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); + x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); @@ -911,8 +919,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); if (seq_loss != DCCP_MAX_SEQNO + 1 && list_empty(&hcrx->ccid3hcrx_li_hist)) { @@ -930,8 +937,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) static void ccid3_hc_rx_detect_loss(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); u8 win_loss; const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist, @@ -942,13 +948,12 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; - u32 p_prev; + u32 p_prev, r_sample, t_elapsed; int ins; if (hcrx == NULL) @@ -957,7 +962,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); - opt_recv = &dp->dccps_options_received; + opt_recv = &dccp_sk(sk)->dccps_options_received; switch (DCCP_SKB_CB(skb)->dccpd_type) { case DCCP_PKT_ACK: @@ -967,10 +972,24 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (opt_recv->dccpor_timestamp_echo == 0) break; p_prev = hcrx->ccid3hcrx_rtt; - do_gettimeofday(&now); - hcrx->ccid3hcrx_rtt = timeval_usecs(&now) - - (opt_recv->dccpor_timestamp_echo - - opt_recv->dccpor_elapsed_time) * 10; + dccp_timestamp(sk, &now); + timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); + r_sample = timeval_usecs(&now); + t_elapsed = opt_recv->dccpor_elapsed_time * 10; + + if (unlikely(r_sample <= t_elapsed)) + LIMIT_NETDEBUG(KERN_WARNING + "%s: r_sample=%uus, t_elapsed=%uus\n", + __FUNCTION__, r_sample, t_elapsed); + else + r_sample -= t_elapsed; + + if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) + hcrx->ccid3hcrx_rtt = r_sample; + else + hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + + r_sample / 10; + if (p_prev != hcrx->ccid3hcrx_rtt) ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", dccp_role(sk), hcrx->ccid3hcrx_rtt, @@ -985,7 +1004,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, skb, SLAB_ATOMIC); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet " @@ -1017,7 +1036,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (ins != 0) break; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= hcrx->ccid3hcrx_rtt) { hcrx->ccid3hcrx_tstamp_last_ack = now; @@ -1056,11 +1075,11 @@ static int ccid3_hc_rx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), - gfp_any()); - if (hcrx == NULL) + dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); + if (dp->dccps_hc_rx_ccid_private == NULL) return -ENOMEM; + hcrx = ccid3_hc_rx_sk(sk); memset(hcrx, 0, sizeof(*hcrx)); if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && @@ -1072,18 +1091,16 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - /* - * XXX this seems to be paranoid, need to think more about this, for - * now start with something different than zero. -acme - */ - hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5; + dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); + hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; + hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */ return 0; } static void ccid3_hc_rx_exit(struct sock *sk) { + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); @@ -1104,8 +1121,7 @@ static void ccid3_hc_rx_exit(struct sock *sk) static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { - const struct dccp_sock *dp = dccp_sk(sk); - const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); if (hcrx == NULL) return; @@ -1117,8 +1133,7 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) { - const struct dccp_sock *dp = dccp_sk(sk); - const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); if (hctx == NULL) return; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index ee8cbace6630a6ca443679d428000b0b2a602d25..d16f00d784f39375a771550280281ba0d53d4c19 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -115,7 +115,7 @@ struct ccid3_hc_rx_sock { u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, ccid3hcrx_last_counter:4; - unsigned long ccid3hcrx_rtt; + u32 ccid3hcrx_rtt; u32 ccid3hcrx_p; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; @@ -128,10 +128,14 @@ struct ccid3_hc_rx_sock { u32 ccid3hcrx_x_recv; }; -#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) +static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_hc_tx_ccid_private; +} -#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) +static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_hc_rx_ccid_private; +} #endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index fb90a91aa93d64816789dc2c250fdebcd6923784..b375ebdb7dcfb0766f6700ebfc3829ce4aa508b5 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -134,6 +134,7 @@ static inline struct dccp_tx_hist_entry * static inline struct dccp_rx_hist_entry * dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const struct sock *sk, const u32 ndp, const struct sk_buff *skb, const unsigned int __nocast prio) @@ -148,7 +149,7 @@ static inline struct dccp_rx_hist_entry * entry->dccphrx_ccval = dh->dccph_ccval; entry->dccphrx_type = dh->dccph_type; entry->dccphrx_ndp = ndp; - do_gettimeofday(&(entry->dccphrx_tstamp)); + dccp_timestamp(sk, &entry->dccphrx_tstamp); } return entry; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 33456c0d5937c8950b1a525b7d156e63adc52077..95c4630b3b18454260b34209da7e54ae326ab8a1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -426,10 +426,13 @@ extern struct dccp_ackpkts * dccp_ackpkts_alloc(unsigned int len, const unsigned int __nocast priority); extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); -extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); +extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); +extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); + static inline suseconds_t timeval_usecs(const struct timeval *tv) { return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; @@ -468,17 +471,6 @@ static inline void timeval_sub_usecs(struct timeval *tv, } } -/* - * Returns the difference in usecs between timeval - * passed in and current time - */ -static inline suseconds_t timeval_now_delta(const struct timeval *tv) -{ - struct timeval now; - do_gettimeofday(&now); - return timeval_delta(&now, tv); -} - #ifdef CONFIG_IP_DCCP_DEBUG extern void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len); diff --git a/net/dccp/input.c b/net/dccp/input.c index ef29cef1dafe238196224bd8aed79cc139a2c329..c60bc3433f5eac378d8a4b87198276576fd6d7f8 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -170,7 +170,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dp->dccps_options.dccpo_send_ack_vector) { struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " @@ -498,7 +498,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * DCCP_ACKPKTS_STATE_ECN_MARKED */ if (dp->dccps_options.dccpo_send_ack_vector) { - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) goto discard; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3fc75dbee4b8e4d068a0b4598fc96900c5a90842..fee9a8c3777b3b8a8a35de2be2b1a6789ac69ce3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1243,6 +1243,7 @@ static int dccp_v4_init_sock(struct sock *sk) static int dccp_ctl_socket_init = 1; dccp_options_init(&dp->dccps_options); + do_gettimeofday(&dp->dccps_epoch); if (dp->dccps_options.dccpo_send_ack_vector) { dp->dccps_hc_rx_ackpkts = diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index ce5dff4ac22ea9435a15b20a9da054487879aefe..18461bc04cbefae76dc8073878a53ec51bda0885 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -96,6 +96,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_hc_rx_ackpkts = NULL; newdp->dccps_role = DCCP_ROLE_SERVER; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { newdp->dccps_hc_rx_ackpkts = diff --git a/net/dccp/options.c b/net/dccp/options.c index 382c5894acb20590ba4943181130ffdae9dd0f19..d4c4242d8dd7c5de227784b42c048f5f18b41027 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -72,6 +72,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; unsigned char *value; + u32 elapsed_time; memset(opt_recv, 0, sizeof(*opt_recv)); @@ -139,7 +140,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - do_gettimeofday(&dp->dccps_timestamp_time); + dccp_timestamp(sk, &dp->dccps_timestamp_time); dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, @@ -159,18 +160,18 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); - if (len > 4) { - if (len == 6) - opt_recv->dccpor_elapsed_time = - ntohs(*(u16 *)(value + 4)); - else - opt_recv->dccpor_elapsed_time = - ntohl(*(u32 *)(value + 4)); - dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", - debug_prefix, - opt_recv->dccpor_elapsed_time); - } + if (len == 4) + break; + + if (len == 6) + elapsed_time = ntohs(*(u16 *)(value + 4)); + else + elapsed_time = ntohl(*(u32 *)(value + 4)); + + /* Give precedence to the biggest ELAPSED_TIME */ + if (elapsed_time > opt_recv->dccpor_elapsed_time) + opt_recv->dccpor_elapsed_time = elapsed_time; break; case DCCPO_ELAPSED_TIME: if (len != 2 && len != 4) @@ -180,14 +181,15 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) continue; if (len == 2) - opt_recv->dccpor_elapsed_time = - ntohs(*(u16 *)value); + elapsed_time = ntohs(*(u16 *)value); else - opt_recv->dccpor_elapsed_time = - ntohl(*(u32 *)value); + elapsed_time = ntohl(*(u32 *)value); + + if (elapsed_time > opt_recv->dccpor_elapsed_time) + opt_recv->dccpor_elapsed_time = elapsed_time; dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, - opt_recv->dccpor_elapsed_time); + elapsed_time); break; /* * From draft-ietf-dccp-spec-11.txt: @@ -359,9 +361,13 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10; + struct timeval now; + u32 elapsed_time; unsigned char *to, *from; + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; + if (elapsed_time != 0) dccp_insert_option_elapsed_time(sk, skb, elapsed_time); @@ -426,13 +432,29 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) (unsigned long long) ap->dccpap_ack_ackno); } +void dccp_timestamp(const struct sock *sk, struct timeval *tv) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + do_gettimeofday(tv); + tv->tv_sec -= dp->dccps_epoch.tv_sec; + tv->tv_usec -= dp->dccps_epoch.tv_usec; + + while (tv->tv_usec < 0) { + tv->tv_sec--; + tv->tv_usec += USEC_PER_SEC; + } +} + +EXPORT_SYMBOL_GPL(dccp_timestamp); + void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { struct timeval tv; u32 now; - do_gettimeofday(&tv); - now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; + dccp_timestamp(sk, &tv); + now = timeval_usecs(&tv) / 10; /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ @@ -450,13 +472,17 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; #endif + struct timeval now; u32 tstamp_echo; - const u32 elapsed_time = - timeval_now_delta(&dp->dccps_timestamp_time) / 10; - const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - const int len = 6 + elapsed_time_len; + u32 elapsed_time; + int len, elapsed_time_len; unsigned char *to; + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10; + elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + len = 6 + elapsed_time_len; + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " "timestamp echo!\n"); @@ -505,13 +531,18 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1)) dccp_insert_option_ack_vector(sk, skb); - if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } - ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + if (dp->dccps_hc_rx_insert_options) { + ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); + dp->dccps_hc_rx_insert_options = 0; + } + if (dp->dccps_hc_tx_insert_options) { + ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + dp->dccps_hc_tx_insert_options = 0; + } /* XXX: insert other options when appropriate */ @@ -616,7 +647,8 @@ static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, /* * Implements the draft-ietf-dccp-spec-11.txt Appendix A */ -int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) +int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state) { /* * Check at the right places if the buffer is full, if it is, tell the @@ -697,7 +729,7 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) } ap->dccpap_buf_ackno = ackno; - do_gettimeofday(&ap->dccpap_time); + dccp_timestamp(sk, &ap->dccpap_time); out: dccp_pr_debug(""); dccp_ackpkts_print(ap); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index bf147f8db399489260d2292409ba5749b1ef93e8..a9d84f93442c9560a8e7050a60292876b09bd508 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1248,11 +1248,6 @@ module_init(inet_init); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -#ifdef CONFIG_IP_FIB_TRIE -extern int fib_stat_proc_init(void); -extern void fib_stat_proc_exit(void); -#endif - static int __init ipv4_proc_init(void) { int rc = 0; @@ -1265,19 +1260,11 @@ static int __init ipv4_proc_init(void) goto out_udp; if (fib_proc_init()) goto out_fib; -#ifdef CONFIG_IP_FIB_TRIE - if (fib_stat_proc_init()) - goto out_fib_stat; -#endif if (ip_misc_proc_init()) goto out_misc; out: return rc; out_misc: -#ifdef CONFIG_IP_FIB_TRIE - fib_stat_proc_exit(); -out_fib_stat: -#endif fib_proc_exit(); out_fib: udp4_proc_exit(); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b2dea4e5da77cfe2f00bf09cfaa5a0ec0e171ed5..1b63b4824164390daa515a4baf1c1e8b6f8a9b8f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -43,7 +43,7 @@ * 2 of the License, or (at your option) any later version. */ -#define VERSION "0.402" +#define VERSION "0.403" #include #include @@ -164,7 +164,6 @@ static struct node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); -static void trie_dump_seq(struct seq_file *seq, struct trie *t); static kmem_cache_t *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; @@ -1971,558 +1970,525 @@ struct fib_table * __init fib_hash_init(int id) return tb; } -/* Trie dump functions */ +#ifdef CONFIG_PROC_FS +/* Depth first Trie walk iterator */ +struct fib_trie_iter { + struct tnode *tnode; + struct trie *trie; + unsigned index; + unsigned depth; +}; -static void putspace_seq(struct seq_file *seq, int n) +static struct node *fib_trie_get_next(struct fib_trie_iter *iter) { - while (n--) - seq_printf(seq, " "); -} + struct tnode *tn = iter->tnode; + unsigned cindex = iter->index; + struct tnode *p; -static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) -{ - while (bits--) - seq_printf(seq, "%s", (v & (1<tnode, iter->index, iter->depth); +rescan: + while (cindex < (1<bits)) { + struct node *n = tnode_get_child(tn, cindex); -static void printnode_seq(struct seq_file *seq, int indent, struct node *n, - int pend, int cindex, int bits) -{ - putspace_seq(seq, indent); - if (IS_LEAF(n)) - seq_printf(seq, "|"); - else - seq_printf(seq, "+"); - if (bits) { - seq_printf(seq, "%d/", cindex); - printbin_seq(seq, cindex, bits); - seq_printf(seq, ": "); - } else - seq_printf(seq, ": "); - seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); + if (n) { + if (IS_LEAF(n)) { + iter->tnode = tn; + iter->index = cindex + 1; + } else { + /* push down one level */ + iter->tnode = (struct tnode *) n; + iter->index = 0; + ++iter->depth; + } + return n; + } - if (IS_LEAF(n)) { - struct leaf *l = (struct leaf *)n; - struct fib_alias *fa; - int i; + ++cindex; + } - seq_printf(seq, "key=%d.%d.%d.%d\n", - n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); - - for (i = 32; i >= 0; i--) - if (find_leaf_info(&l->list, i)) { - struct list_head *fa_head = get_fa_head(l, i); - - if (!fa_head) - continue; - - if (list_empty(fa_head)) - continue; - - putspace_seq(seq, indent+2); - seq_printf(seq, "{/%d...dumping}\n", i); - - list_for_each_entry_rcu(fa, fa_head, fa_list) { - putspace_seq(seq, indent+2); - if (fa->fa_info == NULL) { - seq_printf(seq, "Error fa_info=NULL\n"); - continue; - } - if (fa->fa_info->fib_nh == NULL) { - seq_printf(seq, "Error _fib_nh=NULL\n"); - continue; - } - - seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", - fa->fa_type, - fa->fa_scope, - fa->fa_tos); - } - } - } else { - struct tnode *tn = (struct tnode *)n; - int plen = ((struct tnode *)n)->pos; - t_key prf = MASK_PFX(n->key, plen); - - seq_printf(seq, "key=%d.%d.%d.%d/%d\n", - prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); - - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos)); - printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); - seq_printf(seq, "}\n"); - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{pos=%d", tn->pos); - seq_printf(seq, " (skip=%d bits)", tn->pos - pend); - seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits)); - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children); + /* Current node exhausted, pop back up */ + p = NODE_PARENT(tn); + if (p) { + cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; + tn = p; + --iter->depth; + goto rescan; } + + /* got root? */ + return NULL; } -static void trie_dump_seq(struct seq_file *seq, struct trie *t) +static struct node *fib_trie_get_first(struct fib_trie_iter *iter, + struct trie *t) { - struct node *n; - int cindex = 0; - int indent = 1; - int pend = 0; - int depth = 0; - struct tnode *tn; - - rcu_read_lock(); - n = rcu_dereference(t->trie); - seq_printf(seq, "------ trie_dump of t=%p ------\n", t); + struct node *n = rcu_dereference(t->trie); - if (!n) { - seq_printf(seq, "------ trie is empty\n"); - - rcu_read_unlock(); - return; + if (n && IS_TNODE(n)) { + iter->tnode = (struct tnode *) n; + iter->trie = t; + iter->index = 0; + iter->depth = 0; + return n; } + return NULL; +} - printnode_seq(seq, indent, n, pend, cindex, 0); - - if (!IS_TNODE(n)) { - rcu_read_unlock(); - return; - } - - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); - indent += 3; - depth++; - - while (tn && cindex < (1 << tn->bits)) { - struct node *child = rcu_dereference(tn->child[cindex]); - if (!child) - cindex++; - else { - /* Got a child */ - printnode_seq(seq, indent, child, pend, - cindex, tn->bits); - - if (IS_LEAF(child)) - cindex++; - - else { - /* - * New tnode. Decend one level - */ - - depth++; - n = child; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); - seq_printf(seq, "\\--\n"); - indent += 3; - cindex = 0; - } - } - - /* - * Test if we are done - */ - - while (cindex >= (1 << tn->bits)) { - /* - * Move upwards and test for root - * pop off all traversed nodes - */ +static void trie_collect_stats(struct trie *t, struct trie_stat *s) +{ + struct node *n; + struct fib_trie_iter iter; - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - break; - } + memset(s, 0, sizeof(*s)); - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - cindex++; - tn = NODE_PARENT(tn); - pend = tn->pos + tn->bits; - indent -= 3; - depth--; + rcu_read_lock(); + for (n = fib_trie_get_first(&iter, t); n; + n = fib_trie_get_next(&iter)) { + if (IS_LEAF(n)) { + s->leaves++; + s->totdepth += iter.depth; + if (iter.depth > s->maxdepth) + s->maxdepth = iter.depth; + } else { + const struct tnode *tn = (const struct tnode *) n; + int i; + + s->tnodes++; + s->nodesizes[tn->bits]++; + for (i = 0; i < (1<bits); i++) + if (!tn->child[i]) + s->nullpointers++; } } rcu_read_unlock(); } -static struct trie_stat *trie_stat_new(void) +/* + * This outputs /proc/net/fib_triestats + */ +static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) { - struct trie_stat *s; - int i; + unsigned i, max, pointers, bytes, avdepth; - s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); - if (!s) - return NULL; + if (stat->leaves) + avdepth = stat->totdepth*100 / stat->leaves; + else + avdepth = 0; - s->totdepth = 0; - s->maxdepth = 0; - s->tnodes = 0; - s->leaves = 0; - s->nullpointers = 0; + seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); + seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); - for (i = 0; i < MAX_CHILDS; i++) - s->nodesizes[i] = 0; + seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - return s; -} + bytes = sizeof(struct leaf) * stat->leaves; + seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes); + bytes += sizeof(struct tnode) * stat->tnodes; -static struct trie_stat *trie_collect_stats(struct trie *t) -{ - struct node *n; - struct trie_stat *s = trie_stat_new(); - int cindex = 0; - int pend = 0; - int depth = 0; + max = MAX_CHILDS-1; + while (max >= 0 && stat->nodesizes[max] == 0) + max--; - if (!s) - return NULL; + pointers = 0; + for (i = 1; i <= max; i++) + if (stat->nodesizes[i] != 0) { + seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); + pointers += (1<nodesizes[i]; + } + seq_putc(seq, '\n'); + seq_printf(seq, "\tPointers: %d\n", pointers); - rcu_read_lock(); - n = rcu_dereference(t->trie); + bytes += sizeof(struct node *) * pointers; + seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); + seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024); - if (!n) - return s; +#ifdef CONFIG_IP_FIB_TRIE_STATS + seq_printf(seq, "Counters:\n---------\n"); + seq_printf(seq,"gets = %d\n", t->stats.gets); + seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); + seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); + seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); + seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); + seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); +#ifdef CLEAR_STATS + memset(&(t->stats), 0, sizeof(t->stats)); +#endif +#endif /* CONFIG_IP_FIB_TRIE_STATS */ +} - if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - s->nodesizes[tn->bits]++; - depth++; - - while (tn && cindex < (1 << tn->bits)) { - struct node *ch = rcu_dereference(tn->child[cindex]); - if (ch) { - - /* Got a child */ - - if (IS_LEAF(tn->child[cindex])) { - cindex++; - - /* stats */ - if (depth > s->maxdepth) - s->maxdepth = depth; - s->totdepth += depth; - s->leaves++; - } else { - /* - * New tnode. Decend one level - */ - - s->tnodes++; - s->nodesizes[tn->bits]++; - depth++; - - n = ch; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - - cindex = 0; - } - } else { - cindex++; - s->nullpointers++; - } +static int fib_triestat_seq_show(struct seq_file *seq, void *v) +{ + struct trie_stat *stat; - /* - * Test if we are done - */ + stat = kmalloc(sizeof(*stat), GFP_KERNEL); + if (!stat) + return -ENOMEM; - while (cindex >= (1 << tn->bits)) { - /* - * Move upwards and test for root - * pop off all traversed nodes - */ + seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", + sizeof(struct leaf), sizeof(struct tnode)); - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - n = NULL; - break; - } + if (trie_local) { + seq_printf(seq, "Local:\n"); + trie_collect_stats(trie_local, stat); + trie_show_stats(seq, stat); + } - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - tn = NODE_PARENT(tn); - cindex++; - n = (struct node *)tn; - pend = tn->pos+tn->bits; - depth--; - } - } + if (trie_main) { + seq_printf(seq, "Main:\n"); + trie_collect_stats(trie_main, stat); + trie_show_stats(seq, stat); } + kfree(stat); - rcu_read_unlock(); - return s; + return 0; } -#ifdef CONFIG_PROC_FS - -static struct fib_alias *fib_triestat_get_first(struct seq_file *seq) +static int fib_triestat_seq_open(struct inode *inode, struct file *file) { - return NULL; + return single_open(file, fib_triestat_seq_show, NULL); } -static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) +static struct file_operations fib_triestat_fops = { + .owner = THIS_MODULE, + .open = fib_triestat_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, + loff_t pos) { + loff_t idx = 0; + struct node *n; + + for (n = fib_trie_get_first(iter, trie_local); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; + } + + for (n = fib_trie_get_first(iter, trie_main); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; + } return NULL; } -static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) +static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) { - if (!ip_fib_main_table) - return NULL; - - if (*pos) - return fib_triestat_get_next(seq); - else + rcu_read_lock(); + if (*pos == 0) return SEQ_START_TOKEN; + return fib_trie_get_idx(seq->private, *pos - 1); } -static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct fib_trie_iter *iter = seq->private; + void *l = v; + ++*pos; if (v == SEQ_START_TOKEN) - return fib_triestat_get_first(seq); - else - return fib_triestat_get_next(seq); -} + return fib_trie_get_idx(iter, 0); -static void fib_triestat_seq_stop(struct seq_file *seq, void *v) -{ + v = fib_trie_get_next(iter); + BUG_ON(v == l); + if (v) + return v; -} + /* continue scan in next trie */ + if (iter->trie == trie_local) + return fib_trie_get_first(iter, trie_main); -/* - * This outputs /proc/net/fib_triestats - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. - */ + return NULL; +} -static void collect_and_show(struct trie *t, struct seq_file *seq) +static void fib_trie_seq_stop(struct seq_file *seq, void *v) { - int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ - int i, max, pointers; - struct trie_stat *stat; - int avdepth; - - stat = trie_collect_stats(t); - - bytes = 0; - seq_printf(seq, "trie=%p\n", t); - - if (stat) { - if (stat->leaves) - avdepth = stat->totdepth*100 / stat->leaves; - else - avdepth = 0; - seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100); - seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); + rcu_read_unlock(); +} - seq_printf(seq, "Leaves: %d\n", stat->leaves); - bytes += sizeof(struct leaf) * stat->leaves; - seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); - bytes += sizeof(struct tnode) * stat->tnodes; +static void seq_indent(struct seq_file *seq, int n) +{ + while (n-- > 0) seq_puts(seq, " "); +} - max = MAX_CHILDS-1; +static inline const char *rtn_scope(enum rt_scope_t s) +{ + static char buf[32]; - while (max >= 0 && stat->nodesizes[max] == 0) - max--; - pointers = 0; + switch(s) { + case RT_SCOPE_UNIVERSE: return "universe"; + case RT_SCOPE_SITE: return "site"; + case RT_SCOPE_LINK: return "link"; + case RT_SCOPE_HOST: return "host"; + case RT_SCOPE_NOWHERE: return "nowhere"; + default: + snprintf(buf, sizeof(buf), "scope=%d", s); + return buf; + } +} - for (i = 1; i <= max; i++) - if (stat->nodesizes[i] != 0) { - seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); - pointers += (1<nodesizes[i]; - } - seq_printf(seq, "\n"); - seq_printf(seq, "Pointers: %d\n", pointers); - bytes += sizeof(struct node *) * pointers; - seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); - seq_printf(seq, "Total size: %d kB\n", bytes / 1024); +static const char *rtn_type_names[__RTN_MAX] = { + [RTN_UNSPEC] = "UNSPEC", + [RTN_UNICAST] = "UNICAST", + [RTN_LOCAL] = "LOCAL", + [RTN_BROADCAST] = "BROADCAST", + [RTN_ANYCAST] = "ANYCAST", + [RTN_MULTICAST] = "MULTICAST", + [RTN_BLACKHOLE] = "BLACKHOLE", + [RTN_UNREACHABLE] = "UNREACHABLE", + [RTN_PROHIBIT] = "PROHIBIT", + [RTN_THROW] = "THROW", + [RTN_NAT] = "NAT", + [RTN_XRESOLVE] = "XRESOLVE", +}; - kfree(stat); - } +static inline const char *rtn_type(unsigned t) +{ + static char buf[32]; -#ifdef CONFIG_IP_FIB_TRIE_STATS - seq_printf(seq, "Counters:\n---------\n"); - seq_printf(seq,"gets = %d\n", t->stats.gets); - seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); - seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); - seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); - seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); - seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); -#ifdef CLEAR_STATS - memset(&(t->stats), 0, sizeof(t->stats)); -#endif -#endif /* CONFIG_IP_FIB_TRIE_STATS */ + if (t < __RTN_MAX && rtn_type_names[t]) + return rtn_type_names[t]; + snprintf(buf, sizeof(buf), "type %d", t); + return buf; } -static int fib_triestat_seq_show(struct seq_file *seq, void *v) +/* Pretty print the trie */ +static int fib_trie_seq_show(struct seq_file *seq, void *v) { - char bf[128]; + const struct fib_trie_iter *iter = seq->private; + struct node *n = v; - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct leaf), sizeof(struct tnode)); - if (trie_local) - collect_and_show(trie_local, seq); + if (v == SEQ_START_TOKEN) + return 0; - if (trie_main) - collect_and_show(trie_main, seq); - } else { - snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400); + if (IS_TNODE(n)) { + struct tnode *tn = (struct tnode *) n; + t_key prf = ntohl(MASK_PFX(tn->key, tn->pos)); - seq_printf(seq, "%-127s\n", bf); + if (!NODE_PARENT(n)) { + if (iter->trie == trie_local) + seq_puts(seq, ":\n"); + else + seq_puts(seq, "
:\n"); + } else { + seq_indent(seq, iter->depth-1); + seq_printf(seq, " +-- %d.%d.%d.%d/%d\n", + NIPQUAD(prf), tn->pos); + } + } else { + struct leaf *l = (struct leaf *) n; + int i; + u32 val = ntohl(l->key); + + seq_indent(seq, iter->depth); + seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); + for (i = 32; i >= 0; i--) { + struct leaf_info *li = find_leaf_info(&l->list, i); + if (li) { + struct fib_alias *fa; + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + seq_indent(seq, iter->depth+1); + seq_printf(seq, " /%d %s %s", i, + rtn_scope(fa->fa_scope), + rtn_type(fa->fa_type)); + if (fa->fa_tos) + seq_printf(seq, "tos =%d\n", + fa->fa_tos); + seq_putc(seq, '\n'); + } + } + } } + return 0; } -static struct seq_operations fib_triestat_seq_ops = { - .start = fib_triestat_seq_start, - .next = fib_triestat_seq_next, - .stop = fib_triestat_seq_stop, - .show = fib_triestat_seq_show, +static struct seq_operations fib_trie_seq_ops = { + .start = fib_trie_seq_start, + .next = fib_trie_seq_next, + .stop = fib_trie_seq_stop, + .show = fib_trie_seq_show, }; -static int fib_triestat_seq_open(struct inode *inode, struct file *file) +static int fib_trie_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; + struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - rc = seq_open(file, &fib_triestat_seq_ops); + if (!s) + goto out; + + rc = seq_open(file, &fib_trie_seq_ops); if (rc) goto out_kfree; - seq = file->private_data; + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); out: return rc; out_kfree: + kfree(s); goto out; } -static struct file_operations fib_triestat_seq_fops = { - .owner = THIS_MODULE, - .open = fib_triestat_seq_open, - .read = seq_read, - .llseek = seq_lseek, +static struct file_operations fib_trie_fops = { + .owner = THIS_MODULE, + .open = fib_trie_seq_open, + .read = seq_read, + .llseek = seq_lseek, .release = seq_release_private, }; -int __init fib_stat_proc_init(void) -{ - if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops)) - return -ENOMEM; - return 0; -} - -void __init fib_stat_proc_exit(void) +static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi) { - proc_net_remove("fib_triestat"); -} + static unsigned type2flags[RTN_MAX + 1] = { + [7] = RTF_REJECT, [8] = RTF_REJECT, + }; + unsigned flags = type2flags[type]; -static struct fib_alias *fib_trie_get_first(struct seq_file *seq) -{ - return NULL; + if (fi && fi->fib_nh->nh_gw) + flags |= RTF_GATEWAY; + if (mask == 0xFFFFFFFF) + flags |= RTF_HOST; + flags |= RTF_UP; + return flags; } -static struct fib_alias *fib_trie_get_next(struct seq_file *seq) +/* + * This outputs /proc/net/route. + * The format of the file is not supposed to be changed + * and needs to be same as fib_hash output to avoid breaking + * legacy utilities + */ +static int fib_route_seq_show(struct seq_file *seq, void *v) { - return NULL; -} + struct leaf *l = v; + int i; + char bf[128]; -static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (!ip_fib_main_table) - return NULL; + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " + "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" + "\tWindow\tIRTT"); + return 0; + } - if (*pos) - return fib_trie_get_next(seq); - else - return SEQ_START_TOKEN; -} + if (IS_TNODE(l)) + return 0; -static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - if (v == SEQ_START_TOKEN) - return fib_trie_get_first(seq); - else - return fib_trie_get_next(seq); + for (i=32; i>=0; i--) { + struct leaf_info *li = find_leaf_info(&l->list, i); + struct fib_alias *fa; + u32 mask, prefix; -} + if (!li) + continue; -static void fib_trie_seq_stop(struct seq_file *seq, void *v) -{ -} + mask = inet_make_mask(li->plen); + prefix = htonl(l->key); -/* - * This outputs /proc/net/fib_trie. - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. - */ + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + const struct fib_info *fi = rcu_dereference(fa->fa_info); + unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); -static int fib_trie_seq_show(struct seq_file *seq, void *v) -{ - char bf[128]; + if (fa->fa_type == RTN_BROADCAST + || fa->fa_type == RTN_MULTICAST) + continue; - if (v == SEQ_START_TOKEN) { - if (trie_local) - trie_dump_seq(seq, trie_local); + if (fi) + snprintf(bf, sizeof(bf), + "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", + prefix, + fi->fib_nh->nh_gw, flags, 0, 0, + fi->fib_priority, + mask, + (fi->fib_advmss ? fi->fib_advmss + 40 : 0), + fi->fib_window, + fi->fib_rtt >> 3); + else + snprintf(bf, sizeof(bf), + "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + prefix, 0, flags, 0, 0, 0, + mask, 0, 0, 0); - if (trie_main) - trie_dump_seq(seq, trie_main); - } else { - snprintf(bf, sizeof(bf), - "*\t%08X\t%08X", 200, 400); - seq_printf(seq, "%-127s\n", bf); + seq_printf(seq, "%-127s\n", bf); + } } return 0; } -static struct seq_operations fib_trie_seq_ops = { - .start = fib_trie_seq_start, - .next = fib_trie_seq_next, - .stop = fib_trie_seq_stop, - .show = fib_trie_seq_show, +static struct seq_operations fib_route_seq_ops = { + .start = fib_trie_seq_start, + .next = fib_trie_seq_next, + .stop = fib_trie_seq_stop, + .show = fib_route_seq_show, }; -static int fib_trie_seq_open(struct inode *inode, struct file *file) +static int fib_route_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; + struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - rc = seq_open(file, &fib_trie_seq_ops); + if (!s) + goto out; + + rc = seq_open(file, &fib_route_seq_ops); if (rc) goto out_kfree; - seq = file->private_data; + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); out: return rc; out_kfree: + kfree(s); goto out; } -static struct file_operations fib_trie_seq_fops = { - .owner = THIS_MODULE, - .open = fib_trie_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release= seq_release_private, +static struct file_operations fib_route_fops = { + .owner = THIS_MODULE, + .open = fib_route_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, }; int __init fib_proc_init(void) { - if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops)) - return -ENOMEM; + if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) + goto out1; + + if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) + goto out2; + + if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) + goto out3; + return 0; + +out3: + proc_net_remove("fib_triestat"); +out2: + proc_net_remove("fib_trie"); +out1: + return -ENOMEM; } void __init fib_proc_exit(void) { proc_net_remove("fib_trie"); + proc_net_remove("fib_triestat"); + proc_net_remove("route"); } #endif /* CONFIG_PROC_FS */