/*
 * Low Latency Sockets
 * Copyright(c) 2013 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Author: Eliezer Tamir
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 */

#ifndef _LINUX_NET_LL_POLL_H
#define _LINUX_NET_LL_POLL_H

#include <linux/netdevice.h>
#include <net/ip.h>

#ifdef CONFIG_NET_LL_RX_POLL

struct napi_struct;
extern unsigned int sysctl_net_ll_read __read_mostly;
extern unsigned int sysctl_net_ll_poll __read_mostly;
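/* Tunables sketch (editorial note, not part of the original header text):
 * these are presumably exposed under net.core -- sysctl_net_ll_poll bounds
 * how long poll()/select() may busy poll (see ll_run_time() below), while
 * sysctl_net_ll_read is expected to seed the per-socket sk->sk_ll_usec
 * budget used by blocking reads.
 */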

/* return values from ndo_ll_poll */
#define LL_FLUSH_FAILED		-1
#define LL_FLUSH_BUSY		-2

static inline unsigned int ll_get_flag(void)
{
	return sysctl_net_ll_poll ? POLL_LL : 0;
}
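/* Usage sketch (illustrative, not from this file): poll()/select() are
 * expected to sample this once per call, e.g.
 *
 *	unsigned int ll_flag = ll_get_flag();
 *
 * and only attempt low latency polling when ll_flag carries POLL_LL, so
 * the sysctl_net_ll_poll sysctl acts as a global enable switch.
 */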

/* a wrapper to make debug_smp_processor_id() happy
 * we can use sched_clock() because we don't care much about precision
 * we only care that the average is bounded
 */
#ifdef CONFIG_DEBUG_PREEMPT
static inline u64 ll_sched_clock(void)
{
	u64 rc;

	preempt_disable_notrace();
	rc = sched_clock();
	preempt_enable_no_resched_notrace();

	return rc;
}
#else /* CONFIG_DEBUG_PREEMPT */
static inline u64 ll_sched_clock(void)
{
	return sched_clock();
}
#endif /* CONFIG_DEBUG_PREEMPT */

/* we don't mind a ~2.5% imprecision so <<10 instead of *1000
 * sk->sk_ll_usec is a u_int so this can't overflow
 */
static inline u64 ll_sk_run_time(struct sock *sk)
{
	return (u64)ACCESS_ONCE(sk->sk_ll_usec) << 10;
}
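/* Worked example (illustrative): with sk->sk_ll_usec == 50, the budget
 * becomes 50 << 10 = 51200 ns instead of the exact 50 * 1000 = 50000 ns,
 * about 2.4% longer -- the small imprecision the comment above is willing
 * to accept in exchange for avoiding a multiply.
 */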

/* in poll/select we use the global sysctl_net_ll_poll value
 * only call sched_clock() if enabled
 */
static inline u64 ll_run_time(void)
{
	return (u64)ACCESS_ONCE(sysctl_net_ll_poll) << 10;
}

/* if flag is not set we don't need to know the time */
static inline u64 ll_start_time(unsigned int flag)
{
	return flag ? ll_sched_clock() : 0;
}

static inline bool sk_valid_ll(struct sock *sk)
{
	return sk->sk_ll_usec && sk->sk_napi_id &&
	       !need_resched() && !signal_pending(current);
}

/* careful! time_in_range64() evaluates its first argument ('now') twice */
static inline bool can_poll_ll(u64 start_time, u64 run_time)
{
	u64 now = ll_sched_clock();

	return time_in_range64(now, start_time, start_time + run_time);
}

/* when used in sock_poll() nonblock is known at compile time to be true
 * so the loop and end_time will be optimized out
 */
static inline bool sk_poll_ll(struct sock *sk, int nonblock)
{
	u64 start_time = ll_start_time(!nonblock);
	u64 run_time = ll_sk_run_time(sk);
	const struct net_device_ops *ops;
	struct napi_struct *napi;
	int rc = false;

	/*
	 * rcu read lock for napi hash
	 * bh so we don't race with net_rx_action
	 */
	rcu_read_lock_bh();

	napi = napi_by_id(sk->sk_napi_id);
	if (!napi)
		goto out;

	ops = napi->dev->netdev_ops;
	if (!ops->ndo_ll_poll)
		goto out;

	do {
		rc = ops->ndo_ll_poll(napi);

		if (rc == LL_FLUSH_FAILED)
			break; /* permanent failure */

		if (rc > 0)
			/* local bh are disabled so it is ok to use _BH */
			NET_ADD_STATS_BH(sock_net(sk),
					 LINUX_MIB_LOWLATENCYRXPACKETS, rc);

	} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
		 can_poll_ll(start_time, run_time));

	rc = !skb_queue_empty(&sk->sk_receive_queue);
out:
	rcu_read_unlock_bh();
	return rc;
}
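/* Caller sketch (illustrative; the real call sites live in the protocol
 * code, not in this header): a blocking receive path would gate on
 * sk_valid_ll() and then spin before sleeping, roughly:
 *
 *	if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue))
 *		sk_poll_ll(sk, nonblock);
 *
 * sk_poll_ll() keeps calling ndo_ll_poll() until data shows up on the
 * receive queue or the sk_ll_usec budget expires.
 */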

/* used in the NIC receive handler to mark the skb */
static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi)
{
	skb->napi_id = napi->napi_id;
}

/* used in the protocol handler to propagate the napi_id to the socket */
static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb)
{
	sk->sk_napi_id = skb->napi_id;
}
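/* Flow sketch (illustrative; driver and call-site names are generic): the
 * NIC driver stamps every received skb with the napi context it arrived on,
 * and the protocol copies that id onto the owning socket, e.g.
 *
 *	skb_mark_ll(skb, &rx_ring->napi);	(in the driver's napi poll routine)
 *	sk_mark_ll(sk, skb);			(when the skb is queued to a socket)
 *
 * sk_poll_ll() later resolves sk->sk_napi_id via napi_by_id() so it knows
 * which queue to busy poll.
 */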

#else /* CONFIG_NET_LL_RX_POLL */
static inline unsigned int ll_get_flag(void)
{
	return 0;
}

static inline u64 ll_start_time(unsigned int flag)
{
	return 0;
}

static inline u64 ll_run_time(void)
{
	return 0;
}

static inline bool sk_valid_ll(struct sock *sk)
{
	return false;
}

static inline bool sk_poll_ll(struct sock *sk, int nonblock)
{
	return false;
}

static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi)
{
}

static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb)
{
}

static inline bool can_poll_ll(u64 start_time, u64 run_time)
{
	return false;
}

#endif /* CONFIG_NET_LL_RX_POLL */
#endif /* _LINUX_NET_LL_POLL_H */