提交 90017acc 编写于 作者: M Marcelo Ricardo Leitner 提交者: David S. Miller

sctp: Add GSO support

SCTP has this peculiarity that its packets cannot be just segmented to
(P)MTU. Its chunks must be contained in IP segments, padding respected.
So we can't just generate a big skb, set gso_size to the fragmentation
point and deliver it to IP layer.

This patch takes a different approach. SCTP will now build a skb as it
would be if it was received using GRO. That is, there will be a cover
skb with protocol headers and children ones containing the actual
segments, already segmented in a way that respects SCTP RFCs.

With that, we can tell skb_segment() to just split based on frag_list,
trusting its sizes are already in accordance.

This way SCTP can benefit from GSO and instead of passing several
packets through the stack, it can pass a single large packet.

v2:
- Added support for receiving GSO frames, as requested by Dave Miller.
- Clear skb->cb if packet is GSO (otherwise it's not used by SCTP)
- Added heuristics similar to what we have in TCP for not generating
  single GSO packets that fill cwnd.
v3:
- consider sctphdr size in skb_gso_transport_seglen()
- rebased due to 5c7cdf33 ("gso: Remove arbitrary checks for
  unsupported GSO")
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 3acb50c1
...@@ -53,8 +53,9 @@ enum { ...@@ -53,8 +53,9 @@ enum {
* headers in software. * headers in software.
*/ */
NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
NETIF_F_GSO_TUNNEL_REMCSUM_BIT, NETIF_F_GSO_SCTP_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
...@@ -128,6 +129,7 @@ enum { ...@@ -128,6 +129,7 @@ enum {
#define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID)
#define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) #define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL)
#define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
#define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP)
#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
...@@ -166,7 +168,8 @@ enum { ...@@ -166,7 +168,8 @@ enum {
NETIF_F_FSO) NETIF_F_FSO)
/* List of features with software fallbacks. */ /* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO) #define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \
NETIF_F_GSO_SCTP)
/* /*
* If one device supports one of these features, then enable them * If one device supports one of these features, then enable them
......
...@@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) ...@@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature; return (features & feature) == feature;
} }
......
...@@ -487,6 +487,8 @@ enum { ...@@ -487,6 +487,8 @@ enum {
SKB_GSO_PARTIAL = 1 << 13, SKB_GSO_PARTIAL = 1 << 13,
SKB_GSO_TUNNEL_REMCSUM = 1 << 14, SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
SKB_GSO_SCTP = 1 << 15,
}; };
#if BITS_PER_LONG > 32 #if BITS_PER_LONG > 32
......
...@@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net); ...@@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net);
int sctp_remaddr_proc_init(struct net *net); int sctp_remaddr_proc_init(struct net *net);
void sctp_remaddr_proc_exit(struct net *net); void sctp_remaddr_proc_exit(struct net *net);
/*
* sctp/offload.c
*/
int sctp_offload_init(void);
/* /*
* Module global variables * Module global variables
......
...@@ -566,6 +566,9 @@ struct sctp_chunk { ...@@ -566,6 +566,9 @@ struct sctp_chunk {
/* This points to the sk_buff containing the actual data. */ /* This points to the sk_buff containing the actual data. */
struct sk_buff *skb; struct sk_buff *skb;
/* In case of GSO packets, this will store the head one */
struct sk_buff *head_skb;
/* These are the SCTP headers by reverse order in a packet. /* These are the SCTP headers by reverse order in a packet.
* Note that some of these may happen more than once. In that * Note that some of these may happen more than once. In that
* case, we point at the "current" one, whatever that means * case, we point at the "current" one, whatever that means
...@@ -696,6 +699,8 @@ struct sctp_packet { ...@@ -696,6 +699,8 @@ struct sctp_packet {
size_t overhead; size_t overhead;
/* This is the total size of all chunks INCLUDING padding. */ /* This is the total size of all chunks INCLUDING padding. */
size_t size; size_t size;
/* This is the maximum size this packet may have */
size_t max_size;
/* The packet is destined for this transport address. /* The packet is destined for this transport address.
* The function we finally use to pass down to the next lower * The function we finally use to pass down to the next lower
......
...@@ -89,6 +89,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] ...@@ -89,6 +89,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
......
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/tcp.h> #include <linux/tcp.h>
#include <linux/udp.h> #include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT #ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h> #include <net/pkt_sched.h>
...@@ -4383,6 +4384,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) ...@@ -4383,6 +4384,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen += inner_tcp_hdrlen(skb); thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb); thlen = tcp_hdrlen(skb);
} else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
thlen = sizeof(struct sctphdr);
} }
/* UFO sets gso_size to the size of the fragmentation /* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already * payload, i.e. the size of the L4 (UDP) header is already
......
...@@ -11,7 +11,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ ...@@ -11,7 +11,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
transport.o chunk.o sm_make_chunk.o ulpevent.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \
inqueue.o outqueue.o ulpqueue.o \ inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \ tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o ssnmap.o auth.o output.o input.o debug.o ssnmap.o auth.o \
offload.o
sctp_probe-y := probe.o sctp_probe-y := probe.o
......
...@@ -139,7 +139,9 @@ int sctp_rcv(struct sk_buff *skb) ...@@ -139,7 +139,9 @@ int sctp_rcv(struct sk_buff *skb)
skb->csum_valid = 0; /* Previous value not applicable */ skb->csum_valid = 0; /* Previous value not applicable */
if (skb_csum_unnecessary(skb)) if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb); __skb_decr_checksum_unnecessary(skb);
else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0) else if (!sctp_checksum_disable &&
!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
sctp_rcv_checksum(net, skb) < 0)
goto discard_it; goto discard_it;
skb->csum_valid = 1; skb->csum_valid = 1;
...@@ -1175,6 +1177,14 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, ...@@ -1175,6 +1177,14 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
{ {
sctp_chunkhdr_t *ch; sctp_chunkhdr_t *ch;
/* We do not allow GSO frames here as we need to linearize and
* then cannot guarantee frame boundaries. This shouldn't be an
* issue as packets hitting this are mostly INIT or INIT-ACK and
* those cannot be on GSO-style anyway.
*/
if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
return NULL;
if (skb_linearize(skb)) if (skb_linearize(skb))
return NULL; return NULL;
......
...@@ -138,6 +138,17 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) ...@@ -138,6 +138,17 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
if (chunk->singleton || if (chunk->singleton ||
chunk->end_of_packet || chunk->end_of_packet ||
chunk->pdiscard) { chunk->pdiscard) {
if (chunk->head_skb == chunk->skb) {
chunk->skb = skb_shinfo(chunk->skb)->frag_list;
goto new_skb;
}
if (chunk->skb->next) {
chunk->skb = chunk->skb->next;
goto new_skb;
}
if (chunk->head_skb)
chunk->skb = chunk->head_skb;
sctp_chunk_free(chunk); sctp_chunk_free(chunk);
chunk = queue->in_progress = NULL; chunk = queue->in_progress = NULL;
} else { } else {
...@@ -155,15 +166,15 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) ...@@ -155,15 +166,15 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
next_chunk: next_chunk:
/* Is the queue empty? */ /* Is the queue empty? */
if (list_empty(&queue->in_chunk_list)) entry = sctp_list_dequeue(&queue->in_chunk_list);
if (!entry)
return NULL; return NULL;
entry = queue->in_chunk_list.next;
chunk = list_entry(entry, struct sctp_chunk, list); chunk = list_entry(entry, struct sctp_chunk, list);
list_del_init(entry);
/* Linearize if it's not GSO */ /* Linearize if it's not GSO */
if (skb_is_nonlinear(chunk->skb)) { if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
skb_is_nonlinear(chunk->skb)) {
if (skb_linearize(chunk->skb)) { if (skb_linearize(chunk->skb)) {
__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
sctp_chunk_free(chunk); sctp_chunk_free(chunk);
...@@ -174,15 +185,39 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) ...@@ -174,15 +185,39 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
chunk->sctp_hdr = sctp_hdr(chunk->skb); chunk->sctp_hdr = sctp_hdr(chunk->skb);
} }
if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
/* GSO-marked skbs but without frags, handle
* them normally
*/
if (skb_shinfo(chunk->skb)->frag_list)
chunk->head_skb = chunk->skb;
/* skbs with "cover letter" */
if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len)
chunk->skb = skb_shinfo(chunk->skb)->frag_list;
if (WARN_ON(!chunk->skb)) {
__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
sctp_chunk_free(chunk);
goto next_chunk;
}
}
if (chunk->asoc)
sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb);
queue->in_progress = chunk; queue->in_progress = chunk;
new_skb:
/* This is the first chunk in the packet. */ /* This is the first chunk in the packet. */
chunk->singleton = 1;
ch = (sctp_chunkhdr_t *) chunk->skb->data; ch = (sctp_chunkhdr_t *) chunk->skb->data;
chunk->singleton = 1;
chunk->data_accepted = 0; chunk->data_accepted = 0;
chunk->pdiscard = 0;
if (chunk->asoc) chunk->auth = 0;
sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb); chunk->has_asconf = 0;
chunk->end_of_packet = 0;
chunk->ecn_ce_done = 0;
} }
chunk->chunk_hdr = ch; chunk->chunk_hdr = ch;
......
/*
* sctp_offload - GRO/GSO Offloading for SCTP
*
* Copyright (C) 2015, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/socket.h>
#include <linux/sctp.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kfifo.h>
#include <linux/time.h>
#include <net/net_namespace.h>
#include <linux/skbuff.h>
#include <net/sctp/sctp.h>
#include <net/sctp/checksum.h>
#include <net/protocol.h>
/* Compute the SCTP checksum of one segment in software.
 *
 * Marks @skb as CHECKSUM_NONE and returns the value produced by
 * sctp_compute_cksum() starting at the transport header offset.
 * The caller is responsible for storing the result in the header.
 */
static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
{
	const int hdr_off = skb_transport_offset(skb);

	skb->ip_summed = CHECKSUM_NONE;

	return sctp_compute_cksum(skb, hdr_off);
}
/* gso_segment callback for SCTP.
 *
 * @skb:      GSO skb; the SCTP common header is pulled from its front
 * @features: netdev features to segment for
 *
 * Returns:
 *  - ERR_PTR(-EINVAL) if the SCTP common header cannot be pulled;
 *  - NULL if skb_gso_ok() accepts @skb for @features | NETIF_F_GSO_ROBUST,
 *    after gso_segs has been recounted from the skb layout;
 *  - otherwise whatever skb_segment() returned (a segment list or an
 *    ERR_PTR), with the SCTP checksum filled in for CHECKSUM_PARTIAL
 *    segments when @features lacks NETIF_F_SCTP_CRC.
 */
static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
					netdev_features_t features)
{
	struct sk_buff *seg_list, *seg;
	struct sctphdr *sh;

	if (!pskb_may_pull(skb, sizeof(struct sctphdr)))
		return ERR_PTR(-EINVAL);

	__skb_pull(skb, sizeof(struct sctphdr));

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source, reset gso_segs. */
		struct skb_shared_info *shinfo = skb_shinfo(skb);
		struct sk_buff *frag;

		/* len != data_len means we have chunks in the head skb
		 * too, so it counts as a segment of its own.
		 */
		shinfo->gso_segs = (skb->len != skb->data_len) ? 1 : 0;

		/* One more segment per skb on the frag list. */
		skb_walk_frags(skb, frag)
			shinfo->gso_segs++;

		return NULL;
	}

	seg_list = skb_segment(skb, features | NETIF_F_HW_CSUM);
	if (IS_ERR(seg_list))
		return seg_list;

	/* With NETIF_F_SCTP_CRC set the checksum is left untouched here. */
	if (features & NETIF_F_SCTP_CRC)
		return seg_list;

	/* All that is left is to update the SCTP CRC where still pending. */
	for (seg = seg_list; seg; seg = seg->next) {
		if (seg->ip_summed != CHECKSUM_PARTIAL)
			continue;

		sh = sctp_hdr(seg);
		sh->checksum = sctp_gso_make_checksum(seg);
	}

	return seg_list;
}
/* Offload descriptor for SCTP: only a gso_segment callback is provided. */
static const struct net_offload sctp_offload = {
	.callbacks.gso_segment = sctp_gso_segment,
};
/* Register sctp_offload for IPPROTO_SCTP.
 *
 * Returns the status reported by inet_add_offload().
 */
int __init sctp_offload_init(void)
{
	int status;

	status = inet_add_offload(&sctp_offload, IPPROTO_SCTP);

	return status;
}
...@@ -84,18 +84,42 @@ static void sctp_packet_reset(struct sctp_packet *packet) ...@@ -84,18 +84,42 @@ static void sctp_packet_reset(struct sctp_packet *packet)
struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
__u32 vtag, int ecn_capable) __u32 vtag, int ecn_capable)
{ {
struct sctp_chunk *chunk = NULL; struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
packet->vtag = vtag; packet->vtag = vtag;
if (asoc && tp->dst) {
struct sock *sk = asoc->base.sk;
rcu_read_lock();
if (__sk_dst_get(sk) != tp->dst) {
dst_hold(tp->dst);
sk_setup_caps(sk, tp->dst);
}
if (sk_can_gso(sk)) {
struct net_device *dev = tp->dst->dev;
packet->max_size = dev->gso_max_size;
} else {
packet->max_size = asoc->pathmtu;
}
rcu_read_unlock();
} else {
packet->max_size = tp->pathmtu;
}
if (ecn_capable && sctp_packet_empty(packet)) { if (ecn_capable && sctp_packet_empty(packet)) {
chunk = sctp_get_ecne_prepend(packet->transport->asoc); struct sctp_chunk *chunk;
/* If there a is a prepend chunk stick it on the list before /* If there a is a prepend chunk stick it on the list before
* any other chunks get appended. * any other chunks get appended.
*/ */
chunk = sctp_get_ecne_prepend(asoc);
if (chunk) if (chunk)
sctp_packet_append_chunk(packet, chunk); sctp_packet_append_chunk(packet, chunk);
} }
...@@ -381,12 +405,15 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ...@@ -381,12 +405,15 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
struct sctp_transport *tp = packet->transport; struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc; struct sctp_association *asoc = tp->asoc;
struct sctphdr *sh; struct sctphdr *sh;
struct sk_buff *nskb; struct sk_buff *nskb = NULL, *head = NULL;
struct sctp_chunk *chunk, *tmp; struct sctp_chunk *chunk, *tmp;
struct sock *sk; struct sock *sk;
int err = 0; int err = 0;
int padding; /* How much padding do we need? */ int padding; /* How much padding do we need? */
int pkt_size;
__u8 has_data = 0; __u8 has_data = 0;
int gso = 0;
int pktcount = 0;
struct dst_entry *dst; struct dst_entry *dst;
unsigned char *auth = NULL; /* pointer to auth in skb data */ unsigned char *auth = NULL; /* pointer to auth in skb data */
...@@ -400,18 +427,37 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ...@@ -400,18 +427,37 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
sk = chunk->skb->sk; sk = chunk->skb->sk;
/* Allocate the new skb. */ /* Allocate the head skb, or main one if not in GSO */
nskb = alloc_skb(packet->size + MAX_HEADER, gfp); if (packet->size > tp->pathmtu && !packet->ipfragok) {
if (!nskb) if (sk_can_gso(sk)) {
gso = 1;
pkt_size = packet->overhead;
} else {
/* If this happens, we trash this packet and try
* to build a new one, hopefully correct this
* time. Application may notice this error.
*/
pr_err_once("Trying to GSO but underlying device doesn't support it.");
goto nomem;
}
} else {
pkt_size = packet->size;
}
head = alloc_skb(pkt_size + MAX_HEADER, gfp);
if (!head)
goto nomem; goto nomem;
if (gso) {
NAPI_GRO_CB(head)->last = head;
skb_shinfo(head)->gso_type = sk->sk_gso_type;
}
/* Make sure the outbound skb has enough header room reserved. */ /* Make sure the outbound skb has enough header room reserved. */
skb_reserve(nskb, packet->overhead + MAX_HEADER); skb_reserve(head, packet->overhead + MAX_HEADER);
/* Set the owning socket so that we know where to get the /* Set the owning socket so that we know where to get the
* destination IP address. * destination IP address.
*/ */
sctp_packet_set_owner_w(nskb, sk); sctp_packet_set_owner_w(head, sk);
if (!sctp_transport_dst_check(tp)) { if (!sctp_transport_dst_check(tp)) {
sctp_transport_route(tp, NULL, sctp_sk(sk)); sctp_transport_route(tp, NULL, sctp_sk(sk));
...@@ -422,11 +468,11 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ...@@ -422,11 +468,11 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
dst = dst_clone(tp->dst); dst = dst_clone(tp->dst);
if (!dst) if (!dst)
goto no_route; goto no_route;
skb_dst_set(nskb, dst); skb_dst_set(head, dst);
/* Build the SCTP header. */ /* Build the SCTP header. */
sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
skb_reset_transport_header(nskb); skb_reset_transport_header(head);
sh->source = htons(packet->source_port); sh->source = htons(packet->source_port);
sh->dest = htons(packet->destination_port); sh->dest = htons(packet->destination_port);
...@@ -441,90 +487,133 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ...@@ -441,90 +487,133 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
sh->vtag = htonl(packet->vtag); sh->vtag = htonl(packet->vtag);
sh->checksum = 0; sh->checksum = 0;
/**
* 6.10 Bundling
*
* An endpoint bundles chunks by simply including multiple
* chunks in one outbound SCTP packet. ...
*/
/**
* 3.2 Chunk Field Descriptions
*
* The total length of a chunk (including Type, Length and
* Value fields) MUST be a multiple of 4 bytes. If the length
* of the chunk is not a multiple of 4 bytes, the sender MUST
* pad the chunk with all zero bytes and this padding is not
* included in the chunk length field. The sender should
* never pad with more than 3 bytes.
*
* [This whole comment explains WORD_ROUND() below.]
*/
pr_debug("***sctp_transmit_packet***\n"); pr_debug("***sctp_transmit_packet***\n");
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { do {
list_del_init(&chunk->list); /* Set up convenience variables... */
if (sctp_chunk_is_data(chunk)) { chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
/* 6.3.1 C4) When data is in flight and when allowed pktcount++;
* by rule C5, a new RTT measurement MUST be made each
* round trip. Furthermore, new RTT measurements
* SHOULD be made no more than once per round-trip
* for a given destination transport address.
*/
if (!chunk->resent && !tp->rto_pending) { /* Calculate packet size, so it fits in PMTU. Leave
chunk->rtt_in_progress = 1; * other chunks for the next packets.
tp->rto_pending = 1; */
if (gso) {
pkt_size = packet->overhead;
list_for_each_entry(chunk, &packet->chunk_list, list) {
int padded = WORD_ROUND(chunk->skb->len);
if (pkt_size + padded > tp->pathmtu)
break;
pkt_size += padded;
} }
has_data = 1; /* Allocate a new skb. */
} nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
if (!nskb)
goto nomem;
padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; /* Make sure the outbound skb has enough header
if (padding) * room reserved.
memset(skb_put(chunk->skb, padding), 0, padding); */
skb_reserve(nskb, packet->overhead + MAX_HEADER);
} else {
nskb = head;
}
/* if this is the auth chunk that we are adding, /**
* store pointer where it will be added and put * 3.2 Chunk Field Descriptions
* the auth into the packet. *
* The total length of a chunk (including Type, Length and
* Value fields) MUST be a multiple of 4 bytes. If the length
* of the chunk is not a multiple of 4 bytes, the sender MUST
* pad the chunk with all zero bytes and this padding is not
* included in the chunk length field. The sender should
* never pad with more than 3 bytes.
*
* [This whole comment explains WORD_ROUND() below.]
*/ */
if (chunk == packet->auth)
auth = skb_tail_pointer(nskb);
memcpy(skb_put(nskb, chunk->skb->len), pkt_size -= packet->overhead;
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
list_del_init(&chunk->list);
if (sctp_chunk_is_data(chunk)) {
/* 6.3.1 C4) When data is in flight and when allowed
* by rule C5, a new RTT measurement MUST be made each
* round trip. Furthermore, new RTT measurements
* SHOULD be made no more than once per round-trip
* for a given destination transport address.
*/
if (!chunk->resent && !tp->rto_pending) {
chunk->rtt_in_progress = 1;
tp->rto_pending = 1;
}
has_data = 1;
}
padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
if (padding)
memset(skb_put(chunk->skb, padding), 0, padding);
/* if this is the auth chunk that we are adding,
* store pointer where it will be added and put
* the auth into the packet.
*/
if (chunk == packet->auth)
auth = skb_tail_pointer(nskb);
memcpy(skb_put(nskb, chunk->skb->len),
chunk->skb->data, chunk->skb->len); chunk->skb->data, chunk->skb->len);
pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, " pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
"rtt_in_progress:%d\n", chunk, chunk,
sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
chunk->has_tsn ? "TSN" : "No TSN", chunk->has_tsn ? "TSN" : "No TSN",
chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
ntohs(chunk->chunk_hdr->length), chunk->skb->len, ntohs(chunk->chunk_hdr->length), chunk->skb->len,
chunk->rtt_in_progress); chunk->rtt_in_progress);
/* /* If this is a control chunk, this is our last
* If this is a control chunk, this is our last * reference. Free data chunks after they've been
* reference. Free data chunks after they've been * acknowledged or have failed.
* acknowledged or have failed. * Re-queue auth chunks if needed.
*/ */
if (!sctp_chunk_is_data(chunk)) pkt_size -= WORD_ROUND(chunk->skb->len);
sctp_chunk_free(chunk);
}
/* SCTP-AUTH, Section 6.2 if (chunk == packet->auth && !list_empty(&packet->chunk_list))
* The sender MUST calculate the MAC as described in RFC2104 [2] list_add(&chunk->list, &packet->chunk_list);
* using the hash function H as described by the MAC Identifier and else if (!sctp_chunk_is_data(chunk))
* the shared association key K based on the endpoint pair shared key sctp_chunk_free(chunk);
* described by the shared key identifier. The 'data' used for the
* computation of the AUTH-chunk is given by the AUTH chunk with its if (!pkt_size)
* HMAC field set to zero (as shown in Figure 6) followed by all break;
* chunks that are placed after the AUTH chunk in the SCTP packet. }
*/
if (auth) /* SCTP-AUTH, Section 6.2
sctp_auth_calculate_hmac(asoc, nskb, * The sender MUST calculate the MAC as described in RFC2104 [2]
(struct sctp_auth_chunk *)auth, * using the hash function H as described by the MAC Identifier and
gfp); * the shared association key K based on the endpoint pair shared key
* described by the shared key identifier. The 'data' used for the
* computation of the AUTH-chunk is given by the AUTH chunk with its
* HMAC field set to zero (as shown in Figure 6) followed by all
* chunks that are placed after the AUTH chunk in the SCTP packet.
*/
if (auth)
sctp_auth_calculate_hmac(asoc, nskb,
(struct sctp_auth_chunk *)auth,
gfp);
if (!gso)
break;
if (skb_gro_receive(&head, nskb))
goto nomem;
nskb = NULL;
if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
sk->sk_gso_max_segs))
goto nomem;
} while (!list_empty(&packet->chunk_list));
/* 2) Calculate the Adler-32 checksum of the whole packet, /* 2) Calculate the Adler-32 checksum of the whole packet,
* including the SCTP common header and all the * including the SCTP common header and all the
...@@ -532,16 +621,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ...@@ -532,16 +621,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* *
* Note: Adler-32 is no longer applicable, as has been replaced * Note: Adler-32 is no longer applicable, as has been replaced
* by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
*
* If it's a GSO packet, it's postponed to sctp_gso_segment.
*/ */
if (!sctp_checksum_disable) { if (!sctp_checksum_disable || gso) {
if (!(dst->dev->features & NETIF_F_SCTP_CRC) || if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
(dst_xfrm(dst) != NULL) || packet->ipfragok) { dst_xfrm(dst) || packet->ipfragok)) {
sh->checksum = sctp_compute_cksum(nskb, 0); sh->checksum = sctp_compute_cksum(head, 0);
} else { } else {
/* no need to seed pseudo checksum for SCTP */ /* no need to seed pseudo checksum for SCTP */
nskb->ip_summed = CHECKSUM_PARTIAL; head->ip_summed = CHECKSUM_PARTIAL;
nskb->csum_start = skb_transport_header(nskb) - nskb->head; head->csum_start = skb_transport_header(head) - head->head;
nskb->csum_offset = offsetof(struct sctphdr, checksum); head->csum_offset = offsetof(struct sctphdr, checksum);
} }
} }
...@@ -557,7 +648,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ...@@ -557,7 +648,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* Note: The works for IPv6 layer checks this bit too later * Note: The works for IPv6 layer checks this bit too later
* in transmission. See IP6_ECN_flow_xmit(). * in transmission. See IP6_ECN_flow_xmit().
*/ */
tp->af_specific->ecn_capable(nskb->sk); tp->af_specific->ecn_capable(sk);
/* Set up the IP options. */ /* Set up the IP options. */
/* BUG: not implemented /* BUG: not implemented
...@@ -566,7 +657,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ...@@ -566,7 +657,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
/* Dump that on IP! */ /* Dump that on IP! */
if (asoc) { if (asoc) {
asoc->stats.opackets++; asoc->stats.opackets += pktcount;
if (asoc->peer.last_sent_to != tp) if (asoc->peer.last_sent_to != tp)
/* Considering the multiple CPU scenario, this is a /* Considering the multiple CPU scenario, this is a
* "correcter" place for last_sent_to. --xguo * "correcter" place for last_sent_to. --xguo
...@@ -589,16 +680,36 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) ...@@ -589,16 +680,36 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
} }
} }
pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
if (gso) {
/* Cleanup our debris for IP stacks */
memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
sizeof(struct inet6_skb_parm)));
nskb->ignore_df = packet->ipfragok; skb_shinfo(head)->gso_segs = pktcount;
tp->af_specific->sctp_xmit(nskb, tp); skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
/* We have to refresh this in case we are xmiting to
* more than one transport at a time
*/
rcu_read_lock();
if (__sk_dst_get(sk) != tp->dst) {
dst_hold(tp->dst);
sk_setup_caps(sk, tp->dst);
}
rcu_read_unlock();
}
head->ignore_df = packet->ipfragok;
tp->af_specific->sctp_xmit(head, tp);
out: out:
sctp_packet_reset(packet); sctp_packet_reset(packet);
return err; return err;
no_route: no_route:
kfree_skb(nskb); kfree_skb(head);
if (nskb != head)
kfree_skb(nskb);
if (asoc) if (asoc)
IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
...@@ -751,39 +862,63 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, ...@@ -751,39 +862,63 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
struct sctp_chunk *chunk, struct sctp_chunk *chunk,
u16 chunk_len) u16 chunk_len)
{ {
size_t psize; size_t psize, pmtu;
size_t pmtu;
int too_big;
sctp_xmit_t retval = SCTP_XMIT_OK; sctp_xmit_t retval = SCTP_XMIT_OK;
psize = packet->size; psize = packet->size;
pmtu = ((packet->transport->asoc) ? if (packet->transport->asoc)
(packet->transport->asoc->pathmtu) : pmtu = packet->transport->asoc->pathmtu;
(packet->transport->pathmtu)); else
pmtu = packet->transport->pathmtu;
too_big = (psize + chunk_len > pmtu);
/* Decide if we need to fragment or resubmit later. */ /* Decide if we need to fragment or resubmit later. */
if (too_big) { if (psize + chunk_len > pmtu) {
/* It's OK to fragmet at IP level if any one of the following /* It's OK to fragment at IP level if any one of the following
* is true: * is true:
* 1. The packet is empty (meaning this chunk is greater * 1. The packet is empty (meaning this chunk is greater
* the MTU) * the MTU)
* 2. The chunk we are adding is a control chunk * 2. The packet doesn't have any data in it yet and data
* 3. The packet doesn't have any data in it yet and data * requires authentication.
* requires authentication.
*/ */
if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || if (sctp_packet_empty(packet) ||
(!packet->has_data && chunk->auth)) { (!packet->has_data && chunk->auth)) {
/* We no longer do re-fragmentation. /* We no longer do re-fragmentation.
* Just fragment at the IP layer, if we * Just fragment at the IP layer, if we
* actually hit this condition * actually hit this condition
*/ */
packet->ipfragok = 1; packet->ipfragok = 1;
} else { goto out;
retval = SCTP_XMIT_PMTU_FULL;
} }
/* It is also okay to fragment if the chunk we are
* adding is a control chunk, but only if current packet
* is not a GSO one otherwise it causes fragmentation of
* a large frame. So in this case we allow the
* fragmentation by forcing it to be in a new packet.
*/
if (!sctp_chunk_is_data(chunk) && packet->has_data)
retval = SCTP_XMIT_PMTU_FULL;
if (psize + chunk_len > packet->max_size)
/* Hit GSO/PMTU limit, gotta flush */
retval = SCTP_XMIT_PMTU_FULL;
if (!packet->transport->burst_limited &&
psize + chunk_len > (packet->transport->cwnd >> 1))
/* Do not allow a single GSO packet to use more
* than half of cwnd.
*/
retval = SCTP_XMIT_PMTU_FULL;
if (packet->transport->burst_limited &&
psize + chunk_len > (packet->transport->burst_limited >> 1))
/* Do not allow a single GSO packet to use more
* than half of original cwnd.
*/
retval = SCTP_XMIT_PMTU_FULL;
/* Otherwise it will fit in the GSO packet */
} }
out:
return retval; return retval;
} }
...@@ -1516,6 +1516,9 @@ static __init int sctp_init(void) ...@@ -1516,6 +1516,9 @@ static __init int sctp_init(void)
if (status) if (status)
goto err_v6_add_protocol; goto err_v6_add_protocol;
if (sctp_offload_init() < 0)
pr_crit("%s: Cannot add SCTP protocol offload\n", __func__);
out: out:
return status; return status;
err_v6_add_protocol: err_v6_add_protocol:
......
...@@ -4003,6 +4003,8 @@ static int sctp_init_sock(struct sock *sk) ...@@ -4003,6 +4003,8 @@ static int sctp_init_sock(struct sock *sk)
return -ESOCKTNOSUPPORT; return -ESOCKTNOSUPPORT;
} }
sk->sk_gso_type = SKB_GSO_SCTP;
/* Initialize default send parameters. These parameters can be /* Initialize default send parameters. These parameters can be
* modified with the SCTP_DEFAULT_SEND_PARAM socket option. * modified with the SCTP_DEFAULT_SEND_PARAM socket option.
*/ */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册