diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index e57df8dfe6b4ec29b0c81b2f71d9c5809b1fa228..5303a96b4327e97fdd238ed58f9f703c9f2c700a 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -66,7 +66,7 @@
 #include "s2io.h"
 #include "s2io-regs.h"
 
-#define DRV_VERSION "Version 2.0.9.3"
+#define DRV_VERSION "Version 2.0.9.4"
 
 /* S2io Driver name & version. */
 static char s2io_driver_name[] = "Neterion";
@@ -412,7 +412,7 @@ static int init_shared_mem(struct s2io_nic *nic)
 		    config->tx_cfg[i].fifo_len - 1;
 		mac_control->fifos[i].fifo_no = i;
 		mac_control->fifos[i].nic = nic;
-		mac_control->fifos[i].max_txds = MAX_SKB_FRAGS + 1;
+		mac_control->fifos[i].max_txds = MAX_SKB_FRAGS + 2;
 
 		for (j = 0; j < page_num; j++) {
 			int k = 0;
@@ -459,6 +459,10 @@ static int init_shared_mem(struct s2io_nic *nic)
 		}
 	}
 
+	nic->ufo_in_band_v = kmalloc((sizeof(u64) * size), GFP_KERNEL);
+	if (!nic->ufo_in_band_v)
+		return -ENOMEM;
+
 	/* Allocation and initialization of RXDs in Rings */
 	size = 0;
 	for (i = 0; i < config->rx_ring_num; i++) {
@@ -731,6 +735,8 @@ static void free_shared_mem(struct s2io_nic *nic)
 				    mac_control->stats_mem,
 				    mac_control->stats_mem_phy);
 	}
+	if (nic->ufo_in_band_v)
+		kfree(nic->ufo_in_band_v);
 }
 
 /**
@@ -2003,6 +2009,49 @@ static int start_nic(struct s2io_nic *nic)
 
 	return SUCCESS;
 }
+/**
+ * s2io_txdl_getskb - Get the skb from txdl, unmap and return skb
+ */
+static struct sk_buff *s2io_txdl_getskb(fifo_info_t *fifo_data, TxD_t *txdlp, int get_off)
+{
+	nic_t *nic = fifo_data->nic;
+	struct sk_buff *skb;
+	TxD_t *txds;
+	u16 j, frg_cnt;
+
+	txds = txdlp;
+	if (txds->Host_Control == (u64) nic->ufo_in_band_v) {
+		pci_unmap_single(nic->pdev, (dma_addr_t)
+			txds->Buffer_Pointer, sizeof(u64),
+			PCI_DMA_TODEVICE);
+		txds++;
+	}
+
+	skb = (struct sk_buff *) ((unsigned long)
+			txds->Host_Control);
+	if (!skb) {
+		memset(txdlp, 0, (sizeof(TxD_t) * fifo_data->max_txds));
+		return NULL;
+	}
+	pci_unmap_single(nic->pdev, (dma_addr_t)
+			 txds->Buffer_Pointer,
+			 skb->len - skb->data_len,
+			 PCI_DMA_TODEVICE);
+	frg_cnt = skb_shinfo(skb)->nr_frags;
+	if (frg_cnt) {
+		txds++;
+		for (j = 0; j < frg_cnt; j++, txds++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[j];
+			if (!txds->Buffer_Pointer)
+				break;
+			pci_unmap_page(nic->pdev, (dma_addr_t) 
+					txds->Buffer_Pointer,
+				       frag->size, PCI_DMA_TODEVICE);
+		}
+	}
+	txdlp->Host_Control = 0;
+	return(skb);
+}
 
 /**
  *  free_tx_buffers - Free all queued Tx buffers
@@ -2020,7 +2069,7 @@ static void free_tx_buffers(struct s2io_nic *nic)
 	int i, j;
 	mac_info_t *mac_control;
 	struct config_param *config;
-	int cnt = 0, frg_cnt;
+	int cnt = 0;
 
 	mac_control = &nic->mac_control;
 	config = &nic->config;
@@ -2029,38 +2078,11 @@ static void free_tx_buffers(struct s2io_nic *nic)
 		for (j = 0; j < config->tx_cfg[i].fifo_len - 1; j++) {
 			txdp = (TxD_t *) mac_control->fifos[i].list_info[j].
 			    list_virt_addr;
-			skb =
-			    (struct sk_buff *) ((unsigned long) txdp->
-						Host_Control);
-			if (skb == NULL) {
-				memset(txdp, 0, sizeof(TxD_t) *
-				       config->max_txds);
-				continue;
-			}
-			frg_cnt = skb_shinfo(skb)->nr_frags;
-			pci_unmap_single(nic->pdev, (dma_addr_t)
-					 txdp->Buffer_Pointer,
-					 skb->len - skb->data_len,
-					 PCI_DMA_TODEVICE);
-			if (frg_cnt) {
-				TxD_t *temp;
-				temp = txdp;
-				txdp++;
-				for (j = 0; j < frg_cnt; j++, txdp++) {
-					skb_frag_t *frag =
-					    &skb_shinfo(skb)->frags[j];
-					pci_unmap_page(nic->pdev,
-						       (dma_addr_t)
-						       txdp->
-						       Buffer_Pointer,
-						       frag->size,
-						       PCI_DMA_TODEVICE);
-				}
-				txdp = temp;
+			skb = s2io_txdl_getskb(&mac_control->fifos[i], txdp, j);
+			if (skb) {
+				dev_kfree_skb(skb);
+				cnt++;
 			}
-			dev_kfree_skb(skb);
-			memset(txdp, 0, sizeof(TxD_t) * config->max_txds);
-			cnt++;
 		}
 		DBG_PRINT(INTR_DBG,
 			  "%s:forcibly freeing %d skbs on FIFO%d\n",
@@ -2661,7 +2683,6 @@ static void tx_intr_handler(fifo_info_t *fifo_data)
 	tx_curr_get_info_t get_info, put_info;
 	struct sk_buff *skb;
 	TxD_t *txdlp;
-	u16 j, frg_cnt;
 
 	get_info = fifo_data->tx_curr_get_info;
 	put_info = fifo_data->tx_curr_put_info;
@@ -2684,8 +2705,7 @@ to loss of link\n");
 			}
 		}
 
-		skb = (struct sk_buff *) ((unsigned long)
-				txdlp->Host_Control);
+		skb = s2io_txdl_getskb(fifo_data, txdlp, get_info.offset);
 		if (skb == NULL) {
 			DBG_PRINT(ERR_DBG, "%s: Null skb ",
 			__FUNCTION__);
@@ -2693,34 +2713,6 @@ to loss of link\n");
 			return;
 		}
 
-		frg_cnt = skb_shinfo(skb)->nr_frags;
-		nic->tx_pkt_count++;
-
-		pci_unmap_single(nic->pdev, (dma_addr_t)
-				 txdlp->Buffer_Pointer,
-				 skb->len - skb->data_len,
-				 PCI_DMA_TODEVICE);
-		if (frg_cnt) {
-			TxD_t *temp;
-			temp = txdlp;
-			txdlp++;
-			for (j = 0; j < frg_cnt; j++, txdlp++) {
-				skb_frag_t *frag =
-				    &skb_shinfo(skb)->frags[j];
-				if (!txdlp->Buffer_Pointer)
-					break;
-				pci_unmap_page(nic->pdev,
-					       (dma_addr_t)
-					       txdlp->
-					       Buffer_Pointer,
-					       frag->size,
-					       PCI_DMA_TODEVICE);
-			}
-			txdlp = temp;
-		}
-		memset(txdlp, 0,
-		       (sizeof(TxD_t) * fifo_data->max_txds));
-
 		/* Updating the statistics block */
 		nic->stats.tx_bytes += skb->len;
 		dev_kfree_skb_irq(skb);
@@ -3527,6 +3519,8 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 		return 0;
 	}
 
+	txdp->Control_1 = 0;
+	txdp->Control_2 = 0;
 #ifdef NETIF_F_TSO
 	mss = skb_shinfo(skb)->tso_size;
 	if (mss) {
@@ -3534,19 +3528,13 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 		txdp->Control_1 |= TXD_TCP_LSO_MSS(mss);
 	}
 #endif
-
-	frg_cnt = skb_shinfo(skb)->nr_frags;
-	frg_len = skb->len - skb->data_len;
-
-	txdp->Buffer_Pointer = pci_map_single
-	    (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE);
-	txdp->Host_Control = (unsigned long) skb;
 	if (skb->ip_summed == CHECKSUM_HW) {
 		txdp->Control_2 |=
 		    (TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN |
 		     TXD_TX_CKO_UDP_EN);
 	}
-
+	txdp->Control_1 |= TXD_GATHER_CODE_FIRST;
+	txdp->Control_1 |= TXD_LIST_OWN_XENA;
 	txdp->Control_2 |= config->tx_intr_type;
 
 	if (sp->vlgrp && vlan_tx_tag_present(skb)) {
@@ -3554,10 +3542,40 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 		txdp->Control_2 |= TXD_VLAN_TAG(vlan_tag);
 	}
 
-	txdp->Control_1 |= (TXD_BUFFER0_SIZE(frg_len) |
-			    TXD_GATHER_CODE_FIRST);
-	txdp->Control_1 |= TXD_LIST_OWN_XENA;
+	frg_len = skb->len - skb->data_len;
+	if (skb_shinfo(skb)->ufo_size) {
+		int ufo_size;
+
+		ufo_size = skb_shinfo(skb)->ufo_size;
+		ufo_size &= ~7;
+		txdp->Control_1 |= TXD_UFO_EN;
+		txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
+		txdp->Control_1 |= TXD_BUFFER0_SIZE(8);
+#ifdef __BIG_ENDIAN
+		sp->ufo_in_band_v[put_off] =
+				(u64)skb_shinfo(skb)->ip6_frag_id;
+#else
+		sp->ufo_in_band_v[put_off] =
+				(u64)skb_shinfo(skb)->ip6_frag_id << 32;
+#endif
+		txdp->Host_Control = (unsigned long)sp->ufo_in_band_v;
+		txdp->Buffer_Pointer = pci_map_single(sp->pdev,
+					sp->ufo_in_band_v,
+					sizeof(u64), PCI_DMA_TODEVICE);
+		txdp++;
+		txdp->Control_1 = 0;
+		txdp->Control_2 = 0;
+	}
+
+	txdp->Buffer_Pointer = pci_map_single
+	    (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE);
+	txdp->Host_Control = (unsigned long) skb;
+	txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
 
+	if (skb_shinfo(skb)->ufo_size)
+		txdp->Control_1 |= TXD_UFO_EN;
+
+	frg_cnt = skb_shinfo(skb)->nr_frags;
 	/* For fragmented SKB. */
 	for (i = 0; i < frg_cnt; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -3569,9 +3587,14 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 		    (sp->pdev, frag->page, frag->page_offset,
 		     frag->size, PCI_DMA_TODEVICE);
 		txdp->Control_1 |= TXD_BUFFER0_SIZE(frag->size);
+		if (skb_shinfo(skb)->ufo_size)
+			txdp->Control_1 |= TXD_UFO_EN;
 	}
 	txdp->Control_1 |= TXD_GATHER_CODE_LAST;
 
+	if (skb_shinfo(skb)->ufo_size)
+		frg_cnt++; /* as Txd0 was used for inband header */
+
 	tx_fifo = mac_control->tx_FIFO_start[queue];
 	val64 = mac_control->fifos[queue].list_info[put_off].list_phy_addr;
 	writeq(val64, &tx_fifo->TxDL_Pointer);
@@ -3583,6 +3606,8 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (mss)
 		val64 |= TX_FIFO_SPECIAL_FUNC;
 #endif
+	if (skb_shinfo(skb)->ufo_size)
+		val64 |= TX_FIFO_SPECIAL_FUNC;
 	writeq(val64, &tx_fifo->List_Control);
 
 	mmiowb();
@@ -5190,6 +5215,8 @@ static struct ethtool_ops netdev_ethtool_ops = {
 	.get_tso = ethtool_op_get_tso,
 	.set_tso = ethtool_op_set_tso,
 #endif
+	.get_ufo = ethtool_op_get_ufo,
+	.set_ufo = ethtool_op_set_ufo,
 	.self_test_count = s2io_ethtool_self_test_count,
 	.self_test = s2io_ethtool_test,
 	.get_strings = s2io_ethtool_get_strings,
@@ -5941,7 +5968,8 @@ Defaulting to INTA\n");
 			break;
 		}
 	}
-	config->max_txds = MAX_SKB_FRAGS + 1;
+	/* + 2 because one Txd for skb->data and one Txd for UFO */
+	config->max_txds = MAX_SKB_FRAGS + 2;
 
 	/* Rx side parameters. */
 	if (rx_ring_sz[0] == 0)
@@ -6035,6 +6063,10 @@ Defaulting to INTA\n");
 #ifdef NETIF_F_TSO
 	dev->features |= NETIF_F_TSO;
 #endif
+	if (sp->device_type & XFRAME_II_DEVICE) {
+		dev->features |= NETIF_F_UFO;
+		dev->features |= NETIF_F_HW_CSUM;
+	}
 
 	dev->tx_timeout = &s2io_tx_watchdog;
 	dev->watchdog_timeo = WATCH_DOG_TIMEOUT;
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h
index 419aad7f10e7c357c469904ca56cea95295daf3b..852a6a899d07cd5d1d9a79b7b724ce447daf5991 100644
--- a/drivers/net/s2io.h
+++ b/drivers/net/s2io.h
@@ -393,7 +393,9 @@ typedef struct _TxD {
 #define TXD_GATHER_CODE_LAST    BIT(23)
 #define TXD_TCP_LSO_EN          BIT(30)
 #define TXD_UDP_COF_EN          BIT(31)
+#define TXD_UFO_EN		BIT(31) | BIT(30)
 #define TXD_TCP_LSO_MSS(val)    vBIT(val,34,14)
+#define TXD_UFO_MSS(val)	vBIT(val,34,14)
 #define TXD_BUFFER0_SIZE(val)   vBIT(val,48,16)
 
 	u64 Control_2;
@@ -789,6 +791,7 @@ struct s2io_nic {
 
 	spinlock_t	rx_lock;
 	atomic_t	isr_cnt;
+	u64 *ufo_in_band_v;
 };
 
 #define RESET_ERROR 1;