提交 b5f16484 编写于 作者: D David S. Miller

Merge branch 'smc-pnetid-and-SMC-D-support'

Ursula Braun says:

====================
smc: pnetid and SMC-D support

SMC requires a configured pnet table to map Ethernet interfaces to
RoCE adapter ports. For s390 there exists hardware support to group
such devices. The first three patches cover the s390 pnetid support,
enabling SMC-R usage on s390 without configuring an extra pnet table.

SMC currently requires RoCE adapters, and uses RDMA-techniques
implemented with IB-verbs. But s390 offers another method for
intra-CEC Shared Memory communication. The following seven patches
implement a solution to run SMC traffic based on intra-CEC DMA,
called SMC-D.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -95,4 +95,14 @@ config CCWGROUP
tristate
default (LCS || CTCM || QETH)
config ISM
tristate "Support for ISM vPCI Adapter"
depends on PCI && SMC
default n
help
Select this option if you want to use the Internal Shared Memory
vPCI Adapter.
To compile as a module choose M. The module name is ism.
If unsure, choose N.
endmenu
......@@ -15,3 +15,6 @@ qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o
obj-$(CONFIG_QETH_L2) += qeth_l2.o
qeth_l3-y += qeth_l3_main.o qeth_l3_sys.o
obj-$(CONFIG_QETH_L3) += qeth_l3.o
ism-y := ism_drv.o
obj-$(CONFIG_ISM) += ism.o
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef S390_ISM_H
#define S390_ISM_H
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <net/smc.h>
#define UTIL_STR_LEN 16
/*
* Do not use the first word of the DMB bits to ensure 8 byte aligned access.
*/
#define ISM_DMB_WORD_OFFSET 1
#define ISM_DMB_BIT_OFFSET (ISM_DMB_WORD_OFFSET * 32)
#define ISM_NR_DMBS 1920
#define ISM_REG_SBA 0x1
#define ISM_REG_IEQ 0x2
#define ISM_READ_GID 0x3
#define ISM_ADD_VLAN_ID 0x4
#define ISM_DEL_VLAN_ID 0x5
#define ISM_SET_VLAN 0x6
#define ISM_RESET_VLAN 0x7
#define ISM_QUERY_INFO 0x8
#define ISM_QUERY_RGID 0x9
#define ISM_REG_DMB 0xA
#define ISM_UNREG_DMB 0xB
#define ISM_SIGNAL_IEQ 0xE
#define ISM_UNREG_SBA 0x11
#define ISM_UNREG_IEQ 0x12
#define ISM_ERROR 0xFFFF
struct ism_req_hdr {
u32 cmd;
u16 : 16;
u16 len;
};
struct ism_resp_hdr {
u32 cmd;
u16 ret;
u16 len;
};
union ism_reg_sba {
struct {
struct ism_req_hdr hdr;
u64 sba;
} request;
struct {
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
union ism_reg_ieq {
struct {
struct ism_req_hdr hdr;
u64 ieq;
u64 len;
} request;
struct {
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
union ism_read_gid {
struct {
struct ism_req_hdr hdr;
} request;
struct {
struct ism_resp_hdr hdr;
u64 gid;
} response;
} __aligned(16);
union ism_qi {
struct {
struct ism_req_hdr hdr;
} request;
struct {
struct ism_resp_hdr hdr;
u32 version;
u32 max_len;
u64 ism_state;
u64 my_gid;
u64 sba;
u64 ieq;
u32 ieq_len;
u32 : 32;
u32 dmbs_owned;
u32 dmbs_used;
u32 vlan_required;
u32 vlan_nr_ids;
u16 vlan_id[64];
} response;
} __aligned(64);
union ism_query_rgid {
struct {
struct ism_req_hdr hdr;
u64 rgid;
u32 vlan_valid;
u32 vlan_id;
} request;
struct {
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
union ism_reg_dmb {
struct {
struct ism_req_hdr hdr;
u64 dmb;
u32 dmb_len;
u32 sba_idx;
u32 vlan_valid;
u32 vlan_id;
u64 rgid;
} request;
struct {
struct ism_resp_hdr hdr;
u64 dmb_tok;
} response;
} __aligned(32);
union ism_sig_ieq {
struct {
struct ism_req_hdr hdr;
u64 rgid;
u32 trigger_irq;
u32 event_code;
u64 info;
} request;
struct {
struct ism_resp_hdr hdr;
} response;
} __aligned(32);
union ism_unreg_dmb {
struct {
struct ism_req_hdr hdr;
u64 dmb_tok;
} request;
struct {
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
union ism_cmd_simple {
struct {
struct ism_req_hdr hdr;
} request;
struct {
struct ism_resp_hdr hdr;
} response;
} __aligned(8);
union ism_set_vlan_id {
struct {
struct ism_req_hdr hdr;
u64 vlan_id;
} request;
struct {
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
struct ism_eq_header {
u64 idx;
u64 ieq_len;
u64 entry_len;
u64 : 64;
};
struct ism_eq {
struct ism_eq_header header;
struct smcd_event entry[15];
};
struct ism_sba {
u32 s : 1; /* summary bit */
u32 e : 1; /* event bit */
u32 : 30;
u32 dmb_bits[ISM_NR_DMBS / 32];
u32 reserved[3];
u16 dmbe_mask[ISM_NR_DMBS];
};
struct ism_dev {
spinlock_t lock;
struct pci_dev *pdev;
struct smcd_dev *smcd;
void __iomem *ctl;
struct ism_sba *sba;
dma_addr_t sba_dma_addr;
DECLARE_BITMAP(sba_bitmap, ISM_NR_DMBS);
struct ism_eq *ieq;
dma_addr_t ieq_dma_addr;
int ieq_idx;
};
#define ISM_CREATE_REQ(dmb, idx, sf, offset) \
((dmb) | (idx) << 24 | (sf) << 23 | (offset))
static inline int __ism_move(struct ism_dev *ism, u64 dmb_req, void *data,
unsigned int size)
{
struct zpci_dev *zdev = to_zpci(ism->pdev);
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, size);
return zpci_write_block(req, data, dmb_req);
}
#endif /* S390_ISM_H */
// SPDX-License-Identifier: GPL-2.0
/*
* ISM driver for s390.
*
* Copyright IBM Corp. 2018
*/
#define KMSG_COMPONENT "ism"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/err.h>
#include <net/smc.h>
#include <asm/debug.h>
#include "ism.h"
MODULE_DESCRIPTION("ISM driver for s390");
MODULE_LICENSE("GPL");
#define PCI_DEVICE_ID_IBM_ISM 0x04ED
#define DRV_NAME "ism"
static const struct pci_device_id ism_device_table[] = {
{ PCI_VDEVICE(IBM, PCI_DEVICE_ID_IBM_ISM), 0 },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, ism_device_table);
static debug_info_t *ism_debug_info;
static int ism_cmd(struct ism_dev *ism, void *cmd)
{
struct ism_req_hdr *req = cmd;
struct ism_resp_hdr *resp = cmd;
memcpy_toio(ism->ctl + sizeof(*req), req + 1, req->len - sizeof(*req));
memcpy_toio(ism->ctl, req, sizeof(*req));
WRITE_ONCE(resp->ret, ISM_ERROR);
memcpy_fromio(resp, ism->ctl, sizeof(*resp));
if (resp->ret) {
debug_text_event(ism_debug_info, 0, "cmd failure");
debug_event(ism_debug_info, 0, resp, sizeof(*resp));
goto out;
}
memcpy_fromio(resp + 1, ism->ctl + sizeof(*resp),
resp->len - sizeof(*resp));
out:
return resp->ret;
}
static int ism_cmd_simple(struct ism_dev *ism, u32 cmd_code)
{
union ism_cmd_simple cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = cmd_code;
cmd.request.hdr.len = sizeof(cmd.request);
return ism_cmd(ism, &cmd);
}
static int query_info(struct ism_dev *ism)
{
union ism_qi cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_QUERY_INFO;
cmd.request.hdr.len = sizeof(cmd.request);
if (ism_cmd(ism, &cmd))
goto out;
debug_text_event(ism_debug_info, 3, "query info");
debug_event(ism_debug_info, 3, &cmd.response, sizeof(cmd.response));
out:
return 0;
}
static int register_sba(struct ism_dev *ism)
{
union ism_reg_sba cmd;
dma_addr_t dma_handle;
struct ism_sba *sba;
sba = dma_zalloc_coherent(&ism->pdev->dev, PAGE_SIZE,
&dma_handle, GFP_KERNEL);
if (!sba)
return -ENOMEM;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_REG_SBA;
cmd.request.hdr.len = sizeof(cmd.request);
cmd.request.sba = dma_handle;
if (ism_cmd(ism, &cmd)) {
dma_free_coherent(&ism->pdev->dev, PAGE_SIZE, sba, dma_handle);
return -EIO;
}
ism->sba = sba;
ism->sba_dma_addr = dma_handle;
return 0;
}
static int register_ieq(struct ism_dev *ism)
{
union ism_reg_ieq cmd;
dma_addr_t dma_handle;
struct ism_eq *ieq;
ieq = dma_zalloc_coherent(&ism->pdev->dev, PAGE_SIZE,
&dma_handle, GFP_KERNEL);
if (!ieq)
return -ENOMEM;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_REG_IEQ;
cmd.request.hdr.len = sizeof(cmd.request);
cmd.request.ieq = dma_handle;
cmd.request.len = sizeof(*ieq);
if (ism_cmd(ism, &cmd)) {
dma_free_coherent(&ism->pdev->dev, PAGE_SIZE, ieq, dma_handle);
return -EIO;
}
ism->ieq = ieq;
ism->ieq_idx = -1;
ism->ieq_dma_addr = dma_handle;
return 0;
}
static int unregister_sba(struct ism_dev *ism)
{
if (!ism->sba)
return 0;
if (ism_cmd_simple(ism, ISM_UNREG_SBA))
return -EIO;
dma_free_coherent(&ism->pdev->dev, PAGE_SIZE,
ism->sba, ism->sba_dma_addr);
ism->sba = NULL;
ism->sba_dma_addr = 0;
return 0;
}
static int unregister_ieq(struct ism_dev *ism)
{
if (!ism->ieq)
return 0;
if (ism_cmd_simple(ism, ISM_UNREG_IEQ))
return -EIO;
dma_free_coherent(&ism->pdev->dev, PAGE_SIZE,
ism->ieq, ism->ieq_dma_addr);
ism->ieq = NULL;
ism->ieq_dma_addr = 0;
return 0;
}
static int ism_read_local_gid(struct ism_dev *ism)
{
union ism_read_gid cmd;
int ret;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_READ_GID;
cmd.request.hdr.len = sizeof(cmd.request);
ret = ism_cmd(ism, &cmd);
if (ret)
goto out;
ism->smcd->local_gid = cmd.response.gid;
out:
return ret;
}
static int ism_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid,
u32 vid)
{
struct ism_dev *ism = smcd->priv;
union ism_query_rgid cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_QUERY_RGID;
cmd.request.hdr.len = sizeof(cmd.request);
cmd.request.rgid = rgid;
cmd.request.vlan_valid = vid_valid;
cmd.request.vlan_id = vid;
return ism_cmd(ism, &cmd);
}
static void ism_free_dmb(struct ism_dev *ism, struct smcd_dmb *dmb)
{
clear_bit(dmb->sba_idx, ism->sba_bitmap);
dma_free_coherent(&ism->pdev->dev, dmb->dmb_len,
dmb->cpu_addr, dmb->dma_addr);
}
static int ism_alloc_dmb(struct ism_dev *ism, struct smcd_dmb *dmb)
{
unsigned long bit;
if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev))
return -EINVAL;
if (!dmb->sba_idx) {
bit = find_next_zero_bit(ism->sba_bitmap, ISM_NR_DMBS,
ISM_DMB_BIT_OFFSET);
if (bit == ISM_NR_DMBS)
return -ENOMEM;
dmb->sba_idx = bit;
}
if (dmb->sba_idx < ISM_DMB_BIT_OFFSET ||
test_and_set_bit(dmb->sba_idx, ism->sba_bitmap))
return -EINVAL;
dmb->cpu_addr = dma_zalloc_coherent(&ism->pdev->dev, dmb->dmb_len,
&dmb->dma_addr, GFP_KERNEL |
__GFP_NOWARN | __GFP_NOMEMALLOC |
__GFP_COMP | __GFP_NORETRY);
if (!dmb->cpu_addr)
clear_bit(dmb->sba_idx, ism->sba_bitmap);
return dmb->cpu_addr ? 0 : -ENOMEM;
}
static int ism_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
{
struct ism_dev *ism = smcd->priv;
union ism_reg_dmb cmd;
int ret;
ret = ism_alloc_dmb(ism, dmb);
if (ret)
goto out;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_REG_DMB;
cmd.request.hdr.len = sizeof(cmd.request);
cmd.request.dmb = dmb->dma_addr;
cmd.request.dmb_len = dmb->dmb_len;
cmd.request.sba_idx = dmb->sba_idx;
cmd.request.vlan_valid = dmb->vlan_valid;
cmd.request.vlan_id = dmb->vlan_id;
cmd.request.rgid = dmb->rgid;
ret = ism_cmd(ism, &cmd);
if (ret) {
ism_free_dmb(ism, dmb);
goto out;
}
dmb->dmb_tok = cmd.response.dmb_tok;
out:
return ret;
}
static int ism_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
{
struct ism_dev *ism = smcd->priv;
union ism_unreg_dmb cmd;
int ret;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_UNREG_DMB;
cmd.request.hdr.len = sizeof(cmd.request);
cmd.request.dmb_tok = dmb->dmb_tok;
ret = ism_cmd(ism, &cmd);
if (ret)
goto out;
ism_free_dmb(ism, dmb);
out:
return ret;
}
static int ism_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
{
struct ism_dev *ism = smcd->priv;
union ism_set_vlan_id cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_ADD_VLAN_ID;
cmd.request.hdr.len = sizeof(cmd.request);
cmd.request.vlan_id = vlan_id;
return ism_cmd(ism, &cmd);
}
static int ism_del_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
{
struct ism_dev *ism = smcd->priv;
union ism_set_vlan_id cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_DEL_VLAN_ID;
cmd.request.hdr.len = sizeof(cmd.request);
cmd.request.vlan_id = vlan_id;
return ism_cmd(ism, &cmd);
}
static int ism_set_vlan_required(struct smcd_dev *smcd)
{
return ism_cmd_simple(smcd->priv, ISM_SET_VLAN);
}
static int ism_reset_vlan_required(struct smcd_dev *smcd)
{
return ism_cmd_simple(smcd->priv, ISM_RESET_VLAN);
}
static int ism_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq,
u32 event_code, u64 info)
{
struct ism_dev *ism = smcd->priv;
union ism_sig_ieq cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.request.hdr.cmd = ISM_SIGNAL_IEQ;
cmd.request.hdr.len = sizeof(cmd.request);
cmd.request.rgid = rgid;
cmd.request.trigger_irq = trigger_irq;
cmd.request.event_code = event_code;
cmd.request.info = info;
return ism_cmd(ism, &cmd);
}
static unsigned int max_bytes(unsigned int start, unsigned int len,
unsigned int boundary)
{
return min(boundary - (start & (boundary - 1)), len);
}
static int ism_move(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data, unsigned int size)
{
struct ism_dev *ism = smcd->priv;
unsigned int bytes;
u64 dmb_req;
int ret;
while (size) {
bytes = max_bytes(offset, size, PAGE_SIZE);
dmb_req = ISM_CREATE_REQ(dmb_tok, idx, size == bytes ? sf : 0,
offset);
ret = __ism_move(ism, dmb_req, data, bytes);
if (ret)
return ret;
size -= bytes;
data += bytes;
offset += bytes;
}
return 0;
}
static void ism_handle_event(struct ism_dev *ism)
{
struct smcd_event *entry;
while ((ism->ieq_idx + 1) != READ_ONCE(ism->ieq->header.idx)) {
if (++(ism->ieq_idx) == ARRAY_SIZE(ism->ieq->entry))
ism->ieq_idx = 0;
entry = &ism->ieq->entry[ism->ieq_idx];
debug_event(ism_debug_info, 2, entry, sizeof(*entry));
smcd_handle_event(ism->smcd, entry);
}
}
static irqreturn_t ism_handle_irq(int irq, void *data)
{
struct ism_dev *ism = data;
unsigned long bit, end;
unsigned long *bv;
bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET];
end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET;
spin_lock(&ism->lock);
ism->sba->s = 0;
barrier();
for (bit = 0;;) {
bit = find_next_bit_inv(bv, end, bit);
if (bit >= end)
break;
clear_bit_inv(bit, bv);
barrier();
smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET);
ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0;
}
if (ism->sba->e) {
ism->sba->e = 0;
barrier();
ism_handle_event(ism);
}
spin_unlock(&ism->lock);
return IRQ_HANDLED;
}
static const struct smcd_ops ism_ops = {
.query_remote_gid = ism_query_rgid,
.register_dmb = ism_register_dmb,
.unregister_dmb = ism_unregister_dmb,
.add_vlan_id = ism_add_vlan_id,
.del_vlan_id = ism_del_vlan_id,
.set_vlan_required = ism_set_vlan_required,
.reset_vlan_required = ism_reset_vlan_required,
.signal_event = ism_signal_ieq,
.move_data = ism_move,
};
static int ism_dev_init(struct ism_dev *ism)
{
struct pci_dev *pdev = ism->pdev;
int ret;
ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
if (ret <= 0)
goto out;
ret = request_irq(pci_irq_vector(pdev, 0), ism_handle_irq, 0,
pci_name(pdev), ism);
if (ret)
goto free_vectors;
ret = register_sba(ism);
if (ret)
goto free_irq;
ret = register_ieq(ism);
if (ret)
goto unreg_sba;
ret = ism_read_local_gid(ism);
if (ret)
goto unreg_ieq;
ret = smcd_register_dev(ism->smcd);
if (ret)
goto unreg_ieq;
query_info(ism);
return 0;
unreg_ieq:
unregister_ieq(ism);
unreg_sba:
unregister_sba(ism);
free_irq:
free_irq(pci_irq_vector(pdev, 0), ism);
free_vectors:
pci_free_irq_vectors(pdev);
out:
return ret;
}
static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct ism_dev *ism;
int ret;
ism = kzalloc(sizeof(*ism), GFP_KERNEL);
if (!ism)
return -ENOMEM;
spin_lock_init(&ism->lock);
dev_set_drvdata(&pdev->dev, ism);
ism->pdev = pdev;
ret = pci_enable_device_mem(pdev);
if (ret)
goto err;
ret = pci_request_mem_regions(pdev, DRV_NAME);
if (ret)
goto err_disable;
ism->ctl = pci_iomap(pdev, 2, 0);
if (!ism->ctl)
goto err_resource;
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (ret)
goto err_unmap;
pci_set_dma_seg_boundary(pdev, SZ_1M - 1);
pci_set_dma_max_seg_size(pdev, SZ_1M);
pci_set_master(pdev);
ism->smcd = smcd_alloc_dev(&pdev->dev, dev_name(&pdev->dev), &ism_ops,
ISM_NR_DMBS);
if (!ism->smcd)
goto err_unmap;
ism->smcd->priv = ism;
ret = ism_dev_init(ism);
if (ret)
goto err_free;
return 0;
err_free:
smcd_free_dev(ism->smcd);
err_unmap:
pci_iounmap(pdev, ism->ctl);
err_resource:
pci_release_mem_regions(pdev);
err_disable:
pci_disable_device(pdev);
err:
kfree(ism);
dev_set_drvdata(&pdev->dev, NULL);
return ret;
}
static void ism_dev_exit(struct ism_dev *ism)
{
struct pci_dev *pdev = ism->pdev;
smcd_unregister_dev(ism->smcd);
unregister_ieq(ism);
unregister_sba(ism);
free_irq(pci_irq_vector(pdev, 0), ism);
pci_free_irq_vectors(pdev);
}
static void ism_remove(struct pci_dev *pdev)
{
struct ism_dev *ism = dev_get_drvdata(&pdev->dev);
ism_dev_exit(ism);
smcd_free_dev(ism->smcd);
pci_iounmap(pdev, ism->ctl);
pci_release_mem_regions(pdev);
pci_disable_device(pdev);
dev_set_drvdata(&pdev->dev, NULL);
kfree(ism);
}
static int ism_suspend(struct device *dev)
{
struct ism_dev *ism = dev_get_drvdata(dev);
ism_dev_exit(ism);
return 0;
}
static int ism_resume(struct device *dev)
{
struct ism_dev *ism = dev_get_drvdata(dev);
return ism_dev_init(ism);
}
static SIMPLE_DEV_PM_OPS(ism_pm_ops, ism_suspend, ism_resume);
static struct pci_driver ism_driver = {
.name = DRV_NAME,
.id_table = ism_device_table,
.probe = ism_probe,
.remove = ism_remove,
.driver = {
.pm = &ism_pm_ops,
},
};
static int __init ism_init(void)
{
int ret;
ism_debug_info = debug_register("ism", 2, 1, 16);
if (!ism_debug_info)
return -ENODEV;
debug_register_view(ism_debug_info, &debug_hex_ascii_view);
ret = pci_register_driver(&ism_driver);
if (ret)
debug_unregister(ism_debug_info);
return ret;
}
static void __exit ism_exit(void)
{
pci_unregister_driver(&ism_driver);
debug_unregister(ism_debug_info);
}
module_init(ism_init);
module_exit(ism_exit);
......@@ -11,6 +11,8 @@
#ifndef _SMC_H
#define _SMC_H
#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
struct smc_hashinfo {
rwlock_t lock;
struct hlist_head ht;
......@@ -18,4 +20,67 @@ struct smc_hashinfo {
int smc_hash_sk(struct sock *sk);
void smc_unhash_sk(struct sock *sk);
/* SMCD/ISM device driver interface */
struct smcd_dmb {
u64 dmb_tok;
u64 rgid;
u32 dmb_len;
u32 sba_idx;
u32 vlan_valid;
u32 vlan_id;
void *cpu_addr;
dma_addr_t dma_addr;
};
#define ISM_EVENT_DMB 0
#define ISM_EVENT_GID 1
#define ISM_EVENT_SWR 2
struct smcd_event {
u32 type;
u32 code;
u64 tok;
u64 time;
u64 info;
};
struct smcd_dev;
struct smcd_ops {
int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid,
u32 vid);
int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
int (*set_vlan_required)(struct smcd_dev *dev);
int (*reset_vlan_required)(struct smcd_dev *dev);
int (*signal_event)(struct smcd_dev *dev, u64 rgid, u32 trigger_irq,
u32 event_code, u64 info);
int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data,
unsigned int size);
};
struct smcd_dev {
const struct smcd_ops *ops;
struct device dev;
void *priv;
u64 local_gid;
struct list_head list;
spinlock_t lock;
struct smc_connection **conn;
struct list_head vlan;
struct workqueue_struct *event_wq;
u8 pnetid[SMC_MAX_PNETID_LEN];
};
struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
const struct smcd_ops *ops, int max_dmbs);
int smcd_register_dev(struct smcd_dev *smcd);
void smcd_unregister_dev(struct smcd_dev *smcd);
void smcd_free_dev(struct smcd_dev *smcd);
void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event);
void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit);
#endif /* _SMC_H */
......@@ -35,6 +35,7 @@ enum {
SMC_DIAG_CONNINFO,
SMC_DIAG_LGRINFO,
SMC_DIAG_SHUTDOWN,
SMC_DIAG_DMBINFO,
__SMC_DIAG_MAX,
};
......@@ -83,4 +84,13 @@ struct smc_diag_lgrinfo {
struct smc_diag_linkinfo lnk[1];
__u8 role;
};
struct smcd_diag_dmbinfo { /* SMC-D Socket internals */
__u32 linkid; /* Link identifier */
__u64 peer_gid; /* Peer GID */
__u64 my_gid; /* My GID */
__u64 token; /* Token of DMB */
__u64 peer_token; /* Token of remote DMBE */
};
#endif /* _UAPI_SMC_DIAG_H_ */
obj-$(CONFIG_SMC) += smc.o
obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o
......@@ -23,6 +23,7 @@
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
#include <net/sock.h>
#include <net/tcp.h>
......@@ -35,6 +36,7 @@
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
......@@ -372,8 +374,8 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
return 0;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
static void smcr_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
......@@ -384,6 +386,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}
static void smcd_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
smc->conn.peer_rmbe_idx = clc->dmbe_idx;
smc->conn.peer_token = clc->token;
/* msg header takes up space in the buffer */
smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
if (smc->conn.lgr->is_smcd)
smcd_conn_save_peer_info(smc, clc);
else
smcr_conn_save_peer_info(smc, clc);
}
static void smc_link_save_peer_info(struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc)
{
......@@ -450,15 +474,51 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
return reason_code;
}
/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
{
/* Find ISM device with same PNETID as connecting interface */
smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
if (!(*ismdev))
return SMC_CLC_DECL_CNFERR; /* configuration error */
return 0;
}
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
struct smcd_dev *ismdev,
unsigned short vlan_id)
{
if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
return SMC_CLC_DECL_CNFERR;
return 0;
}
/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
* used, the VLAN ID will be registered again during the connection setup.
*/
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
struct smcd_dev *ismdev,
unsigned short vlan_id)
{
if (!is_smcd)
return 0;
if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
return SMC_CLC_DECL_CNFERR;
return 0;
}
/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
struct smc_clc_msg_accept_confirm *aclc,
struct smc_ib_device *ibdev, u8 ibport)
struct smc_ib_device *ibdev, u8 ibport,
struct smcd_dev *ismdev)
{
int rc = 0;
/* do inband token exchange */
rc = smc_clc_send_proposal(smc, ibdev, ibport);
rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, ismdev);
if (rc)
return rc;
/* receive SMC Accept CLC message */
......@@ -475,8 +535,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
int reason_code = 0;
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
aclc->hdr.flag);
local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
ibport, &aclc->lcl, NULL, 0);
if (local_contact < 0) {
if (local_contact == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
......@@ -491,7 +551,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
smc_conn_save_peer_info(smc, aclc);
/* create send buffer and rmb */
if (smc_buf_create(smc))
if (smc_buf_create(smc, false))
return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
if (local_contact == SMC_FIRST_CONTACT)
......@@ -538,11 +598,50 @@ static int smc_connect_rdma(struct smc_sock *smc,
return 0;
}
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
struct smcd_dev *ismdev)
{
int local_contact = SMC_FIRST_CONTACT;
int rc = 0;
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
NULL, ismdev, aclc->gid);
if (local_contact < 0)
return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
/* Create send and receive buffers */
if (smc_buf_create(smc, true))
return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
smc_conn_save_peer_info(smc, aclc);
smc_close_init(smc);
smc_rx_init(smc);
smc_tx_init(smc);
rc = smc_clc_send_confirm(smc);
if (rc)
return smc_connect_abort(smc, rc, local_contact);
mutex_unlock(&smc_create_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
return 0;
}
/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
bool ism_supported = false, rdma_supported = false;
struct smc_clc_msg_accept_confirm aclc;
struct smc_ib_device *ibdev;
struct smcd_dev *ismdev;
unsigned short vlan;
int smc_type;
int rc = 0;
u8 ibport;
......@@ -559,20 +658,52 @@ static int __smc_connect(struct smc_sock *smc)
if (using_ipsec(smc))
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
/* check if a RDMA device is available; if not, fall back */
if (smc_check_rdma(smc, &ibdev, &ibport))
/* check for VLAN ID */
if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
/* check if there is an ism device available */
if (!smc_check_ism(smc, &ismdev) &&
!smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
/* ISM is supported for this connection */
ism_supported = true;
smc_type = SMC_TYPE_D;
}
/* check if there is a rdma device available */
if (!smc_check_rdma(smc, &ibdev, &ibport)) {
/* RDMA is supported for this connection */
rdma_supported = true;
if (ism_supported)
smc_type = SMC_TYPE_B; /* both */
else
smc_type = SMC_TYPE_R; /* only RDMA */
}
/* if neither ISM nor RDMA are supported, fallback */
if (!rdma_supported && !ism_supported)
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
/* perform CLC handshake */
rc = smc_connect_clc(smc, &aclc, ibdev, ibport);
if (rc)
rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, ismdev);
if (rc) {
smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
return smc_connect_decline_fallback(smc, rc);
}
/* connect using rdma */
rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
if (rc)
/* depending on previous steps, connect using rdma or ism */
if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
rc = smc_connect_ism(smc, &aclc, ismdev);
else
rc = SMC_CLC_DECL_CNFERR;
if (rc) {
smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
return smc_connect_decline_fallback(smc, rc);
}
smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
return 0;
}
......@@ -894,7 +1025,8 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
int *local_contact)
{
/* allocate connection / link group */
*local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
*local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
&pclc->lcl, NULL, 0);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
return SMC_CLC_DECL_MEM;/* insufficient memory*/
......@@ -902,12 +1034,50 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
}
/* create send buffer and rmb */
if (smc_buf_create(new_smc))
if (smc_buf_create(new_smc, false))
return SMC_CLC_DECL_MEM;
return 0;
}
/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smcd_dev *ismdev,
int *local_contact)
{
struct smc_clc_msg_smcd *pclc_smcd;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
*local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
ismdev, pclc_smcd->gid);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
return SMC_CLC_DECL_MEM;/* insufficient memory*/
return SMC_CLC_DECL_INTERR; /* other error */
}
/* Check if peer can be reached via ISM device */
if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
new_smc->conn.lgr->vlan_id,
new_smc->conn.lgr->smcd)) {
if (*local_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(new_smc->conn.lgr);
smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_CNFERR;
}
/* Create send and receive buffers */
if (smc_buf_create(new_smc, true)) {
if (*local_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(new_smc->conn.lgr);
smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_MEM;
}
return 0;
}
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
......@@ -966,6 +1136,8 @@ static void smc_listen_work(struct work_struct *work)
struct smc_clc_msg_accept_confirm cclc;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *ibdev;
bool ism_supported = false;
struct smcd_dev *ismdev;
u8 buf[SMC_CLC_MAX_LEN];
int local_contact = 0;
int reason_code = 0;
......@@ -1006,12 +1178,21 @@ static void smc_listen_work(struct work_struct *work)
smc_rx_init(new_smc);
smc_tx_init(new_smc);
/* check if ISM is available */
if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
!smc_check_ism(new_smc, &ismdev) &&
!smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
ism_supported = true;
}
/* check if RDMA is available */
if (smc_check_rdma(new_smc, &ibdev, &ibport) ||
smc_listen_rdma_check(new_smc, pclc) ||
smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
&local_contact) ||
smc_listen_rdma_reg(new_smc, local_contact)) {
if (!ism_supported &&
((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
smc_check_rdma(new_smc, &ibdev, &ibport) ||
smc_listen_rdma_check(new_smc, pclc) ||
smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
&local_contact) ||
smc_listen_rdma_reg(new_smc, local_contact))) {
/* SMC not supported, decline */
mutex_unlock(&smc_create_lgr_pending);
smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
......@@ -1036,7 +1217,8 @@ static void smc_listen_work(struct work_struct *work)
}
/* finish worker */
smc_listen_rdma_finish(new_smc, &cclc, local_contact);
if (!ism_supported)
smc_listen_rdma_finish(new_smc, &cclc, local_contact);
smc_conn_save_peer_info(new_smc, &cclc);
mutex_unlock(&smc_create_lgr_pending);
smc_listen_out_connected(new_smc);
......
......@@ -21,8 +21,6 @@
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
#define SMC_MAX_PORTS 2 /* Max # of ports */
extern struct proto smc_proto;
extern struct proto smc_proto6;
......@@ -185,6 +183,11 @@ struct smc_connection {
spinlock_t acurs_lock; /* protect cursors */
#endif
struct work_struct close_work; /* peer sent some closing */
struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */
u8 rx_off; /* receive offset:
* 0 for SMC-R, 32 for SMC-D
*/
u64 peer_token; /* SMC-D token of peer */
};
struct smc_sock { /* smc sock container */
......
......@@ -117,7 +117,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
return rc;
}
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
struct smc_cdc_tx_pend *pend;
struct smc_wr_buf *wr_buf;
......@@ -130,6 +130,21 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
return smc_cdc_msg_send(conn, wr_buf, pend);
}
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
int rc;
if (conn->lgr->is_smcd) {
spin_lock_bh(&conn->send_lock);
rc = smcd_cdc_msg_send(conn);
spin_unlock_bh(&conn->send_lock);
} else {
rc = smcr_cdc_get_slot_and_msg_send(conn);
}
return rc;
}
static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
unsigned long data)
{
......@@ -157,6 +172,45 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
(unsigned long)conn);
}
/* Send a SMC-D CDC header.
* This increments the free space available in our send buffer.
* Also update the confirmed receive buffer with what was sent to the peer.
*/
int smcd_cdc_msg_send(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
struct smcd_cdc_msg cdc;
int rc, diff;
memset(&cdc, 0, sizeof(cdc));
cdc.common.type = SMC_CDC_MSG_TYPE;
cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
cdc.prod_count = conn->local_tx_ctrl.prod.count;
cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
cdc.cons_count = conn->local_tx_ctrl.cons.count;
cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
if (rc)
return rc;
smc_curs_write(&conn->rx_curs_confirmed,
smc_curs_read(&conn->local_tx_ctrl.cons, conn), conn);
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
/* increased by confirmed number of bytes */
smp_mb__before_atomic();
atomic_add(diff, &conn->sndbuf_space);
/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
smp_mb__after_atomic();
smc_curs_write(&conn->tx_curs_fin,
smc_curs_read(&conn->tx_curs_sent, conn), conn);
smc_tx_sndbuf_nonfull(smc);
return rc;
}
/********************************* receive ***********************************/
static inline bool smc_cdc_before(u16 seq1, u16 seq2)
......@@ -178,7 +232,7 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,
if (!sock_flag(&smc->sk, SOCK_URGINLINE))
/* we'll skip the urgent byte, so don't account for it */
(*diff_prod)--;
base = (char *)conn->rmb_desc->cpu_addr;
base = (char *)conn->rmb_desc->cpu_addr + conn->rx_off;
if (conn->urg_curs.count)
conn->urg_rx_byte = *(base + conn->urg_curs.count - 1);
else
......@@ -276,6 +330,34 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
sock_put(&smc->sk); /* no free sk in softirq-context */
}
/* Schedule a tasklet for this connection. Triggered from the ISM device IRQ
* handler to indicate update in the DMBE.
*
* Context:
* - tasklet context
*/
static void smcd_cdc_rx_tsklet(unsigned long data)
{
struct smc_connection *conn = (struct smc_connection *)data;
struct smcd_cdc_msg cdc;
struct smc_sock *smc;
if (!conn)
return;
memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
smc = container_of(conn, struct smc_sock, conn);
smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
}
/* Initialize receive tasklet. Called from ISM device IRQ handler to start
* receiver side.
*/
void smcd_cdc_rx_init(struct smc_connection *conn)
{
tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn);
}
/***************************** init, exit, misc ******************************/
static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
......
......@@ -50,6 +50,20 @@ struct smc_cdc_msg {
u8 reserved[18];
} __packed; /* format defined in RFC7609 */
/* CDC message for SMC-D */
struct smcd_cdc_msg {
struct smc_wr_rx_hdr common; /* Type = 0xFE */
u8 res1[7];
u16 prod_wrap;
u32 prod_count;
u8 res2[2];
u16 cons_wrap;
u32 cons_count;
struct smc_cdc_producer_flags prod_flags;
struct smc_cdc_conn_state_flags conn_state_flags;
u8 res3[8];
} __packed;
static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
{
return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort ||
......@@ -204,9 +218,9 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,
smc_curs_write(local, smc_curs_read(&temp, conn), conn);
}
static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smc_cdc_msg *peer,
struct smc_connection *conn)
static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smc_cdc_msg *peer,
struct smc_connection *conn)
{
local->common.type = peer->common.type;
local->len = peer->len;
......@@ -218,6 +232,27 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
local->conn_state_flags = peer->conn_state_flags;
}
static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smcd_cdc_msg *peer)
{
local->prod.wrap = peer->prod_wrap;
local->prod.count = peer->prod_count;
local->cons.wrap = peer->cons_wrap;
local->cons.count = peer->cons_count;
local->prod_flags = peer->prod_flags;
local->conn_state_flags = peer->conn_state_flags;
}
static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smc_cdc_msg *peer,
struct smc_connection *conn)
{
if (conn->lgr->is_smcd)
smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer);
else
smcr_cdc_msg_to_host(local, peer, conn);
}
struct smc_cdc_tx_pend;
int smc_cdc_get_free_slot(struct smc_connection *conn,
......@@ -227,6 +262,8 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
int smcd_cdc_msg_send(struct smc_connection *conn);
int smc_cdc_init(void) __init;
void smcd_cdc_rx_init(struct smc_connection *conn);
#endif /* SMC_CDC_H */
......@@ -23,9 +23,15 @@
#include "smc_core.h"
#include "smc_clc.h"
#include "smc_ib.h"
#include "smc_ism.h"
#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
/* eye catcher "SMCR" EBCDIC for CLC messages */
static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
/* eye catcher "SMCD" EBCDIC for CLC messages */
static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
......@@ -38,10 +44,14 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
struct smc_clc_msg_decline *dclc;
struct smc_clc_msg_trail *trl;
if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
return false;
switch (clcm->type) {
case SMC_CLC_PROPOSAL:
if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
clcm->path != SMC_TYPE_B)
return false;
pclc = (struct smc_clc_msg_proposal *)clcm;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (ntohs(pclc->hdr.length) !=
......@@ -56,10 +66,16 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
break;
case SMC_CLC_ACCEPT:
case SMC_CLC_CONFIRM:
if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D)
return false;
clc = (struct smc_clc_msg_accept_confirm *)clcm;
if (ntohs(clc->hdr.length) != sizeof(*clc))
if ((clcm->path == SMC_TYPE_R &&
ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
(clcm->path == SMC_TYPE_D &&
ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
return false;
trl = &clc->trl;
trl = (struct smc_clc_msg_trail *)
((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
break;
case SMC_CLC_DECLINE:
dclc = (struct smc_clc_msg_decline *)clcm;
......@@ -70,7 +86,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
default:
return false;
}
if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
return false;
return true;
}
......@@ -295,6 +312,9 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
datlen = ntohs(clcm->length);
if ((len < sizeof(struct smc_clc_msg_hdr)) ||
(datlen > buflen) ||
(clcm->version != SMC_CLC_V1) ||
(clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
clcm->path != SMC_TYPE_B) ||
((clcm->type != SMC_CLC_DECLINE) &&
(clcm->type != expected_type))) {
smc->sk.sk_err = EPROTO;
......@@ -356,17 +376,18 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
}
/* send CLC PROPOSAL message across internal TCP socket */
int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
struct smc_ib_device *ibdev, u8 ibport,
struct smcd_dev *ismdev)
{
struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_smcd pclc_smcd;
struct smc_clc_msg_proposal pclc;
struct smc_clc_msg_trail trl;
int len, i, plen, rc;
int reason_code = 0;
struct kvec vec[4];
struct kvec vec[5];
struct msghdr msg;
/* retrieve ip prefixes for CLC proposal msg */
......@@ -381,18 +402,34 @@ int smc_clc_send_proposal(struct smc_sock *smc,
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
pclc.hdr.length = htons(plen);
pclc.hdr.version = SMC_CLC_V1; /* SMC version */
memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
pclc.iparea_offset = htons(0);
pclc.hdr.path = smc_type;
if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) {
/* add SMC-R specifics */
memcpy(pclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&pclc.lcl.gid, &ibdev->gid[ibport - 1], SMC_GID_SIZE);
memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN);
pclc.iparea_offset = htons(0);
}
if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
/* add SMC-D specifics */
memset(&pclc_smcd, 0, sizeof(pclc_smcd));
plen += sizeof(pclc_smcd);
pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
pclc_smcd.gid = ismdev->local_gid;
}
pclc.hdr.length = htons(plen);
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
i = 0;
vec[i].iov_base = &pclc;
vec[i++].iov_len = sizeof(pclc);
if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
vec[i].iov_base = &pclc_smcd;
vec[i++].iov_len = sizeof(pclc_smcd);
}
vec[i].iov_base = &pclc_prfx;
vec[i++].iov_len = sizeof(pclc_prfx);
if (pclc_prfx.ipv6_prefixes_cnt > 0) {
......@@ -428,35 +465,56 @@ int smc_clc_send_confirm(struct smc_sock *smc)
struct kvec vec;
int len;
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
/* send SMC Confirm CLC msg */
memset(&cclc, 0, sizeof(cclc));
memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
cclc.hdr.type = SMC_CLC_CONFIRM;
cclc.hdr.length = htons(sizeof(cclc));
cclc.hdr.version = SMC_CLC_V1; /* SMC version */
memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
SMC_GID_SIZE);
memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short;
cclc.rmb_dma_addr = cpu_to_be64(
(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
hton24(cclc.psn, link->psn_initial);
memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
if (smc->conn.lgr->is_smcd) {
/* SMC-D specific settings */
memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
cclc.hdr.path = SMC_TYPE_D;
cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
cclc.gid = conn->lgr->smcd->local_gid;
cclc.token = conn->rmb_desc->token;
cclc.dmbe_size = conn->rmbe_size_short;
cclc.dmbe_idx = 0;
memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
} else {
/* SMC-R specific settings */
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
cclc.hdr.path = SMC_TYPE_R;
cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
memcpy(cclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
SMC_GID_SIZE);
memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short;
cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
hton24(cclc.psn, link->psn_initial);
memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
}
memset(&msg, 0, sizeof(msg));
vec.iov_base = &cclc;
vec.iov_len = sizeof(cclc);
len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
if (len < sizeof(cclc)) {
vec.iov_len = ntohs(cclc.hdr.length);
len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
ntohs(cclc.hdr.length));
if (len < ntohs(cclc.hdr.length)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
smc->sk.sk_err = -reason_code;
......@@ -479,35 +537,58 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
int rc = 0;
int len;
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
memset(&aclc, 0, sizeof(aclc));
memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
aclc.hdr.type = SMC_CLC_ACCEPT;
aclc.hdr.length = htons(sizeof(aclc));
aclc.hdr.version = SMC_CLC_V1; /* SMC version */
if (srv_first_contact)
aclc.hdr.flag = 1;
memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
SMC_GID_SIZE);
memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
aclc.rmbe_size = conn->rmbe_size_short,
aclc.rmb_dma_addr = cpu_to_be64(
(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
hton24(aclc.psn, link->psn_initial);
memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
if (new_smc->conn.lgr->is_smcd) {
/* SMC-D specific settings */
aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
aclc.hdr.path = SMC_TYPE_D;
aclc.gid = conn->lgr->smcd->local_gid;
aclc.token = conn->rmb_desc->token;
aclc.dmbe_size = conn->rmbe_size_short;
aclc.dmbe_idx = 0;
memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
} else {
/* SMC-R specific settings */
aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
aclc.hdr.path = SMC_TYPE_R;
link = &conn->lgr->lnk[SMC_SINGLE_LINK];
memcpy(aclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
SMC_GID_SIZE);
memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1],
ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
aclc.rmbe_size = conn->rmbe_size_short,
aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
hton24(aclc.psn, link->psn_initial);
memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
}
memset(&msg, 0, sizeof(msg));
vec.iov_base = &aclc;
vec.iov_len = sizeof(aclc);
len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
if (len < sizeof(aclc)) {
vec.iov_len = ntohs(aclc.hdr.length);
len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1,
ntohs(aclc.hdr.length));
if (len < ntohs(aclc.hdr.length)) {
if (len >= 0)
new_smc->sk.sk_err = EPROTO;
else
......
......@@ -23,6 +23,9 @@
#define SMC_CLC_DECLINE 0x04
#define SMC_CLC_V1 0x1 /* SMC version */
#define SMC_TYPE_R 0 /* SMC-R only */
#define SMC_TYPE_D 1 /* SMC-D only */
#define SMC_TYPE_B 3 /* SMC-R and SMC-D */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
#define SMC_CLC_DECL_TIMEOUT 0x02000000 /* timeout */
......@@ -42,9 +45,11 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 version : 4,
flag : 1,
rsvd : 3;
rsvd : 1,
path : 2;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 rsvd : 3,
u8 path : 2,
rsvd : 1,
flag : 1,
version : 4;
#endif
......@@ -77,6 +82,11 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */
} __aligned(4);
struct smc_clc_msg_smcd { /* SMC-D GID information */
u64 gid; /* ISM GID of requestor */
u8 res[32];
};
struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
struct smc_clc_msg_hdr hdr;
struct smc_clc_msg_local lcl;
......@@ -94,23 +104,45 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
struct smc_clc_msg_local lcl;
u8 qpn[3]; /* QP number */
__be32 rmb_rkey; /* RMB rkey */
u8 rmbe_idx; /* Index of RMBE in RMB */
__be32 rmbe_alert_token;/* unique connection id */
union {
struct { /* SMC-R */
struct smc_clc_msg_local lcl;
u8 qpn[3]; /* QP number */
__be32 rmb_rkey; /* RMB rkey */
u8 rmbe_idx; /* Index of RMBE in RMB */
__be32 rmbe_alert_token;/* unique connection id */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 rmbe_size : 4, /* RMBE buf size (compressed notation) */
qp_mtu : 4; /* QP mtu */
u8 rmbe_size : 4, /* buf size (compressed) */
qp_mtu : 4; /* QP mtu */
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 qp_mtu : 4,
rmbe_size : 4;
u8 qp_mtu : 4,
rmbe_size : 4;
#endif
u8 reserved;
__be64 rmb_dma_addr; /* RMB virtual address */
u8 reserved2;
u8 psn[3]; /* initial packet sequence number */
struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
u8 reserved;
__be64 rmb_dma_addr; /* RMB virtual address */
u8 reserved2;
u8 psn[3]; /* packet sequence number */
struct smc_clc_msg_trail smcr_trl;
/* eye catcher "SMCR" EBCDIC */
} __packed;
struct { /* SMC-D */
u64 gid; /* Sender GID */
u64 token; /* DMB token */
u8 dmbe_idx; /* DMBE index */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 dmbe_size : 4, /* buf size (compressed) */
reserved3 : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 reserved3 : 4,
dmbe_size : 4;
#endif
u16 reserved4;
u32 linkid; /* Link identifier */
u32 reserved5[3];
struct smc_clc_msg_trail smcd_trl;
/* eye catcher "SMCD" EBCDIC */
} __packed;
};
} __packed; /* format defined in RFC7609 */
struct smc_clc_msg_decline { /* clc decline message */
......@@ -129,13 +161,26 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
/* get SMC-D info from proposal message */
static inline struct smc_clc_msg_smcd *
smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
{
if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd))
return NULL;
return (struct smc_clc_msg_smcd *)(prop + 1);
}
struct smcd_dev;
int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
u8 ibport);
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
struct smc_ib_device *smcibdev, u8 ibport,
struct smcd_dev *ismdev);
int smc_clc_send_confirm(struct smc_sock *smc);
int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
......
......@@ -25,6 +25,7 @@
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"
#define SMC_LGR_NUM_INCR 256
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
......@@ -46,8 +47,8 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
* otherwise there is a risk of out-of-sync link groups.
*/
mod_delayed_work(system_wq, &lgr->free_work,
lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
SMC_LGR_FREE_DELAY_SERV);
(!lgr->is_smcd && lgr->role == SMC_CLNT) ?
SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
}
/* Register connection's alert token in our lookup structure.
......@@ -153,16 +154,18 @@ static void smc_lgr_free_work(struct work_struct *work)
free:
spin_unlock_bh(&smc_lgr_list.lock);
if (!delayed_work_pending(&lgr->free_work)) {
if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
if (!lgr->is_smcd &&
lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
smc_lgr_free(lgr);
}
}
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc,
static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
struct smc_ib_device *smcibdev, u8 ibport,
char *peer_systemid, unsigned short vlan_id)
char *peer_systemid, unsigned short vlan_id,
struct smcd_dev *smcismdev, u64 peer_gid)
{
struct smc_link_group *lgr;
struct smc_link *lnk;
......@@ -170,17 +173,23 @@ static int smc_lgr_create(struct smc_sock *smc,
int rc = 0;
int i;
if (is_smcd && vlan_id) {
rc = smc_ism_get_vlan(smcismdev, vlan_id);
if (rc)
goto out;
}
lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
if (!lgr) {
rc = -ENOMEM;
goto out;
}
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
lgr->is_smcd = is_smcd;
lgr->sync_err = 0;
memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
lgr->vlan_id = vlan_id;
rwlock_init(&lgr->sndbufs_lock);
rwlock_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
INIT_LIST_HEAD(&lgr->rmbs[i]);
......@@ -189,36 +198,44 @@ static int smc_lgr_create(struct smc_sock *smc,
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
lgr->conns_all = RB_ROOT;
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = SMC_SINGLE_LINK;
lnk->smcibdev = smcibdev;
lnk->ibport = ibport;
lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
if (!smcibdev->initialized)
smc_ib_setup_per_ibdev(smcibdev);
get_random_bytes(rndvec, sizeof(rndvec));
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
rc = smc_llc_link_init(lnk);
if (rc)
goto free_lgr;
rc = smc_wr_alloc_link_mem(lnk);
if (rc)
goto clear_llc_lnk;
rc = smc_ib_create_protection_domain(lnk);
if (rc)
goto free_link_mem;
rc = smc_ib_create_queue_pair(lnk);
if (rc)
goto dealloc_pd;
rc = smc_wr_create_link(lnk);
if (rc)
goto destroy_qp;
if (is_smcd) {
/* SMC-D specific settings */
lgr->peer_gid = peer_gid;
lgr->smcd = smcismdev;
} else {
/* SMC-R specific settings */
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = SMC_SINGLE_LINK;
lnk->smcibdev = smcibdev;
lnk->ibport = ibport;
lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
if (!smcibdev->initialized)
smc_ib_setup_per_ibdev(smcibdev);
get_random_bytes(rndvec, sizeof(rndvec));
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_llc_link_init(lnk);
if (rc)
goto free_lgr;
rc = smc_wr_alloc_link_mem(lnk);
if (rc)
goto clear_llc_lnk;
rc = smc_ib_create_protection_domain(lnk);
if (rc)
goto free_link_mem;
rc = smc_ib_create_queue_pair(lnk);
if (rc)
goto dealloc_pd;
rc = smc_wr_create_link(lnk);
if (rc)
goto destroy_qp;
}
smc->conn.lgr = lgr;
rwlock_init(&lgr->conns_lock);
spin_lock_bh(&smc_lgr_list.lock);
list_add(&lgr->list, &smc_lgr_list.list);
spin_unlock_bh(&smc_lgr_list.lock);
......@@ -264,7 +281,12 @@ void smc_conn_free(struct smc_connection *conn)
{
if (!conn->lgr)
return;
smc_cdc_tx_dismiss_slots(conn);
if (conn->lgr->is_smcd) {
smc_ism_unset_conn(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_tx_dismiss_slots(conn);
}
smc_lgr_unregister_conn(conn);
smc_buf_unuse(conn);
}
......@@ -280,8 +302,8 @@ static void smc_link_clear(struct smc_link *lnk)
smc_wr_free_link_mem(lnk);
}
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
{
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
......@@ -301,6 +323,28 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
kfree(buf_desc);
}
static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
struct smc_buf_desc *buf_desc)
{
if (is_dmb) {
/* restore original buf len */
buf_desc->len += sizeof(struct smcd_cdc_msg);
smc_ism_unregister_dmb(lgr->smcd, buf_desc);
} else {
kfree(buf_desc->cpu_addr);
}
kfree(buf_desc);
}
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
{
if (lgr->is_smcd)
smcd_buf_free(lgr, is_rmb, buf_desc);
else
smcr_buf_free(lgr, is_rmb, buf_desc);
}
static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
struct smc_buf_desc *buf_desc, *bf_desc;
......@@ -332,7 +376,10 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
void smc_lgr_free(struct smc_link_group *lgr)
{
smc_lgr_free_bufs(lgr);
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
if (lgr->is_smcd)
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
else
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
kfree(lgr);
}
......@@ -357,7 +404,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
lgr->terminating = 1;
if (!list_empty(&lgr->list)) /* forget lgr */
list_del_init(&lgr->list);
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
if (!lgr->is_smcd)
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
......@@ -374,7 +422,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
node = rb_first(&lgr->conns_all);
}
write_unlock_bh(&lgr->conns_lock);
wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
if (!lgr->is_smcd)
wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
smc_lgr_schedule_free_work(lgr);
}
......@@ -392,17 +441,44 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
if (!lgr->is_smcd &&
lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
__smc_lgr_terminate(lgr);
}
spin_unlock_bh(&smc_lgr_list.lock);
}
/* Called when SMC-D device is terminated or peer is lost */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
/* run common cleanup function and build free list */
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (lgr->is_smcd && lgr->smcd == dev &&
(!peer_gid || lgr->peer_gid == peer_gid) &&
!list_empty(&lgr->list)) {
__smc_lgr_terminate(lgr);
list_move(&lgr->list, &lgr_free_list);
}
}
spin_unlock_bh(&smc_lgr_list.lock);
/* cancel the regular free workers and actually free lgrs */
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
cancel_delayed_work_sync(&lgr->free_work);
smc_lgr_free(lgr);
}
}
/* Determine vlan of internal TCP socket.
* @vlan_id: address to store the determined vlan id into
*/
static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
struct net_device *ndev;
......@@ -477,10 +553,30 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
return -ENODEV;
}
static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
enum smc_lgr_role role)
{
return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
SMC_SYSTEMID_LEN) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
SMC_GID_SIZE) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
sizeof(lcl->mac)) &&
lgr->role == role;
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
struct smcd_dev *smcismdev, u64 peer_gid)
{
return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc,
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact)
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid)
{
struct smc_connection *conn = &smc->conn;
int local_contact = SMC_FIRST_CONTACT;
......@@ -502,17 +598,12 @@ int smc_conn_create(struct smc_sock *smc,
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry(lgr, &smc_lgr_list.list, list) {
write_lock_bh(&lgr->conns_lock);
if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
SMC_SYSTEMID_LEN) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
SMC_GID_SIZE) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
sizeof(lcl->mac)) &&
if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
smcr_lgr_match(lgr, lcl, role)) &&
!lgr->sync_err &&
(lgr->role == role) &&
(lgr->vlan_id == vlan_id) &&
((role == SMC_CLNT) ||
(lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
lgr->vlan_id == vlan_id &&
(role == SMC_CLNT ||
lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
/* link group found */
local_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
......@@ -535,16 +626,21 @@ int smc_conn_create(struct smc_sock *smc,
create:
if (local_contact == SMC_FIRST_CONTACT) {
rc = smc_lgr_create(smc, smcibdev, ibport,
lcl->id_for_peer, vlan_id);
rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
lcl->id_for_peer, vlan_id, smcd, peer_gid);
if (rc)
goto out;
smc_lgr_register_conn(conn); /* add smc conn to lgr */
rc = smc_link_determine_gid(conn->lgr);
if (!is_smcd)
rc = smc_link_determine_gid(conn->lgr);
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
if (is_smcd) {
conn->rx_off = sizeof(struct smcd_cdc_msg);
smcd_cdc_rx_init(conn); /* init tasklet for this conn */
}
#ifndef KERNEL_HAS_ATOMIC64
spin_lock_init(&conn->acurs_lock);
#endif
......@@ -609,8 +705,8 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
bool is_rmb, int bufsize)
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
bool is_rmb, int bufsize)
{
struct smc_buf_desc *buf_desc;
struct smc_link *lnk;
......@@ -668,7 +764,44 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
return buf_desc;
}
static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
bool is_dmb, int bufsize)
{
struct smc_buf_desc *buf_desc;
int rc;
if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
return ERR_PTR(-EAGAIN);
/* try to alloc a new DMB */
buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
if (!buf_desc)
return ERR_PTR(-ENOMEM);
if (is_dmb) {
rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
if (rc) {
kfree(buf_desc);
return ERR_PTR(-EAGAIN);
}
buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
/* CDC header stored in buf. So, pretend it was smaller */
buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
} else {
buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
__GFP_NOWARN | __GFP_NORETRY |
__GFP_NOMEMALLOC);
if (!buf_desc->cpu_addr) {
kfree(buf_desc);
return ERR_PTR(-EAGAIN);
}
buf_desc->len = bufsize;
}
return buf_desc;
}
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
struct smc_connection *conn = &smc->conn;
......@@ -706,7 +839,11 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
break; /* found reusable slot */
}
buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
if (is_smcd)
buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
else
buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
if (PTR_ERR(buf_desc) == -ENOMEM)
break;
if (IS_ERR(buf_desc))
......@@ -727,7 +864,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
conn->rmbe_size_short = bufsize_short;
smc->sk.sk_rcvbuf = bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len);
if (is_smcd)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else {
conn->sndbuf_desc = buf_desc;
smc->sk.sk_sndbuf = bufsize * 2;
......@@ -740,6 +880,8 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!conn->lgr || conn->lgr->is_smcd)
return;
smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
conn->sndbuf_desc, DMA_TO_DEVICE);
}
......@@ -748,6 +890,8 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!conn->lgr || conn->lgr->is_smcd)
return;
smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
conn->sndbuf_desc, DMA_TO_DEVICE);
}
......@@ -756,6 +900,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!conn->lgr || conn->lgr->is_smcd)
return;
smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
conn->rmb_desc, DMA_FROM_DEVICE);
}
......@@ -764,6 +910,8 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
if (!conn->lgr || conn->lgr->is_smcd)
return;
smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
conn->rmb_desc, DMA_FROM_DEVICE);
}
......@@ -774,16 +922,16 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
* the Linux implementation uses just one RMB-element per RMB, i.e. uses an
* extra RMB for every connection in a link group
*/
int smc_buf_create(struct smc_sock *smc)
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
int rc;
/* create send buffer */
rc = __smc_buf_create(smc, false);
rc = __smc_buf_create(smc, is_smcd, false);
if (rc)
return rc;
/* create rmb */
rc = __smc_buf_create(smc, true);
rc = __smc_buf_create(smc, is_smcd, true);
if (rc)
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
return rc;
......@@ -865,7 +1013,8 @@ void smc_core_exit(void)
spin_unlock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
if (!lgr->is_smcd)
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
cancel_delayed_work_sync(&lgr->free_work);
smc_lgr_free(lgr); /* free link group */
}
......
......@@ -124,15 +124,28 @@ struct smc_buf_desc {
void *cpu_addr; /* virtual address of buffer */
struct page *pages;
int len; /* length of buffer */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
/* for rmb only: memory region
* incl. rkey provided to peer
*/
u32 order; /* allocation order */
u32 used; /* currently used / unused */
u8 reused : 1; /* new created / reused */
u8 regerr : 1; /* err during registration */
union {
struct { /* SMC-R */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
/* virtual buffer */
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
/* for rmb only: memory region
* incl. rkey provided to peer
*/
u32 order; /* allocation order */
};
struct { /* SMC-D */
unsigned short sba_idx;
/* SBA index number */
u64 token;
/* DMB token number */
dma_addr_t dma_addr;
/* DMA address */
};
};
};
struct smc_rtoken { /* address/key of remote RMB */
......@@ -148,12 +161,10 @@ struct smc_rtoken { /* address/key of remote RMB */
* struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
*/
struct smcd_dev;
struct smc_link_group {
struct list_head list;
enum smc_lgr_role role; /* client or server */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
struct rb_root conns_all; /* connection tree */
rwlock_t conns_lock; /* protects conns_all */
unsigned int conns_num; /* current # of connections */
......@@ -163,17 +174,35 @@ struct smc_link_group {
rwlock_t sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
rwlock_t rmbs_lock; /* protects rx buffers */
struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
[SMC_LINKS_PER_LGR_MAX];
/* remote addr/key pairs */
unsigned long rtokens_used_mask[BITS_TO_LONGS(
SMC_RMBS_PER_LGR_MAX)];
/* used rtoken elements */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
u8 sync_err : 1; /* lgr no longer fits to peer */
u8 terminating : 1;/* lgr is terminating */
bool is_smcd; /* SMC-R or SMC-D */
union {
struct { /* SMC-R */
enum smc_lgr_role role;
/* client or server */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX];
/* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
[SMC_LINKS_PER_LGR_MAX];
/* remote addr/key pairs */
unsigned long rtokens_used_mask[BITS_TO_LONGS
(SMC_RMBS_PER_LGR_MAX)];
/* used rtoken elements */
};
struct { /* SMC-D */
u64 peer_gid;
/* Peer GID (remote) */
struct smcd_dev *smcd;
/* ISM device for VLAN reg. */
};
};
};
/* Find the connection associated with the given alert token in the link group.
......@@ -217,7 +246,8 @@ void smc_lgr_free(struct smc_link_group *lgr);
void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
int smc_buf_create(struct smc_sock *smc);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
......@@ -227,9 +257,13 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc,
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid);
void smcd_conn_free(struct smc_connection *conn);
void smc_core_exit(void);
#endif
......@@ -136,7 +136,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
......@@ -155,6 +156,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
goto errout;
}
if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_connection *conn = &smc->conn;
struct smcd_diag_dmbinfo dinfo = {
.linkid = *((u32 *)conn->lgr->id),
.peer_gid = conn->lgr->peer_gid,
.my_gid = conn->lgr->smcd->local_gid,
.token = conn->rmb_desc->token,
.peer_token = conn->peer_token
};
if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0)
goto errout;
}
nlmsg_end(skb, nlh);
return 0;
......
......@@ -143,6 +143,62 @@ int smc_ib_ready_link(struct smc_link *lnk)
return rc;
}
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
struct ib_gid_attr gattr;
int rc;
rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
&smcibdev->gid[ibport - 1], &gattr);
if (rc || !gattr.ndev)
return -ENODEV;
memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
dev_put(gattr.ndev);
return 0;
}
/* Create an identifier unique for this instance of SMC-R.
* The MAC-address of the first active registered IB device
* plus a random 2-byte number is used to create this identifier.
* This name is delivered to the peer during connection initialization.
*/
static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
u8 ibport)
{
memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
sizeof(smcibdev->mac[ibport - 1]));
get_random_bytes(&local_systemid[0], 2);
}
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
{
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{
int rc;
memset(&smcibdev->pattr[ibport - 1], 0,
sizeof(smcibdev->pattr[ibport - 1]));
rc = ib_query_port(smcibdev->ibdev, ibport,
&smcibdev->pattr[ibport - 1]);
if (rc)
goto out;
/* the SMC protocol requires specification of the RoCE MAC address */
rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
if (rc)
goto out;
if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
sizeof(local_systemid)) &&
smc_ib_port_active(smcibdev, ibport))
/* create unique system identifier */
smc_ib_define_local_systemid(smcibdev, ibport);
out:
return rc;
}
/* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work)
{
......@@ -370,62 +426,6 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
}
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
struct ib_gid_attr gattr;
int rc;
rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
&smcibdev->gid[ibport - 1], &gattr);
if (rc || !gattr.ndev)
return -ENODEV;
memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
dev_put(gattr.ndev);
return 0;
}
/* Create an identifier unique for this instance of SMC-R.
* The MAC-address of the first active registered IB device
* plus a random 2-byte number is used to create this identifier.
* This name is delivered to the peer during connection initialization.
*/
static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
u8 ibport)
{
memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
sizeof(smcibdev->mac[ibport - 1]));
get_random_bytes(&local_systemid[0], 2);
}
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
{
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{
int rc;
memset(&smcibdev->pattr[ibport - 1], 0,
sizeof(smcibdev->pattr[ibport - 1]));
rc = ib_query_port(smcibdev->ibdev, ibport,
&smcibdev->pattr[ibport - 1]);
if (rc)
goto out;
/* the SMC protocol requires specification of the RoCE MAC address */
rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
if (rc)
goto out;
if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
sizeof(local_systemid)) &&
smc_ib_port_active(smcibdev, ibport))
/* create unique system identifier */
smc_ib_define_local_systemid(smcibdev, ibport);
out:
return rc;
}
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
{
struct ib_cq_init_attr cqattr = {
......@@ -454,9 +454,6 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
smcibdev->roce_cq_recv = NULL;
goto err;
}
INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
smc_ib_global_event_handler);
ib_register_event_handler(&smcibdev->event_handler);
smc_wr_add_dev(smcibdev);
smcibdev->initialized = 1;
return rc;
......@@ -472,7 +469,6 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
return;
smcibdev->initialized = 0;
smc_wr_remove_dev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
ib_destroy_cq(smcibdev->roce_cq_recv);
ib_destroy_cq(smcibdev->roce_cq_send);
}
......@@ -483,6 +479,8 @@ static struct ib_client smc_ib_client;
static void smc_ib_add_dev(struct ib_device *ibdev)
{
struct smc_ib_device *smcibdev;
u8 port_cnt;
int i;
if (ibdev->node_type != RDMA_NODE_IB_CA)
return;
......@@ -498,6 +496,21 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
list_add_tail(&smcibdev->list, &smc_ib_devices.list);
spin_unlock(&smc_ib_devices.lock);
ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
smc_ib_global_event_handler);
ib_register_event_handler(&smcibdev->event_handler);
/* trigger reading of the port attributes */
port_cnt = smcibdev->ibdev->phys_port_cnt;
for (i = 0;
i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
i++) {
set_bit(i, &smcibdev->port_event_mask);
/* determine pnetids of the port */
smc_pnetid_by_dev_port(ibdev->dev.parent, i,
smcibdev->pnetid[i]);
}
schedule_work(&smcibdev->port_event_work);
}
/* callback function for ib_register_client() */
......@@ -512,6 +525,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
spin_unlock(&smc_ib_devices.lock);
smc_pnet_remove_by_ibdev(smcibdev);
smc_ib_cleanup_per_ibdev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
kfree(smcibdev);
}
......
......@@ -15,6 +15,7 @@
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <rdma/ib_verbs.h>
#include <net/smc.h>
#define SMC_MAX_PORTS 2 /* Max # of ports */
#define SMC_GID_SIZE sizeof(union ib_gid)
......@@ -40,6 +41,8 @@ struct smc_ib_device { /* ib-device infos for smc */
char mac[SMC_MAX_PORTS][ETH_ALEN];
/* mac address per port*/
union ib_gid gid[SMC_MAX_PORTS]; /* gid per port */
u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
/* pnetid per port */
u8 initialized : 1; /* ib dev CQ, evthdl done */
struct work_struct port_event_work;
unsigned long port_event_mask;
......@@ -51,7 +54,6 @@ struct smc_link;
int smc_ib_register_client(void) __init;
void smc_ib_unregister_client(void);
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
......
// SPDX-License-Identifier: GPL-2.0
/* Shared Memory Communications Direct over ISM devices (SMC-D)
*
* Functions for ISM device.
*
* Copyright IBM Corp. 2018
*/
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <asm/page.h>
#include "smc.h"
#include "smc_core.h"
#include "smc_ism.h"
#include "smc_pnet.h"
struct smcd_dev_list smcd_dev_list = {
.list = LIST_HEAD_INIT(smcd_dev_list.list),
.lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock)
};
/* Test if an ISM communication is possible. */
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
{
return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0,
vlan_id);
}
int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
void *data, size_t len)
{
int rc;
rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal,
pos->offset, data, len);
return rc < 0 ? rc : 0;
}
/* Set a connection using this DMBE. */
void smc_ism_set_conn(struct smc_connection *conn)
{
unsigned long flags;
spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn;
spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
}
/* Unset a connection using this DMBE. */
void smc_ism_unset_conn(struct smc_connection *conn)
{
unsigned long flags;
if (!conn->rmb_desc)
return;
spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL;
spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
}
/* Register a VLAN identifier with the ISM device. Use a reference count
* and add a VLAN identifier only when the first DMB using this VLAN is
* registered.
*/
int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid)
{
struct smc_ism_vlanid *new_vlan, *vlan;
unsigned long flags;
int rc = 0;
if (!vlanid) /* No valid vlan id */
return -EINVAL;
/* create new vlan entry, in case we need it */
new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL);
if (!new_vlan)
return -ENOMEM;
new_vlan->vlanid = vlanid;
refcount_set(&new_vlan->refcnt, 1);
/* if there is an existing entry, increase count and return */
spin_lock_irqsave(&smcd->lock, flags);
list_for_each_entry(vlan, &smcd->vlan, list) {
if (vlan->vlanid == vlanid) {
refcount_inc(&vlan->refcnt);
kfree(new_vlan);
goto out;
}
}
/* no existing entry found.
* add new entry to device; might fail, e.g., if HW limit reached
*/
if (smcd->ops->add_vlan_id(smcd, vlanid)) {
kfree(new_vlan);
rc = -EIO;
goto out;
}
list_add_tail(&new_vlan->list, &smcd->vlan);
out:
spin_unlock_irqrestore(&smcd->lock, flags);
return rc;
}
/* Unregister a VLAN identifier with the ISM device. Use a reference count
* and remove a VLAN identifier only when the last DMB using this VLAN is
* unregistered.
*/
int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid)
{
struct smc_ism_vlanid *vlan;
unsigned long flags;
bool found = false;
int rc = 0;
if (!vlanid) /* No valid vlan id */
return -EINVAL;
spin_lock_irqsave(&smcd->lock, flags);
list_for_each_entry(vlan, &smcd->vlan, list) {
if (vlan->vlanid == vlanid) {
if (!refcount_dec_and_test(&vlan->refcnt))
goto out;
found = true;
break;
}
}
if (!found) {
rc = -ENOENT;
goto out; /* VLAN id not in table */
}
/* Found and the last reference just gone */
if (smcd->ops->del_vlan_id(smcd, vlanid))
rc = -EIO;
list_del(&vlan->list);
kfree(vlan);
out:
spin_unlock_irqrestore(&smcd->lock, flags);
return rc;
}
int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
{
struct smcd_dmb dmb;
memset(&dmb, 0, sizeof(dmb));
dmb.dmb_tok = dmb_desc->token;
dmb.sba_idx = dmb_desc->sba_idx;
dmb.cpu_addr = dmb_desc->cpu_addr;
dmb.dma_addr = dmb_desc->dma_addr;
dmb.dmb_len = dmb_desc->len;
return smcd->ops->unregister_dmb(smcd, &dmb);
}
int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
struct smc_buf_desc *dmb_desc)
{
struct smcd_dmb dmb;
int rc;
memset(&dmb, 0, sizeof(dmb));
dmb.dmb_len = dmb_len;
dmb.sba_idx = dmb_desc->sba_idx;
dmb.vlan_id = lgr->vlan_id;
dmb.rgid = lgr->peer_gid;
rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb);
if (!rc) {
dmb_desc->sba_idx = dmb.sba_idx;
dmb_desc->token = dmb.dmb_tok;
dmb_desc->cpu_addr = dmb.cpu_addr;
dmb_desc->dma_addr = dmb.dma_addr;
dmb_desc->len = dmb.dmb_len;
}
return rc;
}
struct smc_ism_event_work {
struct work_struct work;
struct smcd_dev *smcd;
struct smcd_event event;
};
/* worker for SMC-D events */
static void smc_ism_event_work(struct work_struct *work)
{
struct smc_ism_event_work *wrk =
container_of(work, struct smc_ism_event_work, work);
switch (wrk->event.type) {
case ISM_EVENT_GID: /* GID event, token is peer GID */
smc_smcd_terminate(wrk->smcd, wrk->event.tok);
break;
case ISM_EVENT_DMB:
break;
}
kfree(wrk);
}
static void smcd_release(struct device *dev)
{
struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev);
kfree(smcd->conn);
kfree(smcd);
}
struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
const struct smcd_ops *ops, int max_dmbs)
{
struct smcd_dev *smcd;
smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
if (!smcd)
return NULL;
smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
GFP_KERNEL);
if (!smcd->conn) {
kfree(smcd);
return NULL;
}
smcd->dev.parent = parent;
smcd->dev.release = smcd_release;
device_initialize(&smcd->dev);
dev_set_name(&smcd->dev, name);
smcd->ops = ops;
smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
spin_lock_init(&smcd->lock);
INIT_LIST_HEAD(&smcd->vlan);
smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
WQ_MEM_RECLAIM, name);
return smcd;
}
EXPORT_SYMBOL_GPL(smcd_alloc_dev);
int smcd_register_dev(struct smcd_dev *smcd)
{
spin_lock(&smcd_dev_list.lock);
list_add_tail(&smcd->list, &smcd_dev_list.list);
spin_unlock(&smcd_dev_list.lock);
return device_add(&smcd->dev);
}
EXPORT_SYMBOL_GPL(smcd_register_dev);
void smcd_unregister_dev(struct smcd_dev *smcd)
{
spin_lock(&smcd_dev_list.lock);
list_del(&smcd->list);
spin_unlock(&smcd_dev_list.lock);
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
smc_smcd_terminate(smcd, 0);
device_del(&smcd->dev);
}
EXPORT_SYMBOL_GPL(smcd_unregister_dev);
void smcd_free_dev(struct smcd_dev *smcd)
{
put_device(&smcd->dev);
}
EXPORT_SYMBOL_GPL(smcd_free_dev);
/* SMCD Device event handler. Called from ISM device interrupt handler.
* Parameters are smcd device pointer,
* - event->type (0 --> DMB, 1 --> GID),
* - event->code (event code),
* - event->tok (either DMB token when event type 0, or GID when event type 1)
* - event->time (time of day)
* - event->info (debug info).
*
* Context:
* - Function called in IRQ context from ISM device driver event handler.
*/
void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
{
struct smc_ism_event_work *wrk;
/* copy event to event work queue, and let it be handled there */
wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
if (!wrk)
return;
INIT_WORK(&wrk->work, smc_ism_event_work);
wrk->smcd = smcd;
wrk->event = *event;
queue_work(smcd->event_wq, &wrk->work);
}
EXPORT_SYMBOL_GPL(smcd_handle_event);
/* SMCD Device interrupt handler. Called from ISM device interrupt handler.
* Parameters are smcd device pointer and DMB number. Find the connection and
* schedule the tasklet for this connection.
*
* Context:
* - Function called in IRQ context from ISM device driver IRQ handler.
*/
void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
{
struct smc_connection *conn = NULL;
unsigned long flags;
spin_lock_irqsave(&smcd->lock, flags);
conn = smcd->conn[dmbno];
if (conn)
tasklet_schedule(&conn->rx_tsklet);
spin_unlock_irqrestore(&smcd->lock, flags);
}
EXPORT_SYMBOL_GPL(smcd_handle_irq);
/* SPDX-License-Identifier: GPL-2.0 */
/* Shared Memory Communications Direct over ISM devices (SMC-D)
*
* SMC-D ISM device structure definitions.
*
* Copyright IBM Corp. 2018
*/
#ifndef SMCD_ISM_H
#define SMCD_ISM_H
#include <linux/uio.h>
#include "smc.h"
struct smcd_dev_list { /* List of SMCD devices */
struct list_head list;
spinlock_t lock; /* Protects list of devices */
};
extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */
struct smc_ism_vlanid { /* VLAN id set on ISM device */
struct list_head list;
unsigned short vlanid; /* Vlan id */
refcount_t refcnt; /* Reference count */
};
struct smc_ism_position { /* ISM device position to write to */
u64 token; /* Token of DMB */
u32 offset; /* Offset into DMBE */
u8 index; /* Index of DMBE */
u8 signal; /* Generate interrupt on owner side */
};
struct smcd_dev;
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev);
void smc_ism_set_conn(struct smc_connection *conn);
void smc_ism_unset_conn(struct smc_connection *conn);
int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id);
int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
struct smc_buf_desc *dmb_desc);
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
void *data, size_t len);
#endif
......@@ -22,13 +22,12 @@
#include "smc_pnet.h"
#include "smc_ib.h"
#define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */
#include "smc_ism.h"
static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
[SMC_PNETID_NAME] = {
.type = NLA_NUL_STRING,
.len = SMC_MAX_PNET_ID_LEN - 1
.len = SMC_MAX_PNETID_LEN - 1
},
[SMC_PNETID_ETHNAME] = {
.type = NLA_NUL_STRING,
......@@ -65,7 +64,7 @@ static struct smc_pnettable {
*/
struct smc_pnetentry {
struct list_head list;
char pnet_name[SMC_MAX_PNET_ID_LEN + 1];
char pnet_name[SMC_MAX_PNETID_LEN + 1];
struct net_device *ndev;
struct smc_ib_device *smcibdev;
u8 ib_port;
......@@ -209,7 +208,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
return false;
while (--end >= bf && isspace(*end))
;
if (end - bf >= SMC_MAX_PNET_ID_LEN)
if (end - bf >= SMC_MAX_PNETID_LEN)
return false;
while (bf <= end) {
if (!isalnum(*bf))
......@@ -358,9 +357,6 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
kfree(pnetelem);
return rc;
}
rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port);
if (rc)
smc_pnet_remove_by_pnetid(pnetelem->pnet_name);
return rc;
}
......@@ -485,10 +481,10 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
case NETDEV_REBOOT:
case NETDEV_UNREGISTER:
smc_pnet_remove_by_ndev(event_dev);
return NOTIFY_OK;
default:
break;
return NOTIFY_DONE;
}
return NOTIFY_DONE;
}
static struct notifier_block smc_netdev_notifier = {
......@@ -515,26 +511,91 @@ void smc_pnet_exit(void)
genl_unregister_family(&smc_pnet_nl_family);
}
/* PNET table analysis for a given sock:
* determine ib_device and port belonging to used internal TCP socket
* ethernet interface.
/* Determine one base device for stacked net devices.
* If the lower device level contains more than one devices
* (for instance with bonding slaves), just the first device
* is used to reach a base device.
*/
void smc_pnet_find_roce_resource(struct sock *sk,
struct smc_ib_device **smcibdev, u8 *ibport)
static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
{
struct dst_entry *dst = sk_dst_get(sk);
struct smc_pnetentry *pnetelem;
int i, nest_lvl;
*smcibdev = NULL;
*ibport = 0;
rtnl_lock();
nest_lvl = dev_get_nest_level(ndev);
for (i = 0; i < nest_lvl; i++) {
struct list_head *lower = &ndev->adj_list.lower;
if (list_empty(lower))
break;
lower = lower->next;
ndev = netdev_lower_get_next(ndev, &lower);
}
rtnl_unlock();
return ndev;
}
/* Determine the corresponding IB device port based on the hardware PNETID.
* Searching stops at the first matching active IB device port.
*/
static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
struct smc_ib_device **smcibdev,
u8 *ibport)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smc_ib_device *ibdev;
int i;
ndev = pnet_find_base_ndev(ndev);
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
ndev_pnetid))
return; /* pnetid could not be determined */
spin_lock(&smc_ib_devices.lock);
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
for (i = 1; i <= SMC_MAX_PORTS; i++) {
if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
SMC_MAX_PNETID_LEN) &&
smc_ib_port_active(ibdev, i)) {
*smcibdev = ibdev;
*ibport = i;
break;
}
}
}
spin_unlock(&smc_ib_devices.lock);
}
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
struct smcd_dev **smcismdev)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smcd_dev *ismdev;
ndev = pnet_find_base_ndev(ndev);
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
ndev_pnetid))
return; /* pnetid could not be determined */
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
*smcismdev = ismdev;
break;
}
}
spin_unlock(&smcd_dev_list.lock);
}
/* Lookup of coupled ib_device via SMC pnet table */
static void smc_pnet_find_roce_by_table(struct net_device *netdev,
struct smc_ib_device **smcibdev,
u8 *ibport)
{
struct smc_pnetentry *pnetelem;
if (!dst)
return;
if (!dst->dev)
goto out_rel;
read_lock(&smc_pnettable.lock);
list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
if (dst->dev == pnetelem->ndev) {
if (netdev == pnetelem->ndev) {
if (smc_ib_port_active(pnetelem->smcibdev,
pnetelem->ib_port)) {
*smcibdev = pnetelem->smcibdev;
......@@ -544,6 +605,54 @@ void smc_pnet_find_roce_resource(struct sock *sk,
}
}
read_unlock(&smc_pnettable.lock);
}
/* PNET table analysis for a given sock:
* determine ib_device and port belonging to used internal TCP socket
* ethernet interface.
*/
void smc_pnet_find_roce_resource(struct sock *sk,
struct smc_ib_device **smcibdev, u8 *ibport)
{
struct dst_entry *dst = sk_dst_get(sk);
*smcibdev = NULL;
*ibport = 0;
if (!dst)
goto out;
if (!dst->dev)
goto out_rel;
/* if possible, lookup via hardware-defined pnetid */
smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport);
if (*smcibdev)
goto out_rel;
/* lookup via SMC PNET table */
smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport);
out_rel:
dst_release(dst);
out:
return;
}
void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
{
struct dst_entry *dst = sk_dst_get(sk);
*smcismdev = NULL;
if (!dst)
goto out;
if (!dst->dev)
goto out_rel;
/* if possible, lookup via hardware-defined pnetid */
smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
out_rel:
dst_release(dst);
out:
return;
}
......@@ -12,12 +12,28 @@
#ifndef _SMC_PNET_H
#define _SMC_PNET_H
#if IS_ENABLED(CONFIG_HAVE_PNETID)
#include <asm/pnet.h>
#endif
struct smc_ib_device;
struct smcd_dev;
static inline int smc_pnetid_by_dev_port(struct device *dev,
unsigned short port, u8 *pnetid)
{
#if IS_ENABLED(CONFIG_HAVE_PNETID)
return pnet_id_by_dev_port(dev, port, pnetid);
#else
return -ENOENT;
#endif
}
int smc_pnet_init(void) __init;
void smc_pnet_exit(void);
int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
void smc_pnet_find_roce_resource(struct sock *sk,
struct smc_ib_device **smcibdev, u8 *ibport);
void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
#endif
......@@ -305,7 +305,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
/* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
rcvbuf_base = conn->rmb_desc->cpu_addr;
rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;
do { /* while (read_remaining) */
if (read_done >= target || (pipe && read_done))
......
......@@ -24,6 +24,7 @@
#include "smc.h"
#include "smc_wr.h"
#include "smc_cdc.h"
#include "smc_ism.h"
#include "smc_tx.h"
#define SMC_TX_WORK_DELAY HZ
......@@ -250,6 +251,24 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
/***************************** sndbuf consumer *******************************/
/* sndbuf consumer: actual data transfer of one target chunk with ISM write */
int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
u32 offset, int signal)
{
struct smc_ism_position pos;
int rc;
memset(&pos, 0, sizeof(pos));
pos.token = conn->peer_token;
pos.index = conn->peer_rmbe_idx;
pos.offset = conn->tx_off + offset;
pos.signal = signal;
rc = smc_ism_write(conn->lgr->smcd, &pos, data, len);
if (rc)
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
return rc;
}
/* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
int num_sges, struct ib_sge sges[])
......@@ -297,21 +316,104 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
smc_curs_add(conn->sndbuf_desc->len, sent, len);
}
/* SMC-R helper for smc_tx_rdma_writes() */
static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t src_off, size_t src_len,
size_t dst_off, size_t dst_len)
{
dma_addr_t dma_addr =
sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int src_len_sum = src_len, dst_len_sum = dst_len;
struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
int sent_count = src_off;
int srcchunk, dstchunk;
int num_sges;
int rc;
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
sges[srcchunk].addr = dma_addr + src_off;
sges[srcchunk].length = src_len;
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
num_sges++;
src_off += src_len;
if (src_off >= conn->sndbuf_desc->len)
src_off -= conn->sndbuf_desc->len;
/* modulo in send ring */
if (src_len_sum == dst_len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
if (rc)
return rc;
if (dst_len_sum == len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
dst_off = 0; /* modulo offset in RMBE ring buffer */
dst_len = len - dst_len; /* remainder */
dst_len_sum += dst_len;
src_len = min_t(int, dst_len, conn->sndbuf_desc->len -
sent_count);
src_len_sum = src_len;
}
return 0;
}
/* SMC-D helper for smc_tx_rdma_writes() */
static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t src_off, size_t src_len,
size_t dst_off, size_t dst_len)
{
int src_len_sum = src_len, dst_len_sum = dst_len;
int srcchunk, dstchunk;
int rc;
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
void *data = conn->sndbuf_desc->cpu_addr + src_off;
rc = smcd_tx_ism_write(conn, data, src_len, dst_off +
sizeof(struct smcd_cdc_msg), 0);
if (rc)
return rc;
dst_off += src_len;
src_off += src_len;
if (src_off >= conn->sndbuf_desc->len)
src_off -= conn->sndbuf_desc->len;
/* modulo in send ring */
if (src_len_sum == dst_len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
if (dst_len_sum == len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
dst_off = 0; /* modulo offset in RMBE ring buffer */
dst_len = len - dst_len; /* remainder */
dst_len_sum += dst_len;
src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off);
src_len_sum = src_len;
}
return 0;
}
/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
* usable snd_wnd as max transmit
*/
static int smc_tx_rdma_writes(struct smc_connection *conn)
{
size_t src_off, src_len, dst_off, dst_len; /* current chunk values */
size_t len, dst_len_sum, src_len_sum, dstchunk, srcchunk;
size_t len, src_len, dst_off, dst_len; /* current chunk values */
union smc_host_cursor sent, prep, prod, cons;
struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
struct smc_link_group *lgr = conn->lgr;
struct smc_cdc_producer_flags *pflags;
int to_send, rmbespace;
struct smc_link *link;
dma_addr_t dma_addr;
int num_sges;
int rc;
/* source: sndbuf */
......@@ -341,7 +443,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
len = min(to_send, rmbespace);
/* initialize variables for first iteration of subsequent nested loop */
link = &lgr->lnk[SMC_SINGLE_LINK];
dst_off = prod.count;
if (prod.wrap == cons.wrap) {
/* the filled destination area is unwrapped,
......@@ -358,8 +459,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
*/
dst_len = len;
}
dst_len_sum = dst_len;
src_off = sent.count;
/* dst_len determines the maximum src_len */
if (sent.count + dst_len <= conn->sndbuf_desc->len) {
/* unwrapped src case: single chunk of entire dst_len */
......@@ -368,38 +467,15 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
/* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
src_len = conn->sndbuf_desc->len - sent.count;
}
src_len_sum = src_len;
dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
sges[srcchunk].addr = dma_addr + src_off;
sges[srcchunk].length = src_len;
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
num_sges++;
src_off += src_len;
if (src_off >= conn->sndbuf_desc->len)
src_off -= conn->sndbuf_desc->len;
/* modulo in send ring */
if (src_len_sum == dst_len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
if (rc)
return rc;
if (dst_len_sum == len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
dst_off = 0; /* modulo offset in RMBE ring buffer */
dst_len = len - dst_len; /* remainder */
dst_len_sum += dst_len;
src_len = min_t(int,
dst_len, conn->sndbuf_desc->len - sent.count);
src_len_sum = src_len;
}
if (conn->lgr->is_smcd)
rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len,
dst_off, dst_len);
else
rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
dst_off, dst_len);
if (rc)
return rc;
if (conn->urg_tx_pend && len == to_send)
pflags->urg_data_present = 1;
......@@ -420,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags;
struct smc_cdc_tx_pend *pend;
......@@ -467,6 +543,37 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
return rc;
}
static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
int rc = 0;
spin_lock_bh(&conn->send_lock);
if (!pflags->urg_data_present)
rc = smc_tx_rdma_writes(conn);
if (!rc)
rc = smcd_cdc_msg_send(conn);
if (!rc && pflags->urg_data_present) {
pflags->urg_data_pending = 0;
pflags->urg_data_present = 0;
}
spin_unlock_bh(&conn->send_lock);
return rc;
}
int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
int rc;
if (conn->lgr->is_smcd)
rc = smcd_tx_sndbuf_nonempty(conn);
else
rc = smcr_tx_sndbuf_nonempty(conn);
return rc;
}
/* Wakeup sndbuf consumers from process context
* since there is more data to transmit
*/
......@@ -495,7 +602,8 @@ void smc_tx_work(struct work_struct *work)
void smc_tx_consumer_update(struct smc_connection *conn, bool force)
{
union smc_host_cursor cfed, cons;
union smc_host_cursor cfed, cons, prod;
int sender_free = conn->rmb_desc->len;
int to_confirm;
smc_curs_write(&cons,
......@@ -505,11 +613,18 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
smc_curs_read(&conn->rx_curs_confirmed, conn),
conn);
to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
if (to_confirm > conn->rmbe_update_limit) {
smc_curs_write(&prod,
smc_curs_read(&conn->local_rx_ctrl.prod, conn),
conn);
sender_free = conn->rmb_desc->len -
smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
}
if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
force ||
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmb_desc->len / 2)) ||
((sender_free <= (conn->rmb_desc->len / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
conn->alert_token_local) { /* connection healthy */
......
......@@ -33,5 +33,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
void smc_tx_consumer_update(struct smc_connection *conn, bool force);
int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
u32 offset, int signal);
#endif /* SMC_TX_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册