diff --git a/drivers/s390/net/ism.h b/drivers/s390/net/ism.h index 1901e9c80ed8ee3ef6ef684564ce9f7a8d61bb1d..38fe90c2597d1862565711c08f844bbffe874b24 100644 --- a/drivers/s390/net/ism.h +++ b/drivers/s390/net/ism.h @@ -16,6 +16,7 @@ #define ISM_DMB_WORD_OFFSET 1 #define ISM_DMB_BIT_OFFSET (ISM_DMB_WORD_OFFSET * 32) #define ISM_NR_DMBS 1920 +#define ISM_IDENT_MASK 0x00FFFF #define ISM_REG_SBA 0x1 #define ISM_REG_IEQ 0x2 @@ -206,6 +207,12 @@ struct ism_dev { #define ISM_CREATE_REQ(dmb, idx, sf, offset) \ ((dmb) | (idx) << 24 | (sf) << 23 | (offset)) +struct ism_systemeid { + u8 seid_string[24]; + u8 serial_number[4]; + u8 type[4]; +}; + static inline void __ism_read_cmd(struct ism_dev *ism, void *data, unsigned long offset, unsigned long len) { diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 5fbe9eae84d17a0e1564156537322facdd929f46..fe96ca3c88a5a7a8bc38740c1ff9a4403b4c956f 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -13,6 +13,8 @@ #include <linux/device.h> #include <linux/pci.h> #include <linux/err.h> +#include <linux/ctype.h> +#include <linux/processor.h> #include <net/smc.h> #include <asm/debug.h> @@ -387,6 +389,42 @@ static int ism_move(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx, return 0; } +static struct ism_systemeid SYSTEM_EID = { + .seid_string = "IBM-SYSZ-IBMSEID00000000", + .serial_number = "0000", + .type = "0000", +}; + +static void ism_create_system_eid(void) +{ + struct cpuid id; + u16 ident_tail; + char tmp[5]; + + get_cpu_id(&id); + ident_tail = (u16)(id.ident & ISM_IDENT_MASK); + snprintf(tmp, 5, "%04X", ident_tail); + memcpy(&SYSTEM_EID.serial_number, tmp, 4); + snprintf(tmp, 5, "%04X", id.machine); + memcpy(&SYSTEM_EID.type, tmp, 4); +} + +static void ism_get_system_eid(struct smcd_dev *smcd, u8 **eid) +{ + *eid = &SYSTEM_EID.seid_string[0]; +} + +static u16 ism_get_chid(struct smcd_dev *smcd) +{ + struct ism_dev *ismdev; + + ismdev = (struct ism_dev *)smcd->priv; + if (!ismdev || !ismdev->pdev) + return 0; + + return to_zpci(ismdev->pdev)->pchid; +} + static void ism_handle_event(struct ism_dev *ism) { struct smcd_event *entry; @@ -443,6 +481,8 @@ static const struct smcd_ops ism_ops = { .reset_vlan_required = ism_reset_vlan_required, .signal_event = ism_signal_ieq, .move_data = ism_move, + .get_system_eid = ism_get_system_eid, + .get_chid = ism_get_chid, }; static int ism_dev_init(struct ism_dev *ism) @@ -471,6 +511,10 @@ static int ism_dev_init(struct ism_dev *ism) if (ret) goto unreg_ieq; + if (!ism_add_vlan_id(ism->smcd, ISM_RESERVED_VLANID)) + /* hardware is V2 capable */ + ism_create_system_eid(); + ret = smcd_register_dev(ism->smcd); if (ret) goto unreg_ieq; @@ -550,6 +594,9 @@ static void ism_dev_exit(struct ism_dev *ism) struct pci_dev *pdev = ism->pdev; smcd_unregister_dev(ism->smcd); + if (SYSTEM_EID.serial_number[0] != '0' || + SYSTEM_EID.type[0] != '0') + ism_del_vlan_id(ism->smcd, ISM_RESERVED_VLANID); unregister_ieq(ism); unregister_sba(ism); free_irq(pci_irq_vector(pdev, 0), ism); diff --git a/include/net/smc.h b/include/net/smc.h index 646feb4bc75f2bb0f38170104eda0ae52d56322a..e441aa97ad61b1834f4983d10e15a34a7936f916 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -37,6 +37,8 @@ struct smcd_dmb { #define ISM_EVENT_GID 1 #define ISM_EVENT_SWR 2 +#define ISM_RESERVED_VLANID 0x1FFF + #define ISM_ERROR 0xFFFF struct smcd_event { @@ -63,6 +65,8 @@ struct smcd_ops { int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); + void (*get_system_eid)(struct smcd_dev *dev, u8 **eid); + u16 (*get_chid)(struct smcd_dev *dev); }; struct smcd_dev { diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ed8f97166be92533faf7259733e6b03ea3efd341..e874d0e6267fb41a871219d91532130cd7a2cb25 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -26,6 +26,7 @@ #include <linux/sched/signal.h> #include <linux/if_vlan.h> #include <linux/rcupdate_wait.h> +#include <linux/ctype.h> #include <net/sock.h> #include <net/tcp.h> @@ -448,6 +449,16 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc, smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); } +static bool smc_isascii(char *hostname) +{ + int i; + + for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++) + if (!isascii(hostname[i])) + return false; + return true; +} + static void smcd_conn_save_peer_info(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { @@ -459,6 +470,22 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc, smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; + if (clc->hdr.version > SMC_V1 && + (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) { + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)clc; + struct smc_clc_first_contact_ext *fce = + (struct smc_clc_first_contact_ext *) + (((u8 *)clc_v2) + sizeof(*clc_v2)); + + memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid, + SMC_MAX_EID_LEN); + smc->conn.lgr->peer_os = fce->os_type; + smc->conn.lgr->peer_smc_release = fce->release; + if (smc_isascii(fce->hostname)) + memcpy(smc->conn.lgr->peer_hostname, fce->hostname, + SMC_MAX_HOSTNAME_LEN); + } } static void smc_conn_save_peer_info(struct smc_sock *smc, @@ -504,7 +531,8 @@ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) } /* decline and fall back during connect */ -static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) +static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code, + u8 version) { int rc; @@ -514,7 +542,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) return reason_code; } if (reason_code != SMC_CLC_DECL_PEERDECL) { - rc = smc_clc_send_decline(smc, reason_code); + rc = smc_clc_send_decline(smc, reason_code, version); if (rc < 0) { if (smc->sk.sk_state == SMC_INIT) sock_put(&smc->sk); /* passive closing */ @@ -564,47 +592,121 @@ static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini) { /* Find ISM device with same PNETID as connecting interface */ smc_pnet_find_ism_resource(smc->clcsock->sk, ini); - if (!ini->ism_dev) + if (!ini->ism_dev[0]) return SMC_CLC_DECL_NOSMCDDEV; + else + ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]); return 0; } +/* determine possible V2 ISM devices (either without PNETID or with PNETID plus + * PNETID matching net_device) + */ +static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, + struct smc_init_info *ini) +{ + int rc = SMC_CLC_DECL_NOSMCDDEV; + struct smcd_dev *smcd; + int i = 1; + + if (smcd_indicated(ini->smc_type_v1)) + rc = 0; /* already initialized for V1 */ + mutex_lock(&smcd_dev_list.mutex); + list_for_each_entry(smcd, &smcd_dev_list.list, list) { + if (smcd->going_away || smcd == ini->ism_dev[0]) + continue; + if (!smc_pnet_is_pnetid_set(smcd->pnetid) || + smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) { + ini->ism_dev[i] = smcd; + ini->ism_chid[i] = smc_ism_get_chid(ini->ism_dev[i]); + ini->is_smcd = true; + rc = 0; + i++; + if (i > SMC_MAX_ISM_DEVS) + break; + } + } + mutex_unlock(&smcd_dev_list.mutex); + ini->ism_offered_cnt = i - 1; + if (!ini->ism_dev[0] && !ini->ism_dev[1]) + ini->smcd_version = 0; + + return rc; +} + /* Check for VLAN ID and register it on ISM device just for CLC handshake */ static int smc_connect_ism_vlan_setup(struct smc_sock *smc, struct smc_init_info *ini) { - if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) + if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id)) return SMC_CLC_DECL_ISMVLANERR; return 0; } +static int smc_find_proposal_devices(struct smc_sock *smc, + struct smc_init_info *ini) +{ + int rc = 0; + + /* check if there is an ism device available */ + if (ini->smcd_version & SMC_V1) { + if (smc_find_ism_device(smc, ini) || + smc_connect_ism_vlan_setup(smc, ini)) { + if (ini->smc_type_v1 == SMC_TYPE_B) + ini->smc_type_v1 = SMC_TYPE_R; + else + ini->smc_type_v1 = SMC_TYPE_N; + } /* else ISM V1 is supported for this connection */ + if (smc_find_rdma_device(smc, ini)) { + if (ini->smc_type_v1 == SMC_TYPE_B) + ini->smc_type_v1 = SMC_TYPE_D; + else + ini->smc_type_v1 = SMC_TYPE_N; + } /* else RDMA is supported for this connection */ + } + if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini)) + ini->smc_type_v2 = SMC_TYPE_N; + + /* if neither ISM nor RDMA are supported, fallback */ + if (!smcr_indicated(ini->smc_type_v1) && + ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N) + rc = SMC_CLC_DECL_NOSMCDEV; + + return rc; +} + /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is * used, the VLAN ID will be registered again during the connection setup. */ -static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, +static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, struct smc_init_info *ini) { - if (!is_smcd) + if (!smcd_indicated(ini->smc_type_v1)) return 0; - if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id)) + if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id)) return SMC_CLC_DECL_CNFERR; return 0; } +#define SMC_CLC_MAX_ACCEPT_LEN \ + (sizeof(struct smc_clc_msg_accept_confirm_v2) + \ + sizeof(struct smc_clc_first_contact_ext) + \ + sizeof(struct smc_clc_msg_trail)) + /* CLC handshake during connect */ -static int smc_connect_clc(struct smc_sock *smc, int smc_type, - struct smc_clc_msg_accept_confirm *aclc, +static int smc_connect_clc(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm_v2 *aclc2, struct smc_init_info *ini) { int rc = 0; /* do inband token exchange */ - rc = smc_clc_send_proposal(smc, smc_type, ini); + rc = smc_clc_send_proposal(smc, ini); if (rc) return rc; /* receive SMC Accept CLC message */ - return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT, - CLC_WAIT_TIME); + return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN, + SMC_CLC_ACCEPT, CLC_WAIT_TIME); } /* setup for RDMA connection of client */ @@ -618,7 +720,7 @@ static int smc_connect_rdma(struct smc_sock *smc, ini->is_smcd = false; ini->ib_lcl = &aclc->r0.lcl; ini->ib_clcqpn = ntoh24(aclc->r0.qpn); - ini->first_contact_peer = aclc->hdr.flag; + ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; mutex_lock(&smc_client_lgr_pending); reason_code = smc_conn_create(smc, ini); @@ -678,7 +780,8 @@ static int smc_connect_rdma(struct smc_sock *smc, } smc_rmb_sync_sg_for_device(&smc->conn); - reason_code = smc_clc_send_confirm(smc); + reason_code = smc_clc_send_confirm(smc, ini->first_contact_local, + SMC_V1); if (reason_code) return smc_connect_abort(smc, reason_code, ini->first_contact_local); @@ -704,6 +807,25 @@ static int smc_connect_rdma(struct smc_sock *smc, return 0; } +/* The server has chosen one of the proposed ISM devices for the communication. + * Determine from the CHID of the received CLC ACCEPT the ISM device chosen. + */ +static int +smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, + struct smc_init_info *ini) +{ + int i; + + for (i = 0; i < ini->ism_offered_cnt + 1; i++) { + if (ini->ism_chid[i] == ntohs(aclc->chid)) { + ini->ism_selected = i; + return 0; + } + } + + return -EPROTO; +} + /* setup for ISM connection of client */ static int smc_connect_ism(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, @@ -712,8 +834,17 @@ static int smc_connect_ism(struct smc_sock *smc, int rc = 0; ini->is_smcd = true; - ini->ism_peer_gid = aclc->d0.gid; - ini->first_contact_peer = aclc->hdr.flag; + ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; + + if (aclc->hdr.version == SMC_V2) { + struct smc_clc_msg_accept_confirm_v2 *aclc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)aclc; + + rc = smc_v2_determine_accepted_chid(aclc_v2, ini); + if (rc) + return rc; + } + ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid; /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); @@ -736,7 +867,8 @@ static int smc_connect_ism(struct smc_sock *smc, smc_rx_init(smc); smc_tx_init(smc); - rc = smc_clc_send_confirm(smc); + rc = smc_clc_send_confirm(smc, ini->first_contact_local, + aclc->hdr.version); if (rc) return smc_connect_abort(smc, rc, ini->first_contact_local); mutex_unlock(&smc_server_lgr_pending); @@ -749,13 +881,32 @@ static int smc_connect_ism(struct smc_sock *smc, return 0; } +/* check if received accept type and version matches a proposed one */ +static int smc_connect_check_aclc(struct smc_init_info *ini, + struct smc_clc_msg_accept_confirm *aclc) +{ + if ((aclc->hdr.typev1 == SMC_TYPE_R && + !smcr_indicated(ini->smc_type_v1)) || + (aclc->hdr.typev1 == SMC_TYPE_D && + ((!smcd_indicated(ini->smc_type_v1) && + !smcd_indicated(ini->smc_type_v2)) || + (aclc->hdr.version == SMC_V1 && + !smcd_indicated(ini->smc_type_v1)) || + (aclc->hdr.version == SMC_V2 && + !smcd_indicated(ini->smc_type_v2))))) + return SMC_CLC_DECL_MODEUNSUPP; + + return 0; +} + /* perform steps before actually connecting */ static int __smc_connect(struct smc_sock *smc) { - bool ism_supported = false, rdma_supported = false; - struct smc_clc_msg_accept_confirm aclc; - struct smc_init_info ini = {0}; - int smc_type; + u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1; + struct smc_clc_msg_accept_confirm_v2 *aclc2; + struct smc_clc_msg_accept_confirm *aclc; + struct smc_init_info *ini = NULL; + u8 *buf = NULL; int rc = 0; if (smc->use_fallback) @@ -765,58 +916,73 @@ static int __smc_connect(struct smc_sock *smc) if (!tcp_sk(smc->clcsock->sk)->syn_smc) return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC); - /* IPSec connections opt out of SMC-R optimizations */ + /* IPSec connections opt out of SMC optimizations */ if (using_ipsec(smc)) - return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC, + version); - /* get vlan id from IP device */ - if (smc_vlan_by_tcpsk(smc->clcsock, &ini)) - return smc_connect_decline_fallback(smc, - SMC_CLC_DECL_GETVLANERR); + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM, + version); - /* check if there is an ism device available */ - if (!smc_find_ism_device(smc, &ini) && - !smc_connect_ism_vlan_setup(smc, &ini)) { - /* ISM is supported for this connection */ - ism_supported = true; - smc_type = SMC_TYPE_D; - } - - /* check if there is a rdma device available */ - if (!smc_find_rdma_device(smc, &ini)) { - /* RDMA is supported for this connection */ - rdma_supported = true; - if (ism_supported) - smc_type = SMC_TYPE_B; /* both */ - else - smc_type = SMC_TYPE_R; /* only RDMA */ + ini->smcd_version = SMC_V1; + ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0; + ini->smc_type_v1 = SMC_TYPE_B; + ini->smc_type_v2 = smc_ism_v2_capable ? SMC_TYPE_D : SMC_TYPE_N; + + /* get vlan id from IP device */ + if (smc_vlan_by_tcpsk(smc->clcsock, ini)) { + ini->smcd_version &= ~SMC_V1; + ini->smc_type_v1 = SMC_TYPE_N; + if (!ini->smcd_version) { + rc = SMC_CLC_DECL_GETVLANERR; + goto fallback; + } } - /* if neither ISM nor RDMA are supported, fallback */ - if (!rdma_supported && !ism_supported) - return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV); + rc = smc_find_proposal_devices(smc, ini); + if (rc) + goto fallback; - /* perform CLC handshake */ - rc = smc_connect_clc(smc, smc_type, &aclc, &ini); - if (rc) { - smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); - return smc_connect_decline_fallback(smc, rc); + buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL); + if (!buf) { + rc = SMC_CLC_DECL_MEM; + goto fallback; } + aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf; + aclc = (struct smc_clc_msg_accept_confirm *)aclc2; + + /* perform CLC handshake */ + rc = smc_connect_clc(smc, aclc2, ini); + if (rc) + goto vlan_cleanup; + + /* check if smc modes and versions of CLC proposal and accept match */ + rc = smc_connect_check_aclc(ini, aclc); + version = aclc->hdr.version == SMC_V1 ? SMC_V1 : version; + if (rc) + goto vlan_cleanup; /* depending on previous steps, connect using rdma or ism */ - if (rdma_supported && aclc.hdr.path == SMC_TYPE_R) - rc = smc_connect_rdma(smc, &aclc, &ini); - else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) - rc = smc_connect_ism(smc, &aclc, &ini); - else - rc = SMC_CLC_DECL_MODEUNSUPP; - if (rc) { - smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); - return smc_connect_decline_fallback(smc, rc); - } + if (aclc->hdr.typev1 == SMC_TYPE_R) + rc = smc_connect_rdma(smc, aclc, ini); + else if (aclc->hdr.typev1 == SMC_TYPE_D) + rc = smc_connect_ism(smc, aclc, ini); + if (rc) + goto vlan_cleanup; - smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); + smc_connect_ism_vlan_cleanup(smc, ini); + kfree(buf); + kfree(ini); return 0; + +vlan_cleanup: + smc_connect_ism_vlan_cleanup(smc, ini); + kfree(buf); +fallback: + kfree(ini); + return smc_connect_decline_fallback(smc, rc, version); } static void smc_connect_work(struct work_struct *work) @@ -1132,10 +1298,10 @@ static void smc_listen_out_err(struct smc_sock *new_smc) /* listen worker: decline and fall back if possible */ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, - bool local_first) + struct smc_init_info *ini, u8 version) { /* RDMA setup failed, switch back to TCP */ - if (local_first) + if (ini->first_contact_local) smc_lgr_cleanup_early(&new_smc->conn); else smc_conn_free(&new_smc->conn); @@ -1146,7 +1312,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = reason_code; if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { - if (smc_clc_send_decline(new_smc, reason_code) < 0) { + if (smc_clc_send_decline(new_smc, reason_code, version) < 0) { smc_listen_out_err(new_smc); return; } @@ -1154,6 +1320,47 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, smc_listen_out_connected(new_smc); } +/* listen worker: version checking */ +static int smc_listen_v2_check(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext; + struct smc_clc_v2_extension *pclc_v2_ext; + + ini->smc_type_v1 = pclc->hdr.typev1; + ini->smc_type_v2 = pclc->hdr.typev2; + ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0; + if (pclc->hdr.version > SMC_V1) + ini->smcd_version |= + ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0; + if (!smc_ism_v2_capable) { + ini->smcd_version &= ~SMC_V2; + goto out; + } + pclc_v2_ext = smc_get_clc_v2_ext(pclc); + if (!pclc_v2_ext) { + ini->smcd_version &= ~SMC_V2; + goto out; + } + pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext); + if (!pclc_smcd_v2_ext) + ini->smcd_version &= ~SMC_V2; + +out: + if (!ini->smcd_version) { + if (pclc->hdr.typev1 == SMC_TYPE_B || + pclc->hdr.typev2 == SMC_TYPE_B) + return SMC_CLC_DECL_NOSMCDEV; + if (pclc->hdr.typev1 == SMC_TYPE_D || + pclc->hdr.typev2 == SMC_TYPE_D) + return SMC_CLC_DECL_NOSMCDDEV; + return SMC_CLC_DECL_NOSMCRDEV; + } + + return 0; +} + /* listen worker: check prefixes */ static int smc_listen_prfx_check(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc) @@ -1161,6 +1368,8 @@ static int smc_listen_prfx_check(struct smc_sock *new_smc, struct smc_clc_msg_proposal_prefix *pclc_prfx; struct socket *newclcsock = new_smc->clcsock; + if (pclc->hdr.typev1 == SMC_TYPE_N) + return 0; pclc_prfx = smc_clc_proposal_get_prefix(pclc); if (smc_clc_prfx_match(newclcsock, pclc_prfx)) return SMC_CLC_DECL_DIFFPREFIX; @@ -1188,7 +1397,6 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, /* listen worker: initialize connection and buffers for SMC-D */ static int smc_listen_ism_init(struct smc_sock *new_smc, - struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { int rc; @@ -1211,6 +1419,125 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, return 0; } +static bool smc_is_already_selected(struct smcd_dev *smcd, + struct smc_init_info *ini, + int matches) +{ + int i; + + for (i = 0; i < matches; i++) + if (smcd == ini->ism_dev[i]) + return true; + + return false; +} + +/* check for ISM devices matching proposed ISM devices */ +static void smc_check_ism_v2_match(struct smc_init_info *ini, + u16 proposed_chid, u64 proposed_gid, + unsigned int *matches) +{ + struct smcd_dev *smcd; + + list_for_each_entry(smcd, &smcd_dev_list.list, list) { + if (smcd->going_away) + continue; + if (smc_is_already_selected(smcd, ini, *matches)) + continue; + if (smc_ism_get_chid(smcd) == proposed_chid && + !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) { + ini->ism_peer_gid[*matches] = proposed_gid; + ini->ism_dev[*matches] = smcd; + (*matches)++; + break; + } + } +} + +static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + struct smc_clc_smcd_v2_extension *smcd_v2_ext; + struct smc_clc_v2_extension *smc_v2_ext; + struct smc_clc_msg_smcd *pclc_smcd; + unsigned int matches = 0; + u8 *eid = NULL; + int i; + + if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2)) + return; + + pclc_smcd = smc_get_clc_msg_smcd(pclc); + smc_v2_ext = smc_get_clc_v2_ext(pclc); + smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); + if (!smcd_v2_ext || + !smc_v2_ext->hdr.flag.seid) /* no system EID support for SMCD */ + goto not_found; + + mutex_lock(&smcd_dev_list.mutex); + if (pclc_smcd->ism.chid) + /* check for ISM device matching proposed native ISM device */ + smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid), + ntohll(pclc_smcd->ism.gid), &matches); + for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) { + /* check for ISM devices matching proposed non-native ISM + * devices + */ + smc_check_ism_v2_match(ini, + ntohs(smcd_v2_ext->gidchid[i - 1].chid), + ntohll(smcd_v2_ext->gidchid[i - 1].gid), + &matches); + } + mutex_unlock(&smcd_dev_list.mutex); + + if (ini->ism_dev[0]) { + smc_ism_get_system_eid(ini->ism_dev[0], &eid); + if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN)) + goto not_found; + } else { + goto not_found; + } + + /* separate - outside the smcd_dev_list.lock */ + for (i = 0; i < matches; i++) { + ini->smcd_version = SMC_V2; + ini->is_smcd = true; + ini->ism_selected = i; + if (smc_listen_ism_init(new_smc, ini)) + /* try next active ISM device */ + continue; + return; /* matching and usable V2 ISM device found */ + } + +not_found: + ini->smcd_version &= ~SMC_V2; + ini->ism_dev[0] = NULL; + ini->is_smcd = false; +} + +static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc); + + /* check if ISM V1 is available */ + if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1)) + goto not_found; + ini->is_smcd = true; /* prepare ISM check */ + ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid); + if (smc_find_ism_device(new_smc, ini)) + goto not_found; + ini->ism_selected = 0; + if (!smc_listen_ism_init(new_smc, ini)) + return; /* V1 ISM device found */ + +not_found: + ini->ism_dev[0] = NULL; + ini->is_smcd = false; +} + /* listen worker: register buffers */ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first) { @@ -1225,6 +1552,67 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first) return 0; } +static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + int rc; + + if (!smcr_indicated(ini->smc_type_v1)) + return SMC_CLC_DECL_NOSMCDEV; + + /* prepare RDMA check */ + ini->ib_lcl = &pclc->lcl; + rc = smc_find_rdma_device(new_smc, ini); + if (rc) { + /* no RDMA device found */ + if (ini->smc_type_v1 == SMC_TYPE_B) + /* neither ISM nor RDMA device found */ + rc = SMC_CLC_DECL_NOSMCDEV; + return rc; + } + rc = smc_listen_rdma_init(new_smc, ini); + if (rc) + return rc; + return smc_listen_rdma_reg(new_smc, ini->first_contact_local); +} + +/* determine the local device matching to proposal */ +static int smc_listen_find_device(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + int rc; + + /* check for ISM device matching V2 proposed device */ + smc_find_ism_v2_device_serv(new_smc, pclc, ini); + if (ini->ism_dev[0]) + return 0; + + if (!(ini->smcd_version & SMC_V1)) + return SMC_CLC_DECL_NOSMCDEV; + + /* check for matching IP prefix and subnet length */ + rc = smc_listen_prfx_check(new_smc, pclc); + if (rc) + return rc; + + /* get vlan id from IP device */ + if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) + return SMC_CLC_DECL_GETVLANERR; + + /* check for ISM device matching V1 proposed device */ + smc_find_ism_v1_device_serv(new_smc, pclc, ini); + if (ini->ism_dev[0]) + return 0; + + if (pclc->hdr.typev1 == SMC_TYPE_D) + return SMC_CLC_DECL_NOSMCDDEV; /* skip RDMA and decline */ + + /* check if RDMA is available */ + return smc_find_rdma_v1_device_serv(new_smc, pclc, ini); +} + /* listen worker: finish RDMA setup */ static int smc_listen_rdma_finish(struct smc_sock *new_smc, struct smc_clc_msg_accept_confirm *cclc, @@ -1250,17 +1638,18 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc, return reason_code; } -/* setup for RDMA connection of server */ +/* setup for connection of server */ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); + u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1; struct socket *newclcsock = new_smc->clcsock; - struct smc_clc_msg_accept_confirm cclc; + struct smc_clc_msg_accept_confirm_v2 *cclc2; + struct smc_clc_msg_accept_confirm *cclc; struct smc_clc_msg_proposal_area *buf; struct smc_clc_msg_proposal *pclc; - struct smc_init_info ini = {0}; - bool ism_supported = false; + struct smc_init_info *ini = NULL; int rc = 0; if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) @@ -1292,101 +1681,76 @@ static void smc_listen_work(struct work_struct *work) SMC_CLC_PROPOSAL, CLC_WAIT_TIME); if (rc) goto out_decl; + version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version; - /* IPSec connections opt out of SMC-R optimizations */ + /* IPSec connections opt out of SMC optimizations */ if (using_ipsec(new_smc)) { rc = SMC_CLC_DECL_IPSEC; goto out_decl; } - /* check for matching IP prefix and subnet length */ - rc = smc_listen_prfx_check(new_smc, pclc); - if (rc) + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) { + rc = SMC_CLC_DECL_MEM; goto out_decl; + } - /* get vlan id from IP device */ - if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) { - rc = SMC_CLC_DECL_GETVLANERR; + /* initial version checking */ + rc = smc_listen_v2_check(new_smc, pclc, ini); + if (rc) goto out_decl; - } mutex_lock(&smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); - /* check if ISM is available */ - if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) { - struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc); - - ini.is_smcd = true; /* prepare ISM check */ - ini.ism_peer_gid = pclc_smcd->gid; - rc = smc_find_ism_device(new_smc, &ini); - if (!rc) - rc = smc_listen_ism_init(new_smc, pclc, &ini); - if (!rc) - ism_supported = true; - else if (pclc->hdr.path == SMC_TYPE_D) - goto out_unlock; /* skip RDMA and decline */ - } - - /* check if RDMA is available */ - if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */ - /* prepare RDMA check */ - ini.is_smcd = false; - ini.ism_dev = NULL; - ini.ib_lcl = &pclc->lcl; - rc = smc_find_rdma_device(new_smc, &ini); - if (rc) { - /* no RDMA device found */ - if (pclc->hdr.path == SMC_TYPE_B) - /* neither ISM nor RDMA device found */ - rc = SMC_CLC_DECL_NOSMCDEV; - goto out_unlock; - } - rc = smc_listen_rdma_init(new_smc, &ini); - if (rc) - goto out_unlock; - rc = smc_listen_rdma_reg(new_smc, ini.first_contact_local); - if (rc) - goto out_unlock; - } + /* determine ISM or RoCE device used for connection */ + rc = smc_listen_find_device(new_smc, pclc, ini); + if (rc) + goto out_unlock; /* send SMC Accept CLC message */ - rc = smc_clc_send_accept(new_smc, ini.first_contact_local); + rc = smc_clc_send_accept(new_smc, ini->first_contact_local, + ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1); if (rc) goto out_unlock; /* SMC-D does not need this lock any more */ - if (ism_supported) + if (ini->is_smcd) mutex_unlock(&smc_server_lgr_pending); /* receive SMC Confirm CLC message */ - rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), + cclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf; + cclc = (struct smc_clc_msg_accept_confirm *)cclc2; + memset(buf, 0, sizeof(struct smc_clc_msg_proposal_area)); + rc = smc_clc_wait_msg(new_smc, cclc2, + sizeof(struct smc_clc_msg_proposal_area), SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (rc) { - if (!ism_supported) + if (!ini->is_smcd) goto out_unlock; goto out_decl; } /* finish worker */ - if (!ism_supported) { - rc = smc_listen_rdma_finish(new_smc, &cclc, - ini.first_contact_local); + if (!ini->is_smcd) { + rc = smc_listen_rdma_finish(new_smc, cclc, + ini->first_contact_local); if (rc) goto out_unlock; mutex_unlock(&smc_server_lgr_pending); } - smc_conn_save_peer_info(new_smc, &cclc); + smc_conn_save_peer_info(new_smc, cclc); smc_listen_out_connected(new_smc); goto out_free; out_unlock: mutex_unlock(&smc_server_lgr_pending); out_decl: - smc_listen_decline(new_smc, rc, ini.first_contact_local); + smc_listen_decline(new_smc, rc, ini, version); out_free: + kfree(ini); kfree(buf); } @@ -2092,6 +2456,9 @@ static int __init smc_init(void) if (rc) return rc; + smc_ism_init(); + smc_clc_init(); + rc = smc_pnet_init(); if (rc) goto out_pernet_subsys; diff --git a/net/smc/smc.h b/net/smc/smc.h index 2bd57e57b7e7be3e5455424fc58f5346ce329e1f..d65e15f0c944c1d599492261ac240445dfd64356 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -19,10 +19,19 @@ #include "smc_ib.h" #define SMC_V1 1 /* SMC version V1 */ +#define SMC_V2 2 /* SMC version V2 */ +#define SMC_RELEASE 0 #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ +#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM + * devices + */ + +#define SMC_MAX_HOSTNAME_LEN 32 +#define SMC_MAX_EID_LEN 32 + extern struct proto smc_proto; extern struct proto smc_proto6; @@ -246,6 +255,9 @@ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ +#define ntohll(x) be64_to_cpu(x) +#define htonll(x) cpu_to_be64(x) + /* convert an u32 value into network byte order, store it into a 3 byte field */ static inline void hton24(u8 *net, u32 host) { diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index bd116e1949b9dfcdd1e46875bb44b05739cf2746..696d89c2dce4a722cb4222133582e7cd3caeff74 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -14,6 +14,8 @@ #include <linux/inetdevice.h> #include <linux/if_ether.h> #include <linux/sched/signal.h> +#include <linux/utsname.h> +#include <linux/ctype.h> #include <net/addrconf.h> #include <net/sock.h> @@ -27,6 +29,7 @@ #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 +#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78 #define SMC_CLC_RECV_BUF_LEN 100 /* eye catcher "SMCR" EBCDIC for CLC messages */ @@ -34,13 +37,88 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; /* eye catcher "SMCD" EBCDIC for CLC messages */ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; +static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN]; + +/* check arriving CLC proposal */ +static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) +{ + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct smc_clc_smcd_v2_extension *smcd_v2_ext; + struct smc_clc_msg_hdr *hdr = &pclc->hdr; + struct smc_clc_v2_extension *v2_ext; + + v2_ext = smc_get_clc_v2_ext(pclc); + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (hdr->version == SMC_V1) { + if (hdr->typev1 == SMC_TYPE_N) + return false; + if (ntohs(hdr->length) != + sizeof(*pclc) + ntohs(pclc->iparea_offset) + + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) + + sizeof(struct smc_clc_msg_trail)) + return false; + } else { + if (ntohs(hdr->length) != + sizeof(*pclc) + + sizeof(struct smc_clc_msg_smcd) + + (hdr->typev1 != SMC_TYPE_N ? + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) : 0) + + (hdr->typev2 != SMC_TYPE_N ? + sizeof(*v2_ext) + + v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) + + (smcd_indicated(hdr->typev2) ? + sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt * + sizeof(struct smc_clc_smcd_gid_chid) : + 0) + + sizeof(struct smc_clc_msg_trail)) + return false; + } + return true; +} + +/* check arriving CLC accept or confirm */ +static bool +smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) +{ + struct smc_clc_msg_hdr *hdr = &clc_v2->hdr; + + if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) + return false; + if (hdr->version == SMC_V1) { + if ((hdr->typev1 == SMC_TYPE_R && + ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || + (hdr->typev1 == SMC_TYPE_D && + ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) + return false; + } else { + if (hdr->typev1 == SMC_TYPE_D && + ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 && + (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + + sizeof(struct smc_clc_first_contact_ext))) + return false; + } + return true; +} + +static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len) +{ + memset(fce, 0, sizeof(*fce)); + fce->os_type = SMC_CLC_OS_LINUX; + fce->release = SMC_RELEASE; + memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname)); + (*len) += sizeof(*fce); +} + /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers */ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) { - struct smc_clc_msg_proposal_prefix *pclc_prfx; - struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_msg_accept_confirm_v2 *clc_v2; struct smc_clc_msg_proposal *pclc; struct smc_clc_msg_decline *dclc; struct smc_clc_msg_trail *trl; @@ -51,29 +129,19 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) switch (clcm->type) { case SMC_CLC_PROPOSAL: pclc = (struct smc_clc_msg_proposal *)clcm; - pclc_prfx = smc_clc_proposal_get_prefix(pclc); - if (ntohs(pclc->hdr.length) < - sizeof(*pclc) + ntohs(pclc->iparea_offset) + - sizeof(*pclc_prfx) + - pclc_prfx->ipv6_prefixes_cnt * - sizeof(struct smc_clc_ipv6_prefix) + - sizeof(*trl)) + if (!smc_clc_msg_prop_valid(pclc)) return false; trl = (struct smc_clc_msg_trail *) ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); break; case SMC_CLC_ACCEPT: case SMC_CLC_CONFIRM: - if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D) - return false; - clc = (struct smc_clc_msg_accept_confirm *)clcm; - if ((clcm->path == SMC_TYPE_R && - ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || - (clcm->path == SMC_TYPE_D && - ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) + clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm; + if (!smc_clc_msg_acc_conf_valid(clc_v2)) return false; trl = (struct smc_clc_msg_trail *) - ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl)); + ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) - + sizeof(*trl)); break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; @@ -327,9 +395,6 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, goto out; } - if (clcm->type == SMC_CLC_PROPOSAL && clcm->path == SMC_TYPE_N) - reason_code = SMC_CLC_DECL_VERSMISMAT; /* just V2 offered */ - /* receive the complete CLC message */ memset(&msg, 0, sizeof(struct msghdr)); if (datlen > buflen) { @@ -365,7 +430,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, dclc = (struct smc_clc_msg_decline *)clcm; reason_code = SMC_CLC_DECL_PEERDECL; smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); - if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { + if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 & + SMC_FIRST_CONTACT_MASK) { smc->conn.lgr->sync_err = 1; smc_lgr_terminate_sched(smc->conn.lgr); } @@ -377,7 +443,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } /* send CLC DECLINE message across internal TCP socket */ -int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) +int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) { struct smc_clc_msg_decline dclc; struct msghdr msg; @@ -388,8 +454,10 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); dclc.hdr.type = SMC_CLC_DECLINE; dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); - dclc.hdr.version = SMC_V1; - dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0; + dclc.hdr.version = version; + dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; + dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? + SMC_FIRST_CONTACT_MASK : 0; if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && smc_ib_is_valid_local_systemid()) memcpy(dclc.id_for_peer, local_systemid, @@ -408,18 +476,20 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) } /* send CLC PROPOSAL message across internal TCP socket */ -int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_init_info *ini) +int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) { + struct smc_clc_smcd_v2_extension *smcd_v2_ext; struct smc_clc_msg_proposal_prefix *pclc_prfx; struct smc_clc_msg_proposal *pclc_base; + struct smc_clc_smcd_gid_chid *gidchids; struct smc_clc_msg_proposal_area *pclc; struct smc_clc_ipv6_prefix *ipv6_prfx; + struct smc_clc_v2_extension *v2_ext; struct smc_clc_msg_smcd *pclc_smcd; struct smc_clc_msg_trail *trl; int len, i, plen, rc; int reason_code = 0; - struct kvec vec[5]; + struct kvec vec[8]; struct msghdr msg; pclc = kzalloc(sizeof(*pclc), GFP_KERNEL); @@ -430,56 +500,121 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, pclc_smcd = &pclc->pclc_smcd; pclc_prfx = &pclc->pclc_prfx; ipv6_prfx = pclc->pclc_prfx_ipv6; + v2_ext = &pclc->pclc_v2_ext; + smcd_v2_ext = &pclc->pclc_smcd_v2_ext; + gidchids = pclc->pclc_gidchids; trl = &pclc->pclc_trl; + pclc_base->hdr.version = SMC_V2; + pclc_base->hdr.typev1 = ini->smc_type_v1; + pclc_base->hdr.typev2 = ini->smc_type_v2; + plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl); + /* retrieve ip prefixes for CLC proposal msg */ - rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx); - if (rc) { - kfree(pclc); - return SMC_CLC_DECL_CNFERR; /* configuration error */ + if (ini->smc_type_v1 != SMC_TYPE_N) { + rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx); + if (rc) { + if (ini->smc_type_v2 == SMC_TYPE_N) { + kfree(pclc); + return SMC_CLC_DECL_CNFERR; + } + pclc_base->hdr.typev1 = SMC_TYPE_N; + } else { + pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); + plen += sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(ipv6_prfx[0]); + } } - /* send SMC Proposal CLC message */ - plen = sizeof(*pclc_base) + sizeof(*pclc_prfx) + - (pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) + - sizeof(*trl); + /* build SMC Proposal CLC message */ memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc_base->hdr.type = SMC_CLC_PROPOSAL; - pclc_base->hdr.version = SMC_V1; /* SMC version */ - pclc_base->hdr.path = smc_type; - if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) { + if (smcr_indicated(ini->smc_type_v1)) { /* add SMC-R specifics */ memcpy(pclc_base->lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE); memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1], ETH_ALEN); - pclc_base->iparea_offset = htons(0); } - if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { + if (smcd_indicated(ini->smc_type_v1)) { /* add SMC-D specifics */ - plen += sizeof(*pclc_smcd); - pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); - pclc_smcd->gid = ini->ism_dev->local_gid; + if (ini->ism_dev[0]) { + pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid); + pclc_smcd->ism.chid = + htons(smc_ism_get_chid(ini->ism_dev[0])); + } + } + if (ini->smc_type_v2 == SMC_TYPE_N) { + pclc_smcd->v2_ext_offset = 0; + } else { + u16 v2_ext_offset; + u8 *eid = NULL; + + v2_ext_offset = sizeof(*pclc_smcd) - + offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); + if (ini->smc_type_v1 != SMC_TYPE_N) + v2_ext_offset += sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(ipv6_prfx[0]); + pclc_smcd->v2_ext_offset = htons(v2_ext_offset); + v2_ext->hdr.eid_cnt = 0; + v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; + v2_ext->hdr.flag.release = SMC_RELEASE; + v2_ext->hdr.flag.seid = 1; + v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - + offsetofend(struct smc_clnt_opts_area_hdr, + smcd_v2_ext_offset) + + v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); + if (ini->ism_dev[0]) + smc_ism_get_system_eid(ini->ism_dev[0], &eid); + else + smc_ism_get_system_eid(ini->ism_dev[1], &eid); + if (eid) + memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); + plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext); + if (ini->ism_offered_cnt) { + for (i = 1; i <= ini->ism_offered_cnt; i++) { + gidchids[i - 1].gid = + htonll(ini->ism_dev[i]->local_gid); + gidchids[i - 1].chid = + htons(smc_ism_get_chid(ini->ism_dev[i])); + } + plen += ini->ism_offered_cnt * + sizeof(struct smc_clc_smcd_gid_chid); + } } pclc_base->hdr.length = htons(plen); - memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + + /* send SMC Proposal CLC message */ memset(&msg, 0, sizeof(msg)); i = 0; vec[i].iov_base = pclc_base; vec[i++].iov_len = sizeof(*pclc_base); - if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { - vec[i].iov_base = pclc_smcd; - vec[i++].iov_len = sizeof(*pclc_smcd); + vec[i].iov_base = pclc_smcd; + vec[i++].iov_len = sizeof(*pclc_smcd); + if (ini->smc_type_v1 != SMC_TYPE_N) { + vec[i].iov_base = pclc_prfx; + vec[i++].iov_len = sizeof(*pclc_prfx); + if (pclc_prfx->ipv6_prefixes_cnt > 0) { + vec[i].iov_base = ipv6_prfx; + vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt * + sizeof(ipv6_prfx[0]); + } } - vec[i].iov_base = pclc_prfx; - vec[i++].iov_len = sizeof(*pclc_prfx); - if (pclc_prfx->ipv6_prefixes_cnt > 0) { - vec[i].iov_base = ipv6_prfx; - vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt * - sizeof(ipv6_prfx[0]); + if (ini->smc_type_v2 != SMC_TYPE_N) { + vec[i].iov_base = v2_ext; + vec[i++].iov_len = sizeof(*v2_ext); + vec[i].iov_base = smcd_v2_ext; + vec[i++].iov_len = sizeof(*smcd_v2_ext); + if (ini->ism_offered_cnt) { + vec[i].iov_base = gidchids; + vec[i++].iov_len = ini->ism_offered_cnt * + sizeof(struct smc_clc_smcd_gid_chid); + } } vec[i].iov_base = trl; vec[i++].iov_len = sizeof(*trl); @@ -499,29 +634,47 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, /* build and send CLC CONFIRM / ACCEPT message */ static int smc_clc_send_confirm_accept(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm *clc, - int first_contact) + struct smc_clc_msg_accept_confirm_v2 *clc_v2, + int first_contact, u8 version) { struct smc_connection *conn = &smc->conn; + struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_first_contact_ext fce; + struct smc_clc_msg_trail trl; + struct kvec vec[3]; struct msghdr msg; - struct kvec vec; + int i, len; /* send SMC Confirm CLC msg */ - clc->hdr.version = SMC_V1; /* SMC version */ + clc = (struct smc_clc_msg_accept_confirm *)clc_v2; + clc->hdr.version = version; /* SMC version */ if (first_contact) - clc->hdr.flag = 1; + clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK; if (conn->lgr->is_smcd) { /* SMC-D specific settings */ memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); - clc->hdr.path = SMC_TYPE_D; - clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + clc->hdr.typev1 = SMC_TYPE_D; clc->d0.gid = conn->lgr->smcd->local_gid; clc->d0.token = conn->rmb_desc->token; clc->d0.dmbe_size = conn->rmbe_size_short; clc->d0.dmbe_idx = 0; memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); - memcpy(clc->d0.smcd_trl.eyecatcher, SMCD_EYECATCHER, + if (version == SMC_V1) { + clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + } else { + u8 *eid = NULL; + + clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd)); + smc_ism_get_system_eid(conn->lgr->smcd, &eid); + if (eid) + memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN); + len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) + smc_clc_fill_fce(&fce, &len); + clc_v2->hdr.length = htons(len); + } + memcpy(trl.eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); } else { struct smc_link *link = conn->lnk; @@ -530,7 +683,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, link = conn->lnk; memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); - clc->hdr.path = SMC_TYPE_R; + clc->hdr.typev1 = SMC_TYPE_R; clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); memcpy(clc->r0.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); @@ -554,29 +707,43 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address (conn->rmb_desc->sgt[link->link_idx].sgl)); hton24(clc->r0.psn, link->psn_initial); - memcpy(clc->r0.smcr_trl.eyecatcher, SMC_EYECATCHER, - sizeof(SMC_EYECATCHER)); + memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); } memset(&msg, 0, sizeof(msg)); - vec.iov_base = clc; - vec.iov_len = ntohs(clc->hdr.length); - return kernel_sendmsg(smc->clcsock, &msg, &vec, 1, + i = 0; + vec[i].iov_base = clc_v2; + if (version > SMC_V1) + vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl); + else + vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? + SMCD_CLC_ACCEPT_CONFIRM_LEN : + SMCR_CLC_ACCEPT_CONFIRM_LEN) - + sizeof(trl); + if (version > SMC_V1 && first_contact) { + vec[i].iov_base = &fce; + vec[i++].iov_len = sizeof(fce); + } + vec[i].iov_base = &trl; + vec[i++].iov_len = sizeof(trl); + return kernel_sendmsg(smc->clcsock, &msg, vec, 1, ntohs(clc->hdr.length)); } /* send CLC CONFIRM message across internal TCP socket */ -int smc_clc_send_confirm(struct smc_sock *smc) +int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, + u8 version) { - struct smc_clc_msg_accept_confirm cclc; + struct smc_clc_msg_accept_confirm_v2 cclc_v2; int reason_code = 0; int len; /* send SMC Confirm CLC msg */ - memset(&cclc, 0, sizeof(cclc)); - cclc.hdr.type = SMC_CLC_CONFIRM; - len = smc_clc_send_confirm_accept(smc, &cclc, 0); - if (len < ntohs(cclc.hdr.length)) { + memset(&cclc_v2, 0, sizeof(cclc_v2)); + cclc_v2.hdr.type = SMC_CLC_CONFIRM; + len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, + version); + if (len < ntohs(cclc_v2.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; smc->sk.sk_err = -reason_code; @@ -589,16 +756,28 @@ int smc_clc_send_confirm(struct smc_sock *smc) } /* send CLC ACCEPT message across internal TCP socket */ -int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact) +int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, + u8 version) { - struct smc_clc_msg_accept_confirm aclc; + struct smc_clc_msg_accept_confirm_v2 aclc_v2; int len; - memset(&aclc, 0, sizeof(aclc)); - aclc.hdr.type = SMC_CLC_ACCEPT; - len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact); - if (len < ntohs(aclc.hdr.length)) + memset(&aclc_v2, 0, sizeof(aclc_v2)); + aclc_v2.hdr.type = SMC_CLC_ACCEPT; + len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, + version); + if (len < ntohs(aclc_v2.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; return len > 0 ? 0 : len; } + +void __init smc_clc_init(void) +{ + struct new_utsname *u; + + memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */ + u = utsname(); + memcpy(smc_hostname, u->nodename, + min_t(size_t, strlen(u->nodename), sizeof(smc_hostname))); +} diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index fcd8521c7737ffb3525931ca78c7907f4de1405f..b3f46ab79e47217b02c983bec0fd6f17a210ced6 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -54,19 +54,19 @@ #define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */ #define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */ +#define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */ + struct smc_clc_msg_hdr { /* header1 of clc messages */ u8 eyecatcher[4]; /* eye catcher */ u8 type; /* proposal / accept / confirm / decline */ __be16 length; #if defined(__BIG_ENDIAN_BITFIELD) u8 version : 4, - flag : 1, - rsvd : 1, - path : 2; + typev2 : 2, + typev1 : 2; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 path : 2, - rsvd : 1, - flag : 1, + u8 typev1 : 2, + typev2 : 2, version : 4; #endif } __packed; /* format defined in RFC7609 */ @@ -81,8 +81,6 @@ struct smc_clc_msg_local { /* header2 of clc messages */ u8 mac[6]; /* mac of ib_device port */ }; -#define SMC_CLC_MAX_V6_PREFIX 8 - /* Struct would be 4 byte aligned, but it is used in an array that is sent * to peers and must conform to RFC7609, hence we need to use packed here. */ @@ -91,6 +89,44 @@ struct smc_clc_ipv6_prefix { u8 prefix_len; } __packed; /* format defined in RFC7609 */ +#if defined(__BIG_ENDIAN_BITFIELD) +struct smc_clc_v2_flag { + u8 release : 4, + rsvd : 3, + seid : 1; +}; +#elif defined(__LITTLE_ENDIAN_BITFIELD) +struct smc_clc_v2_flag { + u8 seid : 1, + rsvd : 3, + release : 4; +}; +#endif + +struct smc_clnt_opts_area_hdr { + u8 eid_cnt; /* number of user defined EIDs */ + u8 ism_gid_cnt; /* number of ISMv2 GIDs */ + u8 reserved1; + struct smc_clc_v2_flag flag; + u8 reserved2[2]; + __be16 smcd_v2_ext_offset; /* SMC-Dv2 Extension Offset */ +}; + +struct smc_clc_smcd_gid_chid { + __be64 gid; /* ISM GID */ + __be16 chid; /* ISMv2 CHID */ +} __packed; /* format defined in + * IBM Shared Memory Communications Version 2 + * (https://www.ibm.com/support/pages/node/6326337) + */ + +struct smc_clc_v2_extension { + struct smc_clnt_opts_area_hdr hdr; + u8 roce[16]; /* RoCEv2 GID */ + u8 reserved[16]; + u8 user_eids[0][SMC_MAX_EID_LEN]; +}; + struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ __be32 outgoing_subnet; /* subnet mask */ u8 prefix_len; /* number of significant bits in mask */ @@ -99,8 +135,15 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ } __aligned(4); struct smc_clc_msg_smcd { /* SMC-D GID information */ - u64 gid; /* ISM GID of requestor */ - u8 res[32]; + struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */ + __be16 v2_ext_offset; /* SMC Version 2 Extension Offset */ + u8 reserved[28]; +}; + +struct smc_clc_smcd_v2_extension { + u8 system_eid[SMC_MAX_EID_LEN]; + u8 reserved[16]; + struct smc_clc_smcd_gid_chid gidchid[0]; }; struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ @@ -109,11 +152,16 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ __be16 iparea_offset; /* offset to IP address information area */ } __aligned(4); +#define SMC_CLC_MAX_V6_PREFIX 8 + struct smc_clc_msg_proposal_area { struct smc_clc_msg_proposal pclc_base; struct smc_clc_msg_smcd pclc_smcd; struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX]; + struct smc_clc_v2_extension pclc_v2_ext; + struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext; + struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS]; struct smc_clc_msg_trail pclc_trl; }; @@ -134,11 +182,9 @@ struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */ __be64 rmb_dma_addr; /* RMB virtual address */ u8 reserved2; u8 psn[3]; /* packet sequence number */ - struct smc_clc_msg_trail smcr_trl; - /* eye catcher "SMCR" EBCDIC */ } __packed; -struct smcd_clc_msg_accept_confirm { /* SMCD accept/confirm */ +struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ u64 gid; /* Sender GID */ u64 token; /* DMB token */ u8 dmbe_idx; /* DMBE index */ @@ -150,26 +196,63 @@ struct smcd_clc_msg_accept_confirm { /* SMCD accept/confirm */ dmbe_size : 4; #endif u16 reserved4; - u32 linkid; /* Link identifier */ - u32 reserved5[3]; - struct smc_clc_msg_trail smcd_trl; - /* eye catcher "SMCD" EBCDIC */ + __be32 linkid; /* Link identifier */ } __packed; +#define SMC_CLC_OS_ZOS 1 +#define SMC_CLC_OS_LINUX 2 +#define SMC_CLC_OS_AIX 3 + +struct smc_clc_first_contact_ext { + u8 reserved1; +#if defined(__BIG_ENDIAN_BITFIELD) + u8 os_type : 4, + release : 4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 release : 4, + os_type : 4; +#endif + u8 reserved2[2]; + u8 hostname[SMC_MAX_HOSTNAME_LEN]; +}; + struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ - struct smcd_clc_msg_accept_confirm d0; /* SMC-D */ + struct { /* SMC-D */ + struct smcd_clc_msg_accept_confirm_common d0; + u32 reserved5[3]; + }; }; } __packed; /* format defined in RFC7609 */ +struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ + struct smc_clc_msg_hdr hdr; + union { + struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ + struct { /* SMC-D */ + struct smcd_clc_msg_accept_confirm_common d0; + __be16 chid; + u8 eid[SMC_MAX_EID_LEN]; + u8 reserved5[8]; + }; + }; +}; + struct smc_clc_msg_decline { /* clc decline message */ struct smc_clc_msg_hdr hdr; u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */ __be32 peer_diagnosis; /* diagnosis information */ - u8 reserved2[4]; - struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 os_type : 4, + reserved : 4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved : 4, + os_type : 4; +#endif + u8 reserved2[3]; + struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ } __aligned(4); /* determine start of the prefix area within the proposal message */ @@ -180,16 +263,58 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); } +static inline bool smcr_indicated(int smc_type) +{ + return smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B; +} + +static inline bool smcd_indicated(int smc_type) +{ + return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B; +} + /* get SMC-D info from proposal message */ static inline struct smc_clc_msg_smcd * smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) { - if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd)) + if (smcd_indicated(prop->hdr.typev1) && + ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd)) return NULL; return (struct smc_clc_msg_smcd *)(prop + 1); } +static inline struct smc_clc_v2_extension * +smc_get_clc_v2_ext(struct smc_clc_msg_proposal *prop) +{ + struct smc_clc_msg_smcd *prop_smcd = smc_get_clc_msg_smcd(prop); + + if (!prop_smcd || !ntohs(prop_smcd->v2_ext_offset)) + return NULL; + + return (struct smc_clc_v2_extension *) + ((u8 *)prop_smcd + + offsetof(struct smc_clc_msg_smcd, v2_ext_offset) + + sizeof(prop_smcd->v2_ext_offset) + + ntohs(prop_smcd->v2_ext_offset)); +} + +static inline struct smc_clc_smcd_v2_extension * +smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext) +{ + if (!prop_v2ext) + return NULL; + if (!ntohs(prop_v2ext->hdr.smcd_v2_ext_offset)) + return NULL; + + return (struct smc_clc_smcd_v2_extension *) + ((u8 *)prop_v2ext + + offsetof(struct smc_clc_v2_extension, hdr) + + offsetof(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + + sizeof(prop_v2ext->hdr.smcd_v2_ext_offset) + + ntohs(prop_v2ext->hdr.smcd_v2_ext_offset)); +} + struct smcd_dev; struct smc_init_info; @@ -197,10 +322,12 @@ int smc_clc_prfx_match(struct socket *clcsock, struct smc_clc_msg_proposal_prefix *prop); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type, unsigned long timeout); -int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); -int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_init_info *ini); -int smc_clc_send_confirm(struct smc_sock *smc); -int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact); +int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version); +int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini); +int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, + u8 version); +int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, + u8 version); +void smc_clc_init(void) __init; #endif diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index c811ae1a8addfcc842167d96fd9dde1db0ca4b0a..f1dbb5025c0b8f86db9df40ec4559cd8a2be74e1 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -375,7 +375,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) int i; if (ini->is_smcd && ini->vlan_id) { - if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) { + if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected], + ini->vlan_id)) { rc = SMC_CLC_DECL_ISMVLANERR; goto out; } @@ -412,13 +413,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->conns_all = RB_ROOT; if (ini->is_smcd) { /* SMC-D specific settings */ - get_device(&ini->ism_dev->dev); - lgr->peer_gid = ini->ism_peer_gid; - lgr->smcd = ini->ism_dev; - lgr_list = &ini->ism_dev->lgr_list; + get_device(&ini->ism_dev[ini->ism_selected]->dev); + lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected]; + lgr->smcd = ini->ism_dev[ini->ism_selected]; + lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list; lgr_lock = &lgr->smcd->lgr_lock; + lgr->smc_version = ini->smcd_version; lgr->peer_shutdown = 0; - atomic_inc(&ini->ism_dev->lgr_cnt); + atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt); } else { /* SMC-R specific settings */ lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; @@ -449,7 +451,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) kfree(lgr); ism_put_vlan: if (ini->is_smcd && ini->vlan_id) - smc_ism_put_vlan(ini->ism_dev, ini->vlan_id); + smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id); out: if (rc < 0) { if (rc == -ENOMEM) @@ -1288,8 +1290,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) spinlock_t *lgr_lock; int rc = 0; - lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list; - lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock; + lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list : + &smc_lgr_list.list; + lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock : + &smc_lgr_list.lock; ini->first_contact_local = 1; role = smc->listen_smc ? SMC_SERV : SMC_CLNT; if (role == SMC_CLNT && ini->first_contact_peer) @@ -1301,7 +1305,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) list_for_each_entry(lgr, lgr_list, list) { write_lock_bh(&lgr->conns_lock); if ((ini->is_smcd ? - smcd_lgr_match(lgr, ini->ism_dev, ini->ism_peer_gid) : + smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected], + ini->ism_peer_gid[ini->ism_selected]) : smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && !lgr->sync_err && lgr->vlan_id == ini->vlan_id && diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 37a5903789b0a24e8d5888bf3d2e05dd9ad6aff5..f1e867ce2e6307b65ba31ec280361e0d0116cb20 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -231,6 +231,11 @@ struct smc_link_group { u8 freeing : 1; /* lgr is being freed */ bool is_smcd; /* SMC-R or SMC-D */ + u8 smc_version; + u8 negotiated_eid[SMC_MAX_EID_LEN]; + u8 peer_os; /* peer operating system */ + u8 peer_smc_release; + u8 peer_hostname[SMC_MAX_HOSTNAME_LEN]; union { struct { /* SMC-R */ enum smc_lgr_role role; @@ -291,6 +296,8 @@ struct smc_clc_msg_local; struct smc_init_info { u8 is_smcd; + u8 smc_type_v1; + u8 smc_type_v2; u8 first_contact_peer; u8 first_contact_local; unsigned short vlan_id; @@ -301,8 +308,12 @@ struct smc_init_info { u8 ib_port; u32 ib_clcqpn; /* SMC-D */ - u64 ism_peer_gid; - struct smcd_dev *ism_dev; + u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1]; + struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1]; + u16 ism_chid[SMC_MAX_ISM_DEVS + 1]; + u8 ism_offered_cnt; /* # of ISM devices offered */ + u8 ism_selected; /* index of selected ISM dev*/ + u8 smcd_version; }; /* Find the connection associated with the given alert token in the link group. diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 998c525de785690426929732bdfc73f32dc731b0..e9a6487a42cbd14c4cf73b31bb0ab17bfd1d825b 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -21,7 +21,9 @@ struct smcd_dev_list smcd_dev_list = { .mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex) }; -/* Test if an ISM communication is possible. */ +bool smc_ism_v2_capable; + +/* Test if an ISM communication is possible - same CPC */ int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) { return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0, @@ -39,6 +41,16 @@ int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos, return rc < 0 ? rc : 0; } +void smc_ism_get_system_eid(struct smcd_dev *smcd, u8 **eid) +{ + smcd->ops->get_system_eid(smcd, eid); +} + +u16 smc_ism_get_chid(struct smcd_dev *smcd) +{ + return smcd->ops->get_chid(smcd); +} + /* Set a connection using this DMBE. */ void smc_ism_set_conn(struct smc_connection *conn) { @@ -319,7 +331,18 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev); int smcd_register_dev(struct smcd_dev *smcd) { mutex_lock(&smcd_dev_list.mutex); - list_add_tail(&smcd->list, &smcd_dev_list.list); + if (list_empty(&smcd_dev_list.list)) { + u8 *system_eid = NULL; + + smc_ism_get_system_eid(smcd, &system_eid); + if ((*system_eid) + 24 != '0' || (*system_eid) + 28 != '0') + smc_ism_v2_capable = true; + } + /* sort list: devices without pnetid before devices with pnetid */ + if (smcd->pnetid[0]) + list_add_tail(&smcd->list, &smcd_dev_list.list); + else + list_add(&smcd->list, &smcd_dev_list.list); mutex_unlock(&smcd_dev_list.mutex); pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", @@ -399,3 +422,8 @@ void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) spin_unlock_irqrestore(&smcd->lock, flags); } EXPORT_SYMBOL_GPL(smcd_handle_irq); + +void __init smc_ism_init(void) +{ + smc_ism_v2_capable = false; +} diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 81cc4537efd38592cf26e01c809f1752e48489bf..8048e09ddcf86295ee101532f033f340d134a59f 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -19,7 +19,10 @@ struct smcd_dev_list { /* List of SMCD devices */ struct mutex mutex; /* Protects list of devices */ }; -extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ +extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ +extern bool smc_ism_v2_capable; /* HW supports ISM V2 and thus + * System EID is defined + */ struct smc_ism_vlanid { /* VLAN id set on ISM device */ struct list_head list; @@ -47,4 +50,7 @@ int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, void *data, size_t len); int smc_ism_signal_shutdown(struct smc_link_group *lgr); +void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid); +u16 smc_ism_get_chid(struct smcd_dev *dev); +void smc_ism_init(void); #endif diff --git a/net/smc/smc_netns.h b/net/smc/smc_netns.h index e7a8fc4ae02f5d7017bc13b4e082ddd42e370dec..0f4f35aa43ad0e18d7ed3f614a0189b3613ae7bd 100644 --- a/net/smc/smc_netns.h +++ b/net/smc/smc_netns.h @@ -16,5 +16,6 @@ extern unsigned int smc_net_id; /* per-network namespace private data */ struct smc_net { struct smc_pnettable pnettable; + struct smc_pnetids_ndev pnetids_ndev; }; #endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 70684c49510e6a010285d0346f38542827b2a58c..f3c18b991d35c42442b332fbe280e976a8fd6517 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -29,8 +29,7 @@ #include "smc_ism.h" #include "smc_core.h" -#define SMC_ASCII_BLANK 32 - +static struct net_device *__pnet_find_base_ndev(struct net_device *ndev); static struct net_device *pnet_find_base_ndev(struct net_device *ndev); static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { @@ -73,14 +72,22 @@ struct smc_pnetentry { }; }; +/* Check if the pnetid is set */ +bool smc_pnet_is_pnetid_set(u8 *pnetid) +{ + if (pnetid[0] == 0 || pnetid[0] == _S) + return false; + return true; +} + /* Check if two given pnetids match */ static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) { int i; for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { - if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && - (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) + if ((pnetid1[i] == 0 || pnetid1[i] == _S) && + (pnetid2[i] == 0 || pnetid2[i] == _S)) break; if (pnetid1[i] != pnetid2[i]) return false; @@ -238,11 +245,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, char *pnet_name) { - u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; mutex_lock(&smc_ib_devices.mutex); - if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { + if (!smc_pnet_is_pnetid_set(ib_dev->pnetid[ib_port - 1])) { memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, SMC_MAX_PNETID_LEN); ib_dev->pnetid_by_user[ib_port - 1] = true; @@ -256,11 +262,10 @@ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, */ static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) { - u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; mutex_lock(&smcd_dev_list.mutex); - if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { + if (!smc_pnet_is_pnetid_set(smcd_dev->pnetid)) { memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); smcd_dev->pnetid_by_user = true; applied = true; @@ -708,10 +713,115 @@ static struct genl_family smc_pnet_nl_family __ro_after_init = { .n_ops = ARRAY_SIZE(smc_pnet_ops) }; +bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnetids_ndev_entry *pe; + bool rc = false; + + read_lock(&sn->pnetids_ndev.lock); + list_for_each_entry(pe, &sn->pnetids_ndev.list, list) { + if (smc_pnet_match(pnetid, pe->pnetid)) { + rc = true; + goto unlock; + } + } + +unlock: + read_unlock(&sn->pnetids_ndev.lock); + return rc; +} + +static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnetids_ndev_entry *pe, *pi; + + pe = kzalloc(sizeof(*pe), GFP_KERNEL); + if (!pe) + return -ENOMEM; + + write_lock(&sn->pnetids_ndev.lock); + list_for_each_entry(pi, &sn->pnetids_ndev.list, list) { + if (smc_pnet_match(pnetid, pe->pnetid)) { + refcount_inc(&pi->refcnt); + kfree(pe); + goto unlock; + } + } + refcount_set(&pe->refcnt, 1); + memcpy(pe->pnetid, pnetid, SMC_MAX_PNETID_LEN); + list_add_tail(&pe->list, &sn->pnetids_ndev.list); + +unlock: + write_unlock(&sn->pnetids_ndev.lock); + return 0; +} + +static void smc_pnet_remove_pnetid(struct net *net, u8 *pnetid) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnetids_ndev_entry *pe, *pe2; + + write_lock(&sn->pnetids_ndev.lock); + list_for_each_entry_safe(pe, pe2, &sn->pnetids_ndev.list, list) { + if (smc_pnet_match(pnetid, pe->pnetid)) { + if (refcount_dec_and_test(&pe->refcnt)) { + list_del(&pe->list); + kfree(pe); + } + break; + } + } + write_unlock(&sn->pnetids_ndev.lock); +} + +static void smc_pnet_add_base_pnetid(struct net *net, struct net_device *dev, + u8 *ndev_pnetid) +{ + struct net_device *base_dev; + + base_dev = __pnet_find_base_ndev(dev); + if (base_dev->flags & IFF_UP && + !smc_pnetid_by_dev_port(base_dev->dev.parent, base_dev->dev_port, + ndev_pnetid)) { + /* add to PNETIDs list */ + smc_pnet_add_pnetid(net, ndev_pnetid); + } +} + +/* create initial list of netdevice pnetids */ +static void smc_pnet_create_pnetids_list(struct net *net) +{ + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct net_device *dev; + + rtnl_lock(); + for_each_netdev(net, dev) + smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); + rtnl_unlock(); +} + +/* clean up list of netdevice pnetids */ +static void smc_pnet_destroy_pnetids_list(struct net *net) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnetids_ndev_entry *pe, *temp_pe; + + write_lock(&sn->pnetids_ndev.lock); + list_for_each_entry_safe(pe, temp_pe, &sn->pnetids_ndev.list, list) { + list_del(&pe->list); + kfree(pe); + } + write_unlock(&sn->pnetids_ndev.lock); +} + static int smc_pnet_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(event_dev); + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; switch (event) { case NETDEV_REBOOT: @@ -721,6 +831,17 @@ static int smc_pnet_netdev_event(struct notifier_block *this, case NETDEV_REGISTER: smc_pnet_add_by_ndev(event_dev); return NOTIFY_OK; + case NETDEV_UP: + smc_pnet_add_base_pnetid(net, event_dev, ndev_pnetid); + return NOTIFY_OK; + case NETDEV_DOWN: + event_dev = __pnet_find_base_ndev(event_dev); + if (!smc_pnetid_by_dev_port(event_dev->dev.parent, + event_dev->dev_port, ndev_pnetid)) { + /* remove from PNETIDs list */ + smc_pnet_remove_pnetid(net, ndev_pnetid); + } + return NOTIFY_OK; default: return NOTIFY_DONE; } @@ -735,9 +856,14 @@ int smc_pnet_net_init(struct net *net) { struct smc_net *sn = net_generic(net, smc_net_id); struct smc_pnettable *pnettable = &sn->pnettable; + struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev; INIT_LIST_HEAD(&pnettable->pnetlist); rwlock_init(&pnettable->lock); + INIT_LIST_HEAD(&pnetids_ndev->list); + rwlock_init(&pnetids_ndev->lock); + + smc_pnet_create_pnetids_list(net); return 0; } @@ -752,6 +878,7 @@ int __init smc_pnet_init(void) rc = register_netdevice_notifier(&smc_netdev_notifier); if (rc) genl_unregister_family(&smc_pnet_nl_family); + return rc; } @@ -760,6 +887,7 @@ void smc_pnet_net_exit(struct net *net) { /* flush pnet table */ smc_pnet_remove_by_pnetid(net, NULL); + smc_pnet_destroy_pnetids_list(net); } void smc_pnet_exit(void) @@ -768,16 +896,11 @@ void smc_pnet_exit(void) genl_unregister_family(&smc_pnet_nl_family); } -/* Determine one base device for stacked net devices. - * If the lower device level contains more than one devices - * (for instance with bonding slaves), just the first device - * is used to reach a base device. - */ -static struct net_device *pnet_find_base_ndev(struct net_device *ndev) +static struct net_device *__pnet_find_base_ndev(struct net_device *ndev) { int i, nest_lvl; - rtnl_lock(); + ASSERT_RTNL(); nest_lvl = ndev->lower_level; for (i = 0; i < nest_lvl; i++) { struct list_head *lower = &ndev->adj_list.lower; @@ -787,6 +910,18 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev) lower = lower->next; ndev = netdev_lower_get_next(ndev, &lower); } + return ndev; +} + +/* Determine one base device for stacked net devices. + * If the lower device level contains more than one devices + * (for instance with bonding slaves), just the first device + * is used to reach a base device. + */ +static struct net_device *pnet_find_base_ndev(struct net_device *ndev) +{ + rtnl_lock(); + ndev = __pnet_find_base_ndev(ndev); rtnl_unlock(); return ndev; } @@ -929,10 +1064,10 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && !ismdev->going_away && - (!ini->ism_peer_gid || - !smc_ism_cantalk(ini->ism_peer_gid, ini->vlan_id, + (!ini->ism_peer_gid[0] || + !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id, ismdev))) { - ini->ism_dev = ismdev; + ini->ism_dev[0] = ismdev; break; } } @@ -966,7 +1101,7 @@ void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(sk); - ini->ism_dev = NULL; + ini->ism_dev[0] = NULL; if (!dst) goto out; if (!dst->dev) diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 811a659866918721c750ef753a0473f090c3df6b..14039272f7e4263a3d86f29d0c4f048731af387f 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -12,6 +12,8 @@ #ifndef _SMC_PNET_H #define _SMC_PNET_H +#include <net/smc.h> + #if IS_ENABLED(CONFIG_HAVE_PNETID) #include <asm/pnet.h> #endif @@ -31,6 +33,17 @@ struct smc_pnettable { struct list_head pnetlist; }; +struct smc_pnetids_ndev { /* list of pnetids for net devices in UP state*/ + struct list_head list; + rwlock_t lock; +}; + +struct smc_pnetids_ndev_entry { + struct list_head list; + u8 pnetid[SMC_MAX_PNETID_LEN]; + refcount_t refcnt; +}; + static inline int smc_pnetid_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid) { @@ -52,4 +65,6 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcd); void smc_pnet_find_alt_roce(struct smc_link_group *lgr, struct smc_init_info *ini, struct smc_ib_device *known_dev); +bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid); +bool smc_pnet_is_pnetid_set(u8 *pnetid); #endif