提交 81cd3891 编写于 作者: D Don Hiatt 提交者: Jason Gunthorpe

IB/hfi1: Add support for 16B Management Packets

16B Management Packets (L4=0x08) replace the BTH and DETH
of normal MAD packets with a header containing the
source and destination queue pair numbers; fields that
were originally retrieved from the BTH/DETH are now populated
from this header as well as from the 16B LRH (e.g. pkey).

16B Management Packets are used as an optimized management
format on 16B fabrics.

These management packets have an opcode of IB_OPCODE_UD_SEND_ONLY,
a fixed 3Byte pad, and a header length of 24Bytes.

The decision as to when we send a management packet is based
upon either the source or destination queue pair number being
0 or 1.
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Don Hiatt <don.hiatt@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
上级 4171a693
...@@ -1483,38 +1483,51 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) ...@@ -1483,38 +1483,51 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data; struct hfi1_ibport *ibp = &ppd->ibport_data;
u8 l4; u8 l4;
u8 grh_len;
packet->hdr = (struct hfi1_16b_header *) packet->hdr = (struct hfi1_16b_header *)
hfi1_get_16B_header(packet->rcd->dd, hfi1_get_16B_header(packet->rcd->dd,
packet->rhf_addr); packet->rhf_addr);
packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
l4 = hfi1_16B_get_l4(packet->hdr); l4 = hfi1_16B_get_l4(packet->hdr);
if (l4 == OPA_16B_L4_IB_LOCAL) { if (l4 == OPA_16B_L4_IB_LOCAL) {
grh_len = 0;
packet->ohdr = packet->ebuf; packet->ohdr = packet->ebuf;
packet->grh = NULL; packet->grh = NULL;
packet->opcode = ib_bth_get_opcode(packet->ohdr);
packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
/* hdr_len_by_opcode already has an IB LRH factored in */
packet->hlen = hdr_len_by_opcode[packet->opcode] +
(LRH_16B_BYTES - LRH_9B_BYTES);
packet->migrated = opa_bth_is_migration(packet->ohdr);
} else if (l4 == OPA_16B_L4_IB_GLOBAL) { } else if (l4 == OPA_16B_L4_IB_GLOBAL) {
u32 vtf; u32 vtf;
u8 grh_len = sizeof(struct ib_grh);
grh_len = sizeof(struct ib_grh);
packet->ohdr = packet->ebuf + grh_len; packet->ohdr = packet->ebuf + grh_len;
packet->grh = packet->ebuf; packet->grh = packet->ebuf;
packet->opcode = ib_bth_get_opcode(packet->ohdr);
packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
/* hdr_len_by_opcode already has an IB LRH factored in */
packet->hlen = hdr_len_by_opcode[packet->opcode] +
(LRH_16B_BYTES - LRH_9B_BYTES) + grh_len;
packet->migrated = opa_bth_is_migration(packet->ohdr);
if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
goto drop; goto drop;
vtf = be32_to_cpu(packet->grh->version_tclass_flow); vtf = be32_to_cpu(packet->grh->version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop; goto drop;
} else if (l4 == OPA_16B_L4_FM) {
packet->mgmt = packet->ebuf;
packet->ohdr = NULL;
packet->grh = NULL;
packet->opcode = IB_OPCODE_UD_SEND_ONLY;
packet->pad = OPA_16B_L4_FM_PAD;
packet->hlen = OPA_16B_L4_FM_HLEN;
packet->migrated = false;
} else { } else {
goto drop; goto drop;
} }
/* Query commonly used fields from packet header */ /* Query commonly used fields from packet header */
packet->opcode = ib_bth_get_opcode(packet->ohdr);
/* hdr_len_by_opcode already has an IB LRH factored in */
packet->hlen = hdr_len_by_opcode[packet->opcode] +
(LRH_16B_BYTES - LRH_9B_BYTES) + grh_len;
packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES; packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES;
packet->slid = hfi1_16B_get_slid(packet->hdr); packet->slid = hfi1_16B_get_slid(packet->hdr);
packet->dlid = hfi1_16B_get_dlid(packet->hdr); packet->dlid = hfi1_16B_get_dlid(packet->hdr);
...@@ -1524,10 +1537,8 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) ...@@ -1524,10 +1537,8 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
16B); 16B);
packet->sc = hfi1_16B_get_sc(packet->hdr); packet->sc = hfi1_16B_get_sc(packet->hdr);
packet->sl = ibp->sc_to_sl[packet->sc]; packet->sl = ibp->sc_to_sl[packet->sc];
packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
packet->extra_byte = SIZE_OF_LT; packet->extra_byte = SIZE_OF_LT;
packet->pkey = hfi1_16B_get_pkey(packet->hdr); packet->pkey = hfi1_16B_get_pkey(packet->hdr);
packet->migrated = opa_bth_is_migration(packet->ohdr);
if (hfi1_bypass_ingress_pkt_check(packet)) if (hfi1_bypass_ingress_pkt_check(packet))
goto drop; goto drop;
......
...@@ -333,6 +333,7 @@ struct hfi1_packet { ...@@ -333,6 +333,7 @@ struct hfi1_packet {
struct rvt_qp *qp; struct rvt_qp *qp;
struct ib_other_headers *ohdr; struct ib_other_headers *ohdr;
struct ib_grh *grh; struct ib_grh *grh;
struct opa_16b_mgmt *mgmt;
u64 rhf; u64 rhf;
u32 maxcnt; u32 maxcnt;
u32 rhqoff; u32 rhqoff;
...@@ -397,6 +398,12 @@ struct hfi1_packet { ...@@ -397,6 +398,12 @@ struct hfi1_packet {
#define OPA_16B_L4_IB_GLOBAL 0x0A #define OPA_16B_L4_IB_GLOBAL 0x0A
#define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR #define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR
/*
* OPA 16B Management
*/
#define OPA_16B_L4_FM_PAD 3 /* fixed 3B pad */
#define OPA_16B_L4_FM_HLEN 24 /* 16B(16) + L4_FM(8) */
static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr) static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr)
{ {
return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK); return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK);
...@@ -473,6 +480,27 @@ static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr) ...@@ -473,6 +480,27 @@ static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr)
OPA_16B_BTH_PAD_MASK); OPA_16B_BTH_PAD_MASK);
} }
/*
* 16B Management
*/
#define OPA_16B_MGMT_QPN_MASK 0xFFFFFF
/*
 * Return the destination queue pair number carried in a 16B management
 * header: the dest_qpn field is converted with be32_to_cpu() and then
 * limited to the bits selected by OPA_16B_MGMT_QPN_MASK.
 */
static inline u32 hfi1_16B_get_dest_qpn(struct opa_16b_mgmt *mgmt)
{
	u32 raw_dest = be32_to_cpu(mgmt->dest_qpn);

	return raw_dest & OPA_16B_MGMT_QPN_MASK;
}
/*
 * Return the source queue pair number carried in a 16B management
 * header: the src_qpn field is converted with be32_to_cpu() and then
 * limited to the bits selected by OPA_16B_MGMT_QPN_MASK.
 */
static inline u32 hfi1_16B_get_src_qpn(struct opa_16b_mgmt *mgmt)
{
	u32 raw_src = be32_to_cpu(mgmt->src_qpn);

	return raw_src & OPA_16B_MGMT_QPN_MASK;
}
/*
 * Store the destination and source queue pair numbers into a 16B
 * management header.  Each value is first masked with
 * OPA_16B_MGMT_QPN_MASK and then written via cpu_to_be32().
 */
static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt,
				    u32 dest_qp, u32 src_qp)
{
	u32 masked_dest = dest_qp & OPA_16B_MGMT_QPN_MASK;
	u32 masked_src = src_qp & OPA_16B_MGMT_QPN_MASK;

	mgmt->dest_qpn = cpu_to_be32(masked_dest);
	mgmt->src_qpn = cpu_to_be32(masked_src);
}
struct rvt_sge_state; struct rvt_sge_state;
/* /*
......
...@@ -399,16 +399,30 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ...@@ -399,16 +399,30 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_pportdata *ppd; struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp; struct hfi1_ibport *ibp;
u32 dlid, slid, nwords, extra_bytes; u32 dlid, slid, nwords, extra_bytes;
u32 dest_qp = wqe->ud_wr.remote_qpn;
u32 src_qp = qp->ibqp.qp_num;
u16 len, pkey; u16 len, pkey;
u8 l4, sc5; u8 l4, sc5;
bool is_mgmt = false;
ibp = to_iport(qp->ibqp.device, qp->port_num); ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp); ppd = ppd_from_ibp(ibp);
ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
/* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */
ps->s_txreq->hdr_dwords = 9; /*
if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) * Build 16B Management Packet if either the destination
ps->s_txreq->hdr_dwords++; * or source queue pair number is 0 or 1.
*/
if (dest_qp == 0 || src_qp == 0 || dest_qp == 1 || src_qp == 1) {
/* header size in dwords 16B LRH+L4_FM = (16+8)/4. */
ps->s_txreq->hdr_dwords = 6;
is_mgmt = true;
} else {
/* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */
ps->s_txreq->hdr_dwords = 9;
if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
ps->s_txreq->hdr_dwords++;
}
/* SW provides space for CRC and LT for bypass packets. */ /* SW provides space for CRC and LT for bypass packets. */
extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2), extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2),
...@@ -453,7 +467,14 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ...@@ -453,7 +467,14 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1)); ((1 << ppd->lmc) - 1));
hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); if (is_mgmt) {
l4 = OPA_16B_L4_FM;
pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt,
dest_qp, src_qp);
} else {
hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true);
}
/* Convert dwords to flits */ /* Convert dwords to flits */
len = (ps->s_txreq->hdr_dwords + nwords) >> 1; len = (ps->s_txreq->hdr_dwords + nwords) >> 1;
...@@ -845,10 +866,8 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, ...@@ -845,10 +866,8 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
*/ */
void hfi1_ud_rcv(struct hfi1_packet *packet) void hfi1_ud_rcv(struct hfi1_packet *packet)
{ {
struct ib_other_headers *ohdr = packet->ohdr;
u32 hdrsize = packet->hlen; u32 hdrsize = packet->hlen;
struct ib_wc wc; struct ib_wc wc;
u32 qkey;
u32 src_qp; u32 src_qp;
u16 pkey; u16 pkey;
int mgmt_pkey_idx = -1; int mgmt_pkey_idx = -1;
...@@ -864,27 +883,35 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) ...@@ -864,27 +883,35 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 dlid = packet->dlid; u32 dlid = packet->dlid;
u32 slid = packet->slid; u32 slid = packet->slid;
u8 extra_bytes; u8 extra_bytes;
u8 l4 = 0;
bool dlid_is_permissive; bool dlid_is_permissive;
bool slid_is_permissive; bool slid_is_permissive;
bool solicited = false;
extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2); extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2);
qkey = ib_get_qkey(ohdr);
src_qp = ib_get_sqpn(ohdr);
if (packet->etype == RHF_RCV_TYPE_BYPASS) { if (packet->etype == RHF_RCV_TYPE_BYPASS) {
u32 permissive_lid = u32 permissive_lid =
opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B); opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B);
l4 = hfi1_16B_get_l4(packet->hdr);
pkey = hfi1_16B_get_pkey(packet->hdr); pkey = hfi1_16B_get_pkey(packet->hdr);
dlid_is_permissive = (dlid == permissive_lid); dlid_is_permissive = (dlid == permissive_lid);
slid_is_permissive = (slid == permissive_lid); slid_is_permissive = (slid == permissive_lid);
} else { } else {
pkey = ib_bth_get_pkey(ohdr); pkey = ib_bth_get_pkey(packet->ohdr);
dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE)); dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE));
slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE)); slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE));
} }
sl_from_sc = ibp->sc_to_sl[sc5]; sl_from_sc = ibp->sc_to_sl[sc5];
if (likely(l4 != OPA_16B_L4_FM)) {
src_qp = ib_get_sqpn(packet->ohdr);
solicited = ib_bth_is_solicited(packet->ohdr);
} else {
src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
}
process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/* /*
* Get the number of bytes the message was padded by * Get the number of bytes the message was padded by
...@@ -922,8 +949,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) ...@@ -922,8 +949,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (mgmt_pkey_idx < 0) if (mgmt_pkey_idx < 0)
goto drop; goto drop;
} }
if (unlikely(qkey != qp->qkey)) /* Silent drop */ if (unlikely(l4 != OPA_16B_L4_FM &&
return; ib_get_qkey(packet->ohdr) != qp->qkey))
return; /* Silent drop */
/* Drop invalid MAD packets (see 13.5.3.1). */ /* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 && if (unlikely(qp->ibqp.qp_num == 1 &&
...@@ -950,7 +978,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) ...@@ -950,7 +978,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (qp->ibqp.qp_num > 1 && if (qp->ibqp.qp_num > 1 &&
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
wc.ex.imm_data = ohdr->u.ud.imm_data; wc.ex.imm_data = packet->ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM; wc.wc_flags = IB_WC_WITH_IMM;
tlen -= sizeof(u32); tlen -= sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) { } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
...@@ -1047,8 +1075,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) ...@@ -1047,8 +1075,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num; wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */ /* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited);
ib_bth_is_solicited(ohdr));
return; return;
drop: drop:
......
...@@ -617,7 +617,12 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ...@@ -617,7 +617,12 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
wake_up(&mcast->wait); wake_up(&mcast->wait);
} else { } else {
/* Get the destination QP number. */ /* Get the destination QP number. */
qp_num = ib_bth_get_qpn(packet->ohdr); if (packet->etype == RHF_RCV_TYPE_BYPASS &&
hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM)
qp_num = hfi1_16B_get_dest_qpn(packet->mgmt);
else
qp_num = ib_bth_get_qpn(packet->ohdr);
rcu_read_lock(); rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp) if (!packet->qp)
...@@ -1308,21 +1313,23 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ...@@ -1308,21 +1313,23 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{ {
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv; struct hfi1_qp_priv *priv = qp->priv;
struct ib_other_headers *ohdr; struct ib_other_headers *ohdr = NULL;
send_routine sr; send_routine sr;
int ret; int ret;
u16 pkey; u16 pkey;
u32 slid; u32 slid;
u8 l4 = 0;
/* locate the pkey within the headers */ /* locate the pkey within the headers */
if (ps->s_txreq->phdr.hdr.hdr_type) { if (ps->s_txreq->phdr.hdr.hdr_type) {
struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah; struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah;
u8 l4 = hfi1_16B_get_l4(hdr);
if (l4 == OPA_16B_L4_IB_GLOBAL) l4 = hfi1_16B_get_l4(hdr);
ohdr = &hdr->u.l.oth; if (l4 == OPA_16B_L4_IB_LOCAL)
else
ohdr = &hdr->u.oth; ohdr = &hdr->u.oth;
else if (l4 == OPA_16B_L4_IB_GLOBAL)
ohdr = &hdr->u.l.oth;
slid = hfi1_16B_get_slid(hdr); slid = hfi1_16B_get_slid(hdr);
pkey = hfi1_16B_get_pkey(hdr); pkey = hfi1_16B_get_pkey(hdr);
} else { } else {
...@@ -1337,7 +1344,11 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ...@@ -1337,7 +1344,11 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
pkey = ib_bth_get_pkey(ohdr); pkey = ib_bth_get_pkey(ohdr);
} }
ps->opcode = ib_bth_get_opcode(ohdr); if (likely(l4 != OPA_16B_L4_FM))
ps->opcode = ib_bth_get_opcode(ohdr);
else
ps->opcode = IB_OPCODE_UD_SEND_ONLY;
sr = get_send_routine(qp, ps); sr = get_send_routine(qp, ps);
ret = egress_pkey_check(dd->pport, slid, pkey, ret = egress_pkey_check(dd->pport, slid, pkey,
priv->s_sc, qp->s_pkey_index); priv->s_sc, qp->s_pkey_index);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册